Jump to content

Module:User:Benwing2/hi-adjective

From Wiktionary, the free dictionary


local export = {}


--[=[

Authorship: Ben Wing <benwing2>

]=]

--[=[

TERMINOLOGY:

-- "slot" = A particular combination of case/gender/number.
	 Example slot names for adjectives are "dir_m_s" (direct masculine singular) and
	 "voc_f_p" (vocative feminine plural). Each slot is filled with zero or more forms.

-- "form" = The declined Hindi form representing the value of a given slot.

-- "lemma" = The dictionary form of a given Hindi term. Generally the nominative
     masculine singular, but may occasionally be another form if the nominative
	 masculine singular is missing.
]=]

local lang = require("Module:languages").getByCode("hi")
local m_links = require("Module:links")
local m_table = require("Module:table")
local m_string_utilities = require("Module:string utilities")
local iut = require("Module:User:Benwing2/inflection utilities")
local m_para = require("Module:parameters")
local com = require("Module:hi-common")

local u = mw.ustring.char
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rgmatch = mw.ustring.gmatch
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local uupper = mw.ustring.upper


-- vowel diacritics; don't display nicely on their own
local M = u(0x0901)
local N = u(0x0902)
local AA = u(0x093e)
local AAM = AA .. M
local E = u(0x0947)
local EN = E .. N
local I = u(0x093f)
local II = u(0x0940)
local IIN = II .. N
local TILDE = u(0x0303)


-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar)
	local retval = rsubn(term, foo, bar)
	return retval
end


-- version of rsubn() that returns a 2nd argument boolean indicating whether
-- a substitution was made.
local function rsubb(term, foo, bar)
	local retval, nsubs = rsubn(term, foo, bar)
	return retval, nsubs > 0
end


local function tag_text(text)
	return m_script_utilities.tag_text(text, lang)
end


local adjective_slots = {
	dir_m_s = "dir|m|s",
	obl_m_s = "obl|m|s",
	voc_m_s = "voc|m|s",
	dir_m_p = "dir|m|p",
	obl_m_p = "obl|m|p",
	voc_m_p = "voc|m|p",
	dir_f_s = "dir|f|s",
	obl_f_s = "obl|f|s",
	voc_f_s = "voc|f|s",
	dir_f_p = "dir|f|p",
	obl_f_p = "obl|f|p",
	voc_f_p = "voc|f|p",
}

local adjective_slots_with_linked = m_table.shallowcopy(adjective_slots)
adjective_slots_with_linked["dir_m_s_linked"] = "dir|m|s"


local function add(base, stem, translit_stem, slot, ending, footnotes)
	com.add_form(base, stem, translit_stem, slot, ending, footnotes)
end


local function add_decl(base, stem, translit_stem,
	dir_m_s, obl_m_s, voc_m_s, dir_m_p, obl_m_p, voc_m_p,
	dir_f_s, obl_f_s, voc_f_s, dir_f_p, obl_f_p, voc_f_p,
	footnotes)
	assert(stem)
	add(base, stem, translit_stem, "dir_m_s", dir_m_s, footnotes)
	add(base, stem, translit_stem, "obl_m_s", obl_m_s, footnotes)
	add(base, stem, translit_stem, "voc_m_s", voc_m_s, footnotes)
	add(base, stem, translit_stem, "dir_m_p", dir_m_p, footnotes)
	add(base, stem, translit_stem, "obl_m_p", obl_m_p, footnotes)
	add(base, stem, translit_stem, "voc_m_p", voc_m_p, footnotes)
	add(base, stem, translit_stem, "dir_f_s", dir_f_s, footnotes)
	add(base, stem, translit_stem, "obl_f_s", obl_f_s, footnotes)
	add(base, stem, translit_stem, "voc_f_s", voc_f_s, footnotes)
	add(base, stem, translit_stem, "dir_f_p", dir_f_p, footnotes)
	add(base, stem, translit_stem, "obl_f_p", obl_f_p, footnotes)
	add(base, stem, translit_stem, "voc_f_p", voc_f_p, footnotes)
end


local decls = {}
local declprops = {}

decls["ā"] = function(base)
	if rfind(base.lemma, "या$") then
		local stem, translit_stem = com.strip_ending(base, "या")
		add_decl(base, stem, translit_stem, "या", "ए", "ए", "ए", "ए", "ए", "ई", "ई", "ई", "ई", "ई", "ई")
		add_decl(base, stem, translit_stem, nil, "ये", "ये", "ये", "ये", "ये", "यी", "यी", "यी", "यी", "यी", "यी")
	else
		local stem, translit_stem = com.strip_ending(base, AA)
		add_decl(base, stem, translit_stem, AA, E, E, E, E, E, II, II, II, II, II, II)
	end
end

decls["ind-ā"] = function(base)
	local stem, translit_stem = com.strip_ending(base, "आ")
	add_decl(base, stem, translit_stem, "आ", "ए", "ए", "ए", "ए", "ए", "ई", "ई", "ई", "ई", "ई", "ई")
end

decls["part-ā"] = function(base)
	local stem, translit_stem = com.strip_ending(base, AA)
	add_decl(base, stem, translit_stem, AA, E, E, E, E, E, II, II, II, IIN, IIN, IIN)
end

declprops["part-ā"] = {
	desc = "ā-stem participle",
	cat = "",
}

decls["part-ind-ā"] = function(base)
	local stem, translit_stem = com.strip_ending(base, "आ")
	add_decl(base, stem, translit_stem, "आ", "ए", "ए", "ए", "ए", "ए", "ई", "ई", "ई", "ईं", "ईं", "ईं")
end

declprops["part-ind-ā"] = {
	desc = "ind-ā-stem participle",
	cat = "",
}

decls["ān"] = function(base)
	local nasal = rfind(base.lemma, M) and M or N

	if rfind(base.lemma, "या" .. nasal .. "$") then
		local stem, translit_stem = com.strip_ending(base, "या" .. nasal)
		add_decl(base, stem, translit_stem, "या" .. nasal, "एँ", "एँ", "एँ", "एँ", "एँ", "ईं", "ईं", "ईं", "ईं", "ईं", "ईं")
		add_decl(base, stem, translit_stem, nil, "यें", "यें", "यें", "यें", "यें", "यीं", "यीं", "यीं", "यीं", "यीं", "यीं")
	else
		local stem, translit_stem = com.strip_ending(base, AA .. nasal)
		add_decl(base, stem, translit_stem, AA .. nasal, EN, EN, EN, EN, EN, IIN, IIN, IIN, IIN, IIN, IIN)
	end
end

decls["ind-ān"] = function(base)
	local nasal = rfind(base.lemma, M) and M or N

	local stem, translit_stem = com.strip_ending(base, "आ" .. nasal)
	add_decl(base, stem, translit_stem, "आ" .. nasal, "एँ", "एँ", "एँ", "एँ", "एँ", "ईं", "ईं", "ईं", "ईं", "ईं", "ईं")
end

decls["indecl"] = function(base)
	local stem, translit_stem = base.lemma, base.lemma_translit
	add_decl(base, stem, translit_stem, "", "", "", "", "", "", "", "", "", "", "", "")
end

declprops["indecl"] = {
	desc = "indecl",
	cat = "indeclinable ~",
}


local function parse_indicator_spec(angle_bracket_spec)
	local inside = rmatch(angle_bracket_spec, "^<(.*)>$")
	assert(inside)
	local base = {forms = {}}
	if inside ~= "" then
		local parts = rsplit(inside, ".", true)
		for _, part in ipairs(parts) do
			if part == "$" then
				if base.indecl then
					error("Can't specify '$' twice: '" .. inside .. "'")
				end
				base.indecl = true
			elseif part == "part" then
				if base.participle then
					error("Can't specify 'part' twice: '" .. inside .. "'")
				end
				base.participle = true
			else
				error("Unrecognized indicator '" .. part .. "': '" .. inside .. "'")
			end
		end
	end
	return base
end


local function detect_indicator_spec(base)
	if base.indecl then
		base.decl = "indecl"
	elseif base.participle then
		if rfind(base.lemma, AA .. "$") then
			base.decl = "part-ā"
		elseif rfind(base.lemma, "आ$") then
			base.decl = "part-ind-ā"
		else
			error("Unrecognized participle lemma: " .. base.lemma)
		end
	elseif rfind(base.lemma, AA .. "$") then
		base.decl = "ā"
	elseif rfind(base.lemma, "आ$") then
		base.decl = "ind-ā"
	elseif rfind(base.lemma, AA .. "[" .. M .. N .. "]$") then
		base.decl = "ān"
	elseif rfind(base.lemma, "आ[" .. M .. N .. "]$") then 
		base.decl = "ind-ān"
	else
		error("Unrecognized adjective lemma: " .. base.lemma)
	end
end


local function detect_all_indicator_specs(alternant_multiword_spec)
	iut.map_word_specs(alternant_multiword_spec, function(base)
		detect_indicator_spec(base)
	end)
end


local function decline_adjective(base)
	if not decls[base.decl] then
		error("Internal error: Unrecognized declension type '" .. base.decl .. "'")
	end
	decls[base.decl](base)
	-- handle_derived_slots_and_overrides(base)
end


local function process_overrides(forms, args)
	for slot, _ in pairs(adjective_slots) do
		if args[slot] then
			forms[slot] = nil
			if args[slot] ~= "-" and args[slot] ~= "—" then
				for _, form in ipairs(rsplit(args[slot], "%s*,%s*")) do
					iut.insert_form(forms, slot, {form=form})
				end
			end
		end
	end
end


local function compute_category_and_desc(base)
	local props = declprops[base.decl]
	if props then
		return props.cat, props.desc
	end
	local ind, stem = rmatch(base.decl, "^(ind%-)(.*)$")
	if not ind then
		stem = base.decl
	end
	stem = rsub(stem, "n$", TILDE)
	if ind then
		return "independent " .. stem .. "-stem ~", "ind " .. stem .. "-stem"
	else
		return stem .. "-stem ~", stem .. "-stem"
	end
end


-- Compute the categories to add the adjective to, as well as the annotation to display in the
-- declension title bar. We combine the code to do these functions as both categories and
-- title bar contain similar information.
local function compute_categories_and_annotation(alternant_multiword_spec)
	local cats = {}
	local function insert(cattype)
		cattype = rsub(cattype, "~", alternant_multiword_spec.pos)
		m_table.insertIfNot(cats, "Hindi " .. cattype)
	end
	local annotation
	if alternant_multiword_spec.manual then
		alternant_multiword_spec.annotation = ""
	else
		local annparts = {}
		local decldescs = {}
		iut.map_word_specs(alternant_multiword_spec, function(base)
			local cat, desc = compute_category_and_desc(base)
			if cat ~= "" then
				insert(cat)
			end
			m_table.insertIfNot(decldescs, desc)
			if base.phon_lemma and base.lemma ~= base.phon_lemma then
				insert("~ with phonetic respelling")
			end
		end)
		if #decldescs == 0 then
			table.insert(annparts, "indecl")
		else
			table.insert(annparts, table.concat(decldescs, " // "))
		end
		alternant_multiword_spec.annotation = table.concat(annparts, " ")
		if #decldescs > 1 then
			insert("~ with multiple declensions")
		end
	end
	alternant_multiword_spec.categories = cats
end


local function show_forms(alternant_multiword_spec)
	local lemmas = alternant_multiword_spec.forms.dir_m_s or {}
	local props = {
		lemmas = lemmas,
		slot_table = adjective_slots_with_linked,
		lang = lang,
		include_translit = true,
		-- Explicit additional top-level footnotes only occur with {{hi-adecl-manual}}.
		footnotes = alternant_multiword_spec.footnotes,
		allow_footnote_symbols = not not alternant_multiword_spec.footnotes,
	}
	iut.show_forms(alternant_multiword_spec.forms, props)
end


local function make_table(alternant_multiword_spec)
	local forms = alternant_multiword_spec.forms

	local table_spec = [=[
<div class="NavFrame" style="display: inline-block;min-width: 50em">
<div class="NavHead" style="background:#eff7ff" >{title}{annotation}</div>
<div class="NavContent">
{\op}| style="background:#F9F9F9;text-align:center;min-width:50em" class="inflection-table"
|-
! rowspan="2" style="width:20%;background:#d9ebff" |
! colspan="2" style="background:#d9ebff" | masculine
! colspan="2" style="background:#d9ebff" | feminine
|-
! style="background:#d9ebff" | singular
! style="background:#d9ebff" | plural
! style="background:#d9ebff" | singular
! style="background:#d9ebff" | plural
|-
!style="background:#eff7ff" | direct
| {dir_m_s}
| {dir_m_p}
| {dir_f_s}
| {dir_f_p}
|-
!style="background:#eff7ff" | oblique
| {obl_m_s}
| {obl_m_p}
| {obl_f_s}
| {obl_f_p}
|-
!style="background:#eff7ff" | vocative
| {voc_m_s}
| {voc_m_p}
| {voc_f_s}
| {voc_f_p}
|{\cl}{notes_clause}</div></div>]=]

	local notes_template = [===[
<div style="width:100%;text-align:left;background:#d9ebff">
<div style="display:inline-block;text-align:left;padding-left:1em;padding-right:1em">
{footnote}
</div></div>
]===]

	if alternant_multiword_spec.title then
		forms.title = alternant_multiword_spec.title
	else
		forms.title = 'Declension of <i lang="hi" class="Deva">' .. forms.lemma .. '</i>'
	end

	local annotation = alternant_multiword_spec.annotation
	if annotation == "" then
		forms.annotation = ""
	else
		forms.annotation = " (<span style=\"font-size: smaller;\">" .. annotation .. "</span>)"
	end

	forms.notes_clause = forms.footnote ~= "" and
		m_string_utilities.format(notes_template, forms) or ""
	return m_string_utilities.format(table_spec, forms)
end


-- Externally callable function to parse and decline an adjective given
-- user-specified arguments. Return value is ALTERNANT_MULTIWORD_SPEC, an
-- object where the declined forms are in `ALTERNANT_MULTIWORD_SPEC.forms` for
-- each slot. If there are no values for a slot, the slot key will be missing.
-- The value for a given slot is a list of objects
-- {form=FORM, translit=TRANSLIT, footnotes=FOOTNOTES}.
function export.do_generate_forms(parent_args, pos, from_headword, def)
	local params = {
		[1] = {template_default = def or "अच्छा"},
		footnote = {list = true},
		title = {},
		pagename = {},
	}
	for slot, _ in ipairs(adjective_slots) do
		params[slot] = {}
	end

	local args = m_para.process(parent_args, params)

	if not args[1] then
		args[1] = ""
	end
	local parse_props = {
		parse_indicator_spec = parse_indicator_spec,
		allow_default_indicator = true,
		allow_blank_lemma = true,
	}
	local alternant_multiword_spec = iut.parse_inflected_text(args[1], parse_props)
	alternant_multiword_spec.title = args.title
	alternant_multiword_spec.footnotes = args.footnote
	alternant_multiword_spec.pos = pos or "adjectives"
	alternant_multiword_spec.forms = {}
	alternant_multiword_spec.pagename = args.pagename
	com.normalize_all_lemmas(alternant_multiword_spec, "always transliterate")
	detect_all_indicator_specs(alternant_multiword_spec)
	local inflect_props = {
		lang = lang,
		slot_table = adjective_slots_with_linked,
		inflect_word_spec = decline_adjective,
	}
	iut.inflect_multiword_or_alternant_multiword_spec(alternant_multiword_spec, inflect_props)
	process_overrides(alternant_multiword_spec.forms, args)
	compute_categories_and_annotation(alternant_multiword_spec)
	return alternant_multiword_spec
end


-- Externally callable function to parse and decline an adjective where all
-- forms are given manually. Return value is WORD_SPEC, an object where the
-- declined forms are in `WORD_SPEC.forms` for each slot. If there are no values
-- for a slot, the slot key will be missing. The value for a given slot is a
-- list of objects {form=FORM, translit=TRANSLIT, footnotes=FOOTNOTES}.
function export.do_generate_forms_manual(parent_args, pos, from_headword, def)
	local params = {
		footnote = {list = true},
		title = {},
	}
	for slot, _ in ipairs(adjective_slots) do
		params[slot] = {}
	end

	local args = m_para.process(parent_args, params)
	local alternant_multiword_spec = {
		title = args.title,
		footnotes = args.footnote,
		forms = {},
		manual = true,
	}
	process_overrides(alternant_multiword_spec.forms, args)
	set_accusative(alternant_multiword_spec)
	add_categories(alternant_multiword_spec)
	return alternant_multiword_spec
end


-- Entry point for {{hi-adecl}}. Template-callable function to parse and decline 
-- an adjective given user-specified arguments and generate a displayable table
-- of the declined forms.
function export.show(frame)
	local parent_args = frame:getParent().args
	local alternant_multiword_spec = export.do_generate_forms(parent_args)
	show_forms(alternant_multiword_spec)
	return make_table(alternant_multiword_spec) .. require("Module:utilities").format_categories(alternant_multiword_spec.categories, lang)
end


-- Entry point for {{hi-adecl-manual}}. Template-callable function to parse and
-- decline an adjective given manually-specified inflections and generate a
-- displayable table of the declined forms.
function export.show_manual(frame)
	local parent_args = frame:getParent().args
	local alternant_multiword_spec = export.do_generate_forms_manual(parent_args)
	show_forms(alternant_multiword_spec)
	return make_table(alternant_multiword_spec) .. require("Module:utilities").format_categories(alternant_multiword_spec.categories, lang)
end


-- Concatenate all forms of all slots into a single string of the form
-- "SLOT=FORM,FORM,...|SLOT=FORM,FORM,...|...". Each FORM is either a string in Devanagari or
-- (if manual translit is present) a specification of the form "FORM//TRANSLIT" where FORM is the
-- Devanagari representation of the form and TRANSLIT its manual transliteration. Embedded pipe symbols
-- (as might occur in embedded links) are converted to <!>. If INCLUDE_PROPS is given, also include
-- additional properties (currently, none). This is for use by bots.
local function concat_forms(alternant_multiword_spec, include_props)
	local ins_text = {}
	for slot, _ in pairs(adjective_slots) do
		local formtext = iut.concat_forms_in_slot(alternant_multiword_spec.forms[slot])
		if formtext then
			table.insert(ins_text, slot .. "=" .. formtext)
		end
	end
	return table.concat(ins_text, "|")
end

-- Template-callable function to parse and decline an adjective given user-specified arguments and
-- return the forms as a string of the same form as documented in concat_forms() above.
function export.generate_forms(frame)
	local include_props = frame.args["include_props"]
	local parent_args = frame:getParent().args
	local alternant_multiword_spec = export.do_generate_forms(parent_args)
	return concat_forms(alternant_multiword_spec, include_props)
end


return export