-- Module:User:Benwing2/ky-noun
--
-- From Wiktionary, the free dictionary


local export = {}


--[=[

Authorship: Ben Wing <benwing2>

]=]

--[=[

TERMINOLOGY:

-- "slot" = A particular combination of case/number.
	 Example slot names for nouns are "gen_s" (genitive singular) and
	 "abl_p" (ablative plural). Each slot is filled with zero or more forms.

-- "form" = The declined Kyrgyz form representing the value of a given slot.

-- "lemma" = The dictionary form of a given Kyrgyz term. Generally the nominative
	 singular, but may occasionally be another form (e.g. the nominative plural)
	 if the nominative singular is missing.
]=]

local lang = require("Module:languages").getByCode("ky")
local m_table = require("Module:table")
local m_string_utilities = require("Module:string utilities")
local m_script_utilities = require("Module:script utilities")
local iut = require("Module:inflection utilities")
local m_para = require("Module:parameters")

local current_title = mw.title.getCurrentTitle()
local NAMESPACE = current_title.nsText
local PAGENAME = current_title.text

local u = mw.ustring.char
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rgmatch = mw.ustring.gmatch
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local usub = mw.ustring.sub
local uupper = mw.ustring.upper
local ulower = mw.ustring.lower

-- Wrapper around rsubn() that returns only the substituted string and drops
-- the substitution count.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call to its first return value.
	return (rsubn(term, foo, bar))
end

-- Wrapper around rsubn() that returns the substituted string followed by a
-- boolean that is true when at least one substitution took place.
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end


-- All (lowercase) vowel letters recognized by this module, and the same set as
-- a pattern character class.
local vowel = "аоөүуыэяёюие"
local vowel_c = "[" .. vowel .. "]"

-- Final consonants are split into й/р and the remaining voiced consonants
-- because some endings group й/р with the vowels (see `vowel_or_jr` below).
local jr = "йр"
local voiced_cons_not_jr = "дмлнңбвгжз"

-- Named sets of letters. Each value is a plain string of letters that gets
-- wrapped in [...] to form a pattern character class when an ending is chosen.
-- Declared `local` so that loading the module does not create a global.
local letter_classes = {
	-- Subclasses of vowels. These are named after the predominant vowel of the associated suffix.
	a_type_vowel = "аыяюу", -- low or high back vowels
	e_type_vowel = "эие", -- front unrounded vowels
	o_type_vowel = "оё", -- mid back vowels
	oe_type_vowel = "өү", -- front rounded vowels
	y_type_vowel = "аыя", -- back unrounded vowels
	u_type_vowel = "оёюу", -- back rounded vowels

	-- Subclasses of final letters.
	vowel = vowel,
	vowel_or_jr = vowel .. jr,
	voiced_cons_not_jr = voiced_cons_not_jr,
	voiced_cons = voiced_cons_not_jr .. jr,
	unvoiced_cons = "кпстфхчцшщ",
}


-- Table of all noun slots handled by this module. Keys are slot names of the
-- form CASE_NUMBER (`s` = singular, `p` = plural); values are the matching
-- "CASE|NUMBER" strings. The table is passed as the slot table to
-- iut.show_forms_with_translit() in show_forms().
-- NOTE(review): the values look like inflection tag specs used by
-- Module:inflection utilities -- confirm against that module.
local output_noun_slots = {
	nom_s = "nom|s",
	gen_s = "gen|s",
	dat_s = "dat|s",
	acc_s = "acc|s",
	loc_s = "loc|s",
	abl_s = "abl|s",
	nom_p = "nom|p",
	gen_p = "gen|p",
	dat_p = "dat|p",
	acc_p = "acc|p",
	loc_p = "loc|p",
	abl_p = "abl|p",
}


-- Decide whether `slot` must be left empty for this noun: plural slots
-- (ending in "_p") are skipped when decl_spec.number is "sg", and singular
-- slots (ending in "_s") when it is "pl". The result is only meant to be used
-- for its truthiness: a match position when the slot is skipped, otherwise
-- false or nil.
local function skip_slot(decl_spec, slot)
	local number = decl_spec.number
	if number == "sg" then
		local plural_pos = rfind(slot, "_p$")
		if plural_pos then
			return plural_pos
		end
		return false
	end
	if number == "pl" then
		-- Parentheses keep only the first return value of rfind().
		return (rfind(slot, "_s$"))
	end
	return false
end


-- Generate the declined forms of `lemma` and store them in `decl_spec.forms`.
--
-- `decl_spec` is the declension object; its `number` field is consulted (via
-- skip_slot()) to leave out the slots that don't apply, and its `forms` table
-- receives the generated forms through iut.insert_form().
-- `lemma` is the singular stem to which the endings are attached.
--
-- Each ending is selected in two steps: the class of the lemma's last vowel
-- picks the vowel of the ending, and the class of the lemma's last letter
-- picks the ending's initial consonant.
--
-- Throws an error if the lemma contains no recognized vowel, or if its last
-- vowel or last letter doesn't fall into any class listed for a slot.
local function decline_noun(decl_spec, lemma)
	local last_letter = usub(lemma, -1)
	-- Lowercase before searching so that a capitalized last vowel (e.g. in a
	-- one-syllable proper noun) is still classified; `letter_classes` only
	-- lists lowercase letters. The greedy `.*` makes the capture land on the
	-- LAST vowel of the lemma.
	local last_vowel = rmatch(ulower(lemma), "^.*(" .. vowel_c .. ")")
	if not last_vowel then
		error("Lemma '" .. lemma .. "' doesn't contain any recognized vowel")
	end

	-- Add the form for `slot`. `endings` maps a vowel class name to a table
	-- that maps a final-letter class name to the ending. Within each table the
	-- classes are mutually exclusive, so the unspecified iteration order of
	-- pairs() doesn't affect the result.
	local function add(slot, endings)
		if skip_slot(decl_spec, slot) then
			return
		end
		for class, second_endings in pairs(endings) do
			assert(letter_classes[class], "Unrecognized letter class " .. class)
			if rmatch(last_vowel, "[" .. letter_classes[class] .. "]") then
				for class2, ending in pairs(second_endings) do
					assert(letter_classes[class2], "Unrecognized letter class " .. class2)
					if rmatch(last_letter, "[" .. letter_classes[class2] .. "]") then
						iut.insert_form(decl_spec.forms, slot, {form=lemma .. ending})
						return
					end
				end
				error("Last letter '" .. last_letter .. "' of lemma '" .. lemma .. "' doesn't match any known letter class")
			end
		end
		error("Last vowel '" .. last_vowel .. "' of lemma '" .. lemma .. "' doesn't match any known letter class")
	end

	-- The nominative singular is the lemma itself.
	if not skip_slot(decl_spec, "nom_s") then
		iut.insert_form(decl_spec.forms, "nom_s", {form=lemma})
	end

	add("nom_p", {
		a_type_vowel = { vowel_or_jr = "лар", voiced_cons_not_jr = "дар", unvoiced_cons = "тар" },
		e_type_vowel = { vowel_or_jr = "лер", voiced_cons_not_jr = "дер", unvoiced_cons = "тер" },
		o_type_vowel = { vowel_or_jr = "лор", voiced_cons_not_jr = "дор", unvoiced_cons = "тор" },
		oe_type_vowel = { vowel_or_jr = "лөр", voiced_cons_not_jr = "дөр", unvoiced_cons = "төр" },
	})
	add("gen_s", {
		y_type_vowel = { vowel = "нын", voiced_cons = "дын", unvoiced_cons = "тын" },
		e_type_vowel = { vowel = "нин", voiced_cons = "дин", unvoiced_cons = "тин" },
		u_type_vowel = { vowel = "нун", voiced_cons = "дун", unvoiced_cons = "тун" },
		oe_type_vowel = { vowel = "нүн", voiced_cons = "дүн", unvoiced_cons = "түн" },
	})
	add("gen_p", {
		a_type_vowel = { vowel_or_jr = "лардын", voiced_cons_not_jr = "дардын", unvoiced_cons = "тардын" },
		e_type_vowel = { vowel_or_jr = "лердин", voiced_cons_not_jr = "дердин", unvoiced_cons = "тердин" },
		o_type_vowel = { vowel_or_jr = "лордун", voiced_cons_not_jr = "дордун", unvoiced_cons = "тордун" },
		oe_type_vowel = { vowel_or_jr = "лөрдүн", voiced_cons_not_jr = "дөрдүн", unvoiced_cons = "төрдүн" },
	})
	add("dat_s", {
		a_type_vowel = { vowel = "га", voiced_cons = "га", unvoiced_cons = "ка" },
		e_type_vowel = { vowel = "ге", voiced_cons = "ге", unvoiced_cons = "ке" },
		o_type_vowel = { vowel = "го", voiced_cons = "го", unvoiced_cons = "ко" },
		oe_type_vowel = { vowel = "гө", voiced_cons = "гө", unvoiced_cons = "кө" },
	})
	add("dat_p", {
		a_type_vowel = { vowel_or_jr = "ларга", voiced_cons_not_jr = "дарга", unvoiced_cons = "тарга" },
		e_type_vowel = { vowel_or_jr = "лерге", voiced_cons_not_jr = "дерге", unvoiced_cons = "терге" },
		o_type_vowel = { vowel_or_jr = "лорго", voiced_cons_not_jr = "дорго", unvoiced_cons = "торго" },
		oe_type_vowel = { vowel_or_jr = "лөргө", voiced_cons_not_jr = "дөргө", unvoiced_cons = "төргө" },
	})
	add("acc_s", {
		y_type_vowel = { vowel = "ны", voiced_cons = "ды", unvoiced_cons = "ты" },
		e_type_vowel = { vowel = "ни", voiced_cons = "ди", unvoiced_cons = "ти" },
		u_type_vowel = { vowel = "ну", voiced_cons = "ду", unvoiced_cons = "ту" },
		oe_type_vowel = { vowel = "нү", voiced_cons = "дү", unvoiced_cons = "тү" },
	})
	add("acc_p", {
		a_type_vowel = { vowel_or_jr = "ларды", voiced_cons_not_jr = "дарды", unvoiced_cons = "тарды" },
		e_type_vowel = { vowel_or_jr = "лерди", voiced_cons_not_jr = "дерди", unvoiced_cons = "терди" },
		o_type_vowel = { vowel_or_jr = "лорду", voiced_cons_not_jr = "дорду", unvoiced_cons = "торду" },
		oe_type_vowel = { vowel_or_jr = "лөрдү", voiced_cons_not_jr = "дөрдү", unvoiced_cons = "төрдү" },
	})
	add("loc_s", {
		a_type_vowel = { vowel = "да", voiced_cons = "да", unvoiced_cons = "та" },
		e_type_vowel = { vowel = "де", voiced_cons = "де", unvoiced_cons = "те" },
		o_type_vowel = { vowel = "до", voiced_cons = "до", unvoiced_cons = "то" },
		oe_type_vowel = { vowel = "дө", voiced_cons = "дө", unvoiced_cons = "тө" },
	})
	add("loc_p", {
		a_type_vowel = { vowel_or_jr = "ларда", voiced_cons_not_jr = "дарда", unvoiced_cons = "тарда" },
		e_type_vowel = { vowel_or_jr = "лерде", voiced_cons_not_jr = "дерде", unvoiced_cons = "терде" },
		o_type_vowel = { vowel_or_jr = "лордо", voiced_cons_not_jr = "дордо", unvoiced_cons = "тордо" },
		oe_type_vowel = { vowel_or_jr = "лөрдө", voiced_cons_not_jr = "дөрдө", unvoiced_cons = "төрдө" },
	})
	add("abl_s", {
		a_type_vowel = { vowel = "дан", voiced_cons = "дан", unvoiced_cons = "тан" },
		e_type_vowel = { vowel = "ден", voiced_cons = "ден", unvoiced_cons = "тен" },
		o_type_vowel = { vowel = "дон", voiced_cons = "дон", unvoiced_cons = "тон" },
		oe_type_vowel = { vowel = "дөн", voiced_cons = "дөн", unvoiced_cons = "төн" },
	})
	add("abl_p", {
		a_type_vowel = { vowel_or_jr = "лардан", voiced_cons_not_jr = "дардан", unvoiced_cons = "тардан" },
		e_type_vowel = { vowel_or_jr = "лерден", voiced_cons_not_jr = "дерден", unvoiced_cons = "терден" },
		o_type_vowel = { vowel_or_jr = "лордон", voiced_cons_not_jr = "дордон", unvoiced_cons = "тордон" },
		oe_type_vowel = { vowel_or_jr = "лөрдөн", voiced_cons_not_jr = "дөрдөн", unvoiced_cons = "төрдөн" },
	})
end


-- Work out both the categories the noun belongs in and the annotation shown in
-- the declension title bar. Both depend on the same information
-- (decl_spec.number), so they are derived together. The results are stored in
-- decl_spec.categories (a list of category names) and decl_spec.annotation
-- (a string, empty when the noun has both numbers).
local function compute_categories_and_annotation(decl_spec)
	-- Per-number category suffix and title-bar annotation.
	local number_info = {
		sg = { cattype = "uncountable nouns", annotation = "sg-only" },
		pl = { cattype = "pluralia tantum", annotation = "pl-only" },
	}
	local info = number_info[decl_spec.number]

	local categories = {}
	local annotation = ""
	if info then
		m_table.insertIfNot(categories, "Kyrgyz " .. info.cattype)
		annotation = info.annotation
	end

	decl_spec.annotation = annotation
	decl_spec.categories = categories
end


-- Hand the generated forms to iut.show_forms_with_translit() for display
-- formatting. The lemma list passed along is taken from the nominative
-- singular forms if that slot exists, otherwise from the nominative plural
-- forms, and is empty if neither slot exists.
local function show_forms(decl_spec)
	local forms = decl_spec.forms
	local lemma_source = forms.nom_s or forms.nom_p or {}

	local lemmas = {}
	for _, entry in ipairs(lemma_source) do
		table.insert(lemmas, entry.form)
	end

	-- Forms are passed through unchanged when canonicalized.
	local function identity(form)
		return form
	end

	iut.show_forms_with_translit(forms, lemmas, output_noun_slots, {
		lang = lang,
		canonicalize = identity,
	})
end


-- Build the wikitext of the declension table for `decl_spec`.
--
-- One of three templates is used depending on decl_spec.number ("sg", "pl",
-- anything else = both numbers). The `{...}` placeholders in the template are
-- filled in by m_string_utilities.format() from decl_spec.forms, into which
-- this function first stores the title, the annotation and the tagged Kyrgyz
-- grammatical terms.
local function make_table(decl_spec)
	local forms = decl_spec.forms

	-- Template for nouns that have both singular and plural.
	local both_numbers_template = [=[
<div class="NavFrame" style="display: inline-block;min-width: 45em">
<div class="NavHead" style="background:#eff7ff" >{title}{annotation}</div>
<div class="NavContent">
{\op}| style="background:#F9F9F9;text-align:center;min-width:45em" class="inflection-table"
|-
! style="width:33%;background:#d9ebff" |
! style="background:#d9ebff" | singular {zhekelik}
! style="background:#d9ebff" | plural {koeptoegoen}
|-
!style="background:#eff7ff"|nominative {atooch}
| {nom_s}
| {nom_p}
|-
!style="background:#eff7ff"|genitive {ilik}
| {gen_s}
| {gen_p}
|-
!style="background:#eff7ff"|dative {barysh}
| {dat_s}
| {dat_p}
|-
!style="background:#eff7ff"|accusative {tabysh}
| {acc_s}
| {acc_p}
|-
!style="background:#eff7ff"|locative {zhatysh}
| {loc_s}
| {loc_p}
|-
!style="background:#eff7ff"|ablative {chygysh}
| {abl_s}
| {abl_p}
|{\cl}</div></div>]=]

	-- Template for singular-only nouns.
	local singular_only_template = [=[
<div class="NavFrame" style="width:30em">
<div class="NavHead" style="background:#eff7ff">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="background:#F9F9F9;text-align:center;width:30em" class="inflection-table"
|-
! style="width:33%;background:#d9ebff" |
! style="background:#d9ebff" | singular {zhekelik}
|-
!style="background:#eff7ff"|nominative {atooch}
| {nom_s}
|-
!style="background:#eff7ff"|genitive {ilik}
| {gen_s}
|-
!style="background:#eff7ff"|dative {barysh}
| {dat_s}
|-
!style="background:#eff7ff"|accusative {tabysh}
| {acc_s}
|-
!style="background:#eff7ff"|locative {zhatysh}
| {loc_s}
|-
!style="background:#eff7ff"|ablative {chygysh}
| {abl_s}
|{\cl}</div></div>]=]

	-- Template for plural-only nouns.
	local plural_only_template = [=[
<div class="NavFrame" style="width:30em">
<div class="NavHead" style="background:#eff7ff">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="background:#F9F9F9;text-align:center;width:30em" class="inflection-table"
|-
! style="width:33%;background:#d9ebff" |
! style="background:#d9ebff" | plural {koeptoegoen}
|-
!style="background:#eff7ff"|nominative {atooch}
| {nom_p}
|-
!style="background:#eff7ff"|genitive {ilik}
| {gen_p}
|-
!style="background:#eff7ff"|dative {barysh}
| {dat_p}
|-
!style="background:#eff7ff"|accusative {tabysh}
| {acc_p}
|-
!style="background:#eff7ff"|locative {zhatysh}
| {loc_p}
|-
!style="background:#eff7ff"|ablative {chygysh}
| {abl_p}
|{\cl}</div></div>]=]

	-- Wrap `text` in parentheses and render it at a smaller font size.
	local function parenthesize_small(text)
		return "(<span style=\"font-size: smaller;\">" .. text .. "</span>)"
	end

	-- An explicitly supplied title wins over the default one built from the lemma.
	forms.title = decl_spec.title or
		('Declension of <i lang="ky" class="Cyrl">' .. forms.lemma .. '</i>')

	-- A non-empty annotation is shown after the title, separated by a space.
	local note = decl_spec.annotation
	if note ~= "" then
		note = " " .. parenthesize_small(note)
	end
	forms.annotation = note

	-- grammatical terms used in the table: {placeholder name, Kyrgyz term}
	local grammatical_terms = {
		{"zhekelik", "жекелик"},
		{"koeptoegoen", "көптөгөн"},
		{"atooch", "атооч"},
		{"ilik", "илик"},
		{"barysh", "барыш"},
		{"tabysh", "табыш"},
		{"zhatysh", "жатыш"},
		{"chygysh", "чыгыш"},
	}
	for _, term in ipairs(grammatical_terms) do
		forms[term[1]] = parenthesize_small(m_script_utilities.tag_text(term[2], lang))
	end

	local template_by_number = {
		sg = singular_only_template,
		pl = plural_only_template,
	}
	local chosen_template = template_by_number[decl_spec.number] or both_numbers_template
	return m_string_utilities.format(chosen_template, forms)
end


-- Externally callable function to parse the template arguments and decline a
-- noun.
--
-- `parent_args` are the template arguments: 1= is the lemma (defaulting to the
-- page name) and title= optionally overrides the table title.
-- `number` must be "sg", "pl" or "both"; anything else raises an error.
--
-- Return value is `decl_spec`, an object with the fields `title`, `number`,
-- `forms`, `annotation` and `categories`. The declined forms are in
-- `decl_spec.forms` for each slot; slots that don't apply to the noun's number
-- are missing. The value for a given slot is a list of objects of the form
-- {form=FORM}, as inserted by iut.insert_form().
function export.do_generate_forms(parent_args, number)
	if number ~= "sg" and number ~= "pl" and number ~= "both" then
		-- tostring() so that a nil or non-string `number` produces this message
		-- instead of an "attempt to concatenate" error.
		error("Internal error: number (arg 1) must be 'sg', 'pl' or 'both': '" .. tostring(number) .. "'")
	end

	local params = {
		[1] = {},
		title = {},
	}

	local args = m_para.process(parent_args, params)
	local decl_spec = {
		title = args.title,
		forms = {},
		number = number,
	}
	local lemma = args[1] or PAGENAME
	if number == "pl" then
		-- A plural-only lemma is given in the nominative plural. Strip the
		-- plural ending (д/т/л + а/е/о/ө + р) to recover the stem that
		-- decline_noun() attaches the endings to.
		local sg_lemma = rmatch(lemma, "(.*)[дтл][аеоө]р$")
		if not sg_lemma then
			error("Plural lemma doesn't end with nominative plural ending (-лар, -дер, -тор, etc.): " .. lemma)
		end
		lemma = sg_lemma
	end
	decline_noun(decl_spec, lemma)
	compute_categories_and_annotation(decl_spec)
	return decl_spec
end


-- Entry point for {{ky-decl-noun}}, {{ky-decl-noun-sg}} and {{ky-decl-noun-pl}}.
-- The invocation's own argument 1= (required) is passed to
-- export.do_generate_forms() as the number, while the noun's arguments are
-- taken from the parent (template) frame. Returns the declension table
-- followed by the formatted categories.
function export.show(frame)
	local iargs = m_para.process(frame.args, { [1] = {required = true} })
	local template_args = frame:getParent().args
	local decl_spec = export.do_generate_forms(template_args, iargs[1])
	show_forms(decl_spec)

	-- Render the table before formatting the categories, then join the two.
	local rendered_table = make_table(decl_spec)
	local rendered_categories = require("Module:utilities").format_categories(decl_spec.categories, lang)
	return rendered_table .. rendered_categories
end

return export