-- Module:mn-noun
-- (Wiki page chrome captured together with the source: "Appearance".)
-- This module lacks a documentation subpage. Please create it.
-- Useful links: subpage list • links • transclusions • testcases • sandbox
local export = {}
local m_table = require( "Module:table" )
local m_links = require( "Module:links" )
local m_str_utils = require("Module:string utilities")
local lang = require( "Module:languages" ).getByCode( "mn" )
local com = require( "Module:mn-common" )
local iut = require( "Module:inflection utilities" )
local put = require("Module:parse utilities")
local m_para = require( "Module:parameters" )
-- Module-level working state. `overrides` is filled by parse_indicator_spec
-- and `plural` is read and rewritten by plural().
local data = { overrides = {}, forms = {} }
-- Local aliases for the string helpers exported by Module:string utilities.
local find = m_str_utils.find
local format = m_str_utils.format
local len = m_str_utils.len
local match = m_str_utils.match
local sub = m_str_utils.sub
local gsub = m_str_utils.gsub
local lower = m_str_utils.lower
local u = m_str_utils.char
-- Pattern character class matching any one of com.FVS1..com.FVS4
-- (presumably the Mongolian free variation selectors; defined in Module:mn-common).
local FVS = "[" .. com.FVS1 .. com.FVS2 .. com.FVS3 .. com.FVS4 .. "]"
-- Single-character markers (U+FFF0..U+FFF9) appended to a lemma string so that
-- declension flags travel with it; preprocess_inflection() turns them back
-- into a params table and strips them.
local n = u( 0xFFF0 ) -- declension "n" (see hidden_n)
local g = u( 0xFFF1 ) -- declension "g" (see hidden_g)
local bor = u( 0xFFF2 ) -- borrowing
local Russian = u( 0xFFF3 ) -- borrowing marked as "Russian"
local proper = u( 0xFFF4 ) -- proper noun
local nar = u( 0xFFF5 ) -- plural type "nar"
local nuud = u( 0xFFF6 ) -- plural type "nuud"
local chuud = u( 0xFFF7 ) -- plural type "chuud"
local d = u( 0xFFF8 ) -- plural type "d"
local s = u( 0xFFF9 ) -- plural type "s"
-- Maps every output slot name to a "|"-separated tag string.
-- Keys ending in _sg/_pl are number-specific; keys without a number suffix
-- (nom, gen, ...) are the ones filled for the "spos"/"cpos" number settings in
-- handle_derived_slots_and_overrides. The consumer of the tag strings is not
-- visible in this part of the file (presumably form-of/accelerator tags).
local output_noun_slots = {
attr = "attr|form",
nom_sg = "nom|s",
gen_sg = "gen|s",
acc_sg = "acc|s",
dat_loc_sg = "dat|-|loc|s",
gen_dat_sg = "gen|-|dat|s",
abl_sg = "abl|s",
dat_abl_sg = "dat|-|abl|s",
ins_sg = "ins|s",
com_sg = "com|s",
priv_sg = "priv|s",
dirc_sg = "dirc|s",
nom_pl = "nom|p",
gen_pl = "gen|p",
acc_pl = "acc|p",
dat_loc_pl = "dat|-|loc|p",
gen_dat_pl = "gen|-|dat|p",
abl_pl = "abl|p",
dat_abl_pl = "dat|-|abl|p",
ins_pl = "ins|p",
com_pl = "com|p",
priv_pl = "priv|p",
dirc_pl = "dirc|p",
spos_indgen_sg = "spos|indgen|s",
cpos_indgen_sg = "cpos|indgen|s",
spos_indgen_pl = "spos|indgen|p",
cpos_indgen_pl = "cpos|indgen|p",
spos_indloc_sg = "spos|indloc|s",
cpos_indloc_sg = "cpos|indloc|s",
spos_indloc_pl = "spos|indloc|p",
cpos_indloc_pl = "cpos|indloc|p",
-- Reflexive-possessive counterparts of the slots above.
nom_sg_refl = "refl poss|form|of the|nom|s",
gen_sg_refl = "refl poss|form|of the|gen|s",
acc_sg_refl = "refl poss|form|of the|acc|s",
dat_loc_sg_refl = "refl poss|form|of the|dat|-|loc|s",
gen_dat_sg_refl = "refl poss|form|of the|gen|-|dat|s",
abl_sg_refl = "refl poss|form|of the|abl|s",
dat_abl_sg_refl = "refl poss|form|of the|dat|-|abl|s",
ins_sg_refl = "refl poss|form|of the|ins|s",
com_sg_refl = "refl poss|form|of the|com|s",
priv_sg_refl = "refl poss|form|of the|priv|s",
dirc_sg_refl = "refl poss|form|of the|dirc|s",
nom_pl_refl = "refl poss|form|of the|nom|p",
gen_pl_refl = "refl poss|form|of the|gen|p",
acc_pl_refl = "refl poss|form|of the|acc|p",
dat_loc_pl_refl = "refl poss|form|of the|dat|-|loc|p",
gen_dat_pl_refl = "refl poss|form|of the|gen|-|dat|p",
abl_pl_refl = "refl poss|form|of the|abl|p",
dat_abl_pl_refl = "refl poss|form|of the|dat|-|abl|p",
ins_pl_refl = "refl poss|form|of the|ins|p",
com_pl_refl = "refl poss|form|of the|com|p",
priv_pl_refl = "refl poss|form|of the|priv|p",
dirc_pl_refl = "refl poss|form|of the|dirc|p",
-- Number-less slots.
nom = "nom",
gen = "gen",
acc = "acc",
-- NOTE(review): every other dative-locative entry uses "dat|-|loc"; confirm
-- whether the missing "-" here is intentional.
dat_loc = "dat|loc",
abl = "abl",
ins = "ins",
com = "com",
priv = "priv",
dirc = "dirc",
nom_refl = "refl poss|form|of the|nom",
gen_refl = "refl poss|form|of the|gen",
acc_refl = "refl poss|form|of the|acc",
dat_loc_refl = "refl poss|form|of the|dat|-|loc",
abl_refl = "refl poss|form|of the|abl",
ins_refl = "refl poss|form|of the|ins",
com_refl = "refl poss|form|of the|com",
priv_refl = "refl poss|form|of the|priv",
dirc_refl = "refl poss|form|of the|dirc",
}
-- Same table plus the two "*_linked" slots that
-- handle_derived_slots_and_overrides fills with link-preserving lemma forms.
local output_noun_slots_with_linked = m_table.shallowCopy( output_noun_slots )
output_noun_slots_with_linked["nom_sg_linked"] = "nom|s"
output_noun_slots_with_linked["nom_pl_linked"] = "nom|p"
-- Positional index -> slot name for nouns that have both singular and plural
-- forms. Not referenced in the visible part of the file; presumably used to
-- map numbered template parameters onto slots (TODO confirm against caller).
local input_params_to_slots_both = {
[1] = "attr",
[2] = "nom_sg",
[3] = "gen_sg",
[4] = "acc_sg",
[5] = "dat_loc_sg",
[6] = "gen_dat_sg",
[7] = "abl_sg",
[8] = "dat_abl_sg",
[9] = "ins_sg",
[10] = "com_sg",
[11] = "priv_sg",
[12] = "dirc_sg",
[13] = "nom_pl",
[14] = "gen_pl",
[15] = "acc_pl",
[16] = "dat_loc_pl",
[17] = "gen_dat_pl",
[18] = "abl_pl",
[19] = "dat_abl_pl",
[20] = "ins_pl",
[21] = "com_pl",
[22] = "priv_pl",
[23] = "dirc_pl",
[24] = "spos_indgen_sg",
[25] = "cpos_indgen_sg",
[26] = "spos_indgen_pl",
[27] = "cpos_indgen_pl",
[28] = "spos_indloc_sg",
[29] = "cpos_indloc_sg",
[30] = "spos_indloc_pl",
[31] = "cpos_indloc_pl",
[32] = "nom_sg_refl",
[33] = "gen_sg_refl",
[34] = "acc_sg_refl",
[35] = "dat_loc_sg_refl",
[36] = "gen_dat_sg_refl",
[37] = "abl_sg_refl",
[38] = "dat_abl_sg_refl",
[39] = "ins_sg_refl",
[40] = "com_sg_refl",
[41] = "priv_sg_refl",
[42] = "dirc_sg_refl",
[43] = "nom_pl_refl",
[44] = "gen_pl_refl",
[45] = "acc_pl_refl",
[46] = "dat_loc_pl_refl",
[47] = "gen_dat_pl_refl",
[48] = "abl_pl_refl",
[49] = "dat_abl_pl_refl",
[50] = "ins_pl_refl",
[51] = "com_pl_refl",
[52] = "priv_pl_refl",
[53] = "dirc_pl_refl",
}
-- Positional index -> slot name, singular-only variant (attr, the singular
-- case slots, the singular independent-possessive slots, then the singular
-- reflexive slots). Consumer not visible in this part of the file.
local input_params_to_slots_sg = {
[1] = "attr",
[2] = "nom_sg",
[3] = "gen_sg",
[4] = "acc_sg",
[5] = "dat_loc_sg",
[6] = "gen_dat_sg",
[7] = "abl_sg",
[8] = "dat_abl_sg",
[9] = "ins_sg",
[10] = "com_sg",
[11] = "priv_sg",
[12] = "dirc_sg",
[13] = "spos_indgen_sg",
[14] = "cpos_indgen_sg",
[15] = "spos_indloc_sg",
[16] = "cpos_indloc_sg",
[17] = "nom_sg_refl",
[18] = "gen_sg_refl",
[19] = "acc_sg_refl",
[20] = "dat_loc_sg_refl",
[21] = "gen_dat_sg_refl",
[22] = "abl_sg_refl",
[23] = "dat_abl_sg_refl",
[24] = "ins_sg_refl",
[25] = "com_sg_refl",
[26] = "priv_sg_refl",
[27] = "dirc_sg_refl",
}
-- Positional index -> slot name, plural-only variant; mirrors the singular
-- table with every _sg slot replaced by its _pl counterpart (attr is kept).
-- Consumer not visible in this part of the file.
local input_params_to_slots_pl = {
[1] = "attr",
[2] = "nom_pl",
[3] = "gen_pl",
[4] = "acc_pl",
[5] = "dat_loc_pl",
[6] = "gen_dat_pl",
[7] = "abl_pl",
[8] = "dat_abl_pl",
[9] = "ins_pl",
[10] = "com_pl",
[11] = "priv_pl",
[12] = "dirc_pl",
[13] = "spos_indgen_pl",
[14] = "cpos_indgen_pl",
[15] = "spos_indloc_pl",
[16] = "cpos_indloc_pl",
[17] = "nom_pl_refl",
[18] = "gen_pl_refl",
[19] = "acc_pl_refl",
[20] = "dat_loc_pl_refl",
[21] = "gen_dat_pl_refl",
[22] = "abl_pl_refl",
[23] = "dat_abl_pl_refl",
[24] = "ins_pl_refl",
[25] = "com_pl_refl",
[26] = "priv_pl_refl",
[27] = "dirc_pl_refl",
}
-- Positional index -> slot name for the number-less slots (the ones filled
-- when data.number is "spos" or "cpos"), followed by their reflexive
-- counterparts. Consumer not visible in this part of the file.
local input_params_to_slots_poss = {
[1] = "nom",
[2] = "gen",
[3] = "acc",
[4] = "dat_loc",
[5] = "abl",
[6] = "ins",
[7] = "com",
[8] = "priv",
[9] = "dirc",
[10] = "nom_refl",
[11] = "gen_refl",
[12] = "acc_refl",
[13] = "dat_loc_refl",
[14] = "abl_refl",
[15] = "ins_refl",
[16] = "com_refl",
[17] = "priv_refl",
[18] = "dirc_refl",
}
-- Set of case prefixes accepted in override specs (checked by parse_override
-- and iterated by parse_indicator_spec).
local cases = {
attr = true,
nom = true,
gen = true,
dat_loc = true,
gen_dat = true,
acc = true,
abl = true,
dat_abl = true,
ins = true,
com = true,
priv = true,
dirc = true,
}
-- Accented spellings of the case prefixes, mapped back to the plain prefix.
-- parse_override sets `stemstressed = true` on the override when one of these
-- spellings is used.
local accented_cases = {
["áttr"] = "attr",
["nóm"] = "nom",
["gén"] = "gen",
["dátloc"] = "dat_loc",
["géndat"] = "gen_dat",
["ácc"] = "acc",
["ábl"] = "abl",
["dátabl"] = "dat_abl",
["íns"] = "ins",
["cóm"] = "com",
["prív"] = "priv",
["dírc"] = "dirc",
}
-- Reports whether `slot` should be left empty under the given number
-- restriction: a truthy value when `number` is "sg" and the slot name ends in
-- "_p", or `number` is "pl" and the slot name ends in "_s"; otherwise a falsy
-- value.
-- NOTE(review): the slot names defined in this file end in "_sg"/"_pl" (or
-- "_refl"), so neither pattern appears to match any of them — confirm before
-- changing, because handle_derived_slots_and_overrides indexes the plural
-- slots unconditionally.
local function skip_slot( number, slot )
	if number == "sg" then
		return find( slot, "_p$" ) or false
	end
	return number == "pl" and find( slot, "_s$" )
end
-- Adds a form to data.forms[slot] through iut.add_forms.
--   data            declension state; reads data.number and data.forms
--   slot            slot name
--   stem_and_ending either a string (used as the stem, with an empty ending)
--                   or a two-element table { stem, ending }; a falsy value
--                   makes the call a no-op
--   footnotes       accepted but not used by this function
-- Nothing is added when skip_slot() rejects the slot for data.number.
local function add( data, slot, stem_and_ending, footnotes )
	if not stem_and_ending or skip_slot( data.number, slot ) then
		return
	end
	local stem, ending = stem_and_ending, ""
	if type( stem_and_ending ) ~= "string" then
		stem, ending = stem_and_ending[1], stem_and_ending[2]
	end
	iut.add_forms( data.forms, slot, stem, ending, com.combine_stem_ending, lang )
end
-- Walks data.overrides and, for every slot accepted by the predicate
-- `do_slot`, clears data.forms[slot].
-- Raises an error when an override targets a slot that skip_slot() rejects for
-- data.number.
-- NOTE(review): the inner loops compute `form` and `combined_notes` but never
-- store them, so an overridden slot is simply emptied. This looks unfinished;
-- confirm how override values are meant to be inserted (full forms vs.
-- endings) before relying on it.
local function process_slot_overrides( data, do_slot )
for slot, overrides in pairs( data.overrides ) do
if skip_slot( data.number, slot ) then
error( "Override specified for invalid slot '" .. slot .. "' due to '" .. data.number .. "' number restriction" )
end
if do_slot( slot ) then
-- Discard whatever was generated for this slot.
data.forms[slot] = nil
-- Unused below. NOTE(review): slot names in this file end in "_pl", not "_p".
local slot_is_plural = find( slot, "_p$" )
for _, override in ipairs( overrides ) do
for _, value in ipairs( override.values ) do
local form = value.form
-- Merge the spec-wide footnotes with the footnotes of this value.
local combined_notes = iut.combine_footnotes( data.footnotes, value.footnotes )
end
end
end
end
end
-- Decodes the marker characters carried by `infl` into a flag table and
-- strips them from the string.
-- Returns three values:
--   1. `infl` with every marker character removed,
--   2. the flag table (n or g; bor = "Russian" or true; proper; and at most
--      one of nar / nuud / chuud / d / s, the first present in that order),
--   3. value 1 with com.stem_barrier removed as well.
local function preprocess_inflection( infl )
	local params = {}
	local function has( marker )
		return match( infl, marker )
	end

	-- Hidden-stem declension: "n" takes priority over "g".
	if has( n ) then
		params.n = true
	elseif has( g ) then
		params.g = true
	end

	-- Borrowing: the Russian marker takes priority over the generic one.
	if has( Russian ) then
		params.bor = "Russian"
	elseif has( bor ) then
		params.bor = true
	end

	if has( proper ) then
		params.proper = true
	end

	-- Plural type: only the first marker found (in this order) is recorded.
	local plural_markers = {
		{ nar, "nar" },
		{ nuud, "nuud" },
		{ chuud, "chuud" },
		{ d, "d" },
		{ s, "s" },
	}
	for _, entry in ipairs( plural_markers ) do
		if has( entry[1] ) then
			params[entry[2]] = true
			break
		end
	end

	local marker_class = "[" .. n .. g .. bor .. Russian .. proper .. nar .. nuud .. chuud .. d .. s .. "]"
	local stripped = gsub( infl, marker_class, "" )
	local infl_orig = gsub( stripped, com.stem_barrier, "" )
	return stripped, params, infl_orig
end
-- Returns the last element of the list produced by com.vowelharmony( infl, data ).
-- The helper is called once and its result reused; the previous version
-- called it twice with identical arguments.
local function vowelharmony( infl, data )
	local results = com.vowelharmony( infl, data )
	return results[#results]
end
-- Returns true when the first character of `infl` differs from its
-- lower-cased form (i.e. it is written with a capital), false otherwise.
local function propernoun( infl )
	local initial = sub( infl, 1, 1 )
	return initial ~= lower( initial )
end
-- Returns data.lemma with com.stem_barrier inserted, based on the syllable
-- split from com.syllables:
--   * one syllable: the lemma is returned unchanged;
--   * several syllables and data.bor set: the barrier goes at the end of the
--     last syllable, but before a final "и" or "ь";
--   * several syllables otherwise: the barrier goes at the end of the
--     second-to-last syllable.
local function insert_stem_barrier( data )
	local parts = com.syllables( data.lemma )
	local count = #parts
	if count > 1 then
		if data.bor then
			local final = parts[count] .. com.stem_barrier
			-- Parentheses keep only the first gsub result (the string).
			parts[count] = ( gsub( final, "([иь])" .. com.stem_barrier, com.stem_barrier .. "%1" ) )
		else
			parts[count - 1] = parts[count - 1] .. com.stem_barrier
		end
	end
	return table.concat( parts )
end
-- Appends the "hidden n" to a stem.
--   infl    stem string (the `n` marker character is stripped first)
--   params  flag table, passed to vowelharmony() when `vh` is not supplied
--   vh      optional vowel-harmony table; reads vh.Cyrl.a and vh.Mong.u
-- Returns two values: the stem with the suffix attached, and an empty table
-- (callers that write `infl, params = hidden_n(...)` therefore end up with
-- empty params). Returns nothing when no rule matches.
local function hidden_n( infl, params, vh )
infl = gsub( infl, n, "" )
local vh = vh or vowelharmony( infl, params )
-- Entry-name form of the stem without the stem barrier, used for matching only.
local infl_no_stress = gsub( (lang:makeEntryName( infl )), com.stem_barrier, "" )
-- Ordered rules: { { condition }, suffix }. A failed match leaves the inner
-- table empty, so the pairs() loop below simply skips that rule.
local matches = {
-- Cyrillic, vowel-final
{ { match( infl_no_stress, "[аеёийоөуүыэюя]$" ) },
"н"
},
-- Cyrillic, ending in ж/ч/ш/щ/ь
{ { match( infl_no_stress, "[жчшщь]$" ) },
"ин"
},
-- Cyrillic, other consonant-final: harmonic vowel + н
{ { match( infl_no_stress, "[бвгдзклмнпрстфхц]$" ) },
vh.Cyrl.a .. "н"
},
-- Mongolian script, first character range (optionally followed by an FVS)
{ { match( infl_no_stress, "[ᠠ-ᠧᠶ]" .. FVS .. "?$" ) },
"ᠨ"
},
-- Mongolian script, second character range (optionally followed by an FVS)
{ { match( infl_no_stress, "[ᠨ-ᠵᠷ-ᡂᡸ]" .. FVS .. "?$" ) },
vh.Mong.u .. "ᠨ"
}
}
-- Note: the loop variable `s` shadows the module-level marker `s` inside the loop.
for s,t in ipairs( matches ) do
for _,m in pairs( t[1] ) do
if m then
infl = gsub( infl, com.MVS, "" )
if match( infl_no_stress, "ь$" ) then
-- Drop the last character of the stem before attaching the suffix.
return sub( infl, 1, len( infl ) - 1 ) .. t[2], {}
else
return infl .. t[2], {}
end
end
end
end
end
-- Appends the "hidden g": removes the `g` marker character from `infl` and
-- returns the result followed by "г".
local function hidden_g( infl )
	local stripped = gsub( infl, g, "" )
	return stripped .. "г"
end
-- Builds the attributive form: decodes the markers, attaches the hidden n
-- when the `n` flag is set, and passes the result through
-- com.remove_penultimate_short_vowel (whose return values are returned as-is).
--   infl  stem string, possibly carrying marker characters
--   vh    optional vowel-harmony table forwarded to hidden_n
local function attributive( infl, vh )
	local stem, flags = preprocess_inflection( infl )
	if flags.n then
		stem = hidden_n( stem, flags, vh )
	end
	return com.remove_penultimate_short_vowel( stem, flags )
end
-- Builds the nominative plural of a stem.
--   infl  stem string, possibly carrying marker characters (declension,
--         borrowing, plural type)
--   vh    optional vowel-harmony table; computed from the stem when omitted
-- Returns the plural form, or nothing when no rule applies.
-- Side effect: when the module-level data.plural is { "unknown" }, it is
-- replaced by the plural type that was actually chosen ("irreg", "d", "uud").
--
-- Fix: rule conditions such as `params.nar and not match(...)` can evaluate
-- to `false`, and pairs() still visits `false` entries. The loop used to act
-- on such entries in every branch except the last one, so a Mongolian-script
-- stem with the `nar` flag received the Cyrillic " [[нар". Entries whose
-- condition is false are now skipped.
local function plural( infl, vh )
	local infl, params, infl_orig = preprocess_inflection( infl )
	if (params.n or params.nuud) and not (params.chuud or params.d or params.s) then infl = hidden_n( infl, params, vh )
	elseif params.g then infl = hidden_g( infl ) end
	local vh = vh or vowelharmony( infl, params )
	local infl_no_fv, no_fv = com.remove_final_short_vowel( infl, params )
	-- Entry-name forms without the stem barrier, used for matching only.
	local infl_no_stress = gsub( (lang:makeEntryName( infl )), com.stem_barrier, "" )
	local infl_no_fv_no_stress = gsub( (lang:makeEntryName( infl_no_fv )), com.stem_barrier, "" )
	-- Ordered rules: { { conditions... }, suffix }. The first rule with a
	-- truthy condition wins.
	local matches = {
		{ { params.nar and not match( infl_no_stress, "[ᠠ-ᡂᡸ]" .. FVS .. "?$" ) },
			" [[нар"
		},
		{ { params.chuud and not match( infl_no_stress, "[ᠠ-ᡂᡸ]" .. FVS .. "?$" ) },
			"ч" .. vh.Cyrl.uu .. "д"
		},
		{ {
			params.s and match( infl_no_stress, "[аеёийоөуүыэюя]$" ),
			params.s and match( infl_no_stress, "[влмнр]ь?$" ),
			params.s and match( infl_no_stress, "[аеёийоөуүэюяы][бгкпф]ь?$" )
			},
			"с"
		},
		{ {
			params.s and match( infl_no_stress, "[жчшщ]$" ),
			params.s and match( infl, "[джзстхцчшщ]ь$" )
			},
			"ис"
		},
		{ { params.s and match( infl_no_stress, "[бгдзкпстфхц]$" ) },
			vh.Cyrl.a .. "с"
		},
		{ {
			params.d,
			match( infl_no_stress, "ч.н$" ),
			match( infl_no_stress, "ён$" )
			},
			"д"
		},
		{ { match( infl_no_stress, "[йы]$" ) },
			"н" .. vh.Cyrl.uu .. "д"
		},
		{ {
			match( infl_no_stress, "[аеёийоөуүэюя][аоөуүэ]$" ),
			match( infl_no_fv_no_stress, "[бвгджзклмнпрстфхцчшщ][аоөуүэ]$" ), -- C[уү], and also loanwords with a word-final long vowel written as a single vowel
			match( infl_no_stress, "[еёюя]$" ) and params.bor
			},
			"г" .. vh.Cyrl.uu .. "д"
		},
		{ { match( infl_no_stress, "[иь]$" ) },
			"и" .. vh.Cyrl.u .. "д"
		},
		{ { match( infl_no_stress, "[еёюя]$" ) },
			vh.Cyrl.u .. "д"
		},
		{ { match( infl_no_stress, "[бвгджзклмнпрстфхцчшщ][аоөэ]?$" ) },
			vh.Cyrl.uu .. "д"
		},
		{ { params.nar and match( infl_no_stress, "[ᠠ-ᡂᡸ]" .. FVS .. "?$" ) },
			" [[ᠨᠠᠷ"
		},
		{ { params.chuud and match( infl_no_stress, "[ᠠ-ᡂᡸ]" .. FVS .. "?$" ) },
			"ᠴ" .. vh.Mong.u .. "ᠳ"
		},
		{ {
			params.d,
			match( infl_orig, "^ᠠᠮᠢᠲᠠᠨ$" ),
			match( infl_orig, "^ᠬᠠᠭᠠᠨ$" ),
			match( infl_orig, "^ᠬᠥᠪᠡᠭᠦᠨ$" ),
			match( infl_orig, "[ᠴᠶ][ᠠᠡᠢᠣᠤᠥᠦᠧᠶ]ᠨ$" )
			},
			"ᠳ"
		},
		{ { params.s and match( infl_no_stress, "[ᠠ-ᠧᠶ]" .. FVS .. "?$" ) },
			"ᠰ"
		},
		{ { params.s and match( infl_no_stress, "[ᠨ-ᠵᠷ-ᡂᡸ]" .. FVS .. "?$" ) },
			vh.Mong.u .. "ᠰ"
		},
		{ { match( infl_no_stress, "[ᠠ-ᠨᠶ]" .. FVS .. "?$" ) },
			com.NNBSP .. "ᠨ" .. vh.Mong.u .. "ᠭ" .. vh.Mong.u .. "ᠳ"
		},
		{ { match( infl_no_stress, "[ᠩ-ᠵᠷ-ᡂᡸ]" .. FVS .. "?$" ) },
			com.NNBSP .. vh.Mong.u .. "ᠳ"
		}
	}
	for _, t in ipairs( matches ) do
		for _, m in pairs( t[1] ) do
			if match( infl_no_stress, "^хүн$" ) or match( infl_no_stress, "^хүмүүн$" ) then
				-- Irregular plural (Cyrillic).
				if data.plural and data.plural[1] == "unknown" then
					data.plural = {"irreg"}
				end
				return "хүмүүс"
			elseif match( infl_no_stress, "^ᠬᠦᠮᠦᠨ$") then
				-- Irregular plural (Mongolian script).
				if data.plural and data.plural[1] == "unknown" then
					data.plural = {"irreg"}
				end
				return "ᠬᠦᠮᠦᠰ"
			elseif not m then
				-- Condition evaluated to false: this rule does not apply.
			elseif t[2] == "д" or t[2] == "ᠳ" then
				if data.plural and data.plural[1] == "unknown" then
					data.plural = {"d"}
				end
				-- The suffix replaces the last character of the stem.
				return sub( infl, 1, len( infl ) - 1 ) .. t[2]
			elseif t[2] == "ᠰ" then
				infl = gsub( infl, com.MVS, "" )
				return infl .. t[2]
			elseif t[2]:find("^ ") then
				-- Separate-word plural (" [[нар" / " [[ᠨᠠᠷ"); the link is left
				-- open here.
				return infl .. t[2]
			else
				if data.plural and data.plural[1] == "unknown" then
					data.plural = {"uud"}
				end
				if no_fv then
					return infl_no_fv .. t[2]
				else
					return com.remove_penultimate_short_vowel( infl .. t[2], params )
				end
			end
		end
	end
end
-- Builds the genitive form of a stem.
--   infl  stem string, possibly carrying marker characters
--   vh    optional vowel-harmony table; computed from the stem when omitted
-- Returns the form, or nothing when no rule applies.
local function genitive( infl, vh )
local infl, params = preprocess_inflection( infl )
-- hidden_n returns an empty table as its second value, so `params` is reset
-- to {} on this path (e.g. params.bor is no longer set afterwards).
if params.n then infl, params = hidden_n( infl, params, vh )
elseif params.g then infl = hidden_g( infl ) end
local vh = vh or vowelharmony( infl, params )
local infl_no_fv, no_fv = com.remove_final_short_vowel( infl, params )
-- Entry-name forms without the stem barrier, used for matching only.
local infl_no_stress = gsub( (lang:makeEntryName( infl )), com.stem_barrier, "" )
local infl_no_fv_no_stress = gsub( (lang:makeEntryName( infl_no_fv )), com.stem_barrier, "" )
-- Ordered rules: { { conditions... }, suffix }. The first rule holding a
-- truthy condition wins.
local matches = {
{ { match( infl_no_stress, "н$" ) },
vh.Cyrl.ii
},
{ {
match( infl_no_stress, "[йы]$" ),
match( infl_no_fv_no_stress, "и$" )
},
"н"
},
{ {
match( infl_no_stress, "[аеёийоөуүэюя][аоөуүэ]$" ),
match( infl_no_fv_no_stress, "[бвгджзклмнпрстфхцчшщ][аоөуүэ]$" ), -- C[уү], and also loanwords with a word-final long vowel written as a single vowel
match( infl_no_stress, "[еёюя]$" ) and params.bor
},
"гийн"
},
{ { match( infl_no_stress, "[гжикчшщь]$" ) },
"ийн"
},
{ {
match( infl_no_stress, "[бвдзлмпрстфхц]$" ),
match( infl_no_stress, "[еёюя]$" ),
match( infl_no_stress, "[бвгджзклмнпрстфхцчшщ][аоөэ]$" ) -- not C[уү]
},
vh.Cyrl.ii .. "н"
},
-- Mongolian-script rules.
{ { match( infl_no_stress, "[ᠠ-ᠧᠶ]" .. FVS .. "?$" ) },
com.NNBSP .. "ᠶᠢᠨ"
},
{ { match( infl_no_stress, "ᠨ" .. FVS .. "?$" ) },
com.NNBSP .. vh.Mong.u
},
{ { match( infl_no_stress, "[ᠩ-ᠵᠷ-ᡂᡸ]" .. FVS .. "?$" ) },
com.NNBSP .. vh.Mong.u .. "ᠨ"
}
}
for s,t in ipairs( matches ) do
for _,m in pairs( t[1] ) do
-- When com.remove_final_short_vowel reported a removal (no_fv), the
-- suffix attaches to the shortened stem.
if m and no_fv then
return infl_no_fv .. t[2]
elseif m then
return com.remove_penultimate_short_vowel( infl .. t[2], params )
end
end
end
end
-- Builds the accusative form of a stem.
--   infl  stem string, possibly carrying marker characters
--   vh    optional vowel-harmony table; computed from the stem when omitted
-- Returns the form, or nothing when no rule applies.
local function accusative( infl, vh )
local infl, params = preprocess_inflection( infl )
-- With the `n` flag the hidden n is not attached; the marker is merely
-- stripped (preprocess_inflection has already removed it, so this gsub is a
-- no-op). With the `g` flag the form is simply stem + "г".
if params.n then infl = gsub( infl, n, "" )
elseif params.g then return infl .. "г" end
local vh = vh or vowelharmony( infl, params )
local infl_no_fv, no_fv = com.remove_final_short_vowel( infl, params )
-- Entry-name forms without the stem barrier, used for matching only.
local infl_no_stress = gsub( (lang:makeEntryName( infl )), com.stem_barrier, "" )
local infl_no_fv_no_stress = gsub( (lang:makeEntryName( infl_no_fv )), com.stem_barrier, "" )
-- Ordered rules: { { conditions... }, suffix }. The first rule holding a
-- truthy condition wins.
local matches = {
{ {
match( infl_no_stress, "[йы]$" ),
match( infl_no_stress, "[аеёийоөуүэюя][аоөуүэ]$" ),
match( infl_no_fv_no_stress, "[бвгджзклмнпрстфхцчшщ][аоөуүэ]$" ), -- C[уү], and also loanwords with a word-final long vowel written as a single vowel
match( infl_no_fv_no_stress, "и$" ),
match( infl_no_stress, "[еёюя]$" ) and params.bor
},
"г"
},
{ { match( infl_no_stress, "[гжикчшщь]$" ) },
"ийг"
},
{ {
match( infl_no_stress, "[бвдзлмнпрстфхц]$" ),
match( infl_no_stress, "[еёюя]$" ),
match( infl_no_stress, "[бвгджзклмнпрстфхцчшщ][аоөэ]$" ) -- not C[уү]
},
vh.Cyrl.ii .. "г"
},
-- Mongolian-script rules.
{ { match( infl_no_stress, "[ᠠ-ᠧᠶ]" .. FVS .. "?$" ) },
com.NNBSP .. "ᠶᠢ"
},
{ { match( infl_no_stress, "[ᠨ-ᠵᠷ-ᡂᡸ]" .. FVS .. "?$" ) },
com.NNBSP .. "ᠢ"
}
}
for s,t in ipairs( matches ) do
for _,m in pairs( t[1] ) do
-- When com.remove_final_short_vowel reported a removal (no_fv), the
-- suffix attaches to the shortened stem.
if m and no_fv then
return infl_no_fv .. t[2]
elseif m then
return com.remove_penultimate_short_vowel( infl .. t[2], params )
end
end
end
end
-- Builds the dative-locative form of a stem.
--   infl  stem string, possibly carrying marker characters
--   vh    optional vowel-harmony table; computed from the stem when omitted
-- Returns the form. The last rule matches any non-empty stem, so a result is
-- produced whenever the stem is non-empty.
local function dative_locative( infl, vh )
local infl, params = preprocess_inflection( infl )
if params.n then
-- Only the first return value of hidden_n is kept here, so `params` is
-- left untouched (unlike genitive/ablative).
infl = hidden_n( infl, params, vh )
end
local vh = vh or vowelharmony( infl, params )
local syllable_count = #com.syllables( infl )
local infl_no_fv = com.remove_final_short_vowel( infl, params )
-- Entry-name forms without the stem barrier, used for matching only.
local infl_no_stress = gsub( (lang:makeEntryName( infl )), com.stem_barrier, "" )
local infl_no_fv_no_stress = gsub( (lang:makeEntryName( infl_no_fv )), com.stem_barrier, "" )
local vowel = "[аеёийоөуүэюяы]"
local consonant = "[бвгджзклмнпрстфхцчшщ]"
-- Ordered rules: { { conditions... }, suffix }. The first rule holding a
-- truthy condition wins.
local matches = {
-- Cyrillic stems taking "т".
{ {
match( infl_no_stress, "^гэр$" ),
match( infl_no_stress, "[гкф]$" ) or match( infl_no_stress, "[^бвгклмнф]п$" ),
( match( infl_no_stress, "в$" ) or match( infl_no_stress, "[^вгклмнпф]б$" ) ) and ( syllable_count ~= 1 or match( infl_no_stress, "^" .. consonant .. consonant ) ),
match( infl_no_stress, vowel .. vowel .. "[рс]$" ) or match( infl_no_stress, consonant .. "р$" ),
( match( infl_no_stress, "р$" ) or match( infl_no_stress, vowel .. "с$" ) ) and ( syllable_count ~= 1 or params.proper ),
syllable_count > 1 and match( infl_no_stress, "[уүю]" .. "[бгкпсф]ь?с$" ) and not match( infl_no_stress, "[уүю][уүю]" .. "[бгкпсф]ь?с$" ),
match( infl_no_stress, "[аеёийоөэяы]" .. "[бгкпсф]ь?с$" ),
match( infl_no_stress, vowel .. "сь?с$" ),
match( infl_no_stress, "[влмнр]ьс$" ),
match( infl_no_stress, "и[влмнр]с$" ),
},
"т"
},
-- Cyrillic stems taking "д".
{ {
match( infl_no_stress, vowel .. "$" ),
match( infl_no_stress, "[^вгклмнпф]б$" ),
match( infl_no_stress, "[влмнр]$" ),
match( infl_no_stress, "[бвгклмнпрф]ь$" ),
},
"д"
},
-- Cyrillic stems taking "ид".
{ {
match( infl_no_stress, "[жчшщ]ь?$" ),
match( infl_no_stress, "[дзстфхц]ь$" )
},
"ид"
},
-- Mongolian-script rules.
{ { match( infl_no_stress, "[ᠪ-ᠭᠰ-ᠵᠷ-ᡂᡸ]" .. FVS .. "?$" ) },
com.NNBSP .. "ᠲ" .. vh.Mong.u
},
{ { match( infl_no_stress, "[ᠠ-ᠩᠮᠯᠶ]" .. FVS .. "?$" ) },
com.NNBSP .. "ᠳ" .. vh.Mong.u
},
-- Fallback for anything not matched above: harmonic vowel + "д".
{ { match( infl_no_stress, ".$" ) },
vh.Cyrl.a .. "д"
}
}
for s,t in ipairs( matches ) do
for _,m in pairs( t[1] ) do
if m then
-- Stems ending in one of these consonants + "ь" attach the suffix to
-- infl_no_fv instead of the full stem.
if match( infl_no_stress, "[джзстхцчшщ]ь$" ) then
return infl_no_fv .. t[2]
else
return com.remove_penultimate_short_vowel( infl .. t[2], params )
end
end
end
end
end
-- Attaches the harmonic long-vowel element shared by the Cyrillic ablative,
-- instrumental and reflexive endings (those callers append "с", "р" or the
-- `n` marker to the result).
--   infl    stem string with marker characters already removed
--   params  flag table from preprocess_inflection
--   vh      optional vowel-harmony table; computed from the stem when omitted
-- Returns the extended stem, or nothing when no rule applies.
--
-- Fix: `vh` used to be read as an undeclared global (always nil unless some
-- other code leaked one). It is now an optional third parameter, so existing
-- two-argument calls behave as before without touching global state.
local function aa( infl, params, vh )
	local vh = vh or vowelharmony( infl, params )
	local infl_no_fv, no_fv = com.remove_final_short_vowel( infl, params )
	-- Entry-name forms without the stem barrier, used for matching only.
	local infl_no_stress = gsub( (lang:makeEntryName( infl )), com.stem_barrier, "" )
	local infl_no_fv_no_stress = gsub( (lang:makeEntryName( infl_no_fv )), com.stem_barrier, "" )
	-- Ordered rules: { { conditions... }, suffix }. The first rule holding a
	-- truthy condition wins.
	local matches = {
		{ {
			match( infl_no_stress, "[йы]$" ),
			match( infl_no_stress, "[аеёийоөуүэюя][аоөуүэ]$" ),
			match( infl_no_fv_no_stress, "[бвгджзклмнпрстфхцчшщ][аоөуүэ]$" ), -- C[уү], and also loanwords with a word-final long vowel written as a single vowel
			match( infl_no_fv_no_stress, "и$" ),
			match( infl_no_stress, "[еёюя]$" ) and params.bor
			},
			"г" .. vh.Cyrl.aa
		},
		{ { match( infl_no_stress, "[еёюя]$" ) },
			vh.Cyrl.a
		},
		{ { match( infl_no_stress, "[иь]$" ) },
			"и" .. vh.Cyrl.a
		},
		{ {
			match( infl_no_stress, "[бвгджзклмнпрстфхцчшщ]$" ),
			match( infl_no_stress, "[бвгджзклмнпрстфхцчшщ][аоөэ]$" ) -- not C[уү]
			},
			vh.Cyrl.aa
		}
	}
	for _, t in ipairs( matches ) do
		for _, m in pairs( t[1] ) do
			-- When com.remove_final_short_vowel reported a removal (no_fv),
			-- the suffix attaches to the shortened stem.
			if m and no_fv then
				return infl_no_fv .. t[2]
			elseif m then
				return infl .. t[2]
			end
		end
	end
end
-- Builds the ablative form of a stem.
--   infl  stem string, possibly carrying marker characters
--   vh    optional vowel-harmony table; computed from the stem when omitted
-- Mongolian-script stems get the suffix directly (replacing the last
-- character when the stem already ends in a dative-locative suffix); any
-- other stem goes through aa() plus "с".
local function ablative( infl, vh )
	local stem, flags = preprocess_inflection( infl )
	if flags.n then
		-- hidden_n also replaces the flag table with an empty one.
		stem, flags = hidden_n( stem, flags, vh )
	elseif flags.g then
		stem = hidden_g( stem )
	end
	local harmony = vh or vowelharmony( stem, flags )

	if match( stem, com.NNBSP .. "[ᠲᠳ]" .. harmony.Mong.u .. "$" ) then
		return sub( stem, 1, len( stem ) - 1 ) .. harmony.Mong.a .. "ᠴ" .. harmony.Mong.a
	end
	if match( stem, "[ᠠ-ᡂᡸ]" .. FVS .. "?$" ) then
		return stem .. com.NNBSP .. harmony.Mong.a .. "ᠴ" .. harmony.Mong.a
	end
	return com.remove_penultimate_short_vowel( aa( stem, flags ) .. "с", flags )
end
-- Builds the instrumental form of a stem.
--   infl  stem string, possibly carrying marker characters
--   vh    optional vowel-harmony table; computed from the stem when omitted
-- Returns the form, or nothing when a Mongolian-script stem matches neither
-- rule.
--
-- Fix: the Mongolian-script branch indexed `vh.Mong` without defaulting `vh`,
-- so every call that omitted `vh` (all callers in this file) raised
-- "attempt to index a nil value" for Mongolian-script stems. `vh` is now
-- computed on demand, as in the other case builders.
local function instrumental( infl, vh )
	local infl, params = preprocess_inflection( infl )
	if params.g then infl = hidden_g( infl ) end
	if match( infl, "[ᠠ-ᡂᡸ]" .. FVS .. "?$" ) then
		local vh = vh or vowelharmony( infl, params )
		-- Ordered rules: { { condition }, suffix }.
		local matches = {
			{ { match( infl, "[ᠠ-ᠧᠶ]" .. FVS .. "?$" ) },
				com.NNBSP .. "ᠪ" .. vh.Mong.a .. "ᠷ"
			},
			{ { match( infl, "[ᠨ-ᠵᠷ-ᡂᡸ]" .. FVS .. "?$" ) },
				com.NNBSP .. "ᠢᠶ" .. vh.Mong.a .. "ᠷ"
			}
		}
		for _, t in ipairs( matches ) do
			for _, m in pairs( t[1] ) do
				if m then return infl .. t[2] end
			end
		end
	else
		-- Other stems: harmonic vowel element from aa() plus "р".
		return com.remove_penultimate_short_vowel( aa( infl, params ) .. "р", params )
	end
end
-- Builds the reflexive-possessive form of an already inflected form.
--   infl  inflected form, possibly carrying marker characters
--   vh    optional vowel-harmony table; computed from the form when omitted
-- Returns the form, or nothing when a Mongolian-script form matches no rule.
-- Non-Mongolian-script results end with the `n` marker character, which
-- handle_derived_slots_and_overrides strips again at the end.
--
-- Fix: the Mongolian-script branch indexed `vh.Mong` without defaulting `vh`,
-- so every call that omitted `vh` (all callers in this file) raised
-- "attempt to index a nil value" for Mongolian-script forms. `vh` is now
-- computed on demand, as in the other case builders.
local function reflexive( infl, vh )
	local infl, params = preprocess_inflection( infl )
	if params.g then infl = hidden_g( infl ) end
	if match( infl, "[ᠠ-ᡂᡸ]" .. FVS .. "?$" ) then
		local vh = vh or vowelharmony( infl, params )
		-- Ordered rules: { { condition }, suffix }. The first four recognise
		-- forms that already end in a case suffix; the last two are the
		-- general rules.
		local matches = {
			{ { match( infl, com.NNBSP .. "ᠶ?ᠢ$" ) },
				"ᠶ" .. vh.Mong.u .. "ᠭ" .. vh.Mong.a .. "ᠨ"
			},
			{ { match( infl, com.NNBSP .. "[ᠲᠳ]" .. vh.Mong.u .. "$" ) },
				vh.Mong.a .. "ᠭ" .. vh.Mong.a .. "ᠨ"
			},
			{ { match( infl, com.NNBSP .. vh.Mong.a .. "ᠴ" .. vh.Mong.a .. "$" ) },
				"ᠭ" .. vh.Mong.a .. "ᠨ"
			},
			{ { match( infl, com.NNBSP .. "ᠲ" .. vh.Mong.a .. "ᠢ$" ) },
				"ᠶᠢᠭ" .. vh.Mong.a .. "ᠨ"
			},
			{ { match( infl, "[ᠠ-ᠧᠶ]" .. FVS .. "?$" ) },
				com.NNBSP .. "ᠪ" .. vh.Mong.a .. "ᠨ"
			},
			{ { match( infl, "[ᠨ-ᠵᠷ-ᡂᡸ]" .. FVS .. "?$" ) },
				com.NNBSP .. "ᠢᠶ" .. vh.Mong.a .. "ᠨ"
			}
		}
		for _, t in ipairs( matches ) do
			for _, m in pairs( t[1] ) do
				if m then
					if match( infl, com.NNBSP .. "ᠶᠢ$" ) then
						-- Drop the last two characters before attaching.
						return sub( infl, 1, len( infl ) - 2 ) .. t[2]
					elseif match( infl, com.NNBSP .. "ᠢ$" ) or match( infl, com.NNBSP .. "[ᠲᠳ]" .. vh.Mong.u .. "$" ) or match( infl, com.NNBSP .. "ᠲ" .. vh.Mong.a .. "ᠢ$" ) then
						-- Drop the last character before attaching.
						return sub( infl, 1, len( infl ) - 1 ) .. t[2]
					else
						return infl .. t[2]
					end
				end
			end
		end
	else
		return com.remove_penultimate_short_vowel( aa( infl, params ), params ) .. n -- unstable nasal
	end
end
-- Builds the comitative form of a stem.
--   infl  stem string, possibly carrying marker characters
--   vh    optional vowel-harmony table; computed from the stem when omitted
-- Mongolian-script stems take NNBSP + "ᠲ" + harmonic vowel + "ᠢ"; stems ending
-- in one of д/ж/з/с/т/х/ц/ч/ш/щ + "ь" lose the "ь" and take "ит" + harmonic
-- diphthong; everything else takes "т" + harmonic diphthong.
local function comitative( infl, vh )
	local stem, flags = preprocess_inflection( infl )
	local harmony = vh or vowelharmony( stem, flags )

	if match( stem, "[ᠠ-ᡂᡸ]" .. FVS .. "?$" ) then
		return stem .. com.NNBSP .. "ᠲ" .. harmony.Mong.a .. "ᠢ"
	end
	if match( stem, "[джзстхцчшщ]ь$" ) then
		return sub( stem, 1, len( stem ) - 1 ) .. "ит" .. harmony.Cyrl.ai
	end
	return com.remove_penultimate_short_vowel( stem .. "т" .. harmony.Cyrl.ai, flags )
end
-- Builds the privative form: the stem (markers removed) followed by
-- NNBSP + "ᠦᠭᠡᠢ" for Mongolian-script stems, or "гүй" otherwise.
local function privative( infl )
	local stem = preprocess_inflection( infl )
	local suffix = "гүй"
	if match( stem, "[ᠠ-ᡂᡸ]" .. FVS .. "?$" ) then
		suffix = com.NNBSP .. "ᠦᠭᠡᠢ"
	end
	return stem .. suffix
end
-- Builds the directive form of a stem.
--   infl  stem string, possibly carrying marker characters
--   vh    optional vowel-harmony table; computed from the stem when omitted
-- Mongolian-script stems take NNBSP + "ᠤᠷᠤᠭᠤ". Other stems take a separate,
-- linked word: " [[л" + harmonic vowel after a stem ending in "р"/"рь",
-- " [[р" + harmonic vowel otherwise. That link is left open; the caller
-- closes it.
--
-- Fix: a link left open by an earlier step (e.g. the " [[нар" plural) is
-- closed before the new word is added. The check used to count " [[" against
-- " ]]" (with a leading space), so closing brackets were never counted and a
-- stem whose links were all closed would still get a stray "]]". It now
-- compares the number of "[[" with the number of "]]".
local function directive( infl, vh )
	local infl, params = preprocess_inflection( infl )
	local vh = vh or vowelharmony( infl, params )
	local _, open_count = gsub( infl, "%[%[", "" )
	local _, close_count = gsub( infl, "%]%]", "" )
	if open_count > close_count then infl = infl .. "]]" end
	if match( infl, "[ᠠ-ᡂᡸ]" .. FVS .. "?$" ) then
		return infl .. com.NNBSP .. "ᠤᠷᠤᠭᠤ"
	elseif match( infl, "рь?" .. com.stem_barrier .. "?$" ) then
		return infl .. " [[л" .. vh.Cyrl.uu
	else
		return infl .. " [[р" .. vh.Cyrl.uu
	end
end
-- Placeholder: the equative is not implemented; the function body is empty
-- and it returns nothing.
local function equative( infl )
end
-- Builds the singular-possession independent genitive from a genitive form:
-- the form (markers removed) followed by NNBSP + "ᠬᠢ" for Mongolian-script
-- forms, or "х" otherwise.
local function singular_independent_genitive( infl )
	local stem = preprocess_inflection( infl )
	local suffix = "х"
	if match( stem, "[ᠠ-ᡂᡸ]" .. FVS .. "?$" ) then
		suffix = com.NNBSP .. "ᠬᠢ"
	end
	return stem .. suffix
end
-- Builds the collective-possession independent genitive from a genitive form:
-- the form (markers removed) followed by NNBSP + "ᠬᠢᠨ" for Mongolian-script
-- forms, or "х" + harmonic vowel + "н" otherwise.
--   vh  optional vowel-harmony table; computed from the form when omitted
local function collective_independent_genitive( infl, vh )
	local stem, flags = preprocess_inflection( infl )
	local harmony = vh or vowelharmony( stem, flags )
	if match( stem, "[ᠠ-ᡂᡸ]" .. FVS .. "?$" ) then
		return stem .. com.NNBSP .. "ᠬᠢᠨ"
	end
	return stem .. "х" .. harmony.Cyrl.a .. "н"
end
-- Fills data.forms with every generated slot for one noun.
-- Reads data.lemma, data.decl, data.bor, data.proper, data.number,
-- data.plural, data.gen_dat, data.dat_abl, data.orig_lemma and
-- data.orig_lemma_no_links; rewrites data.lemma (stem barrier and marker
-- characters appended) and defaults data.plural to { "unknown" }.
--
-- Fix: the last branch of the plural-type chain was written as
-- `elseif pl == "uud" or "irreg" or "unknown" then`, which is always true
-- because a non-empty string is truthy. It is now a plain `else`, which is
-- what the code actually did.
local function handle_derived_slots_and_overrides( data )
	-- NOTE(review): never read below; the clean-up step uses data.orig_lemma.
	local orig_lemma = data.lemma
	-- Encode the declension flags into the lemma string as marker characters.
	data.lemma = insert_stem_barrier( data )
	if data.decl == "n" then data.lemma = data.lemma .. n end
	if data.decl == "g" then data.lemma = data.lemma .. g end
	if data.bor == "Russian" then
		data.lemma = data.lemma .. Russian
	elseif data.bor then
		data.lemma = data.lemma .. bor
	end
	if data.proper then data.lemma = data.lemma .. proper end
	if data.number ~= "spos" and data.number ~= "cpos" then
		-- Singular case forms.
		add( data, "attr", attributive( data.lemma ) )
		add( data, "nom_sg", data.lemma )
		add( data, "gen_sg", genitive( data.lemma ) )
		add( data, "acc_sg", accusative( data.lemma ) )
		add( data, "dat_loc_sg", dative_locative( data.lemma ) )
		add( data, "abl_sg", ablative( data.lemma ) )
		add( data, "ins_sg", instrumental( data.lemma ) )
		add( data, "com_sg", comitative( data.lemma ) )
		add( data, "priv_sg", privative( data.lemma ) )
		add( data, "dirc_sg", directive( data.lemma ) )
		-- Plural case forms, one set per requested plural type. The i-th
		-- plural type is assumed to end up as the i-th entry of each plural slot.
		if not data.plural then data.plural = {"unknown"} end
		for i, pl in ipairs(data.plural) do
			local modified_lemma
			if pl == "nar" then
				modified_lemma = data.lemma .. nar
			elseif pl == "nuud" then
				modified_lemma = data.lemma .. nuud
			elseif pl == "chuud" then
				modified_lemma = data.lemma .. chuud
			elseif pl == "d" then
				modified_lemma = data.lemma .. d
			elseif pl == "s" then
				modified_lemma = data.lemma .. s
			else
				-- "uud", "irreg", "unknown" and anything else: no marker.
				modified_lemma = data.lemma
			end
			add( data, "nom_pl", plural( modified_lemma ) )
			add( data, "gen_pl", genitive( data.forms["nom_pl"][i].form ) )
			add( data, "acc_pl", accusative( data.forms["nom_pl"][i].form ) )
			add( data, "dat_loc_pl", dative_locative( data.forms["nom_pl"][i].form ) )
			add( data, "abl_pl", ablative( data.forms["nom_pl"][i].form ) )
			add( data, "ins_pl", instrumental( data.forms["nom_pl"][i].form ) )
			add( data, "com_pl", comitative( data.forms["nom_pl"][i].form ) )
			add( data, "priv_pl", privative( data.forms["nom_pl"][i].form ) )
			add( data, "dirc_pl", directive( data.forms["nom_pl"][i].form ) )
		end
		-- Independent genitives, derived from the genitive forms.
		add( data, "spos_indgen_sg", singular_independent_genitive( data.forms["gen_sg"][1].form ) )
		add( data, "cpos_indgen_sg", collective_independent_genitive( data.forms["gen_sg"][1].form ) )
		for i, pl in ipairs(data.plural) do
			add( data, "spos_indgen_pl", singular_independent_genitive( data.forms["gen_pl"][i].form ) )
			add( data, "cpos_indgen_pl", collective_independent_genitive( data.forms["gen_pl"][i].form ) )
		end
		--"spos_indloc_sg",
		--"cpos_indloc_sg",
		--for i, pl in ipairs(data.plural) do
			--"spos_indloc_pl",
			--"cpos_indloc_pl",
		--end
		-- Reflexive-possessive forms. The genitive reflexive is built on the
		-- singular-possession independent genitive, not on the plain genitive.
		add( data, "nom_sg_refl", reflexive( data.forms["nom_sg"][1].form ) )
		add( data, "gen_sg_refl", reflexive( data.forms["spos_indgen_sg"][1].form ) )
		add( data, "acc_sg_refl", reflexive( data.forms["acc_sg"][1].form ) )
		add( data, "dat_loc_sg_refl", reflexive( data.forms["dat_loc_sg"][1].form ) )
		add( data, "abl_sg_refl", reflexive( data.forms["abl_sg"][1].form ) )
		add( data, "ins_sg_refl", reflexive( data.forms["ins_sg"][1].form ) )
		add( data, "com_sg_refl", reflexive( data.forms["com_sg"][1].form ) )
		add( data, "priv_sg_refl", reflexive( data.forms["priv_sg"][1].form ) )
		add( data, "dirc_sg_refl", reflexive( data.forms["dirc_sg"][1].form ) )
		for i, pl in ipairs(data.plural) do
			add( data, "nom_pl_refl", reflexive( data.forms["nom_pl"][i].form ) )
			add( data, "gen_pl_refl", reflexive( data.forms["spos_indgen_pl"][i].form ) )
			add( data, "acc_pl_refl", reflexive( data.forms["acc_pl"][i].form ) )
			add( data, "dat_loc_pl_refl", reflexive( data.forms["dat_loc_pl"][i].form ) )
			add( data, "abl_pl_refl", reflexive( data.forms["abl_pl"][i].form ) )
			add( data, "ins_pl_refl", reflexive( data.forms["ins_pl"][i].form ) )
			add( data, "com_pl_refl", reflexive( data.forms["com_pl"][i].form ) )
			add( data, "priv_pl_refl", reflexive( data.forms["priv_pl"][i].form ) )
			add( data, "dirc_pl_refl", reflexive( data.forms["dirc_pl"][i].form ) )
		end
		-- Optional stacked cases.
		if data.gen_dat then
			add( data, "gen_dat_sg", dative_locative( data.forms["gen_sg"][1].form ) )
			add( data, "gen_dat_sg_refl", reflexive( data.forms["gen_dat_sg"][1].form ) )
			for i, pl in ipairs(data.plural) do
				add( data, "gen_dat_pl", dative_locative( data.forms["gen_pl"][i].form ) )
				add( data, "gen_dat_pl_refl", reflexive( data.forms["gen_dat_pl"][i].form ) )
			end
		end
		if data.dat_abl then
			-- Only the singular dative-ablative is generated.
			add( data, "dat_abl_sg", ablative( data.forms["dat_loc_sg"][1].form ) )
			add( data, "dat_abl_sg_refl", reflexive( data.forms["dat_abl_sg"][1].form ) )
		end
	else
		-- "spos"/"cpos": number-less slots; the genitive is generated only
		-- for "cpos".
		add( data, "nom", data.lemma )
		if data.number == "cpos" then
			add( data, "gen", genitive( data.lemma ) )
		end
		add( data, "acc", accusative( data.lemma ) )
		add( data, "dat_loc", dative_locative( data.lemma ) )
		add( data, "abl", ablative( data.lemma ) )
		add( data, "ins", instrumental( data.lemma ) )
		add( data, "com", comitative( data.lemma ) )
		add( data, "priv", privative( data.lemma ) )
		add( data, "dirc", directive( data.lemma ) )
		add( data, "nom_refl", reflexive( data.forms["nom"][1].form ) )
		if data.number == "cpos" then
			add( data, "gen_refl", reflexive( data.forms["gen"][1].form ) )
		end
		add( data, "acc_refl", reflexive( data.forms["acc"][1].form ) )
		add( data, "dat_loc_refl", reflexive( data.forms["dat_loc"][1].form ) )
		add( data, "abl_refl", reflexive( data.forms["abl"][1].form ) )
		add( data, "ins_refl", reflexive( data.forms["ins"][1].form ) )
		add( data, "com_refl", reflexive( data.forms["com"][1].form ) )
		add( data, "priv_refl", reflexive( data.forms["priv"][1].form ) )
		add( data, "dirc_refl", reflexive( data.forms["dirc"][1].form ) )
	end
	-- Clean-up: strip marker characters and the stem barrier from every form,
	-- and close the link of any form that still contains a space once the
	-- original lemma is removed (i.e. a separate linked word was appended).
	-- NOTE(review): data.orig_lemma is used as a pattern here and is not set
	-- in this function — presumably the caller sets it; confirm.
	for _,form in pairs( data.forms ) do
		for _,variant in ipairs( form ) do
			variant.form = gsub( variant.form, "[" .. n .. g .. bor .. Russian .. proper .. nar .. nuud .. chuud .. d .. s .. com.stem_barrier .. "]", "" )
			if match( gsub( variant.form, data.orig_lemma, "" ), " " ) then variant.form = variant.form .. "]]" end
		end
	end
	-- Compute linked versions of potential lemma slots, for use in {{mn-noun}}.
	-- We substitute the original lemma (before removing links) for forms that
	-- are the same as the lemma, if the original lemma has links.
	for _, slot in ipairs( { "nom_sg", "nom_pl" } ) do
		iut.insert_forms( data.forms, slot .. "_linked", iut.map_forms( data.forms[slot], function( form )
			if form == data.orig_lemma_no_links and find( data.orig_lemma, "%[%[" ) then
				return data.orig_lemma
			else
				return form
			end
		end ) )
	end
end
-- Collects the bracketed footnotes from one group of alternating runs.
-- The group is laid out as { text, footnote, text, footnote, text, ... }:
-- even positions hold footnotes and the text after each footnote must be the
-- empty string.
-- Returns a list of the footnotes, or nil when the group contains none.
-- Raises an error when non-empty text follows a footnote.
local function fetch_footnotes( separated_group )
	local collected = nil
	local last_index = #separated_group - 1
	local index = 2
	while index <= last_index do
		if separated_group[index + 1] ~= "" then
			error( "Extraneous text after bracketed footnotes: '" .. table.concat( separated_group ) .. "'" )
		end
		collected = collected or {}
		collected[#collected + 1] = separated_group[index]
		index = index + 2
	end
	return collected
end
-- Parses one override spec (a case prefix followed by its values).
--   segments  alternating runs for this spec; segments[1] starts with the
--             case prefix. segments[1] is overwritten with the remainder.
--   case      the case prefix found at the start of segments[1]; either a key
--             of `cases` or an accented spelling from `accented_cases`
-- Returns the target slot name and a table
--   { values = { { form = ..., footnotes = ... }, ... },
--     stemstressed = true (accented prefix), full = true (":" marker) }.
-- Raises an error for an unknown case or an empty value.
--
-- Fixes:
--   * the singular slot was built as case .. "_sl"; no such slot exists, the
--     slot names used throughout this file end in "_sg";
--   * the text after the case prefix was cut to three characters, which
--     truncated the override value itself;
--   * the empty-value error called table.concat( segments .. "'" ), i.e. it
--     concatenated a table with a string and failed with an unrelated error
--     instead of reporting the problem.
local function parse_override( segments, case )
	local retval = { values = {} }
	local part = segments[1]
	if cases[case] then
		-- ok
	elseif accented_cases[case] then
		case = accented_cases[case]
		retval.stemstressed = true
	else
		error("Internal error: unrecognized case in override: '" .. table.concat(segments) .. "'")
	end
	-- Everything after the case prefix.
	local rest = sub( part, len( case ) + 1 )
	local slot
	if find( rest, "^pl" ) then
		rest = gsub( rest, "^pl", "" )
		slot = case .. "_pl"
	else
		slot = case .. "_sg"
	end
	-- A leading ":" marks the override as "full".
	if find( rest, "^:" ) then
		retval.full = true
		rest = gsub( rest, "^:", "" )
	end
	segments[1] = rest
	local colon_separated_groups = put.split_alternating_runs( segments, ":" )
	for _, colon_separated_group in ipairs( colon_separated_groups ) do
		local value = {}
		local form = colon_separated_group[1]
		if form == "" then
			error( "Use - to indicate an empty ending for slot '" .. slot .. "': '" .. table.concat( segments ) .. "'" )
		elseif form == "-" then
			value.form = ""
		else
			value.form = form
		end
		value.footnotes = fetch_footnotes( colon_separated_group )
		table.insert( retval.values, value )
	end
	return slot, retval
end
-- Parse one angle-bracket indicator spec such as "<r.pl.uud>" and record the
-- result in the module-level `data` table, which is also returned.
--
-- The text between the angle brackets is split on "."; each part is either a
-- case override (handed to parse_override), a footnote-only part, or one of the
-- fixed indicator keywords handled below. Unknown parts, a blank spec and
-- indicators given twice raise errors.
local function parse_indicator_spec( angle_bracket_spec )
	local inside = match( angle_bracket_spec, "^<(.*)>$" )
	if inside == "" then
		error( "Blank indicator: '" .. inside .. "'" )
	end

	-- Store `value` under data[field], refusing a second assignment.
	local function set_once( field, value, description )
		if data[field] then
			error( "Can't specify " .. description .. " twice: '" .. inside .. "'" )
		end
		data[field] = value
	end

	local decl_indicators = { r = true, n = true, g = true }
	local number_indicators = { sg = true, pl = true, spos = true, cpos = true }
	local plural_indicators = { uud = true, nar = true, nuud = true, chuud = true, d = true, s = true }
	local harmony_indicators = { ["а"] = true, ["о"] = true, ["ө"] = true, ["э"] = true }
	local borrowing_indicators = { bor = true, Russian = true }

	local segments = put.parse_balanced_segment_run( inside, "[", "]" )
	local dot_separated_groups = put.split_alternating_runs( segments, "%." )
	for _, group in ipairs( dot_separated_groups ) do
		local part = group[1]
		local case_prefix
		-- Try every known case name as a prefix of this part.
		for case in pairs( cases ) do
			if match( part, "^" .. case .. "[:$]" ) then
				case_prefix = match( part, "^" .. case )
				local slot, override = parse_override( group, case_prefix )
				local existing = data.overrides[slot]
				if existing then
					table.insert( existing, override )
				else
					data.overrides[slot] = { override }
				end
			end
		end

		if case_prefix ~= nil then
			-- Already handled as a case override above.
		elseif part == "" then
			if #group == 1 then
				error( "Blank indicator: '" .. inside .. "'" )
			end
			data.footnotes = fetch_footnotes( group )
		elseif decl_indicators[part] then
			set_once( "decl", part, "declension" )
		elseif number_indicators[part] then
			set_once( "number", part, "number" )
		elseif plural_indicators[part] then
			-- Several plural types may be listed, so these accumulate.
			data.plural = data.plural or {}
			table.insert( data.plural, part )
		elseif part == "gen_dat" then
			set_once( "gen_dat", true, "genitive-dative" )
		elseif part == "dat_abl" then
			set_once( "dat_abl", true, "dative-ablative" )
		elseif harmony_indicators[part] then
			set_once( "vh_override", part, "vowel harmony" )
		elseif borrowing_indicators[part] then
			set_once( "bor", part, "borrowing" )
		elseif part == "proper" then
			set_once( "proper", true, "proper noun" )
		else
			error( "Unrecognized indicator '" .. part .. "': '" .. inside .. "'" )
		end
	end
	return data
end
-- Fill in the default grammatical number of a word spec. Specs flagged as
-- `adj` are left alone; otherwise a missing `number` becomes "sg" for proper
-- nouns and "both" for everything else.
local function set_defaults_and_check_bad_indicators( data )
	if data.adj then
		return
	end
	if not data.number then
		if data.proper then
			data.number = "sg"
		else
			data.number = "both"
		end
	end
end
-- Reject declension indicators that contradict the shape of the lemma:
-- hidden-n ("n") is not allowed when the lemma ends in н, and hidden-g ("g")
-- is only allowed when it does.
local function check_indicators_match_lemma( data )
	local decl = data.decl
	if decl == "n" then
		if match( data.lemma, "н$" ) then
			error( "Hidden-n declension cannot be specified with a lemma ending in н" )
		end
	end
	if decl == "g" then
		if not match( data.lemma, "н$" ) then
			error( "Hidden-g declension can only be specified with a lemma ending in н" )
		end
	end
end
-- Finish one word spec after parsing: mark it as proper when propernoun()
-- says so for its lemma, apply defaults, then validate the indicators against
-- the lemma.
local function detect_indicator_spec( data )
	local lemma_is_proper = propernoun( data.lemma )
	if lemma_is_proper then
		data.proper = true
	end
	set_defaults_and_check_bad_indicators( data )
	check_indicators_match_lemma( data )
end
-- Run detect_indicator_spec over every word spec and tag each one with
-- `multiword`, which is true when the top-level spec has more than one entry
-- in alternant_or_word_specs.
local function detect_all_indicator_specs( alternant_multiword_spec )
	local entry_count = #alternant_multiword_spec.alternant_or_word_specs
	local is_multiword = entry_count > 1
	local function visit( data )
		detect_indicator_spec( data )
		data.multiword = is_multiword
	end
	iut.map_word_specs( alternant_multiword_spec, visit )
end
-- Forward declaration: the two propagation functions call each other.
local propagate_multiword_properties

-- Propagate `property` through each alternant of `alternant_spec` and then
-- store a combined value on the alternant spec itself: the first value seen,
-- or `mixed_value` as soon as a later alternant carries a different, non-false
-- value. The result is nil when there are no alternants.
local function propagate_alternant_properties( alternant_spec, property, mixed_value, nouns_only )
	local merged
	for _, multiword_spec in ipairs( alternant_spec.alternants ) do
		propagate_multiword_properties( multiword_spec, property, mixed_value, nouns_only )
		-- Read the value only after the recursive call has had a chance to set it.
		local current = multiword_spec[property]
		if merged == nil then
			merged = current
		elseif current and merged ~= current then
			merged = mixed_value
		end
	end
	alternant_spec[property] = merged
end
-- Spread `property` sideways across the words of a multiword spec.
--
-- Each "nounal" word (one that carries the property, or, with `nouns_only`,
-- any plain word spec not flagged `adj`) gives its value to the preceding
-- words that lack one; words after the last nounal word inherit from it.
-- The spec itself receives the value shared by all nounal words, or
-- `mixed_value` if they disagree. Alternants are processed recursively first.
propagate_multiword_properties = function( multiword_spec, property, mixed_value, nouns_only )
	local combined = nil
	local prev_nounal = 0
	local specs = multiword_spec.alternant_or_word_specs or multiword_spec.word_specs
	for idx = 1, #specs do
		local spec = specs[idx]
		local has_alternants = spec.alternants
		if has_alternants then
			propagate_alternant_properties( spec, property, mixed_value )
		end

		local is_nounal
		if not has_alternants and nouns_only then
			is_nounal = not spec.adj
		else
			is_nounal = not not spec[property]
		end

		if is_nounal then
			local value = spec[property]
			if not value then
				error( "Internal error: noun-type word spec without " .. property .. " set" )
			end
			-- Back-fill the words between the previous nounal word and this one.
			for back = prev_nounal + 1, idx - 1 do
				specs[back][property] = specs[back][property] or value
			end
			prev_nounal = idx
			if combined == nil then
				combined = value
			elseif combined ~= value then
				combined = mixed_value
			end
		end
	end
	-- Trailing words take the value of the last nounal word, if there was one.
	if prev_nounal > 0 then
		local last_value = specs[prev_nounal][property]
		for idx = prev_nounal + 1, #specs do
			specs[idx][property] = specs[idx][property] or last_value
		end
	end
	multiword_spec[property] = combined
end
-- Push `property` from the top-level spec down to every individual word spec.
-- A word keeps its own value when it has one; otherwise it inherits from the
-- nearest enclosing level (multiword spec, alternant spec, top-level spec),
-- falling back to `default_propval`. Ending up with the value "mixed" on a
-- word raises an error.
local function propagate_properties_downward( alternant_multiword_spec, property, default_propval )
	-- Resolve and store the final value for one word spec.
	local function settle( word_spec, inherited )
		local resolved = word_spec[property] or inherited
		if resolved == "mixed" then
			error( "Attempt to assign mixed " .. property .. " to word" )
		end
		word_spec[property] = resolved
	end

	local top_value = alternant_multiword_spec[property] or default_propval
	for _, item in ipairs( alternant_multiword_spec.alternant_or_word_specs ) do
		if item.alternants then
			local item_value = item[property] or top_value
			for _, multiword_spec in ipairs( item.alternants ) do
				local branch_value = multiword_spec[property] or item_value
				for _, word_spec in ipairs( multiword_spec.word_specs ) do
					settle( word_spec, branch_value )
				end
			end
		else
			settle( item, top_value )
		end
	end
end
-- Propagate `property` through the whole spec: two sideways/upward passes
-- (first with the nouns-only flag set, then without it), followed by one
-- downward pass that fills every word using `default_propval` as fallback.
local function propagate_properties( alternant_multiword_spec, property, default_propval, mixed_value )
	-- ipairs only stops at nil, so the trailing `false` is visited as well.
	for _, nouns_only in ipairs( { "nouns only", false } ) do
		propagate_multiword_properties( alternant_multiword_spec, property, mixed_value, nouns_only )
	end
	propagate_properties_downward( alternant_multiword_spec, property, default_propval )
end
-- For every word spec, remember the lemma as written (`orig_lemma`), store a
-- copy with links removed (`orig_lemma_no_links`), and make that link-free
-- copy the working `lemma`.
local function normalize_all_lemmas( alternant_multiword_spec )
	local function strip_links( data )
		local as_written = data.lemma
		data.orig_lemma = as_written
		local unlinked = m_links.remove_links( as_written )
		data.orig_lemma_no_links = unlinked
		data.lemma = unlinked
	end
	iut.map_word_specs( alternant_multiword_spec, strip_links )
end
-- Compute the category list and the short annotation shown in the table title,
-- storing them in alternant_multiword_spec.categories and .annotation.
--
-- For manual specs the annotation only reflects the number restriction.
-- Otherwise it is assembled from the key word spec(s): number restriction or
-- "proper", vowel-harmony class, declension type, borrowing flag and the
-- plural types in use.
--
-- Changes relative to the previous version: the plural list `pl` is now a
-- local (it used to be written to and read from a global), a word spec with no
-- `plural` list no longer crashes the loop, and an unused local was dropped.
local function compute_categories_and_annotation( alternant_multiword_spec )
	local cats = {}
	local function insert( cattype )
		m_table.insertIfNot( cats, "Mongolian " .. cattype )
	end
	if alternant_multiword_spec.pos == "noun" then
		if alternant_multiword_spec.number == "sg" then
			insert( "uncountable nouns" )
		elseif alternant_multiword_spec.number == "pl" then
			insert( "pluralia tantum" )
		end
	end
	if alternant_multiword_spec.manual then
		alternant_multiword_spec.annotation =
			alternant_multiword_spec.number == "sg" and "sg-only" or
			alternant_multiword_spec.number == "pl" and "pl-only" or
			alternant_multiword_spec.number == "spos" and "sg poss" or
			alternant_multiword_spec.number == "cpos" and "col poss" or
			""
	else
		local annparts = {}
		local bor = nil
		local decl = {}
		local irregs = {}
		-- `stems` and `reducible` are never filled in at present; they are kept
		-- because the checks further down (and the disabled vowel-deletion test)
		-- still refer to them.
		local stems = {}
		local reducible = nil
		local vh = {}
		-- Plural annotations of the most recently processed word spec; stays nil
		-- if no word spec is processed at all.
		local pl
		local function do_word_spec( data )
			m_table.insertIfNot( vh, vowelharmony( data.lemma, data ).Cyrl.a )
			insert( vowelharmony( data.lemma, data ).Cyrl.a .. "-harmonic nouns" )
			if data.decl == "r" then
				m_table.insertIfNot( decl, "regular" )
				insert( "regular declension nouns" )
			elseif data.decl == "n" then
				m_table.insertIfNot( decl, "hidden-n" )
				insert( "hidden-n declension nouns" )
			elseif data.decl == "g" then
				m_table.insertIfNot( decl, "hidden-g" )
				insert( "hidden-g declension nouns" )
			end
			if data.bor then bor = true end
			--if voweldeletion( data.lemma, data ) ~= data.lemma then reducible = true end
			pl = {}
			if data.number == "pl" or data.number == "both" then
				-- data.plural is only created when a plural indicator was given.
				for _, plural in ipairs( data.plural or {} ) do
					if plural == "uud" then
						table.insert(pl, vowelharmony( data.lemma, data ).Cyrl.uu .. "д-pl")
					elseif plural == "nar" then
						table.insert(pl, "нар-pl")
					elseif plural == "nuud" then
						table.insert(pl, "н" .. vowelharmony( data.lemma, data ).Cyrl.uu .. "д-pl")
					elseif plural == "chuud" then
						table.insert(pl, "ч" .. vowelharmony( data.lemma, data ).Cyrl.uu .. "д-pl")
					elseif plural == "d" then
						table.insert(pl, "д-pl")
					elseif plural == "s" then
						table.insert(pl, "с-pl")
					elseif plural == "irreg" then
						table.insert(pl, "irreg pl")
					end
				end
			end
		end
		-- Annotate from the first noun of the spec (or the first word if none
		-- is marked); for an alternant, from the key word of every alternative.
		local key_entry = alternant_multiword_spec.first_noun or 1
		if #alternant_multiword_spec.alternant_or_word_specs >= key_entry then
			local alternant_or_word_spec = alternant_multiword_spec.alternant_or_word_specs[key_entry]
			if alternant_or_word_spec.alternants then
				for _, multiword_spec in ipairs( alternant_or_word_spec.alternants ) do
					key_entry = multiword_spec.first_noun or 1
					if #multiword_spec.word_specs >= key_entry then
						do_word_spec( multiword_spec.word_specs[key_entry] )
					end
				end
			else
				do_word_spec( alternant_or_word_spec )
			end
		end
		if alternant_multiword_spec.pos == "proper noun" then
			table.insert( annparts, "proper" )
		elseif alternant_multiword_spec.number == "sg" then
			table.insert( annparts, "sg-only" )
		elseif alternant_multiword_spec.number == "pl" then
			table.insert( annparts, "pl-only" )
		elseif alternant_multiword_spec.number == "spos" then
			table.insert( annparts, "sg poss" )
		elseif alternant_multiword_spec.number == "cpos" then
			table.insert( annparts, "col poss" )
		end
		if #vh > 0 then
			table.insert( annparts, table.concat( vh, "/" ) .. "-harmonic" )
		end
		if #decl > 0 then
			table.insert( annparts, table.concat( decl, "/" ) )
		end
		if bor then
			table.insert( annparts, "borr" )
		end
		if reducible then
			table.insert( annparts, "reduc" )
		end
		if pl then
			for _, plural in ipairs(pl) do
				table.insert(irregs, plural)
			end
		end
		if #irregs > 0 then
			table.insert( annparts, table.concat( irregs, " // " ) )
		end
		alternant_multiword_spec.annotation = table.concat( annparts, " " )
		if #stems > 1 then
			insert( "nouns with multiple stems" )
		end
	end
	alternant_multiword_spec.categories = cats
end
-- Join a stem and an ending by plain concatenation.
local function combine_stem_ending( stem, ending )
	local combined = stem .. ending
	return combined
end
-- Hand the computed forms to iut.show_forms for display formatting. The lemma
-- list passed along is taken from the nominative singular forms, or from the
-- nominative plural forms when there is no nominative singular.
local function show_forms( alternant_multiword_spec )
	local forms = alternant_multiword_spec.forms
	local lemmas = {}
	local lemma_source = forms.nom_sg or forms.nom_pl
	if lemma_source then
		for _, entry in ipairs( lemma_source ) do
			table.insert( lemmas, entry.form )
		end
	end

	local footnotes = alternant_multiword_spec.footnotes
	local function identity( form )
		return form
	end
	iut.show_forms( forms, {
		lemmas = lemmas,
		slot_table = output_noun_slots_with_linked,
		lang = lang,
		canonicalize = identity,
		include_translit = true,
		footnotes = footnotes,
		allow_footnote_symbols = not not footnotes,
	} )
end
-- Build the wikitext of the declension table for an already-inflected spec.
--
-- A table template is chosen from alternant_multiword_spec.number ("sg", "pl",
-- "spos"/"cpos", anything else = both numbers); title, annotation and the
-- notes clause are stored in alternant_multiword_spec.forms, and the template
-- is filled in with format(). The `{\op}` / `{\cl}` sequences in the templates
-- are left for format() to process (presumably literal braces; confirm against
-- Module:string utilities).
--
-- Changes relative to the previous version:
--   * the nested "Reflexive possessive forms" frame had a malformed opening
--     tag (`min-width:...` without `style="`); it is now a proper style attribute;
--   * single-number tables request the optional genitive-dative and
--     dative-ablative rows for the number actually shown, instead of always
--     using the singular slot (which put the wrong cell into plural-only tables).
local function make_table( alternant_multiword_spec )
	local forms = alternant_multiword_spec.forms

	-- Opening markup of the collapsible frame plus the start of the main table.
	local function header( min_width )
		min_width = min_width or "70"
		return gsub( [===[
<div class="NavFrame" style="display:inline-block;min-width:MINWIDTHem">
<div class="NavHead" style="background:#eff7ff">{title}{annotation} </div>
<div class="NavContent">
{\op}| style="background:#f9f9f9;text-align:center;min-width:MINWIDTHem;width:100%" class="inflection-table"
|-
]===], "MINWIDTH", min_width )
	end

	-- Closes the current table and opens the nested frame and table holding the
	-- reflexive possessive forms.
	local function reflexive( min_width, color )
		min_width = min_width or "70"
		color = color or "d9ebff"
		return gsub( gsub( [===[|-
|{\cl}
<div class="NavFrame" style="min-width:MINWIDTHem">
<div class="NavHead" style="background:#COLOR">Reflexive possessive forms </div>
<div class="NavContent">
{\op}| style="background:#f9f9f9;text-align:center;min-width:MINWIDTHem;width:100%" class="inflection-table"
|-
]===], "MINWIDTH", min_width ), "COLOR", color )
	end

	-- Row for an optional case. Returns "" when both the singular and the plural
	-- slot of `case` hold the dash placeholder; otherwise a header cell labelled
	-- `name` followed by the singular cell (if `sg`) and the plural cell (if
	-- `pl`), using the reflexive slots when `refl` is set.
	local function add_case( case, name, sg, pl, refl )
		if forms[case .. "_sg"] ~= "—" or forms[case .. "_pl"] ~= "—" then
			local output = gsub( [===[|-
! style="background:#eff7ff" | NAME
]===], "NAME", name )
			if sg then
				if refl then
					output = output .. gsub( [===[
| {CASE_sg_refl}
]===], "CASE", case )
				else
					output = output .. gsub( [===[
| {CASE_sg}
]===], "CASE", case )
				end
			end
			if pl then
				if refl then
					output = output .. gsub( [===[
| {CASE_pl_refl}
]===], "CASE", case )
				else
					output = output .. gsub( [===[
| {CASE_pl}
]===], "CASE", case )
				end
			end
			return output
		else
			return ""
		end
	end

	-- Closes the last table, appends the notes clause and closes the outer frame.
	local function template_footer()
		return [===[|-
|{\cl}{notes_clause}</div></div>]===]
	end

	-- Template with both a singular and a plural column.
	local table_spec_both = header( "45" ) .. [===[
! style="background:#d9ebff" | attributive
| colspan=2 | {attr}
|-
! style="background:#d9ebff;width:10em" |
! style="background:#d9ebff;width:17.5em" | singular / indefinite
! style="background:#d9ebff;width:17.5em" | definite plural
|-
! style="background:#eff7ff" | nominative
| {nom_sg}
| {nom_pl}
|-
! style="background:#eff7ff" | genitive
| {gen_sg}
| {gen_pl}
|-
! style="background:#eff7ff" | accusative
| {acc_sg}
| {acc_pl}
|-
! style="background:#eff7ff" | dative-locative
| {dat_loc_sg}
| {dat_loc_pl}
]===] .. add_case( "gen_dat", "genitive-dative", true, true ) .. [===[
|-
! style="background:#eff7ff" | ablative
| {abl_sg}
| {abl_pl}
]===] .. add_case( "dat_abl", "dative-ablative", true, true ) .. [===[
|-
! style="background:#eff7ff" | instrumental
| {ins_sg}
| {ins_pl}
|-
! style="background:#eff7ff" | comitative
| {com_sg}
| {com_pl}
|-
! style="background:#eff7ff" | privative
| {priv_sg}
| {priv_pl}
|-
! style="background:#eff7ff" | directive
| {dirc_sg}
| {dirc_pl}
]===] .. reflexive( "45" ) .. [===[
! style="background:#d9ebff;width:10em" |
! style="background:#d9ebff;width:17.5em" | singular / indefinite
! style="background:#d9ebff;width:17.5em" | definite plural
|-
! style="background:#eff7ff" | nominative
| {nom_sg_refl}
| {nom_pl_refl}
|-
! style="background:#eff7ff" | genitive
| {gen_sg_refl}
| {gen_pl_refl}
|-
! style="background:#eff7ff" | accusative
| {acc_sg_refl}
| {acc_pl_refl}
|-
! style="background:#eff7ff" | dative-locative
| {dat_loc_sg_refl}
| {dat_loc_pl_refl}
]===] .. add_case( "gen_dat", "genitive-dative", true, true, true ) .. [===[
|-
! style="background:#eff7ff" | ablative
| {abl_sg_refl}
| {abl_pl_refl}
]===] .. add_case( "dat_abl", "dative-ablative", true, true, true ) .. [===[
|-
! style="background:#eff7ff" | instrumental
| {ins_sg_refl}
| {ins_pl_refl}
|-
! style="background:#eff7ff" | comitative
| {com_sg_refl}
| {com_pl_refl}
|-
! style="background:#eff7ff" | privative
| {priv_sg_refl}
| {priv_pl_refl}
|-
! style="background:#eff7ff" | directive
| {dirc_sg_refl}
| {dirc_pl_refl}
|{\cl}</div></div>
{\op}| style="background:#f9f9f9;text-align:center;min-width:45em;width:100%" class="inflection-table"
|-
! style="background:#d9ebff;width:10em" | independent<br>genitive
! style="background:#d9ebff;width:17.5em" | singular / indefinite
! style="background:#d9ebff;width:17.5em" | definite plural
|-
! style="background:#eff7ff" | singular<br>possession
| {spos_indgen_sg}
| {spos_indgen_pl}
|-
! style="background:#eff7ff" | collective<br>possession
| {cpos_indgen_sg}
| {cpos_indgen_pl}
]===] .. template_footer()

	-- Template with a single column. `num` is the slot suffix ("sg" or "pl")
	-- substituted for NUM; `number` is the column heading substituted for NUMBER.
	local function table_spec_one( num, number )
		-- The optional rows must show the same number as the rest of the column.
		local want_sg = num == "sg"
		local want_pl = num == "pl"
		return gsub( gsub( header( "30" ) .. [===[
! style="background:#d9ebff" | attributive
| {attr}
|-
! style="background:#d9ebff;width:10em" |
! style="background:#d9ebff;width:20em" | NUMBER
|-
! style="background:#eff7ff" | nominative
| {nom_NUM}
|-
! style="background:#eff7ff" | genitive
| {gen_NUM}
|-
! style="background:#eff7ff" | accusative
| {acc_NUM}
|-
! style="background:#eff7ff" | dative-locative
| {dat_loc_NUM}
]===] .. add_case( "gen_dat", "genitive-dative", want_sg, want_pl ) .. [===[
|-
! style="background:#eff7ff" | ablative
| {abl_NUM}
]===] .. add_case( "dat_abl", "dative-ablative", want_sg, want_pl ) .. [===[
|-
! style="background:#eff7ff" | instrumental
| {ins_NUM}
|-
! style="background:#eff7ff" | comitative
| {com_NUM}
|-
! style="background:#eff7ff" | privative
| {priv_NUM}
|-
! style="background:#eff7ff" | directive
| {dirc_NUM}
]===] .. reflexive( "30" ) .. [===[
! style="background:#d9ebff;width:10em" |
! style="background:#d9ebff;width:20em" | NUMBER
|-
! style="background:#eff7ff" | nominative
| {nom_NUM_refl}
|-
! style="background:#eff7ff" | genitive
| {gen_NUM_refl}
|-
! style="background:#eff7ff" | accusative
| {acc_NUM_refl}
|-
! style="background:#eff7ff" | dative-locative
| {dat_loc_NUM_refl}
]===] .. add_case( "gen_dat", "genitive-dative", want_sg, want_pl, true ) .. [===[
|-
! style="background:#eff7ff" | ablative
| {abl_NUM_refl}
]===] .. add_case( "dat_abl", "dative-ablative", want_sg, want_pl, true ) .. [===[
|-
! style="background:#eff7ff" | instrumental
| {ins_NUM_refl}
|-
! style="background:#eff7ff" | comitative
| {com_NUM_refl}
|-
! style="background:#eff7ff" | privative
| {priv_NUM_refl}
|-
! style="background:#eff7ff" | directive
| {dirc_NUM_refl}
|{\cl}</div></div>
{\op}| style="background:#f9f9f9;text-align:center;min-width:30em;width:100%" class="inflection-table"
|-
! style="background:#d9ebff;width:10em" | independent<br>genitive
! style="background:#d9ebff;width:20em" | NUMBER
|-
! style="background:#eff7ff" | singular<br>possession
| {spos_indgen_NUM}
|-
! style="background:#eff7ff" | collective<br>possession
| {cpos_indgen_NUM}
]===] .. template_footer(), "NUMBER", number ), "NUM", num )
	end

	-- Template used when the number is "spos" or "cpos": one column whose slots
	-- carry no number suffix.
	local function table_spec_poss()
		return header( "30" ) .. [===[
! style="background:#eff7ff;width:10em" | nominative
| style="width:20em" | {nom}
|-
! style="background:#eff7ff" | genitive
| {gen}
|-
! style="background:#eff7ff" | accusative
| {acc}
|-
! style="background:#eff7ff" | dative-locative
| {dat_loc}
|-
! style="background:#eff7ff" | ablative
| {abl}
|-
! style="background:#eff7ff" | instrumental
| {ins}
|-
! style="background:#eff7ff" | comitative
| {com}
|-
! style="background:#eff7ff" | privative
| {priv}
|-
! style="background:#eff7ff" | directive
| {dirc}
]===] .. reflexive( "30" ) .. [===[
! style="background:#eff7ff;width:10em" | nominative
| style="width:20em" | {nom_refl}
|-
! style="background:#eff7ff" | genitive
| {gen_refl}
|-
! style="background:#eff7ff" | accusative
| {acc_refl}
|-
! style="background:#eff7ff" | dative-locative
| {dat_loc_refl}
|-
! style="background:#eff7ff" | ablative
| {abl_refl}
|-
! style="background:#eff7ff" | instrumental
| {ins_refl}
|-
! style="background:#eff7ff" | comitative
| {com_refl}
|-
! style="background:#eff7ff" | privative
| {priv_refl}
|-
! style="background:#eff7ff" | directive
| {dirc_refl}
]===] .. template_footer()
	end

	-- Wrapper placed around the footnote text when there is any.
	local notes_template = [===[
<div style="width:100%;text-align:left;background:#d9ebff">
<div style="display:inline-block;text-align:left;padding-left:1em;padding-right:1em">
{footnote}
</div></div>
]===]

	if alternant_multiword_spec.title then
		forms.title = alternant_multiword_spec.title
	else
		forms.title = "Declension of <i lang=\"mn\" class=\"Cyrl\">" .. forms.lemma .. "</i>"
	end
	local annotation = alternant_multiword_spec.annotation or ""
	if annotation == "" then
		forms.annotation = ""
	else
		forms.annotation = " (<span style=\"font-weight:normal;font-size:small\">" .. annotation .. "</span>)"
	end
	local table_spec =
		alternant_multiword_spec.number == "sg" and table_spec_one( "sg", "singular / indefinite" ) or
		alternant_multiword_spec.number == "pl" and table_spec_one( "pl", "definite plural" ) or
		alternant_multiword_spec.number == "spos" and table_spec_poss() or
		alternant_multiword_spec.number == "cpos" and table_spec_poss() or
		table_spec_both
	forms.notes_clause = forms.footnote ~= "" and
		format( notes_template, forms ) or ""
	return format( table_spec, forms )
end
-- Entry point that parses the template arguments and computes all forms.
--
-- parent_args: raw template arguments; argument 1 is the lemma with its
--   angle-bracket declension spec, `footnote` is a list, `title` and `pos`
--   are optional.
-- pos: part of speech; when nil, the `pos` argument (default "noun") is used.
-- from_headword, def: accepted but not used in this function.
--
-- Returns the alternant multiword spec with forms, categories and annotation
-- filled in. Raises an error when argument 1 contains no "<...>" spec.
function export.do_generate_forms( parent_args, pos, from_headword, def )
	local args = m_para.process( parent_args, {
		[1] = { required = true, default = "хаан<r>" },
		footnote = { list = true },
		title = {},
		pos = { default = "noun" }
	} )
	if not match( args[1], "<.*>" ) then
		error( "Please specify declension" )
	end

	local spec = iut.parse_inflected_text( args[1], {
		parse_indicator_spec = parse_indicator_spec
	} )
	spec.title = args.title
	spec.pos = pos or args.pos
	spec.footnotes = args.footnote
	spec.args = args

	normalize_all_lemmas( spec )
	detect_all_indicator_specs( spec )
	propagate_properties( spec, "number", "both", "both" )

	-- The skip_slot callback reads spec.number at call time, i.e. after the
	-- propagation above has settled it.
	local function skip_for_number( slot )
		return skip_slot( spec.number, slot )
	end
	iut.inflect_multiword_or_alternant_multiword_spec( spec, {
		skip_slot = skip_for_number,
		slot_table = output_noun_slots_with_linked,
		get_variants = get_variants,
		inflect_word_spec = handle_derived_slots_and_overrides,
		lang = lang
	} )
	compute_categories_and_annotation( spec )
	return spec
end
-- Template entry point: generates the forms from the parent frame's arguments
-- and returns the declension table followed by the formatted categories. The
-- part of speech is "proper noun" when propernoun() accepts argument 1
-- (falling back to "хаан" when it is absent), otherwise "noun".
function export.show( frame )
	local parent_args = frame:getParent().args or frame.args
	local pos
	if propernoun( parent_args[1] or "хаан" ) then
		pos = "proper noun"
	else
		pos = "noun"
	end
	local spec = export.do_generate_forms( parent_args, pos )
	show_forms( spec )
	local rendered = make_table( spec )
	local categories = require( "Module:utilities" ).format_categories( spec.categories, lang )
	return rendered .. categories
end
-- Apply to `form` every inflection step whose tag occurs in the `inflection`
-- string, in a fixed order: attributive, plural, genitive, independent
-- genitives, accusative, dative-locative, ablative, instrumental, comitative,
-- privative, directive, reflexive. `vh` is the vowel-harmony value handed to
-- each step; it is recomputed from the new form after the plural, privative
-- and directive steps. The internal `n` marker is removed from the result.
local function generate_inflection(form, inflection, vh)
	-- True-ish when the inflection string contains `tag` (used as a pattern).
	local function tagged( tag )
		return match( inflection, tag )
	end

	if tagged( "attr" ) then
		form = attributive( form, vh )
	end
	if tagged( "|p$" ) then
		form = plural( form, vh )
		vh = vowelharmony( form )
	end
	if tagged( "gen" ) then
		form = genitive( form, vh )
	end
	if tagged( "spos|indgen" ) or tagged( "refl poss|form|of the|gen|p" ) then
		form = singular_independent_genitive( form, vh )
	end
	if tagged( "cpos|indgen" ) then
		form = collective_independent_genitive( form, vh )
	end
	if tagged( "acc" ) then
		form = accusative( form, vh )
	end
	if tagged( "dat" ) then
		form = dative_locative( form, vh )
	end
	if tagged( "abl" ) then
		form = ablative( form, vh )
	end
	if tagged( "ins" ) then
		form = instrumental( form, vh )
	end
	if tagged( "com" ) then
		form = comitative( form, vh )
	end
	if tagged( "priv" ) then
		form = privative( form, vh )
		vh = vowelharmony( form )
	end
	if tagged( "dirc" ) then
		form = directive( form, vh )
		vh = vowelharmony( form )
	end
	if tagged( "refl" ) then
		form = reflexive( form, vh )
	end
	return gsub(form, n, "")
end
-- Inflect a traditional-script form in parallel with its Cyrillic counterpart.
--
-- trad_form: the traditional-script lemma to inflect.
-- cyrl_orig: the Cyrillic lemma; its vowel harmony drives the inflection.
-- cyrl_form: the already-inflected Cyrillic form, used to work out which
--   plural marker was applied.
-- cyrl_form_args: Cyrillic lemma with its angle-bracket declension spec.
-- inflection: tag string naming the inflection to generate.
--
-- Returns the inflected traditional-script form.
function export.inflect_traditional_form( trad_form, cyrl_orig, cyrl_form, cyrl_form_args, inflection )
	local spec = iut.parse_inflected_text( cyrl_form_args, {
		parse_indicator_spec = parse_indicator_spec
	} )
	local vh = vowelharmony( cyrl_orig )
	local first_word = spec.alternant_or_word_specs[1]

	-- Attach the internal declension markers: hidden-n goes on both forms,
	-- hidden-g only on the Cyrillic one.
	local decl = first_word.decl
	if decl == "n" then
		trad_form = trad_form .. n
		cyrl_orig = cyrl_orig .. n
	elseif decl == "g" then
		cyrl_orig = cyrl_orig .. g
	end

	local plurals = first_word.plural
	if plurals then
		-- Plural types that have an internal marker; the others are skipped.
		local markers = { nuud = nuud, chuud = chuud, d = d, s = s }
		for _, plural_type in ipairs( plurals ) do
			local marker = markers[plural_type]
			if marker then
				-- Keep the first marker that reproduces the given Cyrillic form.
				local candidate = generate_inflection(cyrl_orig .. marker, inflection, vh)
				if candidate == cyrl_form then
					trad_form = trad_form .. marker
					break
				end
			end
		end
	end

	local inflected = generate_inflection(trad_form, inflection, vh)
	inflected = gsub(inflected, "([ᠠᠡᠣᠤᠧ])(ᠢ[ᠠ-ᡂᡸ])", "%1ᠶ%2")
	return inflected
end
return export