-- Module:hsb-noun
-- (Wiki page chrome captured with the source, kept here as comments so the file remains valid Lua:)
-- - This module lacks a documentation subpage. Please create it.
-- - Useful links: subpage list • links • transclusions • testcases • sandbox
local export = {}
--[=[
Authorship: Zhnka, heavily based on [[Module:cs-noun]] by Benwing
]=]
--[=[
TERMINOLOGY:
-- "slot" = A particular combination of case/number.
Example slot names for nouns are "gen_s" (genitive singular) and
"voc_p" (vocative plural). Each slot is filled with zero or more forms.
-- "form" = The declined form representing the value of a given slot.
-- "lemma" = The dictionary form. Generally the nominative
singular, but may occasionally be another form (e.g. the nominative plural
of a plurale tantum noun) if the nominative singular is missing.
]=]
local lang = require("Module:languages").getByCode("hsb")
local m_table = require("Module:table")
local m_links = require("Module:links")
local m_string_utilities = require("Module:string utilities")
local iut = require("Module:inflection utilities")
local m_para = require("Module:parameters")
local com = require("Module:hsb-common")
local current_title = mw.title.getCurrentTitle()
local NAMESPACE = current_title.nsText
local PAGENAME = current_title.text
local u = mw.ustring.char
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rgmatch = mw.ustring.gmatch
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local usub = mw.ustring.sub
local uupper = mw.ustring.upper
local ulower = mw.ustring.lower
local force_cat = false -- set to true to make categories appear in non-mainspace pages, for testing
-- Wrapper around rsubn() that hands back only the substituted string and drops the substitution count.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate rsubn()'s results to the first value.
	return (rsubn(term, foo, bar))
end
-- Wrapper around rsubn() whose second return value is a boolean telling whether at least one substitution
-- was made (instead of the raw substitution count).
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end
-- Record a tracking hit under "hsb-noun/<track_id>" via Module:debug/track. Always returns true.
local function track(track_id)
	local do_track = require("Module:debug/track")
	do_track("hsb-noun/" .. track_id)
	return true
end
-- Map from slot name ("<case>_<number>") to its tag string ("<case>|<number>"), for every combination of
-- the seven cases with the numbers s, d and p.
local output_noun_slots = {}
for _, case in ipairs({"nom", "gen", "dat", "acc", "voc", "loc", "ins"}) do
	for _, number in ipairs({"s", "d", "p"}) do
		output_noun_slots[case .. "_" .. number] = case .. "|" .. number
	end
end
-- Return the slot -> tag-string table to use for `alternant_multiword_spec`.
--
-- When `actual_number` is "allthree" the shared `output_noun_slots` table is returned unchanged. Otherwise a
-- fresh table is returned in which a trailing "|s" or "|p" number tag has been stripped from each value.
--
-- The stripped variant is built as a copy instead of editing `output_noun_slots` in place: an in-place edit is
-- permanent, so a later call for an "allthree" spec in the same Lua invocation would receive the stripped tags.
--
-- NOTE(review): the pattern only strips "|s" and "|p"; dual tags ("|d") are left as they are. Confirm whether
-- that is intended.
local function get_output_noun_slots(alternant_multiword_spec)
	if alternant_multiword_spec.actual_number == "allthree" then
		return output_noun_slots
	end
	local stripped = {}
	for slot, accel_form in pairs(output_noun_slots) do
		-- Parentheses keep only gsub()'s first result (the string), not the match count.
		stripped[slot] = (accel_form:gsub("|[sp]$", ""))
	end
	return stripped
end
-- Slots for which a "<slot>_linked" variant is generated in handle_derived_slots_and_overrides().
local potential_lemma_slots = {"nom_s", "nom_p", "gen_s"}
-- Set of recognized three-letter case prefixes.
local cases = {}
for _, case in ipairs({"nom", "gen", "dat", "acc", "voc", "loc", "ins"}) do
	cases[case] = true
end
-- Set of cases listed as clitic cases.
local clitic_cases = {}
for _, case in ipairs({"gen", "dat", "acc"}) do
	clitic_cases[case] = true
end
-- Call com.dereduce(base, stem) and return its result; raise an error naming the stem if it returns a
-- false value.
local function dereduce(base, stem)
	local result = com.dereduce(base, stem)
	if result then
		return result
	end
	error("Unable to dereduce stem '" .. stem .. "'")
end
-- Return a true value if `slot` must not be filled for the given `number`: plural slots ("..._p") when
-- `number` is "sg", singular slots ("..._s") when `number` is "pl". Otherwise a false value.
-- NOTE(review): dual slots ("..._d") are never skipped by this function.
local function skip_slot(number, slot)
	if number == "sg" then
		local hit = rfind(slot, "_p$")
		if hit then
			return hit
		end
	end
	return number == "pl" and rfind(slot, "_s$")
end
-- Core routine that joins stem(s) with ending(s) and stores the result in `slot` of `base.forms`.
--
-- * `stems` is either the stems object handed to the declension functions, or a plain string that is used
--   verbatim as the stem. (When passing a string, the stems object's footnotes are not picked up; pass them
--   through `footnotes`, merging with iut.combine_footnotes() if you have footnotes of your own.)
-- * `endings` is a single ending string or a list of endings; if it is nil/false nothing is inserted.
-- * An ending of "-" means "use the lemma": `stems` is ignored and `base.actual_lemma` is inserted with an empty
--   ending. This matters when the declension lemma differs from the real lemma (`decllemma:`, '.foreign').
-- * `footnotes` are merged after `base.footnotes` and the stems object's footnotes.
local function add(base, slot, stems, endings, footnotes)
	-- skip_slot() is checked against the declined number; if the actual number is different, this is
	-- corrected in decline_noun() at the end.
	if not endings or skip_slot(base.number, slot) then
		return
	end
	local stem_notes
	if type(stems) == "table" then
		stem_notes = stems.footnotes or nil
	end
	local all_notes = iut.combine_footnotes(iut.combine_footnotes(base.footnotes, stem_notes), footnotes)
	local ending_list = type(endings) == "string" and {endings} or endings

	-- Choose the stem for a real (non-"-") ending: a string `stems` is used as-is; otherwise pick among the
	-- four stems (vowel-vs-non-vowel cross regular-vs-oblique) of the stems object.
	local function pick_stem(ending)
		if type(stems) == "string" then
			return stems
		end
		local vowel_initial = rfind(ending, "^" .. com.vowel_c)
		if stems.oblique_slots == "all" then
			if vowel_initial then
				return stems.oblique_vowel_stem
			end
			return stems.oblique_nonvowel_stem
		end
		if vowel_initial then
			return stems.vowel_stem
		end
		return stems.nonvowel_stem
	end

	local function join(stem, ending)
		return com.combine_stem_ending(base, slot, stem, ending)
	end

	for _, ending in ipairs(ending_list) do
		local stem
		if ending == "-" then
			stem, ending = base.actual_lemma, ""
		else
			stem = pick_stem(ending)
		end
		local annotated_ending = iut.combine_form_and_footnotes(ending, all_notes)
		iut.add_forms(base.forms, slot, stem, annotated_ending, join)
	end
end
-- Apply the user overrides in `base.overrides` for every slot accepted by the predicate `do_slot(slot)`.
-- An accepted slot is flagged in `base.slot_overridden`, its existing forms are discarded, and the override
-- values are inserted. Raises an error if an override names a slot excluded by `base.number`.
local function process_slot_overrides(base, do_slot)
	-- Insert all values of a single override into `slot`.
	local function apply_override(slot, override)
		for _, value in ipairs(override.values) do
			local form = value.form
			local notes = iut.combine_footnotes(base.footnotes, value.footnotes)
			if override.full then
				-- Full-form override: the value is the complete form; an empty one inserts nothing.
				if form ~= "" then
					iut.insert_form(base.forms, slot, {form = form, footnotes = notes})
				end
			else
				-- Convert a null ending to "-" in the acc/voc sg slots so that e.g. [[Kerberos]] declared as
				-- <m.sg.foreign.gena:u.acc-:a> works correctly and generates accusative 'Kerberos/Kerbera' not
				-- #'Kerber/Kerbera'.
				if (slot == "acc_s" or slot == "voc_s") and form == "" then
					form = "-"
				end
				for _, stems in ipairs(base.stem_sets) do
					add(base, slot, stems, form, notes)
				end
			end
		end
	end

	for slot, overrides in pairs(base.overrides) do
		-- skip_slot() is checked against the declined number; if the actual number is different, this is
		-- corrected in decline_noun() at the end.
		if skip_slot(base.number, slot) then
			error("Override specified for invalid slot '" .. slot .. "' due to '" .. base.number .. "' number restriction")
		end
		if do_slot(slot) then
			base.slot_overridden[slot] = true
			base.forms[slot] = nil
			for _, override in ipairs(overrides) do
				apply_override(slot, override)
			end
		end
	end
end
-- Add one set of endings for the stem(s) `stems` to the forms of `base`, one add() call per slot.
--
-- Each ending parameter is a string, a list of strings, or nil/false to leave that slot alone (see add()).
-- The lemma is always entered as the nominative singular (ending "-"); the trailing `nom_s` parameter can
-- supply additional nominative singular ending(s). `footnotes` is passed through to every add() call.
-- Declension functions may call this several times with different stems, each call filling only some slots.
local function add_decl(base, stems,
	gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
	nom_d, gen_d, dat_d,
	nom_p, gen_p, dat_p, acc_p, loc_p, ins_p, nom_s, footnotes
)
	add(base, "nom_s", stems, "-", footnotes)
	add(base, "gen_s", stems, gen_s, footnotes)
	add(base, "dat_s", stems, dat_s, footnotes)
	add(base, "acc_s", stems, acc_s, footnotes)
	add(base, "voc_s", stems, voc_s, footnotes)
	add(base, "loc_s", stems, loc_s, footnotes)
	add(base, "ins_s", stems, ins_s, footnotes)
	add(base, "nom_d", stems, nom_d, footnotes)
	add(base, "gen_d", stems, gen_d, footnotes)
	add(base, "dat_d", stems, dat_d, footnotes)
	-- Only rewrite the nominative plural when this call actually supplies one. Follow-up calls that fill just a
	-- few slots pass nil for both nom_p and acc_p; without this guard nil would compare equal to nil and the
	-- lemma would be inserted as accusative plural, which then suppresses the animacy-based derivation of
	-- acc_p in handle_derived_slots_and_overrides().
	if base.number == "pl" and nom_p then
		-- If this is a plurale tantum noun and we're processing the nominative plural, use the user-specified
		-- lemma rather than generating the plural from the synthesized singular, which may not match the
		-- specified lemma. An accusative plural given with the same ending(s) as the nominative plural is
		-- replaced by the lemma as well.
		local acc_p_like_nom = m_table.deepEquals(nom_p, acc_p)
		nom_p = "-"
		if acc_p_like_nom then
			acc_p = "-"
		end
	end
	add(base, "nom_p", stems, nom_p, footnotes)
	add(base, "gen_p", stems, gen_p, footnotes)
	add(base, "dat_p", stems, dat_p, footnotes)
	add(base, "acc_p", stems, acc_p, footnotes)
	add(base, "loc_p", stems, loc_p, footnotes)
	add(base, "ins_p", stems, ins_p, footnotes)
	add(base, "nom_s", stems, nom_s, footnotes)
end
-- Singular-only wrapper around add_decl(): passes the six singular endings and nil for every dual and
-- plural ending.
--
-- add_decl() takes an extra `nom_s` parameter between `ins_p` and `footnotes`, so an explicit nil is passed
-- for it; otherwise `footnotes` would be interpreted as nominative singular ending(s) and no footnotes would
-- be attached.
local function add_sg_decl(base, stems,
	gen_s, dat_s, acc_s, voc_s, loc_s, ins_s, footnotes
)
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nil, nil, nil,
		nil, nil, nil, nil, nil, nil,
		nil, footnotes)
end
-- Dual-only wrapper around add_decl(): the nominative dual is the lemma ("-"), `gen_d` and `dat_d` are
-- passed through, and every singular and plural ending is nil.
--
-- add_decl() takes an extra `nom_s` parameter between `ins_p` and `footnotes`, so an explicit nil is passed
-- for it; otherwise `footnotes` would be interpreted as nominative singular ending(s) and no footnotes would
-- be attached.
local function add_du_only_decl(base, stems,
	gen_d, dat_d, footnotes
)
	add_decl(base, stems, nil, nil, nil, nil, nil, nil,
		"-", gen_d, dat_d,
		nil, nil, nil, nil, nil, nil,
		nil, footnotes)
end
-- Plural-only wrapper around add_decl(): the nominative plural is the lemma ("-"), the remaining plural
-- endings are passed through, and every singular and dual ending is nil.
--
-- add_decl() takes an extra `nom_s` parameter between `ins_p` and `footnotes`, so an explicit nil is passed
-- for it; otherwise `footnotes` would be interpreted as nominative singular ending(s) and no footnotes would
-- be attached.
local function add_pl_only_decl(base, stems,
	gen_p, dat_p, acc_p, loc_p, ins_p, footnotes
)
	add_decl(base, stems, nil, nil, nil, nil, nil, nil,
		nil, nil, nil,
		"-", gen_p, dat_p, acc_p, loc_p, ins_p,
		nil, footnotes)
end
-- Finish the forms of `base`: apply overrides, fill the slots that are copied from other slots, and build
-- the "_linked" variants of the potential lemma slots.
local function handle_derived_slots_and_overrides(base)
	-- Slots whose overrides are applied only after the derived slots have been generated.
	local late_override_slots = {voc_p = true, acc_s = true, clitic_acc_s = true}
	local function is_derived_slot(slot)
		return late_override_slots[slot] == true
	end
	local function is_non_derived_slot(slot)
		return not is_derived_slot(slot)
	end

	-- Copy the forms of `source_slot` into `slot` unless `slot` already has forms or was overridden.
	-- A nil/false `source_slot` copies nothing.
	local function fill_if_missing(slot, source_slot)
		if base.forms[slot] or base.slot_overridden[slot] then
			return
		end
		local source_forms
		if source_slot then
			source_forms = base.forms[source_slot]
		end
		iut.insert_forms(base.forms, slot, source_forms)
	end

	base.slot_overridden = {}
	-- Overrides of the non-derived slots come first, so that an override of a source slot (e.g. nom_p)
	-- propagates to the slots derived from it.
	process_slot_overrides(base, is_non_derived_slot)

	-- Generate the remaining slots that are derived from other slots.
	if not (base.pron or base.det) then
		-- Pronouns don't have a vocative (singular or plural).
		iut.insert_forms(base.forms, "voc_p", base.forms.nom_p)
	end
	-- The accusative copies the nominative or the genitive depending on animacy; an animacy value that is
	-- not listed copies nothing.
	local animacy = base.animacy
	fill_if_missing("acc_s", ({inan = "nom_s", pr = "gen_s", anml = "gen_s"})[animacy])
	fill_if_missing("acc_d", ({inan = "nom_d", pr = "gen_d", anml = "nom_d"})[animacy])
	fill_if_missing("acc_p", ({inan = "nom_p", pr = "gen_p", anml = "nom_p"})[animacy])
	fill_if_missing("clitic_acc_s", animacy == "inan" and "nom_s" or "clitic_gen_s")

	-- Handle overrides for derived slots, to allow them to be overridden.
	process_slot_overrides(base, is_derived_slot)

	-- Compute linked versions of potential lemma slots, for use in {{hsb-noun}}. A form equal to the
	-- link-free lemma is replaced by the original lemma when the original lemma contains links.
	local function relink(form)
		if form ~= base.orig_lemma_no_links or not rfind(base.orig_lemma, "%[%[") then
			return form
		end
		return base.orig_lemma
	end
	for _, slot in ipairs(potential_lemma_slots) do
		iut.insert_forms(base.forms, slot .. "_linked", iut.map_forms(base.forms[slot], relink))
	end
end
-- Table mapping declension types to functions to decline the noun. The function takes two arguments, `base` and
-- `stems`; the latter specifies the computed stems (vowel vs. non-vowel, regular vs. oblique) and whether the noun
-- is reducible and/or has vowel alternations in the stem. Most of the specifics of determining which stem to use
-- and how to modify it for the given ending are handled in add() (called via add_decl()); the declension functions
-- just need to generate the appropriate endings.
local decls = {}
-- Table specifying additional properties for declension types. Every declension type must have such a table, which
-- specifies which category or categories to add and what annotation to show in the title bar of the declension table.
--
-- * Only the `cat` property of this table is mandatory; there is also a `desc` property to specify the annotation, but
-- this can be omitted and the annotation will then be computed from the `cat` property. The `cat` property is either
-- a string, a list of strings or a function (of two arguments, `base` and `stems` as above) returning a string or
-- list of strings. The string can contain the keywords GENDER to substitute the gender (and animacy for masculine
-- nouns) and POS (to substitute the pluralized part of speech). The keyword GENPOS is equivalent to 'GENDER POS'. If
-- no keyword is present, ' GENPOS' is added onto the end. If only GENDER is present, ' POS' is added onto the end.
-- In all cases, the language name is added onto the beginning to form the full category name.
-- * The `desc` property is of the same form as the `cat` property and specifies the annotation to display in the title
-- bar (which may have the same format as the category minus the part of speech, or may be abbreviated). The value
-- may not be a list of strings, as only one annotation is displayed. If omitted, it is derived from the category
-- spec(s) by taking the last category (if more than one is given) and removing ' POS' before keyword substitution.
local declprops = {}
-- Masculine hard-stem declension.
-- The first add_decl() call supplies the endings for the regular stem(s); the second adds "e" as a further
-- vocative and locative singular ending on the stem returned by
-- com.convert_paired_plain_to_palatal(com.apply_palatalization(...)).
decls["hard-m"] = function(base, stems)
	-- NOTE(review): other code in this module spells the inanimate animacy value "inan" (see
	-- handle_derived_slots_and_overrides()), whereas this tests for "in". If "in" is never assigned, the
	-- {"a", "u"} alternative is unreachable -- confirm which spelling is intended.
	local gen_s = base.animacy == "in" and {"a", "u"} or "a"
	local nom_p = base.animacy == "pr" and "ojo" or "y"
	-- Lemmas ending in "tr" get no "o" vocative (voc_s is then false, which add() treats as "no ending").
	local voc_s = not rmatch(base.lemma, ".*tr$") and "o"
	-- The accusative singular is deliberately nil here; it is derived by animacy in
	-- handle_derived_slots_and_overrides(). (Previously an undeclared global `acc_s` was read at this position.)
	add_decl(base, stems, gen_s, "ej", nil, voc_s, "u", "om",
		"aj", "ow", "omaj",
		nom_p, "ow", "am", nil, "ach", "ami")
	add_decl(base, com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem)), nil, nil, nil, "e", "e")
end
declprops["hard-m"] = {
	desc = function(base, stems)
		return "masculine hard stem"
	end,
	cat = function(base, stems)
		return "masculine hard stem"
	end
}
-- Masculine soft-stem declension. All endings are attached to com.addj(stems.oblique_vowel_stem).
decls["soft-m"] = function(base, stems)
	-- NOTE(review): other code in this module spells the inanimate animacy value "inan" (see
	-- handle_derived_slots_and_overrides()), whereas this tests for "in". If "in" is never assigned, the
	-- {"a", "u"} alternative is unreachable -- confirm which spelling is intended.
	local gen_s = base.animacy == "in" and {"a", "u"} or "a"
	local nom_p = base.animacy == "pr" and "ojo" or "e"
	-- The accusative singular is deliberately nil here; it is derived by animacy in
	-- handle_derived_slots_and_overrides(). (Previously an undeclared global `acc_s` was read at this position.)
	add_decl(base, com.addj(stems.oblique_vowel_stem), gen_s, "ej", nil, "o", "u", "om",
		"ej", "ow", "omaj",
		nom_p, "ow", "am", nil, "ach", "emi")
end
declprops["soft-m"] = {
	desc = function(base, stems)
		return "masculine soft stem"
	end,
	cat = function(base, stems)
		return "masculine soft stem"
	end
}
-- Masculine "hard hissing stem" declension (annotated as such in the title bar, categorized as
-- "masculine hard stem").
decls["czs-m"] = function(base, stems)
	-- NOTE(review): other code in this module spells the inanimate animacy value "inan" (see
	-- handle_derived_slots_and_overrides()), whereas this tests for "in". If "in" is never assigned, the
	-- {"a", "u"} alternative is unreachable -- confirm which spelling is intended.
	local gen_s = base.animacy == "in" and {"a", "u"} or "a"
	local nom_p = base.animacy == "pr" and "ojo" or "y"
	-- The accusative singular is deliberately nil here; it is derived by animacy in
	-- handle_derived_slots_and_overrides(). (Previously an undeclared global `acc_s` was read at this position.)
	add_decl(base, stems, gen_s, "ej", nil, "o", "u", "om",
		"aj", "ow", "omaj",
		nom_p, "ow", "am", nil, "ach", "ami")
end
declprops["czs-m"] = {
	desc = function(base, stems)
		return "masculine hard hissing stem"
	end,
	cat = function(base, stems)
		return "masculine hard stem"
	end
}
-- Masculine velar-stem declension.
-- The first add_decl() call supplies the endings for the regular stem(s); the second adds "e" as a further
-- locative singular ending on the stem returned by
-- com.convert_paired_plain_to_palatal(com.apply_palatalization(...)).
decls["velar-m"] = function(base, stems)
	-- NOTE(review): other code in this module spells the inanimate animacy value "inan" (see
	-- handle_derived_slots_and_overrides()), whereas this tests for "in". If "in" is never assigned, the
	-- {"a", "u"} alternative is unreachable -- confirm which spelling is intended.
	local gen_s = base.animacy == "in" and {"a", "u"} or "a"
	local nom_p = base.animacy == "pr" and "ojo" or "i"
	-- The accusative singular is deliberately nil here; it is derived by animacy in
	-- handle_derived_slots_and_overrides(). (Previously an undeclared global `acc_s` was read at this position.)
	add_decl(base, stems, gen_s, "ej", nil, "o", "u", "om",
		"aj", "ow", "omaj",
		nom_p, "ow", "am", nil, "ach", "ami")
	add_decl(base, com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem)), nil, nil, nil, nil, "e")
end
declprops["velar-m"] = {
	desc = function(base, stems)
		return "masculine velar stem"
	end,
	cat = function(base, stems)
		return "masculine velar stem"
	end
}
-- Masculine adjectival declension. The branch is chosen from the shape of the lemma:
--   1. velar consonant + "i";
--   2. inherently soft consonant + "i";
--   3. c/z/s + "e";
--   4. everything else.
-- In branches 1 and 4 the nominative dual and plural depend on animacy and are added by separate add_decl()
-- calls, so the first call passes nil for them. (Previously undeclared globals `nom_d` and `nom_p` were read
-- at those positions.)
decls["adje-m"] = function(base, stems)
	if rmatch(base.lemma, "^.*" .. com.velar_c .. "i$") then
		add_decl(base, stems, "eho", "emu", nil, "-", "im", "im",
			nil, "eju", "imaj",
			nil, "ich", "im", nil, "ich", "imi")
		if base.animacy == "pr" then
			-- Nominative plural "y" goes on the stem returned by com.apply_palatalization().
			add_decl(base, com.apply_palatalization(stems.oblique_vowel_stem), nil, nil, nil, nil, nil, nil, nil, nil, nil, "y")
			add_decl(base, stems, nil, nil, nil, nil, nil, nil, "aj")
		else
			add_decl(base, stems, nil, nil, nil, nil, nil, nil, "ej", nil, nil, "e")
		end
	elseif rmatch(base.lemma, "^.*" .. com.inherently_soft_c .. "i$") then
		local nom_p = base.animacy == "pr" and "i" or "e"
		add_decl(base, stems, "eho", "emu", nil, "-", "im", "im",
			"ej", "eju", "imaj",
			nom_p, "ich", "im", nil, "ich", "imi")
	elseif rmatch(base.lemma, "^.*[czs]e$") then
		local nom_p = base.animacy == "pr" and "y" or "e"
		local nom_d = base.animacy == "pr" and "aj" or "ej"
		add_decl(base, stems, "eho", "emu", nil, "-", "ym", "ym",
			nom_d, "eju", "ymaj",
			nom_p, "ych", "ym", nil, "ych", "ymi")
	else
		add_decl(base, stems, "eho", "emu", nil, "-", "ym", "ym",
			nil, "eju", "ymaj",
			nil, "ych", "ym", nil, "ych", "ymi")
		if base.animacy == "pr" then
			-- Nominative plural "i" goes on the stem returned by com.apply_palatalization().
			add_decl(base, com.apply_palatalization(stems.oblique_vowel_stem), nil, nil, nil, nil, nil, nil, nil, nil, nil, "i")
			add_decl(base, stems, nil, nil, nil, nil, nil, nil, "aj")
		else
			add_decl(base, stems, nil, nil, nil, nil, nil, nil, "ej", nil, nil, "e")
		end
	end
end
declprops["adje-m"] = {
	desc = function(base, stems)
		return ""
	end,
	cat = function(base, stems)
		return "masculine adjectival"
	end
}
-- Feminine hard-stem declension.
decls["hard-f"] = function(base, stems)
	-- Endings on the regular stem(s). dat_s, loc_s and nom_d are left nil here and supplied below.
	add_decl(base, stems,
		"y", nil, "u", "-", nil, "u",
		nil, "ow", "omaj",
		"y", "ow", "am", "y", "ach", "ami")
	-- dat_s, loc_s and nom_d take "e" on the converted stem.
	local converted_stem = com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem))
	add_decl(base, converted_stem, nil, "e", nil, nil, "e", nil, "e")
end
declprops["hard-f"] = {
	desc = function() return "feminine hard stem" end,
	cat = function() return "feminine hard stem" end,
}
-- Feminine soft-stem declension.
decls["soft-f"] = function(base, stems)
	-- Consonant-final lemmas attach the endings to com.addj(stems.oblique_vowel_stem) and use the lemma as
	-- accusative singular; other lemmas use the regular stem(s) and accusative singular "u".
	local main_stems, acc_s
	if rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$") then
		main_stems, acc_s = com.addj(stems.oblique_vowel_stem), "-"
	else
		main_stems, acc_s = stems, "u"
	end
	add_decl(base, main_stems,
		"e", nil, acc_s, "-", nil, "u",
		nil, "ow", "omaj",
		"e", "ow", "am", "e", "ach", "emi")
	-- dat_s, loc_s, nom_d and an additional gen_p take "i" on the stem from com.removej(com.addj(...)).
	local i_stem = com.removej(com.addj(stems.oblique_vowel_stem))
	add_decl(base, i_stem, nil, "i", nil, nil, "i", nil, "i", nil, nil, nil, "i")
end
declprops["soft-f"] = {
	desc = function() return "feminine soft stem" end,
	cat = function() return "feminine soft stem" end,
}
-- Feminine "hard hissing stem" declension (annotated as such in the title bar, categorized as
-- "feminine hard stem").
decls["czs-f"] = function(base, stems)
	-- Consonant-final lemmas use the lemma itself as accusative singular; all others take "u".
	local acc_s = rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$") and "-" or "u"
	add_decl(base, stems,
		"y", "y", acc_s, "-", "y", "u",
		"y", "ow", "omaj",
		"y", "ow", "am", "y", "ach", "ami")
end
declprops["czs-f"] = {
	desc = function() return "feminine hard hissing stem" end,
	cat = function() return "feminine hard stem" end,
}
-- Feminine velar-stem declension.
decls["velar-f"] = function(base, stems)
	-- Endings on the regular stem(s). dat_s, loc_s and nom_d are left nil here and supplied below.
	add_decl(base, stems,
		"i", nil, "u", "-", nil, "u",
		nil, "ow", "omaj",
		"i", "ow", "am", "i", "ach", "ami")
	-- dat_s, loc_s and nom_d take "e" on the converted stem.
	local converted_stem = com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem))
	add_decl(base, converted_stem, nil, "e", nil, nil, "e", nil, "e")
end
declprops["velar-f"] = {
	desc = function() return "feminine velar stem" end,
	cat = function() return "feminine velar stem" end,
}
-- Feminine v-stem declension. Accusative and vocative singular are the lemma itself.
decls["v-f"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "wje", "wi", "-", "-", "wi", "wju"
	local nom_d, gen_d, dat_d = "wi", "wjow", "wjomaj"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "wje", "wjow", "wjam", "wje", "wjach", "wjemi"
	add_decl(base, stems,
		gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_d, gen_d, dat_d,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
declprops["v-f"] = {
	desc = function() return "feminine v-stem" end,
	cat = function() return "feminine v-stem" end,
}
-- Feminine adjectival declension. Lemmas in velar or inherently soft consonant + "a" use "i" in the
-- dative dual and the oblique plural endings; all other lemmas use "y" there.
decls["adje-f"] = function(base, stems)
	local i_type = rmatch(base.lemma, "^.*" .. com.velar_c .. "a$")
		or rmatch(base.lemma, "^.*" .. com.inherently_soft_c .. "a$")
	local v = i_type and "i" or "y"
	add_decl(base, stems,
		"eje", "ej", "u", "-", "ej", "ej",
		"ej", "eju", v .. "maj",
		"e", v .. "ch", v .. "m", "e", v .. "ch", v .. "mi")
end
declprops["adje-f"] = {
	desc = function() return "" end,
	cat = function() return "feminine adjectival" end,
}
-- Neuter hard-stem declension. Accusative and vocative singular are the lemma itself.
decls["hard-n"] = function(base, stems)
	-- Endings on the regular stem(s). loc_s and nom_d are left nil here and supplied below.
	add_decl(base, stems,
		"a", "u", "-", "-", nil, "om",
		nil, "ow", "omaj",
		"a", "ow", "am", "a", "ach", "ami")
	-- loc_s and nom_d take "e" on the converted stem.
	local converted_stem = com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem))
	add_decl(base, converted_stem, nil, nil, nil, nil, "e", nil, "e")
end
declprops["hard-n"] = {
	desc = function() return "neuter hard stem" end,
	cat = function() return "neuter hard stem" end,
}
-- Neuter soft-stem declension. Accusative and vocative singular are the lemma itself.
decls["soft-n"] = function(base, stems)
	-- Endings on the regular stem(s). nom_d is left nil here and supplied below.
	add_decl(base, stems,
		"a", "u", "-", "-", "u", "om",
		nil, "ow", "omaj",
		"a", "ow", "am", "a", "ach", "emi")
	-- nom_d takes "i" on the stem returned by com.removej().
	local i_stem = com.removej(stems.oblique_vowel_stem)
	add_decl(base, i_stem, nil, nil, nil, nil, nil, nil, "i")
end
declprops["soft-n"] = {
	desc = function() return "neuter soft stem" end,
	cat = function() return "neuter soft stem" end,
}
-- Neuter "hard hissing stem" declension (annotated as such in the title bar, categorized as
-- "neuter hard stem"). Accusative and vocative singular are the lemma itself.
decls["czs-n"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "a", "u", "-", "-", "u", "om"
	local nom_d, gen_d, dat_d = "y", "ow", "omaj"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "a", "ow", "am", "a", "ach", "ami"
	add_decl(base, stems,
		gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_d, gen_d, dat_d,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
declprops["czs-n"] = {
	desc = function() return "neuter hard hissing stem" end,
	cat = function() return "neuter hard stem" end,
}
-- Neuter velar-stem declension. Accusative and vocative singular are the lemma itself.
decls["velar-n"] = function(base, stems)
	-- Endings on the regular stem(s). nom_d is left nil here and supplied below.
	add_decl(base, stems,
		"a", "u", "-", "-", "u", "om",
		nil, "ow", "omaj",
		"a", "ow", "am", "a", "ach", "ami")
	-- A further loc_s "e" and the nom_d "e" go on the converted stem.
	local converted_stem = com.convert_paired_plain_to_palatal(com.apply_palatalization(stems.oblique_vowel_stem))
	add_decl(base, converted_stem, nil, nil, nil, nil, "e", nil, "e")
end
declprops["velar-n"] = {
	desc = function() return "neuter velar stem" end,
	cat = function() return "neuter velar stem" end,
}
-- Neuter adjectival declension. Lemmas in velar or inherently soft consonant + "e" use the "i" series of
-- endings ("im", "ich", ...); all other lemmas use the "y" series ("ym", "ych", ...).
--
-- Both branches use the genitive "eho", dative "emu" and the lemma as accusative singular. The "y" branch
-- previously carried "eje"/"ej"/"u" in those three slots, i.e. the singular endings used by the feminine
-- adjectival declension (decls["adje-f"]), while its locative/instrumental "ym" and the sibling branch here
-- already followed the "eho"/"emu" pattern.
decls["adje-n"] = function(base, stems)
	if rmatch(base.lemma, "^.*" .. com.velar_c .. "e$") or rmatch(base.lemma, "^.*" .. com.inherently_soft_c .. "e$") then
		add_decl(base, stems, "eho", "emu", "-", "-", "im", "im",
			"ej", "eju", "imaj",
			"e", "ich", "im", "e", "ich", "imi")
	else
		add_decl(base, stems, "eho", "emu", "-", "-", "ym", "ym",
			"ej", "eju", "ymaj",
			"e", "ych", "ym", "e", "ych", "ymi")
	end
end
declprops["adje-n"] = {
	desc = function(base, stems)
		return ""
	end,
	cat = function(base, stems)
		return "neuter adjectival"
	end
}
-- Neuter t-stem declension: "eć-" endings in the singular and dual, "at-" endings in the plural.
-- Accusative and vocative singular are the lemma itself.
decls["tstem-n"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "eća", "eću", "-", "-", "eću", "ećom"
	local nom_d, gen_d, dat_d = "eći", "ećow", "ećomaj"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ata", "atow", "atam", "ata", "atach", "atami"
	add_decl(base, stems,
		gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_d, gen_d, dat_d,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
declprops["tstem-n"] = {
	desc = function() return "neuter t-stem" end,
	cat = function() return "neuter t-stem" end,
}
-- Neuter n-stem declension: every ending carries an "en(j)-" extension. Accusative and vocative singular
-- are the lemma itself.
decls["nstem-n"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "enja", "enju", "-", "-", "enju", "enjom"
	local nom_d, gen_d, dat_d = "eni", "enjow", "enjomaj"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "enja", "enjow", "enjam", "enja", "enjach", "enjami"
	add_decl(base, stems,
		gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_d, gen_d, dat_d,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
declprops["nstem-n"] = {
	desc = function() return "neuter n-stem" end,
	cat = function() return "neuter n-stem" end,
}
-- Adjectival declension that delegates to an adjective module: the lemma is declined by
-- do_generate_forms() and the resulting adjective slots are copied into the noun slots for the noun's gender.
-- NOTE(review): the module loaded is "Module:zlw-ocs-adjective", not an hsb-specific one -- confirm that this
-- is intended.
decls["adj"] = function(base, stems)
	-- `props` is always empty here, so `propspec` is always "".
	local props = {}
	local propspec = table.concat(props, ".")
	if propspec ~= "" then
		propspec = "<" .. propspec .. ">"
	end
	local adj_spec = require("Module:zlw-ocs-adjective").do_generate_forms({base.lemma .. propspec})

	-- Copy each {adjective slot, noun slot} pair of `slot_pairs`, replacing whatever the noun slot held.
	local function copy_all(slot_pairs)
		for _, pair in ipairs(slot_pairs) do
			base.forms[pair[2]] = adj_spec.forms[pair[1]]
		end
	end

	-- Any gender other than "m" or "f" uses the neuter mapping.
	local singular_by_gender = {
		m = {
			{"nom_m", "nom_s"}, {"gen_mn", "gen_s"}, {"dat_mn", "dat_s"},
			{"loc_mn", "loc_s"}, {"ins_mn", "ins_s"},
		},
		f = {
			{"nom_f", "nom_s"}, {"gen_f", "gen_s"}, {"dat_f", "dat_s"},
			{"acc_f", "acc_s"}, {"loc_f", "loc_s"}, {"ins_f", "ins_s"},
		},
	}
	local singular_neuter = {
		{"nom_n", "nom_s"}, {"gen_mn", "gen_s"}, {"dat_mn", "dat_s"},
		{"acc_n", "acc_s"}, {"loc_mn", "loc_s"}, {"ins_mn", "ins_s"},
	}
	local nonsingular_by_gender = {
		m = {{"nom_mp", "nom_p"}, {"acc_mfp", "acc_p"}, {"nom_md", "nom_d"}},
		f = {{"nom_fp", "nom_p"}, {"acc_mfp", "acc_p"}, {"nom_fnd", "nom_d"}},
	}
	local nonsingular_neuter = {{"nom_np", "nom_p"}, {"acc_np", "acc_p"}, {"nom_fnd", "nom_d"}}
	-- Slots shared by all genders outside the singular.
	local nonsingular_common = {
		{"gen_p", "gen_p"}, {"dat_p", "dat_p"}, {"ins_p", "ins_p"},
		{"loc_p", "loc_p"}, {"gen_d", "gen_d"}, {"dat_d", "dat_d"},
	}

	if base.number ~= "pl" then
		copy_all(singular_by_gender[base.gender] or singular_neuter)
		-- The vocative singular defaults to the nominative singular.
		if not base.forms.voc_s then
			iut.insert_forms(base.forms, "voc_s", base.forms.nom_s)
		end
	end
	if base.number ~= "sg" then
		copy_all(nonsingular_by_gender[base.gender] or nonsingular_neuter)
		copy_all(nonsingular_common)
	end
end
-- Classify an adjectival lemma by its final letter: "hard" for a lemma ending in "ý", "soft" for one ending
-- in "í", otherwise "possessive".
local function get_stemtype(base)
	local lemma = base.lemma
	if rfind(lemma, "ý$") then
		return "hard"
	end
	if rfind(lemma, "í$") then
		return "soft"
	end
	return "possessive"
end
declprops["adj"] = {
	-- Two categories: the generic adjectival one and one qualified by stem type and gender.
	cat = function(base, stems)
		local stemtype = get_stemtype(base)
		return {"adjectival POS", stemtype .. " GENDER adjectival POS"}
	end,
}
-- Indeclinable: every slot is filled with the lemma. Note that fully indeclinable nouns should not have a
-- table at all rather than one all of whose forms are the same; but having an indeclinable declension is
-- useful for nouns that may or may not be indeclinable.
decls["indecl"] = function(base, stems)
	local lemma = "-"
	add_decl(base, stems,
		lemma, lemma, lemma, lemma, lemma, lemma,
		lemma, lemma, lemma,
		lemma, lemma, lemma, lemma, lemma, lemma)
end
declprops["indecl"] = {
	-- Adjectival indeclinables get the adjectival category set; everything else the plain one.
	cat = function(base, stems)
		if not base.adj then
			return {"indeclinable POS", "indeclinable GENPOS"}
		end
		return {"adjectival POS", "indeclinable adjectival POS", "indeclinable GENDER adjectival POS"}
	end
}
-- Anything declined manually using overrides. Only the lemma slot is filled here: nom_p when the number is
-- "pl", nom_s otherwise.
decls["manual"] = function(base, stems)
	local lemma_slot = "nom_s"
	if base.number == "pl" then
		lemma_slot = "nom_p"
	end
	add(base, lemma_slot, stems, "-")
end
declprops["manual"] = {
	cat = {},
	desc = "GENDER",
}
-- Fill in gender, number, animacy and has_clitic (plus the `actual_*` copies) for a pronoun, based on its
-- lemma. Raises an error if the user specified any of these, or if the lemma is not a recognized pronoun.
local function set_pron_defaults(base)
	-- A user-specified number is tolerated only for the lemma "ona".
	local number_given = base.lemma ~= "ona" and base.number
	if base.gender or number_given or base.animacy then
		error("Can't specify gender, number or animacy for pronouns")
	end
	-- Both recognized pronouns have gender "none", number "sg" and no clitic; only the animacy differs.
	local animacy_by_lemma = {
		["štó"] = "pr",
		["što"] = "inan",
	}
	local animacy = animacy_by_lemma[base.lemma]
	if not animacy then
		error(("Unrecognized pronoun '%s'"):format(base.lemma))
	end
	base.gender, base.actual_gender = "none", "none"
	base.number, base.actual_number = "sg", "sg"
	base.animacy, base.actual_animacy = animacy, animacy
	base.has_clitic = false
end
-- Give a pronoun a single stem set with empty stems and select the "pron" declension. Stem indicators
-- (an already present `base.stem_sets`) are rejected.
local function determine_pronoun_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with pronouns")
	end
	local empty_stem_set = {reducible = false, vowel_stem = "", nonvowel_stem = ""}
	base.stem_sets = {empty_stem_set}
	base.decl = "pron"
end
-- Pronoun declension for the lemmas "štó" and "što". Only genitive, dative, locative and instrumental
-- singular are supplied (locative and instrumental share one form).
decls["pron"] = function(base, stems)
	local gen_s, dat_s, loc_ins_s
	if base.lemma == "štó" then
		gen_s, dat_s, loc_ins_s = "koho", "komu", "kim"
	elseif base.lemma == "što" then
		gen_s, dat_s, loc_ins_s = "čeho", "čemu", "čim"
	else
		error(("Internal error: Unrecognized pronoun lemma '%s'"):format(base.lemma))
	end
	add_decl(base, stems, gen_s, dat_s, nil, nil, loc_ins_s, loc_ins_s)
end
declprops["pron"] = {
	cat = {},
	desc = "GENDER pronoun",
}
-- Fill in the fixed properties of a numeral: gender "none", number "pl", animacy "none", no clitic (plus the
-- `actual_*` copies). Raises an error if gender or animacy was specified.
-- NOTE(review): the error text mentions number, but `base.number` is not checked; any value it holds is
-- overwritten with "pl".
local function set_num_defaults(base)
	if base.gender or base.animacy then
		error("Can't specify gender, number or animacy for numeral")
	end
	base.gender, base.actual_gender = "none", "none"
	base.number, base.actual_number = "pl", "pl"
	base.animacy, base.actual_animacy = "none", "none"
	base.has_clitic = false
end
-- Fill in the fixed properties of a determiner: gender, number and animacy are all "none" and there is no
-- clitic (plus the `actual_*` copies). Raises an error if any of gender, number or animacy was specified.
local function set_det_defaults(base)
	if base.gender or base.number or base.animacy then
		error("Can't specify gender, number or animacy for determiner")
	end
	for _, property in ipairs({"gender", "number", "animacy"}) do
		base[property] = "none"
		base["actual_" .. property] = "none"
	end
	base.has_clitic = false
end
-- Give a determiner a single stem set and select the "det" declension. The stem is the lemma minus a final
-- vowel, or the whole lemma if it does not end in a vowel. Stem indicators (an already present
-- `base.stem_sets`) are rejected.
local function determine_determiner_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with determiners")
	end
	local stem = rmatch(base.lemma, "^(.*)" .. com.vowel_c .. "$")
	if not stem then
		stem = base.lemma
	end
	base.stem_sets = {{reducible = false, vowel_stem = stem, nonvowel_stem = stem}}
	base.decl = "det"
end
-- Determiner declension: singular only, with "a" in the genitive, dative, locative and instrumental, the
-- lemma as accusative, and no vocative.
decls["det"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "a", "a", "-", nil, "a", "a"
	add_sg_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s)
end
declprops["det"] = {
	cat = {},
	desc = "GENDER determiner",
}
-- Collect the footnotes of `separated_group`, a list in which element 1 is ordinary text, the even-indexed
-- elements are footnotes and each odd-indexed element after a footnote must be the empty string.
-- Returns the list of footnotes, or nil if there are none; raises an error when non-empty text follows a
-- footnote.
local function fetch_footnotes(separated_group)
	local collected
	local index = 2
	local last = #separated_group - 1
	while index <= last do
		if separated_group[index + 1] ~= "" then
			error("Extraneous text after bracketed footnotes: '" .. table.concat(separated_group) .. "'")
		end
		collected = collected or {}
		collected[#collected + 1] = separated_group[index]
		index = index + 2
	end
	return collected
end
-- Parse a single slot override. `segments` is one dot-separated group of the indicator spec; its first
-- element has the shape CASE[pl|du][+CASE[pl|du]...][:]VALUE, where CASE is one of the three-letter keys of
-- `cases`, an optional "pl"/"du" selects the plural/dual slot (singular otherwise), "+" chains further slots,
-- and a ":" directly after the slot list marks the values as full forms (`full = true`) instead of endings.
-- Further values are separated by ":"; a value of "-" stands for the empty string.
--
-- Returns two values: the list of slot names and an override object {full = true or nil, values = {...}}
-- where each value is {form = STRING, footnotes = LIST or nil}.
-- Raises an error on an unrecognized case prefix or on an empty value.
-- Side effect: `segments[1]` is overwritten with what remains after the slot prefix has been consumed.
local function parse_override(segments)
local retval = {values = {}}
local part = segments[1]
local slots = {}
-- Consume one CASE[pl|du] unit per iteration; continue only while a "+" follows.
while true do
local case = usub(part, 1, 3)
if cases[case] then
-- ok
else
error(("Unrecognized case '%s' in override: '%s'"):format(case, table.concat(segments)))
end
part = usub(part, 4)
local slot
if rfind(part, "^pl") then
part = usub(part, 3)
slot = case .. "_p"
elseif rfind(part, "^du") then
part = usub(part, 3)
slot = case .. "_d"
else
slot = case .. "_s"
end
table.insert(slots, slot)
if rfind(part, "^%+") then
part = usub(part, 2)
else
break
end
end
-- A colon immediately after the slot list marks a full-form override.
if rfind(part, "^:") then
retval.full = true
part = usub(part, 2)
end
-- Put the remainder back so that the colon split below sees only the value part.
segments[1] = part
local colon_separated_groups = iut.split_alternating_runs_and_strip_spaces(segments, ":")
for i, colon_separated_group in ipairs(colon_separated_groups) do
local value = {}
local form = colon_separated_group[1]
if form == "" then
error(("Use - to indicate an empty ending for slot%s '%s': '%s'"):format(#slots > 1 and "s" or "", table.concat(slots), table.concat(segments)))
elseif form == "-" then
-- "-" is the explicit spelling of an empty value.
value.form = ""
else
value.form = form
end
value.footnotes = fetch_footnotes(colon_separated_group)
table.insert(retval.values, value)
end
return slots, retval
end
--[=[
Parse an indicator spec (text consisting of angle brackets and zero or more
dot-separated indicators within them). Return value is an object of the form
{
overrides = {
SLOT = {OVERRIDE, OVERRIDE, ...}, -- as returned by parse_override()
...
},
forms = {}, -- forms for a single spec alternant; see `forms` below
footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
stems = { -- may be missing
{
reducible = TRUE_OR_FALSE,
footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
-- The following fields are filled in by determine_stems()
vowel_stem = "STEM",
nonvowel_stem = "STEM",
oblique_slots = "all",
oblique_vowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
oblique_nonvowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
},
...
},
gender = "GENDER", -- "m", "f", "n"
number = "NUMBER", -- "sg", "pl"; may be missing
animacy = "ANIMACY", -- "inan", "anml" or "pr" (the values tested in handle_derived_slots_and_overrides()); may be missing
hard = true, -- may be missing
soft = true, -- may be missing
mixed = true, -- may be missing
surname = true, -- may be missing
istem = true, -- may be missing
["-istem"] = true, -- may be missing
tstem = true, -- may be missing
nstem = true, -- may be missing
tech = true, -- may be missing
foreign = true, -- may be missing
mostlyindecl = true, -- may be missing
indecl = true, -- may be missing
manual = true, -- may be missing
adj = true, -- may be missing
decllemma = "DECLENSION-LEMMA", -- may be missing
declgender = "DECLENSION-GENDER", -- may be missing
declnumber = "DECLENSION-NUMBER", -- may be missing
-- The following additional fields are added by other functions:
orig_lemma = "ORIGINAL-LEMMA", -- as given by the user
orig_lemma_no_links = "ORIGINAL-LEMMA-NO-LINKS", -- links removed
lemma = "LEMMA", -- `orig_lemma_no_links`, converted to singular form if plural and lowercase if all-uppercase
forms = {
SLOT = {
{
form = "FORM",
footnotes = {"FOOTNOTE", "FOOTNOTE", ...} -- may be missing
},
...
},
...
},
decl = "DECL", -- declension, e.g. "hard-m"
vowel_stem = "VOWEL-STEM", -- derived from vowel-ending lemmas
nonvowel_stem = "NONVOWEL-STEM", -- derived from non-vowel-ending lemmas
}
]=]
-- Parse the indicator spec that follows a lemma, given including its surrounding angle brackets, and return
-- a freshly created `base` table. The text inside the brackets is split on "." into indicators (bracketed
-- segments such as footnotes are kept together by the balanced-segment parser). Each indicator is one of:
--   * a slot override (the first three characters are a key of the `cases` table);
--   * a bare footnote group (empty text followed by bracketed footnotes);
--   * a reducible/vowel-alternation spec built from the characters - * # ě, optionally comma-separated;
--   * a gender ("m", "f", "n"), a number ("sg", "du", "pl") or an animacy ("pr", "anml", "inan");
--   * a boolean flag (hard, soft, istem, tstem, nstem, indecl, pron, det, velar, vstem, adje, "+" or "!");
--   * a prefixed value (mixedistem:, decllemma:, declgender:, declnumber:).
-- Throws an error on unrecognized or duplicated indicators.
local function parse_indicator_spec(angle_bracket_spec)
local inside = rmatch(angle_bracket_spec, "^<(.*)>$")
assert(inside)
local base = {overrides = {}, forms = {}}
if inside ~= "" then
local segments = iut.parse_balanced_segment_run(inside, "[", "]")
local dot_separated_groups = iut.split_alternating_runs_and_strip_spaces(segments, "%.")
for i, dot_separated_group in ipairs(dot_separated_groups) do
-- The first element of each group is the indicator text itself; any following elements are passed to
-- fetch_footnotes() below.
local part = dot_separated_group[1]
local case_prefix = usub(part, 1, 3)
if cases[case_prefix] then
-- Slot override; one override spec may apply to several slots.
local slots, override = parse_override(dot_separated_group)
for _, slot in ipairs(slots) do
if base.overrides[slot] then
error(("Two overrides specified for slot '%s'"):format(slot))
else
base.overrides[slot] = {override}
end
end
elseif part == "" then
-- Empty text: only meaningful if footnotes follow, in which case they apply to the whole base.
if #dot_separated_group == 1 then
error("Blank indicator: '" .. inside .. "'")
end
base.footnotes = fetch_footnotes(dot_separated_group)
elseif rfind(part, "^[-*#ě]*$") or rfind(part, "^[-*#ě]*,") then
-- Reducible and/or vowel-alternation spec; comma-separated alternatives each produce one stem set.
if base.stem_sets then
error("Can't specify reducible/vowel-alternant indicator twice: '" .. inside .. "'")
end
local comma_separated_groups = iut.split_alternating_runs_and_strip_spaces(dot_separated_group, ",")
local stem_sets = {}
for i, comma_separated_group in ipairs(comma_separated_groups) do
local pattern = comma_separated_group[1]
local orig_pattern = pattern
local reducible, vowelalt, oblique_slots
if pattern == "-" then
-- default reducible, no vowel alt
else
local before, after
-- Pull out "*" (reducible) or "-*" (explicitly not reducible); what remains is the
-- vowel-alternation pattern. If there is no match, `reducible` is left nil.
before, reducible, after = rmatch(pattern, "^(.-)(%-?%*)(.-)$")
if before then
pattern = before .. after
reducible = reducible == "*"
end
if pattern ~= "" then
if not rfind(pattern, "^##?ě?$") then
error("Unrecognized vowel-alternation pattern '" .. pattern .. "', should be one of #, ##, #ě or ##ě: '" .. inside .. "'")
end
if pattern == "#ě" or pattern == "##ě" then
vowelalt = "quant-ě"
else
vowelalt = "quant"
end
-- `oblique_slots` will be later changed to "all" if the lemma ends in a consonant.
oblique_slots = "all"
end
end
table.insert(stem_sets, {
reducible = reducible,
vowelalt = vowelalt,
oblique_slots = oblique_slots,
footnotes = fetch_footnotes(comma_separated_group)
})
end
base.stem_sets = stem_sets
elseif #dot_separated_group > 1 then
-- All remaining indicator types are plain words and may not carry footnotes.
error("Footnotes only allowed with slot overrides, reducible or vowel alternation specs or by themselves: '" .. table.concat(dot_separated_group) .. "'")
elseif part == "m" or part == "f" or part == "n" then
if base.gender then
error("Can't specify gender twice: '" .. inside .. "'")
end
base.gender = part
elseif part == "sg" or part == "du" or part == "pl" then
if base.number then
error("Can't specify number twice: '" .. inside .. "'")
end
base.number = part
elseif part == "pr" or part == "anml" or part == "inan" then
if base.animacy then
error("Can't specify animacy twice: '" .. inside .. "'")
end
base.animacy = part
elseif part == "hard" or part == "soft" or part == "istem" or part == "tstem" or part == "nstem" or
part == "indecl" or part == "pron" or part == "det" or part == "velar" or part == "vstem" or part == "adje" then
-- Simple boolean flags, stored under their own name.
if base[part] then
error("Can't specify '" .. part .. "' twice: '" .. inside .. "'")
end
base[part] = true
elseif part == "+" then
-- "+" sets base.adj.
if base.adj then
error("Can't specify '+' twice: '" .. inside .. "'")
end
base.adj = true
elseif part == "!" then
-- "!" sets base.manual.
if base.manual then
error("Can't specify '!' twice: '" .. inside .. "'")
end
base.manual = true
elseif rfind(part, "^mixedistem:") then
if base.mixedistem then
error("Can't specify 'mixedistem:' twice: '" .. inside .. "'")
end
base.mixedistem = rsub(part, "^mixedistem:", "")
elseif rfind(part, "^decllemma:") then
if base.decllemma then
error("Can't specify 'decllemma:' twice: '" .. inside .. "'")
end
base.decllemma = rsub(part, "^decllemma:", "")
elseif rfind(part, "^declgender:") then
if base.declgender then
error("Can't specify 'declgender:' twice: '" .. inside .. "'")
end
base.declgender = rsub(part, "^declgender:", "")
elseif rfind(part, "^declnumber:") then
if base.declnumber then
error("Can't specify 'declnumber:' twice: '" .. inside .. "'")
end
base.declnumber = rsub(part, "^declnumber:", "")
else
error("Unrecognized indicator '" .. part .. "': '" .. inside .. "'")
end
end
end
return base
end
-- Return true if `base` is an ordinary noun, i.e. it carries none of the adjectival (`adj`), pronoun (`pron`),
-- determiner (`det`) or numeral (`num`) flags; otherwise return false.
local function is_regular_noun(base)
	return not (base.adj or base.pron or base.det or base.num)
end
-- Record the number the user actually specified in `base.actual_number`, then, if a 'declnumber:' indicator
-- was given, replace `base.number` with it so the word is declined as if it had that number.
-- Throws an error if `base.declnumber` is not one of "sg", "du" or "pl".
local function process_declnumber(base)
	base.actual_number = base.number
	local declnumber = base.declnumber
	if not declnumber then
		return
	end
	if declnumber ~= "sg" and declnumber ~= "du" and declnumber ~= "pl" then
		-- The message lists all three accepted values (it previously omitted 'du').
		error(("Unrecognized value '%s' for 'declnumber', should be 'sg', 'du' or 'pl'"):format(declnumber))
	end
	base.number = declnumber
end
-- Fill in default property values on a single `base` and reject contradictory indicator combinations.
-- Pronouns, determiners and numerals delegate to their own default-setting functions; adjectival words get
-- no defaults here; regular nouns get gender/number/animacy defaults, with 'declnumber:' and 'declgender:'
-- applied on top (the user-specified values are preserved in the `actual_*` fields).
local function set_defaults_and_check_bad_indicators(base)
	-- Computed up front, before any of the default-setting code runs.
	local regular_noun = is_regular_noun(base)

	if base.pron then
		set_pron_defaults(base)
	elseif base.det then
		set_det_defaults(base)
	elseif base.num then
		set_num_defaults(base)
	elseif not base.adj then
		if not base.gender then
			if not base.manual then
				error("For nouns, gender must be specified")
			end
			base.gender = "none"
		end
		base.number = base.number or "allthree"
		process_declnumber(base)
		base.animacy = base.animacy or "inan"
		base.actual_gender = base.gender
		base.actual_animacy = base.animacy

		local declgender = base.declgender
		if declgender then
			-- Each entry is {gender, animacy}; animacy is omitted when it should be left untouched.
			local declgender_targets = {
				["m-an"] = {"m", "pr"},
				["m-in"] = {"m", "inan"},
				f = {"f"},
				n = {"n"},
			}
			local target = declgender_targets[declgender]
			if not target then
				error(("Unrecognized value '%s' for 'declgender', should be 'm-an', 'm-in', 'f' or 'n'"):format(declgender))
			end
			base.gender = target[1]
			if target[2] then
				base.animacy = target[2]
			end
		end
	end

	-- Reject bad indicator combinations.
	if base.hard and base.soft then
		error("At most one of 'hard' or 'soft' can be specified")
	end
	if base.istem and base["-istem"] then
		error("'istem' and '-istem' cannot be specified together")
	end
	if (base.istem or base["-istem"]) and not regular_noun then
		error("'istem' and '-istem' can only be specified with regular nouns")
	end
	if base.declgender and not regular_noun then
		error("'declgender' can only be specified with regular nouns")
	end
end
-- Run set_defaults_and_check_bad_indicators() on every word spec, and record on the top-level spec which
-- kinds of words were seen (`saw_pron`/`saw_non_pron`, `saw_det`/`saw_non_det`, `saw_num`/`saw_non_num`)
-- and whether any word has a clitic.
local function set_all_defaults_and_check_bad_indicators(alternant_multiword_spec)
	local spec = alternant_multiword_spec
	local is_multiword = #spec.alternant_or_word_specs > 1

	-- Set either `saw_KIND` or `saw_non_KIND` depending on whether the word has the flag.
	local function note_kind(flag, kind)
		if flag then
			spec["saw_" .. kind] = true
		else
			spec["saw_non_" .. kind] = true
		end
	end

	iut.map_word_specs(spec, function(base)
		set_defaults_and_check_bad_indicators(base)
		base.multiword = is_multiword -- FIXME: not currently used; consider deleting
		spec.has_clitic = spec.has_clitic or base.has_clitic
		note_kind(base.pron, "pron")
		note_kind(base.det, "det")
		note_kind(base.num, "num")
	end)
end
-- Reverse a word-final palatalization: the first rule in the list below whose `from` ending matches the end
-- of `word` is applied and the result returned; if none matches, `word` is returned unchanged. Rules marked
-- as adjective-only are tried only when `is_adjective` is truthy. `base` is not used.
local function undo_second_palatalization(base, word, is_adjective)
	-- {from, to, adjective_only}; order matters, since the first matching rule wins.
	local rules = {
		{"št", "sk", true},
		{"čt", "ck", true},
		{"c", "k"}, -- FIXME, this could be wrong and c correct
		{"ř", "r"},
		{"z", "h"}, -- FIXME, this could be wrong and z or g correct
		{"š", "ch"},
	}
	for _, rule in ipairs(rules) do
		local from, to, adjective_only = rule[1], rule[2], rule[3]
		if is_adjective or not adjective_only then
			local stem = rmatch(word, "^(.*)" .. from .. "$")
			if stem then
				return stem .. to
			end
		end
	end
	return word
end
-- For a plural-only lemma, synthesize a likely singular lemma and store it in `base.lemma`. It doesn't have
-- to be theoretically correct as long as it generates all the correct plural forms.
--
-- Creates a single empty stem set if none was specified. Side effects besides `base.lemma`: may set
-- `stems.reducible = true` on masculine stem sets whose reducibility was unspecified, and sets
-- `base.istem = true` for feminine lemmas in -i. Throws an error if the ending is not recognized for the
-- gender, or if different stem sets would lead to different singular lemmas.
local function synthesize_singular_lemma(base)
	if not base.stem_sets then
		base.stem_sets = {{}}
	end

	-- Reconstruct the singular lemma for one stem set. `base.lemma` is only read here; it is not
	-- overwritten until all stem sets have been processed.
	local function singular_for(stems)
		local plural = base.lemma
		if base.indecl then
			-- If specified as indeclinable, leave it alone.
			return plural
		end

		local gender = base.gender
		if gender == "m" then
			local lemma
			local stem = rmatch(plural, "^(.*)i$")
			if stem then
				if base.soft then
					-- The nominative singular has no ending, so no ending is passed. (The original code
					-- passed an undeclared variable `ending` here, i.e. a global lookup yielding nil.)
					lemma = com.convert_paired_plain_to_palatal(stem)
				else
					lemma = undo_second_palatalization(base, stem)
				end
			else
				stem = rmatch(plural, "^(.*)ové$") or rmatch(plural, "^(.*)[éyě]$") or rmatch(plural, "^(.*)ie$")
				if not stem then
					error(("Masculine plural-only lemma '%s' should end in -i, -ové or -é"):format(plural))
				end
				lemma = stem
			end
			if stems.reducible == nil then
				-- Reducibility not specified: guess it from the shape of the reconstructed lemma.
				if rfind(lemma, com.cons_c .. "[ck]$") and not com.is_monosyllabic(plural) then
					stems.reducible = true
				end
				if stems.reducible then
					lemma = dereduce(base, lemma)
				end
			end
			return lemma
		elseif gender == "f" then
			local stem = rmatch(plural, "^(.*)y$")
			if stem then
				return stem .. "a"
			end
			if rmatch(plural, "^(.*)[eě]$") then
				-- Singular taken to be identical to the plural.
				return plural
			end
			stem = rmatch(plural, "^(.*)i$")
			if stem then
				-- i-stems.
				base.istem = true
				return stem
			end
			error(("Feminine plural-only lemma '%s' should end in -y, -ě, -e or -i"):format(plural))
		elseif gender == "n" then
			local stem = rmatch(plural, "^(.*)a$")
			if stem then
				return stem .. "o"
			end
			if rmatch(plural, "^(.*)[eěí]$") then
				-- Singular lemma also in -e, -ě or -í.
				return plural
			end
			error(("Neuter plural-only lemma '%s' should end in -a, -í, -ě or -e"):format(plural))
		end
		-- tostring() so that a missing gender still produces this message rather than a format() failure.
		error(("Internal error: Unrecognized gender '%s'"):format(tostring(gender)))
	end

	-- Loop over all stem sets in case the user specified multiple ones (e.g. '*,-*'). If different lemmas
	-- would be reconstructed for different stem sets, throw an error.
	local lemma_determined
	for _, stems in ipairs(base.stem_sets) do
		local lemma = singular_for(stems)
		if lemma_determined and lemma_determined ~= lemma then
			error(("Attempt to set two different singular lemmas '%s' and '%s'"):format(lemma_determined, lemma))
		end
		lemma_determined = lemma
	end
	base.lemma = lemma_determined
end
-- For an adjectival lemma, synthesize the masc singular form.
-- The ending recognized depends on `base.number` (plural-only vs. everything else), `base.gender` and, for
-- masculine plurals, `base.animacy` and `base.soft`. In most branches `base.lemma` is rewritten to the
-- synthesized form; in the branches that just `break` it is left as given. `stem` (the part captured by the
-- matching pattern) is stored as both `vowel_stem` and `nonvowel_stem` of every stem set. Sets `base.decl`
-- to "indecl" or "adj". Throws an error if the lemma's ending is not one expected for its
-- gender/number/animacy.
local function synthesize_adj_lemma(base)
local stem
if base.indecl then
base.decl = "indecl"
stem = base.lemma
else
-- NOTE(review): `gender` and `number` are declared but never used in this function.
local gender, number
-- Replace a final "ov" with "ův" (used for possessive adjectives).
local function sub_ov(stem)
stem = stem:gsub("ov$", "ův")
return stem
end
-- `while true` is used only so that `break` can exit the if/elseif ladder once an ending has matched;
-- every path through the body ends in either `break` or error().
while true do
if base.number == "pl" then
-- Plural-only adjectival lemmas.
if base.gender == "m" then
stem = rmatch(base.lemma, "^(.*)í$")
if stem then
if base.soft then
-- nothing to do
else
if base.animacy ~= "pr" then
error(("Masculine plural-only adjectival lemma '%s' ending in -í can only be animate unless '.soft' is specified"):
format(base.lemma))
end
-- The third argument only needs to be truthy; it enables the adjective-only rules.
base.lemma = undo_second_palatalization(base, stem, "is adjective") .. "ý"
end
break
end
stem = rmatch(base.lemma, "^(.*)é$")
if stem then
if base.animacy == "pr" then
error(("Masculine plural-only adjectival lemma '%s' ending in -é must be inanimate"):
format(base.lemma))
end
base.lemma = stem .. "ý"
break
end
stem = rmatch(base.lemma, "^(.*ov)i$") or rmatch(base.lemma, "^(.*in)i$")
if stem then
if base.animacy ~= "pr" then
error(("Masculine plural-only possessive adjectival lemma '%s' ending in -i must be animate"):
format(base.lemma))
end
base.lemma = sub_ov(stem)
break
end
stem = rmatch(base.lemma, "^(.*ov)y$") or rmatch(base.lemma, "^(.*in)y$")
if stem then
if base.animacy == "pr" then
error(("Masculine plural-only possessive adjectival lemma '%s' ending in -y must be inanimate"):
format(base.lemma))
end
base.lemma = sub_ov(stem)
break
end
-- No ending matched: pick the error message that fits the animacy/softness given.
if base.animacy == "pr" then
error(("Animate masculine plural-only adjectival lemma '%s' should end in -í, -ovi or -ini"):
format(base.lemma))
elseif base.soft then
error(("Soft masculine plural-only adjectival lemma '%s' should end in -í"):format(base.lemma))
else
error(("Inanimate masculine plural-only adjectival lemma '%s' should end in -é, -ovy or -iny"):
format(base.lemma))
end
elseif base.gender == "f" then
stem = rmatch(base.lemma, "^(.*)é$") -- hard adjective
if stem then
base.lemma = stem .. "ý"
break
end
stem = rmatch(base.lemma, "^(.*)í$") -- soft adjective
if stem then
break
end
stem = rmatch(base.lemma, "^(.*ov)y$") or rmatch(base.lemma, "^(.*in)y$") -- possessive adjective
if stem then
base.lemma = sub_ov(stem)
break
end
error(("Feminine plural-only adjectival lemma '%s' should end in -é, -í, -ovy or -iny"):format(base.lemma))
else
-- Any gender other than "m" or "f" is handled as neuter here.
stem = rmatch(base.lemma, "^(.*)á$") -- hard adjective
if stem then
base.lemma = stem .. "ý"
break
end
stem = rmatch(base.lemma, "^(.*)í$") -- soft adjective
if stem then
break
end
stem = rmatch(base.lemma, "^(.*ov)a$") or rmatch(base.lemma, "^(.*in)a$") -- possessive adjective
if stem then
base.lemma = sub_ov(stem)
break
end
error(("Neuter plural-only adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma))
end
else
-- Any number other than "pl".
if base.gender == "m" then
stem = rmatch(base.lemma, "^(.*)[ýí]$") or rmatch(base.lemma, "^(.*)ův$") or rmatch(base.lemma, "^(.*)in$")
if stem then
break
end
error(("Masculine adjectival lemma '%s' should end in -ý, -í, -ův or -in"):format(base.lemma))
elseif base.gender == "f" then
stem = rmatch(base.lemma, "^(.*)á$")
if stem then
base.lemma = stem .. "ý"
break
end
stem = rmatch(base.lemma, "^(.*)í$")
if stem then
break
end
stem = rmatch(base.lemma, "^(.*ov)a$") or rmatch(base.lemma, "^(.*in)a$")
if stem then
base.lemma = sub_ov(stem)
break
end
error(("Feminine adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma))
else
-- Any gender other than "m" or "f" is handled as neuter here.
-- NOTE(review): the error message below mentions -é, but no pattern in this branch matches -é.
stem = rmatch(base.lemma, "^(.*)í$")
if stem then
break
end
stem = rmatch(base.lemma, "^(.*ov)o$") or rmatch(base.lemma, "^(.*in)o$")
if stem then
base.lemma = sub_ov(stem)
break
end
error(("Neuter adjectival lemma '%s' should end in -é, -í, -ovo or -ino"):format(base.lemma))
end
end
end
base.decl = "adj"
end
-- Now set the stem sets if not given.
if not base.stem_sets then
base.stem_sets = {{reducible = false}}
end
for _, stems in ipairs(base.stem_sets) do
-- Set the stems.
stems.vowel_stem = stem
stems.nonvowel_stem = stem
end
end
-- Determine the declension based on the lemma, gender and number. The declension is set in base.decl. In the
-- process, we set either base.vowel_stem (if the lemma ends in a vowel) or base.nonvowel_stem (if the lemma
-- does not end in a vowel), which is used by determine_stems(). In the neuter '.foreign' branch we also set
-- base.lemma to a new value; this is as if the user specified 'decllemma:'.
--
-- Throws an error for gender/ending combinations that are not supported and for lemmas whose ending is not
-- recognized at all.
local function determine_declension(base)
	if base.indecl then
		base.decl = "indecl"
		base.nonvowel_stem = base.lemma
		return
	end

	-- `stem` must be local: the original assigned it without a declaration, leaking a global variable.
	local stem

	-- Lemma in -a.
	stem = rmatch(base.lemma, "^(.*)a$")
	if stem then
		if base.gender == "m" then
			if base.animacy ~= "pr" then
				error("Masculine lemma in -a must be animate")
			end
			base.decl = "a-m"
		elseif base.gender == "f" then
			-- Explicit indicators take precedence over detection from the stem-final consonant.
			if base.hard then
				base.decl = "hard-f"
			elseif base.soft then
				base.decl = "soft-f"
			elseif base.adje then
				base.decl = "adje-f"
			elseif rfind(base.lemma, com.velar_c .. "a$") then
				base.decl = "velar-f"
			elseif rfind(base.lemma, "[czs]" .. "a$") then
				base.decl = "czs-f"
			elseif rfind(base.lemma, com.inherently_soft_c .. "a$") then
				base.decl = "soft-f"
			else
				base.decl = "hard-f"
			end
		elseif base.gender == "n" then
			if rfind(stem, "m$") then
				base.decl = "ma-n"
			else
				error("Lemma ending in -a and neuter must end in -ma")
			end
		end
		base.vowel_stem = stem
		return
	end

	-- Lemma in -e. Gender is not consulted here. (The original also assigned a second match result to an
	-- `ending` variable, but the pattern has only one capture, so it was always nil and never read.)
	stem = rmatch(base.lemma, "^(.*)e$")
	if stem then
		if base.tstem then
			base.decl = "tstem-n"
		elseif base.adje then
			base.decl = "adje-n"
		else
			base.decl = "soft-n"
		end
		base.vowel_stem = stem
		return
	end

	-- Lemma in -o.
	stem = rmatch(base.lemma, "^(.*)o$")
	if stem then
		if base.gender == "m" then
			base.decl = "o-m"
		elseif base.gender == "f" then
			error("Feminine nouns in -o are indeclinable; use '.indecl' if needed")
		elseif base.hard then
			base.decl = "hard-n"
		elseif base.tstem then
			base.decl = "tstem-n"
		elseif base.nstem then
			base.decl = "nstem-n"
		elseif rfind(base.lemma, "[czs]" .. "o$") then
			base.decl = "czs-n"
		elseif rfind(base.lemma, com.inherently_soft_c .. "o$") then
			base.decl = "soft-n"
		elseif rfind(base.lemma, com.velar_c .. "o$") then
			base.decl = "velar-n"
		else
			base.decl = "hard-n"
		end
		base.vowel_stem = stem
		return
	end

	-- Lemma in -i or -y. NOTE(review): base.decl is only set here for masculine '.adje' words; in all other
	-- cases it is left as it was (normally unset) — confirm this is intended.
	stem = rmatch(base.lemma, "^(.*)[iy]$")
	if stem then
		if base.gender == "m" then
			if base.adje then
				base.decl = "adje-m"
			end
		end
		base.vowel_stem = stem
		return
	end

	-- Lemma ending in a consonant.
	stem = rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$")
	if stem then
		if base.gender == "m" then
			if base.hard then
				base.decl = "hard-m"
			elseif base.soft then
				base.decl = "soft-m"
			elseif rfind(base.lemma, com.velar_c .. "$") then
				base.decl = "velar-m"
			elseif rfind(base.lemma, "[czs]" .. "$") then
				base.decl = "czs-m"
			elseif rfind(base.lemma, com.inherently_soft_c .. "$") then
				base.decl = "soft-m"
			else
				base.decl = "hard-m"
			end
		elseif base.gender == "f" then
			if base.vstem then
				base.decl = "v-f"
				-- The stem is the lemma minus final -ej (nil if the lemma does not end in -ej).
				stem = rmatch(base.lemma, "^(.*)ej$")
			elseif base.soft then
				base.decl = "soft-f"
			elseif rfind(base.lemma, "[czs]" .. "$") then
				base.decl = "czs-f"
			else
				base.decl = "soft-f"
			end
		elseif base.gender == "n" then
			if base.foreign then
				stem = rmatch(base.lemma, "^(.*)um$") or rmatch(base.lemma, "^(.*)on$")
				if not stem then
					error("Unrecognized neuter foreign ending, should be -um or -on")
				end
				if base.hard then
					base.decl = "hard-n"
				elseif rfind(stem, "[eiuy]$") then
					base.decl = "semisoft-n"
				else
					base.decl = "hard-n"
				end
				-- set the lemma here as if decllemma: were given
				base.lemma = stem .. "o"
				base.vowel_stem = stem
				return
			else
				error("Neuter nouns ending in a consonant should use '.foreign' or '.decllemma:...'")
			end
		end
		base.nonvowel_stem = stem
		return
	end

	error("Unrecognized ending for lemma: '" .. base.lemma .. "'")
end
-- Compute the default for the 'reducible' flag and store it (always true or false) in
-- `base.default_reducible`.
--
-- The default is true only for consonant-final lemmas that are either masculine with a polysyllabic stem in
-- -ec/-ek, or feminine with a stem in -eń (e.g. pěseń). Lemmas in -i/-y/-u/-í/-e/-ě, masculine lemmas in
-- -a/-o and t-stems are never reducible by default. Note, we are never called on adjectival nouns.
local function determine_default_reducible(base)
	local lemma = base.lemma
	local reducible = false

	local excluded = rfind(lemma, "[iyuíeě]$") or base.gender == "m" and rfind(lemma, "[ao]$") or base.tstem
	if not excluded then
		local stem = rmatch(lemma, "^(.*" .. com.cons_c .. ")$")
		if stem then
			if base.gender == "m" then
				if rfind(stem, "e[ck]$") and not com.is_monosyllabic(stem) then
					reducible = true
				end
			elseif base.gender == "f" then
				if rfind(stem, "eń$") then
					-- pěseń
					reducible = true
				end
			end
		end
	end

	base.default_reducible = reducible
end
-- Determine the stems to use for each stem set: vowel and nonvowel stems, plus their "oblique" counterparts
-- with vowel alternation applied. We assume that one of base.vowel_stem or base.nonvowel_stem has been set in
-- determine_declension(), depending on whether the lemma ends in a vowel. We construct all the rest given the
-- reducibility and vowel alternation spec. We store the determined stems inside of the stem-set objects in
-- `base.stem_sets`, meaning that if the user gave multiple reducible or vowel-alternation patterns, we will
-- compute multiple sets of stems. The reason is that the stems may vary depending on the reducibility and
-- vowel alternation.
--
-- Creates a single empty stem set if none exists. Throws an error if a reducible consonant-final stem cannot
-- be reduced.
--
-- (Removed relative to the earlier version: a "default mixed reducible" expansion step guarded by a flag that
-- was never set to true, and an unused local.)
local function determine_stems(base)
	if not base.stem_sets then
		base.stem_sets = {{}}
	end

	-- Fill in the default reducibility wherever the user did not specify one.
	for _, stems in ipairs(base.stem_sets) do
		if stems.reducible == nil then
			stems.reducible = base.default_reducible
		end
	end

	-- Now determine all the stems for each stem set.
	for _, stems in ipairs(base.stem_sets) do
		if base.vowel_stem then
			-- Lemma ends in a vowel: the nonvowel stem is derived from the vowel stem.
			stems.vowel_stem = base.vowel_stem
			stems.nonvowel_stem = stems.vowel_stem
			-- Vowel alternation is applied to the stem before it is dereduced.
			stems.oblique_nonvowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.nonvowel_stem)
			if stems.reducible then
				stems.nonvowel_stem = dereduce(base, stems.nonvowel_stem)
				stems.oblique_nonvowel_stem = dereduce(base, stems.oblique_nonvowel_stem)
			end
		else
			-- Lemma ends in a consonant: the vowel stem is derived from the nonvowel stem.
			stems.nonvowel_stem = base.nonvowel_stem
			-- The user specified #. E.g. nóc nocy
			if stems.oblique_slots then
				stems.oblique_slots = "all"
			end
			stems.oblique_nonvowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.nonvowel_stem)
			if stems.reducible then
				stems.vowel_stem = com.reduce(base.nonvowel_stem)
				if not stems.vowel_stem then
					error("Unable to reduce stem '" .. base.nonvowel_stem .. "'")
				end
			else
				stems.vowel_stem = base.nonvowel_stem
			end
		end
		stems.oblique_vowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.vowel_stem)
	end
end
-- Work out the declension and stems for a single word spec, dispatching on its kind: pronoun, determiner,
-- numeral, adjectival ('+'), manual ('!') or regular noun, checked in that order.
local function detect_indicator_spec(base)
	if base.pron then
		determine_pronoun_stems(base)
		return
	end
	if base.det then
		determine_determiner_stems(base)
		return
	end
	if base.num then
		determine_numeral_stems(base)
		return
	end
	if base.adj then
		process_declnumber(base)
		synthesize_adj_lemma(base)
		return
	end
	if base.manual then
		if base.stem_sets then
			-- FIXME, maybe this should be allowed?
			error("Reducible and vowel alternation specs cannot be given with manual declensions")
		end
		base.stem_sets = {{reducible = false, vowel_stem = "", nonvowel_stem = ""}}
		base.decl = "manual"
		return
	end

	-- Regular noun. A plural-only lemma is first converted to a synthesized singular.
	if base.number == "pl" then
		synthesize_singular_lemma(base)
	end
	determine_declension(base)
	determine_default_reducible(base)
	determine_stems(base)
end
-- Run detect_indicator_spec() on every word spec and collect, as sets keyed by gender, the actual genders
-- seen among words that are not plural-only (`sg_genders`) and among words that are not singular-only
-- (`pl_genders`). Afterwards, throw an error if more than one of pronouns, determiners and numerals was seen.
local function detect_all_indicator_specs(alternant_multiword_spec)
	local spec = alternant_multiword_spec
	local sg_genders, pl_genders = {}, {}
	spec.sg_genders = sg_genders
	spec.pl_genders = pl_genders

	iut.map_word_specs(spec, function(base)
		detect_indicator_spec(base)
		if base.number ~= "pl" then
			sg_genders[base.actual_gender] = true
		end
		if base.number ~= "sg" then
			pl_genders[base.actual_gender] = true
		end
	end)

	local special_kinds_seen = 0
	for _, flag in ipairs({"saw_pron", "saw_det", "saw_num"}) do
		if spec[flag] then
			special_kinds_seen = special_kinds_seen + 1
		end
	end
	if special_kinds_seen > 1 then
		error("Can't combine pronouns, determiners and/or numerals")
	end
end
-- Forward declaration: the two propagation functions call each other.
local propagate_multiword_properties

-- Propagate `property` within each alternant of `alternant_spec` (each alternant is a multiword spec), then
-- set the alternant spec's own value of `property`: the first alternant's value, replaced by `mixed_value`
-- as soon as a later alternant has a different truthy value.
local function propagate_alternant_properties(alternant_spec, property, mixed_value, nouns_only)
	local combined
	for _, alternant in ipairs(alternant_spec.alternants) do
		propagate_multiword_properties(alternant, property, mixed_value, nouns_only)
		local value = alternant[property]
		if combined == nil then
			combined = value
		elseif value and combined ~= value then
			combined = mixed_value
		end
	end
	alternant_spec[property] = combined
end
-- Propagate `property` sideways within a multiword spec. Scanning left to right, every "nounal" word passes
-- its value to the preceding words back to the previous nounal word, and the last nounal word also passes
-- its value to all following words; only words lacking a value receive one. A word counts as nounal if it
-- is a regular noun (when `nouns_only` is truthy and the word is not an alternant) or otherwise if it has a
-- value for `property`. The spec's own value becomes the common value of the nounal words, or `mixed_value`
-- if they disagree.
propagate_multiword_properties = function(multiword_spec, property, mixed_value, nouns_only)
	local words = multiword_spec.alternant_or_word_specs or multiword_spec.word_specs
	local combined = nil
	local prev_nounal = 0

	for i = 1, #words do
		local word = words[i]
		local is_nounal
		if word.alternants then
			-- Note that `nouns_only` is not passed down into alternants.
			propagate_alternant_properties(word, property, mixed_value)
			is_nounal = word[property] and true or false
		elseif nouns_only then
			is_nounal = is_regular_noun(word)
		else
			is_nounal = word[property] and true or false
		end

		if is_nounal then
			local value = word[property]
			if not value then
				error("Internal error: noun-type word spec without " .. property .. " set")
			end
			-- Fill in the words between the previous nounal word and this one.
			for j = prev_nounal + 1, i - 1 do
				words[j][property] = words[j][property] or value
			end
			prev_nounal = i
			if combined == nil then
				combined = value
			elseif combined ~= value then
				combined = mixed_value
			end
		end
	end

	-- Words after the last nounal word inherit from it.
	if prev_nounal > 0 then
		local value = words[prev_nounal][property]
		for i = prev_nounal + 1, #words do
			words[i][property] = words[i][property] or value
		end
	end

	multiword_spec[property] = combined
end
-- Push `property` down from the top-level spec to every word. An object that already has a truthy value
-- keeps it; otherwise it inherits its parent's value (the top level inherits `default_propval`). Every
-- object also gets `actual_<property>` set to the resulting value unless that field is already set.
-- Throws an error if an individual word would end up with the value "mixed".
local function propagate_properties_downward(alternant_multiword_spec, property, default_propval)
	local actual_key = "actual_" .. property

	-- Make sure `obj` has a value for the property (falling back to `inherited`) and return that value.
	local function resolve(obj, inherited)
		local value = obj[property]
		if not value then
			value = inherited
			obj[property] = value
		end
		if not obj[actual_key] then
			obj[actual_key] = value
		end
		return value
	end

	-- Same as resolve(), but for individual words, which may not be "mixed".
	local function resolve_word(word_spec, inherited)
		if resolve(word_spec, inherited) == "mixed" then
			-- FIXME, use clearer error message.
			error("Attempt to assign mixed " .. property .. " to word")
		end
	end

	local top_value = resolve(alternant_multiword_spec, default_propval)
	for _, child in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		if child.alternants then
			local alternant_value = resolve(child, top_value)
			for _, multiword_spec in ipairs(child.alternants) do
				local multiword_value = resolve(multiword_spec, alternant_value)
				for _, word_spec in ipairs(multiword_spec.word_specs) do
					resolve_word(word_spec, multiword_value)
				end
			end
		else
			resolve_word(child, top_value)
		end
	end
end
--[=[
Propagate `property` (one of "animacy", "gender" or "number") from nouns to adjacent
adjectives. We proceed as follows:
1. We assume the properties in question are already set on all nouns. This should happen in
set_defaults_and_check_bad_indicators().
2. We first propagate properties upwards and sideways. We recurse downwards from the top. When we encounter a multiword
spec, we proceed left to right looking for a noun. When we find a noun, we fetch its property (recursing if the noun
is an alternant), and propagate it to any adjectives to its left, up to the next noun to the left. When we have
processed the last noun, we also propagate its property value to any adjectives to the right (to handle e.g.
[[anděl strážný]] "guardian angel", where the adjective [[strážný]] should inherit the 'masculine' and 'animate'
properties of [[anděl]]). Finally, we set the property value for the multiword spec itself by combining all the
non-nil properties of the individual elements. If all non-nil properties have the same value, the result is that
value, otherwise it is `mixed_value` (which is "mixed" for animacy and gender, but "allthree" for number).
3. When we encounter an alternant spec in this process, we recursively process each alternant (which is a multiword
spec) using the previous step, and combine any non-nil properties we encounter the same way as for multiword specs.
4. The effect of steps 2 and 3 is to set the property of each alternant and multiword spec based on its children or its
neighbors.
]=]
-- Entry point for property propagation: two upward/sideways passes (first treating only regular nouns as
-- sources, then any word that has a value), followed by the downward pass that fills in whatever is still
-- missing from the parent or from `default_propval`.
local function propagate_properties(alternant_multiword_spec, property, default_propval, mixed_value)
	for _, nouns_only in ipairs({"nouns only", false}) do
		propagate_multiword_properties(alternant_multiword_spec, property, mixed_value, nouns_only)
	end
	propagate_properties_downward(alternant_multiword_spec, property, default_propval)
end
-- Record noun positions as `first_noun` fields. Inside each alternant, `first_noun` is the index of its
-- first regular noun. On the top-level spec, `first_noun` is set to the index of each alternant spec that
-- contains a regular noun as the scan proceeds, and the scan stops at the first top-level word that is
-- itself a regular noun.
local function determine_noun_status(alternant_multiword_spec)
	-- Set `first_noun` on a multiword spec; return whether a regular noun was found.
	local function mark_first_noun(multiword_spec)
		for pos, word_spec in ipairs(multiword_spec.word_specs) do
			if is_regular_noun(word_spec) then
				multiword_spec.first_noun = pos
				return true
			end
		end
		return false
	end

	for index, spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		if spec.alternants then
			local found_noun = false
			-- Every alternant is examined, even after a noun has been found in an earlier one.
			for _, multiword_spec in ipairs(spec.alternants) do
				if mark_first_noun(multiword_spec) then
					found_noun = true
				end
			end
			if found_noun then
				alternant_multiword_spec.first_noun = index
			end
		elseif is_regular_noun(spec) then
			alternant_multiword_spec.first_noun = index
			return
		end
	end
end
-- Decide the part of speech and store it in `pos`, with its plural in `plpos`. An explicit `args.pos` wins;
-- otherwise the result is "pronoun", "determiner" or "numeral" if words of that kind, and no words of any
-- other kind, were seen; otherwise "noun".
local function set_pos(alternant_multiword_spec)
	local spec = alternant_multiword_spec
	local pos = spec.args.pos
	if not pos then
		if spec.saw_pron and not spec.saw_non_pron then
			pos = "pronoun"
		elseif spec.saw_det and not spec.saw_non_det then
			pos = "determiner"
		elseif spec.saw_num and not spec.saw_non_num then
			pos = "numeral"
		else
			pos = "noun"
		end
	end
	spec.pos = pos
	spec.plpos = require("Module:string utilities").pluralize(pos)
end
-- Normalize the lemma of every word spec. A blank lemma defaults to `pagename`. The lemma as given is kept
-- in `orig_lemma`, and with links removed in `orig_lemma_no_links`. `actual_lemma` is the link-free lemma,
-- lowercased if it was all-uppercase; `lemma` is the 'decllemma:' value if given, else `actual_lemma`.
local function normalize_all_lemmas(alternant_multiword_spec, pagename)
	local function normalize(base)
		local given = base.lemma
		if given == "" then
			given = pagename
		end
		base.lemma = given
		base.orig_lemma = given

		local no_links = m_links.remove_links(given)
		base.orig_lemma_no_links = no_links

		-- If the lemma is all-uppercase, lowercase it but note this, so that later in combine_stem_ending() we
		-- convert it back to uppercase. This allows us to handle all-uppercase acronyms without a lot of extra
		-- complexity.
		-- FIXME: This may not make sense at all.
		local is_all_uppercase = uupper(no_links) == no_links
		if is_all_uppercase then
			base.all_uppercase = true
		end
		local lemma = is_all_uppercase and ulower(no_links) or no_links

		base.actual_lemma = lemma
		base.lemma = base.decllemma or lemma
	end

	iut.map_word_specs(alternant_multiword_spec, normalize)
end
-- Generate all forms for a single word spec: run the declension function for `base.decl` once per stem set,
-- apply derived slots and overrides, fill in the dual slots that are copies of other dual slots, and finally
-- reconcile the number the word was declined as (`base.number`) with its actual number
-- (`base.actual_number`). Throws an internal error if `base.decl` has no entry in `decls`.
local function decline_noun(base)
	for _, stems in ipairs(base.stem_sets) do
		if not decls[base.decl] then
			-- tostring() so that a missing (nil) declension still produces this message rather than a
			-- concatenation error.
			error("Internal error: Unrecognized declension type '" .. tostring(base.decl) .. "'")
		end
		decls[base.decl](base, stems)
	end
	handle_derived_slots_and_overrides(base)

	local function copy(from_slot, to_slot)
		base.forms[to_slot] = base.forms[from_slot]
	end

	-- Return the slot suffix for `number` and the list of the other two suffixes. Anything other than "sg"
	-- or "du" is treated as plural.
	local function number_suffixes(number)
		if number == "sg" then
			return "_s", {"_d", "_p"}
		elseif number == "du" then
			return "_d", {"_s", "_p"}
		else
			return "_p", {"_s", "_d"}
		end
	end

	if base.gender ~= "m" then
		copy("nom_d", "acc_d")
	end
	copy("nom_d", "voc_d")
	copy("dat_d", "loc_d")
	copy("dat_d", "ins_d")

	if base.actual_number ~= base.number then
		-- The word was declined with a different number than it really has ('declnumber:'). Copy the declined
		-- forms into the slots of the other two numbers. The suffixes are iterated one at a time; the earlier
		-- code concatenated the whole suffix list onto a string, which raises a runtime error.
		local source_num, dest_nums = number_suffixes(base.number)
		for _, dest_num in ipairs(dest_nums) do
			for case, _ in pairs(cases) do
				copy(case .. source_num, case .. dest_num)
			end
			copy("nom" .. source_num .. "_linked", "nom" .. dest_num .. "_linked")
		end
		if base.actual_number ~= "allthree" then
			-- Keep only the slots of the actual number.
			local _, erase_nums = number_suffixes(base.actual_number)
			for _, erase_num in ipairs(erase_nums) do
				for case, _ in pairs(cases) do
					base.forms[case .. erase_num] = nil
				end
				base.forms["nom" .. erase_num .. "_linked"] = nil
			end
		end
	end
end
-- Variant detection is currently disabled: this always returns nil, whatever `form` is. The intended
-- implementation is kept below for reference.
local function get_variants(form)
	--[=[
	FIXME
	return
		form:find(com.VAR1) and "var1" or
		form:find(com.VAR2) and "var2" or
		form:find(com.VAR3) and "var3" or
		nil
	]=]
	return nil
end
-- Compute the categories to add the noun to, as well as the annotation to display in the
-- declension title bar. We combine the code to do these functions as both categories and
-- title bar contain similar information.
--
-- Input:  `alternant_multiword_spec`, after declension has been run.
-- Output: nothing is returned; the results are stored in
--   `alternant_multiword_spec.annotation` (space-separated string) and
--   `alternant_multiword_spec.categories` (list of category names, duplicates removed).
local function compute_categories_and_annotation(alternant_multiword_spec)
	local all_cats = {}
	-- Add a category (prefixed with the language name) unless it is already present.
	local function insert(cattype)
		m_table.insertIfNot(all_cats, "Upper Sorbian " .. cattype)
	end
	if alternant_multiword_spec.pos == "noun" then
		if alternant_multiword_spec.actual_number == "sg" then
			insert("uncountable nouns")
		elseif alternant_multiword_spec.actual_number == "du" then
			insert("dualia tantum")
		elseif alternant_multiword_spec.actual_number == "pl" then
			insert("pluralia tantum")
		end
	end
	local annparts = {}
	local decldescs = {}
	local vowelalts = {}
	local foreign = {}
	local irregs = {}
	local stemspecs = {}
	-- nil until the first stem set has been seen; afterwards the stem set's `reducible` value,
	-- or "mixed" if the stem sets disagree.
	local reducible = nil
	-- Build a description such as "masculine personal" from a gender and an animacy code.
	-- Codes without an entry (e.g. "none") contribute nothing, so the result may be "".
	local function get_genanim(gender, animacy)
		local gender_code_to_desc = {
			m = "masculine",
			f = "feminine",
			n = "neuter",
		}
		local animacy_code_to_desc = {
			pr = "personal",
			anml = "animal",
			inan = "inanimate",
		}
		local descs = {}
		table.insert(descs, gender_code_to_desc[gender])
		if gender ~= "f" and gender ~= "n" then
			-- masculine or "none" (e.g. certain pronouns and numerals)
			table.insert(descs, animacy_code_to_desc[animacy])
		end
		return table.concat(descs, " ")
	end
	-- Collapse runs of spaces and strip leading/trailing whitespace.
	local function trim(text)
		text = text:gsub(" +", " ")
		return mw.text.trim(text)
	end
	-- Collect categories and annotation fragments from a single word spec.
	local function do_word_spec(base)
		local actual_genanim = get_genanim(base.actual_gender, base.actual_animacy)
		if base.actual_gender == "m" then
			insert(actual_genanim .. " " .. alternant_multiword_spec.plpos)
		end
		for _, stems in ipairs(base.stem_sets) do
			local props = declprops[base.decl]
			local cats = props.cat
			if type(cats) == "function" then
				cats = cats(base, stems)
			end
			if type(cats) == "string" then
				cats = {cats}
			end
			local default_desc
			for i, cat in ipairs(cats) do
				-- Expand the placeholders: GENPOS -> "GENDER POS"; POS is appended when absent.
				cat = cat:gsub("GENPOS", "GENDER POS")
				if not cat:find("POS") then
					cat = cat .. " POS"
				end
				if i == #cats then
					-- The last category (minus the part of speech) doubles as the default description.
					default_desc = cat:gsub(" POS", "")
				end
				cat = cat:gsub("GENDER", actual_genanim)
				cat = cat:gsub("POS", alternant_multiword_spec.plpos)
				-- Need to trim `cat` because actual_genanim may be an empty string.
				insert(trim(cat))
			end
			local desc = props.desc
			if type(desc) == "function" then
				desc = desc(base, stems)
			end
			desc = desc or default_desc
			desc = desc:gsub("GENDER", actual_genanim)
			-- Need to trim `desc` because actual_genanim may be an empty string.
			m_table.insertIfNot(decldescs, trim(desc))
			local vowelalt
			if stems.vowelalt == "quant" then
				vowelalt = "quant-alt"
				insert("nouns with quantitative vowel alternation")
			elseif stems.vowelalt == "quant-ě" then
				vowelalt = "í-ě-alt"
				insert("nouns with í-ě alternation")
			end
			if vowelalt then
				m_table.insertIfNot(vowelalts, vowelalt)
			end
			-- Keep the first value seen; a later stem set that disagrees in truthiness makes the
			-- noun "mixed", which is what the "mixed-reducible" annotation below tests for.
			if reducible == nil then
				reducible = stems.reducible
			elseif reducible ~= "mixed" and (not reducible) ~= (not stems.reducible) then
				reducible = "mixed"
			end
			if stems.reducible then
				insert("nouns with reducible stem")
			end
			if base.foreign then
				m_table.insertIfNot(foreign, "foreign")
				if not base.decllemma then
					-- NOTE: there are nouns that use both 'foreign' and 'decllemma', e.g. [[Zeus]].
					insert("nouns with regular foreign declension")
				end
			end
			-- User-specified 'decllemma:' indicates irregular stem. Don't consider foreign nouns in -us/-os/-es, -um/-on or
			-- silent -e (e.g. [[software]]) where this ending is simply dropped in oblique and plural forms as irregular;
			-- there are too many of these and they are already categorized above as 'nouns with regular foreign declension'.
			if base.decllemma then
				m_table.insertIfNot(irregs, "irreg-stem")
				insert("nouns with irregular stem")
			end
			m_table.insertIfNot(stemspecs, stems.vowel_stem)
		end
	end
	-- Only the "key" word (first noun, else first word) of the spec, or of each alternant,
	-- contributes categories and annotation.
	local key_entry = alternant_multiword_spec.first_noun or 1
	if #alternant_multiword_spec.alternant_or_word_specs >= key_entry then
		local alternant_or_word_spec = alternant_multiword_spec.alternant_or_word_specs[key_entry]
		if alternant_or_word_spec.alternants then
			for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
				key_entry = multiword_spec.first_noun or 1
				if #multiword_spec.word_specs >= key_entry then
					do_word_spec(multiword_spec.word_specs[key_entry])
				end
			end
		else
			do_word_spec(alternant_or_word_spec)
		end
	end
	if alternant_multiword_spec.actual_number == "sg" or alternant_multiword_spec.actual_number == "pl" or alternant_multiword_spec.actual_number == "du" then
		-- not "allthree" or "none" (for [[sebe]])
		table.insert(annparts, alternant_multiword_spec.actual_number == "sg" and "sg-only" or alternant_multiword_spec.actual_number == "du" and "du-only" or "pl-only")
	end
	if #decldescs == 0 then
		table.insert(annparts, "indecl")
	else
		table.insert(annparts, table.concat(decldescs, " // "))
	end
	if #vowelalts > 0 then
		table.insert(annparts, table.concat(vowelalts, "/"))
	end
	if reducible == "mixed" then
		table.insert(annparts, "mixed-reducible")
	elseif reducible then
		table.insert(annparts, "reducible")
	end
	if #foreign > 0 then
		table.insert(annparts, table.concat(foreign, " // "))
	end
	if #irregs > 0 then
		table.insert(annparts, table.concat(irregs, " // "))
	end
	alternant_multiword_spec.annotation = table.concat(annparts, " ")
	if #stemspecs > 1 then
		insert("nouns with multiple stems")
	end
	if alternant_multiword_spec.actual_number == "allthree" and not m_table.deepEquals(alternant_multiword_spec.sg_genders, alternant_multiword_spec.pl_genders) then
		insert("nouns that change gender in the plural")
	end
	alternant_multiword_spec.categories = all_cats
end
-- Convert the declined forms in `alternant_multiword_spec.forms` into their displayable
-- versions by delegating to `iut.show_forms`. The lemma list passed along is taken from the
-- first slot in `potential_lemma_slots` that has any forms.
local function show_forms(alternant_multiword_spec)
	local declined = alternant_multiword_spec.forms
	local lemma_list = {}
	for _, candidate_slot in ipairs(potential_lemma_slots) do
		local candidate_forms = declined[candidate_slot]
		if candidate_forms then
			-- FIXME, now can support footnotes as qualifiers in headwords?
			for _, entry in ipairs(candidate_forms) do
				table.insert(lemma_list, entry.form)
			end
			-- Only the first populated slot supplies lemmas.
			break
		end
	end
	-- Identity canonicalization: stripping variant codes (com.remove_variant_codes) is
	-- currently disabled, so forms pass through untouched.
	local function keep_form_unchanged(form)
		return form
	end
	iut.show_forms(declined, {
		lemmas = lemma_list,
		slot_table = alternant_multiword_spec.output_noun_slots,
		lang = lang,
		canonicalize = keep_form_unchanged,
	})
end
-- Build the wikitext of the declension table for an already-declined noun.
--
-- Reads `alternant_multiword_spec.forms`, `.title`, `.annotation`, `.actual_number` and
-- `.has_clitic`; stores `title`, `annotation` and `notes_clause` into the forms table (they are
-- substituted into the template) and returns the formatted table as a string.
-- NOTE(review): `forms.lemma` and `forms.footnote` are expected to be present already,
-- presumably filled in by the preceding show_forms() call -- confirm.
--
-- All three layouts list the cases in the same order: nominative, genitive, dative, accusative,
-- instrumental, locative, vocative.
local function make_table(alternant_multiword_spec)
	local forms = alternant_multiword_spec.forms
	-- Opening markup shared by all layouts; `min_width` is the frame width in em.
	local function template_prelude(min_width)
		return rsub([=[
<div>
<div class="NavFrame" style="max-width: MINWIDTHem">
<div class="NavHead" style="background:#eff7ff">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="background:#F9F9F9;text-align:center;width:100%;display:table" class="inflection-table"
|-
]=], "MINWIDTH", min_width)
	end
	-- Closing markup shared by all layouts, including the optional footnote clause.
	local function template_postlude()
		return [=[
|{\cl}{notes_clause}</div></div></div>]=]
	end
	-- Layout with singular, dual and plural columns.
	local table_spec_allthree = template_prelude("45") .. [=[
! style="width:33%;background:#d9ebff" |
! style="background:#d9ebff" | singular
! style="background:#d9ebff" | dual
! style="background:#d9ebff" | plural
|-
!style="background:#eff7ff"|nominative
| {nom_s}
| {nom_d}
| {nom_p}
|-
!style="background:#eff7ff"|genitive
| {gen_s}
| {gen_d}
| {gen_p}
|-
!style="background:#eff7ff"|dative
| {dat_s}
| {dat_d}
| {dat_p}
|-
!style="background:#eff7ff"|accusative
| {acc_s}
| {acc_d}
| {acc_p}
|-
!style="background:#eff7ff"|instrumental
| {ins_s}
| {ins_d}
| {ins_p}
|-
!style="background:#eff7ff"|locative
| {loc_s}
| {loc_d}
| {loc_p}
|-
!style="background:#eff7ff"|vocative
| {voc_s}
| {voc_d}
| {voc_p}
]=] .. template_postlude()
	-- Layout with a single number column. `number` is the column heading and `numcode` the
	-- slot suffix ("s", "d" or "p") substituted for CODE.
	local function get_table_spec_one_number(number, numcode)
		local table_spec_one_number = [=[
! style="width:33%;background:#d9ebff" |
! style="background:#d9ebff" | NUMBER
|-
!style="background:#eff7ff"|nominative
| {nom_CODE}
|-
!style="background:#eff7ff"|genitive
| {gen_CODE}
|-
!style="background:#eff7ff"|dative
| {dat_CODE}
|-
!style="background:#eff7ff"|accusative
| {acc_CODE}
|-
!style="background:#eff7ff"|instrumental
| {ins_CODE}
|-
!style="background:#eff7ff"|locative
| {loc_CODE}
|-
!style="background:#eff7ff"|vocative
| {voc_CODE}
]=]
		return template_prelude("30") .. table_spec_one_number:gsub("NUMBER", number):gsub("CODE", numcode) ..
			template_postlude()
	end
	-- Single-number layout with separate "stressed" and "clitic" columns for the genitive,
	-- dative and accusative.
	local function get_table_spec_one_number_clitic(number, numcode)
		local table_spec_one_number_clitic = [=[
! rowspan=2 style="width:33%;background:#d9ebff"|
! colspan=2 style="background:#d9ebff" | NUMBER
|-
! style="width:33%;background:#d9ebff" | stressed
! style="background:#d9ebff" | clitic
|-
!style="background:#eff7ff"|nominative
| colspan=2 | {nom_CODE}
|-
!style="background:#eff7ff"|genitive
| {gen_CODE}
| {clitic_gen_CODE}
|-
!style="background:#eff7ff"|dative
| {dat_CODE}
| {clitic_dat_CODE}
|-
!style="background:#eff7ff"|accusative
| {acc_CODE}
| {clitic_acc_CODE}
|-
!style="background:#eff7ff"|instrumental
| colspan=2 | {ins_CODE}
|-
!style="background:#eff7ff"|locative
| colspan=2 | {loc_CODE}
|-
!style="background:#eff7ff"|vocative
| colspan=2 | {voc_CODE}
]=]
		return template_prelude("40") .. table_spec_one_number_clitic:gsub("NUMBER", number):gsub("CODE", numcode) ..
			template_postlude()
	end
	local notes_template = [=[
<div style="width:100%;text-align:left;background:#d9ebff">
<div style="display:inline-block;text-align:left;padding-left:1em;padding-right:1em">
{footnote}
</div></div>
]=]
	-- A user-supplied title wins; otherwise generate the default one from the lemma.
	if alternant_multiword_spec.title then
		forms.title = alternant_multiword_spec.title
	else
		forms.title = 'Declension of <i lang="hsb">' .. forms.lemma .. '</i>'
	end
	local annotation = alternant_multiword_spec.annotation
	if annotation == "" then
		forms.annotation = ""
	else
		forms.annotation = " (<span style=\"font-size: smaller;\">" .. annotation .. "</span>)"
	end
	-- Heading and slot suffix for the single-number layouts; both stay nil for "allthree",
	-- where they are not needed.
	local number, numcode
	if alternant_multiword_spec.actual_number == "sg" then
		number, numcode = "singular", "s"
	elseif alternant_multiword_spec.actual_number == "du" then
		number, numcode = "dual", "d"
	elseif alternant_multiword_spec.actual_number == "pl" then
		number, numcode = "plural", "p"
	elseif alternant_multiword_spec.actual_number == "none" then -- used for [[sebe]]
		number, numcode = "", "s"
	end
	local table_spec =
		alternant_multiword_spec.actual_number == "allthree" and table_spec_allthree or
		alternant_multiword_spec.has_clitic and get_table_spec_one_number_clitic(number, numcode) or
		get_table_spec_one_number(number, numcode)
	-- Only emit the notes box when there is footnote text to show.
	forms.notes_clause = forms.footnote ~= "" and
		m_string_utilities.format(notes_template, forms) or ""
	return m_string_utilities.format(table_spec, forms)
end
-- Build the list of gender/animacy codes for the headword, one per distinct combination
-- found among the word specs. Each code has the form GENDER-ANIMACY, with "inan" shortened to
-- "in", followed by "-p" for plural-only or "-d" for dual-only nouns.
local function compute_headword_genders(alternant_multiword_spec)
	local suffix_by_number = {pl = "-p", du = "-d"}
	local number_suffix = suffix_by_number[alternant_multiword_spec.actual_number] or ""
	local gender_codes = {}
	local function add_gender_code(base)
		local animacy_code = base.animacy == "inan" and "in" or base.animacy
		m_table.insertIfNot(gender_codes, base.gender .. "-" .. animacy_code .. number_suffix)
	end
	iut.map_word_specs(alternant_multiword_spec, add_gender_code)
	return gender_codes
end
-- Externally callable function to parse and decline a noun from user-specified arguments.
-- `parent_args` are the raw template arguments; when `from_headword` is set, the extra
-- headword parameters (head, lemma, g, f, m, adj, dim, id) are accepted as well.
-- Returns the ALTERNANT_MULTIWORD_SPEC object, whose `forms` field maps each slot to a list of
-- {form=FORM, footnotes=FOOTNOTES} objects (slots without values are absent). If the `json`
-- argument is given, a JSON string of that object (minus `args`) is returned instead.
function export.do_generate_forms(parent_args, from_headword)
	local param_specs = {
		[1] = {required = true, default = "žona<f>"},
		title = {},
		pagename = {},
		json = {type = "boolean"},
		pos = {},
	}
	if from_headword then
		for _, list_param in ipairs({"head", "lemma", "g", "f", "m", "adj", "dim"}) do
			param_specs[list_param] = {list = true}
		end
		param_specs.id = {}
	end
	local args = m_para.process(parent_args, param_specs)

	local spec = iut.parse_inflected_text(args[1], {
		parse_indicator_spec = parse_indicator_spec,
		angle_brackets_omittable = true,
		allow_blank_lemma = true,
	})
	spec.title = args.title
	spec.args = args

	-- Page name precedence: explicit pagename=, then the first head= (headword calls only),
	-- then the current page's subpage text.
	local pagename = args.pagename
	if not pagename and from_headword then
		pagename = args.head[1]
	end
	if not pagename then
		pagename = mw.title.getCurrentTitle().subpageText
	end

	normalize_all_lemmas(spec, pagename)
	set_all_defaults_and_check_bad_indicators(spec)
	-- These need to happen before detect_all_indicator_specs() so that adjectives get their genders and numbers set
	-- appropriately, which are needed to correctly synthesize the adjective lemma.
	propagate_properties(spec, "animacy", "inan", "mixed")
	propagate_properties(spec, "number", "allthree", "allthree")
	-- FIXME, the default value (third param) used to be 'm' with a comment indicating that this applied only to
	-- plural adjectives, where it didn't matter; but here, plural adjectives are distinguished for gender and
	-- animacy. Make sure 'mixed' works.
	propagate_properties(spec, "gender", "mixed", "mixed")
	detect_all_indicator_specs(spec)
	-- Propagate 'actual_number' after calling detect_all_indicator_specs(), which sets 'actual_number' for adjectives.
	propagate_properties(spec, "actual_number", "allthree", "allthree")
	determine_noun_status(spec)
	set_pos(spec)
	spec.output_noun_slots = get_output_noun_slots(spec)

	iut.inflect_multiword_or_alternant_multiword_spec(spec, {
		skip_slot = function(slot)
			return skip_slot(spec.actual_number, slot)
		end,
		slot_table = spec.output_noun_slots,
		get_variants = get_variants,
		inflect_word_spec = decline_noun,
	})
	compute_categories_and_annotation(spec)
	spec.genders = compute_headword_genders(spec)

	if not args.json then
		return spec
	end
	-- The processed arguments are dropped before serializing.
	spec.args = nil
	return require("Module:JSON").toJSON(spec)
end
-- Entry point for {{hsb-ndecl}}. Template-callable: declines the noun described by the
-- arguments of the calling template and returns the declension table followed by the
-- formatted categories. When JSON output was requested, the JSON string is returned as is.
function export.show(frame)
	local result = export.do_generate_forms(frame:getParent().args)
	if type(result) == "string" then
		-- JSON return value
		return result
	end
	show_forms(result)
	local rendered_table = make_table(result)
	local category_text = require("Module:utilities").format_categories(result.categories, lang, nil, nil, force_cat)
	return rendered_table .. category_text
end
-- Hand the table of exported functions back to whoever loads this module.
return export