Module:User:Benwing2/la-nominal
Appearance
- This module sandbox lacks a documentation subpage. Please create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
local export = {}
--[=[
Authorship: Ben Wing <benwing2>, with many ideas and a little code coming from
the old [[Module:la-decl-multi]] by KC Kenny Lau.
]=]
-- TODO:
-- (DONE) Eliminate specification of noteindex from la-adj/data
-- (DONE?) Finish autodetection of adjectives
-- (DONE) Remove old noun code
-- (DONE) Implement <.sufn>
-- (DONE) Look into adj voc=false
-- (DONE) Handle loc in adjectives
-- Error on bad subtypes
-- Make sure Google Books link still works.
-- (DONE) Make sure .sufn triggers insertion of 'with m -> n in compounds' in title.
-- (DONE) Make sure title returned to la-adj lowercases the first letter even with a custom title.
--[=[
TERMINOLOGY:
-- "slot" = A particular case/number combination (for nouns) or
case/number/gender combination (for adjectives). Example slot names are
"abl_sg" (for noun) or "acc_pl_f" (for adjectives). Each slot is filled
with zero or more forms.
-- "form" = The declined Latin form representing the value of a given slot.
For example, rēge is a form, representing the value of the abl_sg slot of
the lemma rēx.
-- "lemma" = The dictionary form of a given Latin term. For nouns, it's
generally the nominative singular, but will be the nominative plural of
plurale tantum nouns (e.g. [[castra]]), and may occasionally be another
form (e.g. the genitive singular) if the nominative singular is missing.
For adjectives, it's generally the masculine nominative singular, but
will be the masculine nominative plural of plurale tantum adjectives
(e.g. [[dēnī]]).
-- "plurale tantum" (plural "pluralia tantum") = A noun or adjective that
exists only in the plural. Examples are castra "army camp", faucēs "throat",
and dēnī "ten each" (used for counting pluralia tantum nouns).
-- "singulare tantum" (plural "singularia tantum") = A noun or adjective that
exists only in the singular. Examples are geōlogia "geology" (and in
general most non-count nouns) and the adjective ūnus "one".
]=]
local lang = require("Module:languages").getByCode("la")
local m_links = require("Module:links")
local m_utilities = require("Module:utilities")
local ut = require("Module:utils")
local m_string_utilities = require("Module:string utilities")
local m_para = require("Module:parameters")
local current_title = mw.title.getCurrentTitle()
local NAMESPACE = current_title.nsText
local PAGENAME = current_title.text
local m_noun_decl = require("Module:User:Benwing2/la-noun/data")
local m_noun_table = require("Module:la-noun/table")
local m_adj_decl = require("Module:User:Benwing2/la-adj/data")
local m_adj_table = require("Module:la-adj/table")
local m_la_utilities = require("Module:la-utilities")
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rgmatch = mw.ustring.gmatch
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local uupper = mw.ustring.upper
-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar)
local retval = rsubn(term, foo, bar)
return retval
end
local ligatures = {
['Ae'] = 'Æ',
['ae'] = 'æ',
['Oe'] = 'Œ',
['oe'] = 'œ',
}
local cases = {
"nom", "gen", "dat", "acc", "abl", "voc", "loc"
}
local nums = {
"sg", "pl"
}
local genders = {
"m", "f", "n"
}
local irreg_noun_to_decl = {
["bōs"] = "3",
["cherub"] = "irreg",
["deus"] = "2",
["Deus"] = "2",
["domus"] = "4,2",
["Iēsus"] = "4",
["Jēsus"] = "4",
["iūgerum"] = "2,3",
["jūgerum"] = "2,3",
["sūs"] = "3",
["ēthos"] = "3",
["Athōs"] = "2",
["lexis"] = "3",
["vēnum"] = "4,2",
["vīs"] = "3",
}
local irreg_adj_to_decl = {
["duo"] = "irreg+",
["ambō"] = "irreg+",
["mīlle"] = "3-1+",
["plūs"] = "3-1+",
["is"] = "1&2+",
["īdem"] = "1&2+",
["ille"] = "1&2+",
["ipse"] = "1&2+",
["iste"] = "1&2+",
["quis"] = "irreg+",
["quī"] = "irreg+",
["quisquis"] = "irreg+",
}
local declension_to_english = {
["1"] = "first",
["2"] = "second",
["3"] = "third",
["4"] = "fourth",
["5"] = "fifth",
}
local number_to_english = {
"one", "two", "three", "four", "five"
}
local linked_prefixes = {
"", "linked_"
}
-- List of adjective slots for which we generate linked variants. Include
-- feminine and neuter variants because they will be needed if the adjective
-- is part of a multiword feminine or neuter noun.
local potential_adj_lemma_slots = {
"nom_sg_m",
"nom_pl_m",
"nom_sg_f",
"nom_pl_f",
"nom_sg_n",
"nom_pl_n"
}
local linked_to_non_linked_adj_slots = {}
for _, slot in ipairs(potential_adj_lemma_slots) do
linked_to_non_linked_adj_slots["linked_" .. slot] = slot
end
local potential_noun_lemma_slots = {
"nom_sg",
"nom_pl"
}
local linked_to_non_linked_noun_slots = {}
for _, slot in ipairs(potential_noun_lemma_slots) do
linked_to_non_linked_noun_slots["linked_" .. slot] = slot
end
-- Iterate over all the "slots" associated with a noun declension, where a slot
-- is a particular case/number combination. If overridable_only, don't include the
-- "linked_" variants (linked_nom_sg, linked_nom_pl), which aren't overridable.
local function iter_noun_slots(overridable_only)
local case = 1
local num = 1
local linked_variant = 0
local function iter()
linked_variant = linked_variant + 1
local max_linked_variant = overridable_only and 1 or cases[case] == "nom" and 2 or 1
if linked_variant > max_linked_variant then
linked_variant = 1
num = num + 1
if num > #nums then
num = 1
case = case + 1
if case > #cases then
return nil
end
end
end
return linked_prefixes[linked_variant] .. cases[case] .. "_" .. nums[num]
end
return iter
end
-- Iterate over all the "slots" associated with an adjective declension, where a slot
-- is a particular case/number/gender combination. If overridable_only, don't include the
-- "linked_" variants (linked_nom_sg_m, linked_nom_pl_m, etc.), which aren't overridable.
local function iter_adj_slots(overridable_only)
local case = 1
local num = 1
local gen = 1
local linked_variant = 0
local function iter()
linked_variant = linked_variant + 1
local max_linked_variant = overridable_only and 1 or cases[case] == "nom" and genders[gen] == "m" and 2 or 1
if linked_variant > max_linked_variant then
linked_variant = 1
gen = gen + 1
if gen > #genders then
gen = 1
num = num + 1
if num > #nums then
num = 1
case = case + 1
if case > #cases then
return nil
end
end
end
end
return linked_prefixes[linked_variant] .. cases[case] .. "_" .. nums[num] .. "_" .. genders[gen]
end
return iter
end
-- Iterate over all the "slots" associated with a noun or adjective declension (depending on
-- the value of IS_ADJ), where a slot is a particular case/number combination (in the case of
-- nouns) or case/number/gender combination (in the case of adjectives). If OVERRIDABLE_ONLY
-- is specified, only include overridable slots (not including linked_ variants).
local function iter_slots(is_adj, overridable_only)
if is_adj then
return iter_adj_slots(overridable_only)
else
return iter_noun_slots(overridable_only)
end
end
local function concat_forms_in_slot(forms)
if forms and forms ~= "" and forms ~= "—" and #forms > 0 then
local new_vals = {}
for _, v in ipairs(forms) do
table.insert(new_vals, rsub(v, "|", "<!>"))
end
return table.concat(new_vals, ",")
else
return nil
end
end
local function glossary_link(anchor, text)
text = text or anchor
return "[[Appendix:Glossary#" .. anchor .. "|" .. text .. "]]"
end
local function track(page)
require("Module:debug").track("la-nominal/" .. page)
return true
end
local function set_union(sets)
local union = {}
for _, set in ipairs(sets) do
for key, _ in pairs(set) do
union[key] = true
end
end
return union
end
local function set_difference(set1, set2)
local diff = {}
for key, _ in pairs(set1) do
if not set2[key] then
diff[key] = true
end
end
return diff
end
local function process_noun_forms_and_overrides(data, args)
local redlink = false
-- Process overrides and canonicalize forms.
for slot in iter_noun_slots() do
local val = nil
if args[slot] then
val = args[slot]
data.user_specified[slot] = true
else
-- Overridding nom_sg etc. should override linked_nom_sg so that
-- the correct value gets displayed in the headword, which uses
-- linked_nom_sg.
local non_linked_equiv_slot = linked_to_non_linked_noun_slots[slot]
if non_linked_equiv_slot and args[non_linked_equiv_slot] then
val = args[non_linked_equiv_slot]
data.user_specified[slot] = true
else
val = data.forms[slot]
end
end
if val then
if type(val) == "string" then
val = mw.text.split(val, "/")
end
if (data.num == "pl" and slot:find("sg")) or (data.num == "sg" and slot:find("pl")) then
data.forms[slot] = ""
elseif val[1] == "" or val[1] == "-" or val[1] == "—" then
data.forms[slot] = "—"
else
data.forms[slot] = val
end
end
end
-- Compute the lemma for accelerators. Do this after processing
-- overrides in case we overrode the lemma form(s).
local accel_lemma
if data.num and data.num ~= "" then
accel_lemma = data.forms["nom_" .. data.num]
else
accel_lemma = data.forms["nom_sg"]
end
if type(accel_lemma) == "table" then
accel_lemma = accel_lemma[1]
end
-- Set the accelerators, and determine if there are red links.
for slot in iter_noun_slots() do
local val = data.forms[slot]
if val and val ~= "" and val ~= "—" and #val > 0 then
for i, form in ipairs(val) do
local accel_form = slot
accel_form = accel_form:gsub("_([sp])[gl]$", "|%1")
data.accel[slot] = {form = accel_form, lemma = accel_lemma}
if not redlink and NAMESPACE == '' then
local title = lang:makeEntryName(form)
local t = mw.title.new(title)
if t and not t.exists then
table.insert(data.categories, "Latin " .. data.pos .. " with red links in their declension tables")
redlink = true
end
end
end
end
end
end
local function process_adj_forms_and_overrides(data, args)
local redlink = false
-- Process overrides and canonicalize forms.
for slot in iter_adj_slots() do
-- If noneut=1 passed, clear out all neuter forms.
if data.noneut and slot:find("_n") then
data.forms[slot] = nil
end
local val = nil
if args[slot] then
val = args[slot]
data.user_specified[slot] = true
else
-- Overridding nom_sg_m etc. should override linked_nom_sg_m so that
-- the correct value gets displayed in the headword, which uses
-- linked_nom_sg_m.
local non_linked_equiv_slot = linked_to_non_linked_adj_slots[slot]
if non_linked_equiv_slot and args[non_linked_equiv_slot] then
val = args[non_linked_equiv_slot]
data.user_specified[slot] = true
else
val = data.forms[slot]
end
end
if val then
if type(val) == "string" then
val = mw.text.split(val, "/")
end
if (data.num == "pl" and slot:find("sg")) or (data.num == "sg" and slot:find("pl")) then
data.forms[slot] = ""
elseif val[1] == "" or val[1] == "-" or val[1] == "—" then
data.forms[slot] = "—"
else
data.forms[slot] = val
end
end
end
-- Compute the lemma for accelerators. Do this after processing
-- overrides in case we overrode the lemma form(s).
local accel_lemma, accel_lemma_f
if data.num and data.num ~= "" then
accel_lemma = data.forms["nom_" .. data.num .. "_m"]
accel_lemma_f = data.forms["nom_" .. data.num .. "_f"]
else
accel_lemma = data.forms["nom_sg_m"]
accel_lemma_f = data.forms["nom_sg_f"]
end
if type(accel_lemma) == "table" then
accel_lemma = accel_lemma[1]
end
if type(accel_lemma_f) == "table" then
accel_lemma_f = accel_lemma_f[1]
end
-- Set the accelerators, and determine if there are red links.
for slot in iter_adj_slots() do
local val = data.forms[slot]
if val and val ~= "" and val ~= "—" and #val > 0 then
for i, form in ipairs(val) do
local accel_form = slot
accel_form = accel_form:gsub("_([sp])[gl]_", "|%1|")
if data.noneut then
-- If noneut=1, we're being asked to do a noun like
-- Aquītānus or Rōmānus that has masculine and feminine
-- variants, not an adjective. In that case, make the
-- accelerators correspond to nominal case/number forms
-- without the gender, and use the feminine as the
-- lemma for feminine forms.
if slot:find("_f") then
data.accel[slot] = {form = accel_form:gsub("|f$", ""), lemma = accel_lemma_f}
else
data.accel[slot] = {form = accel_form:gsub("|m$", ""), lemma = accel_lemma}
end
else
if not data.forms.nom_sg_n and not data.forms.nom_pl_n then
-- use multipart tags if called for
accel_form = accel_form:gsub("|m$", "|m//f//n")
elseif not data.forms.nom_sg_f and not data.forms.nom_pl_f then
accel_form = accel_form:gsub("|m$", "|m//f")
end
-- use the order nom|m|s, which is more standard than nom|s|m
accel_form = accel_form:gsub("|(.-)|(.-)$", "|%2|%1")
data.accel[slot] = {form = accel_form, lemma = accel_lemma}
end
if not redlink and NAMESPACE == '' then
local title = lang:makeEntryName(form)
local t = mw.title.new(title)
if t and not t.exists then
table.insert(data.categories, "Latin " .. data.pos .. " with red links in their declension tables")
redlink = true
end
end
end
end
end
-- See if the masculine and feminine/neuter are the same across all slots.
-- If so, blank out the feminine/neuter so we use a table that combines
-- masculine and feminine, or masculine/feminine/neuter.
for _, gender in ipairs({"f", "n"}) do
local other_is_masc = true
for _, case in ipairs(cases) do
for _, num in ipairs(nums) do
if not ut.equals(data.forms[case .. "_" .. num .. "_" .. gender],
data.forms[case .. "_" .. num .. "_m"]) then
other_is_masc = false
break
end
end
if not other_is_masc then
break
end
end
if other_is_masc then
for _, case in ipairs(cases) do
for _, num in ipairs(nums) do
data.forms[case .. "_" .. num .. "_" .. gender] = nil
end
end
end
end
end
-- Convert data.forms[slot] for all slots into displayable text. This is
-- an older function, still currently used for nouns but not for adjectives.
-- For adjectives, the adjective table module has special code to combine
-- adjacent slots, and needs the original forms plus other text that will
-- go into the displayable text for the slot; this is handled below by
-- partial_show_forms() and finish_show_form().
local function show_forms(data, is_adj)
local noteindex = 1
local notes = {}
local seen_notes = {}
for slot in iter_slots(is_adj) do
local val = data.forms[slot]
if val and val ~= "" and val ~= "—" then
for i, form in ipairs(val) do
local link = m_links.full_link({lang = lang, term = form, accel = data.accel[slot]})
local this_notes = data.notes[slot .. i]
if this_notes and not data.user_specified[slot] then
if type(this_notes) == "string" then
this_notes = {this_notes}
end
local link_indices = {}
for _, this_note in ipairs(this_notes) do
local this_noteindex = seen_notes[this_note]
if not this_noteindex then
-- Generate a footnote index.
this_noteindex = noteindex
noteindex = noteindex + 1
table.insert(notes, '<sup style="color: red">' .. this_noteindex .. '</sup>' .. this_note)
seen_notes[this_note] = this_noteindex
end
ut.insert_if_not(link_indices, this_noteindex)
end
val[i] = link .. '<sup style="color: red">' .. table.concat(link_indices, ",") .. '</sup>'
else
val[i] = link
end
end
-- FIXME, do we want this difference?
data.forms[slot] = table.concat(val, is_adj and ", " or "<br />")
end
end
data.footnote = table.concat(notes, "<br />") .. data.footnote
end
-- Generate the display form for a set of slots with identical content. We
-- verify that the slots are actually identical, and throw an assertion error
-- if not. The display form is as in show_forms() but combines together all the
-- accelerator forms for all the slots.
local function finish_show_form(data, slots, is_adj)
assert(#slots > 0)
local slot1 = slots[1]
local forms = data.forms[slot1]
local notetext = data.notetext[slot1]
for _, slot in ipairs(slots) do
if not ut.equals(data.forms[slot], forms) then
error("data.forms[" .. slot1 .. "] = " .. (concat_forms_in_slot(forms) or "nil") ..
", but data.forms[" .. slot .. "] = " .. (concat_forms_in_slot(data.forms[slot]) or "nil"))
end
assert(ut.equals(data.notetext[slot], notetext))
end
if not forms then
return "—"
else
local accel_forms = {}
local accel_lemma = data.accel[slot1].lemma
for _, slot in ipairs(slots) do
assert(data.accel[slot].lemma == accel_lemma)
table.insert(accel_forms, data.accel[slot].form)
end
local combined_accel_form = table.concat(accel_forms, "|;|")
local accel = {form = combined_accel_form, lemma = accel_lemma}
local formtext = {}
for i, form in ipairs(forms) do
table.insert(formtext, m_links.full_link({lang = lang, term = form, accel = accel}) .. notetext[i])
end
-- FIXME, do we want this difference?
return table.concat(formtext, is_adj and ", " or "<br />")
end
end
-- Used by the adjective table module. This does some of the work of
-- show_forms(); in particular, it converts all empty forms of any format
-- (nil, "", "—") to nil and, if the forms aren't empty, generates the footnote
-- text associated with each form.
local function partial_show_forms(data, is_adj)
local noteindex = 1
local notes = {}
local seen_notes = {}
data.notetext = {}
-- Store this function in DATA so that it can be called from the adjective
-- table module without needing to require this module, which will (or
-- could) lead to recursive module requiring.
data.finish_show_form = finish_show_form
for slot in iter_slots(is_adj) do
local val = data.forms[slot]
if not val or val == "" or val == "—" then
data.forms[slot] = nil
else
local notetext = {}
for i, form in ipairs(val) do
local this_notes = data.notes[slot .. i]
if this_notes and not data.user_specified[slot] then
if type(this_notes) == "string" then
this_notes = {this_notes}
end
local link_indices = {}
for _, this_note in ipairs(this_notes) do
local this_noteindex = seen_notes[this_note]
if not this_noteindex then
-- Generate a footnote index.
this_noteindex = noteindex
noteindex = noteindex + 1
table.insert(notes, '<sup style="color: red">' .. this_noteindex .. '</sup>' .. this_note)
seen_notes[this_note] = this_noteindex
end
ut.insert_if_not(link_indices, this_noteindex)
end
table.insert(notetext, '<sup style="color: red">' .. table.concat(link_indices, ",") .. '</sup>')
else
table.insert(notetext, "")
end
end
data.notetext[slot] = notetext
end
end
data.footnote = table.concat(notes, "<br />") .. data.footnote
end
local function make_noun_table(data)
if data.num == "sg" then
return m_noun_table.make_table_sg(data)
elseif data.num == "pl" then
return m_noun_table.make_table_pl(data)
else
return m_noun_table.make_table(data)
end
end
local function concat_forms(data, is_adj, include_props)
local ins_text = {}
for slot in iter_slots(is_adj) do
local formtext = concat_forms_in_slot(data.forms[slot])
if formtext then
table.insert(ins_text, slot .. "=" .. formtext)
end
end
if include_props then
if data.gender then
table.insert(ins_text, "g=" .. mw.ustring.lower(data.gender))
end
local num = data.num
if not num or num == "" then
num = "both"
end
table.insert(ins_text, "num=" .. num)
end
return table.concat(ins_text, "|")
end
-- Given an ending (or possibly a full regex matching the entire lemma, if
-- a regex group is present), return the base minus the ending, or nil if
-- the ending doesn't match.
local function extract_base(lemma, ending)
if ending:find("%(") then
return rmatch(lemma, ending)
else
return rmatch(lemma, "^(.*)" .. ending .. "$")
end
end
-- Given ENDINGS_AND_SUBTYPES (a list of pairs of endings with associated
-- subtypes, where each pair consists of a single ending spec and a list of
-- subtypes), check each ending in turn against LEMMA. If it matches, return
-- the pair BASE, STEM2, SUBTYPES where BASE is the remainder of LEMMA minus
-- the ending, STEM2 is as passed in, and SUBTYPES is the subtypes associated
-- with the ending. But don't return SUBTYPES if any of the subtypes in the
-- list is specifically canceled in SPECIFIED_SUBTYPES (a set, i.e. a table
-- where the keys are strings and the value is always true); instead, consider
-- the next ending in turn. If no endings match, throw an error if DECLTYPE is
-- non-nil, mentioning the DECLTYPE (the user-specified declension); but if
-- DECLTYPE is nil, just return nil, nil, nil.
--
-- The ending spec in ENDINGS_AND_SUBTYPES is one of the following:
--
-- 1. A simple string, e.g. "tūdō", specifying an ending.
-- 2. A regex that should match the entire lemma (it should be anchored at
-- the beginning with ^ and at the end with $), and contains a single
-- capturing group to match the base.
-- 3. A pair {SIMPLE_STRING_OR_REGEX, STEM2_ENDING} where
-- SIMPLE_STRING_OR_REGEX is one of the previous two possibilities and
-- STEM2_ENDING is a string specifying the corresponding ending that must
-- be present in STEM2. If this form is used, the combination of
-- base + STEM2_ENDING must exactly match STEM2 in order for this entry
-- to be considered a match. An example is {"is", ""}, which will match
-- lemma == "follis", stem2 == "foll", but not lemma == "lapis",
-- stem2 == "lapid".
local function get_noun_subtype_by_ending(lemma, stem2, decltype, specified_subtypes,
endings_and_subtypes)
for _, ending_and_subtypes in ipairs(endings_and_subtypes) do
local ending = ending_and_subtypes[1]
local subtypes = ending_and_subtypes[2]
not_this_subtype = false
if specified_subtypes.pl and not ut.contains(subtypes, "pl") then
-- We now require that plurale tantum terms specify a plural-form lemma.
-- The autodetected subtypes will include 'pl' for such lemmas; if not,
-- we fail this entry.
not_this_subtype = true
else
for _, subtype in ipairs(subtypes) do
-- A subtype is directly canceled by specifying -SUBTYPE.
-- In addition, M or F as a subtype is canceled by N, and
-- vice-versa, but M doesn't cancel F or vice-versa; instead,
-- we simply ignore the conflicting gender specification when
-- constructing the combination of specified and inferred subtypes.
-- The reason for this is that neuters have distinct declensions
-- from masculines and feminines, but masculines and feminines have
-- the same declension, and various nouns in Latin that are
-- normally masculine are exceptionally feminine and vice-versa
-- (nauta, agricola, fraxinus, malus "apple tree", manus, rēs,
-- etc.).
--
-- In addition, sg as a subtype is canceled by pl and vice-versa.
-- It's also possible to specify both, which will override sg but
-- not cancel it (in the sense that it won't prevent the relevant
-- rule from matching). For example, there's a rule specifying that
-- lemmas beginning with a capital letter and ending in -ius take
-- the ius.voci.sg subtypes. Specifying such a lemma with the
-- subtype both will result in the ius.voci.both subtypes, whereas
-- specifying such a lemma with the subtype pl will cause this rule
-- not to match, and it will fall through to a less specific rule
-- that returns just the ius subtype, which will be combined with
-- the explicitly specified pl subtype to produce ius.pl.
if specified_subtypes["-" .. subtype] or
subtype == "N" and (specified_subtypes.M or specified_subtypes.F) or
(subtype == "M" or subtype == "F") and specified_subtypes.N or
subtype == "sg" and specified_subtypes.pl or
subtype == "pl" and specified_subtypes.sg then
not_this_subtype = true
break
end
end
end
if not not_this_subtype then
if type(ending) == "table" then
local lemma_ending = ending[1]
local stem2_ending = ending[2]
local base = extract_base(lemma, lemma_ending)
if base and base .. stem2_ending == stem2 then
return base, stem2, subtypes
end
else
local base = extract_base(lemma, ending)
if base then
return base, stem2, subtypes
end
end
end
end
if decltype then
error("Unrecognized ending for declension-" .. decltype .. " noun: " .. lemma)
end
return nil, nil, nil
end
-- Autodetect the subtype of a noun given all the information specified by the
-- user: lemma, stem2, declension type and specified subtypes. Three values are
-- returned: the lemma base (i.e. the stem of the lemma, as required by the
-- declension functions), the new stem2 and the autodetected subtypes. Note
-- that this will not detect a given subtype if the explicitly specified
-- subtypes are incompatible (i.e. if -SUBTYPE is specified for any subtype
-- that would be returned; or if M or F is specified when N would be returned,
-- and vice-versa; or if pl is specified when sg would be returned, and
-- vice-versa).
--
-- NOTE: This function has intimate knowledge of the way that the declension
-- functions handle subtypes, particularly for the third declension.
local function detect_noun_subtype(lemma, stem2, typ, subtypes)
local base, ending
if typ == "1" then
return get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"ām", {"F", "am"}},
{"ās", {"M", "Greek", "Ma"}},
{"ēs", {"M", "Greek", "Me"}},
{"ē", {"F", "Greek"}},
{"ae", {"F", "pl"}},
{"a", {"F"}},
})
elseif typ == "2" then
local detected_subtypes
lemma, stem2, detected_subtypes = get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"^(.*r)$", {"M", "er"}},
{"^(.*v)os$", {"M", "vos"}},
{"os", {"M", "Greek"}},
{"on", {"N", "Greek"}},
-- -ius beginning with a capital letter is assumed a proper name,
-- and takes the voci subtype (vocative in -ī) along with the ius
-- subtype and sg-only. Other nouns in -ius just take the ius
-- subtype. Explicitly specify "sg" so that if .pl is given,
-- this rule won't apply.
{"^([A-ZĀĒĪŌŪȲĂĔĬŎŬ].*)ius$", {"M", "ius", "voci", "sg"}},
{"ius", {"M", "ius"}},
{"ium", {"N", "ium"}},
-- If the lemma ends in -us and the user said N or -M, then the
-- following won't apply, and the second (neuter) -us will applly.
{"us", {"M"}},
{"us", {"N", "us"}},
{"um", {"N"}},
{"iī", {"M", "ius", "pl"}},
{"ia", {"N", "ium", "pl"}},
-- If the lemma ends in -ī and the user said N or -M, then the
-- following won't apply, and the second (neuter) -ī will applly.
{"ī", {"M", "pl"}},
{"ī", {"N", "us", "pl"}},
{"a", {"N", "pl"}},
})
stem2 = stem2 or lemma
return lemma, stem2, detected_subtypes
elseif typ == "3" then
if subtypes.pl then
if subtypes.Greek then
base = rmatch(lemma, "^(.*)erēs$")
if base then
return base .. "ēr", base .. "er", {"er"}
end
base = rmatch(lemma, "^(.*)ontēs$")
if base then
return base .. "ōn", base .. "ont", {"on"}
end
base = rmatch(lemma, "^(.*)es$")
if base then
return "foo", stem2 or base, {}
end
error("Unrecognized ending for declension-3 plural Greek noun: " .. lemma)
end
base = rmatch(lemma, "^(.*)ia$")
if base then
return "foo", stem2 or base, {"N", "I", "pure"}
end
base = rmatch(lemma, "^(.*)a$")
if base then
return "foo", stem2 or base, {"N"}
end
base = rmatch(lemma, "^(.*)ēs$")
if base then
return "foo", stem2 or base, {}
end
error("Unrecognized ending for declension-3 plural noun: " .. lemma)
end
stem2 = stem2 or m_la_utilities.make_stem2(lemma)
local detected_subtypes
if subtypes.Greek then
base, _, detected_subtypes = get_noun_subtype_by_ending(lemma, stem2, nil, subtypes, {
{{"is", ""}, {"I"}},
{"ēr", {"er"}},
{"ōn", {"on"}},
})
if base then
return lemma, stem2, detected_subtypes
end
return lemma, stem2, {}
end
if not subtypes.N then
base, _, detected_subtypes = get_noun_subtype_by_ending(lemma, stem2, nil, subtypes, {
{{"^([A-ZĀĒĪŌŪȲĂĔĬŎŬ].*pol)is$", ""}, {"F", "polis", "sg", "loc"}},
{{"tūdō", "tūdin"}, {"F"}},
{{"tās", "tāt"}, {"F"}},
{{"tūs", "tūt"}, {"F"}},
{{"tiō", "tiōn"}, {"F"}},
{{"siō", "siōn"}, {"F"}},
{{"xiō", "xiōn"}, {"F"}},
{{"gō", "gin"}, {"F"}},
{{"or", "ōr"}, {"M"}},
{{"trīx", "trīc"}, {"F"}},
{{"trix", "trīc"}, {"F"}},
{{"is", ""}, {"I"}},
{{"^([a-zāēīōūȳăĕĭŏŭ].*)ēs$", ""}, {"I"}},
})
if base then
return lemma, stem2, detected_subtypes
end
end
base, _, detected_subtypes = get_noun_subtype_by_ending(lemma, stem2, nil, subtypes, {
{{"us", "or"}, {"N"}},
{{"us", "er"}, {"N"}},
{{"ma", "mat"}, {"N"}},
{{"men", "min"}, {"N"}},
{{"^([A-ZĀĒĪŌŪȲĂĔĬŎŬ].*)e$", ""}, {"N", "sg"}},
{{"e", ""}, {"N", "I", "pure"}},
{{"al", "āl"}, {"N", "I", "pure"}},
{{"ar", "ār"}, {"N", "I", "pure"}},
})
if base then
return lemma, stem2, detected_subtypes
end
return lemma, stem2, {}
elseif typ == "4" then
if subtypes.echo or subtypes.argo or subtypes.Callisto then
base = rmatch(lemma, "^(.*)ō$")
if not base then
error("Declension-4 noun of subtype .echo, .argo or .Callisto should end in -ō: " .. lemma)
end
if subtypes.Callisto then
return base, nil, {"F", "sg"}
else
return base, nil, {"F"}
end
end
return get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"us", {"M"}},
{"ū", {"N"}},
{"ūs", {"M", "pl"}},
{"ua", {"N", "pl"}},
})
elseif typ == "5" then
return get_noun_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"iēs", {"F", "i"}},
{"iēs", {"F", "i", "pl"}},
{"ēs", {"F"}},
{"ēs", {"F", "pl"}},
})
elseif typ == "irreg" and lemma == "domus" then
-- [[domus]] auto-sets data.loc = true, but we need to know this
-- before declining the noun so we can propagate it to other segments.
return lemma, nil, {"loc"}
elseif typ == "indecl" or typ == "irreg" and (
lemma == "Deus" or lemma == "Iēsus" or lemma == "Jēsus" or
lemma == "Athōs" or lemma == "vēnum"
) then
-- Indeclinable nouns, and certain irregular nouns, set data.num = "sg",
-- but we need to know this before declining the noun so we can
-- propagate it to other segments.
return lemma, nil, {"sg"}
else
return lemma, nil, {}
end
end
function export.detect_noun_subtype(frame)
local params = {
[1] = {required = true},
[2] = {},
[3] = {},
[4] = {},
}
local args = m_para.process(frame.args, params)
local specified_subtypes = {}
if args[4] then
for _, subtype in ipairs(rsplit(args[4], ".")) do
specified_subtypes[subtype] = true
end
end
local base, stem2, subtypes = detect_noun_subtype(args[1], args[2], args[3], specified_subtypes)
return base .. "|" .. (stem2 or "") .. "|" .. table.concat(subtypes, ".")
end
-- Given ENDINGS_AND_SUBTYPES (a list of four-tuples of ENDING, RETTYPE,
-- SUBTYPES, PROCESS_RETVAL), check each ENDING in turn against LEMMA and
-- STEM2. If it matches, return a four-tuple BASE, STEM2, RETTYPE, NEW_SUBTYPES
-- where BASE is normally the remainder of LEMMA minus the ending, STEM2 is
-- as passed in, RETTYPE is as passed in, and NEW_SUBTYPES is the same as
-- SUBTYPES minus any subtypes beginning with a hyphen. If no endings match,
-- throw an error if DECLTYPPE is non-nil, mentioning the DECLTYPE
-- (user-specified declension); but if DECLTYPE is nil, just return the tuple
-- nil, nil, nil, nil.
--
-- In order for a given entry to match, ENDING must match and also the subtypes
-- in SUBTYPES (a list) must not be incompatible with the passed-in
-- user-specified subtypes SPECIFIED_SUBTYPES (a set, i.e. a table where the
-- keys are strings and the value is always true). "Incompatible" means that
-- a given SUBTYPE is specified in either one and -SUBTYPE in the other, or
-- that "pl" is found in SPECIFIED_SUBTYPES and not in SUBTYPES.
--
-- The ending spec in ENDINGS_AND_SUBTYPES is one of the following:
--
-- 1. A simple string, e.g. "tūdō", specifying an ending.
-- 2. A regex that should match the entire lemma (it should be anchored at
-- the beginning with ^ and at the end with $), and contains a single
-- capturing group to match the base.
-- 3. A pair {SIMPLE_STRING_OR_REGEX, STEM2_ENDING} where
-- SIMPLE_STRING_OR_REGEX is one of the previous two possibilities and
-- STEM2_ENDING is a string specifying the corresponding ending that must
-- be present in STEM2. If this form is used, the combination of
-- base + STEM2_ENDING must exactly match STEM2 in order for this entry
-- to be considered a match. An example is {"is", ""}, which will match
-- lemma == "follis", stem2 == "foll", but not lemma == "lapis",
-- stem2 == "lapid".
--
-- If PROCESS_STEM2 is given and the returned STEM2 would be nil, call
-- process_stem2(BASE) to get the STEM2 to return.
local function get_adj_type_and_subtype_by_ending(lemma, stem2, decltype,
specified_subtypes, endings_and_subtypes, process_stem2)
for _, ending_and_subtypes in ipairs(endings_and_subtypes) do
local ending = ending_and_subtypes[1]
local rettype = ending_and_subtypes[2]
local subtypes = ending_and_subtypes[3]
local process_retval = ending_and_subtypes[4]
not_this_subtype = false
if specified_subtypes.pl and not ut.contains(subtypes, "pl") then
-- We now require that plurale tantum terms specify a plural-form lemma.
-- The autodetected subtypes will include 'pl' for such lemmas; if not,
-- we fail this entry.
not_this_subtype = true
else
for _, subtype in ipairs(subtypes) do
-- A subtype is directly canceled by specifying -SUBTYPE.
if specified_subtypes["-" .. subtype] then
not_this_subtype = true
break
end
-- A subtype is canceled if the user specified SUBTYPE and
-- -SUBTYPE is given in the to-be-returned subtypes.
local must_not_be_present = rmatch(subtype, "^%-(.*)$")
if must_not_be_present and specified_subtypes[must_not_be_present] then
not_this_subtype = true
break
end
end
end
if not not_this_subtype then
local base
if type(ending) == "table" then
local lemma_ending = ending[1]
local stem2_ending = ending[2]
base = extract_base(lemma, lemma_ending)
if base and base .. stem2_ending ~= stem2 then
base = nil
end
else
base = extract_base(lemma, ending)
end
if base then
-- Remove subtypes of the form -SUBTYPE from the subtypes
-- to be returned.
local new_subtypes = {}
for _, subtype in ipairs(subtypes) do
if not rfind(subtype, "^%-") then
table.insert(new_subtypes, subtype)
end
end
if process_retval then
base, stem2 = process_retval(base, stem2)
end
if process_stem2 then
stem2 = stem2 or process_stem2(base)
end
return base, stem2, rettype, new_subtypes
end
end
end
if not decltype then
return nil, nil, nil, nil
elseif decltype == "" then
error("Unrecognized ending for adjective: " .. lemma)
else
error("Unrecognized ending for declension-" .. decltype .. " adjective: " .. lemma)
end
end
-- Autodetect the type and subtype of an adjective given all the information
-- specified by the user: lemma, stem2, declension type and specified subtypes.
-- Four values are returned: the lemma base (i.e. the stem of the lemma, as
-- required by the declension functions), the value of stem2 to pass to the
-- declension function, the declension type and the autodetected subtypes.
-- Note that this will not detect a given subtype if -SUBTYPE is specified for
-- any subtype that would be returned, or if SUBTYPE is specified and -SUBTYPE
-- is among the subtypes that would be returned (such subtypes are filtered out
-- of the returned subtypes).
local function detect_adj_type_and_subtype(lemma, stem2, typ, subtypes)
if not rfind(typ, "^[0123]") and not rfind(typ, "^irreg") then
subtypes = mw.clone(subtypes)
subtypes[typ] = true
typ = ""
end
local function base_as_stem2(base, stem2)
return "foo", base
end
local function constant_base(baseval)
return function(base, stem2)
return baseval, nil
end
end
local function decl12_stem2(base)
return base
end
local function decl3_stem2(base)
return m_la_utilities.make_stem2(base)
end
local decl12_entries = {
{"us", "1&2", {}},
{"a", "1&2", {}},
{"um", "1&2", {}},
{"ī", "1&2", {"pl"}},
{"ae", "1&2", {"pl"}},
{"a", "1&2", {"pl"}},
-- Nearly all -os adjectives are greekA
{"os", "1&2", {"greekA", "-greekE"}},
{"os", "1&2", {"greekE", "-greekA"}},
{"ē", "1&2", {"greekE", "-greekA"}},
{"on", "1&2", {"greekA", "-greekE"}},
{"on", "1&2", {"greekE", "-greekA"}},
{"^(.*er)$", "1&2", {"er"}},
{"^(.*ur)$", "1&2", {"er"}},
{"^(h)ic$", "1&2", {"ic"}},
}
local decl3_entries = {
{"^(.*er)$", "3-3", {}},
{"is", "3-2", {}},
{"e", "3-2", {}},
{"^(.*[ij])or$", "3-C", {}},
{"^(min)or$", "3-C", {}},
-- Detect -ēs as 3-1 without auto-inferring .pl if .pl
-- not specified. If we don't do this, the later entry for
-- -ēs will auto-infer .pl whenever -ēs is specified (which
-- won't work for adjectives like quadripēs, volucripēs).
-- Essentially, for declension-3 adjectives, we require that
-- .pl is given if the lemma is plural.
--
-- Most 3-1 adjectives are i-stem (e.g. audāx) so we require -I
-- to be given with non-i-stem adjectives. The first entry below
-- will apply when -I isn't given, the second when it is given.
{"^(.*ēs)$", "3-1", {"I"}},
{"^(.*ēs)$", "3-1", {"par"}},
{"^(.*[ij])ōrēs$", "3-C", {"pl"}},
{"^(min)ōrēs$", "3-C", {"pl"}},
-- If .pl with -ēs, we don't know if the adjective is 3-1, 3-2
-- or 3-3. Since 3-2 is probably the most common, we infer it
-- (as well as the fact that these adjectives *are* in a sense
-- 3-2 since they have a distinct neuter in -(i)a. Note that
-- we have two entries here; the first one will apply unless
-- -I is given, and will infer an i-stem adjective; the second
-- one will apply otherwise (and infer a non-i-stem 3-1 adjective).
{"ēs", "3-2", {"pl", "I"}},
{"ēs", "3-1", {"pl", "par"}, base_as_stem2},
-- Same for neuters.
{"ia", "3-2", {"pl", "I"}},
{"a", "3-1", {"pl", "par"}, base_as_stem2},
-- As above for -ēs but for miscellaneous singulars.
{"", "3-1", {"I"}},
{"", "3-1", {"par"}},
}
if typ == "" then
local base, new_stem2, rettype, new_subtypes =
get_adj_type_and_subtype_by_ending(lemma, stem2, nil, subtypes,
decl12_entries, decl12_stem2)
if base then
return base, new_stem2, rettype, new_subtypes
else
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ,
subtypes, decl3_entries, decl3_stem2)
end
elseif typ == "0" then
return lemma, nil, "0", {}
elseif typ == "3" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes,
decl3_entries, decl3_stem2)
elseif typ == "1&2" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes,
decl12_entries, decl12_stem2)
elseif typ == "1-1" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"a", "1-1", {}},
{"ae", "1-1", {"pl"}},
})
elseif typ == "2-2" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"us", "2-2", {}},
{"um", "2-2", {}},
{"ī", "2-2", {"pl"}},
{"a", "2-2", {"pl"}},
{"os", "2-2", {"greek"}},
{"on", "2-2", {"greek"}},
{"oe", "2-2", {"greek", "pl"}},
})
elseif typ == "3-1" then
-- This will cancel out the I if -I is specified in subtypes, and the
-- resulting lack of I will get converted to "par".
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
-- Detect -ēs as 3-1 without auto-inferring .pl if .pl
-- not specified. If we don't do this, the later entry for
-- -ēs will auto-infer .pl whenever -ēs is specified.
-- Essentially, for declension-3 adjectives, we require that
-- .pl is given if the lemma is plural.
-- We have two entries here; the first one will apply unless
-- -I is given, and will infer an i-stem adjective; the second
-- one will apply otherwise.
{"^(.*ēs)$", "3-1", {"I"}},
{"^(.*ēs)$", "3-1", {"par"}},
{"ēs", "3-1", {"pl", "I"}, base_as_stem2},
{"ēs", "3-1", {"pl", "par"}, base_as_stem2},
{"ia", "3-1", {"pl", "I"}, base_as_stem2},
{"a", "3-1", {"pl", "par"}, base_as_stem2},
{"", "3-1", {"I"}},
{"", "3-1", {"par"}},
}, decl3_stem2)
elseif typ == "3-2" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"is", "3-2", {}},
{"e", "3-2", {}},
-- Detect -ēs as 3-2 without auto-inferring .pl if .pl
-- not specified. If we don't do this, the later entry for
-- -ēs will auto-infer .pl whenever -ēs is specified (which
-- won't work for adjectives like isoscelēs). Essentially,
-- for declension-3 adjectives, we require that .pl is given
-- if the lemma is plural.
{"ēs", "3-2", {}},
{"ēs", "3-2", {"pl"}},
{"ia", "3-2", {"pl"}},
}, decl3_stem2)
elseif typ == "3-C" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"^(.*[ij])or$", "3-C", {}},
{"^(min)or$", "3-C", {}},
{"^(.*[ij])ōrēs$", "3-C", {"pl"}},
{"^(min)ōrēs$", "3-C", {"pl"}},
}, decl3_stem2)
elseif typ == "irreg" then
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"^(duo)$", typ, {"pl"}},
{"^(ambō)$", typ, {"pl"}},
{"^(mīll?ia)$", typ, {"N", "pl"}, constant_base("mīlle")},
-- match ea
{"^(ea)$", typ, {}, constant_base("is")},
-- match id
{"^(id)$", typ, {}, constant_base("is")},
-- match plural eī, iī
{"^([ei]ī)$", typ, {"pl"}, constant_base("is")},
-- match plural ea, eae
{"^(eae?)$", typ, {"pl"}, constant_base("is")},
-- match eadem
{"^(eadem)$", typ, {}, constant_base("īdem")},
-- match īdem, idem
{"^([īi]dem)$", typ, {}, constant_base("īdem")},
-- match plural īdem
{"^(īdem)$", typ, {"pl"}},
-- match plural eadem, eaedem
{"^(eae?dem)$", typ, {"pl"}, constant_base("īdem")},
-- match illa, ipsa, ista; it doesn't matter if we overmatch because
-- we'll get an error as we use the stem itself in the returned base
{"^(i[lps][lst])a$", typ, {}, function(base, stem2) return base .. "e", nil end},
-- match illud, istud; as above, it doesn't matter if we overmatch
{"^(i[ls][lt])ud$", typ, {}, function(base, stem2) return base .. "e", nil end},
-- match ipsum
{"^(ipsum)$", typ, {}, constant_base("ipse")},
-- match plural illī, ipsī, istī; as above, it doesn't matter if we
-- overmatch
{"^(i[lps][lst])ī$", typ, {"pl"}, function(base, stem2) return base .. "e", nil end},
-- match plural illa, illae, ipsa, ipsae, ista, istae; as above, it
-- doesn't matter if we overmatch
{"^(i[lps][lst])ae?$", typ, {"pl"}, function(base, stem2) return base .. "e", nil end},
-- Detect quī as non-plural unless .pl specified.
{"^(quī)$", typ, {}},
-- Otherwise detect quī as plural.
{"^(quī)$", typ, {"pl"}},
-- Same for quae.
{"^(quae)$", typ, {}, constant_base("quī")},
{"^(quae)$", typ, {"pl"}, constant_base("quī")},
{"^(quid)$", typ, {}, constant_base("quis")},
{"^(quod)$", typ, {}, constant_base("quī")},
{"^(qui[cd]quid)$", typ, {}, constant_base("quisquis")},
{"^(quīquī)$", typ, {"pl"}, constant_base("quisquis")},
{"^(quaequae)$", typ, {"pl"}, constant_base("quisquis")},
-- match all remaining lemmas in lemma form
{"", typ, {}},
})
else -- 3-3 or 3-P
return get_adj_type_and_subtype_by_ending(lemma, stem2, typ, subtypes, {
{"ēs", typ, {"pl"}, base_as_stem2},
{"ia", typ, {"pl"}, base_as_stem2},
{"", typ, {}},
}, decl3_stem2)
end
end
-- Parse a segment (e.g. "lūna<1>", "aegis/aegid<3.Greek>", "bōs<irreg.F>",
-- bonus<+>", or "[[vetus]]/veter<3+.-I>"), consisting of a lemma (or optionally
-- a lemma/stem) and declension+subtypes, where a + in the declension indicates
-- an adjective. Brackets can be present to indicate links, for use in
-- {{la-noun}} and {{la-adj}}. The return value is a table, e.g.:
-- {
-- decl = "1",
-- headword_decl = "1",
-- is_adj = false,
-- orig_lemma = "lūna",
-- lemma = "lūna",
-- stem2 = nil,
-- gender = "F",
-- types = {["F"] = true},
-- args = {"lūn"}
-- }
--
-- or
--
-- {
-- decl = "3",
-- headword_decl = "3",
-- is_adj = false,
-- orig_lemma = "aegis",
-- lemma = "aegis",
-- stem2 = "aegid",
-- gender = nil,
-- types = {["Greek"] = true},
-- args = {"aegis", "aegid"}
-- }
--
-- or
--
-- {
-- decl = "irreg",
-- headword_decl = "irreg/3",
-- is_adj = false,
-- orig_lemma = "bōs",
-- lemma = "bōs",
-- stem2 = nil,
-- gender = "F",
-- types = {["F"] = true},
-- args = {"bōs"}
-- }
-- or
--
-- {
-- decl = "1&2",
-- headword_decl = "1&2+",
-- is_adj = true,
-- orig_lemma = "bonus",
-- lemma = "bonus",
-- stem2 = nil,
-- gender = nil,
-- types = {},
-- args = {"bon"}
-- }
--
-- or
--
-- {
-- decl = "3-1",
-- headword_decl = "3-1+",
-- is_adj = true,
-- orig_lemma = "[[vetus]]",
-- lemma = "vetus",
-- stem2 = "veter",
-- gender = nil,
-- types = {},
-- args = {"vetus", "veter"}
-- }
local function parse_segment(segment)
local stem_part, spec_part = rmatch(segment, "^(.*)<(.-)>$")
local stems = rsplit(stem_part, "/", true)
local specs = rsplit(spec_part, ".", true)
local types = {}
local num = nil
local loc = false
local args = {}
local decl
for j, spec in ipairs(specs) do
if j == 1 then
decl = spec
else
local begins_with_hyphen
begins_with_hyphen, spec = rmatch(spec, "^(%-?)(.-)$")
spec = begins_with_hyphen .. spec:gsub("%-", "_")
types[spec] = true
end
end
local orig_lemma = stems[1]
if not orig_lemma or orig_lemma == "" then
orig_lemma = current_title.subpageText
end
local lemma = m_links.remove_links(orig_lemma)
local stem2 = stems[2]
if stem2 == "" then
stem2 = nil
end
if #stems > 2 then
error("Too many stems, at most 2 should be given: " .. stem_part)
end
local base, detected_subtypes
local is_adj = false
local gender = nil
if rfind(decl, "%+") then
decl = decl:gsub("%+", "")
base, stem2, decl, detected_subtypes = detect_adj_type_and_subtype(
lemma, stem2, decl, types
)
is_adj = true
headword_decl = irreg_adj_to_decl[lemma] and "irreg/" .. irreg_adj_to_decl[lemma] or decl .. "+"
for _, subtype in ipairs(detected_subtypes) do
if types["-" .. subtype] then
-- if a "cancel subtype" spec is given, remove the cancel spec
-- and don't apply the subtype
types["-" .. subtype] = nil
else
types[subtype] = true
end
end
else
base, stem2, detected_subtypes = detect_noun_subtype(lemma, stem2, decl, types)
headword_decl = irreg_noun_to_decl[lemma] and "irreg/" .. irreg_noun_to_decl[lemma] or decl
for _, subtype in ipairs(detected_subtypes) do
if types["-" .. subtype] then
-- if a "cancel subtype" spec is given, remove the cancel spec
-- and don't apply the subtype
types["-" .. subtype] = nil
elseif (subtype == "M" or subtype == "F" or subtype == "N") and
(types.M or types.F or types.N) then
-- if gender already specified, don't create conflicting gender spec
elseif (subtype == "sg" or subtype == "pl" or subtype == "both") and
(types.sg or types.pl or types.both) then
-- if number restriction already specified, don't create conflicting
-- number restriction spec
else
types[subtype] = true
end
end
if not types.pl and not types.both and rfind(lemma, "^[A-ZĀĒĪŌŪȲĂĔĬŎŬ]") then
types.sg = true
end
end
if types.loc then
loc = true
types.loc = nil
end
if types.M then
gender = "M"
elseif types.F then
gender = "F"
elseif types.N then
gender = "N"
end
if types.pl then
num = "pl"
types.pl = nil
elseif types.sg then
num = "sg"
types.sg = nil
end
args[1] = base
args[2] = stem2
return {
decl = decl,
headword_decl = headword_decl,
is_adj = is_adj,
gender = gender,
orig_lemma = orig_lemma,
lemma = lemma,
stem2 = stem2,
types = types,
num = num,
loc = loc,
args = args,
}
end
-- Parse a segment run (i.e. a string with zero or more segments [see
-- parse_segment] and optional surrounding text, e.g. "foenum<2>-graecum<2>"
-- or "[[pars]]/part<3.abl-e-occ-i> [[oratio|ōrātiōnis]]"). The segment run
-- currently cannot contain any alternants (e.g. "((epulum<2.sg>,epulae<1>))").
-- The return value is a table of the following form:
-- {
-- segments = PARSED_SEGMENTS (a list of parsed segments),
-- loc = LOC (a boolean indicating whether any of the individual segments
-- has a locative),
-- num = NUM (the first specified value for a number restriction, or nil if
-- no number restrictions),
-- gender = GENDER (the first specified or inferred gender, or nil if none),
-- propses = PROPSES (list of per-word properties, where each element is an
-- object {
-- decl = DECL (declension),
-- headword_decl = HEADWORD_DECL (declension to be displayed in headword),
-- types = TYPES (set describing the subtypes of a given word),
-- }
-- }
-- Each element in PARSED_SEGMENTS is as returned by parse_segment() but will
-- have an additional .orig_prefix field indicating the text before the segment
-- (including bracketed links) and corresponding .prefix field indicating the text
-- with bracketed links resolved. If there is trailing text, the last element will
-- have only .orig_prefix and .prefix fields containing that trailing text.
local function parse_segment_run(segment_run)
local loc = nil
local num = nil
-- If the segment run begins with a hyphen, include the hyphen in the
-- set of allowed characters for a declined segment. This way, e.g. the
-- suffix [[-cen]] can be declared as {{la-ndecl|-cen/-cin<3>}} rather than
-- {{la-ndecl|-cen/cin<3>}}, which is less intuitive.
local is_suffix = rfind(segment_run, "^%-")
local segments = {}
local propses = {}
-- We want to not break up a bracketed link followed by <> even if it has a space or
-- hyphen in it. So we do an outer capturing split to find the bracketed links followed
-- by <>, then do inner capturing splits on all the remaining text to find the other
-- declined terms.
local bracketed_segments = m_string_utilities.capturing_split(segment_run, "(%[%[[^%[%]]-%]%]<.->)")
for i, bracketed_segment in ipairs(bracketed_segments) do
if i % 2 == 0 then
table.insert(segments, bracketed_segment)
else
for _, subsegment in ipairs(m_string_utilities.capturing_split(
bracketed_segment, is_suffix and "([^<> ,]+<.->)" or "([^<> ,%-]+<.->)"
)) do
table.insert(segments, subsegment)
end
end
end
local parsed_segments = {}
local gender = nil
for i = 2, (#segments - 1), 2 do
local parsed_segment = parse_segment(segments[i])
-- Overall locative is true if any segments call for locative.
loc = loc or parsed_segment.loc
-- The first specified value for num is used becomes the overall value.
num = num or parsed_segment.num
gender = gender or parsed_segment.gender
parsed_segment.orig_prefix = segments[i - 1]
parsed_segment.prefix = m_links.remove_links(segments[i - 1])
table.insert(parsed_segments, parsed_segment)
local props = {
decl = parsed_segment.decl,
headword_decl = parsed_segment.headword_decl,
types = parsed_segment.types,
}
table.insert(propses, props)
end
if segments[#segments] ~= "" then
table.insert(parsed_segments, {
orig_prefix = segments[#segments],
prefix = m_links.remove_links(segments[#segments]),
})
end
return {
segments = parsed_segments,
loc = loc,
num = num,
gender = gender,
propses = propses,
}
end
-- Parse an alternant, e.g. "((epulum<2.sg>,epulae<1>))",
-- "((Serapis<3>,Serapis/Serapid<3>))" or
-- "((rēs<5>pūblica<1>,rēspūblica<1>))". The return value is a table of the form
-- {
-- alternants = PARSED_ALTERNANTS (a list of segment runs, each of which is a
-- list of parsed segments as returned by parse_segment_run()),
-- loc = LOC (a boolean indicating whether any of the individual segment runs
-- has a locative),
-- num = NUM (the overall number restriction, one of "sg", "pl" or "both"),
-- gender = GENDER (the first specified or inferred gender, or nil if none),
-- propses = PROPSES (list of lists of per-word properties),
-- }
local function parse_alternant(alternant)
local parsed_alternants = {}
local alternant_spec = rmatch(alternant, "^%(%((.*)%)%)$")
local alternants = rsplit(alternant_spec, ",")
local loc = false
local num = nil
local gender = nil
local propses = {}
for i, alternant in ipairs(alternants) do
local parsed_run = parse_segment_run(alternant)
table.insert(parsed_alternants, parsed_run)
loc = loc or parsed_run.loc
-- First time through, set the overall num to the num of the first run,
-- even if nil. After that, if we ever see a run with a different value
-- of num, set the overall num to "both". That way, if all alternants
-- don't specify a num, we get an unspecified num, but if some do and
-- some don't, we get both, because an unspecified num defaults to
-- both.
if i == 1 then
num = parsed_run.num
elseif num ~= parsed_run.num then
-- FIXME, this needs to be rethought to allow for
-- adjective alternants.
num = "both"
end
gender = gender or parsed_run.gender
table.insert(propses, parsed_run.propses)
end
return {
alternants = parsed_alternants,
loc = loc,
num = num,
gender = gender,
propses = propses,
}
end
-- Parse a segment run (see parse_segment_run()). Unlike for
-- parse_segment_run(), this can contain alternants such as
-- "((epulum<2.sg>,epulae<1>))" or "((Serapis<3.sg>,Serapis/Serapid<3.sg>))"
-- embedded in it to indicate words composed of multiple declensions.
-- The return value is a table of the following form:
-- {
-- segments = PARSED_SEGMENTS (a list of parsed segments),
-- loc = LOC (a boolean indicating whether any of the individual segments has
-- a locative),
-- num = NUM (the first specified value for a number restriction, or nil if
-- no number restrictions),
-- gender = GENDER (the first specified or inferred gender, or nil if none),
-- propses = PROPSES (list of either per-word property objects or lists of
-- lists of such objects),
-- }.
-- Each element in PARSED_SEGMENTS is one of three types:
--
-- 1. A regular segment, as returned by parse_segment() but with additional
-- .prefix and .orig_prefix fields indicating the text before the segment, as per
-- the return value of parse_segment_run().
-- 2. A raw-text segment, i.e. a table with only .prefix and .orig_prefix fields
-- containing the raw text.
-- 3. An alternating segment, i.e. a table of the following form:
-- {
-- alternants = PARSED_SEGMENT_RUNS (a list of parsed segment runs),
-- loc = LOC (a boolean indicating whether the segment as a whole has a
-- locative),
-- num = NUM (the number restriction of the segment as a whole),
-- gender = GENDER (the first specified or inferred gender, or nil if none),
-- propses = PROPSES (list of lists of per-word property objects),
-- }
-- Note that each alternant is a segment run rather than a single parsed
-- segment to allow for alternants like "((rēs<5>pūblica<1>,rēspūblica<1>))".
-- The parsed segment runs in PARSED_SEGMENT_RUNS are tables as returned by
-- parse_segment_run() (of the same form as the overall return value of
-- parse_segment_run_allowing_alternants()).
local function parse_segment_run_allowing_alternants(segment_run)
if rfind(segment_run, " ") then
track("has-space")
end
if rfind(segment_run, "%(%(") then
track("has-alternant")
end
local alternating_segments = m_string_utilities.capturing_split(segment_run, "(%(%(.-%)%))")
local parsed_segments = {}
local loc = false
local num = nil
local gender = nil
local propses = {}
for i = 1, #alternating_segments do
local alternating_segment = alternating_segments[i]
if alternating_segment ~= "" then
if i % 2 == 1 then
local parsed_run = parse_segment_run(alternating_segment)
for _, parsed_segment in ipairs(parsed_run.segments) do
table.insert(parsed_segments, parsed_segment)
end
loc = loc or parsed_run.loc
num = num or parsed_run.num
gender = gender or parsed_run.gender
for _, props in ipairs(parsed_run.propses) do
table.insert(propses, props)
end
else
local parsed_alternating_segment = parse_alternant(alternating_segment)
table.insert(parsed_segments, parsed_alternating_segment)
loc = loc or parsed_alternating_segment.loc
num = num or parsed_alternating_segment.num
gender = gender or parsed_alternating_segment.gender
table.insert(propses, parsed_alternating_segment.propses)
end
end
end
if #parsed_segments > 1 then
track("multiple-segments")
end
return {
segments = parsed_segments,
loc = loc,
num = num,
gender = gender,
propses = propses,
}
end
-- Combine each form in FORMS (a list of forms associated with a slot) with each
-- form in NEW_FORMS (either a single string for a single form, or a list of
-- forms) by concatenating EXISTING_FORM .. PREFIX .. NEW_FORM. Also combine
-- NOTES (a table specifying the footnotes associated with each existing form,
-- i.e. a map from form indices to lists of footnotes) with NEW_NOTES (new
-- footnotes associated with the new forms, in the same format as NOTES). Return
-- a pair NEW_FORMS, NEW_NOTES where either or both of FORMS and NOTES (but not
-- the sublists in NOTES) may be destructively modified to generate the return
-- values.
local function append_form(forms, notes, new_forms, new_notes, prefix)
new_forms = new_forms or ""
notes = notes or {}
new_notes = new_notes or {}
prefix = prefix or ""
if type(new_forms) == "table" and #new_forms == 1 then
new_forms = new_forms[1]
end
if type(new_forms) == "string" then
-- If there's only one new form, destructively modify the existing
-- forms and notes for this new form and its footnotes.
for i = 1, #forms do
forms[i] = forms[i] .. prefix .. new_forms
if new_notes[1] then
if not notes[i] then
notes[i] = new_notes[1]
else
local combined_notes = ut.clone(notes[i])
for _, note in ipairs(new_notes[1]) do
table.insert(combined_notes, note)
end
notes[i] = combined_notes
end
end
end
return forms, notes
else
-- If there are multiple new forms, we need to loop over all
-- combinations of new and old forms. In that case, use new tables
-- for the combined forms and notes.
local ret_forms = {}
local ret_notes = {}
for i=1, #forms do
for j=1, #new_forms do
table.insert(ret_forms, forms[i] .. prefix .. new_forms[j])
if new_notes[j] then
if not notes[i] then
-- We are constructing a linearized matrix of size
-- NI x NJ where J is in the inner loop. If I and J
-- are zero-based, the linear index of (I, J) is
-- I * NJ + J. However, we are one-based, so the
-- same formula won't work. Instead, we effectively
-- need to convert to zero-based indices, compute
-- the zero-based linear index, and then convert it
-- back to a one-based index, i.e.
--
-- (I - 1) * NJ + (J - 1) + 1
--
-- i.e. (I - 1) * NJ + J.
ret_notes[(i - 1) * #new_forms + j] = new_notes[j]
else
local combined_notes = ut.clone(notes[i])
for _, note in ipairs(new_notes[j]) do
table.insert(combined_notes, note)
end
ret_notes[(i - 1) * #new_forms + j] = combined_notes
end
end
end
end
return ret_forms, ret_notes
end
end
-- Destructively modify any forms in FORMS (a map from a slot to a form or a
-- list of forms) by converting sequences of ae, oe, Ae or Oe to the
-- appropriate ligatures.
local function apply_ligatures(forms, is_adj)
for slot in iter_slots(is_adj) do
if type(forms[slot]) == "string" then
forms[slot] = forms[slot]:gsub("[AaOo]e", ligatures)
elseif type(forms[slot]) == "table" then
for i = 1, #forms[slot] do
forms[slot][i] = forms[slot][i]:gsub("[AaOo]e", ligatures)
end
end
end
end
-- Destructively modify any forms in FORMS (a map from a slot to a form or a
-- list of forms) by converting final m to n.
local function apply_sufn(forms, is_adj)
for slot in iter_slots(is_adj) do
if type(forms[slot]) == "string" then
forms[slot] = forms[slot]:gsub("m$", "n")
elseif type(forms[slot]) == "table" then
for i = 1, #forms[slot] do
forms[slot][i] = forms[slot][i]:gsub("m$", "n")
end
end
end
end
-- If NUM == "sg", copy the singular forms to the plural ones; vice-versa if
-- NUM == "pl". This should allow for the equivalent of plural
-- "alpha and omega" formed from two singular nouns, and for the equivalent of
-- plural "St. Vincent and the Grenadines" formed from a singular noun and a
-- plural noun. (These two examples actually occur in Russian, at least.)
local function propagate_number_restrictions(forms, num, is_adj)
if num == "sg" or num == "pl" then
for slot in iter_slots(is_adj) do
if rfind(slot, num) then
local other_num_slot = num == "sg" and slot:gsub("sg", "pl") or slot:gsub("pl", "sg")
forms[other_num_slot] = type(forms[slot]) == "table" and ut.clone(forms[slot]) or forms[slot]
end
end
end
end
local function join_sentences(sentences, joiner)
-- Lowercase the first letter of all but the first sentence, and remove the
-- final period from all but the last sentence. Then join together with the
-- joiner (e.g. " and " or " or ").
-- FIXME: Should we join three or more as e.g. "foo, bar and baz"?
local sentences_to_join = {}
for i, sentence in ipairs(sentences) do
if i < #sentences then
sentence = rsub(sentence, "%.$", "")
end
if i > 1 then
sentence = m_string_utilities.lcfirst(sentence)
end
table.insert(sentences_to_join, sentence)
end
return table.concat(sentences_to_join, joiner)
end
-- Construct the declension of a parsed segment run of the form returned by
-- parse_segment_run() or parse_segment_run_allowing_alternants(). Return value
-- is a table
-- {
-- forms = FORMS (keyed by slot, list of forms for that slot),
-- notes = NOTES (keyed by slot, map from form indices to lists of footnotes),
-- title = TITLE (list of titles for each segment in the run),
-- categories = CATEGORIES (combined categories for all segments),
-- voc = BOOLEAN (false if any adjective in the run has no vocative),
-- }
local function decline_segment_run(parsed_run, pos, is_adj)
local declensions = {
-- For each possible slot (e.g. "abl_sg"), list of possible forms.
forms = {},
-- Keyed by slot (e.g. "abl_sg"). Value is a table indicating the footnotes
-- corresponding to the forms for that slot. Each such table maps indices
-- (the index of the corresponding form) to a list of one or more
-- footnotes.
notes = {},
title = {},
subtitleses = {},
orig_titles = {},
categories = {},
-- FIXME, do we really need to special-case this? Maybe the nonexistent vocative
-- form will automatically propagate up through the other forms.
voc = true,
-- May be set true if declining a 1-1 adjective
noneut = false,
}
for slot in iter_slots(is_adj) do
declensions.forms[slot] = {""}
end
for _, seg in ipairs(parsed_run.segments) do
if seg.decl then -- not an alternant, not a constant segment
seg.loc = parsed_run.loc
seg.num = seg.num or parsed_run.num
seg.gender = seg.gender or parsed_run.gender
local data
local potential_lemma_slots
if seg.is_adj then
if not m_adj_decl[seg.decl] then
error("Unrecognized declension '" .. seg.decl .. "'")
end
potential_lemma_slots = potential_adj_lemma_slots
data = {
subtitles = {},
footnote = "",
num = seg.num or "",
gender = seg.gender,
voc = true,
noneut = false,
pos = is_adj and pos or "adjectives",
forms = {},
types = seg.types,
categories = {},
notes = {},
}
m_adj_decl[seg.decl](data, seg.args)
if not data.voc then
declensions.voc = false
end
if data.noneut then
declensions.noneut = true
end
-- Construct title out of "original title" and subtitles.
if data.types.sufn then
table.insert(data.subtitles, {"with", " ''m'' → ''n'' in compounds"})
elseif data.types.not_sufn then
table.insert(data.subtitles, {"without", " ''m'' → ''n'' in compounds"})
end
-- Record original title and subtitles for use in alternant title-constructing code.
table.insert(declensions.orig_titles, data.title)
if #data.subtitles > 0 then
local subtitles = {}
for _, subtitle in ipairs(data.subtitles) do
if type(subtitle) == "table" then
-- Occurs e.g. with ''idem'', ''quīdam''
table.insert(subtitles, table.concat(subtitle))
else
table.insert(subtitles, subtitle)
end
end
data.title = data.title .. " (" .. table.concat(subtitles, ", ") .. ")"
end
table.insert(declensions.subtitleses, data.subtitles)
else
if not m_noun_decl[seg.decl] then
error("Unrecognized declension '" .. seg.decl .. "'")
end
potential_lemma_slots = potential_noun_lemma_slots
data = {
subtitles = {},
footnote = "",
num = seg.num or "",
loc = seg.loc,
pos = pos,
forms = {},
types = seg.types,
categories = {},
notes = {},
}
m_noun_decl[seg.decl](data, seg.args)
-- Construct title out of "original title" and subtitles.
if not data.title then
local apparent_decl = rmatch(seg.headword_decl, "^irreg/(.*)$")
if apparent_decl then
if #data.subtitles == 0 then
table.insert(data.subtitles, glossary_link("irregular"))
end
else
apparent_decl = seg.headword_decl
end
if declension_to_english[apparent_decl] then
local english = declension_to_english[apparent_decl]
data.title = "[[Appendix:Latin " .. english .. " declension|" .. english .. "-declension]]"
elseif apparent_decl == "irreg" then
data.title = glossary_link("irregular")
elseif apparent_decl == "indecl" or apparent_decl == "0" then
data.title = glossary_link("indeclinable")
else
error("Internal error! Don't recognize noun declension " .. apparent_decl)
end
data.title = data.title .. " noun"
end
if data.types.sufn then
table.insert(data.subtitles, {"with", " ''m'' → ''n'' in compounds"})
elseif data.types.not_sufn then
table.insert(data.subtitles, {"without", " ''m'' → ''n'' in compounds"})
end
-- Record original title and subtitles for use in alternant title-constructing code.
table.insert(declensions.orig_titles, data.title)
if #data.subtitles > 0 then
local subtitles = {}
for _, subtitle in ipairs(data.subtitles) do
if type(subtitle) == "table" then
-- Occurs e.g. with 1st-declension ''-ābus'' ending where
-- we want a common prefix to be extracted out if possible
-- in the alternant title-generating code.
table.insert(subtitles, table.concat(subtitle))
else
table.insert(subtitles, subtitle)
end
end
data.title = data.title .. " (" .. table.concat(subtitles, ", ") .. ")"
end
table.insert(declensions.subtitleses, data.subtitles)
end
-- Generate linked variants of slots that may be the lemma.
-- If the form is the same as the lemma (with links removed),
-- substitute the original lemma (with links included).
for _, slot in ipairs(potential_lemma_slots) do
local forms = data.forms[slot]
if forms then
local linked_forms = {}
if type(forms) ~= "table" then
forms = {forms}
end
for _, form in ipairs(forms) do
if form == seg.lemma then
table.insert(linked_forms, seg.orig_lemma)
else
table.insert(linked_forms, form)
end
end
data.forms["linked_" .. slot] = linked_forms
end
end
if seg.types.lig then
apply_ligatures(data.forms, is_adj)
end
if seg.types.sufn then
apply_sufn(data.forms, is_adj)
end
propagate_number_restrictions(data.forms, seg.num, is_adj)
for slot in iter_slots(is_adj) do
-- 1. Select the forms to append to the existing ones.
local new_forms
if is_adj then
if not seg.is_adj then
error("Can't decline noun '" .. seg.lemma .. "' when overall term is an adjective")
end
new_forms = data.forms[slot]
if not new_forms and slot:find("_[fn]$") then
new_forms = data.forms[slot:gsub("_[fn]$", "_m")]
end
elseif seg.is_adj then
if not seg.gender then
error("Declining modifying adjective " .. seg.lemma .. " but don't know gender of associated noun")
end
-- Select the appropriately gendered equivalent of the case/number
-- combination. Some adjectives won't have feminine or neuter
-- variants, though (e.g. 3-1 and 3-2 adjectives don't have a
-- distinct feminine), so in that case select the masculine.
new_forms = data.forms[slot .. "_" .. mw.ustring.lower(seg.gender)]
or data.forms[slot .. "_m"]
else
new_forms = data.forms[slot]
end
-- 2. Extract the new footnotes in the format we require, which is
-- different from the format passed in by the declension functions.
local new_notes = {}
if type(new_forms) == "string" and data.notes[slot .. "1"] then
new_notes[1] = {data.notes[slot .. "1"]}
elseif new_forms then
for j = 1, #new_forms do
if data.notes[slot .. j] then
new_notes[j] = {data.notes[slot .. j]}
end
end
end
-- 3. Append new forms and footnotes to the existing ones.
declensions.forms[slot], declensions.notes[slot] = append_form(
declensions.forms[slot], declensions.notes[slot], new_forms,
new_notes, slot:find("linked") and seg.orig_prefix or seg.prefix)
end
if not seg.types.nocat then
for _, cat in ipairs(data.categories) do
ut.insert_if_not(declensions.categories, cat)
end
end
if seg.prefix ~= "" and seg.prefix ~= "-" and seg.prefix ~= " " then
table.insert(declensions.title, glossary_link("indeclinable") .. " portion")
end
table.insert(declensions.title, data.title)
elseif seg.alternants then
local seg_declensions = nil
local seg_titles = {}
local seg_subtitleses = {}
local seg_stems_seen = {}
local seg_categories = {}
-- If all alternants have exactly one non-constant segment and all are
-- of the same declension, we use special code that displays the
-- differences in the subtitles. Otherwise we use more general code
-- that displays the full title and subtitles of each segment,
-- separating segment combined titles by "and" and the segment-run
-- combined titles by "or".
local title_the_hard_way = false
local alternant_decl = nil
local alternant_decl_title = nil
for _, this_parsed_run in ipairs(seg.alternants) do
local num_non_constant_segments = 0
for _, segment in ipairs(this_parsed_run.segments) do
if segment.decl then
if not alternant_decl then
alternant_decl = segment.decl
elseif alternant_decl ~= segment.decl then
title_the_hard_way = true
num_non_constant_segments = 500
break
end
num_non_constant_segments = num_non_constant_segments + 1
end
end
if num_non_constant_segments ~= 1 then
title_the_hard_way = true
break
end
end
if not title_the_hard_way then
-- If using the special-purpose code, find the subtypes that are
-- not present in a given alternant but are present in at least
-- one other, and record "negative" variants of these subtypes
-- so that the declension-construction code can record subtitles
-- for these negative variants (so we can construct text like
-- "i-stem or imparisyllabic non-i-stem").
local subtypeses = {}
for _, this_parsed_run in ipairs(seg.alternants) do
for _, segment in ipairs(this_parsed_run.segments) do
if segment.decl then
table.insert(subtypeses, segment.types)
ut.insert_if_not(seg_stems_seen, segment.stem2)
end
end
end
local union = set_union(subtypeses)
for _, this_parsed_run in ipairs(seg.alternants) do
for _, segment in ipairs(this_parsed_run.segments) do
if segment.decl then
local neg_subtypes = set_difference(union, segment.types)
for neg_subtype, _ in pairs(neg_subtypes) do
segment.types["not_" .. neg_subtype] = true
end
end
end
end
end
for _, this_parsed_run in ipairs(seg.alternants) do
this_parsed_run.loc = seg.loc
this_parsed_run.num = this_parsed_run.num or seg.num
this_parsed_run.gender = this_parsed_run.gender or seg.gender
local this_declensions = decline_segment_run(this_parsed_run, pos, is_adj)
if not this_declensions.voc then
declensions.voc = false
end
if this_declensions.noneut then
declensions.noneut = true
end
-- If there's a number restriction on the segment run, blank
-- out the forms outside the restriction. This allows us to
-- e.g. construct heteroclites that decline one way in the
-- singular and a different way in the plural.
if this_parsed_run.num == "sg" or this_parsed_run.num == "pl" then
for slot in iter_slots(is_adj) do
if this_parsed_run.num == "sg" and rfind(slot, "pl") or
this_parsed_run.num == "pl" and rfind(slot, "sg") then
this_declensions.forms[slot] = {}
this_declensions.notes[slot] = nil
end
end
end
if not seg_declensions then
seg_declensions = this_declensions
else
for slot in iter_slots(is_adj) do
-- For a given slot, combine the existing and new forms.
-- We do this by checking to see whether a new form is
-- already present and not adding it if so; in the
-- process, we keep a map from indices in the new forms
-- to indices in the combined forms, for use in
-- combining footnotes below.
local curforms = seg_declensions.forms[slot] or {}
local newforms = this_declensions.forms[slot] or {}
local newform_index_to_new_index = {}
for newj, form in ipairs(newforms) do
local did_break = false
for j = 1, #curforms do
if curforms[j] == form then
newform_index_to_new_index[newj] = j
did_break = true
break
end
end
if not did_break then
table.insert(curforms, form)
newform_index_to_new_index[newj] = #curforms
end
end
seg_declensions.forms[slot] = curforms
-- Now combine the footnotes. Keep in mind that
-- each form may have its own set of footnotes, and
-- in some cases we didn't add a form from the new
-- list of forms because it already occurred in the
-- existing list of forms; in that case, we combine
-- footnotes from the two sources.
local curnotes = seg_declensions.notes[slot]
local newnotes = this_declensions.notes[slot]
if newnotes then
if not curnotes then
curnotes = {}
end
for index, notes in pairs(newnotes) do
local combined_index = newform_index_to_new_index[index]
if not curnotes[combined_index] then
curnotes[combined_index] = notes
else
local combined = mw.clone(curnotes[combined_index])
for _, note in ipairs(newnotes) do
ut.insert_if_not(combined, newnotes)
end
curnotes[combined_index] = combined
end
end
end
end
end
for _, cat in ipairs(this_declensions.categories) do
ut.insert_if_not(seg_categories, cat)
end
ut.insert_if_not(seg_titles, this_declensions.title)
for _, subtitles in ipairs(this_declensions.subtitleses) do
table.insert(seg_subtitleses, subtitles)
end
if not alternant_decl_title then
alternant_decl_title = this_declensions.orig_titles[1]
end
end
-- If overall run is singular, copy singular to plural, and
-- vice-versa. See propagate_number_restrictions() for rationale;
-- also, this should eliminate cases of empty forms, which will
-- cause the overall set of forms for that slot to be empty.
propagate_number_restrictions(seg_declensions.forms, parsed_run.num,
is_adj)
for slot in iter_slots(is_adj) do
declensions.forms[slot], declensions.notes[slot] = append_form(
declensions.forms[slot], declensions.notes[slot],
seg_declensions.forms[slot], seg_declensions.notes[slot], nil)
end
for _, cat in ipairs(seg_categories) do
ut.insert_if_not(declensions.categories, cat)
end
local title_to_insert
if title_the_hard_way then
title_to_insert = join_sentences(seg_titles, " or ")
else
-- Special-purpose title-generation code, for the common
-- situation where each alternant has single-segment runs and
-- all segments belong to the same declension.
--
-- 1. Find the initial subtitles common to all segments.
local first_subtitles = seg_subtitleses[1]
local num_common_subtitles = #first_subtitles
for i = 2, #seg_subtitleses do
local this_subtitles = seg_subtitleses[i]
for j = 1, num_common_subtitles do
if not ut.equals(first_subtitles[j], this_subtitles[j]) then
num_common_subtitles = j - 1
break
end
end
end
-- 2. Construct the portion of the text based on the common subtitles.
local common_subtitles = {}
for i = 1, num_common_subtitles do
if type(first_subtitles[i]) == "table" then
table.insert(common_subtitles, table.concat(first_subtitles[i]))
else
table.insert(common_subtitles, first_subtitles[i])
end
end
local common_subtitle_portion = table.concat(common_subtitles, ", ")
local non_common_subtitle_portion
-- 3. Special-case the situation where there's one non-common
-- subtitle in each segment and a common prefix or suffix to
-- all of them.
local common_prefix, common_suffix
for i = 1, #seg_subtitleses do
local this_subtitles = seg_subtitleses[i]
if #this_subtitles ~= num_common_subtitles + 1 or
type(this_subtitles[num_common_subtitles + 1]) ~= "table" or
#this_subtitles[num_common_subtitles + 1] ~= 2 then
break
end
if i == 1 then
common_prefix = this_subtitles[num_common_subtitles + 1][1]
common_suffix = this_subtitles[num_common_subtitles + 1][2]
else
local this_prefix = this_subtitles[num_common_subtitles + 1][1]
local this_suffix = this_subtitles[num_common_subtitles + 1][2]
if this_prefix ~= common_prefix then
common_prefix = nil
end
if this_suffix ~= common_suffix then
common_suffix = nil
end
if not common_prefix and not common_suffix then
break
end
end
end
if common_prefix or common_suffix then
if common_prefix and common_suffix then
error("Something is wrong, first non-common subtitle is actually common to all segments")
end
if common_prefix then
local non_common_parts = {}
for i = 1, #seg_subtitleses do
table.insert(non_common_parts, seg_subtitleses[i][num_common_subtitles + 1][2])
end
non_common_subtitle_portion = common_prefix .. table.concat(non_common_parts, " or ")
else
local non_common_parts = {}
for i = 1, #seg_subtitleses do
table.insert(non_common_parts, seg_subtitleses[i][num_common_subtitles + 1][1])
end
non_common_subtitle_portion = table.concat(non_common_parts, " or ") .. common_suffix
end
else
-- 4. Join the subtitles that differ from segment to segment.
-- Record whether there are any such differing subtitles.
-- If some segments have differing subtitles and others don't,
-- we use the text "otherwise" for the segments without
-- differing subtitles.
local saw_non_common_subtitles = false
local non_common_subtitles = {}
for i = 1, #seg_subtitleses do
local this_subtitles = seg_subtitleses[i]
local this_non_common_subtitles = {}
for j = num_common_subtitles + 1, #this_subtitles do
if type(this_subtitles[j]) == "table" then
table.insert(this_non_common_subtitles, table.concat(this_subtitles[j]))
else
table.insert(this_non_common_subtitles, this_subtitles[j])
end
end
if #this_non_common_subtitles > 0 then
table.insert(non_common_subtitles, table.concat(this_non_common_subtitles, ", "))
saw_non_common_subtitles = true
else
table.insert(non_common_subtitles, "otherwise")
end
end
non_common_subtitle_portion =
saw_non_common_subtitles and table.concat(non_common_subtitles, " or ") or ""
end
-- 5. Combine the common and non-common subtitle portions.
local subtitle_portions = {}
if common_subtitle_portion ~= "" then
table.insert(subtitle_portions, common_subtitle_portion)
end
if non_common_subtitle_portion ~= "" then
table.insert(subtitle_portions, non_common_subtitle_portion)
end
if #seg_stems_seen > 1 then
table.insert(subtitle_portions,
(number_to_english[#seg_stems_seen] or "" .. #seg_stems_seen) .. " different stems"
)
end
local subtitle_portion = table.concat(subtitle_portions, "; ")
if subtitle_portion ~= "" then
title_to_insert = alternant_decl_title .. " (" .. subtitle_portion .. ")"
else
title_to_insert = alternant_decl_title
end
end
-- Don't insert blank title (happens e.g. with "((ali))quis<irreg+>").
if title_to_insert ~= "" then
table.insert(declensions.title, title_to_insert)
end
else
for slot in iter_slots(is_adj) do
declensions.forms[slot], declensions.notes[slot] = append_form(
declensions.forms[slot], declensions.notes[slot],
slot:find("linked") and seg.orig_prefix or seg.prefix)
end
table.insert(declensions.title, glossary_link("indeclinable") .. " portion")
end
end
-- First title is uppercase, remainder have an indefinite article, joined
-- using "with".
local titles = {}
for i, title in ipairs(declensions.title) do
if i == 1 then
table.insert(titles, m_string_utilities.ucfirst(title))
else
table.insert(titles, m_string_utilities.add_indefinite_article(title))
end
end
declensions.title = table.concat(titles, " with ")
return declensions
end
local function construct_title(args_title, declensions_title, from_headword, parsed_run)
if args_title then
declensions_title = rsub(args_title, "<1>", "[[Appendix:Latin first declension|first declension]]")
declensions_title = rsub(declensions_title, "<1&2>", "[[Appendix:Latin first declension|first]]/[[Appendix:Latin second declension|second declension]]")
declensions_title = rsub(declensions_title, "<2>", "[[Appendix:Latin second declension|second declension]]")
declensions_title = rsub(declensions_title, "<3>", "[[Appendix:Latin third declension|third declension]]")
declensions_title = rsub(declensions_title, "<4>", "[[Appendix:Latin fourth declension|fourth declension]]")
declensions_title = rsub(declensions_title, "<5>", "[[Appendix:Latin fifth declension|fifth declension]]")
if from_headword then
declensions_title = m_string_utilities.lcfirst(rsub(declensions_title, "%.$", ""))
else
declensions_title = m_string_utilities.ucfirst(declensions_title)
end
else
local post_text_parts = {}
if parsed_run.loc then
table.insert(post_text_parts, ", with locative")
end
if parsed_run.num == "sg" then
table.insert(post_text_parts, ", singular only")
elseif parsed_run.num == "pl" then
table.insert(post_text_parts, ", plural only")
end
local post_text = table.concat(post_text_parts)
if from_headword then
declensions_title = m_string_utilities.lcfirst(declensions_title) .. post_text
else
declensions_title = m_string_utilities.ucfirst(declensions_title) .. post_text .. "."
end
end
return declensions_title
end
function export.do_generate_noun_forms(parent_args, pos, from_headword, def, support_num_type)
local params = {
[1] = {required = true, default = def or "aqua<1>"},
footnote = {},
title = {},
num = {},
}
for slot in iter_noun_slots() do
params[slot] = {}
end
if from_headword then
params.lemma = {list = true}
params.id = {}
params.pos = {default = pos}
params.indecl = {type = "boolean"}
params.m = {list = true}
params.f = {list = true}
params.g = {list = true}
end
if support_num_type then
params["type"] = {}
end
local args = m_para.process(parent_args, params)
if args.title then
track("overriding-title")
end
pos = args.pos or pos -- args.pos only set when from_headword
local parsed_run = parse_segment_run_allowing_alternants(args[1])
parsed_run.loc = parsed_run.loc or not not (args.loc_sg or args.loc_pl)
parsed_run.num = args.num or parsed_run.num
local declensions = decline_segment_run(parsed_run, pos, false)
if not parsed_run.loc then
declensions.forms.loc_sg = nil
declensions.forms.loc_pl = nil
end
declensions.title = construct_title(args.title, declensions.title, false, parsed_run)
local all_data = {
title = declensions.title,
footnote = args.footnote or "",
num = parsed_run.num or "",
gender = parsed_run.gender,
propses = parsed_run.propses,
forms = declensions.forms,
categories = declensions.categories,
notes = {},
user_specified = {},
accel = {},
overriding_lemma = args.lemma,
id = args.id,
pos = pos,
indecl = args.indecl,
m = args.m,
f = args.f,
overriding_genders = args.g,
num_type = args["type"],
}
for slot in iter_noun_slots() do
if declensions.notes[slot] then
for index, notes in pairs(declensions.notes[slot]) do
all_data.notes[slot .. index] = notes
end
end
end
process_noun_forms_and_overrides(all_data, args)
return all_data
end
function export.do_generate_adj_forms(parent_args, pos, from_headword, def, support_num_type)
local params = {
[1] = {required = true, default = def or "bonus"},
footnote = {},
title = {},
num = {},
noneut = {type = "boolean"},
}
for slot in iter_adj_slots() do
params[slot] = {}
end
if from_headword then
params.lemma = {list = true}
params.comp = {list = true}
params.sup = {list = true}
params.id = {}
params.pos = {default = pos}
params.indecl = {type = "boolean"}
end
if support_num_type then
params["type"] = {}
end
local args = m_para.process(parent_args, params)
if args.title then
track("overriding-title")
end
pos = args.pos or pos -- args.pos only set when from_headword
local segment_run = args[1]
if not rfind(segment_run, "[<(]") then
-- If the segment run doesn't have any explicit declension specs or alternants,
-- add a default declension spec of <+> to it (or <0+> for indeclinable
-- adjectives). This allows the majority of adjectives to just specify
-- the lemma.
segment_run = segment_run .. (args.indecl and "<0+>" or "<+>")
end
local parsed_run = parse_segment_run_allowing_alternants(segment_run)
parsed_run.loc = parsed_run.loc or not not (
args.loc_sg_m or args.loc_sg_f or args.loc_sg_n or args.loc_pl_m or args.loc_pl_f or args.loc_pl_n
)
parsed_run.num = args.num or parsed_run.num
local overriding_voc = not not (
args.voc_sg_m or args.voc_sg_f or args.voc_sg_n or args.voc_pl_m or args.voc_pl_f or args.voc_pl_n
)
local declensions = decline_segment_run(parsed_run, pos, true)
if not parsed_run.loc then
declensions.forms.loc_sg_m = nil
declensions.forms.loc_sg_f = nil
declensions.forms.loc_sg_n = nil
declensions.forms.loc_pl_m = nil
declensions.forms.loc_pl_f = nil
declensions.forms.loc_pl_n = nil
end
-- declensions.voc is false if any component has no vocative (e.g. quī); in
-- that case, if the user didn't supply any vocative overrides, wipe out
-- any partially-generated vocatives
if not overriding_voc and not declensions.voc then
declensions.forms.voc_sg_m = nil
declensions.forms.voc_sg_f = nil
declensions.forms.voc_sg_n = nil
declensions.forms.voc_pl_m = nil
declensions.forms.voc_pl_f = nil
declensions.forms.voc_pl_n = nil
end
declensions.title = construct_title(args.title, declensions.title, from_headword, parsed_run)
local all_data = {
title = declensions.title,
footnote = args.footnote or "",
num = parsed_run.num or "",
propses = parsed_run.propses,
forms = declensions.forms,
categories = declensions.categories,
notes = {},
user_specified = {},
accel = {},
voc = declensions.voc,
noneut = args.noneut or declensions.noneut,
overriding_lemma = args.lemma,
comp = args.comp,
sup = args.sup,
id = args.id,
pos = pos,
indecl = args.indecl,
num_type = args["type"],
}
for slot in iter_adj_slots() do
if declensions.notes[slot] then
for index, notes in pairs(declensions.notes[slot]) do
all_data.notes[slot .. index] = notes
end
end
end
process_adj_forms_and_overrides(all_data, args)
return all_data
end
function export.show_noun(frame)
local parent_args = frame:getParent().args
local data = export.do_generate_noun_forms(parent_args, "nouns")
show_forms(data, false)
return make_noun_table(data)
end
function export.show_adj(frame)
local parent_args = frame:getParent().args
local data = export.do_generate_adj_forms(parent_args, "adjectives")
partial_show_forms(data, true)
return m_adj_table.make_table(data, data.noneut)
end
function export.generate_noun_forms(frame)
local include_props = frame.args["include_props"]
local parent_args = frame:getParent().args
local data = export.do_generate_noun_forms(parent_args, "nouns")
return concat_forms(data, false, include_props)
end
function export.generate_adj_forms(frame)
local include_props = frame.args["include_props"]
local parent_args = frame:getParent().args
local data = export.do_generate_adj_forms(parent_args, "adjectives")
return concat_forms(data, true, include_props)
end
return export
-- For Vim, so we get 4-space tabs
-- vim: set ts=4 sw=4 noet: