Module:User:Theknightwho/bo-noun
Jump to navigation
Jump to search
- This module sandbox lacks a documentation subpage. You may create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
This is a private module sandbox of Theknightwho, for their own experimentation. Items in this module may be added and removed at Theknightwho's discretion; do not rely on this module's stability.
local export = {}
local m_table = require("Module:table")
local m_links = require("Module:links")
local m_str_utils = require("Module:string utilities")
local lang = require("Module:languages").getByCode("bo")
local iut = require("Module:inflection utilities")
local put = require("Module:parse utilities")
local m_para = require("Module:parameters")
local find = m_str_utils.find
local format = m_str_utils.format
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local match = m_str_utils.match
local sub = m_str_utils.sub
local u = m_str_utils.char
local da_drag = u(0xFFF0)
-- Maps each slot name ("<case>_sg" / "<case>_pl") to its accelerator-style
-- inflection tag ("<case>|s" / "<case>|p").
local output_noun_slots = {}
for _, case_code in ipairs({ "absv", "gen", "agc", "dat", "loc", "ter", "abl", "ela", "ass", "comc" }) do
	output_noun_slots[case_code .. "_sg"] = case_code .. "|s"
	output_noun_slots[case_code .. "_pl"] = case_code .. "|p"
end

-- Same table plus the two "_linked" absolutive slots, which carry the same
-- tags as their unlinked counterparts.
local output_noun_slots_with_linked = m_table.shallowcopy(output_noun_slots)
output_noun_slots_with_linked.absv_sg_linked = "absv|s"
output_noun_slots_with_linked.absv_pl_linked = "absv|p"
-- Positional-parameter -> slot-name lookups. The singular-only and
-- plural-only tables use positions 1-10; the combined table lists the ten
-- singular slots (1-10) followed by the ten plural slots (11-20).
local input_params_to_slots_both = {}
local input_params_to_slots_sg = {}
local input_params_to_slots_pl = {}
for position, case_code in ipairs({ "absv", "gen", "agc", "dat", "loc", "ter", "abl", "ela", "ass", "comc" }) do
	local singular_slot = case_code .. "_sg"
	local plural_slot = case_code .. "_pl"
	input_params_to_slots_sg[position] = singular_slot
	input_params_to_slots_pl[position] = plural_slot
	input_params_to_slots_both[position] = singular_slot
	input_params_to_slots_both[position + 10] = plural_slot
end
-- Set of recognised case codes (code -> true).
local cases = {}
for _, case_code in ipairs({ "absv", "gen", "agc", "dat", "loc", "ter", "abl", "ela", "ass", "comc" }) do
	cases[case_code] = true
end
-- Decide whether `slot` should be skipped for a word restricted to `number`.
-- Returns a truthy value when number is "sg" and the slot name matches
-- "_p$", or number is "pl" and the slot name matches "_s$"; otherwise a
-- falsy value.
-- NOTE(review): the slot names defined in this module end in "_sg", "_pl" or
-- "_linked", so neither pattern can match them and this currently never
-- reports a slot as skippable. Confirm whether "_pl$" / "_sg$" was intended
-- before changing it: callers that read plural forms back out of the form
-- table would be affected.
local function skip_slot(number, slot)
	if number == "sg" then
		local hit = find(slot, "_p$")
		if hit then
			return hit
		end
		return false
	elseif number == "pl" then
		local hit = find(slot, "_s$")
		return hit
	end
	return false
end
-- Add a form to `data.forms[slot]`.
-- `stem_and_ending` is either a string (used as the stem, with an empty
-- ending) or a two-element list {stem, ending}. Nothing is added when it is
-- nil/false or when the slot is skipped for `data.number`.
-- `footnotes` is accepted but not used.
local function add(data, slot, stem_and_ending, footnotes)
	if not stem_and_ending or skip_slot(data.number, slot) then
		return
	end
	local stem, ending = stem_and_ending, ""
	if type(stem_and_ending) ~= "string" then
		stem, ending = stem_and_ending[1], stem_and_ending[2]
	end
	-- Stems and endings are joined by plain concatenation.
	local function join(s, e)
		return s .. e
	end
	iut.add_forms(data.forms, slot, stem, ending, join, lang)
end
-- Walk `data.overrides` and, for every slot accepted by `do_slot(slot)`,
-- clear the generated forms for that slot. Raises an error if an override
-- targets a slot that is excluded by the word's number restriction.
-- NOTE(review): the override values and their combined footnotes are
-- computed below but never stored, so an override currently only empties the
-- slot. Nothing visible in this file calls this function.
local function process_slot_overrides(data, do_slot)
	local function visit_values(slot, overrides)
		local slot_is_plural = find(slot, "_p$") -- computed but unused
		for _, override in ipairs(overrides) do
			for _, value in ipairs(override.values) do
				local form = value.form
				local combined_notes = iut.combine_footnotes(data.footnotes, value.footnotes)
			end
		end
	end

	for slot, overrides in pairs(data.overrides) do
		if skip_slot(data.number, slot) then
			error("Override specified for invalid slot '" .. slot .. "' due to '" .. data.number .. "' number restriction")
		end
		if do_slot(slot) then
			data.forms[slot] = nil
			visit_values(slot, overrides)
		end
	end
end
-- Build the plural stem by appending the particle "་ཚོ" to the lemma.
-- `translit` is accepted for signature parity with the case builders and is
-- not used.
local function plural(lemma, translit)
	local particle = "་ཚོ"
	return lemma .. particle
end
-- Append the genitive particle whose shape is selected by the end of the
-- transliteration:
--   final d, b, s or the da-drag marker -> "་ཀྱི"
--   final n, m, r, l                    -> "་གྱི"
--   final g (including "ng")            -> "་གི"
--   final vowel or apostrophe           -> "འི" (attached without a tsheg)
-- Returns nothing when no rule applies.
local function kyi(lemma, translit)
	if match(translit, "[dbs" .. da_drag .. "]$") then
		return lemma .. "་ཀྱི"
	elseif match(translit, "[nmrl]$") then
		return lemma .. "་གྱི"
	elseif match(translit, "n?g$") then
		return lemma .. "་གི"
	elseif match(translit, "[aāiīuūṛṝḷḹeo']$") then
		return lemma .. "འི"
	end
end
-- Genitive form of `lemma`: delegates directly to kyi(), which picks the
-- particle from the end of `translit`. Returns whatever kyi() returns
-- (nothing when no particle rule applies).
local function genitive(lemma, translit)
return kyi(lemma, translit)
end
-- Agentive form of `lemma`, derived from the genitive form produced by
-- kyi(): a genitive ending in "འི" has that ending replaced by "ས"; any
-- other genitive form simply gets "ས" appended.
-- Returns nil when kyi() finds no applicable particle, so that the caller
-- can skip the slot the same way it does for the genitive, instead of
-- failing while pattern-matching a nil value.
local function agentive(lemma, translit)
	local genitive_form = kyi(lemma, translit)
	if not genitive_form then
		return nil
	end
	if match(genitive_form, "འི$") then
		-- Parentheses keep only the resulting string; gsub's replacement
		-- count must not leak out as a second return value.
		return (gsub(genitive_form, "འི$", "ས"))
	end
	return genitive_form .. "ས"
end
-- Dative form: the lemma followed by the particle "་ལ".
-- `translit` is not used.
local function dative(lemma, translit)
	local particle = "་ལ"
	return lemma .. particle
end
-- Locative form: the lemma followed by the particle "་ན".
-- `translit` is not used.
local function locative(lemma, translit)
	local particle = "་ན"
	return lemma .. particle
end
-- Append the terminative particle selected by the end of the
-- transliteration:
--   final g not preceded by n, final b, or the da-drag marker -> "་ཏུ"
--   final "ng", or final d, n, m, r, l                        -> "་དུ"
--   final s                                                   -> "་སུ"
--   final vowel or apostrophe                                 -> "ར" (no tsheg)
-- Returns nothing when no rule applies.
local function terminative(lemma, translit)
	if match(translit, "[^n]g$") or match(translit, "[b" .. da_drag .. "]$") then
		return lemma .. "་ཏུ"
	elseif match(translit, "ng$") or match(translit, "[dnmrl]$") then
		return lemma .. "་དུ"
	elseif match(translit, "s$") then
		return lemma .. "་སུ"
	elseif match(translit, "[aāiīuūṛṝḷḹeo']$") then
		return lemma .. "ར"
	end
end
-- Ablative form: the lemma followed by the particle "་ལས".
-- `translit` is not used.
local function ablative(lemma, translit)
	local particle = "་ལས"
	return lemma .. particle
end
-- Elative form: the lemma followed by the particle "་ནས".
-- `translit` is not used.
local function elative(lemma, translit)
	local particle = "་ནས"
	return lemma .. particle
end
-- Associative form: the lemma followed by the particle "་དང".
-- `translit` is not used.
local function associative(lemma, translit)
	local particle = "་དང"
	return lemma .. particle
end
-- Append the comparative particle selected by the end of the
-- transliteration:
--   final g, d, n, b, m, s or the da-drag marker -> "་པས"
--   anything else (including final "ng")         -> "་བས"
-- Returns nothing when `translit` is empty (no final character to test).
--
-- The digraph "ng" is tested first: it stands for a single final consonant
-- that takes the "བ" variant, but its last Latin letter is "g", so a plain
-- last-character test would wrongly pick "་པས". This mirrors how
-- terminative() separates "ng" from a bare final "g".
local function comparative(lemma, translit)
	if match(translit, "ng$") then
		return lemma .. "་བས"
	elseif match(translit, "[gdnbms" .. da_drag .. "]$") then
		return lemma .. "་པས"
	elseif match(translit, "[^gdnbms" .. da_drag .. "]$") then
		return lemma .. "་བས"
	end
end
-- Generate every case form for one word spec and store it in `data.forms`.
--
-- Singular forms are built from `data.lemma`; plural forms are built from
-- the plural stem returned by plural(). The particle for each case is
-- chosen from the transliteration of the relevant stem; when `data.da_drag`
-- is set, the da-drag marker is appended to the singular transliteration so
-- the particle rules can see it.
--
-- Fixes relative to the previous version:
--   * the transliterations are locals (they used to be assigned to globals);
--   * the associative plural ("ass_pl") is generated, matching the table
--     that displays it;
--   * the plural stem is computed once and used directly instead of being
--     read back from data.forms["absv_pl"], which fails if that slot was
--     not stored.
-- NOTE(review): despite the name, no overrides are applied here.
local function handle_derived_slots_and_overrides(data)
	-- Case code -> builder, in the order the slots are added.
	local case_builders = {
		{ "gen", genitive },
		{ "agc", agentive },
		{ "dat", dative },
		{ "loc", locative },
		{ "ter", terminative },
		{ "abl", ablative },
		{ "ela", elative },
		{ "ass", associative },
		{ "comc", comparative },
	}

	-- Adds the absolutive plus all derived cases for one number.
	local function add_number(number_suffix, stem, translit)
		add(data, "absv_" .. number_suffix, stem)
		for _, entry in ipairs(case_builders) do
			-- Parentheses keep only the first return value of the builder.
			add(data, entry[1] .. "_" .. number_suffix, (entry[2](stem, translit)))
		end
	end

	local lemma_sg = data.lemma
	local translit_sg = lang:transliterate(lemma_sg):gsub("%s*$", "")
	if data.da_drag then
		translit_sg = translit_sg .. da_drag
	end
	add_number("sg", lemma_sg, translit_sg)

	local lemma_pl = plural(lemma_sg, translit_sg)
	local translit_pl = lang:transliterate(lemma_pl):gsub("%s*$", "")
	add_number("pl", lemma_pl, translit_pl)

	-- Compute linked versions of potential lemma slots, for use in {{bo-noun}}.
	-- We substitute the original lemma (before removing links) for forms that
	-- are the same as the lemma, if the original lemma has links.
	for _, slot in ipairs({ "absv_sg", "absv_pl" }) do
		iut.insert_forms(data.forms, slot .. "_linked", iut.map_forms(data.forms[slot], function(form)
			if form == data.orig_lemma_no_links and find(data.orig_lemma, "%[%[") then
				return data.orig_lemma
			else
				return form
			end
		end))
	end
end
-- Collect the footnotes from an alternating run {text, note, text, note, ...}.
-- Footnotes sit at the even positions; every text segment that follows a
-- footnote must be the empty string, otherwise an error is raised.
-- Returns a list of the footnotes, or nil when there are none.
local function fetch_footnotes(separated_group)
	local collected
	local last = #separated_group - 1
	local idx = 2
	while idx <= last do
		if separated_group[idx + 1] ~= "" then
			error("Extraneous text after bracketed footnotes: '" .. table.concat(separated_group) .. "'")
		end
		collected = collected or {}
		collected[#collected + 1] = separated_group[idx]
		idx = idx + 2
	end
	return collected
end
-- Parse one override such as "gen:FORM[note]:FORM2" or "genpl:FORM".
--
-- `segments` is an alternating run of text and bracketed footnotes whose
-- first element starts with the case code `case` (a key of `cases`). An
-- optional "pl" after the case code selects the plural slot; a ":" directly
-- after that marks the override as `full`. The remaining text is split on
-- ":" into alternative values, each optionally followed by footnotes.
-- A value of "-" stands for an empty form. Note that `segments[1]` is
-- rewritten in place.
--
-- Returns the slot name ("<case>_sg" or "<case>_pl") and a table
-- { full = true|nil, values = { {form = ..., footnotes = ...}, ... } }.
--
-- Fixes relative to the previous version:
--   * the singular slot is "<case>_sg" (it was misspelt "<case>_sl", which
--     is not a slot anywhere in this module);
--   * everything after the case code is kept (only the next three
--     characters used to be kept, truncating the override value);
--   * the empty-form error message no longer tries to concatenate the
--     segments table with a string;
--   * the branch consulting `accented_cases`, which is not defined in this
--     module and could only fail, is gone.
local function parse_override(segments, case)
	if not cases[case] then
		error("Internal error: unrecognized case in override: '" .. table.concat(segments) .. "'")
	end
	local retval = { values = {} }
	local rest = sub(segments[1], len(case) + 1)
	local slot
	if find(rest, "^pl") then
		rest = gsub(rest, "^pl", "")
		slot = case .. "_pl"
	else
		slot = case .. "_sg"
	end
	if find(rest, "^:") then
		retval.full = true
		rest = gsub(rest, "^:", "")
	end
	segments[1] = rest
	local colon_separated_groups = put.split_alternating_runs(segments, ":")
	for _, colon_separated_group in ipairs(colon_separated_groups) do
		local value = {}
		local form = colon_separated_group[1]
		if form == "" then
			error("Use - to indicate an empty ending for slot '" .. slot .. "': '" .. table.concat(segments) .. "'")
		elseif form == "-" then
			value.form = ""
		else
			value.form = form
		end
		value.footnotes = fetch_footnotes(colon_separated_group)
		table.insert(retval.values, value)
	end
	return slot, retval
end
-- Parse the text of an angle-bracket indicator spec such as
-- "<pl.da_drag.gen:FORM[note]>" into a word-spec table.
--
-- The content is split on "." into indicators, each of which is one of:
--   * a case override: a case code from `cases`, optionally followed by
--     "pl", then ":" and the value(s) -- handed to parse_override() and
--     stored in data.overrides[slot];
--   * an empty indicator carrying only bracketed footnotes -> data.footnotes;
--   * "both" or "pl" -> data.number (may be given only once);
--   * "da_drag" -> data.da_drag = true (may be given only once).
-- Anything else raises an error.
--
-- Returns { overrides = {...}, forms = {}, number = ?, da_drag = ?,
-- footnotes = ? }.
--
-- Fix relative to the previous version: override detection used the pattern
-- "<case>[:$]", in which "$" is a literal character inside the set, so only
-- "<case>:" was ever recognised and plural overrides ("<case>pl:") were
-- rejected as unrecognised indicators even though parse_override() supports
-- them. The search also stops at the first matching case code.
local function parse_indicator_spec(angle_bracket_spec)
	local inside = match(angle_bracket_spec, "^<(.*)>$")
	local data = { overrides = {}, forms = {} }
	if inside ~= "" then
		local segments = put.parse_balanced_segment_run(inside, "[", "]")
		local dot_separated_groups = put.split_alternating_runs(segments, "%.")
		for _, dot_separated_group in ipairs(dot_separated_groups) do
			local part = dot_separated_group[1]
			local case_prefix
			for case in pairs(cases) do
				if match(part, "^" .. case .. ":") or match(part, "^" .. case .. "pl:") then
					case_prefix = case
					break
				end
			end
			if case_prefix ~= nil then
				local slot, override = parse_override(dot_separated_group, case_prefix)
				if data.overrides[slot] then
					table.insert(data.overrides[slot], override)
				else
					data.overrides[slot] = { override }
				end
			elseif part == "" then
				if #dot_separated_group == 1 then
					error("Blank indicator: '" .. inside .. "'")
				end
				data.footnotes = fetch_footnotes(dot_separated_group)
			elseif part == "both" or part == "pl" then
				if data.number then
					error("Can't specify number twice: '" .. inside .. "'")
				end
				data.number = part
			elseif part == "da_drag" then
				if data.da_drag then
					error( "Can't specify da drag twice: '" .. inside .. "'" )
				end
				data.da_drag = true
			else
				error("Unrecognized indicator '" .. part .. "': '" .. inside .. "'")
			end
		end
	end
	return data
end
-- Fill in defaults for one word spec: unless the spec is flagged `adj`, a
-- missing `number` defaults to "sg". The default is the same whether or not
-- the spec is flagged `proper`.
local function set_defaults_and_check_bad_indicators(data)
	if data.adj then
		return
	end
	if not data.number then
		data.number = "sg"
	end
end
-- Apply defaults to every word spec and record on each one whether the
-- whole expression consists of more than one top-level word/alternant.
local function detect_all_indicator_specs(alternant_multiword_spec)
	local top_level_count = #alternant_multiword_spec.alternant_or_word_specs
	local multiword = top_level_count > 1
	local function visit(data)
		set_defaults_and_check_bad_indicators(data)
		data.multiword = multiword
	end
	iut.map_word_specs(alternant_multiword_spec, visit)
end
-- Forward declaration: the two propagation functions call each other.
local propagate_multiword_properties

-- Propagate `property` through each alternant of `alternant_spec`, then set
-- the property on the alternant spec itself: the common value if all
-- alternants that have one agree, or `mixed_value` if they differ.
local function propagate_alternant_properties(alternant_spec, property, mixed_value, nouns_only)
	local merged
	for _, alternant in ipairs(alternant_spec.alternants) do
		propagate_multiword_properties(alternant, property, mixed_value, nouns_only)
		local value = alternant[property]
		if merged == nil then
			merged = value
		elseif value and merged ~= value then
			merged = mixed_value
		end
	end
	alternant_spec[property] = merged
end
-- Propagate `property` sideways across the words of a multiword spec.
--
-- Each word is classed as "nounal" or not: an alternant is nounal if it has
-- the property after its own propagation; otherwise, with `nouns_only` set a
-- word is nounal unless flagged `adj`, and without it a word is nounal if it
-- already has the property. Words lacking the property inherit it from the
-- next nounal word to their right, or, after the last nounal word, from
-- that last one. The spec itself receives the common value of the nounal
-- words, or `mixed_value` when they disagree.
propagate_multiword_properties = function(multiword_spec, property, mixed_value, nouns_only)
	local specs = multiword_spec.alternant_or_word_specs or multiword_spec.word_specs
	local merged = nil
	local last_nounal = 0
	for idx = 1, #specs do
		local spec = specs[idx]
		local nounal
		if spec.alternants then
			propagate_alternant_properties(spec, property, mixed_value)
			nounal = spec[property] and true or false
		elseif nouns_only then
			nounal = not spec.adj
		else
			nounal = spec[property] and true or false
		end
		if nounal then
			local value = spec[property]
			if not value then
				error("Internal error: noun-type word spec without " .. property .. " set")
			end
			-- Back-fill the non-nounal words since the previous nounal one.
			for back = last_nounal + 1, idx - 1 do
				local earlier = specs[back]
				earlier[property] = earlier[property] or value
			end
			last_nounal = idx
			if merged == nil then
				merged = value
			elseif merged ~= value then
				merged = mixed_value
			end
		end
	end
	if last_nounal > 0 then
		-- Trailing words take the value of the last nounal word.
		local trailing_value = specs[last_nounal][property]
		for idx = last_nounal + 1, #specs do
			local later = specs[idx]
			later[property] = later[property] or trailing_value
		end
	end
	multiword_spec[property] = merged
end
-- Push `property` down from the top-level spec to every individual word
-- spec. At each level a value already present wins; otherwise the value is
-- inherited from the enclosing level, starting from the top-level value or
-- `default_propval`. Assigning the value "mixed" to a word raises an error.
local function propagate_properties_downward(alternant_multiword_spec, property, default_propval)
	-- Store the resolved value on a single word, rejecting "mixed".
	local function assign(word_spec, value)
		if value == "mixed" then
			error("Attempt to assign mixed " .. property .. " to word")
		end
		word_spec[property] = value
	end

	local top_value = alternant_multiword_spec[property] or default_propval
	for _, item in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		local item_value = item[property] or top_value
		if not item.alternants then
			assign(item, item_value)
		else
			for _, multiword_spec in ipairs(item.alternants) do
				local alternant_value = multiword_spec[property] or item_value
				for _, word_spec in ipairs(multiword_spec.word_specs) do
					assign(word_spec, word_spec[property] or alternant_value)
				end
			end
		end
	end
end
-- Propagate `property` across the whole spec: first sideways in two passes
-- (treating every non-adjective word as a noun, then treating only words
-- that already carry the property as nouns), and finally downward to each
-- word, using `default_propval` where nothing was set.
local function propagate_properties(alternant_multiword_spec, property, default_propval, mixed_value)
	for _, nouns_only in ipairs({ "nouns only", false }) do
		propagate_multiword_properties(alternant_multiword_spec, property, mixed_value, nouns_only)
	end
	propagate_properties_downward(alternant_multiword_spec, property, default_propval)
end
-- For every word spec, remember the lemma as written (`orig_lemma`), store
-- its link-free version (`orig_lemma_no_links`), and make that link-free
-- version the working `lemma`.
local function normalize_all_lemmas(alternant_multiword_spec)
	local function strip_links(data)
		data.orig_lemma = data.lemma
		local plain = m_links.remove_links(data.lemma)
		data.orig_lemma_no_links = plain
		data.lemma = plain
	end
	iut.map_word_specs(alternant_multiword_spec, strip_links)
end
-- Compute the category list and the title annotation for a parsed spec and
-- store them in `alternant_multiword_spec.categories` and `.annotation`.
--
-- Categories (nouns only): number "sg" -> "Tibetan uncountable nouns",
-- number "pl" -> "Tibetan pluralia tantum".
-- NOTE(review): elsewhere in this module "sg" is the default number, so
-- every noun without an explicit number lands in "uncountable nouns" --
-- confirm that this is intended.
--
-- Annotation: for manual specs just "sg-only", "pl-only" or ""; otherwise
-- "da drag stem" / "regular stem" for the key word (one entry per alternant
-- when the key entry is an alternation), followed by "with def pl" for
-- number "both" or "pl-only" for number "pl", joined by spaces.
--
-- Unused locals and the branches that depended on tables which were never
-- filled (irregularities, multiple stems) have been removed; the results are
-- unchanged.
local function compute_categories_and_annotation(alternant_multiword_spec)
	local cats = {}
	local function insert(cattype)
		m_table.insertIfNot(cats, "Tibetan " .. cattype)
	end

	local number = alternant_multiword_spec.number
	if alternant_multiword_spec.pos == "noun" then
		if number == "sg" then
			insert("uncountable nouns")
		elseif number == "pl" then
			insert("pluralia tantum")
		end
	end

	if alternant_multiword_spec.manual then
		alternant_multiword_spec.annotation =
			number == "sg" and "sg-only" or
			number == "pl" and "pl-only" or
			""
	else
		local annparts = {}
		local function do_word_spec(data)
			if data.da_drag == true then
				table.insert(annparts, "da drag stem")
			else
				table.insert(annparts, "regular stem")
			end
		end

		-- The annotation describes the first noun (or the first word when no
		-- first noun has been recorded).
		local key_entry = alternant_multiword_spec.first_noun or 1
		local top_level = alternant_multiword_spec.alternant_or_word_specs
		if #top_level >= key_entry then
			local alternant_or_word_spec = top_level[key_entry]
			if alternant_or_word_spec.alternants then
				for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
					key_entry = multiword_spec.first_noun or 1
					if #multiword_spec.word_specs >= key_entry then
						do_word_spec(multiword_spec.word_specs[key_entry])
					end
				end
			else
				do_word_spec(alternant_or_word_spec)
			end
		end

		if number == "both" then
			table.insert(annparts, "with def pl")
		elseif number == "pl" then
			table.insert(annparts, "pl-only")
		end
		alternant_multiword_spec.annotation = table.concat(annparts, " ")
	end

	alternant_multiword_spec.categories = cats
end
-- Join a stem and an ending by plain concatenation.
local function combine_stem_ending(stem, ending)
	local combined = stem .. ending
	return combined
end
-- Hand the generated forms to iut.show_forms for display formatting.
-- The lemma list is taken from the absolutive singular forms, or from the
-- absolutive plural forms when there is no singular.
local function show_forms(alternant_multiword_spec)
	local forms = alternant_multiword_spec.forms
	local footnotes = alternant_multiword_spec.footnotes

	local lemmas = {}
	local lemma_forms = forms.absv_sg or forms.absv_pl
	if lemma_forms then
		for _, lemma_form in ipairs(lemma_forms) do
			lemmas[#lemmas + 1] = lemma_form.form
		end
	end

	local function identity(form)
		return form
	end

	iut.show_forms(forms, {
		lemmas = lemmas,
		slot_table = output_noun_slots_with_linked,
		lang = lang,
		canonicalize = identity,
		include_translit = true,
		footnotes = footnotes,
		allow_footnote_symbols = not not footnotes,
	})
end
-- Build the wikitext of the declension table.
--
-- Chooses the two-column layout when the spec's number is "both" and a
-- one-column layout otherwise, fills in `forms.title`, `forms.annotation`
-- and `forms.notes_clause`, and formats the chosen template with the
-- entries of `alternant_multiword_spec.forms`.
-- Reads `forms.lemma` (for the default title) and `forms.footnote`, which
-- must already be present -- presumably set by the display-formatting step;
-- confirm against the caller.
--
-- Fix relative to the previous version: the one-column layout substituted a
-- NUMBER placeholder that did not occur in its template, so the column
-- label passed in ("definite plural", "singular", "indefinite") was
-- silently dropped. The template now has a header row carrying that label,
-- styled like the header row of the two-column layout.
local function make_table(alternant_multiword_spec)
	local forms = alternant_multiword_spec.forms

	-- Opening markup shared by both layouts; MINWIDTH is the minimum width in em.
	local function header(min_width)
		min_width = min_width or "70"
		return (gsub([===[
<div class="NavFrame" style="display:inline-block;min-width:MINWIDTHem">
<div class="NavHead" >{title}{annotation} </div>
<div class="NavContent">
{\op}| style="text-align:center;min-width:MINWIDTHem;width:100%" class="inflection-table"
|-
]===], "MINWIDTH", min_width))
	end

	-- Closing markup shared by both layouts.
	local function template_footer()
		return [===[|-
|{\cl}{notes_clause}</div></div>]===]
	end

	local table_spec_both = header("45") .. [===[
! style="background:#d9ebff;width:10em" |
! style="background:#d9ebff;width:17.5em" | singular / indefinite
! style="background:#d9ebff;width:17.5em" | definite plural
|-
! | absolutive
| {absv_sg}
| {absv_pl}
|-
! | genitive
| {gen_sg}
| {gen_pl}
|-
! | agentive
| {agc_sg}
| {agc_pl}
|-
! | dative
| {dat_sg}
| {dat_pl}
|-
! | locative
| {loc_sg}
| {loc_pl}
|-
! | terminative
| {ter_sg}
| {ter_pl}
|-
! | ablative
| {abl_sg}
| {abl_pl}
|-
! | elative
| {ela_sg}
| {ela_pl}
|-
! | associative
| {ass_sg}
| {ass_pl}
|-
! | comparative
| {comc_sg}
| {comc_pl}
]===] .. template_footer()

	-- One-column layout. `num` is the slot suffix ("sg" or "pl") and
	-- `number` the column label. NUMBER must be substituted before NUM,
	-- because "NUM" is a prefix of "NUMBER".
	local function table_spec_one(num, number)
		local template = header("30") .. [===[
! style="background:#d9ebff;width:10em" |
! style="background:#d9ebff;width:17.5em" | NUMBER
|-
! | absolutive
| {absv_NUM}
|-
! | genitive
| {gen_NUM}
|-
! | agentive
| {agc_NUM}
|-
! | dative
| {dat_NUM}
|-
! | locative
| {loc_NUM}
|-
! | terminative
| {ter_NUM}
|-
! | ablative
| {abl_NUM}
|-
! | elative
| {ela_NUM}
|-
! | associative
| {ass_NUM}
|-
! | comparative
| {comc_NUM}
]===] .. template_footer()
		local labelled = gsub(template, "NUMBER", number)
		return (gsub(labelled, "NUM", num))
	end

	local notes_template = [===[
<div style="width:100%;text-align:left;background:#d9ebff">
<div style="display:inline-block;text-align:left;padding-left:1em;padding-right:1em">
{footnote}
</div></div>
]===]

	if alternant_multiword_spec.title then
		forms.title = alternant_multiword_spec.title
	else
		forms.title = "Declension of <i lang=\"bo\" class=\"Tibt\">" .. forms.lemma .. "</i>"
	end

	local annotation = alternant_multiword_spec.annotation or ""
	if annotation == "" then
		forms.annotation = ""
	else
		forms.annotation = " (<span style=\"font-weight:normal;font-size:small\">" .. annotation .. "</span>)"
	end

	local number = alternant_multiword_spec.number
	local table_spec
	if number == "both" then
		table_spec = table_spec_both
	elseif number == "pl" then
		table_spec = table_spec_one("pl", "definite plural")
	elseif number == "sg" and alternant_multiword_spec.pos == "proper noun" then
		table_spec = table_spec_one("sg", "singular")
	else
		table_spec = table_spec_one("sg", "indefinite")
	end

	forms.notes_clause = forms.footnote ~= "" and
		format(notes_template, forms) or ""
	return format(table_spec, forms)
end
-- Parse the template arguments and generate all declension forms.
--
-- `parent_args` are the raw template arguments: parameter 1 is the lemma
-- with an optional "<...>" indicator spec (an empty "<>" is appended when
-- none is present); `footnote`, `title` and `pos` are optional. `pos`, when
-- given, takes precedence over the `pos` argument. `from_headword` and
-- `def` are accepted but not used.
--
-- Returns the parsed spec, with `forms`, `categories` and `annotation`
-- filled in.
--
-- Cleanup relative to the previous version: the inflection properties no
-- longer carry `get_variants = get_variants`, which read a global that is
-- not defined in this module (so the field was always nil), and
-- `parse_props` is created once instead of being initialised to a
-- throwaway table first.
function export.do_generate_forms(parent_args, pos, from_headword, def)
	local params = {
		[1] = { required = true, default = "སངས་རྒྱས" },
		footnote = { list = true },
		title = {},
		pos = { default = "noun" }
	}
	local args = m_para.process(parent_args, params)
	if not match(args[1], "<.*>") then
		args[1] = args[1] .. "<>"
	end
	local parse_props = { parse_indicator_spec = parse_indicator_spec }
	local alternant_multiword_spec = iut.parse_inflected_text(args[1], parse_props)
	alternant_multiword_spec.title = args.title
	alternant_multiword_spec.pos = pos or args.pos
	alternant_multiword_spec.footnotes = args.footnote
	alternant_multiword_spec.args = args
	normalize_all_lemmas(alternant_multiword_spec)
	detect_all_indicator_specs(alternant_multiword_spec)
	propagate_properties(alternant_multiword_spec, "number", "sg", "both")
	local inflect_props = {
		skip_slot = function(slot)
			return skip_slot(alternant_multiword_spec.number, slot)
		end,
		slot_table = output_noun_slots_with_linked,
		inflect_word_spec = handle_derived_slots_and_overrides,
		lang = lang
	}
	iut.inflect_multiword_or_alternant_multiword_spec(alternant_multiword_spec, inflect_props)
	compute_categories_and_annotation(alternant_multiword_spec)
	return alternant_multiword_spec
end
-- Template entry point: generate the forms for a noun and return the
-- declension table followed by the formatted categories.
--
-- Arguments are taken from the parent frame (the template call). When
-- there is no parent frame, the frame's own arguments are used instead;
-- previously that situation raised an error because the missing parent was
-- indexed before the fallback could apply.
function export.show(frame)
	local parent = frame:getParent()
	local parent_args = parent and parent.args or frame.args
	local pos = "noun"
	local alternant_multiword_spec = export.do_generate_forms(parent_args, pos)
	show_forms(alternant_multiword_spec)
	return make_table(alternant_multiword_spec) .. require("Module:utilities").format_categories(alternant_multiword_spec.categories, lang)
end
return export