Module:User:Theknightwho/bo-noun

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of Theknightwho, for their own experimentation. Items in this module may be added and removed at Theknightwho's discretion; do not rely on this module's stability.


local export = {}
local m_table = require("Module:table")
local m_links = require("Module:links")
local m_str_utils = require("Module:string utilities")
local lang = require("Module:languages").getByCode("bo")
local iut = require("Module:inflection utilities")
local put = require("Module:parse utilities")
local m_para = require("Module:parameters")

local find = m_str_utils.find
local format = m_str_utils.format
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local match = m_str_utils.match
local sub = m_str_utils.sub
local u = m_str_utils.char

-- Sentinel character (U+FFF0) that is appended to a transliteration to flag a
-- "da drag" stem; the suffix-selection patterns in this file include it in
-- their character sets so such stems pick the same particles as d-final ones.
-- NOTE(review): presumably chosen because it cannot occur in a real
-- transliteration -- confirm.
local da_drag = u(0xFFF0)

-- Maps every generated slot name (CASE_sg / CASE_pl) to the inflection tag
-- string describing it (CASE|s / CASE|p). Built from one list of case codes so
-- the singular and plural entries cannot drift apart.
local output_noun_slots = {}
for _, case_code in ipairs({
	"absv", "gen", "agc", "dat", "loc", "ter", "abl", "ela", "ass", "comc",
}) do
	output_noun_slots[case_code .. "_sg"] = case_code .. "|s"
	output_noun_slots[case_code .. "_pl"] = case_code .. "|p"
end

-- Same as output_noun_slots, plus the two "_linked" absolutive slots, which
-- carry the same tags as their unlinked counterparts.
local output_noun_slots_with_linked = m_table.shallowcopy(output_noun_slots)
for linked_slot, tags in pairs({ absv_sg_linked = "absv|s", absv_pl_linked = "absv|p" }) do
	output_noun_slots_with_linked[linked_slot] = tags
end

-- Positional-parameter -> slot-name lookups.
--   *_sg   : parameters 1-10 are the singular slots, in case order.
--   *_pl   : parameters 1-10 are the plural slots, in case order.
--   *_both : parameters 1-10 are the singular slots, 11-20 the plural slots.
local input_params_to_slots_both = {}
local input_params_to_slots_sg = {}
local input_params_to_slots_pl = {}
do
	local case_order = {
		"absv", "gen", "agc", "dat", "loc", "ter", "abl", "ela", "ass", "comc",
	}
	for position, case_code in ipairs(case_order) do
		input_params_to_slots_sg[position] = case_code .. "_sg"
		input_params_to_slots_pl[position] = case_code .. "_pl"
		input_params_to_slots_both[position] = case_code .. "_sg"
	end
	-- Second pass so the "both" table is filled strictly in index order.
	for position, case_code in ipairs(case_order) do
		input_params_to_slots_both[#case_order + position] = case_code .. "_pl"
	end
end

-- Set of recognised case codes (code -> true), used when parsing overrides.
local cases = {}
for _, case_code in ipairs({
	"absv", "gen", "agc", "dat", "loc", "ter", "abl", "ela", "ass", "comc",
}) do
	cases[case_code] = true
end

--- Decide whether a slot should be suppressed for a given number restriction.
-- @param number "sg", "pl" or anything else (no restriction).
-- @param slot   slot name to test.
-- @return a truthy value when the slot must be skipped, otherwise false/nil.
--
-- NOTE(review): the patterns look for names ending in "_p" / "_s", but every
-- slot name defined in this file ends in "_sg", "_pl" or "_linked", so for
-- those names this never reports a skip. Other code in this file reads
-- data.forms.absv_pl unconditionally and relies on that, so changing the
-- patterns has to be coordinated with it.
local function skip_slot(number, slot)
	if number == "sg" then
		-- Parentheses keep only the first result of find().
		return (find(slot, "_p$")) or false
	elseif number == "pl" then
		return (find(slot, "_s$"))
	end
	return false
end

--- Add one form to data.forms[slot], unless there is nothing to add or the
-- slot is excluded by the word's number restriction.
-- @param data            word spec; data.forms receives the form and
--                        data.number is passed to skip_slot.
-- @param slot            slot name, e.g. "gen_sg".
-- @param stem_and_ending either a complete form (string) or a two-element
--                        table {stem, ending}; nil/false means "no form".
-- @param footnotes       accepted for signature compatibility; not used here.
local function add(data, slot, stem_and_ending, footnotes)
	if not stem_and_ending or skip_slot(data.number, slot) then
		return
	end
	local stem, ending = stem_and_ending, ""
	if type(stem_and_ending) ~= "string" then
		stem, ending = stem_and_ending[1], stem_and_ending[2]
	end
	-- Stems and endings are joined by plain concatenation.
	local function join(base, suffix)
		return base .. suffix
	end
	iut.add_forms(data.forms, slot, stem, ending, join, lang)
end

--- Walk the user-specified slot overrides stored in data.overrides.
-- @param data    word spec; reads data.overrides, data.number, data.footnotes
--                and clears entries of data.forms.
-- @param do_slot predicate called with a slot name; only slots for which it
--                returns a truthy value are processed.
-- Raises an error when an override targets a slot that skip_slot() excludes
-- for the word's number restriction.
--
-- NOTE(review): this function looks unfinished. For each selected slot it
-- discards the existing forms and then computes `form` and `combined_notes`
-- for every override value, but never stores them, so the slot ends up empty.
-- No caller of this function is visible in this file.
local function process_slot_overrides(data, do_slot)
	for slot, overrides in pairs(data.overrides) do
		if skip_slot(data.number, slot) then
			error("Override specified for invalid slot '" .. slot .. "' due to '" .. data.number .. "' number restriction")
		end
		if do_slot(slot) then
			-- Drop whatever was generated for this slot before applying overrides.
			data.forms[slot] = nil
			-- NOTE(review): unused, and "_p$" does not match the "_pl" suffix
			-- used by the slot names in this file.
			local slot_is_plural = find(slot, "_p$")
			for _, override in ipairs(overrides) do
				for _, value in ipairs(override.values) do
					local form = value.form
					-- Word-level footnotes merged with the footnotes of this value.
					local combined_notes = iut.combine_footnotes(data.footnotes, value.footnotes)
				end
			end
		end
	end
end

--- Build the plural absolutive by appending the plural marker to the lemma.
-- @param lemma    base form in Tibetan script.
-- @param translit unused; present so all form builders share one signature.
-- @return lemma followed by "་ཚོ".
local function plural(lemma, translit)
	local plural_marker = "་ཚོ"
	return lemma .. plural_marker
end

--- Append the genitive-series particle selected by the final sound of the stem.
-- The choice is made on the transliteration, first matching rule wins:
--   final d, b, s or the da-drag sentinel -> "་ཀྱི"
--   final n, m, r, l                      -> "་གྱི"
--   final g (including ng)                -> "་གི"
--   final vowel or apostrophe             -> "འི" (attached without a tsheg)
-- @param lemma    base form in Tibetan script.
-- @param translit transliteration of lemma (may end in the da-drag sentinel).
-- @return the suffixed form, or nothing when no rule matches.
local function kyi(lemma, translit)
	if match(translit, "[dbs" .. da_drag .. "]$") then
		return lemma .. "་ཀྱི"
	elseif match(translit, "[nmrl]$") then
		return lemma .. "་གྱི"
	elseif match(translit, "n?g$") then
		return lemma .. "་གི"
	elseif match(translit, "[aāiīuūṛṝḷḹeo']$") then
		return lemma .. "འི"
	end
end

--- Genitive form: the lemma with the bare kyi-series particle attached.
-- @param lemma    base form in Tibetan script.
-- @param translit transliteration of lemma, used to pick the particle.
-- @return whatever kyi() returns (nothing when no particle rule matches).
local function genitive(lemma, translit)
	return kyi(lemma, translit)
end

--- Agentive form: the genitive form with "ས" added.
-- After a vowel-final stem the genitive ending "འི" is replaced by "ས";
-- otherwise "ས" is appended to the genitive particle.
-- @param lemma    base form in Tibetan script.
-- @param translit transliteration of lemma, used to pick the particle.
-- @return the agentive form, or nil when kyi() finds no matching particle
--         (so the caller simply leaves the slot empty, as for the genitive).
local function agentive(lemma, translit)
	local genitive_form = kyi(lemma, translit)
	if not genitive_form then
		-- No particle rule matched; do not try to pattern-match on nil.
		return nil
	end
	if match(genitive_form, "འི$") then
		-- Parentheses drop gsub's substitution count so only the form is returned.
		return (gsub(genitive_form, "འི$", "ས"))
	end
	return genitive_form .. "ས"
end

--- Dative form: the lemma followed by the invariable particle "་ལ".
-- @param lemma    base form in Tibetan script.
-- @param translit unused; present so all form builders share one signature.
local function dative(lemma, translit)
	local particle = "་ལ"
	return lemma .. particle
end

--- Locative form: the lemma followed by the invariable particle "་ན".
-- @param lemma    base form in Tibetan script.
-- @param translit unused; present so all form builders share one signature.
local function locative(lemma, translit)
	local particle = "་ན"
	return lemma .. particle
end

--- Append the terminative particle selected by the final sound of the stem.
-- The choice is made on the transliteration, first matching rule wins:
--   final g (not ng), b or the da-drag sentinel -> "་ཏུ"
--   final ng, d, n, m, r, l                     -> "་དུ"
--   final s                                     -> "་སུ"
--   final vowel or apostrophe                   -> "ར" (attached without a tsheg)
-- @param lemma    base form in Tibetan script.
-- @param translit transliteration of lemma (may end in the da-drag sentinel).
-- @return the suffixed form, or nothing when no rule matches.
local function terminative(lemma, translit)
	if match(translit, "[^n]g$") or match(translit, "[b" .. da_drag .. "]$") then
		return lemma .. "་ཏུ"
	elseif match(translit, "ng$") or match(translit, "[dnmrl]$") then
		return lemma .. "་དུ"
	elseif match(translit, "s$") then
		return lemma .. "་སུ"
	elseif match(translit, "[aāiīuūṛṝḷḹeo']$") then
		return lemma .. "ར"
	end
end

--- Ablative form: the lemma followed by the invariable particle "་ལས".
-- @param lemma    base form in Tibetan script.
-- @param translit unused; present so all form builders share one signature.
local function ablative(lemma, translit)
	local particle = "་ལས"
	return lemma .. particle
end

--- Elative form: the lemma followed by the invariable particle "་ནས".
-- @param lemma    base form in Tibetan script.
-- @param translit unused; present so all form builders share one signature.
local function elative(lemma, translit)
	local particle = "་ནས"
	return lemma .. particle
end

--- Associative form: the lemma followed by the invariable particle "་དང".
-- @param lemma    base form in Tibetan script.
-- @param translit unused; present so all form builders share one signature.
local function associative(lemma, translit)
	local particle = "་དང"
	return lemma .. particle
end

--- Append the comparative particle selected by the final sound of the stem.
--   final g, d, n, b, m, s or the da-drag sentinel -> "་པས"
--   anything else (vowel, apostrophe, r, l, ng)    -> "་བས"
-- A final "ng" is a single nasal whose transliteration merely ends in the
-- letter g, so it is tested first and kept out of the "g" group, in the same
-- way kyi() and terminative() special-case "ng".
-- @param lemma    base form in Tibetan script.
-- @param translit transliteration of lemma (may end in the da-drag sentinel).
-- @return the suffixed form, or nothing when translit is empty.
local function comparative(lemma, translit)
	local pa_finals = "gdnbms" .. da_drag
	if match(translit, "ng$") or match(translit, "[^" .. pa_finals .. "]$") then
		return lemma .. "་བས"
	elseif match(translit, "[" .. pa_finals .. "]$") then
		return lemma .. "་པས"
	end
end

--- Generate every singular and plural case form for one word spec and store
-- them in data.forms, then derive the "_linked" absolutive slots.
-- @param data word spec; reads data.lemma, data.da_drag, data.orig_lemma and
--             data.orig_lemma_no_links, and fills data.forms through add().
-- data.overrides is not consulted here.
local function handle_derived_slots_and_overrides(data)
	-- Transliterate `text` and strip trailing whitespace so that the
	-- final-sound patterns of the particle selectors see the last real letter.
	local function transliterate(text)
		local tr = lang:transliterate(text)
		if not tr then
			error("Unable to transliterate '" .. text .. "'")
		end
		-- Parentheses drop gsub's substitution count.
		return (tr:gsub("%s*$", ""))
	end

	-- One entry per derived case; driving both numbers from the same list
	-- guarantees that every case is generated for singular and plural alike.
	local case_builders = {
		{ "gen", genitive },
		{ "agc", agentive },
		{ "dat", dative },
		{ "loc", locative },
		{ "ter", terminative },
		{ "abl", ablative },
		{ "ela", elative },
		{ "ass", associative },
		{ "comc", comparative },
	}

	local sg_lemma = data.lemma
	local translit_sg = transliterate(sg_lemma)
	if data.da_drag then
		-- Mark da-drag stems with the sentinel the particle selectors look for.
		translit_sg = translit_sg .. da_drag
	end

	-- The plural stem is kept in a local rather than read back from
	-- data.forms, so it is available even if add() skips the absv_pl slot.
	local pl_lemma = plural(sg_lemma, translit_sg)
	local translit_pl = transliterate(pl_lemma)

	add(data, "absv_sg", sg_lemma)
	add(data, "absv_pl", pl_lemma)
	for _, builder in ipairs(case_builders) do
		local case_code, build = builder[1], builder[2]
		-- Assign to locals first so only the first return value reaches add().
		local sg_form = build(sg_lemma, translit_sg)
		local pl_form = build(pl_lemma, translit_pl)
		add(data, case_code .. "_sg", sg_form)
		add(data, case_code .. "_pl", pl_form)
	end

	-- Compute linked versions of potential lemma slots, for use in {{bo-noun}}.
	-- We substitute the original lemma (before removing links) for forms that
	-- are the same as the lemma, if the original lemma has links.
	for _, slot in ipairs({ "absv_sg", "absv_pl" }) do
		iut.insert_forms(data.forms, slot .. "_linked", iut.map_forms(data.forms[slot], function(form)
			if form == data.orig_lemma_no_links and find(data.orig_lemma, "%[%[") then
				return data.orig_lemma
			else
				return form
			end
		end))
	end
end

--- Collect the bracketed footnotes from one alternating run.
-- The run has the shape {text, footnote, "", footnote, "", ...}: footnotes sit
-- at the even indices and the text after each of them must be empty.
-- @param separated_group the alternating run (sequence of strings).
-- @return a list of footnote strings, or nil when the run contains none.
-- Raises an error if non-empty text follows a footnote.
local function fetch_footnotes(separated_group)
	local collected
	local index = 2
	local last_index = #separated_group - 1
	while index <= last_index do
		if separated_group[index + 1] ~= "" then
			error("Extraneous text after bracketed footnotes: '" .. table.concat(separated_group) .. "'")
		end
		collected = collected or {}
		collected[#collected + 1] = separated_group[index]
		index = index + 2
	end
	return collected
end

--- Parse one slot override of the form CASE[pl]:VALUE[:VALUE...], where each
-- VALUE may be followed by bracketed footnotes and "-" stands for an empty
-- value.
-- @param segments alternating run for this override; segments[1] starts with
--                 the case code. segments[1] is rewritten in place to the text
--                 that follows the case/number prefix.
-- @param case     the case code that segments[1] starts with.
-- @return slot    the slot name, CASE_sg or CASE_pl.
-- @return retval  table {values = {{form=..., footnotes=...}, ...}, full = true|nil};
--                 `full` is set when a colon directly follows the prefix.
-- Raises an error for an unrecognised case or an empty value.
local function parse_override(segments, case)
	local retval = { values = {} }
	local part = segments[1]
	if not cases[case] then
		error("Internal error: unrecognized case in override: '" .. table.concat(segments) .. "'")
	end
	-- Everything after the case code: the optional "pl" marker, the colon and
	-- the complete first value.
	local rest = sub(part, len(case) + 1)
	local slot
	if find(rest, "^pl") then
		rest = gsub(rest, "^pl", "")
		slot = case .. "_pl"
	else
		slot = case .. "_sg"
	end
	if find(rest, "^:") then
		retval.full = true
		rest = gsub(rest, "^:", "")
	end
	segments[1] = rest
	local colon_separated_groups = put.split_alternating_runs(segments, ":")
	for _, colon_separated_group in ipairs(colon_separated_groups) do
		local value = {}
		local form = colon_separated_group[1]
		if form == "" then
			error("Use - to indicate an empty ending for slot '" .. slot .. "': '" .. table.concat(segments) .. "'")
		elseif form == "-" then
			value.form = ""
		else
			value.form = form
		end
		value.footnotes = fetch_footnotes(colon_separated_group)
		table.insert(retval.values, value)
	end
	return slot, retval
end

--- Parse the indicator spec attached to a word, e.g. "<both.da_drag.[note]>".
-- Indicators are separated by dots; recognised ones are:
--   CASE[pl]:...   slot override, handed to parse_override()
--   [footnote]...  word-level footnotes (indicator consisting only of brackets)
--   both / pl      number restriction
--   da_drag        marks a da-drag stem
-- @param angle_bracket_spec the spec including its angle brackets.
-- @return a new word-spec table with `overrides` (slot -> list of overrides),
--         `forms` (empty), and optionally `footnotes`, `number`, `da_drag`.
-- Raises an error for blank, duplicate or unrecognised indicators.
local function parse_indicator_spec(angle_bracket_spec)
	local inside = match(angle_bracket_spec, "^<(.*)>$")
	local data = { overrides = {}, forms = {} }
	if inside == "" then
		return data
	end

	-- Return the case code when `part` begins a slot override: the code,
	-- optionally followed by "pl", then a colon or the end of the indicator.
	-- (Inside a character set "$" is a literal character, not an anchor, so
	-- the end-of-string alternative is tested by plain comparison.)
	local function override_case(part)
		for case in pairs(cases) do
			if part == case or part == case .. "pl"
				or find(part, "^" .. case .. ":")
				or find(part, "^" .. case .. "pl:") then
				return case
			end
		end
		return nil
	end

	local segments = put.parse_balanced_segment_run(inside, "[", "]")
	local dot_separated_groups = put.split_alternating_runs(segments, "%.")
	for _, dot_separated_group in ipairs(dot_separated_groups) do
		local part = dot_separated_group[1]
		local case_prefix = override_case(part)
		if case_prefix then
			local slot, override = parse_override(dot_separated_group, case_prefix)
			if data.overrides[slot] then
				table.insert(data.overrides[slot], override)
			else
				data.overrides[slot] = { override }
			end
		elseif part == "" then
			-- Nothing before the first bracket: this indicator is footnotes only.
			if #dot_separated_group == 1 then
				error("Blank indicator: '" .. inside .. "'")
			end
			data.footnotes = fetch_footnotes(dot_separated_group)
		elseif part == "both" or part == "pl" then
			if data.number then
				error("Can't specify number twice: '" .. inside .. "'")
			end
			data.number = part
		elseif part == "da_drag" then
			if data.da_drag then
				error( "Can't specify da drag twice: '" .. inside .. "'" )
			end
			data.da_drag = true
		else
			error("Unrecognized indicator '" .. part .. "': '" .. inside .. "'")
		end
	end
	return data
end

--- Fill in default values on a word spec.
-- Words flagged as adjectives (data.adj) are left untouched; every other word
-- gets number "sg" unless a number was already specified. data.proper does not
-- change the default.
-- @param data word spec, modified in place.
local function set_defaults_and_check_bad_indicators(data)
	if data.adj then
		return
	end
	if not data.number then
		data.number = "sg"
	end
end

--- Apply defaults to every word spec and record whether the whole expression
-- consists of more than one top-level element.
-- @param alternant_multiword_spec parsed spec; each word spec visited by
--        iut.map_word_specs gets its defaults and a boolean `multiword` field.
local function detect_all_indicator_specs(alternant_multiword_spec)
	local top_level_count = #alternant_multiword_spec.alternant_or_word_specs
	local function visit(word_spec)
		set_defaults_and_check_bad_indicators(word_spec)
		word_spec.multiword = top_level_count > 1
	end
	iut.map_word_specs(alternant_multiword_spec, visit)
end

local propagate_multiword_properties

--- Compute `property` for an alternant spec from its alternants.
-- Each alternant is first resolved with propagate_multiword_properties(); the
-- alternant spec then receives the value they share, `mixed_value` if two of
-- them carry different values, or nil if none carries one.
-- @param alternant_spec spec with an `alternants` list; its `property` field is set.
-- @param property       name of the field to propagate.
-- @param mixed_value    value recorded when alternants disagree.
-- @param nouns_only     passed through to propagate_multiword_properties().
local function propagate_alternant_properties(alternant_spec, property, mixed_value, nouns_only)
	local merged
	for _, alternant in ipairs(alternant_spec.alternants) do
		propagate_multiword_properties(alternant, property, mixed_value, nouns_only)
		local value = alternant[property]
		if merged == nil then
			merged = value
		elseif value and merged ~= value then
			merged = mixed_value
		end
	end
	alternant_spec[property] = merged
end

--- Propagate `property` sideways between the words of a multiword spec and
-- record the combined value on the spec itself.
-- Words that count as "noun-like" act as anchors: every word before an anchor
-- (back to the previous anchor) that lacks the property inherits it from that
-- anchor, and words after the last anchor inherit from the last anchor.
-- A word is noun-like when:
--   * it is an alternant (resolved recursively first) and has the property, or
--   * `nouns_only` is truthy and the word is not flagged `adj`, or
--   * `nouns_only` is falsy and the word has the property.
-- @param multiword_spec spec with `alternant_or_word_specs` or `word_specs`.
-- @param property       name of the field to propagate.
-- @param mixed_value    value recorded on the spec when anchors disagree.
-- @param nouns_only     see above.
-- Raises an internal error if a noun-like word has no value for the property.
propagate_multiword_properties = function(multiword_spec, property, mixed_value, nouns_only)
	local specs = multiword_spec.alternant_or_word_specs or multiword_spec.word_specs
	local combined = nil
	local anchor = 0
	for index = 1, #specs do
		local spec = specs[index]
		local is_alternant = spec.alternants
		if is_alternant then
			propagate_alternant_properties(spec, property, mixed_value)
		end
		local nounal
		if not is_alternant and nouns_only then
			nounal = not spec.adj
		else
			nounal = spec[property] and true or false
		end
		if nounal then
			local value = spec[property]
			if not value then
				error("Internal error: noun-type word spec without " .. property .. " set")
			end
			-- Back-fill the words between the previous anchor and this one.
			for back = anchor + 1, index - 1 do
				local earlier = specs[back]
				if not earlier[property] then
					earlier[property] = value
				end
			end
			anchor = index
			if combined == nil then
				combined = value
			elseif combined ~= value then
				combined = mixed_value
			end
		end
	end
	if anchor > 0 then
		-- Trailing words inherit from the last anchor.
		local anchor_spec = specs[anchor]
		for index = anchor + 1, #specs do
			local later = specs[index]
			if not later[property] then
				later[property] = anchor_spec[property]
			end
		end
	end
	multiword_spec[property] = combined
end


--- Push `property` from the top-level spec down to every individual word.
-- Each level uses its own value when it has one and otherwise inherits from
-- the level above (top level -> alternant_or_word_spec -> alternant -> word);
-- `default_propval` is used when the top level has none.
-- @param alternant_multiword_spec top-level spec.
-- @param property                 name of the field to propagate.
-- @param default_propval          fallback for the top level.
-- Raises an error if a word would end up with the literal value "mixed".
local function propagate_properties_downward(alternant_multiword_spec, property, default_propval)
	-- Resolve and store the final value on one word.
	local function settle(word, inherited)
		local value = word[property] or inherited
		if value == "mixed" then
			error("Attempt to assign mixed " .. property .. " to word")
		end
		word[property] = value
	end

	local top_value = alternant_multiword_spec[property] or default_propval
	for _, item in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		if not item.alternants then
			settle(item, top_value)
		else
			local item_value = item[property] or top_value
			for _, alternant in ipairs(item.alternants) do
				local alternant_value = alternant[property] or item_value
				for _, word in ipairs(alternant.word_specs) do
					settle(word, alternant_value)
				end
			end
		end
	end
end

--- Resolve `property` for the whole spec: two sideways passes (first anchored
-- on non-adjective words only, then on any word that has the property),
-- followed by a downward pass that fills in every remaining word.
-- @param alternant_multiword_spec top-level spec.
-- @param property                 name of the field to propagate.
-- @param default_propval          fallback used by the downward pass.
-- @param mixed_value              value recorded when words disagree.
local function propagate_properties(alternant_multiword_spec, property, default_propval, mixed_value)
	local spec = alternant_multiword_spec
	local nouns_only_pass = "nouns only"
	propagate_multiword_properties(spec, property, mixed_value, nouns_only_pass)
	propagate_multiword_properties(spec, property, mixed_value, false)
	propagate_properties_downward(spec, property, default_propval)
end

--- Strip wikilinks from every word's lemma, remembering the original.
-- After this each word spec has:
--   orig_lemma          the lemma exactly as written,
--   orig_lemma_no_links the lemma with links removed,
--   lemma               same as orig_lemma_no_links.
-- @param alternant_multiword_spec parsed spec whose word specs are updated.
local function normalize_all_lemmas(alternant_multiword_spec)
	local function strip_links(word_spec)
		local as_written = word_spec.lemma
		word_spec.orig_lemma = as_written
		local without_links = m_links.remove_links(as_written)
		word_spec.orig_lemma_no_links = without_links
		word_spec.lemma = without_links
	end
	iut.map_word_specs(alternant_multiword_spec, strip_links)
end

--- Compute the category list and the short annotation shown in the table title.
-- Sets on the spec:
--   categories  list of category names ("Tibetan uncountable nouns" for a
--               noun restricted to "sg", "Tibetan pluralia tantum" for "pl").
--   annotation  for manual specs: "sg-only", "pl-only" or "";
--               otherwise the stem type of the key word(s) ("da drag stem" /
--               "regular stem"), followed by "with def pl" or "pl-only"
--               according to the number.
-- @param alternant_multiword_spec top-level spec; reads pos, number, manual,
--        first_noun and alternant_or_word_specs.
local function compute_categories_and_annotation(alternant_multiword_spec)
	local cats = {}
	local function insert(cattype)
		m_table.insertIfNot(cats, "Tibetan " .. cattype)
	end

	local number = alternant_multiword_spec.number
	if alternant_multiword_spec.pos == "noun" then
		if number == "sg" then
			insert("uncountable nouns")
		elseif number == "pl" then
			insert("pluralia tantum")
		end
	end

	if alternant_multiword_spec.manual then
		alternant_multiword_spec.annotation =
			number == "sg" and "sg-only" or
			number == "pl" and "pl-only" or
			""
	else
		local annparts = {}
		local function do_word_spec(data)
			if data.da_drag == true then
				table.insert(annparts, "da drag stem")
			else
				table.insert(annparts, "regular stem")
			end
		end

		-- The annotation describes the key word: the first noun if one was
		-- recorded, otherwise the first element.
		local top_specs = alternant_multiword_spec.alternant_or_word_specs
		local key_entry = alternant_multiword_spec.first_noun or 1
		if #top_specs >= key_entry then
			local key_spec = top_specs[key_entry]
			if key_spec.alternants then
				-- One annotation part per alternant, each from its own key word.
				for _, multiword_spec in ipairs(key_spec.alternants) do
					local alternant_key = multiword_spec.first_noun or 1
					if #multiword_spec.word_specs >= alternant_key then
						do_word_spec(multiword_spec.word_specs[alternant_key])
					end
				end
			else
				do_word_spec(key_spec)
			end
		end

		if number == "both" then
			table.insert(annparts, "with def pl")
		elseif number == "pl" then
			table.insert(annparts, "pl-only")
		end
		alternant_multiword_spec.annotation = table.concat(annparts, " ")
	end
	alternant_multiword_spec.categories = cats
end

--- Join a stem and an ending by plain concatenation.
-- @param stem   the stem.
-- @param ending the ending (may be the empty string).
-- @return stem immediately followed by ending.
local function combine_stem_ending(stem, ending)
	local combined = stem .. ending
	return combined
end

--- Convert the generated forms into display strings via iut.show_forms().
-- The lemma list passed along is taken from the absolutive singular forms, or
-- from the absolutive plural forms when there is no singular.
-- @param alternant_multiword_spec top-level spec; reads `forms` and `footnotes`.
local function show_forms(alternant_multiword_spec)
	local forms = alternant_multiword_spec.forms
	local footnotes = alternant_multiword_spec.footnotes

	local lemmas = {}
	local lemma_forms = forms.absv_sg or forms.absv_pl
	if lemma_forms then
		for _, entry in ipairs(lemma_forms) do
			lemmas[#lemmas + 1] = entry.form
		end
	end

	-- Forms are already canonical, so the canonicalizer is the identity.
	local function identity(form)
		return form
	end

	iut.show_forms(forms, {
		lemmas = lemmas,
		slot_table = output_noun_slots_with_linked,
		lang = lang,
		canonicalize = identity,
		include_translit = true,
		footnotes = footnotes,
		allow_footnote_symbols = not not footnotes,
	})
end

--- Render the declension table as wikitext/HTML.
-- Chooses a two-column template (number "both") or a one-column template
-- ("pl", or "sg" by default), fills in title, annotation and footnotes, and
-- substitutes the {slot} placeholders from alternant_multiword_spec.forms.
-- @param alternant_multiword_spec top-level spec; reads forms, title,
--        annotation, number and pos, and writes title/annotation/notes_clause
--        into forms.
-- @return the formatted table markup.
-- NOTE(review): forms.lemma and forms.footnote are read here but not set in
-- this function -- presumably iut.show_forms() provides them; confirm.
local function make_table(alternant_multiword_spec)

	local forms = alternant_multiword_spec.forms
	
	-- Opening markup of the collapsible frame and table; MINWIDTH is replaced
	-- by `min_width` (in em, default "70").
	-- NOTE(review): {\op} / {\cl} look like format()'s escapes for literal
	-- braces -- confirm against Module:string utilities.
	local function header(min_width)
		min_width = min_width or "70"
		return gsub([===[
<div class="NavFrame" style="display:inline-block;min-width:MINWIDTHem">
<div class="NavHead" >{title}{annotation}&nbsp;</div>
<div class="NavContent">
{\op}| style="text-align:center;min-width:MINWIDTHem;width:100%" class="inflection-table"
|-
]===], "MINWIDTH", min_width)
	end
	
	-- Closing markup: end of table, footnotes block, end of frame.
	local function template_footer()
		return [===[|-
	|{\cl}{notes_clause}</div></div>]===]
	end
	
	-- Two-column layout used when both numbers are shown.
	local table_spec_both = header("45") .. [===[
! style="background:#d9ebff;width:10em" |
! style="background:#d9ebff;width:17.5em" | singular / indefinite
! style="background:#d9ebff;width:17.5em" | definite plural
|-
!  | absolutive
| {absv_sg}
| {absv_pl}
|-
!  | genitive
| {gen_sg}
| {gen_pl}
|-
!  | agentive
| {agc_sg}
| {agc_pl}
|-
!  | dative
| {dat_sg}
| {dat_pl}
|-
!  | locative
| {loc_sg}
| {loc_pl}
|-
!  | terminative
| {ter_sg}
| {ter_pl}
|-
!  | ablative
| {abl_sg}
| {abl_pl}
|-
!  | elative
| {ela_sg}
| {ela_pl}
|-
!  | associative
| {ass_sg}
| {ass_pl}
|-
!  | comparative
| {comc_sg}
| {comc_pl}
]===] .. template_footer()
	
	-- One-column layout; NUM is replaced by `num` ("sg" or "pl") to select the
	-- slot placeholders. NUMBER is substituted first so that the shorter NUM
	-- replacement cannot clobber it.
	-- NOTE(review): the template text contains no NUMBER placeholder, so the
	-- `number` label currently has no visible effect.
	local function table_spec_one(num, number)
		return gsub(gsub(header("30") .. [===[
!  | absolutive
| {absv_NUM}
|-
!  | genitive
| {gen_NUM}
|-
!  | agentive
| {agc_NUM}
|-
!  | dative
| {dat_NUM}
|-
!  | locative
| {loc_NUM}
|-
!  | terminative
| {ter_NUM}
|-
!  | ablative
| {abl_NUM}
|-
!  | elative
| {ela_NUM}
|-
!  | associative
| {ass_NUM}
|-
!  | comparative
| {comc_NUM}
]===] .. template_footer(), "NUMBER", number), "NUM", num)
	end
	
	-- Wrapper placed around the footnote text when there is any.
	local notes_template = [===[
<div style="width:100%;text-align:left;background:#d9ebff">
<div style="display:inline-block;text-align:left;padding-left:1em;padding-right:1em">
{footnote}
</div></div>
]===]
	
	-- An explicit title wins; otherwise build one from the lemma.
	if alternant_multiword_spec.title then
		forms.title = alternant_multiword_spec.title
	else
		forms.title = "Declension of <i lang=\"bo\" class=\"Tibt\">" .. forms.lemma .. "</i>"
	end
	
	local annotation = alternant_multiword_spec.annotation or ""
	if annotation == "" then
		forms.annotation = ""
	else
		forms.annotation = " (<span style=\"font-weight:normal;font-size:small\">" .. annotation .. "</span>)"
	end
	
	-- Pick the layout from the number restriction; anything that is neither
	-- "both" nor "pl" falls through to a singular one-column table.
	local table_spec =
		alternant_multiword_spec.number == "both" and table_spec_both or
		alternant_multiword_spec.number == "pl" and table_spec_one("pl", "definite plural") or
		alternant_multiword_spec.number == "sg" and alternant_multiword_spec.pos == "proper noun" and table_spec_one("sg", "singular") or
		table_spec_one("sg", "indefinite")
	-- Only emit the footnote block when there is footnote text.
	forms.notes_clause = forms.footnote ~= "" and
		format(notes_template, forms) or ""
	return format(table_spec, forms)
end

--- Parse the template arguments and generate all declension forms.
-- @param parent_args   raw template arguments: 1= (lemma with optional <...>
--                      indicator spec), footnote= (list), title=, pos=.
-- @param pos           part of speech; when nil, the pos= argument is used
--                      (default "noun").
-- @param from_headword accepted for interface compatibility; not used here.
-- @param def           accepted for interface compatibility; not used here.
-- @return the alternant multiword spec, with `forms`, `categories`,
--         `annotation`, `title`, `pos`, `footnotes` and `args` filled in.
function export.do_generate_forms(parent_args, pos, from_headword, def)
	local params = {
		[1] = { required = true, default = "སངས་རྒྱས" },
		footnote = { list = true },
		title = {},
		pos = { default = "noun" },
	}
	local args = m_para.process(parent_args, params)
	-- A bare lemma gets an empty indicator spec so the parser treats it as an
	-- inflected word.
	if not match(args[1], "<.*>") then
		args[1] = args[1] .. "<>"
	end
	local parse_props = { parse_indicator_spec = parse_indicator_spec }

	local alternant_multiword_spec = iut.parse_inflected_text(args[1], parse_props)
	alternant_multiword_spec.title = args.title
	alternant_multiword_spec.pos = pos or args.pos
	alternant_multiword_spec.footnotes = args.footnote
	alternant_multiword_spec.args = args
	normalize_all_lemmas(alternant_multiword_spec)
	detect_all_indicator_specs(alternant_multiword_spec)
	propagate_properties(alternant_multiword_spec, "number", "sg", "both")

	local inflect_props = {
		skip_slot = function(slot)
			return skip_slot(alternant_multiword_spec.number, slot)
		end,
		slot_table = output_noun_slots_with_linked,
		inflect_word_spec = handle_derived_slots_and_overrides,
		lang = lang,
	}
	iut.inflect_multiword_or_alternant_multiword_spec(alternant_multiword_spec, inflect_props)
	compute_categories_and_annotation(alternant_multiword_spec)
	return alternant_multiword_spec
end

--- Template entry point: generate the forms and return the rendered declension
-- table followed by the formatted categories.
-- @param frame the Scribunto frame of the #invoke call. Arguments are taken
--              from the parent (template) frame when there is one, otherwise
--              from the frame itself.
-- @return wikitext for the table plus categories.
function export.show(frame)
	-- getParent() returns nil when there is no calling template frame, so the
	-- parent must be checked before its args are read.
	local parent = frame:getParent()
	local parent_args = parent and parent.args or frame.args
	local alternant_multiword_spec = export.do_generate_forms(parent_args, "noun")
	show_forms(alternant_multiword_spec)
	return make_table(alternant_multiword_spec) ..
		require("Module:utilities").format_categories(alternant_multiword_spec.categories, lang)
end

return export