Jump to content

Module:User:Benwing2/gmq-headword

From Wiktionary, the free dictionary


local export = {}
local pos_functions = {}

--[=[
Author: Benwing2

This module is eventually intended for all the headword templates of all North Germanic languages, especially the ones
that still maintain complex inflection systems (Icelandic, Faroese, Old Norse, Old Swedish, Elfdalian, etc.), but could
definitely be extended to support other North Germanic languages (Swedish, Danish, Norwegian *). Currently it only
supports Icelandic.
]=]

local force_cat = false -- for testing; if true, categories appear in non-mainspace pages

local rfind = mw.ustring.find

local require_when_needed = require("Module:utilities/require when needed")
local m_table = require("Module:table")
local headword_utilities_module = "Module:headword utilities"
local m_headword_utilities = require_when_needed(headword_utilities_module)
local glossary_link = require_when_needed(headword_utilities_module, "glossary_link")
local inflection_utilities_module = "Module:inflection utilities"
local m_inflection_utilities = require_when_needed(inflection_utilities_module)
local is_adjective_module = "Module:User:Benwing2/is-adjective"
local is_common_module = "Module:User:Benwing2/is-common"
local is_noun_module = "Module:User:Benwing2/is-noun"
local dump = mw.dumpObject

local list_param = {list = true, disallow_holes = true}

-- Table of all valid genders by language, mapping user-specified gender specs to canonicalized versions.
local valid_number_suffixes = {"", "-p"}
local valid_genders = {"m", "f", "n"}
local valid_gender_specs = {}

for _, gender in ipairs(valid_genders) do
	for _, number in ipairs(valid_number_suffixes) do
		local spec = gender .. number
		valid_gender_specs[spec] = spec
	end
end

local function track(track_id)
	require("Module:debug/track")("gmq-headword/" .. track_id)
	return true
end

-- The main entry point.
-- This is the only function that can be invoked from a template.
function export.show(frame)
	local iparams = {
		[1] = {required = true},
		["lang"] = {required = true},
		["def"] = {},
	}
	local iargs = require("Module:parameters").process(frame.args, iparams)
	local args = frame:getParent().args
	local poscat = iargs[1]
	local langcode = iargs.lang
	if langcode ~= "is" then
		error("This module currently only works for lang=is")
	end
	local lang = require("Module:languages").getByCode(langcode, true)
	local langname = lang:getCanonicalName()
	local def = iargs.def

	local parargs = frame:getParent().args

	local params = {
		["head"] = list_param,
		["id"] = {},
		["sort"] = {},
		["nolinkhead"] = {type = "boolean"},
		["json"] = {type = "boolean"},
		["pagename"] = {}, -- for testing
	}

	if pos_functions[poscat] then
		local posparams = pos_functions[poscat].params
		if type(posparams) == "function" then
			posparams = posparams(lang)
		end
		for key, val in pairs(posparams) do
			params[key] = val
		end
	end

    local args = require("Module:parameters").process(parargs, params)

	local pagename = args.pagename or mw.title.getCurrentTitle().text

	local data = {
		lang = lang,
		langname = langname,
		pos_category = poscat,
		categories = {},
		heads = args.head,
		genders = {},
		inflections = {},
		pagename = pagename,
		id = args.id,
		sort_key = args.sort,
		force_cat_output = force_cat,
		def = def,
		is_suffix = false,
	}

	if pagename:find("^%-") and poscat ~= "suffix forms" then
		data.is_suffix = true
		data.pos_category = "suffixes"
		local singular_poscat = require("Module:string utilities").singularize(poscat)
		table.insert(data.categories, langname .. " " .. singular_poscat .. "-forming suffixes")
		table.insert(data.inflections, {label = singular_poscat .. "-forming suffix"})
	end

	local multiple_data = nil
	if pos_functions[poscat] then
		multiple_data = pos_functions[poscat].func(args, data)
	end

	-- mw.ustring.toNFD performs decomposition, so letters that decompose
	-- to an ASCII vowel and a diacritic, such as é, are counted as vowels and
	-- do not need to be included in the pattern.
	if not pagename:find("[ %-]") and not rfind(mw.ustring.lower(mw.ustring.toNFD(pagename)), "[aeiouyæœø]") then
		local cat = langname .. " words without vowels"
		if multiple_data then
			table.insert(multiple_data[1].categories, cat)
		else
			table.insert(data.categories, cat)
		end
	end

	if multiple_data then
		if args.json then
			return require("Module:JSON").toJSON(multiple_data)
		end
		return pos_functions[poscat].process_multiple_headword_data(multiple_data)
	else
		if args.json then
			return require("Module:JSON").toJSON(data)
		end
		return require("Module:headword").full_headword(data)
	end
end

-- Parse user-specified arguments in 1=, 2=, etc. for @@ scraping specs. If found, look up the appropriate
-- inflection(s). Returns two values. If there is no scraping error, the first value is a list of declension
-- specs (either directly specified by the user or scraped if an @@ spec was given) and the second value is
-- nil. Otherwise, the first value is nil and the second value is a scraping-error data structure, which the
-- calling function should directly return upstream.
local function parse_noun_adj_scraping_specs(data, declspecs, infl_template, allow_empty_infl)
	if not declspecs[2] and declspecs[1]:find("^@@") then
		local declid
		if declspecs[1] ~= "@@" then
			declid = declspecs[1]:match("^@@%s*:%s*(.+)$")
			if not declid then
				error(("Syntax error in self-scraping spec '%s'"):format(declspecs[1]))
			end
		end
		local decls = require(is_common_module).find_inflection(data.pagename, "is-ndecl", false, false, declid)
		if type(decls) == "string" then
			data.alternant_multiword_spec = {scrape_errors = {decls}}
			return nil, {data}
		end
		for i, declobj in ipairs(decls) do
			decls[i] = declobj.decl
		end
		declspecs = decls
	end
	return declspecs, nil
end

local function do_noun_adj_form(this_data, alternant_multiword_spec, slot, label, label_for_not_present, accel_form)
	local forms = alternant_multiword_spec.forms[slot]
	local retval
	if not forms then
		if not label_for_not_present then
			return
		end
		retval = {label = "no " .. label_for_not_present}
	else
		retval = {label = label, accel = accel_form and {form = accel_form} or nil}
		local prev_footnotes
		for _, form in ipairs(forms) do
			local footnotes = form.footnotes
			if footnotes and prev_footnotes and m_table.deepEquals(footnotes, prev_footnotes) then
				footnotes = nil
			end
			prev_footnotes = form.footnotes
			local quals, refs
			if footnotes then
				quals, refs = m_inflection_utilities.fetch_headword_qualifiers_and_references(footnotes)
			end
			local term = form.form
			table.insert(retval, {term = term, q = quals, refs = refs})
		end
	end

	table.insert(this_data.inflections, retval)
end

local function insert_noun_adj_linked_lemma_if_needed(this_data, alternant_multiword_spec, infl_module_name)
	-- Use the "linked" form of the lemma as the head if no head= explicitly given.
	if #this_data.heads == 0 then
		this_data.heads = {}
		local lemmas = require(infl_module_name).get_lemmas(alternant_multiword_spec, "linked variant")
		for _, lemma_obj in ipairs(lemmas) do
			local head = lemma_obj.form
			--local head = alternant_multiword_spec.args.nolinkhead and lemma_obj.form or
			--	m_headword_utilities.add_lemma_links(lemma_obj.form, alternant_multiword_spec.args.splithyph)
			local quals, refs
			if lemma_obj.footnotes then
				quals, refs = m_inflection_utilities.fetch_headword_qualifiers_and_references(lemma_obj.footnotes)
			end
			table.insert(this_data.heads, {term = head, q = quals, refs = refs})
		end
	end
end

local function get_process_noun_adj_multiple_data(infl_template)
	return function(multiple_data)
		local parts = {}
		local function ins(txt)
			table.insert(parts, txt)
		end
		for i, data in ipairs(multiple_data) do
			local alternant_multiword_spec = data.alternant_multiword_spec
			if alternant_multiword_spec.header then
				ins(("'''%s:'''<br />"):format(alternant_multiword_spec.header))
			elseif alternant_multiword_spec.q then
				ins((" ''%s''<br />"):format(alternant_multiword_spec.q))
			elseif i > 1 then
				ins(" ''or''<br />")
			end
			if alternant_multiword_spec.scrape_errors then
				local errmsgs = {}
				for _, scrape_error in ipairs(alternant_multiword_spec.scrape_errors) do
					table.insert(errmsgs, '<span style="font-weight: bold; color: #CC2200;">' .. scrape_error .. "</span>")
				end
				ins(table.concat(errmsgs, "<br />"))
				ins(require("Module:utilities").format_categories(
					{("Icelandic scraping errors in Template:%s"):format(infl_template)}, data.lang, nil, nil,
					force_cat))
			else
				ins(require("Module:headword").full_headword(data))
			end
		end
		return table.concat(parts)
	end
end

local function get_manual_noun_params(lang, is_proper)
	return {
		[1] = {alias_of = "g"},
		["g"] = list_param,
		["g_qual"] = {list = "g\1_qual", allow_holes = true},
		["indecl"] = {type = "boolean"},
		["m"] = list_param,
		["f"] = list_param,
		["dim"] = list_param,
		["aug"] = list_param,
		["pej"] = list_param,
		["dem"] = list_param,
		["fdem"] = list_param,
		["gen"] = list_param,
		["pl"] = list_param,
	}
end

local function do_manual_nouns(is_proper, args, data)
	local specs = valid_gender_specs[data.lang:getCode()]
	for i, g in ipairs(args.g) do
		local canon_g = specs[g]
		if canon_g then
			g = canon_g
		else
			error("Unrecognized gender: '" .. g .. "'")
		end
		track("gender-" .. g)
		if args.g_qual[i] then
			table.insert(data.genders, {spec = g, qualifiers = {args.g_qual[i]}})
		else
			table.insert(data.genders, g)
		end
	end
	if #data.genders == 0 then
		table.insert(data.genders, "?")
	end
	if args.indecl then
		table.insert(data.inflections, {label = glossary_link("indeclinable")})
		table.insert(data.categories, data.langname .. " indeclinable nouns")
	end

	-- Parse and insert an inflection not requiring additional processing into `data.inflections`. The raw arguments
	-- come from `args[field]`, which is parsed for inline modifiers. If there is a corresponding qualifier field
	-- `FIELD_qual`, qualifiers may additionally come from there. `label` is the label that the inflections are given,
	-- which is linked to the glossary if surrounded by <<..>> (which is removed). `plpos` is the plural part of speech,
	-- used in [[Category:LANGNAME PLPOS with red links in their headword lines]]. `accel` is the accelerator form, or
	-- nil.
	local function handle_infl(field, label, frob)
		m_headword_utilities.parse_and_insert_inflection {
			headdata = data,
			forms = args[field],
			paramname = field,
			label = label,
			frob = frob,
		}
	end

	handle_infl("gen", "genitive singular")
	handle_infl("pl", "nominative plural")
	handle_infl("m", "male equivalent")
	handle_infl("f", "female equivalent")
	handle_infl("dim", "<<diminutive>>")
	handle_infl("aug", "<<augmentative>>")
	handle_infl("pej", "<<pejorative>>")
	handle_infl("dem", "<<demonym>>")
	handle_infl("fdem", "female <<demonym>>")
end

local function get_auto_noun_params(lang, is_proper)
	return {
        [1] = {required = true, list = true, default = "akur<m.#>"},
        ["pos"] = {},
		["m"] = list_param,
		["f"] = list_param,
		["dim"] = list_param,
		["aug"] = list_param,
		["pej"] = list_param,
		["dem"] = list_param,
		["fdem"] = list_param,
	}
end

local function do_auto_nouns(is_proper, args, data)
	if data.lang:getCode() ~= "is" then
		error("Internal error: Only Icelandic supported at the moment")
	end
	if args.pos then
		data.pos_category = args.pos
	end
	local m_is_noun = require(is_noun_module)
	local alternant_multiword_specs = {}
	local multiple_data = {}
	local all_genders = {}

	local declspecs, scrape_error_data = parse_noun_adj_scraping_specs(data, args[1], "is-ndecl", false)
	if scrape_error_data then
		return scrape_error_data
	end
	for i, declspec in ipairs(declspecs) do
		local alternant_multiword_spec =
			m_is_noun.do_generate_forms(args, declspec, is_proper and "is-proper noun" or "is-noun")
		alternant_multiword_specs[i] = alternant_multiword_spec
		local this_data
		if i < #declspecs then
			this_data = m_table.shallowCopy(data)
			this_data.categories = {}
			this_data.inflections = {}
			-- genders gets overwritten just below
		else
			-- for the last (or usually only) spec, save memory by not cloning
			this_data = data
		end
		multiple_data[i] = this_data
		this_data.alternant_multiword_spec = alternant_multiword_spec
		this_data.heads = args.head
		if not alternant_multiword_spec.scrape_errors then
			this_data.genders = alternant_multiword_spec.genders
			for _, gender in ipairs(this_data.genders) do
				m_table.insertIfNot(all_genders, gender)
			end

			local function do_noun_form(slot, label, label_for_not_present, accel_form)
				do_noun_adj_form(this_data, alternant_multiword_spec, slot, label, label_for_not_present, accel_form)
			end

			local function insert_label(label)
				table.insert(this_data.inflections, {label = label})
			end

			if is_proper then
				insert_label(glossary_link("proper noun"))
			end
			if not alternant_multiword_spec.first_noun and alternant_multiword_spec.first_adj then
				insert_label("adjectival")
			end
			local def_prefix
			if alternant_multiword_spec.definiteness == "def" then
				def_prefix = "def_"
				insert_label(glossary_link("definite") .. " only")
			else
				def_prefix = "ind_"
			end
			if alternant_multiword_spec.number == "pl" then
				insert_label(glossary_link("plural only"))
			end
			if alternant_multiword_spec.saw_indecl and not alternant_multiword_spec.saw_non_indecl then
				insert_label(glossary_link("indeclinable"))
			elseif alternant_multiword_spec.saw_unknown_decl and not alternant_multiword_spec.saw_non_unknown_decl then
				insert_label("unknown declension")
			elseif alternant_multiword_spec.number == "pl" then
				do_noun_form(def_prefix .. "gen_p", "genitive plural", "genitive plural")
			else
				do_noun_form(def_prefix .. "gen_s", "genitive singular", "genitive singular")
				do_noun_form(def_prefix .. "nom_p", "nominative plural", not is_proper and "plural" or nil)
			end

			if i == 1 then
				-- Parse and insert an inflection not requiring additional processing into `this_data.inflections`. The raw
				-- arguments come from `args[field]`, which is parsed for inline modifiers. If there is a corresponding
				-- qualifier field `FIELD_qual`, qualifiers may additionally come from there. `label` is the label that the
				-- inflections are given, which is linked to the glossary if surrounded by <<..>> (which is removed).
				-- `plpos` is the plural part of speech, used in [[Category:LANGNAME PLPOS with red links in their headword
				-- lines]]. `accel` is the accelerator form, or nil.
				local function handle_infl(field, label, frob)
					m_headword_utilities.parse_and_insert_inflection {
						headdata = this_data,
						forms = args[field],
						paramname = field,
						label = label,
						frob = frob,
					}
				end

				handle_infl("m", "male equivalent")
				handle_infl("f", "female equivalent")
				handle_infl("dim", "<<diminutive>>")
				handle_infl("aug", "<<augmentative>>")
				handle_infl("pej", "<<pejorative>>")
				handle_infl("dem", "<<demonym>>")
				handle_infl("fdem", "female <<demonym>>")
			end

			-- Add categories.
			for _, cat in ipairs(alternant_multiword_spec.categories) do
				table.insert(this_data.categories, cat)
			end

			insert_noun_adj_linked_lemma_if_needed(this_data, alternant_multiword_spec, is_noun_module)
		end
	end

	if #all_genders > 1 then
		table.insert(multiple_data[1].categories, data.langname .. " nouns with multiple genders")
	end

	return multiple_data
end

local function get_noun_pos_functions(is_proper)
	return {
		params = function(lang)
			if lang:getCode() == "is" then
				return get_auto_noun_params(lang, is_proper)
			else
				return get_manual_noun_params(lang, is_proper)
			end
		end,
		func = function(args, data)
			if data.lang:getCode() == "is" then
				return do_auto_nouns(is_proper, args, data)
			else
				return do_manual_nouns(is_proper, args, data)
			end
		end,
		process_multiple_headword_data = get_process_noun_adj_multiple_data("is-ndecl"),
	}
end

pos_functions["nouns"] = get_noun_pos_functions(false)
pos_functions["proper nouns"] = get_noun_pos_functions("proper noun")

local function do_comparative_superlative(args, data, plpos)
	if args[1][1] == "-" then
		table.insert(data.inflections, {label = "not " .. glossary_link("comparable")})
		table.insert(data.categories, data.langname .. " uncomparable " .. plpos)
	elseif args[1][1] or args[2][1] then
		local comp = m_headword_utilities.parse_term_list_with_modifiers {
			paramname = {1, "comp"},
			forms = args[1],
		}
		local sup = m_headword_utilities.parse_term_list_with_modifiers {
			paramname = {2, "sup"},
			forms = args[2],
		}
		if comp[1] then
			comp.label = glossary_link("comparative")
			-- comp.accel = {form = "comparative"}
			table.insert(data.inflections, comp)
		end
		if sup[1] then
			sup.label = glossary_link("superlative")
			-- sup.accel = {form = "superlative"}
			table.insert(data.inflections, sup)
		end
		table.insert(data.categories, data.langname .. " comparable " .. plpos)
	end
end

local function get_manual_adjective_params(lang)
	return {
		[1] = {list = "comp", disallow_holes = true},
		[2] = {list = "sup", disallow_holes = true},
		["adv"] = list_param,
		["indecl"] = {type = "boolean"},
	}
end

local function do_manual_adjectives(args, data)
	if args.indecl then
		table.insert(data.inflections, {label = glossary_link("indeclinable")})
		table.insert(data.categories, data.langname .. " indeclinable adjectives")
	end
	do_comparative_superlative(args, data, "adjectives")
	m_headword_utilities.parse_and_insert_inflection {
		headdata = data,
		forms = args.adv,
		paramname = "adv",
		label = "adverb",
	}
end

local function get_auto_adjective_params(lang)
	return {
        [1] = {required = true, list = true, default = "djúpur<comp:^>"},
        ["pos"] = {},
		["adv"] = list_param,
	}
end

local function do_auto_adjectives(args, data)
	if data.lang:getCode() ~= "is" then
		error("Internal error: Only Icelandic supported at the moment")
	end
	if args.pos then
		data.pos_category = args.pos
	end
	local m_is_adjective = require(is_adjective_module)
	local m_is_common = require(is_common_module)
	local alternant_multiword_specs = {}
	local multiple_data = {}
	local declspecs, scrape_error_data = parse_noun_adj_scraping_specs(data, args[1], "is-adecl", "allow empty infl")
	if scrape_error_data then
		return scrape_error_data
	end
	for i, declspec in ipairs(declspecs) do
		local alternant_multiword_spec = m_is_adjective.do_generate_forms(args, declspec, "is-adj")
		alternant_multiword_specs[i] = alternant_multiword_spec
		local this_data
		if i < #declspecs then
			this_data = m_table.shallowCopy(data)
			this_data.categories = {}
			this_data.inflections = {}
		else
			-- for the last (or usually only) spec, save memory by not cloning
			this_data = data
		end
		multiple_data[i] = this_data
		this_data.alternant_multiword_spec = alternant_multiword_spec
		this_data.heads = args.head
		if not alternant_multiword_spec.scrape_errors then
			local function do_adjective_form(slot, label, label_for_not_present, accel_form)
				do_noun_adj_form(this_data, alternant_multiword_spec, slot, label, label_for_not_present, accel_form)
			end

			local function insert_label(label)
				table.insert(this_data.inflections, {label = label})
			end

			if alternant_multiword_spec.saw_indecl and not alternant_multiword_spec.saw_non_indecl then
				insert_label(glossary_link("indeclinable"))
			elseif alternant_multiword_spec.saw_unknown_decl and not alternant_multiword_spec.saw_non_unknown_decl then
				insert_label("unknown declension")
			end

			local function do_superlative()
				do_adjective_form("sup_str_nom_m", glossary_link("superlative"), nil, "superlative")
			end

			if alternant_multiword_spec.haspos == "has" then
				if alternant_multiword_spec.number.pos == "sg" then
					insert_label(glossary_link("singular only"))
				elseif alternant_multiword_spec.number.pos == "pl" then
					insert_label(glossary_link("plural only"))
				end
				if alternant_multiword_spec.state.pos == "strong" or alternant_multiword_spec.state.pos == "weak" then
					-- not "both" or "none"
					insert_label(alternant_multiword_spec.state.pos .. " only")
				end
				if alternant_multiword_spec.hascomp == "has" then
					do_adjective_form("comp_wk_nom_m", glossary_link("comparative"), nil, "comparative")
					if alternant_multiword_spec.hassup == "has" then
						do_superlative()
					elseif alternant_multiword_spec.hassup == "hasnot" then
						insert_label("no " .. glossary_link("superlative"))
					end
				elseif alternant_multiword_spec.hascomp == "hasnot" then
					if alternant_multiword_spec.hassup == "has" then
						insert_label("no " .. glossary_link("comparative"))
						do_superlative()
					else
						insert_label("not " .. glossary_link("comparable"))
					end
				end
			elseif alternant_multiword_spec.hascomp == "has" then
				insert_label(glossary_link("comparative") .. " only")
				if alternant_multiword_spec.hassup == "has" then
					do_superlative()
				end
			else
				insert_label(glossary_link("superlative") .. " only")
			end

			if i == 1 then
				-- Parse and insert an inflection not requiring additional processing into `this_data.inflections`. The raw
				-- arguments come from `args[field]`, which is parsed for inline modifiers. If there is a corresponding
				-- qualifier field `FIELD_qual`, qualifiers may additionally come from there. `label` is the label that the
				-- inflections are given, which is linked to the glossary if surrounded by <<..>> (which is removed).
				-- `plpos` is the plural part of speech, used in [[Category:LANGNAME PLPOS with red links in their headword
				-- lines]]. `accel` is the accelerator form, or nil.
				local function handle_infl(field, label, frob)
					m_headword_utilities.parse_and_insert_inflection {
						headdata = this_data,
						forms = args[field],
						paramname = field,
						label = label,
						frob = frob,
					}
				end

				handle_infl("adv", "adverb")
			end

			-- Add categories.
			for _, cat in ipairs(alternant_multiword_spec.categories) do
				table.insert(this_data.categories, cat)
			end

			insert_noun_adj_linked_lemma_if_needed(this_data, alternant_multiword_spec, is_adjective_module)
		end
	end

	return multiple_data
end

local function get_adjective_pos_functions()
	return {
		params = function(lang)
			if lang:getCode() == "is" then
				return get_auto_adjective_params(lang)
			else
				return get_manual_adjective_params(lang)
			end
		end,
		func = function(args, data)
			if data.lang:getCode() == "is" then
				return do_auto_adjectives(args, data)
			else
				return do_manual_adjectives(args, data)
			end
		end,
		process_multiple_headword_data = get_process_noun_adj_multiple_data("is-adecl"),
	}
end

pos_functions["adjectives"] = get_adjective_pos_functions()

pos_functions["adverbs"] = {
	params = {
		[1] = {list = "comp", disallow_holes = true},
		[2] = {list = "sup", disallow_holes = true},
	},
	func = function(args, data)
		do_comparative_superlative(args, data, "adverbs")
	end,
}


return export