-- Module:User:Benwing2/phi-headword
-- Appearance
-- - This module sandbox lacks a documentation subpage. Please create it.
-- - Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
-- This module contains code for Philippine-language headword templates.
-- Most languages use the following templates (e.g. for Waray-Waray):
-- * {{war-noun}}, {{war-proper noun}};
-- * {{war-verb}};
-- * {{war-adj}};
-- * {{war-adv}};
-- * {{war-head}}.
-- Tagalog uses the following additional templates:
-- * {{tl-num}};
-- * {{tl-pron}};
-- * {{tl-prep}}.
-- Cebuano uses the following additional templates:
-- * {{ceb-num}}.
-- Table of functions exported by this module; returned at the end of the file.
local export = {}
-- Per-part-of-speech handlers, keyed by plural part-of-speech name (e.g. "nouns"). Each handler is a table
-- with a `params` function and a `func` function.
local pos_functions = {}
local force_cat = false -- for testing; if true, categories appear in non-mainspace pages
-- Local shortcuts for library functions.
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local uupper = mw.ustring.upper
local ulower = mw.ustring.lower
-- Name of the module providing findTemplates(); passed to require() at the point of use.
local template_parser_module = "Module:template parser"
-- Compact description of the Tagalog verb conjugation (trigger) types, expanded into `tl_conjugation_types`
-- by the loop that follows. The value for each trigger name is one of:
-- * a number N > 0: the trigger has N numbered subtypes (1 through N);
-- * 0: the trigger has a single, unnumbered type;
-- * a table {N, {i, j, ...}}: N numbered subtypes, where the listed subtype numbers additionally have an
--   "alternate" variant.
local tl_conj_type_data = {
["actor"] = 5,
["actor indirect"] = 0,
["actor 2nd indirect"] = 4,
["object"] = 11,
["locative"] = 2,
["benefactive"] = 3,
["instrument"] = 2,
["reason"] = {4, {1,2,3}},
["directional"] = 6,
["reference"] = 0,
["reciprocal"] = 2
}
-- Map from the value accepted in type= for Tagalog verbs to the label displayed in the headword (and used in
-- the category name). Built from `tl_conj_type_data`:
-- * unnumbered triggers are keyed by the bare trigger name, with label "<name> trigger";
-- * numbered triggers are keyed by "<name> <Roman numeral>", with label "<ordinal> <name> trigger";
-- * alternates are keyed by "<name> <Roman numeral>A", with label "alternate <ordinal> <name> trigger".
-- The exact numeral/ordinal text comes from [[Module:roman numerals]] and [[Module:ordinal]] (presumably
-- "I"/"1st" etc. -- confirm against those modules).
local tl_conjugation_types = {}
do
	-- Loop-invariant lookups, hoisted out of the nested loops below.
	local arabic_to_roman = require("Module:roman numerals").arabic_to_roman
	local ordinal = require("Module:ordinal")._ordinal
	local contains = require("Module:table").contains

	for key, value in pairs(tl_conj_type_data) do
		local type_count, alternates
		if type(value) == "number" then
			type_count, alternates = value, {}
		else
			type_count, alternates = value[1], value[2]
		end

		if type_count == 0 then
			-- Single unnumbered type; "actor indirect" is displayed with its words reordered.
			local display_name = key == "actor indirect" and "indirect actor" or key
			tl_conjugation_types[key] = display_name .. " trigger"
		else
			--These could be typos but putting back in to stay consistent
			local display_name = key == "actor 2nd indirect" and "secondary indirect actor" or key
			for i = 1, type_count do
				local roman_numeral = arabic_to_roman(tostring(i))
				local label = ordinal(tostring(i)) .. " " .. display_name .. " trigger"
				tl_conjugation_types[key .. " " .. roman_numeral] = label
				if contains(alternates, i) then
					tl_conjugation_types[key .. " " .. roman_numeral .. "A"] = "alternate " .. label
				end
			end
		end
	end
end
-- Ilocano verb conjugation types: map from the value accepted in type= to the label displayed in the
-- headword and used to build the category name. The trailing comment on each entry gives the affix.
local ilo_conjugation_types = {
["actor I"] = "1st actor trigger", -- um- or -um-
["actor II"] = "2nd actor trigger", -- ag-
["actor III"] = "3rd actor trigger", -- mang-
["actor IV"] = "4th actor trigger", -- ma-
-- NOTE(review): the next two keys are spelled inconsistently ("potentive" vs. "potential") -- confirm
-- which spelling existing entries use before normalizing.
["actor potentive I"] = "1st actor trigger potential mood", -- maka-
["actor potential II"] = "2nd actor trigger potential mood", -- makapag-
-- NOTE(review): both causative keys below map to "2nd actor trigger potential mood", the same label as
-- "actor potential II"; this looks like a copy-paste slip (other causative entries use "... causative
-- mood") -- confirm the intended labels.
["actor causative I"] = "2nd actor trigger potential mood", -- agpa-
["actor causative II"] = "2nd actor trigger potential mood", -- mangpa-
["object"] = "object trigger", -- -en
["object potential"] = "object trigger potential mood", -- ma-
-- NOTE(review): same suspicious label as the actor causative entries above -- confirm.
["object causative"] = "2nd actor trigger potential mood", -- ipai-
["comitative"] = "comitative trigger", -- ka-
["comitative potential"] = "comitative trigger potential mood", -- maka-
["comitative causative I"] = "1st comitative trigger causative mood", -- makapa-
["comitative causative II"] = "2nd comitative trigger causative mood", -- makipa-
["locative"] = "locative trigger",-- -an
["locative potential"] = "locative trigger potential mood", -- ma- -an
["locative causative"] = "locative trigger causative mood", -- pa- -an
["thematic"] = "thematic trigger", -- i-
["thematic potential"] = "thematic trigger potential mood", -- mai-
["thematic causative"] = "thematic trigger causative mood", -- ipa-
["benefactive"] = "benefactive trigger", -- i- -an
["benefactive potential"] = "benefactive trigger potential mood", -- mai- -an
["benefactive causative"] = "benefactive trigger causative mood", -- ipa- -an
["instrument"] = "instrument trigger", -- pag-
["instrument potential"] = "instrument trigger potential mood", -- mapag-
["instrument causative"] = "1st instrument trigger causative mood", -- pagpa- -an
["instrument causative II"] = "2nd instrument trigger causative mood", -- panagpa-
}
-- Pangasinan verb conjugation types: map from the value accepted in type= to the label displayed in the
-- headword and used to build the category name. The trailing comment on each entry gives the affix.
-- Note that both "actor potentive" keys share a single label.
local pag_conjugation_types = {
["actor I"] = "1st actor trigger", -- on-/-on-
["actor II"] = "2nd actor trigger", --man-
["actor potentive I"] = "actor trigger potential mood", -- maka-
["actor potentive II"] = "actor trigger potential mood", -- makapag-
["object"] = "object trigger", -- -en
["object potential"] = "object trigger potential mood", -- ma-
["instrument"] = "instrument trigger", -- pag-
["instrument potential"] = "instrument trigger potential mood", -- mapag-
["instrument causative"] = "1st instrument trigger causative mood", -- pagpa- -an
["instrument causative II"] = "2nd instrument trigger causative mood", -- panagpa-
}
-- FIXME: Are these various languages really so different in their verb inflections or is this just a case of
-- randomly picking a subset of the total inflections?
-- Inflection specs shared between languages. Each spec is an ordered list of {PARAM, PROPS} pairs, where
-- PARAM is the name of a list-valued template parameter and PROPS has the fields:
-- * `label`: text displayed before the forms in the headword line;
-- * `form` (optional): when present, passed on as the `form` field of the inflection's `accel` table;
-- * `alias` (optional): list of parameter names (here positional numbers) registered as aliases of PARAM.
-- The order of the pairs is the order in which the inflections are added to the headword.
local tl_bcl_verb_inflections = {
{"comp", {label = "complete", form = "comp", alias = {2}}},
-- NOTE(review): form = "imp" for the progressive differs from Kapampangan below, which uses "prog", and
-- from the Hiligaynon group, where "imp" stands for the imperative -- confirm this is intended.
{"prog", {label = "progressive", form = "imp", alias = {3}}},
{"cont", {label = "contemplative", form = "cont", alias = {4}}},
{"vnoun", {label = "verbal noun", form = "vnoun", alias = {5}}},
}
local hil_krj_war_verb_inflections = {
{"real", {label = "realis", form = "realis", alias = {2}}},
{"imp", {label = "imperative", form = "imp", alias = {3}}},
{"dim", {label = "diminutive"}},
{"caus", {label = "causative"}},
{"freq", {label = "frequentative"}},
}
local ilo_pag_verb_inflections = {
{"perf", {label = "perfective", form = "pfv", alias = {2}}},
{"imperf", {label = "imperfective", form = "impfv", alias = {3}}},
{"past_imperf", {label = "past imperfective", form = "past|impfv", alias = {4}}},
{"fut", {label = "future", form = "fut", alias = {5}}},
}
local hil_krj_war_noun_inflections = {
{"dim", {label = "diminutive"}},
}
local hil_krj_war_adj_inflections = {
{"dim", {label = "diminutive"}},
{"caus", {label = "causative"}},
}
-- NOTE: Here and below, the template names need to be in their canonical form (not shortcuts).
-- Per-language properties, keyed by language code. Recognized fields (all optional):
-- * `native_script_name`: display name of the native script; enables the b= parameter.
-- * `convert_to_native_script`: template expanded to convert a Latin-script b= value to the native script.
-- * `native_script_def`: template looked for (with the current pagename among its first ten numbered
--   arguments) on the native-script entry page, to decide whether that entry exists.
-- * `arabic_script_name`, `arabic_script_def`: same as the `native_script_*` fields, but for the j=
--   (Arabic-script) parameter.
-- * `pronun_templates_to_check`: pronunciation templates, at least one of which is expected on the page;
--   otherwise a tracking category is added.
-- * `has_pl_all_pos`, `has_intens_all_pos`: enable pl= resp. intens= (and plintens=) for every part of speech.
-- * `conjugation_types`: map of allowed verb type= values to their labels.
-- * `verb_inflections`, `noun_inflections`, `adj_inflections`: language-specific inflection specs.
local langs_supported = {
	["bcl"] = {
		native_script_name = "Basahan",
		convert_to_native_script = "bcl-basahan script",
		native_script_def = "bcl-basahan",
		pronun_templates_to_check = {"bcl-IPA"},
		has_pl_all_pos = true,
		has_intens_all_pos = true,
		verb_inflections = tl_bcl_verb_inflections,
	},
	["cbk"] = {
		pronun_templates_to_check = {"cbk-IPA"},
	},
	["ceb"] = {
		native_script_name = "Badlit",
		convert_to_native_script = "ceb-badlit script",
		native_script_def = "ceb-badlit",
		pronun_templates_to_check = {"ceb-IPA"},
		verb_inflections = {
			{"inch", {label = "inchoative", form = "realis", alias = {2}}},
			{"imp", {label = "imperative", form = "imp", alias = {3}}},
		},
	},
	["hil"] = {
		pronun_templates_to_check = {"hil-IPA"},
		verb_inflections = hil_krj_war_verb_inflections,
		noun_inflections = hil_krj_war_noun_inflections,
		adj_inflections = hil_krj_war_adj_inflections,
	},
	["ilo"] = {
		native_script_name = "Kur-itan",
		convert_to_native_script = "ilo-kur-itan script",
		native_script_def = "ilo-kur-itan",
		pronun_templates_to_check = {"ilo-IPA"},
		conjugation_types = ilo_conjugation_types,
		verb_inflections = ilo_pag_verb_inflections,
		adj_inflections = {
			{"comp", {label = "comparative", form = "comparative", alias = {2}}},
			{"mod", {label = "moderative", form = "moderative", alias = {3}}},
			{"comp_sup", {label = "comparative superlative", form = "comp|sup", alias = {4}}},
			{"abs_sup", {label = "absolutive superlative", form = "abs|sup", alias = {5}}},
			{"intens", {label = "intensive", alias = {6}}},
		},
	},
	["krj"] = {
		pronun_templates_to_check = {"krj-IPA"},
		verb_inflections = hil_krj_war_verb_inflections,
		noun_inflections = hil_krj_war_noun_inflections,
		adj_inflections = hil_krj_war_adj_inflections,
	},
	["mdh"] = {
		arabic_script_name = "Jawi",
		-- This language has only an Arabic-script spelling, so the definition template must be stored under
		-- `arabic_script_def`; a `native_script_def` is only consulted when `native_script_name` is set.
		arabic_script_def = "mdh-Jawi",
		pronun_templates_to_check = {"mdh-IPA"},
	},
	["mrw"] = {
		arabic_script_name = "batang Arab",
	},
	["pag"] = {
		pronun_templates_to_check = {"pag-IPA"},
		conjugation_types = pag_conjugation_types,
		verb_inflections = ilo_pag_verb_inflections,
	},
	["pam"] = {
		pronun_templates_to_check = {"pam-IPA"},
		verb_inflections = {
			{"perf", {label = "perfective", form = "pfv", alias = {2}}}, -- Use with affixed verbs only.
			{"prog", {label = "progressive", form = "prog", alias = {3}}}, -- Use with affixed verbs only.
		},
	},
	["tl"] = {
		native_script_name = "Baybayin",
		convert_to_native_script = "tl-baybayin script",
		native_script_def = "tl-baybayin",
		pronun_templates_to_check = {"tl-pr", "tl-IPA"},
		conjugation_types = tl_conjugation_types,
		verb_inflections = tl_bcl_verb_inflections,
	},
	["tsg"] = {
	},
	["war"] = {
		pronun_templates_to_check = {"war-IPA"},
		verb_inflections = hil_krj_war_verb_inflections,
		noun_inflections = hil_krj_war_noun_inflections,
		adj_inflections = hil_krj_war_adj_inflections,
	},
}
----------------------------------------------- Utilities --------------------------------------------
-- Record a tracking hit under "phi-headword/<page>" via [[Module:debug/track]]. Always returns true, so the
-- call can be embedded in a boolean expression.
local function track(page)
	local tracker = require("Module:debug/track")
	tracker("phi-headword/" .. page)
	return true
end
-- "If not empty": map the empty string to nil and pass every other value through unchanged.
local function ine(val)
	if val ~= "" then
		return val
	end
	return nil
end
-- Append one inflection to `data.inflections`, unless `forms` is empty.
-- `forms` is the list of forms; it is modified in place (it receives the `label` field and, when `accel` is
-- given, the `accel` field) and is itself the object inserted into `data.inflections`.
local function do_inflection(data, forms, label, accel)
	if #forms == 0 then
		-- Nothing specified for this inflection; leave the headword data untouched.
		return
	end
	forms.label = label
	if accel then
		forms.accel = accel
	end
	table.insert(data.inflections, forms)
end
-- Register the parameters described by an inflection spec in `params` (a parameter spec table of the kind
-- passed to [[Module:parameters]]). Each spec entry {NAME, PROPS} yields a list parameter NAME, plus one
-- `alias_of` entry per element of PROPS.alias. Does nothing if `params_spec` is nil.
local function add_params(params, params_spec)
	for _, entry in ipairs(params_spec or {}) do
		local name, props = entry[1], entry[2]
		params[name] = {list = true}
		for _, alias in ipairs(props.alias or {}) do
			params[alias] = {alias_of = name}
		end
	end
end
-- Add every inflection described by an inflection spec to the headword data, in spec order. For each entry
-- {NAME, PROPS}, the forms are taken from args[NAME]; PROPS.form, when present, is wrapped into the accel
-- table. Does nothing if `params_spec` is nil.
local function do_inflections(args, data, params_spec)
	for _, entry in ipairs(params_spec or {}) do
		local name, props = entry[1], entry[2]
		local accel
		if props.form then
			accel = {form = props.form}
		end
		do_inflection(data, args[name], props.label, accel)
	end
end
----------------------------------------------- Main code --------------------------------------------
-- The main entry point.
-- This is the only function that can be invoked from a template.
--
-- Invocation arguments (frame.args):
-- * 1=: part of speech in plural form (e.g. "nouns"). If omitted, the part of speech is taken from 1= of the
--   calling template and pluralized, and the heads move from 1= to 2=.
-- * lang=: language code (required); must be a key of `langs_supported`, otherwise an error is thrown.
-- The arguments of the calling template are validated with [[Module:parameters]] against a spec assembled
-- from the common parameters, the language properties and the part-of-speech handler in `pos_functions`.
--
-- Returns the output of full_headword() in [[Module:headword]], or a JSON dump of the headword data when
-- json= is given.
function export.show(frame)
	local iparams = {
		[1] = {},
		["lang"] = {required = true},
	}
	local iargs = require("Module:parameters").process(frame.args, iparams)
	local parargs = frame:getParent().args
	local poscat = iargs[1]
	local langcode = iargs.lang
	if not langs_supported[langcode] then
		local langcodes_supported = {}
		for code in pairs(langs_supported) do
			table.insert(langcodes_supported, code)
		end
		-- pairs() iterates in unspecified order; sort so that the message is stable.
		table.sort(langcodes_supported)
		error("This module currently only works for lang=" .. table.concat(langcodes_supported, "/"))
	end
	local lang = require("Module:languages").getByCode(langcode)
	local langname = lang:getCanonicalName()

	local headarg
	if poscat then
		headarg = 1
	else
		headarg = 2
		-- On the {{LANG-head}} template page itself, fall back to "interjection" so the page doesn't error.
		poscat = ine(parargs[1]) or
			mw.title.getCurrentTitle().fullText == "Template:" .. langcode .. "-head" and "interjection" or
			error("Part of speech must be specified in 1=")
		poscat = require("Module:string utilities").pluralize(poscat)
	end

	local langprops = langs_supported[langcode]
	local params = {
		[headarg] = {list = "head", disallow_holes = true},
		["id"] = {},
		["nolink"] = {type = "boolean"},
		["nolinkhead"] = {type = "boolean", alias_of = "nolink"},
		["suffix"] = {type = "boolean"},
		["nosuffix"] = {type = "boolean"},
		["addlpos"] = {},
		["json"] = {type = "boolean"},
		["pagename"] = {}, -- for testing
	}
	if langprops.native_script_name then
		params["b"] = {list = true}
	end
	if langprops.arabic_script_name then
		params["j"] = {list = true}
	end
	local has_alt_script = langprops.native_script_name or langprops.arabic_script_name
	if has_alt_script then
		params["tr"] = {list = true, allow_holes = true}
	end
	if headarg == 2 then
		params[1] = {required = true} -- required but ignored as already processed above
	end
	if pos_functions[poscat] then
		for key, val in pairs(pos_functions[poscat].params(langcode)) do
			params[key] = val
		end
	end
	-- True if pl= was added here rather than by the part-of-speech handler, in which case the plural must also
	-- be displayed here.
	local need_pl_handled = false
	if langprops.has_pl_all_pos and not params.pl then
		-- Yuck, this should be POS-specific but it seems all POS's can be pluralized in Bikol Central?
		params["pl"] = {list = true}
		need_pl_handled = true
	end
	if langprops.has_intens_all_pos then
		params["intens"] = {list = true}
		if langprops.has_pl_all_pos then
			params["plintens"] = {list = true}
		end
	end

	local args = require("Module:parameters").process(parargs, params)
	local pagename = args.pagename or mw.title.getCurrentTitle().subpageText
	if has_alt_script and args.tr.maxindex > #args[headarg] then
		error("Too many translits specified; use '+' to indicate a default head")
	end

	local user_specified_heads = args[headarg]
	local heads = user_specified_heads
	if args.nolink then
		if #heads == 0 then
			heads = {pagename}
		end
	end
	for i, head in ipairs(heads) do
		if head == "+" then
			head = nil
		end
		heads[i] = {
			term = head,
			-- tr= only exists as a parameter for languages with an alternative script.
			tr = has_alt_script and args.tr[i] or nil,
		}
	end

	local data = {
		lang = lang,
		langcode = langcode,
		langname = langname,
		pos_category = poscat,
		categories = {},
		heads = heads,
		user_specified_heads = user_specified_heads,
		no_redundant_head_cat = #user_specified_heads == 0,
		inflections = {},
		pagename = pagename,
		id = args.id,
		force_cat_output = force_cat,
	}

	data.is_suffix = false
	if args.suffix or (
		not args.nosuffix and pagename:find("^%-") and poscat ~= "suffixes" and poscat ~= "suffix forms"
	) then
		data.is_suffix = true
		data.pos_category = "suffixes"
		local singular_poscat = require("Module:string utilities").singularize(poscat)
		table.insert(data.categories, langname .. " " .. singular_poscat .. "-forming suffixes")
		table.insert(data.inflections, {label = singular_poscat .. "-forming suffix"})
		if args.addlpos then
			for _, addlpos in ipairs(rsplit(args.addlpos, "%s*,%s*")) do
				table.insert(data.categories, langname .. " " .. addlpos .. "-forming suffixes")
				table.insert(data.inflections, {label = addlpos .. "-forming suffix"})
			end
		end
	end

	if pos_functions[poscat] then
		pos_functions[poscat].func(args, data)
	end

	if need_pl_handled then
		do_inflection(data, args.pl, "plural", {form = "plural"})
	end
	if langprops.has_intens_all_pos then
		do_inflection(data, args.intens, "intensified")
		if langprops.has_pl_all_pos then
			do_inflection(data, args.plintens, "plural intensified")
		end
	end

	local script
	if has_alt_script then
		script = lang:findBestScript(pagename) -- Latn or Tglg
		-- Disable native-script spelling parameter if entry is already in native script.
		if script:getCode() == "Tglg" then
			args.b = {}
		end
		-- Disable Arabic-script spelling parameter if entry is already in Arabic script.
		if script:getCode() == "Arab" then
			args.j = {}
		end

		-- Return true if the page `altscript` exists and contains an invocation of the template
		-- `altscript_def` with the current pagename among its first ten numbered arguments. Used to decide
		-- whether to add a tracking category for a missing alt-script entry.
		local function check_for_alt_script_entry(altscript, altscript_def)
			if not altscript_def then
				return false
			end
			local title = mw.title.new(altscript)
			local altscript_content = title and title:getContent()
			if not altscript_content then
				return false
			end
			for name, template_args in require(template_parser_module).findTemplates(altscript_content) do
				if name == altscript_def then
					for i = 1, 10 do
						if template_args[i] == pagename then
							return true
						end
					end
				end
			end
			return false
		end

		-- Process the alt-script spelling parameter `script_argname`: convert Latin-script values using the
		-- template `convert_to_script` (or throw an error if there is none), add the spellings as an
		-- inflection, and add the script-related categories.
		local function handle_alt_script(script_argname, script_code, script_name, convert_to_script, script_def)
			local script_arg = args[script_argname]
			if not script_arg then
				return
			end
			for i, alt in ipairs(script_arg) do
				if alt == "+" then
					alt = pagename
				end
				local altsc = lang:findBestScript(alt)
				if altsc:getCode() == "Latn" then
					if convert_to_script then
						alt = frame:expandTemplate { title = convert_to_script, args = { alt }}
					else
						error(("Latin script for %s= not currently supported; supply proper script"):format(
							script_argname))
					end
				end
				script_arg[i] = {term = alt, sc = require("Module:scripts").getByCode(script_code) }
				if not check_for_alt_script_entry(alt, script_def) then
					table.insert(data.categories,
						("%s terms with missing %s script entries"):format(langname, script_name))
				end
			end
			if #script_arg > 0 then
				script_arg.label = script_name .. " spelling"
				table.insert(data.inflections, script_arg)
			end
			if script:getCode() == "Latn" then
				table.insert(data.categories, ("%s terms %s %s script"):format(
					langname, #script_arg > 0 and "with" or "without", script_name))
			elseif script:getCode() == script_code then
				table.insert(data.categories, ("%s terms in %s script"):format(langname, script_name))
			end
		end

		if langprops.native_script_name then
			handle_alt_script("b", "Tglg", langprops.native_script_name, langprops.convert_to_native_script,
				langprops.native_script_def)
		end
		if langprops.arabic_script_name then
			handle_alt_script("j", "Arab", langprops.arabic_script_name, langprops.convert_to_arabic_script,
				langprops.arabic_script_def)
		end
	end

	if langprops.pronun_templates_to_check and (not has_alt_script or script:getCode() == "Latn") then
		-- See if we need to add a tracking category for missing {{tl-pr}}, {{tl-IPA}}, etc.
		local template_present = false
		local this_title = mw.title.new(pagename)
		local content = this_title and this_title:getContent()
		if content then
			for name in require(template_parser_module).findTemplates(content) do
				for _, pronun_template in ipairs(langprops.pronun_templates_to_check) do
					if name == pronun_template then
						template_present = true
						break
					end
				end
				if template_present then
					break
				end
			end
		end
		if not template_present then
			table.insert(data.categories, ("%s terms without pronunciation template"):format(langname))
		end
	end

	if args.json then
		return require("Module:JSON").toJSON(data)
	end
	return require("Module:headword").full_headword(data)
end
-- Handler for adjectives. Every language accepts f=, m=, pl=, comp= and sup=; the language-specific
-- `adj_inflections` spec may add further parameters or redefine one of the generic ones (e.g. Ilocano
-- redefines comp=).
pos_functions["adjectives"] = {
	-- Return the parameter spec for adjectives in language `langcode`.
	params = function(langcode)
		local params = {
			["f"] = {list = true},
			["m"] = {list = true},
			["pl"] = {list = true},
			["comp"] = {list = true},
			["sup"] = {list = true},
		}
		add_params(params, langs_supported[langcode].adj_inflections)
		return params
	end,
	-- Add the adjective inflections found in `args` to `data.inflections`.
	func = function(args, data)
		local lang_specific = langs_supported[data.langcode].adj_inflections
		-- Parameters redefined by the language-specific spec are handled by do_inflections() below. Handling
		-- them here as well would insert the same form list into data.inflections twice.
		local overridden = {}
		for _, spec in ipairs(lang_specific or {}) do
			overridden[spec[1]] = true
		end
		local generic = {
			{"f", "feminine"},
			{"m", "masculine"},
			{"pl", "plural", {form = "plural"}},
			{"comp", "comparative"},
			{"sup", "superlative"},
		}
		for _, spec in ipairs(generic) do
			local param, label, accel = spec[1], spec[2], spec[3]
			if not overridden[param] then
				do_inflection(data, args[param], label, accel)
			end
		end
		do_inflections(args, data, lang_specific)
	end,
}
-- Handler for nouns (also used for proper nouns). Accepts f=, m=, pl= and the boolean rootword=, plus any
-- parameters from the language-specific `noun_inflections` spec.
pos_functions["nouns"] = {
	-- Return the parameter spec for nouns in language `langcode`.
	params = function(langcode)
		local params = {
			["f"] = {list = true},
			["m"] = {list = true},
			["pl"] = {list = true},
			rootword = {type = "boolean"},
		}
		add_params(params, langs_supported[langcode].noun_inflections)
		return params
	end,
	-- Add the noun inflections found in `args` to `data.inflections`, and mark root words.
	func = function(args, data)
		do_inflection(data, args.f, "feminine")
		do_inflection(data, args.m, "masculine")
		do_inflection(data, args.pl, "plural", {form = "plural"})
		do_inflections(args, data, langs_supported[data.langcode].noun_inflections)
		if args.rootword then
			table.insert(data.inflections, {label = "root word"})
			-- The language name is only available through `data` here.
			table.insert(data.categories, data.langname .. " roots")
		end
	end,
}
pos_functions["proper nouns"] = pos_functions["nouns"]
-- Pronouns and prepositions share a single handler, which accepts only pl= (plural forms).
do
	local plural_only_handler = {
		-- Return the parameter spec; the same for every language.
		params = function(langcode)
			return {
				["pl"] = {list = true},
			}
		end,
		-- Add the plural forms, if any, to `data.inflections`.
		func = function(args, data)
			local accel = {form = "plural"}
			do_inflection(data, args.pl, "plural", accel)
		end,
	}
	pos_functions["pronouns"] = plural_only_handler
	pos_functions["prepositions"] = plural_only_handler
end
-- Handler for verbs. Accepts the boolean rootword=, the parameters from the language-specific
-- `verb_inflections` spec and, for languages that define `conjugation_types`, the list parameter type=.
pos_functions["verbs"] = {
	-- Return the parameter spec for verbs in language `langcode`.
	params = function(langcode)
		local params = {
			rootword = {type = "boolean"},
		}
		if langs_supported[langcode].conjugation_types then
			params.type = {list = true}
		end
		add_params(params, langs_supported[langcode].verb_inflections)
		return params
	end,
	-- Add the verb inflections found in `args` to `data.inflections`, mark root words, and tag the verb with
	-- its conjugation type(s). Throws an error on an unrecognized type= value.
	func = function(args, data)
		do_inflections(args, data, langs_supported[data.langcode].verb_inflections)
		if args.rootword then
			table.insert(data.inflections, {label = "root word"})
			table.insert(data.categories, data.langname .. " roots")
		end
		-- args.type is nil for languages without conjugation types, since type= is not declared for them.
		if args.type then
			-- Tag verb trigger
			local conjugation_types = langs_supported[data.langcode].conjugation_types
			for _, typ in ipairs(args.type) do
				local label = conjugation_types[typ]
				if not label then
					error(("Unrecognized %s verb conjugation type '%s'"):format(data.langname, typ))
				end
				table.insert(data.inflections, {label = label})
				table.insert(data.categories, ("%s %s verbs"):format(data.langname, label))
			end
		end
	end,
}
-- Handler for letters. Accepts type= (one of "upper", "lower", "mixed"), which overrides the case detected
-- from the page name, and upper=/lower=, which override the automatically generated other-case forms.
-- mixed= is accepted but is only checked for letters without case, where it is an error.
pos_functions["letters"] = {
	-- Return the parameter spec for letters; the same for every language.
	params = function(langcode)
		return {
			["type"] = {},
			["upper"] = {},
			["lower"] = {},
			["mixed"] = {},
		}
	end,
	-- Add the case label and the other-case forms to `data.inflections`. Throws an error on an invalid type=,
	-- or on parameters that contradict the case of the letter.
	func = function(args, data)
		local letter_type = args.type
		if letter_type and letter_type ~= "upper" and letter_type ~= "lower" and letter_type ~= "mixed" then
			error(("Unrecognized value for type '%s'; should be one of 'upper', 'lower' or 'mixed'"):format(
				letter_type))
		end
		local uppage = uupper(data.pagename)
		local lopage = ulower(data.pagename)
		if uppage == lopage then
			if letter_type then
				error("Can't specify type= when letter has no case")
			end
			if args.upper or args.lower or args.mixed then
				error("Can't specify upper=, lower= or mixed= when letter has no case")
			end
			table.insert(data.inflections, {label = "no case"})
			return
		end
		-- An explicit type= takes precedence; otherwise detect the case from the page name.
		if not letter_type then
			if data.pagename == uppage then
				letter_type = "upper"
			elseif data.pagename == lopage then
				letter_type = "lower"
			else
				letter_type = "mixed"
			end
		end
		if letter_type == "upper" then
			if args.upper then
				error("Already uppercase; can't specify upper=")
			end
			table.insert(data.inflections, {label = "[[Appendix:Capital letter|upper case]]"})
			table.insert(data.inflections, {args.lower or lopage, label = "lower case"})
		elseif letter_type == "lower" then
			if args.lower then
				error("Already lowercase; can't specify lower=")
			end
			table.insert(data.inflections, {label = "lower case"})
			table.insert(data.inflections, {args.upper or uppage, label = "upper case"})
		else
			table.insert(data.inflections, {label = "mixed case"})
			table.insert(data.inflections, {args.upper or uppage, label = "upper case"})
			table.insert(data.inflections, {args.lower or lopage, label = "lower case"})
		end
	end,
}
return export