Module:User:Chernorizets/sla-derived terms
Appearance
- This module sandbox lacks a documentation subpage. Please create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
--[=[
This module implements {{*-derived terms}} for various Slavic languages, which creates a list or table of
verbs derived from a base verb.
Potentially this supports all Slavic languages, but in practice additional work is needed for Ukrainian and
Belarusian to properly handle stressed prefixes (particularly Belarusian вы́-, Ukrainian ви́-). To add this
support, you need to provide the appropriate language-specific version of paste_prefix_suffix(); see
ru_paste_prefix_suffix() for the Russian version, which provides a starting point. (Handling Ukrainian and
Belarusian should be easier because these languages don't normally support or need manual transliteration.)
Author: Benwing2; rewritten from initial version by Erutuon.
FIXME:
1. Brackets. [DONE]
2. Period as prefix value (needed?). [DONE]
3. Properly propagate all modifiers. [DONE]
4. Consider adding default aspect to table if term occurs as both perfective and imperfective.
]=]
local export = {}
local m_table = require("Module:table")
local m_links = require("Module:links")
local rsplit = mw.text.split
local rsubn = mw.ustring.gsub
-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar)
local retval = rsubn(term, foo, bar)
return retval
end
local function transliterate(lang, term)
return (lang:transliterate(m_links.remove_links(term)))
end
local function default_paste_prefix_suffix(lang, prefix, prefix_tr, suffix, suffix_tr, aspect)
if not prefix_tr and not suffix_tr then
return prefix .. suffix, nil
end
prefix_tr = prefix_tr or transliterate(lang, prefix)
suffix_tr = suffix_tr or transliterate(lang, suffix)
return prefix .. suffix, prefix_tr .. suffix_tr
end
local function ru_paste_prefix_suffix(lang, prefix, prefix_tr, suffix, suffix_tr, aspect)
local com = require("Module:ru-common")
prefix_tr = prefix_tr and com.decompose(prefix_tr) or nil
suffix_tr = suffix_tr and com.decompose(suffix_tr) or nil
if aspect == "impf" then
prefix, prefix_tr = com.make_unstressed(prefix, prefix_tr)
end
if com.is_stressed(prefix) then
suffix, suffix_tr = com.make_unstressed(suffix, suffix_tr)
end
local verb, verb_tr = com.concat_russian_tr(prefix, prefix_tr, suffix, suffix_tr)
verb_tr = verb_tr and com.recompose(verb_tr) or nil
return com.remove_monosyllabic_accents(verb, verb_tr)
end
local function combine_qualifiers(q1, q2)
if q1 == nil then
return q2
elseif q2 == nil then
return q1
else
return q1 .. ", " .. q2
end
end
local function get_aspects(args)
local first_aspect, second_aspect
if args.impf_first then
first_aspect = "impf"
second_aspect = "pf"
else
first_aspect = "pf"
second_aspect = "impf"
end
return first_aspect, second_aspect
end
local modifiers = {"q", "qq", "t", "gloss", "tr", "ts", "g", "id", "alt", "pos", "lit"}
local function parse_aspect_pair(arg, arg_index, state, lang_module, args)
local pair = {}
local suffixes = false
if arg == "-" then
return false
end
local origarg = arg
if arg:find("^%*") then
suffixes = true
arg = rsub(arg, "^%*", "")
end
local function parse_err(msg)
error(msg .. ": " .. arg_index .. "=" .. origarg)
end
local function parse_term_with_modifiers(run)
local obj
local within_brackets = run[1]:match("^%[(.*)%]$")
if within_brackets then
obj = {term = within_brackets, brackets = true}
else
obj = {term = run[1]}
end
if obj.term == "." then
obj.term = ""
end
for j = 2, #run - 1, 2 do
if run[j + 1] ~= "" then
parse_err("Extraneous text '" .. run[j + 1] .. "' after modifier")
end
local modtext = run[j]:match("^<(.*)>$")
if not modtext then
parse_err("Internal error: Modifier '" .. modtext .. "' isn't surrounded by angle brackets")
end
local prefix, value = modtext:match("^([a-z]+):(.*)$")
if not prefix then
parse_err(("Modifier %s lacks a prefix, should begin with one of %s followed by a colon"):format(
run[j], table.concat(modifiers, ",")))
end
if not m_table.contains(modifiers, prefix) then
parse_err(("Unrecognized prefix '%s' in modifier %s, should be one of %s"):format(
prefix, run[j], table.concat(modifiers, ",")))
end
local dest = prefix
if prefix == "t" then
dest = "gloss"
elseif prefix == "g" then
dest = "genders"
end
if obj[dest] then
parse_err("Modifier '" .. prefix .. "' occurs twice, second occurrence " .. run[j])
end
obj[dest] = prefix == "g" and rsplit(value, "%s*,%s*") or value
end
return obj
end
if arg:find("<") then -- and not arg:find("^[^<]*<[a-z]*[^a-z:]") then
if not state.put then
state.put = require("Module:parse utilities")
end
local segments = state.put.parse_balanced_segment_run(arg, "<", ">")
local slash_separated_groups =
state.put.split_alternating_runs_and_frob_raw_text(segments, "/", state.put.strip_spaces)
if #slash_separated_groups == 1 then
pair.prefix = parse_term_with_modifiers(slash_separated_groups[1])
elseif #slash_separated_groups > 2 then
parse_err("Saw more than two slashes")
else
local function process_terms(segments)
local retval = {}
local comma_separated_groups =
state.put.split_alternating_runs_and_frob_raw_text(segments, ",", state.put.strip_spaces)
for _, comma_separated_group in ipairs(comma_separated_groups) do
table.insert(retval, parse_term_with_modifiers(comma_separated_group))
end
return retval
end
local firsts, seconds = unpack(slash_separated_groups)
pair.firsts = process_terms(firsts)
pair.seconds = process_terms(seconds)
end
else
local split_on_slash = rsplit(arg, "%s*/%s*")
if #split_on_slash == 1 then
pair.prefix = parse_term_with_modifiers({arg})
elseif #split_on_slash > 2 then
parse_err("Saw more than two slashes")
else
local function process_terms(terms)
local retval = {}
terms = rsplit(terms, "%s*,%s*")
for _, term in ipairs(terms) do
table.insert(retval, parse_term_with_modifiers({term}))
end
return retval
end
local firsts, seconds = unpack(split_on_slash)
pair.firsts = process_terms(firsts)
pair.seconds = process_terms(seconds)
end
end
if pair.prefix and pair.prefix.term:find(",") then
parse_err(("Commas not allowed in single prefix spec '%s'"):format(pair.prefix.term))
end
if suffixes then
if pair.prefix then
parse_err("Can't specify a single prefix with leading *")
end
local function remove_bare_hyphens(terms)
local retval = {}
for _, term in ipairs(terms) do
if term.term ~= "-" then
table.insert(retval, term)
end
end
return retval
end
state.first_suffixes = remove_bare_hyphens(pair.firsts)
state.second_suffixes = remove_bare_hyphens(pair.seconds)
if #state.first_suffixes == 0 and #state.second_suffixes == 0 then
parse_err("Need at least one perfective or imperfective suffix")
end
return nil
end
local first_aspect, second_aspect = get_aspects(args)
if pair.prefix then
-- A single prefix; combine with all template suffixes.
if not state.first_suffixes then
parse_err(
("Saw prefix '%s' with no preceding template suffixes (line beginning with *)"):format(pair.prefix.term)
)
end
pair.prefix.term = rsub(pair.prefix.term, "%-$", "")
if pair.prefix.tr then
pair.prefix.tr = rsub(pair.prefix.tr, "%-$", "")
end
-- Error on prefix properties we don't know how to handle.
for _, prop in ipairs {"ts", "alt", "genders", "id", "pos", "lit"} do
if pair.prefix[prop] then
parse_error(
("Can't handle property '%s' in prefix '%s'"):format(prop, pair.prefix.term))
end
end
local function prefix_template_suffixes(prefix, terms, aspect)
local retval = {}
for _, term in ipairs(terms) do
term = m_table.shallowCopy(term)
for _, prop in ipairs {"ts", "alt"} do
if term[prop] then
parse_err(
("For aspect=%s, can't handle property '%s' in suffix '%s' when combining with prefix"):
format(aspect, prop, term.term))
end
end
term.term, term.tr = lang_module.paste_prefix_suffix(lang_module.lang, prefix.term, prefix.tr,
term.term, term.tr, aspect)
table.insert(retval, term)
end
return retval
end
-- Do the prefixing.
pair.firsts = prefix_template_suffixes(pair.prefix, state.first_suffixes, first_aspect)
pair.seconds = prefix_template_suffixes(pair.prefix, state.second_suffixes, second_aspect)
-- Now propagate t= (goes into 'gloss') and qq= to the last resulting term, and q= to the first resulting term.
local last_term
if #pair.seconds > 0 then
last_term = pair.seconds[#pair.seconds]
else
last_term = pair.firsts[#pair.firsts]
end
last_term.qq = combine_qualifiers(last_term.qq, pair.prefix.qq)
if last_term.gloss and pair.prefix.gloss then
parse_err(("Can't override gloss '%s' of term '%s' with gloss '%s' of prefix '%s'"):
format(last_term.gloss, last_imp.term, pair.prefix.gloss, prefix.term))
elseif pair.prefix.gloss then
last_term.gloss = prefix.gloss
end
local first_term
if #pair.firsts > 0 then
first_term = pair.firsts[1]
else
first_term = pair.seconds[1]
end
first_term.q = combine_qualifiers(first_term.q, pair.prefix.q)
else
local function handle_aspect_terms(terms, template_suffixes, aspect)
local retval = {}
for i, term in ipairs(terms) do
if term.term ~= "-" then
if term.term:find("%-$") or term.term == "" then
-- prefix to add to corresponding template suffix
if #template_suffixes < i then
local numsuf = #template_suffixes
parse_err(
("For aspect=%s, term #%s=%s is a prefix but there %s only %s corresponding template suffix%s"):
format(aspect, i, term.term, numsuf == 1 and "is" or "are", numsuf,
numsuf == 1 and "" or "es"))
end
-- Fetch suffix; clone because we are modifying it destructively and may reuse it later for
-- another prefix.
local newterm = m_table.shallowCopy(template_suffixes[i])
-- Don't know how to combine ts= or alt= values.
for _, prop in ipairs {"ts", "alt"} do
if newterm[prop] or term[prop] then
parse_err(
("For aspect=%s, can't handle property '%s' in prefix '%s' or suffix '%s' when combining them"):
format(aspect, prop, term.term, newterm.term))
end
end
-- Combine term and translit, along with qualifiers and brackets.
newterm.term, newterm.tr =
lang_module.paste_prefix_suffix(lang_module.lang, rsub(term.term, "%-$", ""),
term.tr and rsub(term.tr, "%-$", "") or nil, newterm.term, newterm.tr, aspect)
newterm.q = combine_qualifiers(newterm.q, term.q)
newterm.qq = combine_qualifiers(newterm.qq, term.qq)
newterm.brackets = newterm.brackets or term.brackets
-- Remaining properties are copied from prefix to suffix if not already in suffix.
for _, prop in ipairs {"gloss", "genders", "id", "pos", "lit"} do
if newterm[prop] and term[prop] then
parse_err(
("For aspect=%s, can't handle property '%s' occurring along with both prefix '%s' and suffix '%s' when combining them"):
format(aspect, prop, term.term, newterm.term))
end
if term[prop] then
newterm[prop] = term[prop]
end
end
table.insert(retval, newterm)
else
table.insert(retval, term)
end
end
end
return retval
end
pair.firsts = handle_aspect_terms(pair.firsts, state.first_suffixes, first_aspect)
pair.seconds = handle_aspect_terms(pair.seconds, state.second_suffixes, second_aspect)
end
if #pair.firsts == 0 and #pair.seconds == 0 then
parse_err("Need at least one perfective or imperfective term")
end
return pair
end
local function parse_args(lang, args)
local lang_module = {lang = lang}
if lang:getCode() == "ru" then
lang_module.paste_prefix_suffix = ru_paste_prefix_suffix
else
lang_module.paste_prefix_suffix = default_paste_prefix_suffix
end
local state = {}
local groups = {}
local group = {}
for i, arg in ipairs(args[1]) do
local pair = parse_aspect_pair(arg, i, state, lang_module, args)
if pair == false then
if #group == 0 then
error("No items in group terminated by single hyphen in arg #" .. i)
end
table.insert(groups, group)
group = {}
elseif pair then
table.insert(group, pair)
end
end
if #group > 0 then
table.insert(groups, group)
end
return groups
end
local function format_aspect_terms(lang, args, term_groups, include_default_aspect)
local all_formatted_items = {}
for _, group in ipairs(term_groups) do
local group_formatted_items = {}
for _, items in ipairs(group) do
local sort_key = nil
local this_include_default_aspect = include_default_aspect
local function handle_aspect_terms(terms, aspect)
local term_parts = {}
for _, term in ipairs(terms) do
sort_key = sort_key or (lang:makeSortKey((lang:makeEntryName(term.term))))
local preq_text = term.q and require("Module:qualifier").format_qualifier(term.q) .. " " or ""
if not term.genders and this_include_default_aspect then
term.genders = {aspect}
end
term.lang = lang
local linked_term = m_links.full_link(term)
if term.brackets then
linked_term = "[" .. linked_term .. "]"
end
table.insert(term_parts, preq_text .. linked_term
.. (term.qq and " " .. require("Module:qualifier").format_qualifier(term.qq) or ""))
end
return table.concat(term_parts, ", ")
end
local first_aspect, second_aspect = get_aspects(args)
local switch_aspects
for _, term in ipairs(items.firsts) do
if term.genders and #term.genders == 1 and term.genders[1] == second_aspect then
switch_aspects = true
break
end
end
if switch_aspects then
this_include_default_aspect = true
local temp = first_aspect
first_aspect = second_aspect
second_aspect = temp
end
local firsts = handle_aspect_terms(items.firsts, first_aspect)
local seconds = handle_aspect_terms(items.seconds, second_aspect)
table.insert(group_formatted_items, {
firsts = firsts,
seconds = seconds,
sort_key = sort_key
})
end
table.sort(group_formatted_items, function(a, b) return a.sort_key < b.sort_key end)
for _, formatted_item in ipairs(group_formatted_items) do
table.insert(all_formatted_items, formatted_item)
end
end
return all_formatted_items
end
local function format_terms_as_list(lang, args, formatted_items)
for i, formatted_item in ipairs(formatted_items) do
if formatted_item.firsts == "" then
formatted_items[i] = formatted_item.seconds
elseif formatted_item.seconds == "" then
formatted_items[i] = formatted_item.firsts
else
formatted_items[i] = formatted_item.firsts .. ", " .. formatted_item.seconds
end
end
return require("Module:columns").create_list {
header = "verbs",
format_header = true,
content = formatted_items,
lang = lang,
column_count = args.ncol,
collapse = true,
}
end
local function format_terms_as_table(lang, args, formatted_items)
local lines = {}
local first_aspect_header, second_aspect_header
if args.impf_first then
first_aspect_header = "imperfective"
second_aspect_header = "perfective"
else
first_aspect_header = "perfective"
second_aspect_header = "imperfective"
end
table.insert(lines, '{| class="wikitable vsSwitcher" data-toggle-category="derived terms"\n! ' ..
first_aspect_header .. ' !! class="vsToggleElement" | ' .. second_aspect_header)
for i, formatted_item in ipairs(formatted_items) do
table.insert(lines, '|- class="vsHide"\n| ' .. formatted_item.firsts .. " || " ..
formatted_item.seconds)
end
table.insert(lines, "|}")
return table.concat(lines, "\n")
end
function export.imperfectives_and_perfectives(frame)
local iparams = {
["format"] = {default = "list"},
["lang"] = {},
}
local iargs = require("Module:parameters").process(frame.args, iparams)
local params = {
["format"] = {},
[1] = {list = true},
["ncol"] = {default = 2, type = "number"},
["impf_first"] = {type = "boolean"},
}
local args = require("Module:parameters").process(frame:getParent().args, params)
local format = args.format or iargs.format
local lang = require("Module:languages").getByCode(iargs.lang, "lang")
if format ~= "list" and format ~= "table" then
error(("Unrecognized format '%s'; possible values are 'list', 'table'"):format(format))
end
local groups = parse_args(lang, args)
local formatted_items = format_aspect_terms(lang, args, groups, format == "list")
return format == "list" and format_terms_as_list(lang, args, formatted_items) or
format_terms_as_table(lang, args, formatted_items)
end
return export