Jump to content

Module:User:Chernorizets/sla-derived terms

From Wiktionary, the free dictionary


--[=[
	This module implements {{*-derived terms}} for various Slavic languages, which creates a list or table of
	verbs derived from a base verb.

	Potentially this supports all Slavic languages, but in practice additional work is needed for Ukrainian and
	Belarusian to properly handle stressed prefixes (particularly Belarusian вы́-, Ukrainian ви́-). To add this
	support, you need to provide the appropriate language-specific version of paste_prefix_suffix(); see
	ru_paste_prefix_suffix() for the Russian version, which provides a starting point. (Handling Ukrainian and
	Belarusian should be easier because these languages don't normally support or need manual transliteration.)

	Author: Benwing2; rewritten from initial version by Erutuon.

	FIXME:
	1. Brackets. [DONE]
	2. Period as prefix value (needed?). [DONE]
	3. Properly propagate all modifiers. [DONE]
	4. Consider adding default aspect to table if term occurs as both perfective and imperfective.
]=]

local export = {}

local m_table = require("Module:table")
local m_links = require("Module:links")

-- Short local alias for mw.text.split; used below to split argument text on slash and comma patterns.
local rsplit = mw.text.split
-- Short local alias for mw.ustring.gsub; rsub() below wraps it and keeps only the first return value.
local rsubn = mw.ustring.gsub

-- Single-result wrapper around rsubn(): performs the substitution of `foo` by `bar` in `term` and returns only
-- the resulting string. The parentheses around the call truncate the result list to its first value.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end

-- Transliterate `term` with the language object `lang`, after passing it through m_links.remove_links().
-- Only the first value returned by lang:transliterate() is handed back to the caller.
local function transliterate(lang, term)
	local translit = lang:transliterate(m_links.remove_links(term))
	return translit
end

-- Language-independent way of joining a prefix and a suffix.
--
-- Returns two values: the concatenated term and the concatenated transliteration. The transliteration is nil
-- when neither `prefix_tr` nor `suffix_tr` was supplied; when only one of them was supplied, the missing half
-- is produced by transliterate(). `aspect` is accepted for signature compatibility with the language-specific
-- variants and is not used here.
local function default_paste_prefix_suffix(lang, prefix, prefix_tr, suffix, suffix_tr, aspect)
	if prefix_tr or suffix_tr then
		-- At least one manual transliteration exists, so fill in whichever half is missing.
		local left_tr = prefix_tr or transliterate(lang, prefix)
		local right_tr = suffix_tr or transliterate(lang, suffix)
		return prefix .. suffix, left_tr .. right_tr
	end
	return prefix .. suffix, nil
end

-- Russian-specific way of joining a prefix and a suffix, delegating to helpers in [[Module:ru-common]].
--
-- Steps, in order:
--   1. Manual transliterations (when given) are passed through decompose().
--   2. For aspect "impf", the prefix (and its transliteration) is passed through make_unstressed().
--   3. If is_stressed() reports the prefix as stressed, the suffix is passed through make_unstressed().
--   4. The pieces are joined with concat_russian_tr(), the transliteration (if any) is passed through
--      recompose(), and the result of remove_monosyllabic_accents() is returned.
-- `lang` is accepted for signature compatibility and is not used here.
local function ru_paste_prefix_suffix(lang, prefix, prefix_tr, suffix, suffix_tr, aspect)
	local ru_common = require("Module:ru-common")

	-- Normalize an optional transliteration: decompose it if present, otherwise yield nil.
	local function decompose_or_nil(tr)
		return tr and ru_common.decompose(tr) or nil
	end

	prefix_tr = decompose_or_nil(prefix_tr)
	suffix_tr = decompose_or_nil(suffix_tr)

	if aspect == "impf" then
		prefix, prefix_tr = ru_common.make_unstressed(prefix, prefix_tr)
	end
	if ru_common.is_stressed(prefix) then
		suffix, suffix_tr = ru_common.make_unstressed(suffix, suffix_tr)
	end

	local joined, joined_tr = ru_common.concat_russian_tr(prefix, prefix_tr, suffix, suffix_tr)
	joined_tr = joined_tr and ru_common.recompose(joined_tr) or nil
	return ru_common.remove_monosyllabic_accents(joined, joined_tr)
end

-- Merge two optional qualifier strings. If both are present they are joined with ", "; if only one is
-- present it is returned unchanged; if neither is present the result is nil.
local function combine_qualifiers(q1, q2)
	if q1 ~= nil and q2 ~= nil then
		return q1 .. ", " .. q2
	end
	if q1 == nil then
		return q2
	end
	return q1
end

-- Return the aspect codes for the first and second column, in that order. The default order is
-- "pf", "impf"; a truthy `args.impf_first` reverses it.
local function get_aspects(args)
	if args.impf_first then
		return "impf", "pf"
	end
	return "pf", "impf"
end

-- Prefixes accepted in inline modifiers of the form <prefix:value> attached to a term. parse_aspect_pair()
-- rejects any other prefix; "t" is stored under the key "gloss" and "g" under the key "genders".
local modifiers = {"q", "qq", "t", "gloss", "tr", "ts", "g", "id", "alt", "pos", "lit"}

-- Parse one positional template argument into an aspect pair.
--
-- Parameters:
--   arg: raw argument text.
--   arg_index: position of the argument; used only in error messages.
--   state: table shared across calls. Holds `first_suffixes` / `second_suffixes` (the template suffixes set by
--     the most recent argument beginning with `*`) and `put` (lazily loaded [[Module:parse utilities]]).
--   lang_module: table with fields `lang` and `paste_prefix_suffix` (the function used to join prefix and suffix).
--   args: processed template arguments; only `impf_first` is consulted (through get_aspects()).
--
-- Returns:
--   false  if `arg` is a bare hyphen (group separator);
--   nil    if `arg` begins with `*` (the template suffixes were stored in `state`);
--   otherwise a table with list fields `firsts` and `seconds` holding term objects (and `prefix` when the
--   argument was a single prefix spec). A term object has `term`, optionally `brackets`, plus one field per
--   inline modifier (`t` is stored as `gloss`, `g` as the list `genders`).
--
-- Raises an error (via parse_err, which appends the argument index and original text) on malformed input.
local function parse_aspect_pair(arg, arg_index, state, lang_module, args)
	local pair = {}
	local suffixes = false
	if arg == "-" then
		return false
	end
	local origarg = arg

	-- A leading asterisk marks a line that defines template suffixes rather than actual terms.
	if arg:find("^%*") then
		suffixes = true
		arg = rsub(arg, "^%*", "")
	end

	local function parse_err(msg)
		error(msg .. ": " .. arg_index .. "=" .. origarg)
	end

	-- Convert a run of the form {term, "<mod:value>", "", "<mod:value>", "", ...} into a term object.
	local function parse_term_with_modifiers(run)
		local obj
		local within_brackets = run[1]:match("^%[(.*)%]$")
		if within_brackets then
			obj = {term = within_brackets, brackets = true}
		else
			obj = {term = run[1]}
		end
		-- A lone period stands for the empty string.
		if obj.term == "." then
			obj.term = ""
		end

		for j = 2, #run - 1, 2 do
			if run[j + 1] ~= "" then
				parse_err("Extraneous text '" .. run[j + 1] .. "' after modifier")
			end
			local modtext = run[j]:match("^<(.*)>$")
			if not modtext then
				-- `modtext` is nil here, so report the raw segment instead.
				parse_err("Internal error: Modifier '" .. run[j] .. "' isn't surrounded by angle brackets")
			end
			local prefix, value = modtext:match("^([a-z]+):(.*)$")
			if not prefix then
				parse_err(("Modifier %s lacks a prefix, should begin with one of %s followed by a colon"):format(
					run[j], table.concat(modifiers, ",")))
			end
			if not m_table.contains(modifiers, prefix) then
				parse_err(("Unrecognized prefix '%s' in modifier %s, should be one of %s"):format(
					prefix, run[j], table.concat(modifiers, ",")))
			end
			local dest = prefix
			if prefix == "t" then
				dest = "gloss"
			elseif prefix == "g" then
				dest = "genders"
			end
			if obj[dest] then
				parse_err("Modifier '" .. prefix .. "' occurs twice, second occurrence " .. run[j])
			end
			obj[dest] = prefix == "g" and rsplit(value, "%s*,%s*") or value
		end

		return obj
	end

	if arg:find("<") then -- and not arg:find("^[^<]*<[a-z]*[^a-z:]") then
		-- Inline modifiers are present; use the balanced-segment parser so that slashes and commas inside
		-- angle brackets are not treated as separators.
		if not state.put then
			state.put = require("Module:parse utilities")
		end

		local segments = state.put.parse_balanced_segment_run(arg, "<", ">")
		local slash_separated_groups =
			state.put.split_alternating_runs_and_frob_raw_text(segments, "/", state.put.strip_spaces)
		if #slash_separated_groups == 1 then
			pair.prefix = parse_term_with_modifiers(slash_separated_groups[1])
		elseif #slash_separated_groups > 2 then
			parse_err("Saw more than two slashes")
		else
			local function process_terms(segments)
				local retval = {}
				local comma_separated_groups =
					state.put.split_alternating_runs_and_frob_raw_text(segments, ",", state.put.strip_spaces)
				for _, comma_separated_group in ipairs(comma_separated_groups) do
					table.insert(retval, parse_term_with_modifiers(comma_separated_group))
				end
				return retval
			end

			local firsts, seconds = unpack(slash_separated_groups)
			pair.firsts = process_terms(firsts)
			pair.seconds = process_terms(seconds)
		end
	else
		-- No modifiers; plain splitting on slash and comma suffices.
		local split_on_slash = rsplit(arg, "%s*/%s*")
		if #split_on_slash == 1 then
			pair.prefix = parse_term_with_modifiers({arg})
		elseif #split_on_slash > 2 then
			parse_err("Saw more than two slashes")
		else
			local function process_terms(terms)
				local retval = {}
				terms = rsplit(terms, "%s*,%s*")
				for _, term in ipairs(terms) do
					table.insert(retval, parse_term_with_modifiers({term}))
				end
				return retval
			end

			local firsts, seconds = unpack(split_on_slash)
			pair.firsts = process_terms(firsts)
			pair.seconds = process_terms(seconds)
		end
	end

	if pair.prefix and pair.prefix.term:find(",") then
		parse_err(("Commas not allowed in single prefix spec '%s'"):format(pair.prefix.term))
	end

	if suffixes then
		if pair.prefix then
			parse_err("Can't specify a single prefix with leading *")
		end
		local function remove_bare_hyphens(terms)
			local retval = {}
			for _, term in ipairs(terms) do
				if term.term ~= "-" then
					table.insert(retval, term)
				end
			end
			return retval
		end

		state.first_suffixes = remove_bare_hyphens(pair.firsts)
		state.second_suffixes = remove_bare_hyphens(pair.seconds)
		if #state.first_suffixes == 0 and #state.second_suffixes == 0 then
			parse_err("Need at least one perfective or imperfective suffix")
		end
		return nil
	end

	local first_aspect, second_aspect = get_aspects(args)

	if pair.prefix then
		-- A single prefix; combine with all template suffixes.
		if not state.first_suffixes then
			parse_err(
				("Saw prefix '%s' with no preceding template suffixes (line beginning with *)"):format(pair.prefix.term)
			)
		end
		pair.prefix.term = rsub(pair.prefix.term, "%-$", "")
		if pair.prefix.tr then
			pair.prefix.tr = rsub(pair.prefix.tr, "%-$", "")
		end

		-- Error on prefix properties we don't know how to handle.
		for _, prop in ipairs {"ts", "alt", "genders", "id", "pos", "lit"} do
			if pair.prefix[prop] then
				parse_err(
					("Can't handle property '%s' in prefix '%s'"):format(prop, pair.prefix.term))
			end
		end

		local function prefix_template_suffixes(prefix, terms, aspect)
			local retval = {}
			for _, term in ipairs(terms) do
				-- Copy so that the stored template suffix is not modified and can be reused for later prefixes.
				term = m_table.shallowCopy(term)
				for _, prop in ipairs {"ts", "alt"} do
					if term[prop] then
						parse_err(
							("For aspect=%s, can't handle property '%s' in suffix '%s' when combining with prefix"):
							format(aspect, prop, term.term))
					end
				end
				term.term, term.tr = lang_module.paste_prefix_suffix(lang_module.lang, prefix.term, prefix.tr,
					term.term, term.tr, aspect)
				table.insert(retval, term)
			end
			return retval
		end

		-- Do the prefixing.
		pair.firsts = prefix_template_suffixes(pair.prefix, state.first_suffixes, first_aspect)
		pair.seconds = prefix_template_suffixes(pair.prefix, state.second_suffixes, second_aspect)

		-- Now propagate t= (goes into 'gloss') and qq= to the last resulting term, and q= to the first resulting term.
		local last_term
		if #pair.seconds > 0 then
			last_term = pair.seconds[#pair.seconds]
		else
			last_term = pair.firsts[#pair.firsts]
		end
		last_term.qq = combine_qualifiers(last_term.qq, pair.prefix.qq)
		if last_term.gloss and pair.prefix.gloss then
			parse_err(("Can't override gloss '%s' of term '%s' with gloss '%s' of prefix '%s'"):
			format(last_term.gloss, last_term.term, pair.prefix.gloss, pair.prefix.term))
		elseif pair.prefix.gloss then
			last_term.gloss = pair.prefix.gloss
		end
		local first_term
		if #pair.firsts > 0 then
			first_term = pair.firsts[1]
		else
			first_term = pair.seconds[1]
		end
		first_term.q = combine_qualifiers(first_term.q, pair.prefix.q)
	else
		local function handle_aspect_terms(terms, template_suffixes, aspect)
			local retval = {}
			for i, term in ipairs(terms) do
				if term.term ~= "-" then
					if term.term:find("%-$") or term.term == "" then
						-- prefix to add to corresponding template suffix
						-- `template_suffixes` is nil when no line beginning with * has been seen yet; treat that
						-- as zero available suffixes so the user gets a descriptive error.
						local numsuf = template_suffixes and #template_suffixes or 0
						if numsuf < i then
							parse_err(
								("For aspect=%s, term #%s=%s is a prefix but there %s only %s corresponding template suffix%s"):
								format(aspect, i, term.term, numsuf == 1 and "is" or "are", numsuf,
								numsuf == 1 and "" or "es"))
						end

						-- Fetch suffix; clone because we are modifying it destructively and may reuse it later for
						-- another prefix.
						local newterm = m_table.shallowCopy(template_suffixes[i])

						-- Don't know how to combine ts= or alt= values.
						for _, prop in ipairs {"ts", "alt"} do
							if newterm[prop] or term[prop] then
								parse_err(
									("For aspect=%s, can't handle property '%s' in prefix '%s' or suffix '%s' when combining them"):
									format(aspect, prop, term.term, newterm.term))
							end
						end

						-- Combine term and translit, along with qualifiers and brackets.
						newterm.term, newterm.tr =
							lang_module.paste_prefix_suffix(lang_module.lang, rsub(term.term, "%-$", ""),
								term.tr and rsub(term.tr, "%-$", "") or nil, newterm.term, newterm.tr, aspect)
						newterm.q = combine_qualifiers(newterm.q, term.q)
						newterm.qq = combine_qualifiers(newterm.qq, term.qq)
						newterm.brackets = newterm.brackets or term.brackets

						-- Remaining properties are copied from prefix to suffix if not already in suffix.
						for _, prop in ipairs {"gloss", "genders", "id", "pos", "lit"} do
							if newterm[prop] and term[prop] then
								parse_err(
									("For aspect=%s, can't handle property '%s' occurring along with both prefix '%s' and suffix '%s' when combining them"):
									format(aspect, prop, term.term, newterm.term))
							end
							if term[prop] then
								newterm[prop] = term[prop]
							end
						end

						table.insert(retval, newterm)
					else
						-- A full term; use as-is.
						table.insert(retval, term)
					end
				end
			end
			return retval
		end
		pair.firsts = handle_aspect_terms(pair.firsts, state.first_suffixes, first_aspect)
		pair.seconds = handle_aspect_terms(pair.seconds, state.second_suffixes, second_aspect)
	end

	if #pair.firsts == 0 and #pair.seconds == 0 then
		parse_err("Need at least one perfective or imperfective term")
	end

	return pair
end

-- Parse all positional arguments (`args[1]`, a list) into groups of aspect pairs.
--
-- Each argument is handed to parse_aspect_pair(). A result of false (bare hyphen) closes the current group,
-- nil (template-suffix line) contributes nothing, and any other result is appended to the current group.
-- Russian uses ru_paste_prefix_suffix for joining prefixes and suffixes; every other language uses
-- default_paste_prefix_suffix. Returns the list of groups; raises an error if a hyphen closes an empty group.
local function parse_args(lang, args)
	local is_russian = lang:getCode() == "ru"
	local lang_module = {
		lang = lang,
		paste_prefix_suffix = is_russian and ru_paste_prefix_suffix or default_paste_prefix_suffix,
	}

	local state = {}
	local groups = {}
	local current = {}
	for i, arg in ipairs(args[1]) do
		local pair = parse_aspect_pair(arg, i, state, lang_module, args)
		if pair then
			current[#current + 1] = pair
		elseif pair == false then
			-- Group separator.
			if #current == 0 then
				error("No items in group terminated by single hyphen in arg #" .. i)
			end
			groups[#groups + 1] = current
			current = {}
		end
	end

	-- Flush the trailing group, if it has any content.
	if #current > 0 then
		groups[#groups + 1] = current
	end
	return groups
end

-- Turn parsed groups of aspect pairs into display rows.
--
-- For each pair a row {firsts = <string>, seconds = <string>, sort_key = <key>} is built, where `firsts` and
-- `seconds` are comma-joined linked terms (with q= qualifier before and qq= qualifier after each term, and
-- square brackets around terms flagged with `brackets`). The sort key comes from the first term formatted in
-- the pair. Rows are sorted by sort key within each group and the groups are concatenated in order.
--
-- When `include_default_aspect` is truthy, terms lacking explicit genders are given the column's aspect as
-- gender. If some term in the first column is explicitly marked with exactly the second column's aspect, the
-- two default aspects are swapped for that pair and default aspects are always shown.
-- Note that the term objects are modified in place (`genders`, `lang`).
local function format_aspect_terms(lang, args, term_groups, include_default_aspect)
	local result = {}
	for _, group in ipairs(term_groups) do
		local rows = {}
		for _, items in ipairs(group) do
			local first_aspect, second_aspect = get_aspects(args)
			local show_aspect = include_default_aspect

			-- Detect a first-column term explicitly tagged with the second column's aspect.
			for _, term in ipairs(items.firsts) do
				local genders = term.genders
				if genders and #genders == 1 and genders[1] == second_aspect then
					show_aspect = true
					first_aspect, second_aspect = second_aspect, first_aspect
					break
				end
			end

			local sort_key = nil
			local function render(terms, aspect)
				local parts = {}
				for _, term in ipairs(terms) do
					if not sort_key then
						sort_key = lang:makeSortKey((lang:makeEntryName(term.term)))
					end
					local before = ""
					if term.q then
						before = require("Module:qualifier").format_qualifier(term.q) .. " "
					end
					if show_aspect and not term.genders then
						term.genders = {aspect}
					end
					term.lang = lang
					local linked = m_links.full_link(term)
					if term.brackets then
						linked = "[" .. linked .. "]"
					end
					local after = ""
					if term.qq then
						after = " " .. require("Module:qualifier").format_qualifier(term.qq)
					end
					parts[#parts + 1] = before .. linked .. after
				end
				return table.concat(parts, ", ")
			end

			local firsts = render(items.firsts, first_aspect)
			local seconds = render(items.seconds, second_aspect)
			rows[#rows + 1] = {
				firsts = firsts,
				seconds = seconds,
				sort_key = sort_key
			}
		end

		table.sort(rows, function(a, b) return a.sort_key < b.sort_key end)
		for _, row in ipairs(rows) do
			result[#result + 1] = row
		end
	end
	return result
end

-- Render the formatted rows as a collapsible column list via [[Module:columns]].
--
-- Each row is replaced IN PLACE in `formatted_items` by a single string: the non-empty one of
-- `firsts`/`seconds`, or both joined with ", " when neither is empty. The column count is taken from
-- `args.ncol`.
local function format_terms_as_list(lang, args, formatted_items)
	for i, item in ipairs(formatted_items) do
		local firsts, seconds = item.firsts, item.seconds
		local line
		if firsts == "" then
			line = seconds
		elseif seconds == "" then
			line = firsts
		else
			line = firsts .. ", " .. seconds
		end
		formatted_items[i] = line
	end

	local list_spec = {
		header = "verbs",
		format_header = true,
		content = formatted_items,
		lang = lang,
		column_count = args.ncol,
		collapse = true,
	}
	return require("Module:columns").create_list(list_spec)
end

-- Render the formatted rows as wikitext for a two-column table with one row per aspect pair.
--
-- The column headers are "perfective" and "imperfective", in that order unless `args.impf_first` is truthy.
-- `lang` is accepted for signature parity with format_terms_as_list() and is not used here.
local function format_terms_as_table(lang, args, formatted_items)
	local first_header, second_header = "perfective", "imperfective"
	if args.impf_first then
		first_header, second_header = second_header, first_header
	end

	local lines = {
		'{| class="wikitable vsSwitcher" data-toggle-category="derived terms"\n! ' ..
			first_header .. ' !! class="vsToggleElement" | ' .. second_header,
	}
	for _, item in ipairs(formatted_items) do
		lines[#lines + 1] = '|- class="vsHide"\n| ' .. item.firsts .. " || " .. item.seconds
	end
	lines[#lines + 1] = "|}"

	return table.concat(lines, "\n")
end

-- Entry point invoked from a template.
--
-- Invocation arguments (frame.args): `format` (default "list") and `lang` (language code).
-- Template arguments (parent frame): `format` (overrides the invocation value), positional list `1`,
-- `ncol` (number, default 2) and `impf_first` (boolean).
-- Returns the output of format_terms_as_list() or format_terms_as_table() depending on the format;
-- raises an error for any format other than "list" or "table".
function export.imperfectives_and_perfectives(frame)
	local process = require("Module:parameters").process

	local iargs = process(frame.args, {
		["format"] = {default = "list"},
		["lang"] = {},
	})
	local args = process(frame:getParent().args, {
		["format"] = {},
		[1] = {list = true},
		["ncol"] = {default = 2, type = "number"},
		["impf_first"] = {type = "boolean"},
	})

	local format = args.format or iargs.format
	local lang = require("Module:languages").getByCode(iargs.lang, "lang")

	local as_list = format == "list"
	if not as_list and format ~= "table" then
		error(("Unrecognized format '%s'; possible values are 'list', 'table'"):format(format))
	end

	local groups = parse_args(lang, args)
	-- Default aspects are only attached to terms in list format.
	local formatted_items = format_aspect_terms(lang, args, groups, as_list)
	return as_list and format_terms_as_list(lang, args, formatted_items) or
		format_terms_as_table(lang, args, formatted_items)
end

return export