Jump to content

Module:la-utilities

From Wiktionary, the free dictionary

Module for working with Latin text.

Functions:

  • strip_macrons(text): Return text minus macrons, breves, etc.
  • make_stem2(stem): Return third-declension stem based on nominative singular.

See also:


-- Table of functions and data exposed by this module; returned at the end of
-- the file.
local export = {}

-- Names of the modules that the lazy-loader functions below pass to `require`
-- on first use.
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"

-- Local aliases for globals and library functions. Entries marked
-- "defined below" are forward declarations: they are assigned later in this
-- file, once the corresponding `export.*` function has been defined.
local form_is_empty -- defined below
local forms_equal -- defined below
local ipairs = ipairs
local join -- defined below
local match = string.match
local normalize_form -- defined below
local pairs = pairs
local remove = table.remove
local require = require
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local type = type
local u = mw.ustring.char
local umatch = mw.ustring.match

-- The character U+0304 (combining macron).
local MACRON = u(0x304)
-- Character class matching one vowel letter of either case, including the
-- ligatures æ and œ.
local VOWEL = "[aæeioœuyAÆEIOŒUY]"

-- Lazy loader for `contains` from the table module. The first call fetches
-- the real function, rebinds this local to it so later calls go straight to
-- the real function, and forwards the arguments.
local function contains(...)
	local loaded = require(table_module).contains
	contains = loaded
	return loaded(...)
end

-- Lazy loader for `decode_entities` from the string utilities module. The
-- first call fetches the real function, rebinds this local to it, and
-- forwards the arguments.
local function decode_entities(...)
	local loaded = require(string_utilities_module).decode_entities
	decode_entities = loaded
	return loaded(...)
end

-- Lazy loader for `deepEquals` from the table module. The first call fetches
-- the real function, rebinds this local to it, and forwards the arguments.
local function deep_equals(...)
	local loaded = require(table_module).deepEquals
	deep_equals = loaded
	return loaded(...)
end

-- Lazy loader for `insertIfNot` from the table module. The first call fetches
-- the real function, rebinds this local to it, and forwards the arguments.
local function insert_if_not(...)
	local loaded = require(table_module).insertIfNot
	insert_if_not = loaded
	return loaded(...)
end

-- Lazy loader for `length` from the table module. The first call fetches the
-- real function, rebinds this local to it, and forwards the arguments.
local function table_len(...)
	local loaded = require(table_module).length
	table_len = loaded
	return loaded(...)
end

-- Lazy loader for `trim` from the string utilities module. The first call
-- fetches the real function, rebinds this local to it, and forwards the
-- arguments.
local function trim(...)
	local loaded = require(string_utilities_module).trim
	trim = loaded
	return loaded(...)
end

-- Lazy loader for `gsub` from the string utilities module. The first call
-- fetches the real function, rebinds this local to it, and forwards the
-- arguments.
local function ugsub(...)
	local loaded = require(string_utilities_module).gsub
	ugsub = loaded
	return loaded(...)
end

-- Maps each three-letter case abbreviation to the full name of the case.
export.cases = {
	nom = "nominative",
	gen = "genitive",
	dat = "dative",
	acc = "accusative",
	abl = "ablative",
	voc = "vocative",
	loc = "locative",
}

-- Replacement table used by `join`: maps the letters j/v (either case) to
-- i/u of the same case.
local cons_to_vowel = {
	j = "i",
	J = "I",
	v = "u",
	V = "U",
}

-- Returns `text` unchanged if it already contains a "[[...]]" sequence.
-- Otherwise wraps it in "[[" and "]]", keeping any leading and trailing
-- whitespace outside the brackets.
local function link_if_unlinked(text)
	if text:match("%[%[.-]]") then
		return text
	end
	-- The parentheses discard the extra return value(s) of ugsub, so only the
	-- resulting string is returned.
	return (ugsub(text, "^(%s*)(.-)(%s*)$", "%1[[%2]]%3"))
end

-- Concatenates two pieces of a form (`a` followed by `b`) and returns the
-- result in NFC. Both pieces are decomposed to NFD before being examined.
function export.join(a, b)
	local head = toNFD(a)
	local tail = toNFD(b)
	-- A final "j" or "v" on the first piece only stays as it is when the
	-- second piece starts with a vowel; otherwise it becomes "i" or "u".
	local tail_starts_with_vowel = umatch(tail, "^" .. VOWEL)
	if not tail_starts_with_vowel then
		head = head:gsub("[jvJV]$", cons_to_vowel)
	end
	-- Whitespace at the seam means the two pieces are separate words, so each
	-- one is linked on its own.
	if umatch(head, "%s$") or umatch(tail, "^%s") then
		head = link_if_unlinked(head)
		tail = link_if_unlinked(tail)
	end
	return toNFC(head .. tail)
end
join = export.join

-- Reduces a form to its simplest representation.
--
--   * nil, false and "" give nil.
--   * Any other string is returned as is.
--   * Otherwise `form` is treated as a list, and every element that itself
--     normalizes to nil is removed from it IN PLACE. The result is then nil
--     if no elements remain, the normalized sole element if exactly one
--     remains, and the (modified) list itself otherwise.
function export.normalize_form(form)
	if not form or form == "" then
		return nil
	end
	if type(form) == "string" then
		return form
	end
	-- Walk the list, only advancing past elements that are kept: removing an
	-- element shifts the following ones down into the current index.
	local idx = 1
	local item = form[idx]
	while item ~= nil do
		if normalize_form(item) == nil then
			remove(form, idx)
		else
			idx = idx + 1
		end
		item = form[idx]
	end
	local remaining = table_len(form)
	if remaining == 0 then
		return nil
	end
	if remaining == 1 then
		return normalize_form(form[1])
	end
	return form
end
normalize_form = export.normalize_form

-- Reports whether `form` counts as empty.
--
-- `form` is first passed through normalize_form. The result is empty when:
--   * it is nil or the empty string;
--   * it is a non-table value which, after being passed through
--     decode_entities, equals "-" or "—";
--   * it is a list and every one of its elements is itself empty (checked
--     recursively).
--
-- Always returns a real boolean.
function export.form_is_empty(form)
	form = normalize_form(form)
	if not form or form == "" then
		return true
	elseif type(form) ~= "table" then
		form = decode_entities(form)
		return form == "-" or form == "—"
	end
	for _, formval in ipairs(form) do
		if not form_is_empty(formval) then
			return false
		end
	end
	-- Every element was empty. Without this explicit return the function
	-- would fall off the end and yield nil, which callers would read as
	-- "not empty".
	return true
end
form_is_empty = export.form_is_empty

-- Compares two forms for equality. A form may be a single string or a list
-- of forms; both arguments are normalized first, so a string and a
-- one-element list holding that string compare equal.
function export.forms_equal(form1, form2)
	local left = normalize_form(form1)
	local right = normalize_form(form2)
	return deep_equals(left, right)
end
forms_equal = export.forms_equal

-- Option table handed to `contains` and `insert_if_not`, so that list
-- elements are compared with forms_equal. It is shared: add_form also sets
-- its `pos` field before each insertion.
local options = {comparison = forms_equal}

-- Returns true if `forms` contains `form`, false otherwise. `forms` may be
-- nil/false (never contains anything), a list (searched with `contains`), or
-- any other single value (compared to `form` with ==).
function export.form_contains(forms, form)
	if not forms then
		return false
	end
	if type(forms) == "table" then
		-- Coerce whatever `contains` returns into a real boolean.
		return not not contains(forms, form, options)
	end
	return forms == form
end

-- Adds one or more values to the form stored under `key` in `forms`
-- (e.g. "1s_pres_actv_indc"). A value that is already present is not added a
-- second time.
--
-- Each value is `stem` joined to `suf`. Either may be a list: every stem is
-- combined with every suffix, and all resulting values are added. Nothing is
-- added if `suf` is nil.
--
-- `pos` is the position at which to insert; by default values are appended.
-- Pass 1 to insert at the beginning.
do
	-- Joins a single stem to a single suffix and stores the result under
	-- `key`. Returns the position to use for the next insertion.
	local function _add_form(forms, key, stem, suf, pos)
		local existing = forms[key]
		local candidate = join(stem, suf)
		if not existing then
			-- Nothing stored yet: store the bare value. Nothing is returned,
			-- so the caller's position becomes nil.
			forms[key] = candidate
			return
		end
		if existing == candidate then
			return pos
		end
		if type(existing) ~= "table" then
			-- Promote a single stored value to a list so it can be extended.
			existing = {existing}
			forms[key] = existing
		end
		options.pos = pos
		local inserted = insert_if_not(existing, candidate, options)
		-- After a successful positioned insert, the next value goes directly
		-- after the one just inserted.
		if pos ~= nil and inserted then
			return pos + 1
		end
		return pos
	end

	-- Adds one stem combined with each suffix in `suf` (a single suffix or a
	-- list of them). Returns the position to use for the next insertion.
	local function add_stem(forms, key, stem, suf, pos)
		if suf == nil then
			return
		end
		if type(suf) == "table" then
			for _, ending in ipairs(suf) do
				pos = _add_form(forms, key, stem, ending, pos)
			end
			return pos
		end
		return _add_form(forms, key, stem, suf, pos)
	end

	function export.add_form(forms, key, stem, suf, pos)
		-- Normalize `pos`: anything at or below 1 means "at the beginning";
		-- if nothing is stored yet, or `pos` lies beyond the stored list,
		-- fall back to appending (nil).
		if pos then
			local existing = forms[key]
			if not existing then
				pos = nil
			elseif pos <= 1 then
				pos = 1
			elseif type(existing) ~= "table" or not (pos <= table_len(existing)) then
				pos = nil
			end
		end
		if type(stem) == "table" then
			for _, base in ipairs(stem) do
				pos = add_stem(forms, key, base, suf, pos)
			end
			return
		end
		add_stem(forms, key, stem, suf, pos)
	end
end

do
	-- Forward declaration: check_exceptions and check_keytypes call each
	-- other.
	local check_keytypes

	-- Returns true if `slot` matches any pattern in `exceptions`. Otherwise,
	-- if `keytypes` was supplied, goes on to test `slot` against it (which
	-- may remove the slot) and returns nothing.
	local function check_exceptions(slot, forms, keytypes, exceptions)
		for _, exception in ipairs(exceptions) do
			if match(slot, exception) then
				return true
			end
		end
		if not keytypes then
			return
		end
		check_keytypes(slot, forms, keytypes)
	end

	-- Removes `slot` from `forms` if it matches any pattern in `keytypes`,
	-- unless `exceptions` was supplied and `slot` matches one of those.
	check_keytypes = function(slot, forms, keytypes, exceptions)
		for _, keytype in ipairs(keytypes) do
			if match(slot, keytype) then
				local excepted = exceptions and check_exceptions(slot, forms, nil, exceptions)
				if not excepted then
					forms[slot] = nil
					return
				end
			end
		end
	end

	-- Deletes from `forms` every entry whose key matches one of the patterns
	-- in the list `keytypes`, except for keys that also match one of the
	-- patterns in the optional list `exceptions`.
	function export.remove_forms(forms, keytypes, exceptions)
		-- Start with whichever list is shorter.
		local func
		if exceptions == nil or #exceptions >= #keytypes then
			func = check_keytypes
		else
			func = check_exceptions
		end
		for slot in pairs(forms) do
			func(slot, forms, keytypes, exceptions)
		end
	end
end

-- Ending rewrites used by make_stem2. Each entry is {pattern, replacement}.
-- make_stem2 anchors the pattern to the end of the input by appending "$",
-- tries the entries in order and stops at the first one that matches, so more
-- specific endings must come before the more general ones they overlap with
-- (e.g. "us" before "s", "ex" before "x").
--
-- In replacement strings, "%0" is the whole match and "%1", "%2" are the
-- pattern's captures.
local patterns = {
	{"[mM]a", "%0t"},
	{"e", ""},
	{"([aoAO])([lr])", "%1" .. MACRON .. "%2"},
	{"[eE]l", "%0l"},
	{"([mM])en", "%1in"},
	{"([tT]ūd)ō", "%1in"},
	{"([gG])ō", "%1in"},
	-- This pattern has no capture, so the whole match must be referenced as
	-- "%0", as in the other capture-less entries.
	{"[ōŌ]", "%0n"},
	{"er", "r"},
	{"[ēi]s", ""},
	-- Strip the macron from the vowel before "n" and replace the final "s"
	-- with "t".
	{"([āēīōūȳĀĒĪŌŪȲ]n)s", function(v)
		return toNFD(v):gsub(MACRON, "") .. "t"
	end},
	{"([cC])eps", "%1ipit"},
	{"([bp])s", "%1"},
	{"us", "or"},
	{"s", "t"},
	{"ex", "ic"},
	{"x", "c"},
}

-- Derives a second stem from `stem` by applying the first entry of `patterns`
-- whose pattern matches the end of the input.
--
-- On a match the rewritten string is returned in NFC, and the tracking key
-- "la-utilities/<pattern>" is recorded. If no entry matches, the key
-- "la-utilities" is recorded and the input is returned.
function export.make_stem2(stem)
	for _, entry in ipairs(patterns) do
		local key, replacement = entry[1], entry[2]
		local result, hits = ugsub(stem, key .. "$", replacement)
		stem = result
		if hits > 0 then
			require("Module:debug").track("la-utilities/" .. key)
			return toNFC(stem)
		end
	end
	require("Module:debug").track("la-utilities")
	return stem
end

return export