Module:la-utilities
Appearance
- The following documentation is located at Module:la-utilities/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
Module for working with Latin text.
Functions:
strip_macrons(text)
: Return text minus macrons, breves, etc.
make_stem2(stem)
: Return third-declension stem based on nominative singular.
See also:
- Module:la-utilities/testcases
- Module:la-nominal
- Module:la-noun/data
- Module:la-adj/data
- Module:la-verb
- Module:la-headword
-- Table of public functions and data; it is populated below and returned at
-- the end of the module.
local export = {}
-- Names of the modules that the lazy-loading wrapper functions below
-- `require` the first time they are called.
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"
-- Local aliases for globals and library functions, interleaved with forward
-- declarations of this module's own functions (the ones marked "defined
-- below" are assigned right after the matching `export.*` definition).
local form_is_empty -- defined below
local forms_equal -- defined below
local ipairs = ipairs
local join -- defined below
local match = string.match
local normalize_form -- defined below
local pairs = pairs
local remove = table.remove
local require = require
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local type = type
local u = mw.ustring.char
local umatch = mw.ustring.match
-- U+0304 COMBINING MACRON, as a string.
local MACRON = u(0x304)
-- Pattern character class matching one unaccented vowel letter in either
-- case, including the ligatures æ and œ.
local VOWEL = "[aæeioœuyAÆEIOŒUY]"
-- Lazy wrapper for `contains` from Module:table. The first call loads the
-- module, rebinds this local to the real function, and forwards its arguments.
local function contains(...)
    local loaded = require(table_module).contains
    contains = loaded
    return loaded(...)
end
-- Lazy wrapper for `decode_entities` from Module:string utilities. The first
-- call loads the module, rebinds this local to the real function, and
-- forwards its arguments.
local function decode_entities(...)
    local loaded = require(string_utilities_module).decode_entities
    decode_entities = loaded
    return loaded(...)
end
-- Lazy wrapper for `deepEquals` from Module:table. The first call loads the
-- module, rebinds this local to the real function, and forwards its arguments.
local function deep_equals(...)
    local loaded = require(table_module).deepEquals
    deep_equals = loaded
    return loaded(...)
end
-- Lazy wrapper for `insertIfNot` from Module:table. The first call loads the
-- module, rebinds this local to the real function, and forwards its arguments.
local function insert_if_not(...)
    local loaded = require(table_module).insertIfNot
    insert_if_not = loaded
    return loaded(...)
end
-- Lazy wrapper for `length` from Module:table. The first call loads the
-- module, rebinds this local to the real function, and forwards its arguments.
local function table_len(...)
    local loaded = require(table_module).length
    table_len = loaded
    return loaded(...)
end
-- Lazy wrapper for `trim` from Module:string utilities. The first call loads
-- the module, rebinds this local to the real function, and forwards its
-- arguments.
local function trim(...)
    local loaded = require(string_utilities_module).trim
    trim = loaded
    return loaded(...)
end
-- Lazy wrapper for `gsub` from Module:string utilities. The first call loads
-- the module, rebinds this local to the real function, and forwards its
-- arguments.
local function ugsub(...)
    local loaded = require(string_utilities_module).gsub
    ugsub = loaded
    return loaded(...)
end
-- Maps each three-letter case abbreviation to the full name of the case.
export.cases = {
    nom = "nominative",
    gen = "genitive",
    dat = "dative",
    acc = "accusative",
    abl = "ablative",
    voc = "vocative",
    loc = "locative",
}
-- Replacement table mapping the consonantal letters j and v (either case) to
-- the vowel letters i and u.
local cons_to_vowel = {
    j = "i",
    J = "I",
    v = "u",
    V = "U",
}
-- Returns `text` unchanged if it already contains a [[...]] link. Otherwise
-- wraps everything between the leading and trailing whitespace in [[ ]],
-- leaving that whitespace outside the brackets.
local function link_if_unlinked(text)
    if text:match("%[%[.-]]") then
        return text
    end
    -- Assigning to a single local keeps only the string result and drops the
    -- substitution count.
    local linked = ugsub(text, "^(%s*)(.-)(%s*)$", "%1[[%2]]%3")
    return linked
end
-- Concatenates two pieces of a form (e.g. a stem and an ending) and returns
-- the result in NFC. Both pieces are decomposed to NFD first so that the
-- checks below see base letters separately from their combining marks.
function export.join(a, b)
    local head, tail = toNFD(a), toNFD(b)
    -- A final "j" or "v" on the first piece only stays a consonant when a
    -- vowel follows it; otherwise it becomes "i" or "u".
    local vowel_follows = umatch(tail, "^" .. VOWEL)
    if not vowel_follows then
        head = head:gsub("[jvJV]$", cons_to_vowel)
    end
    -- Whitespace at the seam means the pieces are separate words, so each
    -- one gets its own link (unless it already contains one).
    local has_gap = umatch(head, "%s$") or umatch(tail, "^%s")
    if has_gap then
        head, tail = link_if_unlinked(head), link_if_unlinked(tail)
    end
    return toNFC(head .. tail)
end
join = export.join
-- Reduces a form to its simplest representation:
--   * nil, false and "" become nil;
--   * a string is returned as-is;
--   * a list has every entry that normalizes to nil removed in place; an
--     empty result becomes nil, a single remaining entry is returned in
--     normalized form, and anything longer is returned as the same
--     (mutated) list.
function export.normalize_form(form)
    if not form or form == "" then
        return nil
    end
    if type(form) == "string" then
        return form
    end
    -- Walk the list, dropping empty entries. The index only advances when
    -- the current entry is kept, because `remove` shifts later entries down.
    local index = 1
    local entry = form[index]
    while entry ~= nil do
        if normalize_form(entry) == nil then
            remove(form, index)
        else
            index = index + 1
        end
        entry = form[index]
    end
    local remaining = table_len(form)
    if remaining == 0 then
        return nil
    end
    if remaining == 1 then
        return normalize_form(form[1])
    end
    return form
end
normalize_form = export.normalize_form
-- Reports whether a form counts as empty.
--
-- `form` may be nil, a string, or a (possibly nested) list of forms; it is
-- normalized first, which may remove empty entries from a list in place.
-- Returns true when:
--   * the form normalizes to nothing (nil or "");
--   * it is a single string that, after HTML entities are decoded, is "-" or
--     "—";
--   * it is a list in which every entry is itself empty.
-- Returns false otherwise.
function export.form_is_empty(form)
    form = normalize_form(form)
    if not form or form == "" then
        return true
    elseif type(form) ~= "table" then
        -- Decode first so that an entity-encoded dash is recognized too.
        form = decode_entities(form)
        return form == "-" or form == "—"
    end
    for _, formval in ipairs(form) do
        if not form_is_empty(formval) then
            return false
        end
    end
    -- Every entry was empty (e.g. {"-", "—"}). Without this explicit return
    -- the function would fall off the end and yield no value, which callers
    -- would read as "not empty".
    return true
end
form_is_empty = export.form_is_empty
-- Compares two forms for equality. Each side may be a single string or a
-- list of forms; both are normalized first, so a string and a one-element
-- list holding that string compare equal.
function export.forms_equal(form1, form2)
    local left = normalize_form(form1)
    local right = normalize_form(form2)
    return deep_equals(left, right)
end
forms_equal = export.forms_equal
-- Options table handed to the Module:table helpers so that they compare
-- entries with forms_equal. It is shared with the add_form code further down.
local options = {comparison = forms_equal}
-- Reports whether `form` occurs in `forms`, always returning a real boolean.
-- A missing `forms` contains nothing; a non-table `forms` is compared to
-- `form` directly with ==; a list is searched using `options`.
function export.form_contains(forms, form)
    if not forms then
        return false
    end
    if type(forms) ~= "table" then
        return forms == form
    end
    if contains(forms, form, options) then
        return true
    end
    return false
end
-- Adds one or more forms to the slot `forms[key]` (a key looks like
-- "1s_pres_actv_indc"). A form the slot already holds is not added again.
--
-- Each form is built by joining a stem with a suffix. `stem` and `suf` may
-- each be a single value or a list; with lists, every stem is combined with
-- every suffix in turn and all resulting forms are added.
--
-- `pos` is the index to insert at. Leave it out to append at the end, or
-- pass 1 to insert at the beginning.
do
    -- Adds the single form join(stem, suf) to forms[key] and returns the
    -- position to use for the next insertion (nil means "append").
    local function _add_form(forms, key, stem, suf, pos)
        local existing = forms[key]
        local candidate = join(stem, suf)
        if not existing then
            -- First form for this slot: store it as a bare value.
            forms[key] = candidate
            return
        end
        if existing == candidate then
            return pos
        end
        if type(existing) ~= "table" then
            -- Promote a lone form to a list so another can sit beside it.
            existing = {existing}
            forms[key] = existing
        end
        -- `options` is the shared table defined outside this block; its
        -- `pos` field is overwritten on every call.
        options.pos = pos
        local inserted = insert_if_not(existing, candidate, options)
        -- After a successful positional insert, advance so that the next
        -- form lands after this one.
        if pos ~= nil and inserted then
            return pos + 1
        end
        return pos
    end

    -- Adds `stem` combined with one suffix or with each suffix in a list.
    -- A nil `suf` adds nothing and returns nothing.
    local function add_stem(forms, key, stem, suf, pos)
        if suf == nil then
            return
        end
        if type(suf) == "table" then
            for _, s in ipairs(suf) do
                pos = _add_form(forms, key, stem, s, pos)
            end
            return pos
        end
        return _add_form(forms, key, stem, suf, pos)
    end

    function export.add_form(forms, key, stem, suf, pos)
        -- Clamp `pos`: values of 1 or less mean "at the start", and a
        -- position past the last existing form (or any position when the
        -- slot is missing or is not a list) degrades to nil, i.e. "append".
        if pos then
            local current = forms[key]
            if not current then
                pos = nil
            elseif pos <= 1 then
                pos = 1
            elseif type(current) ~= "table" or not (pos <= table_len(current)) then
                pos = nil
            end
        end
        if type(stem) == "table" then
            for _, s in ipairs(stem) do
                pos = add_stem(forms, key, s, suf, pos)
            end
            return
        end
        add_stem(forms, key, stem, suf, pos)
    end
end
do
    -- Forward declaration: the two helpers below call each other.
    local check_keytypes

    -- Returns true if `slot` matches any pattern in `exceptions` (the slot
    -- is then kept). Otherwise, when `keytypes` is given, hands the slot to
    -- check_keytypes without exceptions so it is removed if it matches.
    local function check_exceptions(slot, forms, keytypes, exceptions)
        for _, exception in ipairs(exceptions) do
            if match(slot, exception) then
                return true
            end
        end
        if keytypes then
            check_keytypes(slot, forms, keytypes)
        end
    end

    -- Deletes forms[slot] if `slot` matches any pattern in `keytypes` and,
    -- when `exceptions` is given, matches none of the exception patterns.
    check_keytypes = function(slot, forms, keytypes, exceptions)
        for _, keytype in ipairs(keytypes) do
            if match(slot, keytype) then
                -- `keytypes` is passed as nil so the exception check only
                -- reports a match and does not recurse back here.
                local excepted = exceptions and check_exceptions(slot, forms, nil, exceptions)
                if not excepted then
                    forms[slot] = nil
                    return
                end
            end
        end
    end

    -- Removes every form whose key matches any pattern in the list
    -- `keytypes`, except keys that also match a pattern in `exceptions`.
    function export.remove_forms(forms, keytypes, exceptions)
        -- Start with whichever list is shorter.
        local func
        if exceptions == nil or #exceptions >= #keytypes then
            func = check_keytypes
        else
            func = check_exceptions
        end
        for slot in pairs(forms) do
            func(slot, forms, keytypes, exceptions)
        end
    end
end
-- Ordered list of {pattern, replacement} pairs used by export.make_stem2.
-- make_stem2 appends "$" to each pattern, so every rule applies only to the
-- end of the string, and it stops at the first rule that matches. Order
-- therefore matters: longer endings are listed before the shorter endings
-- they contain (e.g. "us" before "s", "ex" before "x").
local patterns = {
-- -ma → -mat (the whole match is kept and "t" is appended).
{"[mM]a", "%0t"},
-- Final -e is dropped.
{"e", ""},
-- -al, -ar, -ol, -or: a combining macron is inserted after the vowel;
-- make_stem2 recomposes the result with toNFC.
{"([aoAO])([lr])", "%1" .. MACRON .. "%2"},
-- -el → -ell.
{"[eE]l", "%0l"},
-- -men → -min.
{"([mM])en", "%1in"},
-- -tūdō → -tūdin.
{"([tT]ūd)ō", "%1in"},
-- -gō → -gin.
{"([gG])ō", "%1in"},
-- Any other final -ō → -ōn.
-- NOTE(review): this pattern has no capture group, so "%1" presumably
-- stands for the whole match in the gsub implementation used — confirm.
{"[ōŌ]", "%1n"},
-- -er → -r.
{"er", "r"},
-- Final -ēs or -is is dropped.
{"[ēi]s", ""},
-- Long vowel + "ns": the macron is stripped from the vowel and the final
-- "s" becomes "t" (e.g. -āns → -ant).
{"([āēīōūȳĀĒĪŌŪȲ]n)s", function(v)
return toNFD(v):gsub(MACRON, "") .. "t"
end},
-- -ceps → -cipit.
{"([cC])eps", "%1ipit"},
-- -bs → -b, -ps → -p.
{"([bp])s", "%1"},
-- -us → -or.
{"us", "or"},
-- Any other final -s → -t.
{"s", "t"},
-- -ex → -ic.
{"ex", "ic"},
-- Any other final -x → -c.
{"x", "c"},
}
-- Returns the third-declension stem derived from a nominative singular.
-- The rules in `patterns` are tried in order against the end of `stem`; the
-- first one that matches is applied and the result is returned in NFC. If
-- no rule matches, the input is returned as it came out of the last
-- substitution attempt. Each outcome is recorded through Module:debug's
-- `track`.
function export.make_stem2(stem)
    for _, rule in ipairs(patterns) do
        local key, replacement = rule[1], rule[2]
        local result, count = ugsub(stem, key .. "$", replacement)
        stem = result
        if count > 0 then
            require("Module:debug").track("la-utilities/" .. key)
            return toNFC(stem)
        end
    end
    require("Module:debug").track("la-utilities")
    return stem
end
return export