Module:is-common
Jump to navigation
Jump to search
- This module lacks a documentation subpage. Please create it.
- Useful links: subpage list • links • transclusions • testcases • sandbox
-- Table holding everything this module exports; it is returned at the end of the file.
local export = {}
-- Language object looked up by the code "is".
local lang = require("Module:languages").getByCode("is")
-- Other modules loaded for use by this module.
local m_links = require("Module:links")
local m_table = require("Module:table")
local m_string_utilities = require("Module:string utilities")
-- Short local aliases for functions of the mw.ustring and mw.text libraries.
local u = mw.ustring.char
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local uupper = mw.ustring.upper
-- Return `str` with its first character uppercased. If `str` has no first character to split off (e.g. it is empty),
-- it is returned unchanged.
local function ucap(str)
	local head, tail = rmatch(str, "^(.)(.*)$")
	if not head then
		return str
	end
	return uupper(head) .. tail
end
-- Like rsubn(), but return only the resulting string and drop the substitution count.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call's results to the first value.
	return (rsubn(term, foo, bar))
end
-- Like rsubn(), but the second return value is a boolean: true if at least one substitution was made, false
-- otherwise.
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end
-- Placeholder characters, each standing in for one casing of the digraph "au" so that it can be handled as a single
-- vowel character. The three code points MUST be distinct: if two casings share a placeholder, undoing the
-- substitution cannot tell them apart and restores the wrong casing.
local AU_SUB = u(0xFFF0) -- temporary substitution for 'au'
local CAP_AU_SUB = u(0xFFF1) -- temporary substitution for 'Au'
local ALL_CAP_AU_SUB = u(0xFFF2) -- temporary substitution for 'AU'
local lc_vowel = "aeiouyáéíóúýöæ"
local uc_vowel = uupper(lc_vowel)
-- All vowel characters (both cases) plus the "au" placeholders, for use inside a character class.
export.vowel = lc_vowel .. uc_vowel .. AU_SUB .. CAP_AU_SUB .. ALL_CAP_AU_SUB
-- Pattern matching a single vowel.
export.vowel_c = "[" .. export.vowel .. "]"
-- Same as `vowel`, with an escaped hyphen added.
export.vowel_or_hyphen = export.vowel .. "%-"
-- Pattern matching a single vowel or a hyphen.
export.vowel_or_hyphen_c = "[" .. export.vowel_or_hyphen .. "]"
-- Pattern matching a single character that is not a vowel.
export.non_vowel_c = "[^" .. export.vowel .. "]"
-- Pattern matching a single "consonant"; currently the same pattern as `non_vowel_c`, i.e. any non-vowel character.
export.cons_c = "[^" .. export.vowel .. "]"
-- Default i-mutation for each lowercase vowel, j + vowel sequence, or "au", keyed by the unmutated form. Alternative
-- outcomes that require an explicitly specified vowel are kept as commented-out entries.
local lc_i_mutation = {
["a"] = "e", -- [[dagur]] "day" -> dat sg [[degi]]; [[faðir]] "father" -> nom pl [[feður]]; [[maður]] "man" -> nom
-- pl [[menn]]; [[taka]] "to take" -> 1sg pres ind [[tek]]; [[langur]] "long" -> [[lengd]] "length"
["á"] = "æ", -- [[háttur]] "way, manner" -> nom pl [[hættir]]; [[hár]] "high" -> comp [[hærri]]
["e"] = "i", -- this may mostly occur in reverse
["o"] = "e", -- [[hnot]] "nut; small ball of yarn" -> nom pl [[hnetur]]; [[koma]] "to come" -> 1sg pres ind [[kem]]
-- ["o"] = "y", -- [[sonur]] "son" -> nom pl [[synir]]; needs explicit vowel
["ö"] = "e", -- [[mölur]] "clothes moth" -> nom pl [[melir]]; [[köttur]] "cat" -> nom pl [[kettir]]; [[slökkva]]
-- "to extinguish" -> 1sg pres ind [[slekk]]; [[dökkur]] "dark" -> comp [[dekkri]]
["ó"] = "æ", -- [[bók]] "book" -> nom pl [[bækur]]; [[stór]] "big" -> comp [[stærri]]; [[dómur]] "judgement" ->
-- [[dæmdur]] "judged"
["u"] = "y", -- [[fullur]] "full" -> comp [[fyllri]]; [[þungur]] "heavy/weighty" -> [[þyngd]] "weight"
["ú"] = "ý", -- [[mús]] "mouse" -> nom pl [[mýs]]; [[brú]] "bridge" -> nom pl [[brýr]]; [[búa]] "to reside" ->
-- 1sg pres ind [[bý]]; [[hús]] "house" -> [[hýsa]] "to house"
["ja"] = "i", -- un-u-mutated version of jö
["jö"] = "i", -- [[fjörður]] "fjord" -> dat sg [[firði]], nom pl [[firðir]]
-- ["jö"] = "é", -- [[stjölur]] "?" -> dat sg [[stéli]], nom pl [[stélir]]; needs explicit vowel
["jó"] = "ý", -- [[bjóða]] "to offer" -> 1sg pres ind [[býð]]; [[ljós]] "light" -> [[lýsa]] "to illuminate"
["ju"] = "y", -- [[við]] [[bjuggum]] "we lived" -> subjunctive [[við]] [[byggjum]]
["jú"] = "ý", -- [[ljúga]] "to lie" -> 1sg pres ind [[lýg]]
["au"] = "ey", -- [[ausa]] "to dip, to scoop" -> 1sg pres ind [[eys]]; [[aumur]] "wretched" -> [[eymd]]
-- "wretchedness"
}
-- Full i-mutation lookup: each lowercase entry of `lc_i_mutation` plus a variant in which the first letter of both the
-- key and the value is capitalized.
local i_mutation = {}
for plain, mutated in pairs(lc_i_mutation) do
	i_mutation[plain] = mutated
	i_mutation[ucap(plain)] = ucap(mutated)
end
-- Default reverse i-mutation for each lowercase vowel (or "ey"), keyed by the i-mutated form.
local lc_reverse_i_mutation = {
["æ"] = "á", -- [[hættur]] nom pl "bedtime, quitting time" dat pl [[háttum]]; [[ær]] "ewe" acc/dat sg [[á]]
["e"] = "a", -- [[ketill]] "kettle" dat sg [[katli]]; [[Egill]] (male given name) dat sg [[Agli]];
-- [[telja]] "to count" past ind [[taldi]]
["i"] = "e", -- [[sitja]] "to sit" past part [[setinn]]
["ý"] = "ú", -- [[kýr]] "cow" acc/dat sg [[kú]]
["y"] = "u", -- FIXME: examples?
["ey"] = "au", -- FIXME: examples?
}
-- Full reverse i-mutation lookup: each lowercase entry of `lc_reverse_i_mutation` plus a variant in which the first
-- letter of both the key and the value is capitalized.
local reverse_i_mutation = {}
for mutated, plain in pairs(lc_reverse_i_mutation) do
	reverse_i_mutation[mutated] = plain
	reverse_i_mutation[ucap(mutated)] = ucap(plain)
end
-- Apply i-mutation to the last vowel of `stem` and return the mutated stem. The replacement is `newv` when given
-- (needed for cases like [[sonur]] "son" nom pl [[synir]] as opposed to [[hnot]] "nut; small ball of yarn" nom pl
-- [[hnetur]]); otherwise the default from the i-mutation table is used. Throws an error if the last vowel of the stem
-- is not i-mutable.
function export.apply_i_mutation(stem, newv)
	local function replace_vowel(origv, post)
		return (newv or i_mutation[origv]) .. post
	end
	-- Any trailing consonants up to the end of the stem.
	local tail = "(" .. export.cons_c .. "*)$"
	-- Tried in order; the multi-character sequences must come before the single vowels.
	local vowel_patterns = {
		"([Aa]u)",
		"([Jj][aöóúu])",
		"([aáeoöóúuAÁEOÖÓÚU])",
	}
	for _, vowel_pattern in ipairs(vowel_patterns) do
		local modstem, subbed = rsubb(stem, vowel_pattern .. tail, replace_vowel)
		if subbed then
			return modstem
		end
	end
	error(("Stem '%s' does not contain an i-mutable vowel as its last vowel"):format(stem))
end
-- Apply reverse i-mutation to the last vowel of `stem` and return the resulting stem. The replacement is `newv` when
-- given; otherwise the default from the reverse i-mutation table is used. Throws an error if the last vowel of the
-- stem is not one that can be reverse-i-mutated.
function export.apply_reverse_i_mutation(stem, newv)
	local function replace_vowel(origv, post)
		return (newv or reverse_i_mutation[origv]) .. post
	end
	-- Any trailing consonants up to the end of the stem.
	local tail = "(" .. export.cons_c .. "*)$"
	-- Tried in order; the digraph "ey" must come before the single vowels.
	local vowel_patterns = {
		"([Ee]y)",
		"([æeiýyÆEIÝY])",
	}
	for _, vowel_pattern in ipairs(vowel_patterns) do
		local modstem, subbed = rsubb(stem, vowel_pattern .. tail, replace_vowel)
		if subbed then
			return modstem
		end
	end
	error(("Stem '%s' does not contain a reversible i-mutated vowel as its last vowel"):format(stem))
end
-- "Lesser" u-mutation (a -> ö) and its reverse.
local lesser_u_mutation = { a = "ö", A = "Ö" }
local lesser_reverse_u_mutation = { ["ö"] = "a", ["Ö"] = "A" }
-- "Greater" u-mutation (a -> u) and its reverse. FIXME, the capital variants may not occur.
local greater_u_mutation = { a = "u", A = "U" }
local greater_reverse_u_mutation = { u = "a", U = "A" }
-- Replace each occurrence of "au", "Au" and "AU" in `stem` with its single-character placeholder and return the
-- result. au doesn't mutate; the easiest way to handle this is to temporarily turn it into a single character.
local function apply_au_sub(stem)
	-- Parentheses keep only the resulting string and drop gsub's substitution count.
	local lower_done = (stem:gsub("au", AU_SUB))
	local cap_done = (lower_done:gsub("Au", CAP_AU_SUB))
	return (cap_done:gsub("AU", ALL_CAP_AU_SUB))
end
-- Replace each "au" placeholder character in `stem` with the digraph it stands for ("au", "Au" or "AU") and return
-- the result.
local function undo_au_sub(stem)
	-- Parentheses keep only the resulting string and drop gsub's substitution count.
	local lower_done = (stem:gsub(AU_SUB, "au"))
	local cap_done = (lower_done:gsub(CAP_AU_SUB, "Au"))
	return (cap_done:gsub(ALL_CAP_AU_SUB, "AU"))
end
-- Apply u-mutation to `stem` and return the result. `typ` is the type of u-mutation:
-- * "umut" (mutate the last vowel if possible, with a -> ö);
-- * "Umut" (mutate the last vowel if possible, with a -> u);
-- * "uumut" (mutate the last two vowels if possible, with a -> ö in the second-to-last and a -> ö in the last);
-- * "uUmut" (mutate the last two vowels if possible, with a -> ö in the second-to-last and a -> u in the last);
-- * "u_mut" (mutate the second-to-last vowel if possible, with a -> ö, leaving alone the last vowel).
-- Any other `typ` throws an internal error. If `error_if_unmatchable` is set, an error is also thrown when the stem has
-- too few vowels for the requested type or when the result would equal the original stem; otherwise the stem comes
-- back with no mutation applied in those cases. The digraph "au" counts as a single vowel and is never mutated.
function export.apply_u_mutation(stem, typ, error_if_unmatchable)
	local origstem = stem
	stem = apply_au_sub(stem)
	local two_vowels = typ == "uUmut" or typ == "uumut" or typ == "u_mut"
	if not two_vowels and typ ~= "umut" and typ ~= "Umut" then
		error(("Internal error: For stem '%s', saw unrecognized u-mutation type '%s'"):format(origstem, typ))
	end
	-- The last vowel of the stem plus any consonants following it.
	local last_vowel_pat = "(" .. export.vowel_c .. ")(" .. export.cons_c .. "*)$"
	local mutated
	if two_vowels then
		local first, v1, middle, v2, last = rmatch(stem,
			"^(.*)(" .. export.vowel_c .. ")(" .. export.cons_c .. "*)" .. last_vowel_pat)
		if not first then
			if error_if_unmatchable then
				error(("Can't apply u-mutation of type '%s' because stem '%s' doesn't have two syllables"):
					format(typ, origstem))
			end
			return undo_au_sub(stem)
		end
		-- The second-to-last vowel always gets the lesser mutation.
		v1 = lesser_u_mutation[v1] or v1
		-- "u_mut" leaves the last vowel alone; "uUmut" uses the greater mutation for it, "uumut" the lesser one.
		if typ ~= "u_mut" then
			local v2_mutation = typ == "uUmut" and greater_u_mutation or lesser_u_mutation
			v2 = v2_mutation[v2] or v2
		end
		mutated = first .. v1 .. middle .. v2 .. last
	else
		local first, v, last = rmatch(stem, "^(.*)" .. last_vowel_pat)
		if not first then
			if error_if_unmatchable then
				error(("Can't apply u-mutation of type '%s' because stem '%s' doesn't have a vowel"):format(typ, origstem))
			end
			return undo_au_sub(stem)
		end
		local v_mutation = typ == "Umut" and greater_u_mutation or lesser_u_mutation
		mutated = first .. (v_mutation[v] or v) .. last
	end
	local retval = undo_au_sub(mutated)
	if error_if_unmatchable and retval == origstem then
		error(("Can't apply u-mutation of type '%s' to stem '%s'; result would be the same as the original"):
			format(typ, origstem))
	end
	return retval
end
-- Apply reverse u-mutation to `stem` and return the result. `typ` is the type of u-mutation:
-- * "unumut" (unmutate the last vowel if possible, with ö -> a);
-- * "unUmut" (unmutate the last vowel if possible, with u -> a);
-- * "unuumut" (unmutate the last two vowels if possible, with ö -> a in the second-to-last and ö -> a in the last);
-- * "unuUmut" (unmutate the last two vowels if possible, with ö -> a in the second-to-last and u -> a in the last);
-- * "unu_mut" (unmutate the second-to-last vowel if possible, with ö -> a, leaving alone the last vowel).
-- Any other `typ` throws an internal error. If `error_if_unmatchable` is set, an error is also thrown when the stem has
-- too few vowels for the requested type or when the result would equal the original stem; otherwise the stem comes
-- back with no unmutation applied in those cases. The digraph "au" counts as a single vowel and is never changed.
function export.apply_reverse_u_mutation(stem, typ, error_if_unmatchable)
	local origstem = stem
	stem = apply_au_sub(stem)
	local two_vowels = typ == "unuumut" or typ == "unuUmut" or typ == "unu_mut"
	if not two_vowels and typ ~= "unumut" and typ ~= "unUmut" then
		error(("Internal error: For stem '%s', saw unrecognized reverse u-mutation type '%s'"):format(origstem, typ))
	end
	-- The last vowel of the stem plus any consonants following it.
	local last_vowel_pat = "(" .. export.vowel_c .. ")(" .. export.cons_c .. "*)$"
	local unmutated
	if two_vowels then
		local first, v1, middle, v2, last = rmatch(stem,
			"^(.*)(" .. export.vowel_c .. ")(" .. export.cons_c .. "*)" .. last_vowel_pat)
		if not first then
			if error_if_unmatchable then
				error(("Can't apply reverse u-mutation of type '%s' because stem '%s' doesn't have two syllables"):
					format(typ, origstem))
			end
			return undo_au_sub(stem)
		end
		-- The second-to-last vowel always gets the lesser reverse mutation.
		v1 = lesser_reverse_u_mutation[v1] or v1
		-- "unu_mut" leaves the last vowel alone; "unuUmut" uses the greater reverse mutation for it, "unuumut" the
		-- lesser one.
		if typ ~= "unu_mut" then
			local v2_mutation = typ == "unuUmut" and greater_reverse_u_mutation or lesser_reverse_u_mutation
			v2 = v2_mutation[v2] or v2
		end
		unmutated = first .. v1 .. middle .. v2 .. last
	else
		local first, v, last = rmatch(stem, "^(.*)" .. last_vowel_pat)
		if not first then
			if error_if_unmatchable then
				error(("Can't apply reverse u-mutation of type '%s' because stem '%s' doesn't have a vowel"):
					format(typ, origstem))
			end
			return undo_au_sub(stem)
		end
		local v_mutation = typ == "unUmut" and greater_reverse_u_mutation or lesser_reverse_u_mutation
		unmutated = first .. (v_mutation[v] or v) .. last
	end
	local retval = undo_au_sub(unmutated)
	if error_if_unmatchable and retval == origstem then
		error(("Can't apply reverse u-mutation of type '%s' to stem '%s'; result would be the same as the original"):
			format(typ, origstem))
	end
	return retval
end
-- Contract `stem` by dropping the a/i/u of its last syllable, and return the contracted stem. Throws an error if the
-- stem can't be contracted.
function export.apply_contraction(stem)
	-- Contraction only applies when the last vowel is a/i/u and followed by a single consonant. There are restrictions
	-- on what the consonant can be but I'm not sure exactly what they are; r/l/n/ð are all possible (cf. [[hamar]],
	-- [[megin]], [[höfuð]], [[þumall]], where in the last case the final -l is the nominative singular ending).
	local contraction_pattern = "^(.*" .. export.cons_c .. ")([aiu])(" .. export.cons_c .. ")$"
	-- The middle capture is the vowel being dropped; it is not needed.
	local before, _, final_cons = rmatch(stem, contraction_pattern)
	if before then
		return before .. final_cons
	end
	error(("Contraction cannot be applied to stem '%s' because it doesn't end in a/i/u preceded by a consonant and followed by a single consonant"
		):format(stem))
end
return export