Module:he-noun
Appearance
- The following documentation is located at Module:he-noun/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module generates inflection tables for Hebrew nouns. It is used by {{he-decl}}; see that template's documentation to learn more.
local m_links = require("Module:links")
local m_strutils = require("Module:string utilities")
local com = require("Module:he-common")
local lang = require("Module:languages").getByCode("he")
local export = {}
-- Normalizes one raw template argument and splits it on the first "\" or "/".
-- The text is first passed through com.fix_nikud. When no separator is
-- present the normalized string is returned as-is; otherwise the result is a
-- two-element table {text before the separator, text after it}, both trimmed.
local function process_arg(arg)
	local text = com.fix_nikud(arg)
	local sep = mw.ustring.find(text, "[\\/]")
	if not sep then
		return text
	end
	local before = mw.ustring.sub(text, 1, sep - 1)
	local after = mw.ustring.sub(text, sep + 1)
	return {mw.text.trim(before), mw.text.trim(after)}
end
-- Returns the second element of a form pair, or the value itself when it is
-- not a table.
local function getv(x)
	if type(x) ~= "table" then
		return x
	end
	return x[2]
end
-- Returns the first element of a form pair. For a plain string, every run of
-- characters that are not Lua-pattern magic characters is passed through
-- lang:makeEntryName, so the function can be applied to patterns as well as
-- to words (the magic characters themselves are left untouched).
--
-- Always returns exactly one value: mw.ustring.gsub also yields a replacement
-- count, which previously leaked out through the tail return and could end up
-- as an unintended extra argument at call sites.
local function getnv(x)
	if type(x) == "table" then
		return x[1]
	end
	local converted = mw.ustring.gsub(x, "[^%^$()%%.%[%]*+%-?]+", function(chunk)
		return (lang:makeEntryName(chunk))
	end)
	return converted
end
-- Converts a pattern to its getnv() form and then deletes every empty
-- character class "[]" together with an optional following "?", "+" or "-"
-- quantifier (presumably classes that held only characters removed by the
-- entry-name conversion -- confirm against lang:makeEntryName).
--
-- Always returns exactly one value. The replacement count produced by
-- mw.ustring.gsub must not escape: match_form passes this function's result as
-- the last argument of mw.ustring.match, where a second value would be taken
-- as the `init` start position.
local function getnvrx(x)
	local stripped = mw.ustring.gsub(getnv(x), "%[%][%?%+%-]?", "")
	return stripped
end
-- Concatenates two forms, each either a plain string or a two-element pair.
-- Two strings give a string. If either side is a pair the result is a pair:
-- second elements are joined with the plain string as-is, first elements are
-- joined with the plain string converted through lang:makeEntryName.
local function append_parts_2(a, b)
	local a_is_pair = type(a) == "table"
	local b_is_pair = type(b) == "table"
	if a_is_pair and b_is_pair then
		return {a[1] .. b[1], a[2] .. b[2]}
	elseif a_is_pair then
		return {a[1] .. (lang:makeEntryName(b)), a[2] .. b}
	elseif b_is_pair then
		return {(lang:makeEntryName(a)) .. b[1], a .. b[2]}
	end
	return a .. b
end
-- Left-folds append_parts_2 over all arguments: the first argument is joined
-- with each following one in order. Iteration over the extra arguments uses
-- ipairs, so it stops at the first nil.
local function append_parts(a, ...)
	local joined = a
	for _, part in ipairs({...}) do
		joined = append_parts_2(joined, part)
	end
	return joined
end
-- Compares two forms. A pair equals another pair whose first and second
-- elements match; a non-table value is compared with plain ==.
local function equal(a, b)
	if type(a) ~= "table" then
		return a == b
	end
	return type(b) == "table" and a[1] == b[1] and a[2] == b[2]
end
-- Pair-aware wrapper around mw.ustring.gsub.
--
-- form, regex and repl may each be a plain string or a two-element pair.
-- When all of them are plain (a table `repl` counts as plain when const_repl
-- is true, so it can be used as a gsub lookup table), a single substitution
-- is performed and the resulting string is returned.
-- Otherwise the substitution is performed twice -- once on the second
-- elements (getv) and once on the first elements (getnv for the form and
-- replacement, getnvrx for the pattern) -- and a pair {first, second} is
-- returned.
--
-- Always returns exactly one value. The plain branch used to tail-return
-- mw.ustring.gsub and therefore also leaked the replacement count, unlike the
-- pair branch.
local function gsub_form(form, regex, repl, const_repl)
	local paired = type(form) == "table"
		or type(regex) == "table"
		or (not const_repl and type(repl) == "table")
	if not paired then
		local result = mw.ustring.gsub(form, regex, repl)
		return result
	end

	local replv, replnv = repl, repl
	if not const_repl then
		replv = getv(repl)
		replnv = getnv(repl)
	end
	local retv = mw.ustring.gsub(getv(form), getv(regex), replv)
	local retnv = mw.ustring.gsub(getnv(form), getnvrx(regex), replnv)
	return {retnv, retv}
end
-- Pair-aware wrapper around mw.ustring.match.
--
-- With a plain string form and a plain string pattern this is just
-- mw.ustring.match. If either is a pair, the second elements are matched
-- against each other and the first elements against each other (the pattern's
-- first-element variant comes from getnvrx); a pair {first match, second
-- match} is returned only when both match, otherwise nil.
local function match_form(form, regex)
	if type(form) ~= "table" and type(regex) ~= "table" then
		return mw.ustring.match(form, regex)
	end

	local retv = mw.ustring.match(getv(form), getv(regex))
	-- Bind the pattern to a local first: a call in last-argument position
	-- expands to all of its results, and an extra numeric result would be
	-- interpreted by mw.ustring.match as the `init` position.
	local nv_pattern = getnvrx(regex)
	local retnv = mw.ustring.match(getnv(form), nv_pattern)
	if retv and retnv then
		return {retnv, retv}
	end
	return nil
end
-- Maps each Hebrew word-final letter form to the corresponding non-final
-- form. Used by unfinalize as a gsub replacement table keyed by the captured
-- letter.
local final_to_unfinal = {
["ך"] = "כ",
["ם"] = "מ",
["ן"] = "נ",
["ף"] = "פ",
["ץ"] = "צ",
}
-- Rewrites the end of a form so that something can be appended after it.
-- * A final ח/ע carrying a patah, or a final ה carrying a dagesh-like point
--   (optionally followed by a patah), loses those trailing points.
-- * A final-form letter (optionally followed by a shva, as in ךְ) is replaced
--   by its non-final counterpart from final_to_unfinal.
-- Any other form is returned unchanged.
local function unfinalize(form)
	local pointed_guttural = match_form(form, "[חע]ַ$") or match_form(form, "הּ[ַ]?$")
	if pointed_guttural then
		return gsub_form(form, "([החע])[ּ]?[ַ]?$", "%1")
	end

	local final_letter = match_form(form, "[םןףץ]$") or match_form(form, "ךְ$")
	if final_letter then
		return gsub_form(form, "([ךםןףץ])[ְ]?$", final_to_unfinal, true)
	end

	return form
end
-- Renders one table cell.
-- x is a form (plain string or pair) or the placeholder "-", which is rendered
-- as an em dash. When is_construct is truthy a maqaf is appended to the
-- displayed text. For a pair, the link target is the entry name of the first
-- element; the display text is the second element, preceded by
-- "<target> / " when the target differs from the second element's entry name.
-- The link target is omitted when the global SUPPRESS_LINKS is set.
local function make_link(x, is_construct)
	local maqaf = ""
	if is_construct then
		maqaf = "־"
	end

	if type(x) ~= "table" then
		if x == "-" then
			return "—" -- m-dash
		end
		local term = nil
		if not SUPPRESS_LINKS then
			term = x
		end
		return m_links.full_link({lang = lang, term = term, alt = x .. maqaf, tr = "-", allow_self_link = false})
	end

	local pg = (lang:makeEntryName(x[1]))
	local term = nil
	if not SUPPRESS_LINKS then
		term = pg
	end
	local prefix = ""
	if pg ~= (lang:makeEntryName(x[2])) then
		prefix = pg .. maqaf .. " / "
	end
	return m_links.full_link({lang = lang, allow_self_link = false, term = term, alt = prefix .. x[2] .. maqaf, tr = "-"})
end
-- Recognized section codes and the label shown for each in the "Number"
-- column. process_args treats any positional argument equal to one of these
-- keys as the start of a new section.
local numbers = {
["s"] = "singular",
["d"] = "dual",
["p"] = "plural",
["ms"] = "masculine singular",
["md"] = "masculine dual",
["mp"] = "masculine plural",
["fs"] = "feminine singular",
["fd"] = "feminine dual",
["fp"] = "feminine plural",
}
-- Section codes that infer_forms handles with the singular rules; every other
-- code goes through the plural rules.
local singulars = {
["s"] = true,
["ms"] = true,
["fs"] = true,
}
-- Strips a leading definite article from a pointed (vocalized) string.
-- The patterns are tried in order and the first one that changes the string
-- wins; the first pattern also drops the point that follows the first letter
-- of the word. Returns nil when no pattern applies, which callers use as a
-- "does not look definite" test.
local function split_defv(form)
	local article_patterns = {
		"^הַ([וזטילמנסצקש][ׁׂ]?)ּ",
		"^הַ([בגדיכפת])",
		"^הָ(ר)",
		"^ה[ֶַָ]([אהחע])",
	}
	for _, pattern in ipairs(article_patterns) do
		local stripped = mw.ustring.gsub(form, pattern, "%1")
		if stripped ~= form then
			return stripped
		end
	end
	return nil
end
-- Pair-aware version of split_defv. For a pair, the article is removed from
-- the second element via split_defv and a leading ה is removed from the first
-- element; the result is a pair only if both removals succeeded, otherwise
-- nil. A plain string is handed straight to split_defv.
local function split_def(form)
	if type(form) ~= "table" then
		return split_defv(form)
	end

	local vocalized = split_defv(form[2])
	local plain = mw.ustring.gsub(form[1], "^ה", "")
	if vocalized and plain ~= form[1] then
		return {plain, vocalized}
	end
	return nil
end
-- Builds the definite form from an indefinite one by prefixing the article.
-- The shape of the article depends on the first letter of the form; for some
-- initial letters a point is also inserted after that letter, and an initial
-- ו is replaced by a dedicated pair. Raises an error when the form starts
-- with none of the recognized letters.
local function attach_def(form)
	if match_form(form, "^[בגדהחכפת]") then
		return append_parts("הַ", form)
	end
	if match_form(form, "^[אער]") then
		return append_parts("הָ", form)
	end
	if match_form(form, "^ו") then
		return gsub_form(form, "^ו", {"הוו", "הַוּ"})
	end
	if match_form(form, "^[זטילמנסצק]") or match_form(form, "^ש[ׁׂ]") then
		return gsub_form(form, "^([זטילמנסצקש][ׁׂ]?)", "הַ%1ּ")
	end
	error("Unrecognized initial consonant in indefinite form.")
end
-- Removes a trailing maqaf from a form (plain string or pair) and returns a
-- value of the same shape.
--
-- mw.ustring.gsub returns (string, count). The helper below keeps only the
-- string: previously the count of the last gsub call expanded into the table
-- constructor, producing a three-element "pair", and the string branch
-- returned two values.
local function remove_maqaf(x)
	local function strip(s)
		local stripped = mw.ustring.gsub(s, "־$", "")
		return stripped
	end

	if type(x) == "table" then
		return {strip(x[1]), strip(x[2])}
	end
	return strip(x)
end
-- Parses the template's arguments into a list of sections.
--
-- Positional arguments are read in order. A value that is a key of `numbers`
-- starts a new section with that number code; the first section is implicitly
-- "s". Every other value is a form, classified as follows:
--   * the first form of a section is the indefinite ("i"), unless it looks
--     definite (split_defv succeeds), in which case the decision is postponed
--     until the next form is seen and made by comparing the number of leading
--     ה letters of the two forms;
--   * later forms are recognized by their ending: definite ("d"), "2ms",
--     "2mp", "3ms", or construct ("c", marked by a trailing maqaf, which is
--     removed).
-- Sections that received no forms are dropped.
--
-- Returns an array of section tables ({number = code, i = ..., d = ..., ...}).
-- Raises an error for a form that cannot be classified or is a duplicate.
--
-- The arguments are first copied, trimmed, into a private table with empty
-- values left out. The caller's table is not modified: it may be a
-- metatable-backed frame.args proxy, where assigning nil to a key does not
-- reliably hide the underlying argument, and it is being traversed by pairs
-- at that point.
local function process_args(args)
	local cleaned = {}
	for key, val in pairs(args) do
		val = mw.text.trim(val)
		if val ~= "" then
			cleaned[key] = val
		end
	end

	local forms = {}
	if not cleaned[1] then return forms end

	local i = 1
	local num = "s"
	while num do
		local section = {["number"] = num}
		local touched = false
		-- A first form that looks definite; resolved when the next form arrives.
		local likely_def = nil
		local initial_hes = nil
		while cleaned[i] and not numbers[cleaned[i]] do
			local form = process_arg(cleaned[i])
			local wv = getv(form)
			if likely_def then
				local cur_initial_hes = mw.ustring.len(mw.ustring.match(getnv(form), "^ה*"))
				if cur_initial_hes < initial_hes then
					-- The postponed form has more leading ה: it was the definite.
					section["d"] = likely_def
				elseif cur_initial_hes > initial_hes and split_defv(wv) then
					-- The current form has more leading ה and looks definite, so
					-- the postponed form was an indefinite that begins with ה.
					section["i"] = likely_def
				else
					error("Definite form must either be first or second.")
				end
				likely_def = nil
			end
			if (not section["d"]) and split_defv(wv) then
				if touched then
					section["d"] = form
				else
					likely_def = form
					initial_hes = mw.ustring.len(mw.ustring.match(getnv(form), "^ה*"))
				end
			elseif not touched then
				section["i"] = form
			elseif (not section["2ms"]) and mw.ustring.match(wv, "ךָ$") then
				section["2ms"] = form
			elseif (not section["2mp"]) and mw.ustring.match(wv, "כֶם$") then
				section["2mp"] = form
			elseif (not section["3ms"]) and (mw.ustring.match(wv, "וֹ$") or mw.ustring.match(wv, "ו$") or mw.ustring.match(wv, "הוּ$")) then
				section["3ms"] = form
			elseif (not section["c"]) and (mw.ustring.match(wv, "־$") or mw.ustring.match(getnv(form), "־$")) then
				section["c"] = remove_maqaf(form)
			else
				error("Unrecognized or duplicate form: " .. wv)
			end
			touched = true
			i = i + 1
		end
		-- A definite-looking form that was the only form of its section.
		if likely_def then
			if section["d"] then error("Unrecognized or duplicate form: " .. getv(section["d"])) end
			section["d"] = likely_def
		end
		if touched then
			table.insert(forms, section)
		end
		-- Either the next number code or nil (end of arguments).
		num = cleaned[i]
		i = i + 1
	end
	return forms
end
-- Possessive-suffix tables. Each maps a person/gender/number key to the
-- suffix that attach_ending appends to a stem; a value may be a plain string
-- or a two-element pair. The extra "3p_heavy" flag tells infer_forms which
-- stem the 3mp/3fp suffixes are attached to: the heavy stem when true, the
-- light stem when false.

-- Suffix set selected by infer_forms for singular forms when none of the more
-- specific sets below applies.
local endings_c = {
["3p_heavy"] = false,
["1s"] = "ִי",
["2ms"] = "ְךָ",
["2fs"] = "ֵךְ",
["3ms"] = "וֹ",
["3fs"] = "ָהּ",
["1p"] = "ֵנוּ",
["2mp"] = "ְכֶם",
["2fp"] = "ְכֶן",
["3mp"] = "ָם",
["3fp"] = "ָן",
}
-- Suffix set selected by infer_forms for singular forms whose construct ends
-- in ֵה (when no 3ms form was supplied).
-- most have alternative forms that are not yet supported
local endings_e = {
["3p_heavy"] = true,
["1s"] = "ִי",
["2ms"] = "ֶךָ",
["2fs"] = "ֵךְ",
["3ms"] = "ֵהוּ",
["3fs"] = "ֶהָ",
["1p"] = "ֵנוּ",
["2mp"] = "ֵכֶם",
["2fp"] = "ֵכֶן",
["3mp"] = "ֵהֶם",
["3fp"] = "ֵהֶן",
}
-- Suffix set selected by infer_forms when the stem already ends in ִי
-- (a supplied 3ms in ִיהוּ / ִיו, or a construct in ִי); the suffixes carry no
-- leading vowel point and the 1s suffix is empty.
local endings_i = {
["3p_heavy"] = true,
["1s"] = "",
["2ms"] = "ךָ",
["2fs"] = "ךְ",
["3ms"] = "ו", -- and "הוּ", for when I allow multiple forms
["3fs"] = "הָ",
["1p"] = "נוּ",
["2mp"] = "כֶם",
["2fp"] = "כֶן",
["3mp"] = "הֶם",
["3fp"] = "הֶן",
}
-- endings for ים ending
local endings_pm = {
["3p_heavy"] = true,
["1s"] = {"יי", "ַי"},
["2ms"] = "ֶיךָ",
["2fs"] = {"ייך", "ַיִךְ"},
["3ms"] = "ָיו",
["3fs"] = "ֶיהָ",
["1p"] = "ֵינוּ",
["2mp"] = "ֵיכֶם",
["2fp"] = "ֵיכֶן",
["3mp"] = "ֵיהֶם",
["3fp"] = "ֵיהֶן",
}
-- endings for ות ending
local endings_pf = {
["3p_heavy"] = true,
["1s"] = {"יי", "ַי"},
["2ms"] = "ֶיךָ",
["2fs"] = {"ייך", "ַיִךְ"},
["3ms"] = "ָיו",
["3fs"] = "ֶיהָ",
["1p"] = "ֵינוּ",
["2mp"] = "ֵיכֶם",
["2fp"] = "ֵיכֶן",
["3mp"] = "ָם",--add extra יהם ending
["3fp"] = "ָן",--add extra יהן ending
}
-- Suffix keys that infer_forms always attaches to the light stem.
local light_endings = {
"1s",
"2ms",
"2fs",
"3ms",
"3fs",
"1p",
}
-- Suffix keys that infer_forms always attaches to the heavy stem.
local heavy_endings = {
"2mp",
"2fp",
}
-- Suffix keys whose stem (heavy or light) depends on the selected ending
-- table's "3p_heavy" flag.
local maybe_endings = {
"3mp",
"3fp",
}
-- Joins a stem and a possessive suffix, adjusting the seam first. At most one
-- of these adjustments is applied, tested in this order:
--   1. the suffix starts with a shva and the stem ends in א/ה/ח/ע: the
--      suffix's initial shva is replaced by a hataf patah;
--   2. the suffix starts with a qamats and the stem ends in ַח: the stem's
--      final patah becomes a segol;
--   3. the suffix is a pair, the stem's first-element form ends in י, and the
--      suffix's first element starts with more י letters than its second
--      element does: one leading י is dropped from the first element only.
-- Returns append_parts(stem, suffix) on the adjusted values.
local function attach_ending(stem, ending)
	local base, suffix = stem, ending

	-- Condition 3 above, evaluated lazily so it runs only if 1 and 2 fail.
	local function has_surplus_yod()
		if type(suffix) ~= "table" then
			return false
		end
		if not mw.ustring.match(getnv(base), "י$") then
			return false
		end
		local first_yods = mw.ustring.len(mw.ustring.match(getnv(suffix), "^י*"))
		local second_yods = mw.ustring.len(mw.ustring.match(getnv(getv(suffix)), "^י*"))
		return first_yods > second_yods
	end

	if match_form(suffix, "^ְ") and match_form(base, "[אהחע]$") then
		suffix = gsub_form(suffix, "^ְ", "ֲ")
	elseif match_form(suffix, "^ָ") and match_form(base, "ַח$") then
		base = gsub_form(base, "ַח$", "ֶח")
	elseif has_surplus_yod() then
		suffix = gsub_form(suffix, {"^י", "^"}, "")
	end

	return append_parts(base, suffix)
end
-- Fills in every form that was not supplied explicitly, for each section in
-- `forms` (as produced by process_args). Sections are modified in place and
-- nothing is returned.
--
-- Per section:
--   1. the indefinite ("i") and definite ("d") forms are derived from each
--      other when one is missing;
--   2. the construct ("c") is derived from the indefinite when missing;
--   3. a suffix table (`endings`) and two stems are chosen: `light_stem` for
--      the keys in light_endings and `heavy_stem` for the keys in
--      heavy_endings; the keys in maybe_endings use the heavy stem when the
--      suffix table's "3p_heavy" flag is set and the light stem otherwise;
--   4. every suffixed form still missing is built with attach_ending.
--
-- Sections whose number code is in `singulars` use the singular rules; all
-- other sections use the plural rules.
--
-- Raises an error when a section has neither "i" nor "d", or when a supplied
-- or derived form does not fit any recognized pattern.
local function infer_forms(forms)
for _, section in pairs(forms) do
local light_stem = nil
local heavy_stem = nil
local endings = nil
-- Step 1: derive whichever of indefinite/definite is missing.
-- NOTE(review): split_def can return nil, which would leave section["i"]
-- nil for the code below -- confirm whether that case can occur.
if section["d"] then
section["i"] = section["i"] or split_def(section["d"])
elseif section["i"] then
section["d"] = section["d"] or attach_def(section["i"])
else
error("Must have either definite or indefinite form.")
end
if singulars[section["number"]] then
-- Singular rules.
local is_fem = match_form(section["i"], "ָה$")
local is_e = match_form(section["i"], "ֶה$")
-- Step 2: default construct, based on the ending of the indefinite.
if not section["c"] then
if is_fem then
section["c"] = gsub_form(section["i"], "ָה$", "ַת")
elseif is_e then
section["c"] = gsub_form(section["i"], "ֶה$", "ֵה")
elseif match_form(section["i"], "ָ[ב-ת][ׁׂ]?$") then
section["c"] = gsub_form(section["i"], "ָ([ב-ת][ׁׂ]?)$", "ַ%1")
else
section["c"] = section["i"]
end
end
-- Step 3a: light stem and suffix table. A supplied 3ms form takes
-- precedence; otherwise both are derived from the construct.
if section["3ms"] then
if match_form(section["3ms"], "וֹ$") then
light_stem = gsub_form(section["3ms"], "וֹ$", "")
endings = endings_c
elseif match_form(section["3ms"], "ֵהוּ$") then
light_stem = gsub_form(section["3ms"], "ֵהוּ$", "")
endings = endings_c
elseif match_form(section["3ms"], "ִיהוּ$") then
-- Only the suffix is removed here; the ִי stays in the stem.
light_stem = gsub_form(section["3ms"], "הוּ$", "")
endings = endings_i
elseif match_form(section["3ms"], "ִיו$") then
-- Only the suffix is removed here; the ִי stays in the stem.
light_stem = gsub_form(section["3ms"], "ו$", "")
endings = endings_i
else
error("Unrecognized 3ms suffix pattern.")
end
elseif match_form(section["c"], "ֵה$") then
if is_e then
light_stem = gsub_form(section["i"], "ֶה$", "")
else
light_stem = gsub_form(section["c"], "ֵה$", "")
end
endings = endings_e
elseif match_form(unfinalize(section["c"]), "ַ[א-ת][ׁׂ]?$") then
-- The replacement pattern lists fewer letters than the test above, so
-- for the letters it omits the unfinalized construct is used unchanged.
light_stem = gsub_form(unfinalize(section["c"]), "ַ([בגדהוזחטכלמנסעפצקרשת][ׁׂ]?)$", "ָ%1")
endings = endings_c
else
light_stem = unfinalize(section["c"])
if match_form(section["c"], "ִי$") then
endings = endings_i
else
endings = endings_c
end
end
-- Step 3b: heavy stem. Taken from a supplied 2mp form, else from a
-- supplied 2ms form (endings_c only), else derived.
if section["2mp"] then
if endings == endings_c then
if not match_form(section["2mp"], "[^אהחע]ְכֶם$") and not match_form(section["2mp"], "[אהחע]ֲכֶם$") then
error("Unrecognized 2mp suffix pattern.")
end
heavy_stem = gsub_form(section["2mp"], "[ְֲ]כֶם$", "")
else
if not match_form(section["2mp"], "כֶם$") then
error("Unrecognized 2mp suffix pattern.")
end
heavy_stem = gsub_form(section["2mp"], "כֶם$", "")
end
elseif section["2ms"] and endings == endings_c and match_form(section["2ms"], "ְךָ$") then
heavy_stem = gsub_form(section["2ms"], "ְךָ$", "")
elseif match_form(light_stem, "ּ$") or match_form(light_stem, "ְ[א-ת][ּׁׂ]?$") then
heavy_stem = light_stem
elseif match_form(section["c"], "ֵה$") then
if endings == endings_e then
heavy_stem = gsub_form(section["c"], "ה$", "")
else
heavy_stem = light_stem
end
else
heavy_stem = unfinalize(section["c"])
end
else
-- Plural (and dual) rules. endings_pm is the default; it is replaced by
-- endings_pf when the form ends in וֹת.
endings = endings_pm
-- Step 2: default construct, based on the ending of the indefinite.
if not section["c"] then
if match_form(section["i"], "וֹת$") then
endings = endings_pf
section["c"] = section["i"]
elseif match_form(section["i"], "ִים$") then
section["c"] = gsub_form(section["i"], "ִים$", "ֵי")
elseif match_form(section["i"], {"יי?ם$", "ַיִם$"}) then
section["c"] = gsub_form(section["i"], {"יי?ם$", "ַיִם$"}, "ֵי")
else
error("Unrecognized plural pattern.")
end
end
-- Step 3: the heavy stem comes from the construct, the light stem from
-- the indefinite when its ending is recognized.
if match_form(section["c"], "ֵי$") then
heavy_stem = gsub_form(section["c"], "ֵי$", "")
if match_form(section["i"], "ִים$") then
light_stem = gsub_form(section["i"], "ִים$", "")
elseif match_form(section["i"], {"יי?ם$", "ַיִם$"}) then
light_stem = gsub_form(section["i"], {"יי?ם$", "ַיִם$"}, "")
else
light_stem = heavy_stem
end
elseif match_form(section["c"], "וֹת$") then
endings = endings_pf
heavy_stem = section["c"]
light_stem = heavy_stem
else
error("Unrecognized plural construct pattern.")
end
end
-- Step 4: build every suffixed form that was not supplied explicitly.
local maybe_stem = endings["3p_heavy"] and heavy_stem or light_stem
for _, p in pairs(light_endings) do
section[p] = section[p] or attach_ending(light_stem, endings[p])
end
for _, p in pairs(heavy_endings) do
section[p] = section[p] or attach_ending(heavy_stem, endings[p])
end
for _, p in pairs(maybe_endings) do
section[p] = section[p] or attach_ending(maybe_stem, endings[p])
end
end
end
-- Fully specified sample sections. export.show renders these instead of
-- parsed arguments when the invoking frame's first argument is "example";
-- they are passed to make_table without going through infer_forms.
-- Sample singular section.
local example_s = {
["number"] = "s",
["i"] = "בַּיִת",
["d"] = "הַבַּיִת",
["c"] = "בֵּית",
["1s"] = "בֵּיתִי",
["2ms"] = "בֵּיתְךָ",
["2fs"] = "בֵּיתֵךְ",
["3ms"] = "בֵּיתוֹ",
["3fs"] = "בֵּיתָהּ",
["1p"] = "בֵּיתֵנוּ",
["2mp"] = "בֵּיתְכֶם",
["2fp"] = "בֵּיתְכֶן",
["3mp"] = "בֵּיתָם",
["3fp"] = "בֵּיתָן",
}
-- Sample plural section; two of its forms are pairs.
local example_p = {
["number"] = "p",
["i"] = "בָּתִּים",
["d"] = "הַבָּתִּים",
["c"] = "בָּתֵּי",
["1s"] = {"בתיי", "בָּתַּי"},
["2ms"] = "בָּתֶּיךָ",
["2fs"] = {"בתייך", "בָּתַּיִךְ"},
["3ms"] = "בָּתָּיו",
["3fs"] = "בָּתֶּיהָ",
["1p"] = "בָּתֵּינוּ",
["2mp"] = "בָּתֵּיכֶם",
["2fp"] = "בָּתֵּיכֶן",
["3mp"] = "בָּתֵּיהֶם",
["3fp"] = "בָּתֵּיהֶן",
}
-- The list of sections in the same shape that process_args returns.
local example = {example_s, example_p}
-- Wikitext templates for the inflection table. make_table fills the {name}
-- placeholders through m_strutils.format.
-- NOTE(review): {\op} and {\cl} are presumably that formatter's escapes for
-- literal "{" and "}" (yielding "{|" and "|}") -- confirm against
-- Module:string utilities.

-- Opening of the collapsible frame and the table header rows; takes {title}.
local table_top = [===[<div><div class="NavFrame" style="display:inline-block;min-width:30em">
<div class="NavHead" align="left" style="min-width:30em">{title}</div>
<div class="NavContent" align="center" style="min-width:30em">
{\op}| class="wikitable inflection-table" style="text-align:center;margin:0"
! rowspan="3" | Number !! colspan="2" | Isolated forms !! colspan="5" | With possessive pronouns
|-
! rowspan="2" | State !! rowspan="2" | Form !! rowspan="2" | Person !! colspan="2" | singular !! colspan="2" | plural
|-
! m. !! f. !! m. !! f.
]===]
-- Three table rows for one section; takes {number} plus one placeholder per
-- entry of forms_names.
local table_section = [===[|-
! rowspan="3" | {number} !! indefinite
| {i}
! first
| colspan="2" | {1s} || colspan="2" | {1p}
|-
! definite
| {d}
! second
| {2ms} || {2fs} || {2mp} || {2fp}
|-
! construct
| {c}
! third
| {3ms} || {3fs} || {3mp} || {3fp}
]===]
-- Closes the table and the frame opened by table_top; takes no values.
local table_bottom = [===[|{\cl}
</div></div></div>]===]
-- Keys of every form cell in a section. make_table turns the value under each
-- of these keys into a link (or a dash when the form is absent) before
-- filling table_section.
local forms_names = {
"i",
"d",
"c",
"1s",
"2ms",
"2fs",
"3ms",
"3fs",
"1p",
"2mp",
"2fp",
"3mp",
"3fp",
}
-- Renders the complete inflection table as wikitext.
--
-- forms is an array of section tables (number code plus forms keyed by the
-- names in forms_names). The title links to the indefinite form of the first
-- section when there is one, and is a bare "Declension" otherwise. Absent
-- forms are shown as a dash; a present construct form is rendered with a
-- trailing maqaf.
--
-- The section tables are only read. The values handed to the formatter are
-- collected in a fresh row table per section, so the caller's data -- which
-- may be the module-level `example` tables -- is not overwritten with rendered
-- markup and number labels.
local function make_table(forms)
	local headword = forms and forms[1] and forms[1]["i"]
	local title = headword and ("Declension of " .. make_link(headword)) or "Declension"

	local output = {}
	table.insert(output, m_strutils.format(table_top, {["title"] = title}))
	for _, section in ipairs(forms) do
		local row = {["number"] = numbers[section["number"]]}
		for _, form_name in ipairs(forms_names) do
			local form = section[form_name]
			row[form_name] = make_link(form or "-", form and form_name == "c")
		end
		table.insert(output, m_strutils.format(table_section, row))
	end
	table.insert(output, m_strutils.format(table_bottom, {}))
	return table.concat(output)
end
-- Entry point used via #invoke: returns the wikitext of the inflection table.
--
-- Template arguments are taken from the parent frame. A non-empty "nolink"
-- argument sets the global SUPPRESS_LINKS, which make_link reads to omit link
-- targets. If the invoking frame's own first argument is "example", the
-- built-in sample sections are rendered; otherwise the parent arguments are
-- parsed and the missing forms inferred.
-- PAGENAME and NAMESPACE are also set as globals from the current title; they
-- are not read anywhere else in this file.
function export.show(frame)
	local args = frame:getParent().args

	local current_title = mw.title.getCurrentTitle()
	PAGENAME = current_title.text
	NAMESPACE = current_title.nsText

	local nolink = args["nolink"]
	if nolink and nolink ~= "" then
		SUPPRESS_LINKS = true
	end

	if frame.args[1] == "example" then
		return make_table(example)
	end

	local forms = process_args(args)
	infer_forms(forms)
	return make_table(forms)
end
return export