Jump to content

Module:he-noun

From Wiktionary, the free dictionary

This module generates inflection tables for Hebrew nouns and is used by {{he-decl}}; see that template's documentation to learn more.


local m_links = require("Module:links")
local m_strutils = require("Module:string utilities")
local com = require("Module:he-common")

local lang = require("Module:languages").getByCode("he")

local export = {}

-- Normalizes a single raw template argument.
-- The argument is first passed through com.fix_nikud. If the result contains
-- a slash or a backslash, it is split at the first such character and a
-- two-element table {left, right} of the trimmed halves is returned;
-- otherwise the fixed string itself is returned.
local function process_arg(arg)
    local fixed = com.fix_nikud(arg)
    local sep = mw.ustring.find(fixed, "[\\/]")
    if not sep then
        return fixed
    end
    local left = mw.text.trim(mw.ustring.sub(fixed, 1, sep - 1))
    local right = mw.text.trim(mw.ustring.sub(fixed, sep + 1))
    return {left, right}
end

-- Returns the second element of a two-part form table, or the value itself
-- when it is not a table.
local function getv(x)
    if type(x) ~= "table" then
        return x
    end
    return x[2]
end

-- Returns the first element of a two-part form table. For a plain string,
-- every run of characters that are not Lua-pattern magic characters is passed
-- through lang:makeEntryName, so the function can be applied both to forms
-- and to pattern strings without disturbing the pattern syntax.
--
-- Always returns exactly one value: the gsub call is wrapped in parentheses so
-- its substitution count does not leak to callers that use this call as the
-- last argument of another call.
local function getnv(x)
    if type(x) == "table" then
        return x[1]
    end
    return (mw.ustring.gsub(x, "[^%^$()%%.%[%]*+%-?]+", function(c1)
        return (lang:makeEntryName(c1))
    end))
end

-- Converts a pattern to the variant applied to the first ("nv") part of a
-- form: takes getnv(x) and deletes every empty character class "[]" together
-- with an optional following "?", "+" or "-" quantifier (such empty classes
-- arise when getnv strips everything inside a bracket expression).
--
-- Returns exactly one value. Without the parentheses the substitution count
-- would be returned as well and, when this call is the last argument of
-- mw.ustring.match, be taken as the match start position.
local function getnvrx(x)
    return (mw.ustring.gsub(getnv(x), "%[%][%?%+%-]?", ""))
end

-- Concatenates two forms, each of which is either a plain string or a
-- two-element table. If both are strings the result is a string. Otherwise
-- the result is a table whose second element is the concatenation of the
-- second/plain parts and whose first element is the concatenation of the
-- first parts, with a plain-string operand converted via lang:makeEntryName.
local function append_parts_2(a, b)
    local a_split = type(a) == "table"
    local b_split = type(b) == "table"
    if a_split and b_split then
        return {a[1] .. b[1], a[2] .. b[2]}
    elseif a_split then
        return {a[1] .. (lang:makeEntryName(b)), a[2] .. b}
    elseif b_split then
        return {(lang:makeEntryName(a)) .. b[1], a .. b[2]}
    end
    return a .. b
end

-- Left-folds append_parts_2 over the arguments: a is joined with each extra
-- argument in order, stopping at the first nil argument.
local function append_parts(a, ...)
    local rest = {...}
    local idx = 1
    while rest[idx] ~= nil do
        a = append_parts_2(a, rest[idx])
        idx = idx + 1
    end
    return a
end

-- Compares two forms. A table form equals another table form with the same
-- first and second elements; anything else is compared with ==.
local function equal(a, b)
    if type(a) ~= "table" then
        return a == b
    end
    return type(b) == "table" and a[1] == b[1] and a[2] == b[2]
end

-- Applies a pattern substitution to a form.
--
-- form, regex and repl may each be a plain string or a two-element table.
-- If any of them is a table (repl only counts when const_repl is false), the
-- substitution is done separately on the second part (via getv) and on the
-- first part (via getnv / getnvrx) and a two-element table is returned.
-- Otherwise a plain mw.ustring.gsub is performed on the string.
--
-- const_repl: when true, repl is handed to gsub unchanged for both parts
-- (used for replacement lookup tables such as final_to_unfinal).
--
-- Always returns exactly one value. The plain-string branch used to return
-- gsub's substitution count as well, unlike the table branch; the count is
-- now dropped so both branches behave the same for callers.
local function gsub_form(form, regex, repl, const_repl)
    if type(form) == "table" or type(regex) == "table" or (not const_repl and type(repl) == "table") then
        local replv = repl
        local replnv = repl
        if not const_repl then
            replv = getv(repl)
            replnv = getnv(repl)
        end
        local retv = mw.ustring.gsub(getv(form), getv(regex), replv)
        local retnv = mw.ustring.gsub(getnv(form), (getnvrx(regex)), replnv)
        return {retnv, retv}
    else
        return (mw.ustring.gsub(form, regex, repl))
    end
end

-- Matches a pattern against a form.
--
-- If form or regex is a two-element table, the second part is matched with
-- getv(regex) and the first part with getnvrx(regex); a table
-- {first_match, second_match} is returned only when both parts match,
-- otherwise nil. For plain strings the result of mw.ustring.match is
-- returned directly.
local function match_form(form, regex)
    if type(form) == "table" or type(regex) == "table" then
        local retv = mw.ustring.match(getv(form), getv(regex))
        -- Parentheses keep only the pattern: any additional value returned by
        -- getnvrx would otherwise become the `init` argument of match and
        -- shift the position at which matching starts.
        local retnv = mw.ustring.match(getnv(form), (getnvrx(regex)))
        if retv and retnv then
            return {retnv, retv}
        else
            return nil
        end
    else
        return mw.ustring.match(form, regex)
    end
end

-- Lookup table from each word-final letter shape to its regular
-- (non-final) counterpart. Passed to gsub_form as a replacement table by
-- unfinalize.
local final_to_unfinal = {
    ["ך"] = "כ",
    ["ם"] = "מ",
    ["ן"] = "נ",
    ["ף"] = "פ",
    ["ץ"] = "צ",
}

-- Prepares a form for having a suffix attached to it.
-- * If the form ends in ח/ע followed by patah, or in ה carrying a dot
--   (optionally followed by patah), the trailing dot and patah are removed.
-- * If the form ends in a final-shape letter (ך only when followed by shva),
--   that letter is replaced through final_to_unfinal and the shva dropped.
-- * Any other form is returned unchanged.
local function unfinalize(form)
    local ends_in_pointed_guttural = match_form(form, "[חע]ַ$") or match_form(form, "הּ[ַ]?$")
    if ends_in_pointed_guttural then
        return gsub_form(form, "([החע])[ּ]?[ַ]?$", "%1")
    end

    local ends_in_final_letter = match_form(form, "[םןףץ]$") or match_form(form, "ךְ$")
    if ends_in_final_letter then
        return gsub_form(form, "([ךםןףץ])[ְ]?$", final_to_unfinal, true)
    end

    return form
end

-- Builds the wikitext for one table cell.
--   x            a plain form string, a two-element form table, or "-" for
--                an absent form (rendered as an em dash)
--   is_construct when truthy, a maqaf is appended to the displayed text
-- The link target is omitted when the global SUPPRESS_LINKS is set. For a
-- table form the target is the entry name of the first element; the display
-- text is the second element, preceded by "<entry name> / " when the entry
-- name of the second element differs from that of the first.
local function make_link(x, is_construct)
    local dolink = nil
    if not SUPPRESS_LINKS then
        dolink = true
    end

    local maqaf = ""
    if is_construct then
        maqaf = "־"
    end

    if type(x) == "table" then
        local pg = (lang:makeEntryName(x[1]))
        local display = x[2] .. maqaf
        if pg ~= (lang:makeEntryName(x[2])) then
            display = pg .. maqaf .. " / " .. display
        end
        return m_links.full_link({lang = lang, allow_self_link = false, term = dolink and pg, alt = display, tr = "-"})
    end

    if x == "-" then
        return "—" -- m-dash
    end

    return m_links.full_link({lang = lang, term = dolink and x, alt = x .. maqaf, tr = "-", allow_self_link = false})
end

-- Section-marker codes accepted among the template arguments, mapped to the
-- label shown in the "Number" column of the table. process_args also uses
-- membership in this table to tell a section marker apart from a form.
local numbers = {
    ["s"] = "singular",
    ["d"] = "dual",
    ["p"] = "plural",
    ["ms"] = "masculine singular",
    ["md"] = "masculine dual",
    ["mp"] = "masculine plural",
    ["fs"] = "feminine singular",
    ["fd"] = "feminine dual",
    ["fp"] = "feminine plural",
}

-- Section codes that infer_forms treats with the singular rules; every other
-- code goes through the plural branch.
local singulars = {
    ["s"] = true,
    ["ms"] = true,
    ["fs"] = true,
}

-- Tries to strip a leading definite-article prefix from a pointed form.
-- Each prefix pattern is tried in order; the first one that actually changes
-- the string determines the result. Returns the stripped string, or nil when
-- no pattern applies.
local function split_defv(form)
    local prefix_patterns = {
        "^הַ([וזטילמנסצקש][ׁׂ]?)ּ",
        "^הַ([בגדיכפת])",
        "^הָ(ר)",
        "^ה[ֶַָ]([אהחע])",
    }
    for _, pattern in ipairs(prefix_patterns) do
        local stripped = mw.ustring.gsub(form, pattern, "%1")
        if stripped ~= form then
            return stripped
        end
    end
    return nil
end

-- Strips the definite-article prefix from a form.
-- A plain string is delegated to split_defv. For a two-element table the
-- second element goes through split_defv and a leading ה is removed from the
-- first element; the stripped pair is returned only if both succeeded,
-- otherwise nil.
local function split_def(form)
    if type(form) ~= "table" then
        return split_defv(form)
    end

    local wv = split_defv(form[2])
    local nv = mw.ustring.gsub(form[1], "^ה", "")
    if wv and (nv ~= form[1]) then
        return {nv, wv}
    end
    return nil
end

-- Builds the definite form from an indefinite one by prefixing the article.
-- The shape of the prefix depends on the first letter of the form, as tested
-- by the patterns below. Raises an error when the first letter is not covered
-- by any of them.
local function attach_def(form)
    if match_form(form, "^[בגדהחכפת]") then
        return append_parts("הַ", form)
    end
    if match_form(form, "^[אער]") then
        return append_parts("הָ", form)
    end
    if match_form(form, "^ו") then
        -- separate replacements for the first and second part of the form
        return gsub_form(form, "^ו", {"הוו", "הַוּ"})
    end
    if match_form(form, "^[זטילמנסצק]") or match_form(form, "^ש[ׁׂ]") then
        return gsub_form(form, "^([זטילמנסצקש][ׁׂ]?)", "הַ%1ּ")
    end
    error("Unrecognized initial consonant in indefinite form.")
end

-- Removes a trailing maqaf from a form (plain string or two-element table).
--
-- Each gsub call is wrapped in parentheses so that only the resulting string
-- is kept. Without them the last gsub inside the table constructor expanded
-- to (string, count), producing a three-element table, and the string branch
-- returned the count as a second value.
local function remove_maqaf(x)
    if type(x) == "table" then
        return {(mw.ustring.gsub(x[1], "־$", "")), (mw.ustring.gsub(x[2], "־$", ""))}
    else
        return (mw.ustring.gsub(x, "־$", ""))
    end
end

-- Parses the positional template arguments into a list of sections.
--
-- The arguments are a sequence of forms, optionally interrupted by section
-- markers (keys of `numbers`); the first section is implicitly "s". Each
-- section is a table with a "number" field plus whichever of the fields
-- "i", "d", "c", "2ms", "2mp", "3ms" could be recognized:
--   * a form that split_defv recognizes as carrying the article becomes "d"
--     (when it is the first form of a section, the decision between "i" and
--     "d" is postponed until the next form has been seen);
--   * otherwise the first form of a section becomes "i";
--   * later forms are classified by their ending (2ms, 2mp, 3ms) or by a
--     trailing maqaf ("c", stored without the maqaf).
-- Parsing stops at the first missing or blank positional argument.
--
-- Raises an error for a form that cannot be classified or that duplicates an
-- already-filled slot.
--
-- The arguments are trimmed into a private copy rather than written back into
-- `args`: on a Scribunto frame argument table, assigning nil to a key does not
-- remove the argument (the next read fetches it again), so blank parameters
-- could not reliably be discarded in place. The caller's table is left
-- untouched.
local function process_args(args)
    local cleaned = {}
    for key, val in pairs(args) do
        val = mw.text.trim(val)
        if val ~= "" then
            cleaned[key] = val
        end
    end
    args = cleaned

    local forms = {}

    if not args[1] then return forms end

    local i = 1
    local num = "s"
    while num do
        local section = {["number"] = num}
        local touched = false
        -- first form of the section when it looks definite; resolved to "i"
        -- or "d" once the following form is known
        local likely_def = nil
        local initial_hes = nil
        while args[i] and not numbers[args[i]] do
            local form = process_arg(args[i])
            local wv = getv(form)
            if likely_def then
                -- compare the number of leading ה letters with the pending form
                local cur_initial_hes = mw.ustring.len(mw.ustring.match(getnv(form), "^ה*"))
                if cur_initial_hes < initial_hes then
                    section["d"] = likely_def
                elseif cur_initial_hes > initial_hes and split_defv(wv) then
                    section["i"] = likely_def
                else
                    error("Definite form must either be first or second.")
                end
                likely_def = nil
            end
            if (not section["d"]) and split_defv(wv) then
                if touched then
                    section["d"] = form
                else
                    likely_def = form
                    initial_hes = mw.ustring.len(mw.ustring.match(getnv(form), "^ה*"))
                end
            elseif not touched then
                section["i"] = form
            elseif (not section["2ms"]) and mw.ustring.match(wv, "ךָ$") then
                section["2ms"] = form
            elseif (not section["2mp"]) and mw.ustring.match(wv, "כֶם$") then
                section["2mp"] = form
            elseif (not section["3ms"]) and (mw.ustring.match(wv, "וֹ$") or mw.ustring.match(wv, "ו$") or mw.ustring.match(wv, "הוּ$")) then
                section["3ms"] = form
            elseif (not section["c"]) and (mw.ustring.match(wv, "־$") or mw.ustring.match(getnv(form), "־$")) then
                section["c"] = remove_maqaf(form)
            else
                error("Unrecognized or duplicate form: " .. wv)
            end
            touched = true
            i = i + 1
        end
        -- a lone definite-looking first form is the definite form
        if likely_def then
            if section["d"] then error("Unrecognized or duplicate form: " .. getv(section["d"])) end
            section["d"] = likely_def
        end
        if touched then
            table.insert(forms, section)
        end
        -- the argument that ended the inner loop is the next section marker
        -- (or nil, which ends parsing)
        num = args[i]
        i = i + 1
    end

    return forms
end

-- Possessive-suffix tables. Each maps a person/number/gender key ("1s",
-- "2ms", ... "3fp") to the suffix attached to a stem by attach_ending.
-- The "3p_heavy" flag tells infer_forms whether the third-person plural
-- suffixes ("3mp", "3fp") are attached to the heavy stem (true) or to the
-- light stem (false).

-- Default suffix set for singular sections; infer_forms selects it whenever
-- none of the more specific sets below applies.
local endings_c = {
    ["3p_heavy"] = false,
    ["1s"] = "ִי",
    ["2ms"] = "ְךָ",
    ["2fs"] = "ֵךְ",
    ["3ms"] = "וֹ",
    ["3fs"] = "ָהּ",
    ["1p"] = "ֵנוּ",
    ["2mp"] = "ְכֶם",
    ["2fp"] = "ְכֶן",
    ["3mp"] = "ָם",
    ["3fp"] = "ָן",
}

-- Suffix set selected by infer_forms when the construct form ends in "ֵה"
-- and no 3ms form was supplied.
-- most have alternative forms that are not yet supported
local endings_e = {
    ["3p_heavy"] = true,
    ["1s"] = "ִי",
    ["2ms"] = "ֶךָ",
    ["2fs"] = "ֵךְ",
    ["3ms"] = "ֵהוּ",
    ["3fs"] = "ֶהָ",
    ["1p"] = "ֵנוּ",
    ["2mp"] = "ֵכֶם",
    ["2fp"] = "ֵכֶן",
    ["3mp"] = "ֵהֶם",
    ["3fp"] = "ֵהֶן",
}

-- Suffix set selected by infer_forms when the supplied 3ms form ends in
-- "ִיהוּ" or "ִיו", or when the construct form ends in "ִי". The suffixes
-- carry no leading vowel point of their own.
local endings_i = {
    ["3p_heavy"] = true,
    ["1s"] = "",
    ["2ms"] = "ךָ",
    ["2fs"] = "ךְ",
    ["3ms"] = "ו", -- and "הוּ", for when I allow multiple forms
    ["3fs"] = "הָ",
    ["1p"] = "נוּ",
    ["2mp"] = "כֶם",
    ["2fp"] = "כֶן",
    ["3mp"] = "הֶם",
    ["3fp"] = "הֶן",
}

-- endings for ים ending
-- Default suffix set for non-singular sections in infer_forms. Entries given
-- as two-element tables supply different text for the first and second part
-- of a form.

local endings_pm = {
    ["3p_heavy"] = true,
    ["1s"] = {"יי", "ַי"},
    ["2ms"] = "ֶיךָ",
    ["2fs"] = {"ייך", "ַיִךְ"},
    ["3ms"] = "ָיו",
    ["3fs"] = "ֶיהָ",
    ["1p"] = "ֵינוּ",
    ["2mp"] = "ֵיכֶם",
    ["2fp"] = "ֵיכֶן",
    ["3mp"] = "ֵיהֶם",
    ["3fp"] = "ֵיהֶן",
}

-- endings for ות ending
-- Selected by infer_forms when the indefinite or construct form of a
-- non-singular section ends in "וֹת". Identical to endings_pm except for the
-- third-person plural suffixes.

local endings_pf = {
    ["3p_heavy"] = true,
    ["1s"] = {"יי", "ַי"},
    ["2ms"] = "ֶיךָ",
    ["2fs"] = {"ייך", "ַיִךְ"},
    ["3ms"] = "ָיו",
    ["3fs"] = "ֶיהָ",
    ["1p"] = "ֵינוּ",
    ["2mp"] = "ֵיכֶם",
    ["2fp"] = "ֵיכֶן",
    ["3mp"] = "ָם",--add extra יהם ending
    ["3fp"] = "ָן",--add extra יהן ending
}

-- Suffix keys that infer_forms always attaches to the light stem.
local light_endings = {
    "1s",
    "2ms",
    "2fs",
    "3ms",
    "3fs",
    "1p",
}

-- Suffix keys that infer_forms always attaches to the heavy stem.
local heavy_endings = {
    "2mp",
    "2fp",
}

-- Suffix keys attached to the heavy or the light stem depending on the
-- "3p_heavy" flag of the selected endings table.
local maybe_endings = {
    "3mp",
    "3fp",
}

-- Joins a stem and a possessive suffix, adjusting the junction first:
--   * a suffix starting with shva after a stem ending in א/ה/ח/ע gets the
--     shva replaced by the hataf-patah point;
--   * a suffix starting with qamats after a stem ending in patah + ח turns
--     that patah into segol;
--   * a two-part suffix whose first part starts with more י letters than its
--     second part loses one leading י from the first part when the first
--     part of the stem already ends in י.
-- Only the first applicable adjustment is made.
local function attach_ending(stem, ending)
    if match_form(ending, "^ְ") and match_form(stem, "[אהחע]$") then
        ending = gsub_form(ending, "^ְ", "ֲ")
    elseif match_form(ending, "^ָ") and match_form(stem, "ַח$") then
        stem = gsub_form(stem, "ַח$", "ֶח")
    elseif type(ending) == "table" and mw.ustring.match(getnv(stem), "י$") then
        local function leading_yods(text)
            return mw.ustring.len(mw.ustring.match(text, "^י*"))
        end
        if leading_yods(getnv(ending)) > leading_yods(getnv(getv(ending))) then
            ending = gsub_form(ending, {"^י", "^"}, "")
        end
    end
    return append_parts(stem, ending)
end

-- Fills in, in place, every form of each section that was not supplied
-- explicitly. For each section it:
--   1. derives the indefinite form from the definite one or vice versa;
--   2. derives the construct form ("c") if missing;
--   3. determines a "light" stem, a "heavy" stem and a suffix table;
--   4. attaches every suffix whose slot is still empty.
-- Sections whose number is in `singulars` use the singular rules; all others
-- use the plural rules.
-- Raises an error when a section has neither "i" nor "d", or when a supplied
-- form does not fit any of the recognized patterns.
local function infer_forms(forms)
    for _, section in pairs(forms) do
        local light_stem = nil
        local heavy_stem = nil
        local endings = nil
        -- step 1: indefinite <-> definite
        if section["d"] then
            section["i"] = section["i"] or split_def(section["d"])
        elseif section["i"] then
            section["d"] = section["d"] or attach_def(section["i"])
        else
            error("Must have either definite or indefinite form.")
        end
        if singulars[section["number"]] then
            local is_fem = match_form(section["i"], "ָה$")
            local is_e = match_form(section["i"], "ֶה$")
            -- step 2 (singular): default construct form from the indefinite
            if not section["c"] then
                if is_fem then
                    section["c"] = gsub_form(section["i"], "ָה$", "ַת")
                elseif is_e then
                    section["c"] = gsub_form(section["i"], "ֶה$", "ֵה")
                elseif match_form(section["i"], "ָ[ב-ת][ׁׂ]?$") then
                    section["c"] = gsub_form(section["i"], "ָ([ב-ת][ׁׂ]?)$", "ַ%1")
                else
                    section["c"] = section["i"]
                end
            end
            -- step 3a (singular): light stem and suffix table; an explicitly
            -- supplied 3ms form takes precedence over the construct form
            if section["3ms"] then
                if match_form(section["3ms"], "וֹ$") then
                    light_stem = gsub_form(section["3ms"], "וֹ$", "")
                    endings = endings_c
                elseif match_form(section["3ms"], "ֵהוּ$") then
                    light_stem = gsub_form(section["3ms"], "ֵהוּ$", "")
                    endings = endings_c
                elseif match_form(section["3ms"], "ִיהוּ$") then
                    light_stem = gsub_form(section["3ms"], "הוּ$", "")
                    endings = endings_i
                elseif match_form(section["3ms"], "ִיו$") then
                    light_stem = gsub_form(section["3ms"], "ו$", "")
                    endings = endings_i
                else
                    error("Unrecognized 3ms suffix pattern.")
                end
            elseif match_form(section["c"], "ֵה$") then
                if is_e then
                    light_stem = gsub_form(section["i"], "ֶה$", "")
                else
                    light_stem = gsub_form(section["c"], "ֵה$", "")
                end
                endings = endings_e
            elseif match_form(unfinalize(section["c"]), "ַ[א-ת][ׁׂ]?$") then
                -- patah before the last letter of the construct becomes qamats
                light_stem = gsub_form(unfinalize(section["c"]), "ַ([בגדהוזחטכלמנסעפצקרשת][ׁׂ]?)$", "ָ%1")
                endings = endings_c
            else
                light_stem = unfinalize(section["c"])
                if match_form(section["c"], "ִי$") then
                    endings = endings_i
                else
                    endings = endings_c
                end
            end
            -- step 3b (singular): heavy stem, taken from a supplied 2mp or
            -- 2ms form when available, otherwise from the light stem or the
            -- construct form
            if section["2mp"] then
                if endings == endings_c then
                    if not match_form(section["2mp"], "[^אהחע]ְכֶם$") and not match_form(section["2mp"], "[אהחע]ֲכֶם$") then
                        error("Unrecognized 2mp suffix pattern.")
                    end
                    heavy_stem = gsub_form(section["2mp"], "[ְֲ]כֶם$", "")
                else
                    if not match_form(section["2mp"], "כֶם$") then
                        error("Unrecognized 2mp suffix pattern.")
                    end
                    heavy_stem = gsub_form(section["2mp"], "כֶם$", "")
                end
            elseif section["2ms"] and endings == endings_c and match_form(section["2ms"], "ְךָ$") then
                heavy_stem = gsub_form(section["2ms"], "ְךָ$", "")
            elseif match_form(light_stem, "ּ$") or match_form(light_stem, "ְ[א-ת][ּׁׂ]?$") then
                heavy_stem = light_stem
            elseif match_form(section["c"], "ֵה$") then
                if endings == endings_e then
                    heavy_stem = gsub_form(section["c"], "ה$", "")
                else
                    heavy_stem = light_stem
                end
            else
                heavy_stem = unfinalize(section["c"])
            end
        else
            -- non-singular section
            endings = endings_pm
            -- step 2 (plural): default construct form from the indefinite
            if not section["c"] then
                if match_form(section["i"], "וֹת$") then
    				endings = endings_pf
                    section["c"] = section["i"]
                elseif match_form(section["i"], "ִים$") then
                    section["c"] = gsub_form(section["i"], "ִים$", "ֵי")
                elseif match_form(section["i"], {"יי?ם$", "ַיִם$"}) then
                    section["c"] = gsub_form(section["i"], {"יי?ם$", "ַיִם$"}, "ֵי")
                else
                    error("Unrecognized plural pattern.")
                end
            end
            -- step 3 (plural): heavy stem from the construct form, light stem
            -- from the indefinite form where its ending is recognized
            if match_form(section["c"], "ֵי$") then
                heavy_stem = gsub_form(section["c"], "ֵי$", "")
                if match_form(section["i"], "ִים$") then
                    light_stem = gsub_form(section["i"], "ִים$", "")
                elseif match_form(section["i"], {"יי?ם$", "ַיִם$"}) then
                    light_stem = gsub_form(section["i"], {"יי?ם$", "ַיִם$"}, "")
                else
                    light_stem = heavy_stem
                end
            elseif match_form(section["c"], "וֹת$") then
    			endings = endings_pf
                heavy_stem = section["c"]
                light_stem = heavy_stem
            else
                error("Unrecognized plural construct pattern.")
            end
        end
        -- step 4: attach suffixes; explicitly supplied forms are kept
        local maybe_stem = endings["3p_heavy"] and heavy_stem or light_stem
        for _, p in pairs(light_endings) do
            section[p] = section[p] or attach_ending(light_stem, endings[p])
        end
        for _, p in pairs(heavy_endings) do
            section[p] = section[p] or attach_ending(heavy_stem, endings[p])
        end
        for _, p in pairs(maybe_endings) do
            section[p] = section[p] or attach_ending(maybe_stem, endings[p])
        end
    end
end

-- Fully spelled-out sample sections, in the same shape that infer_forms
-- produces. export.show renders these instead of parsing arguments when the
-- module is invoked with "example" as its first argument.

-- Sample singular section.
local example_s = {
    ["number"] = "s",
    ["i"] = "בַּיִת",
    ["d"] = "הַבַּיִת",
    ["c"] = "בֵּית",
    ["1s"] = "בֵּיתִי",
    ["2ms"] = "בֵּיתְךָ",
    ["2fs"] = "בֵּיתֵךְ",
    ["3ms"] = "בֵּיתוֹ",
    ["3fs"] = "בֵּיתָהּ",
    ["1p"] = "בֵּיתֵנוּ",
    ["2mp"] = "בֵּיתְכֶם",
    ["2fp"] = "בֵּיתְכֶן",
    ["3mp"] = "בֵּיתָם",
    ["3fp"] = "בֵּיתָן",
}

-- Sample plural section; two of its forms are two-element tables.
local example_p = {
    ["number"] = "p",
    ["i"] = "בָּתִּים",
    ["d"] = "הַבָּתִּים",
    ["c"] = "בָּתֵּי",
    ["1s"] = {"בתיי", "בָּתַּי"},
    ["2ms"] = "בָּתֶּיךָ",
    ["2fs"] = {"בתייך", "בָּתַּיִךְ"},
    ["3ms"] = "בָּתָּיו",
    ["3fs"] = "בָּתֶּיהָ",
    ["1p"] = "בָּתֵּינוּ",
    ["2mp"] = "בָּתֵּיכֶם",
    ["2fp"] = "בָּתֵּיכֶן",
    ["3mp"] = "בָּתֵּיהֶם",
    ["3fp"] = "בָּתֵּיהֶן",
}

-- The sample sections in display order.
local example = {example_s, example_p}

-- Wikitext templates for the inflection table, filled in by make_table via
-- m_strutils.format. Placeholders in braces ({title}, {number}, {i}, ...)
-- are replaced with the matching field of the table passed to format.

-- Opening of the collapsible frame plus the three header rows.
local table_top = [===[<div><div class="NavFrame" style="display:inline-block;min-width:30em">
<div class="NavHead" align="left" style="min-width:30em">{title}</div>
<div class="NavContent" align="center" style="min-width:30em">
{\op}| class="wikitable inflection-table" style="text-align:center;margin:0"
! rowspan="3" | Number !! colspan="2" | Isolated forms !! colspan="5" | With possessive pronouns
|-
! rowspan="2" | State !! rowspan="2" | Form !! rowspan="2" | Person !! colspan="2" | singular !! colspan="2" | plural
|-
! m. !! f. !! m. !! f.
]===]

-- Three rows for one section: indefinite / definite / construct on the left,
-- first / second / third person possessive forms on the right.
local table_section = [===[|-
! rowspan="3" | {number} !! indefinite
| {i}
! first
| colspan="2" | {1s} || colspan="2" | {1p}
|-
! definite
| {d}
! second
| {2ms} || {2fs} || {2mp} || {2fp}
|-
! construct
| {c}
! third
| {3ms} || {3fs} || {3mp} || {3fp}
]===]

-- Closes the wikitable and the surrounding frame.
local table_bottom = [===[|{\cl}
</div></div></div>]===]

-- Keys of every form slot in a section. make_table replaces the value under
-- each of these keys with its rendered link before formatting the section.
local forms_names = {
    "i",
    "d",
    "c",
    "1s",
    "2ms",
    "2fs",
    "3ms",
    "3fs",
    "1p",
    "2mp",
    "2fp",
    "3mp",
    "3fp",
}

-- Renders the list of sections as a wikitext inflection table.
-- The title names the indefinite form of the first section when there is
-- one. Each section is modified in place: its "number" code is replaced by
-- the display label from `numbers`, and every form slot listed in
-- forms_names is replaced by its rendered link (an em dash for a missing
-- form; the construct form is rendered with a trailing maqaf).
local function make_table(forms)
    local title = "Declension"
    if forms and forms[1] and forms[1]["i"] then
        title = "Declension of " .. make_link(forms[1]["i"])
    end

    local output = {}
    table.insert(output, m_strutils.format(table_top, {["title"] = title}))

    for _, section in ipairs(forms) do
        section["number"] = numbers[section["number"]]
        for idx = 1, #forms_names do
            local key = forms_names[idx]
            local value = section[key]
            section[key] = make_link(value or "-", value and key == "c")
        end
        table.insert(output, m_strutils.format(table_section, section))
    end

    table.insert(output, m_strutils.format(table_bottom, {}))
    return table.concat(output)
end

-- Template entry point. Reads the arguments of the calling template, records
-- the current page name and namespace in the globals PAGENAME and NAMESPACE,
-- sets the global SUPPRESS_LINKS when a non-empty "nolink" argument is given,
-- and returns the rendered table. When the module itself is invoked with
-- "example" as its first argument, the built-in sample sections are rendered
-- instead of the template arguments.
function export.show(frame)
    local args = frame:getParent().args
    PAGENAME = mw.title.getCurrentTitle().text
    NAMESPACE = mw.title.getCurrentTitle().nsText

    local nolink = args["nolink"]
    if nolink and nolink ~= "" then
        SUPPRESS_LINKS = true
    end

    if frame.args[1] == "example" then
        return make_table(example)
    end

    local forms = process_args(args)
    infer_forms(forms)
    return make_table(forms)
end

return export