Module:Hebr-common
Appearance
- This module lacks a documentation subpage. Please create it.
- Useful links: subpage list • links • transclusions • testcases • sandbox
-- Table of functions exported by this module; returned at the end of the file.
local export = {}
-- Shared string helpers; `char` and `sub` are aliased from it below.
local m_str_utils = require("Module:string utilities")
local makeDisplayText -- defined below
-- Unicode normalisation (composed / decomposed forms) from mw.ustring.
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
-- u(codepoint) is used throughout this file to build strings from code points.
local u = m_str_utils.char
-- Code-point-aware gsub; used wherever a pattern contains non-ASCII character classes.
local ugsub = mw.ustring.gsub
-- Substring helper; called with positions captured by ugsub.
local usub = m_str_utils.sub
-- Character class (bracketed set) of the marks this module treats as
-- diacritics. It is always concatenated into patterns passed to ugsub, and
-- contains a range, so it must be matched per code point rather than per byte.
local DIACRITICS = "[" ..
u(0x0307) .. u(0x0308) .. -- combining dot above, combining diaeresis
u(0x034F) .. -- combining grapheme joiner
u(0x200C) .. -- zero width non-joiner
u(0x200D) .. -- zero width joiner
u(0x0591) .. "-" .. u(0x05BD) .. -- range: Hebrew accents and points, etnahta through meteg
u(0x05BF) .. -- rafe
u(0x05C1) .. u(0x05C2) .. -- shin dot, sin dot
u(0x05C4) .. u(0x05C5) .. -- upper dot, lower dot
u(0x05C7) .. -- qamats qatan
u(0xFB1E) .. -- Judeo-Spanish varika
"]"
-- Individual Hebrew marks referenced by the functions below.
local GERESH = u(0x059C) -- accent geresh; a doubled one is merged into GERSHAYIM
local GERSHAYIM = u(0x059E) -- double geresh
local MERCHA = u(0x05A5) -- accent mercha; a doubled one is merged into MERCHA_KEFULA
local MERCHA_KEFULA = u(0x05A6) -- double mercha
local TELISHA = u(0x05A0) .. u(0x05A9) -- telisha gedola followed by telisha qetana
local KARNE_PARAH = u(0x059F) -- single mark that replaces the TELISHA pair
local SHEVA = u(0x05B0)
local HOLAM = u(0x05B9)
local HOLAM_HASER_FOR_VAV = u(0x05BA) -- only kept when it directly follows vav (see makeDisplayText)
local WIDE_ALEF = u(0xFB21) -- prepended to sort keys for some languages (see makeSortKey)
-- Lookup table passed as the replacement argument to gsub in makeDisplayText
-- and makeEntryName. A matched substring with no entry here is left unchanged.
local substitutes = {
-- ASCII punctuation -> Hebrew punctuation.
["'"] = "׳", -- apostrophe -> geresh
['"'] = "״", -- double quote -> gershayim
["-"] = "־", -- hyphen-minus -> maqaf
["|"] = "׀", -- vertical bar -> paseq
-- Sheva followed by a vowel point -> the single precomposed hataf point.
[SHEVA .. u(0x05B6)] = u(0x05B1), -- sheva + segol -> hataf segol
[SHEVA .. u(0x05B7)] = u(0x05B2), -- sheva + patah -> hataf patah
[SHEVA .. u(0x05B8)] = u(0x05B3), -- sheva + qamats -> hataf qamats
[SHEVA .. u(0x05C7)] = u(0x05B3), -- sheva + qamats qatan -> hataf qamats
}
--- Normalise Hebrew-script text for display.
-- Works on the decomposed (NFD) form and returns the recomposed (NFC) form.
-- @param text string to normalise
-- @param lang language object (not used by this function)
-- @param sc script object (not used by this function)
-- @return the normalised string
function export.makeDisplayText(text, lang, sc)
	local result = toNFD(text)
	-- ASCII punctuation -> Hebrew punctuation.
	result = result:gsub("['\"%-|]", substitutes)
	-- Byte-level pattern: sheva followed by any two-byte sequence that could be
	-- one of the vowel points keyed in `substitutes`; sequences without an
	-- entry in the table are left as they are.
	result = result:gsub(SHEVA .. "[\214\215][\182-\184\135]", substitutes)
	-- Collapse doubled or paired accents into their single-character forms.
	result = result:gsub(GERESH .. GERESH, GERSHAYIM)
	result = result:gsub(MERCHA .. MERCHA, MERCHA_KEFULA)
	result = result:gsub(TELISHA, KARNE_PARAH)

	-- Holam haser for vav (U+05BA) is only valid on vav; on any other base
	-- character it is replaced with the ordinary holam (U+05B9).
	if result:find(HOLAM_HASER_FOR_VAV, nil, true) then
		local cluster_pattern = "()(" .. DIACRITICS .. "+)"
		-- `result` still holds the pre-substitution string while ugsub runs,
		-- so the captured positions index into the text being scanned.
		local function fix_cluster(start_pos, marks)
			local base_pos = start_pos - 1
			if usub(result, base_pos, base_pos) ~= "ו" then
				return (marks:gsub(HOLAM_HASER_FOR_VAV, HOLAM))
			end
			-- Base is vav: return nothing so the cluster is kept unchanged.
		end
		result = ugsub(result, cluster_pattern, fix_cluster)
	end

	return toNFC(result)
end
-- Fill in the forward-declared local so makeEntryName can call it directly.
makeDisplayText = export.makeDisplayText
-- Set of full language codes whose entry names keep their diacritics;
-- makeEntryName returns the display text unchanged for these.
local retain_diacritics = {
	yi = true,
	itk = true,
	lad = true,
	lsd = true,
}
--- Build the entry (page) name for Hebrew-script text.
-- Languages listed in `retain_diacritics` get the same result as
-- makeDisplayText; for every other language all diacritics are removed and
-- only the ASCII punctuation substitutions are applied.
-- @param text string to convert
-- @param lang language object; its full code selects the behaviour
-- @param sc script object, passed through to makeDisplayText
-- @return the entry name in NFC
function export.makeEntryName(text, lang, sc)
	if not retain_diacritics[lang:getFullCode()] then
		-- Strip marks on the decomposed form so precomposed letters lose theirs too.
		local stripped = ugsub(toNFD(text), DIACRITICS .. "+", "")
		local entry_name = stripped:gsub("['\"%-|]", substitutes)
		return toNFC(entry_name)
	end
	return makeDisplayText(text, lang, sc)
end
-- Per-character replacements applied by makeSortKey in a single ugsub pass.
-- Because the pass is not repeated, every value must already be in its final
-- folded form: a value that is itself a key of this table would not be
-- folded again.
local sortkey_substitutes = {
-- ASCII punctuation -> Hebrew punctuation.
["'"] = "׳",
['"'] = "״",
["-"] = "־",
["|"] = "׀",
-- Final letter forms -> regular forms.
["ך"] = "כ",
["ם"] = "מ",
["ן"] = "נ",
["ף"] = "פ",
["ץ"] = "צ",
-- U+05EF yod triangle: vav followed by private-use U+F000
-- (presumably so that it sorts after plain vav; confirm the intended position).
["ׯ"] = "ו" .. u(0xF000),
-- Yiddish digraph ligatures -> their component letters.
["װ"] = "וו",
["ױ"] = "וי",
["ײ"] = "יי",
-- Letterlike symbols (U+2135-U+2138) -> ordinary letters.
["ℵ"] = "א",
["ℶ"] = "ב",
["ℷ"] = "ג",
["ℸ"] = "ד",
-- Alternative and wide presentation forms (U+FB20-U+FB29) -> ordinary letters.
["ﬠ"] = "ע",
["ﬡ"] = "א",
["ﬢ"] = "ד",
["ﬣ"] = "ה",
["ﬤ"] = "כ",
["ﬥ"] = "ל",
-- Wide FINAL mem: fold straight to regular mem, matching the plain final mem
-- entry above (mapping it to "ם" would leave an unfolded final form in the key).
["ﬦ"] = "מ",
["ﬧ"] = "ר",
["ﬨ"] = "ת",
["﬩"] = "+",
-- Alef-lamed ligature -> its component letters.
["ﭏ"] = "אל"
}
-- Set of full language codes whose sort keys are prefixed with
-- U+FB21 HEBREW LETTER WIDE ALEF, so that their Hebrew-script titles sort
-- after Arabic-script titles.
local sort_after_wide_alef = {
	ar = true,
	fa = true,
	ur = true,
	shi = true,
}
--- Build the sort key for Hebrew-script text.
-- Diacritics are removed from the decomposed text, then each character
-- listed in `sortkey_substitutes` is replaced by its table value. For
-- languages in `sort_after_wide_alef` the key is prefixed with WIDE_ALEF.
-- @param text string to convert
-- @param lang language object; its full code decides whether the prefix is added
-- @param sc script object (not used by this function)
-- @return the sort key in NFC
function export.makeSortKey(text, lang, sc)
	local key = toNFD(text)
	key = ugsub(key, DIACRITICS .. "+", "")
	key = ugsub(key, "['\"%-|ךםןףץׯ-ײℵ-ℸﬠ-﬩ﭏ]", sortkey_substitutes)

	local prefix = ""
	if sort_after_wide_alef[lang:getFullCode()] then
		prefix = WIDE_ALEF
	end
	return toNFC(prefix .. key)
end
return export