Module:User:Malku H₂n̥rés/he-filter
Appearance
- The following documentation is located at Module:User:Malku H₂n̥rés/he-filter/documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
Eru's code
[edit]Lua error in package.lua at line 80: module 'Module:User:Sartma/he-translit' not found
Malku's code
[edit]Lua error in package.lua at line 80: module 'Module:User:Sartma/he-translit' not found
-- Module setup: export table, dependencies, short aliases and shared patterns.
local export = {}

-- Dependencies. Both are declared local so that loading this module does not
-- create (or overwrite) the globals `MH` and `lang`.
-- MH: the MH_tr function of the he-translit module (called below on Hebrew text).
local MH = require("Module:User:Sartma/he-translit").MH_tr --TO CHANGE WHEN MOVED
-- lang: language object for code "he"; used for lang:makeEntryName.
local lang = require("Module:languages").getByCode("he")

-- Short aliases for the Scribunto library functions used throughout the file.
local N = mw.text.nowiki
local s = mw.ustring.gsub
local l = mw.ustring.lower
local M = mw.ustring.match
local U = mw.ustring.char

-- Contents of a character class (no surrounding brackets) listing what counts
-- as a word boundary: whitespace (%s), NUL (%z), U+061C, U+200E, U+200F,
-- the range U+202A-U+202E, the range U+2066-U+2069, and a literal hyphen.
local bcc = "%s%z" ..
	U(0x061C) .. U(0x200E) .. U(0x200F) .. U(0x202A) .. "-" .. U(0x202E)
	.. U(0x2066) .. "-" .. U(0x2069) .. "%-"
-- Frontier patterns: position just before / just after a boundary character.
local word_end = "%f[" .. bcc .. "]"
local word_start = "%f[^" .. bcc .. "]"
-- Acute-accented vowels (the stress marks stripped by table `o`).
local a = "[áéíóú]"
-- Any vowel, accented or not.
local V = "[aeiouáéíóú]"
-- Normalisation rules for a manual romanization as it may appear on wiki.
-- Each entry is {pattern, replacement}. export.convert lowercases the manual
-- romanization and then applies these with mw.ustring.gsub in the order listed,
-- so the order matters (e.g. "zh" becomes "Z" before "d[Zj]" is tried).
-- The upper-case letters K, C, Z, S and T written by these rules serve as
-- one-character stand-ins for the corresponding consonants/digraphs.
local c = { --for what may appear on wiki
	{"(.)%1", "%1"}, -- collapse any doubled character into a single one
	{"[ḇw]", "v"},
	{"ḡ", "g"},
	{"ḏ", "d"},
	{"[ḵẖḥħx]", "K"},
	{"p̄", "f"},
	{"[ṯṭ]", "t"},
	{"[kc]h", "K"},
	{"č", "C"},
	{"ž", "Z"},
	{"zh", "Z"},
	{"š", "S"},
	{"d[Zj]", "j"},
	{"ṣ", "T"},
	{"t[sz]", "T"},
	{"[qḳ]", "k"},
	{"ś", "s"},
	{"sh", "S"},
	-- These two are character classes: either listed quote variant is
	-- normalised. Without the brackets the pattern would only match the two
	-- characters occurring one right after the other.
	{"[‘׳]", "'"},
	{"[“״]", "″"},
	{"['‘ʾʿʼʻʔʕˀˤə-]", ""}, --glottal stops, schwa, 3ayin, hyphen
	-- Long vowel carrying an acute accent -> plain acute-accented vowel.
	{"ā́", "á"},
	{"ḗ", "é"},
	{"ī́", "í"},
	{"ṓ", "ó"},
	{"ū́", "ú"},
	-- Vowels with macron, breve or circumflex -> plain vowel.
	{"[āăâ]", "a"},
	{"[ēĕê]", "e"},
	{"[īĭî]", "i"},
	{"[ōŏô]", "o"},
	{"[ūŭû]", "u"},
	{"(" .. V .. ")i", "%1y"}, -- vowel + i -> vowel + y
	{"(" .. V .. ")h(" .. word_end .. ")", "%1%2"} -- drop h after a vowel at a word end
}
-- Normalisation rules for the romanization generated by MH.
-- Same {pattern, replacement} shape as the manual-romanization rules;
-- export.convert applies them in order with mw.ustring.gsub.
local r --for generated MH tr
do
	-- Drops an "h" that follows a vowel at a word end. It is applied both
	-- before and after apostrophes/hyphens are removed, since that removal
	-- can leave a new vowel + "h" at a word end.
	local drop_final_h = {"(" .. V .. ")h(" .. word_end .. ")", "%1%2"}
	r = {
		drop_final_h,
		{"['-]", ""}, --glottal stop & hyphen
		drop_final_h,
		-- NOTE(review): the rule two entries above already deleted every "'",
		-- so this pattern looks unreachable; confirm the intended order.
		{"([zsck])'h", "%1h"},
		-- Digraphs -> the same one-letter stand-ins used for the manual side.
		{"[kc]h", "K"},
		{"zh", "Z"},
		{"sh", "S"},
		{"ts", "T"}
	}
end
-- Stress removal: maps each acute-accented vowel to its unaccented form.
-- Passed as the replacement table to gsub with pattern ".", so characters
-- that are not keys here are left unchanged.
local o = {["á"] = "a", ["é"] = "e", ["í"] = "i", ["ó"] = "o", ["ú"] = "u"} --removing stress
--- Reduce a headword and a manual romanization to comparable normal forms.
-- @param g headword spelling; it is passed through MH, lowercased, and then
--          rewritten by the rule list `r`.
-- @param m manual romanization; it is lowercased and rewritten by the rule list `c`.
-- @return the normalised generated romanization, then the normalised manual one.
function export.convert(g, m)
	local manual = l(m)
	local generated = l(MH(g))
	for _, rule in ipairs(c) do
		manual = s(manual, rule[1], rule[2])
	end
	for _, rule in ipairs(r) do
		generated = s(generated, rule[1], rule[2])
	end
	-- Acute accents are stripped from the manual form when the generated form
	-- carries none (original note: "if monosyllabic"), so that a stress mark
	-- alone does not make the two differ.
	-- The opposite direction is intentionally disabled:
	--[[if (not M(manual, a)) and M(generated, a) then
		generated = s(generated, ".", o)
	end--]]
	if not M(generated, a) and M(manual, a) then
		manual = s(manual, ".", o)
	end
	return generated, manual
end
--- Tell whether a manual romanization agrees with the generated one.
-- For a POS section: g = headword spelling, m = manual romanization.
-- @return true when both normalise (via export.convert) to the same string,
--         false otherwise.
function export.filter(g, m)
	local generated, manual = export.convert(g, m)
	return manual == generated
end
--- Render one list item per data line showing whether the manual romanization passes the filter.
-- frame.args[1]: title of the data page; its content must contain a <pre> block
--                with one "headword<TAB>romanization" pair per line.
-- frame.args[2]: optional maximum number of lines to process (default 1000000).
-- @return wikitext: "# headword translit | generated: true/false" lines joined by newlines.
-- Raises an error if the page cannot be read, has no <pre> block, or a line is malformed.
function export.print_filter(frame)
	local data_page_title = frame.args[1]
	local max_lines = frame.args[2] and assert(tonumber(frame.args[2])) or 1000000
	-- mw.title.new returns nil for an invalid title and getContent returns nil
	-- for a missing page; fail with a clear message instead of indexing nil.
	local title = mw.title.new(data_page_title)
	local content = title and title:getContent()
	if not content then
		error("Could not load content of data page: " .. tostring(data_page_title))
	end
	local output = require "Module:array"()
	local data = assert(content:match("<pre>%s*(.-)%s*</pre>"), "expected pre tag in data page")
	local i = 0
	for line in data:gmatch "[^\n]+" do
		local headword, translit = line:match "^([^\t]+)\t([^\t]+)$"
		if not headword then
			error("Following line did not match pattern:\n" .. line)
		end
		output:insert(("# %s %s | %s: %s"):format(N(headword), N(translit), MH(headword), tostring(export.filter(headword, translit))))
		i = i + 1
		if i >= max_lines then break end
	end
	return output:concat("\n")
end
--- Render one list item per data line: a link to the Hebrew entry, an edit link, and the second column.
-- frame.args[1]: title of the data page; its content must contain a <pre> block
--                with two tab-separated columns per line.
-- frame.args[2]: optional maximum number of lines to process (default 1000000).
-- @return wikitext list items joined by newlines.
-- Raises an error if the page cannot be read, has no <pre> block, or a line is malformed.
function export.spelling(frame)
	local data_page_title = frame.args[1]
	local max_lines = frame.args[2] and assert(tonumber(frame.args[2])) or 1000000
	-- mw.title.new returns nil for an invalid title and getContent returns nil
	-- for a missing page; fail with a clear message instead of indexing nil.
	local title = mw.title.new(data_page_title)
	local content = title and title:getContent()
	if not content then
		error("Could not load content of data page: " .. tostring(data_page_title))
	end
	local output = require "Module:array"()
	local data = assert(content:match("<pre>%s*(.-)%s*</pre>"), "expected pre tag in data page")
	local i = 0
	for line in data:gmatch "[^\n]+" do
		local headword, others = line:match "^([^\t]+)\t([^\t]+)$"
		if not headword then
			error("Following line did not match pattern:\n" .. line)
		end
		headword = N(headword)
		-- `entry` is local so the loop does not write a global on every line.
		local entry = lang:makeEntryName(headword)
		output:insert(("# <span class=\"Hebr\" lang=\"he\">[[%s#Hebrew|%s]]</span><sup>[[Special:EditPage/%s|edit]]</sup> %s"):format(entry, headword, entry, N(others)))
		i = i + 1
		if i >= max_lines then break end
	end
	return output:concat("\n")
end
--- Render one list item per data line: a link to the Hebrew entry, an edit link, and the MH romanization.
-- frame.args[1]: title of the data page; its content must contain a <pre> block
--                whose lines are a headword followed by a tab and nothing else.
-- frame.args[2]: optional maximum number of lines to process (default 1000000).
-- @return wikitext list items joined by newlines.
-- Raises an error if the page cannot be read, has no <pre> block, or a line is malformed.
function export.dunno(frame)
	local data_page_title = frame.args[1]
	local max_lines = frame.args[2] and assert(tonumber(frame.args[2])) or 1000000
	-- mw.title.new returns nil for an invalid title and getContent returns nil
	-- for a missing page; fail with a clear message instead of indexing nil.
	local title = mw.title.new(data_page_title)
	local content = title and title:getContent()
	if not content then
		error("Could not load content of data page: " .. tostring(data_page_title))
	end
	local output = require "Module:array"()
	local data = assert(content:match("<pre>%s*(.-)%s*</pre>"), "expected pre tag in data page")
	local i = 0
	for line in data:gmatch "[^\n]+" do
		local headword = line:match "^([^\t]+)\t$"
		if not headword then
			error("Following line did not match pattern:\n" .. line)
		end
		headword = N(headword)
		-- `entry` is local so the loop does not write a global on every line.
		local entry = lang:makeEntryName(headword)
		output:insert(("# <span class=\"Hebr\" lang=\"he\">[[%s#Hebrew|%s]]</span><sup>[[Special:EditPage/%s|edit]]</sup> %s"):format(entry, headword, entry, MH(headword)))
		i = i + 1
		if i >= max_lines then break end
	end
	return output:concat("\n")
end
--- Sort the data lines into "Correct" and several "To check" categories and render a report.
-- frame.args[1]: title of the data page; its content must contain a <pre> block
--                with one "headword<TAB>romanization" pair per line.
-- frame.args[2]: optional first line number to process (default 0).
-- frame.args[3]: optional line number at which to stop, exclusive (default 1500).
-- @return wikitext: an "Interval" line, the "To check" section with one
--         subsection per category, then the "Correct" section. Each heading
--         shows "count/total", where total is the number of lines processed.
-- Raises an error if the page cannot be read, has no <pre> block, or a line is malformed.
function export.draft(frame)
	local data_page_title = frame.args[1]
	local min_lines = frame.args[2] and assert(tonumber(frame.args[2])) or 0
	local max_lines = frame.args[3] and assert(tonumber(frame.args[3])) or 1500
	-- mw.title.new returns nil for an invalid title and getContent returns nil
	-- for a missing page; fail with a clear message instead of indexing nil.
	local title = mw.title.new(data_page_title)
	local content = title and title:getContent()
	if not content then
		error("Could not load content of data page: " .. tostring(data_page_title))
	end
	local data = assert(content:match("<pre>%s*(.-)%s*</pre>"), "expected pre tag in data page")
	local i = 0 -- number of the line currently being read (1-based)
	local processed = 0 -- lines actually classified; used as the total in headings
	-- T collects matching lines; F[2..] collect mismatches by category.
	-- Element 1 of each list is its heading text.
	local T = {"Correct"}
	local F = {"To check", {"Unpointed"}, {"Several spellings"}, {"Several romanizations"}, {"Lacking gershayim"}, {"Bad acute accent"}, {"Hyphen"}, {"Multiword"}, {"Lacking stress"}, {"Stress position"}, {"Schwa"}, {"Kamats (a/o)"}, {"Other"}} --to add: lacking pointing, lacking translit
	for line in data:gmatch "[^\n]+" do
		i = i + 1
		if min_lines <= i then
			if i >= max_lines then
				-- Undo the increment so that the "i = i + 1" after the loop
				-- leaves i equal to max_lines (the exclusive upper bound).
				i = i - 1
				break
			end
			local headword, translit = line:match "^([^\t]+)\t([^\t]+)$"
			if not headword then
				error("Following line did not match pattern:\n" .. line)
			end
			local g, m = export.convert(headword, translit)
			headword = N(headword)
			local entry = lang:makeEntryName(headword)
			local t = ("# <span class=\"Hebr\" lang=\"he\">[[%s#Hebrew|%s]]</span><sup>[[Special:EditPage/%s|edit]]</sup> %s • %s"):format(entry, headword, entry, N(translit), MH(headword))
			--t = ("# %s %s • %s (%s | %s)"):format(N(headword), N(translit), MH(headword), m, g)
			processed = processed + 1
			-- The first matching test decides the category, so the order of
			-- the branches is significant.
			local bucket
			if g == m then
				bucket = T
			elseif not M(headword, "[ְֱֲֳִֵֶַָׇֹֻ״־]") then --unpointed
				bucket = F[2]
			elseif M(g, "[\\/,]") then --several spellings
				bucket = F[3]
			elseif M(m, "[\\/,]") then --several romanizations
				bucket = F[4]
			elseif s(g, "″", "") == m then --lacking gershayim
				bucket = F[5]
			elseif M(m, "&[aeiou]acute;") then --bad acute accent
				bucket = F[6]
			elseif s(m, "[- ]", "") == s(g, "[- ]", "") then --hyphen
				bucket = F[7]
			elseif M(g, " ") then --multiword
				bucket = F[8]
			elseif not M(m, a) then --lacking stress
				bucket = F[9]
			elseif s(m, ".", o) == s(g, ".", o) then --stress position
				bucket = F[10]
			elseif s(m, "e", "") == s(g, "e", "") then --schwa
				bucket = F[11]
			elseif s(s(m, "[áó]", "X"), "[ao]", "x") == s(s(g, "[áó]", "X"), "[ao]", "x") then --kamats (a/o)
				bucket = F[12]
			else
				bucket = F[13]
			end
			table.insert(bucket, t)
		end
	end
	i = i + 1 -- exclusive upper bound shown in the "Interval" line
	-- Use the real number of classified lines as the total. Deriving it from
	-- i - min_lines is one too many when min_lines is 0 (lines are numbered
	-- from 1) and negative when min_lines exceeds the number of lines.
	local length = processed
	T[1] = "==" .. T[1] .. " (" .. (#T - 1) .. "/" .. length .. ")=="
	F[1] = "==" .. F[1] .. " (" .. (length - #T + 1) .. "/" .. length .. ")=="
	for z = 2, #F do
		F[z][1] = "===" .. F[z][1] .. " (" .. (#F[z] - 1) .. "/" .. length .. ")==="
		F[z] = table.concat(F[z], "\n")
	end
	return "Interval: [" .. min_lines .. ", " .. i .. "[\n" .. table.concat(F, "\n\n") .. "\n" .. table.concat(T, "\n")
end
return export