Jump to content

Module:User:Malku H₂n̥rés/he-filter

From Wiktionary, the free dictionary

Eru's code

[edit]

Lua error in package.lua at line 80: module 'Module:User:Sartma/he-translit' not found

Malku's code

[edit]

Lua error in package.lua at line 80: module 'Module:User:Sartma/he-translit' not found


local export = {}

-- Dependencies. Both are held in locals so that loading this module does not
-- write `MH` and `lang` into the shared global environment.
local MH = require("Module:User:Sartma/he-translit").MH_tr --TO CHANGE WHEN MOVED
local lang = require("Module:languages").getByCode("he")

-- Short aliases for the mw library functions used throughout the module.
local N = mw.text.nowiki

local s = mw.ustring.gsub
local l = mw.ustring.lower
local M = mw.ustring.match

local U = mw.ustring.char
-- Body of a character class (it is only ever used between "[" and "]"):
-- whitespace, NUL (%z), U+061C, U+200E, U+200F, the ranges U+202A-U+202E and
-- U+2066-U+2069, and a literal hyphen.
local bcc = "%s%z" ..
	U(0x061C) .. U(0x200E) .. U(0x200F) .. U(0x202A) .. "-" .. U(0x202E)
	.. U(0x2066) .. "-" .. U(0x2069) .. "%-"
-- Frontier patterns: word_end matches where a character outside `bcc` is
-- followed by one inside it; word_start matches the opposite transition.
local word_end = "%f[" .. bcc .. "]"
local word_start = "%f[^" .. bcc .. "]"

-- `a` matches one stressed (acute-accented) vowel; `V` matches any vowel,
-- stressed or not.
local a = "[áéíóú]"
local V = "[aeiouáéíóú]"

-- Ordered rewrite rules applied by export.convert to the lowercased manual
-- romanization. Each entry is {pattern, replacement} for mw.ustring.gsub, and
-- every rule sees the output of the previous one, so order matters.
-- Because the input has been lowercased, the capital letters K, C, Z, S and T
-- produced below act as single-character stand-ins for sounds that are
-- otherwise written with digraphs or diacritics.
local c = { --for what may appear on wiki
	{"(.)%1", "%1"}, -- collapse doubled characters
	{"[ḇw]", "v"},
	{"ḡ", "g"},
	{"ḏ", "d"},
	{"[ḵẖḥħx]", "K"},
	{"p̄", "f"},
	{"[ṯṭ]", "t"},
	{"[kc]h", "K"},
	{"č", "C"},
	{"ž", "Z"},
	{"zh", "Z"},
	{"š", "S"},
	{"d[Zj]", "j"},
	{"ṣ", "T"},
	{"t[sz]", "T"},
	{"[qḳ]", "k"},
	{"ś", "s"},
	{"sh", "S"},
	-- These two patterns are character sets. Without the brackets each one
	-- only matched the quote mark immediately followed by the Hebrew mark, so
	-- a lone geresh/gershayim (or a lone curly quote) was never normalized.
	{"[‘׳]", "'"},
	{"[“״]", "″"},
	{"['‘ʾʿʼʻʔʕˀˤə-]", ""}, --glottal stops, schwa, hyphen, 3ayin, hyphen
	-- Long stressed vowels lose their length mark but keep the acute.
	{"ā́", "á"},
	{"ḗ", "é"},
	{"ī́", "í"},
	{"ṓ", "ó"},
	{"ū́", "ú"},
	-- Remaining length/quality diacritics are stripped.
	{"[āăâ]", "a"},
	{"[ēĕê]", "e"},
	{"[īĭî]", "i"},
	{"[ōŏô]", "o"},
	{"[ūŭû]", "u"},
	{"(" .. V .. ")i", "%1y"}, -- "i" after a vowel is written "y"
	{"(" .. V .. ")h(" .. word_end .. ")", "%1%2"} -- drop "h" after a vowel at word_end
}

-- Ordered rewrite rules applied by export.convert to the lowercased
-- romanization returned by MH. Each entry is {pattern, replacement} for
-- mw.ustring.gsub; every rule sees the output of the previous one.
local r = { --for generated MH tr
	-- Drop an "h" that follows a vowel at word_end.
	{"(" .. V .. ")h(" .. word_end .. ")", "%1%2"},
	{"['-]", ""}, --glottal stop & hyphen
	-- Same rule as the first entry, run again once apostrophes and hyphens
	-- have been removed.
	{"(" .. V .. ")h(" .. word_end .. ")", "%1%2"},
	-- NOTE(review): every "'" has already been removed two rules above, so
	-- this pattern looks unreachable -- confirm the intended order.
	{"([zsck])'h", "%1h"},
	-- Digraphs become the same single capital letters used for the manual
	-- romanization rules.
	{"[kc]h", "K"},
	{"zh", "Z"},
	{"sh", "S"},
	{"ts", "T"}
} 

-- Maps each acute-accented vowel to its plain counterpart. It is passed to
-- gsub as a replacement table with the pattern ".", so characters that are
-- not keys here are left unchanged.
local o = { --removing stress
	["á"] = "a",
	["é"] = "e",
	["í"] = "i",
	["ó"] = "o",
	["ú"] = "u"
}

-- Brings a headword and its manual romanization into a comparable form.
--   g: headword spelling; it is romanized with MH, lowercased, then rewritten
--      with the rules in `r`.
--   m: manual romanization; it is lowercased, then rewritten with the rules
--      in `c`.
-- Returns the normalized generated romanization followed by the normalized
-- manual romanization.
function export.convert(g, m)
	local manual = l(m)
	local generated = l(MH(g))

	for _, rule in ipairs(c) do
		manual = s(manual, rule[1], rule[2])
	end
	for _, rule in ipairs(r) do
		generated = s(generated, rule[1], rule[2])
	end

	-- Stress handling. A disabled alternative stripped the accents from the
	-- generated form when the manual one had none:
	--   if (not M(m, a)) and M(g, a) then g = s(g, ".", o) end
	-- What is active: when the generated form carries no acute accent but the
	-- manual one does, the accents are stripped from the manual form
	-- (presumably the monosyllabic case -- confirm against MH).
	local generated_is_unstressed = not M(generated, a)
	if generated_is_unstressed and M(manual, a) then
		manual = s(manual, ".", o)
	end

	return generated, manual
end

-- For a POS section: g = headword spelling, m = manual romanization.
-- Returns true when both normalize to the same string through
-- export.convert, false otherwise.
function export.filter(g, m)
	local generated, manual = export.convert(g, m)
	return manual == generated
end





-- Lists, for each "headword<TAB>romanization" line found inside the <pre>
-- tag of a data page, the two values, the MH romanization of the headword
-- and the result of export.filter.
--   frame.args[1]: title of the data page.
--   frame.args[2]: optional maximum number of lines to process
--                  (default 1000000).
-- Returns the report as one wikitext string, one "#" list item per line.
-- Raises an error if the title is invalid, the page has no content, the
-- <pre> tag is missing, or a line does not match the expected shape.
function export.print_filter(frame)
	local data_page_title = frame.args[1]
	local max_lines = frame.args[2] and assert(tonumber(frame.args[2])) or 1000000
	-- mw.title.new returns nil for an invalid title and getContent returns nil
	-- for a missing page; fail with a clear message instead of indexing nil.
	local title = assert(mw.title.new(data_page_title), "invalid data page title: " .. tostring(data_page_title))
	local content = assert(title:getContent(), "data page has no content: " .. tostring(data_page_title))
	local output = require "Module:array"()
	local data = assert(content:match("<pre>%s*(.-)%s*</pre>"), "expected pre tag in data page")
	local i = 0

	for line in data:gmatch "[^\n]+" do
		local headword, translit = line:match "^([^\t]+)\t([^\t]+)$"
		if not headword then
			error("Following line did not match pattern:\n" .. line)
		end
		output:insert(("# %s %s | %s: %s"):format(N(headword), N(translit), MH(headword), tostring(export.filter(headword, translit))))
		i = i + 1
		if i >= max_lines then break end
	end
	return output:concat("\n")
end



-- Lists, for each "headword<TAB>others" line found inside the <pre> tag of a
-- data page, a link to the Hebrew entry, an edit link and the second column.
--   frame.args[1]: title of the data page.
--   frame.args[2]: optional maximum number of lines to process
--                  (default 1000000).
-- Returns the report as one wikitext string, one "#" list item per line.
-- Raises an error if the title is invalid, the page has no content, the
-- <pre> tag is missing, or a line does not match the expected shape.
function export.spelling(frame)
	local data_page_title = frame.args[1]
	local max_lines = frame.args[2] and assert(tonumber(frame.args[2])) or 1000000
	-- mw.title.new returns nil for an invalid title and getContent returns nil
	-- for a missing page; fail with a clear message instead of indexing nil.
	local title = assert(mw.title.new(data_page_title), "invalid data page title: " .. tostring(data_page_title))
	local content = assert(title:getContent(), "data page has no content: " .. tostring(data_page_title))
	local output = require "Module:array"()
	local data = assert(content:match("<pre>%s*(.-)%s*</pre>"), "expected pre tag in data page")
	local i = 0

	for line in data:gmatch "[^\n]+" do
		local headword, others = line:match "^([^\t]+)\t([^\t]+)$"
		if not headword then
			error("Following line did not match pattern:\n" .. line)
		end
		headword = N(headword)
		-- `entry` is local so the loop does not leak a global.
		local entry = lang:makeEntryName(headword)
		output:insert(("# <span class=\"Hebr\" lang=\"he\">[[%s#Hebrew|%s]]</span><sup>[[Special:EditPage/%s|edit]]</sup> %s"):format(entry, headword, entry, N(others)))

		i = i + 1
		if i >= max_lines then break end
	end
	return output:concat("\n")
end



-- Lists, for each headword-only line found inside the <pre> tag of a data
-- page, a link to the Hebrew entry, an edit link and the MH romanization.
--   frame.args[1]: title of the data page.
--   frame.args[2]: optional maximum number of lines to process
--                  (default 1000000).
-- Returns the report as one wikitext string, one "#" list item per line.
-- Raises an error if the title is invalid, the page has no content, the
-- <pre> tag is missing, or a line has anything after its first tab.
function export.dunno(frame)
	local data_page_title = frame.args[1]
	local max_lines = frame.args[2] and assert(tonumber(frame.args[2])) or 1000000
	-- mw.title.new returns nil for an invalid title and getContent returns nil
	-- for a missing page; fail with a clear message instead of indexing nil.
	local title = assert(mw.title.new(data_page_title), "invalid data page title: " .. tostring(data_page_title))
	local content = assert(title:getContent(), "data page has no content: " .. tostring(data_page_title))
	local output = require "Module:array"()
	local data = assert(content:match("<pre>%s*(.-)%s*</pre>"), "expected pre tag in data page")
	local i = 0

	for line in data:gmatch "[^\n]+" do
		-- The trailing tab is optional: the "%s*</pre>" part of the capture
		-- above strips trailing whitespace, so the last line of the data has
		-- already lost its tab and would otherwise be rejected.
		local headword = line:match "^([^\t]+)\t?$"
		if not headword then
			error("Following line did not match pattern:\n" .. line)
		end
		headword = N(headword)
		-- `entry` is local so the loop does not leak a global.
		local entry = lang:makeEntryName(headword)
		output:insert(("# <span class=\"Hebr\" lang=\"he\">[[%s#Hebrew|%s]]</span><sup>[[Special:EditPage/%s|edit]]</sup> %s"):format(entry, headword, entry, MH(headword)))

		i = i + 1
		if i >= max_lines then break end
	end
	return output:concat("\n")
end



-- Builds a wikitext report that sorts the "headword<TAB>romanization" lines
-- of a data page into "Correct" (normalized manual and generated
-- romanizations are equal) and "To check", the latter split into
-- subsections by the first test in the chain below that the pair satisfies.
--   frame.args[1]: title of the data page.
--   frame.args[2]: optional first line number to process (default 0).
--   frame.args[3]: optional line number at which to stop, exclusive
--                  (default 1500).
-- Returns the report as one wikitext string.
-- Raises an error if the title is invalid, the page has no content, the
-- <pre> tag is missing, or a processed line does not match the expected
-- shape.
function export.draft(frame)
	local data_page_title = frame.args[1]
	local min_lines = frame.args[2] and assert(tonumber(frame.args[2])) or 0
	local max_lines = frame.args[3] and assert(tonumber(frame.args[3])) or 1500
	-- mw.title.new returns nil for an invalid title and getContent returns nil
	-- for a missing page; fail with a clear message instead of indexing nil.
	local title = assert(mw.title.new(data_page_title), "invalid data page title: " .. tostring(data_page_title))
	local content = assert(title:getContent(), "data page has no content: " .. tostring(data_page_title))
	local data = assert(content:match("<pre>%s*(.-)%s*</pre>"), "expected pre tag in data page")
	local i = 0 -- number of the line currently being read
	local processed = 0 -- number of lines actually classified

	-- T collects correct entries; F[1] is the "To check" title and F[2..]
	-- are its subsections. The first element of each list is its title.
	local T, F = {"Correct"}, {"To check", {"Unpointed"}, {"Several spellings"}, {"Several romanizations"}, {"Lacking gershayim"}, {"Bad acute accent"}, {"Hyphen"}, {"Multiword"}, {"Lacking stress"}, {"Stress position"}, {"Schwa"}, {"Kamats (a/o)"}, {"Other"}} --to add: lacking pointing, lacking translit

	for line in data:gmatch "[^\n]+" do
		i = i + 1
		if min_lines <= i then
			if i >= max_lines then
				i = i - 1
				break
			else
				local headword, translit = line:match "^([^\t]+)\t([^\t]+)$"
				if not headword then
					error("Following line did not match pattern:\n" .. line)
				else
					processed = processed + 1
					local g, m = export.convert(headword, translit)
					headword = N(headword)
					local entry = lang:makeEntryName(headword)
					local t = ("# <span class=\"Hebr\" lang=\"he\">[[%s#Hebrew|%s]]</span><sup>[[Special:EditPage/%s|edit]]</sup> %s • %s"):format(entry, headword, entry, N(translit), MH(headword))
					--t = ("# %s %s • %s (%s | %s)"):format(N(headword), N(translit), MH(headword), m, g)
					if g == m then
						table.insert(T, t)
					elseif not M(headword, "[ְֱֲֳִֵֶַָׇֹֻ״־]") then --unpointed
						table.insert(F[2], t)
					elseif M(g, "[\\/,]") then --several spellings
						table.insert(F[3], t)
					elseif M(m, "[\\/,]") then --several romanizations
						table.insert(F[4], t)
					elseif s(g, "″", "") == m then --lacking gershayim
						table.insert(F[5], t)
					elseif M(m, "&[aeiou]acute;") then --bad acute accent
						table.insert(F[6], t)
					elseif s(m, "[- ]", "") == s(g, "[- ]", "") then --hyphen
						table.insert(F[7], t)
					elseif M(g, " ") then --multiword
						table.insert(F[8], t)
					elseif not M(m, a) then --lacking stress
						table.insert(F[9], t)
					elseif s(m, ".", o) == s(g, ".", o) then --stress position
						table.insert(F[10], t)
					elseif s(m, "e", "") == s(g, "e", "") then --schwa
						table.insert(F[11], t)
					elseif s(s(m, "[áó]", "X"), "[ao]", "x") == s(s(g, "[áó]", "X"), "[ao]", "x") then --kamats (a/o)
						table.insert(F[12], t)
					else
						table.insert(F[13], t)
					end
				end
			end
		end
	end
	i = i + 1 -- exclusive upper bound shown in the interval line
	-- Use the real number of classified lines. The former `i - min_lines` was
	-- one too high with the default min_lines of 0 and could go negative when
	-- min_lines exceeded the number of lines in the data.
	local length = processed
	T[1] = "==" .. T[1] .. " (" .. (#T - 1)  .. "/" .. length .. ")=="
	F[1] = "==" ..  F[1] .. " (" .. (length - #T + 1) .. "/" .. length .. ")=="
	for z = 2, #F do
		F[z][1] = "===" .. F[z][1] .. " (" .. (#F[z] - 1) .. "/" .. length .. ")==="
		F[z] = table.concat(F[z], "\n")
	end
	return "Interval: [" .. min_lines .. ", " .. i .. "[\n" .. table.concat(F, "\n\n") .. "\n" .. table.concat(T, "\n")
end

return export