Jump to content

Module:R:he:HWSSRoot

From Wiktionary, the free dictionary

This module implements the reference template {{R:he:HWSSRoot}}.


local p = {}

local list = require("Module:R:he:HWSSRoot/data")

-- Maps Hebrew letters to the one-character Latin/ASCII codes used by this
-- module. Each key is used as a pattern for mw.ustring.gsub (see
-- hebrew_to_normalized_latin), so a bracketed key such as "[כך]" is a
-- character class covering both the regular and the final form of a letter.
-- latin_to_hebrew_for_sorting maps a code back to the FIRST character listed
-- inside the brackets, so the order of characters within a class matters.
local hebrew_letters = {
	["א"] = "ʾ",
	["ב"] = "B",
	["ג"] = "G",
	["ד"] = "D",
	["ה"] = "H",
	["ו"] = "W",
	["ז"] = "Z",
	["ח"] = "X",
	["ט"] = "7",
	["י"] = "Y",
	["[כך]"] = "K",
	["ל"] = "L",
	["[מם]"] = "M",
	["[נן]"] = "N",
	["ס"] = "S",
	["ע"] = "&",
	["[פף]"] = "P",
	["[ץצ]"] = "C",
	["ק"] = "Q",
	["ר"] = "R",
	-- Both dotted variants of shin are folded to the bare letter before this
	-- table is applied (see hebrew_to_normalized_latin).
	["ש"] = "ʃ",
	["ת"] = "T",
}

--- Strips annotations and trailing weak/doubled letters from a Latin-coded
-- root string so that differently written forms compare equal.
--
-- Steps, applied in order:
--   1. remove every "/" together with the single character following it;
--   2. remove everything from the first "(" to the end of the string;
--   3. remove a "V" or "H" at the end of the string or right before a space;
--   4. collapse a doubled character at the end of the string or right before
--      a space into a single character.
--
-- NOTE(review): these are byte-oriented string patterns, so step 4 only
-- collapses single-byte (ASCII) codes; multi-byte codes such as "ʃ" are left
-- doubled. Confirm whether that is intended.
--
-- @param value string  Latin-coded root (or data-list entry).
-- @return string  The cleaned string. Exactly one value is returned; the
--   substitution count produced by gsub is deliberately discarded so it
--   cannot leak into callers' argument lists or module output.
local function cleanup(value)
	local cleaned = value
		:gsub("/.", "")
		:gsub("%(.*", "")
		:gsub("[VH]$", "")
		:gsub("[VH] ", " ")
		:gsub("(.)%1$", "%1")
		:gsub("(.)%1 ", "%1 ")
	-- Assigning to a single local keeps only gsub's first result (the string).
	return cleaned
end

--- Converts a Hebrew term to the module's normalized Latin coding.
--
-- The term first has both dotted variants of shin folded to the bare letter
-- and every maqaf removed; then each Hebrew letter is replaced by its code
-- from hebrew_letters; finally the result is passed through cleanup().
--
-- @param term string  Hebrew text.
-- @return string  The normalized Latin-coded string (a single value).
function p.hebrew_to_normalized_latin(term)
	local normalized = term:gsub("שׁ" ,"ש"):gsub("שׂ", "ש"):gsub("־", "")
	-- Iteration order of pairs() is unspecified, but every replacement maps a
	-- Hebrew letter to a non-Hebrew code, so the substitutions cannot interact.
	for k, v in pairs(hebrew_letters) do
		normalized = mw.ustring.gsub(normalized, k, v)
	end
	-- The parentheses truncate the result to one value, so a substitution
	-- count coming out of cleanup() is never forwarded to callers.
	return (cleanup(normalized))
end

--- Converts a Latin-coded string back to Hebrew letters so that plain string
-- comparison can be used on the result.
--
-- Every code in hebrew_letters is replaced by its Hebrew key. When the key is
-- a bracketed character class, the first character inside the brackets is
-- used as the replacement.
--
-- @param term string  Latin-coded string.
-- @return string  The string with codes replaced by Hebrew letters.
function p.latin_to_hebrew_for_sorting(term)
	local result = term
	for pattern, code in pairs(hebrew_letters) do
		local letter = pattern
		if mw.ustring.find(pattern, "%[") then
			-- Position 1 is the "[" itself; position 2 is the first letter.
			letter = mw.ustring.sub(pattern, 2, 2)
		end
		result = mw.ustring.gsub(result, code, letter)
	end
	return result
end

--- Template entry point: returns the normalized Latin coding of the first
-- argument.
--
-- @param frame table  Scribunto frame; frame.args[1] is the Hebrew term.
-- @return string  The normalized Latin-coded string.
function p.simple_latin(frame)
	-- The converter is a field of the module table, not a global; calling it
	-- unqualified raised "attempt to call a nil value". The parentheses keep
	-- only the string, since every value returned to #invoke is concatenated
	-- into the output.
	return (p.hebrew_to_normalized_latin(frame.args[1]))
end

--- Template entry point: finds the page for a Hebrew root by binary search
-- over the data list.
--
-- The search looks for the last list entry whose sort key is not greater than
-- the key of the requested term, and returns that entry's index plus 78.
-- NOTE(review): presumably list[i] is the first root on page 78 + i and the
-- list is sorted by that key; confirm against the data module.
--
-- @param frame table  Scribunto frame; frame.args[1] is the Hebrew term.
-- @return number  78 plus the index of the matching list entry.
function p.page_number(frame)
	-- Sort key of a data-list entry. The same normalization must be used for
	-- every comparison; previously the final boundary check skipped cleanup()
	-- and could compare against an un-normalized entry.
	local function sort_key(entry)
		return p.latin_to_hebrew_for_sorting((cleanup(entry)))
	end

	local value = p.latin_to_hebrew_for_sorting(
		(p.hebrew_to_normalized_latin(frame.args[1]))
	)

	local low, high = 1, #list
	while high - low > 1 do
		local mid = math.floor((low + high) / 2)
		local midval = sort_key(list[mid])
		if midval == value then
			return 78 + mid
		elseif midval < value then
			-- The entry at mid may still be the answer, so keep it in range.
			low = mid
		else
			-- The entry at mid is past the term, so the answer lies before it.
			high = mid - 1
		end
	end

	-- At most two candidates remain; prefer the later one if it is not past
	-- the requested term.
	if sort_key(list[high]) <= value then
		return 78 + high
	end
	return 78 + low
end

return p