Jump to content

Module:he-utilities

From Wiktionary, the free dictionary

Some utilities for use in other Hebrew modules.


local export = {}

local m_str_utils = require("Module:string utilities")

local catfix = require("Module:utilities").catfix
local codepoint = m_str_utils.codepoint
local gsub = m_str_utils.gsub
local u = m_str_utils.char

local export = {}

local sc = require("Module:scripts").getByCode("Hebr")

local lang = require("Module:languages").getByCode("he")
local m_utilities = require("Module:utilities")
local m_links = require("Module:links")
local m_headword = require("Module:headword")
local rsplit = mw.text.split

-- A wrapper function allowing the contents of this module to be called from
-- templates. For example, '{{#invoke:he-utilities|main|otSofit|כ}}' produces
-- 'ך', as does '{{#invoke:he-utilities|main|letters|kafSofit}}'.
--
-- frame.args[1] names a member of this module; frame.args[2] is either the
-- single argument passed to that member (when it is a function) or the key
-- looked up in it (when it is a table).
--
-- Raises an error naming the requested member when it is neither a function
-- nor a table, instead of failing with "attempt to index a nil value".
function export.main(frame)
	local name, arg = frame.args[1], frame.args[2]
	local member = export[name]
	local member_type = type(member)
	if member_type == 'function' then
		return member(arg)
	elseif member_type == 'table' then
		return member[arg]
	end
	error("Module:he-utilities has no function or table named '" .. tostring(name) .. "'.")
end

-- Letter-name -> letter lookup, e.g. export.letters.alef is "\215\144" (the
-- UTF-8 encoding of U+05D0 HEBREW LETTER ALEF). The names below are listed in
-- code-point order starting at U+05D0, so each name's position in the list
-- determines its character; final (sofit) forms come directly before their
-- regular counterparts.
export.letters = {}
do
	local letter_names = {
		'alef', 'bet', 'gimel', 'dalet', 'hei', 'vav', 'zayen',
		'khet', 'tet', 'yud', 'kafSofit', 'kaf', 'lamed',
		'memSofit', 'mem', 'nunSofit', 'nun', 'samekh', 'ayin',
		'peiSofit', 'pei', 'tsadiSofit', 'tsadi', 'kuf', 'resh',
		'shin', 'tav',
	}
	local first_codepoint = 0x05D0
	for position, letter_name in ipairs(letter_names) do
		export.letters[letter_name] = u(first_codepoint + (position - 1))
	end
end
-- "sin" is an alias: it is the same base letter as "shin".
export.letters.sin = export.letters.shin

-- Mark-name -> mark lookup for the run of points starting at U+05B0. Besides
-- the vowel points it also covers a few marks and diacritics that are not
-- really "vowels" but sit in the same sequence of Unicode characters.
export.vowels = {}
do
	local mark_names = {
		'shva', 'khatafSegol', 'khatafPatakh', 'khatafKamats',
		'khirik', 'tseirei', 'segol', 'patakh', 'kamats',
		'kholam', 'ignoreMe', 'kubuts', 'dagesh', 'meteg',
		'makaf', 'rafe', 'pasek', 'shinDot', 'sinDot',
		'sofPasuk', 'upperDot',
	}
	local first_codepoint = 0x05B0
	for position, mark_name in ipairs(mark_names) do
		export.vowels[mark_name] = u(first_codepoint + (position - 1))
	end
end
-- 'ignoreMe' only exists to keep the following names aligned with their code
-- points (it occupies the U+05BA slot); it is not part of the public table.
export.vowels.ignoreMe = nil
-- "mapik" is an alias for the same character as "dagesh".
export.vowels.mapik = export.vowels.dagesh

-- Converts kaf, mem, nun, pei or tsadi to the corresponding final (sofit)
-- form; any other value is returned unchanged.
function export.otSofit(letter)
	local names = export.letters
	local has_final_form = letter == names.kaf
		or letter == names.mem
		or letter == names.nun
		or letter == names.pei
		or letter == names.tsadi
	if not has_final_form then
		return letter
	end
	-- Each final form sits one code point below its regular form.
	return u(codepoint(letter) - 1)
end

-- Like otSofit, but a resulting kaf sofit (whether the input was kaf or kaf
-- sofit) additionally gets a sh'va appended, since kaf sofit is written with
-- a sh'va when it has no other vowel.
function export.otSofitShva(letter)
	local final_form = export.otSofit(letter)
	if final_form ~= export.letters.kafSofit then
		return final_form
	end
	return final_form .. export.vowels.shva
end

-- Converts kaf sofit, mem sofit, nun sofit, pei sofit or tsadi sofit to the
-- corresponding regular (non-final) letter; any other value is returned
-- unchanged.
function export.otLoSofit(letter)
	local names = export.letters
	local is_final_form = letter == names.kafSofit
		or letter == names.memSofit
		or letter == names.nunSofit
		or letter == names.peiSofit
		or letter == names.tsadiSofit
	if not is_final_form then
		return letter
	end
	-- Each regular form sits one code point above its final form.
	return u(codepoint(letter) + 1)
end

-- Appends a dagesh to bet, gimel, dalet, kaf, pei or tav; any other value is
-- returned unchanged.
function export.dageshKal(letter)
	local names = export.letters
	local takes_dagesh = letter == names.bet
		or letter == names.gimel
		or letter == names.dalet
		or letter == names.kaf
		or letter == names.pei
		or letter == names.tav
	if not takes_dagesh then
		return letter
	end
	return letter .. export.vowels.dagesh
end

-- If letter is shin followed by a shin dot or a sin dot, returns the bare
-- shin (without the dot); otherwise returns letter unchanged.
--
-- The dots are looked up in export.vowels, which is where 'shinDot' and
-- 'sinDot' are defined; export.letters has no such entries, so reading them
-- from there yields nil and the concatenation raises an error on every call.
function export.dotlessShin(letter)
	local shin = export.letters.shin
	if letter == shin .. export.vowels.shinDot
		or letter == shin .. export.vowels.sinDot then
		return shin
	else
		return letter
	end
end

-- Pattern fragments used by export.plain_root to pick radicals out of its
-- input string.
-- One character in the range alef..tav (the range also contains the final
-- forms).
local letters               = "[א-ת]"
-- At most one combining mark from the bracketed set
-- (NOTE(review): presumably dagesh/mapik, shin dot, sin dot and upper dot --
-- confirm against the exact code points in the literal).
local modifiers             = "[ּׁׂׄ]?"
-- At most one separator: hyphen, maqaf, space, full stop, comma, exclamation
-- mark or vertical bar.
local separators            = "[-־ %.,!|]?"
-- One radical (letter plus optional mark) is captured; a trailing separator,
-- if present, is matched but not captured.
local regex                 = "(" .. letters .. modifiers .. ")" .. separators

-- Radicals accepted in a non-initial, non-final position of a root. The
-- validators in this file reject any radical whose entry is false or missing,
-- so he with a dot (mapped to false) and the final letter forms (absent) are
-- not accepted here. Shin is only listed with a shin dot or a sin dot; a bare
-- shin has no entry.
local medial_radicals       = {
	["א"] = true,
	["ב"] = true,
	["ג"] = true,
	["ד"] = true,
	["ה"] = true,
	["הּ"] = false,
	["ו"] = true,
	["ז"] = true,
	["ח"] = true,
	["ט"] = true,
	["י"] = true,
	["כ"] = true,
	["ל"] = true,
	["מ"] = true,
	["נ"] = true,
	["ס"] = true,
	["ע"] = true,
	["פ"] = true,
	["צ"] = true,
	["ק"] = true,
	["ר"] = true,
	["שׁ"] = true,
	["שׂ"] = true,
	["ת"] = true,
}
-- Initial radicals follow exactly the same rules as medial ones (this is the
-- same table, not a copy).
local initial_radicals      = medial_radicals
-- Radicals accepted as the last radical of a root. The validators in this
-- file reject any radical whose entry is false or missing. Compared with the
-- medial table: kaf, mem, nun, pei and tsadi appear only in their final
-- forms, he with a dot is accepted, and vav and yud are explicitly rejected.
local final_radicals        = {
	["א"] = true,
	["ב"] = true,
	["ג"] = true,
	["ד"] = true,
	["ה"] = true,
	["הּ"] = true,
	["ו"] = false,
	["ז"] = true,
	["ח"] = true,
	["ט"] = true,
	["י"] = false,
	["ך"] = true,
	["ל"] = true,
	["ם"] = true,
	["ן"] = true,
	["ס"] = true,
	["ע"] = true,
	["ף"] = true,
	["ץ"] = true,
	["ק"] = true,
	["ר"] = true,
	["שׁ"] = true,
	["שׂ"] = true,
	["ת"] = true,
}

-- Romanization table used by transliterate_root. Each key is passed to gsub
-- as a pattern (so a bracketed key matches either the regular or the final
-- form of a letter) and each value is the replacement text. The table is
-- traversed with pairs(), so no particular application order may be assumed.
local radical_romanizations = {
	["א"] = "ʾ",
	["ב"] = "b",
	["ג"] = "g",
	["ד"] = "d",
	["ה"] = "h",
	["ו"] = "w",
	["ז"] = "z",
	["ח"] = "kh",
	["ט"] = "ṭ",
	["י"] = "y",
	["[כך]"] = "k",
	["ל"] = "l",
	["[מם]"] = "m",
	["[נן]"] = "n",
	["ס"] = "s",
	["ע"] = "ʿ",
	["[פף]"] = "p",
	["[ץצ]"] = "ts",
	["ק"] = "q",
	["ר"] = "r",
	["שׁ"] = "sh",
	["שׂ"] = "s",
	["ת"] = "t",
}

-- Romanizes a root string: every maqaf becomes an ASCII hyphen, then each
-- entry of radical_romanizations is applied as a pattern substitution.
-- Returns the romanized string.
local function transliterate_root(root_string)
	-- Only the first result of gsub (the new string) is kept; the match count
	-- is discarded by the single-variable assignment.
	local result = root_string:gsub("־", "-")
	for hebrew_pattern, latin in pairs(radical_romanizations) do
		result = gsub(result, hebrew_pattern, latin)
	end
	return result
end

-- Normalizes the root given in frame.args[1]: splits it into radicals (a
-- letter plus an optional combining mark, optionally followed by one
-- separator), checks each radical against the table for its position, and
-- returns the radicals joined with maqaf.
--
-- Raises an error when the input contains text the radical pattern does not
-- consume, when fewer than two radicals are found, or when a radical is not
-- allowed in its position.
function export.plain_root(frame)
	local radicals = {}
	-- Every match is recorded and replaced by nothing, so anything left over
	-- is text the pattern could not account for.
	local leftover = gsub(frame.args[1], regex, function(radical)
		radicals[#radicals + 1] = radical
		return ""
	end)
	if leftover ~= "" then
		error("Unrecognized characters in root.")
	end

	local count = #radicals
	if count < 2 then
		error("Root must have at least two radicals.")
	end

	for position, radical in ipairs(radicals) do
		local is_first = position == 1
		local is_last = not is_first and position == count
		if is_first then
			if not initial_radicals[radical] then
				error("Unrecognized initial radical " .. radical .. ".")
			end
		elseif is_last then
			if not final_radicals[radical] then
				error("Unrecognized final radical " .. radical .. ".")
			end
		elseif not medial_radicals[radical] then
			error("Unrecognized medial radical " .. radical .. ".")
		end
	end

	return table.concat(radicals, "־")
end

-- Validates and normalizes the root in frame.args[1] via export.plain_root,
-- then returns its romanization. Errors raised by export.plain_root propagate.
function export.romanized_root(frame)
	return transliterate_root(export.plain_root(frame))
end

-- Returns whatever Module:utilities' catfix produces for this module's
-- language object ("he") and script object ("Hebr"). Takes no arguments.
function export.catfix()
	return catfix(lang, sc)
end

-- Builds a link to term through Module:links' full_link, using this module's
-- language object. alt, tr and id are passed through unchanged as the link's
-- alternative text, transliteration and id. An empty term or the literal
-- "&mdash;" is returned as-is without creating a link.
local function link(term, alt, tr, id)
	local is_placeholder = term == "" or term == "&mdash;"
	if is_placeholder then
		return term
	end

	local link_data = {
		lang = lang,
		term = term,
		alt = alt,
		tr = tr,
		id = id,
	}
	return m_links.full_link(link_data)
end

-- Checks that rootTable (a list of radicals) forms an acceptable root.
-- joined_root is the display form of the same root and is only used in error
-- messages.
--
-- Raises an error (attributed to the caller) when rootTable is not a table,
-- and an ordinary error when it has fewer than two radicals or when a radical
-- is not allowed in its position (initial, medial or final).
local function validateRoot(rootTable, joined_root)
	if type(rootTable) ~= "table" then
		error("rootTable is not a table", 2)
	end

	local last = #rootTable
	if last < 2 then
		error("Root must have at least two radicals.")
	end

	for position, radical in ipairs(rootTable) do
		local is_first = position == 1
		local is_last = not is_first and position == last
		if is_first and not initial_radicals[radical] then
			error("Unrecognized initial radical " .. radical .. " in " .. joined_root)
		elseif is_last and not final_radicals[radical] then
			error("Unrecognized final radical " .. radical .. " in " .. joined_root)
		elseif not is_first and not is_last and not medial_radicals[radical] then
			error("Unrecognized medial radical " .. radical .. " in " .. joined_root)
		end
	end
end

-- Template entry point for displaying one or more roots.
--
-- Arguments are read from the parent frame (the template call):
--   1, 2, ...  roots, each written with its radicals separated by maqaf
--   nocat      boolean; return only the comma-separated links
--   plain      boolean; same effect as nocat in this function
--   notext     boolean; accepted but not used by this function
--   sense      optional text appended in parentheses to the link text and to
--              the category name, and used as the link id
--
-- Root selection:
--   * no roots given, Template namespace -> a demonstration root is used
--   * roots given                        -> those roots are used
--   * no roots given, any other page     -> the page title is used as the root
--
-- When the page title equals the first root, the result is a headword line
-- for the root (more than one root is then an error). Otherwise the result is
-- a floating table with one link and one category-count row per root,
-- followed by the category markup, or just the links for nocat/plain.
--
-- Every root is checked with validateRoot, whose errors propagate.
function export.root(frame)
	local title = mw.title.getCurrentTitle()
	local fulltitle = title.fullText
	local namespace = title.nsText

	local params = {
		[1] = { list = true },
		["nocat"] = { type = "boolean" },
		["plain"] = { type = "boolean" },
		["notext"] = { type = "boolean" },
		["sense"] = {}
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local roots = args[1]
	local roots_len = #roots

	local rootLetters = {}
	if roots_len == 0 and namespace == "Template" then
		table.insert(rootLetters, { "כ", "ת", "ב" })
	elseif roots_len > 0 then
		-- The list must be tested for emptiness, not for truthiness: a table is
		-- always truthy, which would make the title fallback below unreachable.
		for _, root in ipairs(roots) do
			table.insert(rootLetters, rsplit(root, "־"))
		end
	else
		table.insert(rootLetters, rsplit(fulltitle, "־"))
	end

	local joined_roots = {}
	for i, radicals in ipairs(rootLetters) do
		table.insert(joined_roots, table.concat(radicals, "־"))
		validateRoot(radicals, joined_roots[i])
	end

	local sense = args["sense"]
	local sense_formatted = ""
	if sense ~= nil then
		sense_formatted = " (" .. sense .. ") "
	end

	if fulltitle == joined_roots[1] then
		-- The current page is the entry for this root: show a headword line.
		if roots_len > 1 then
			error("There should be only one root.")
		end

		-- No categories are collected in this branch, so the result is the same
		-- with or without nocat.
		local headword = m_headword.full_headword({
			lang = lang,
			pos_category = "roots",
			categories = {},
			heads = { fulltitle },
			nomultiwordcat = true,
		})
		return headword
	end

	local canonical_name = lang:getCanonicalName()
	local link_texts = {}
	local categories = {}
	local category_names = {}
	local term_counts = {}

	for _, joined_root in ipairs(joined_roots) do
		-- The same name is used for the category markup, the page count and
		-- the category link, so build it once.
		local category_name = canonical_name .. " terms belonging to the root " .. joined_root .. sense_formatted
		table.insert(category_names, category_name)
		table.insert(link_texts,
			link(joined_root, joined_root .. sense_formatted, transliterate_root(joined_root), sense))
		table.insert(categories, m_utilities.format_categories({ category_name }, lang))
		table.insert(term_counts, mw.site.stats.pagesInCategory(category_name, "pages"))
	end

	if args["nocat"] or args["plain"] then
		return table.concat(link_texts, ", ")
	end

	local rows = {}
	for i, link_text in ipairs(link_texts) do
		local count = term_counts[i]
		table.insert(rows,
			'<tr><td>' ..
			link_text ..
			"</td></tr><tr><td>[[:Category:" ..
			category_names[i] ..
			"|" ..
			count ..
			" term" .. (count == 1 and "" or "s") .. "]]</td></tr>")
	end

	return
		'<table class="wikitable" style="float: right; clear: right; text-align: center;"><tr><th>[[w:Semitic root|Root' ..
		(#term_counts == 1 and "" or "s") .. ']]</th></tr>' .. table.concat(rows) ..
		'</table>' .. table.concat(categories)
end

return export