-- Module:gez-utilities
--
-- From Wiktionary, the free dictionary


-- Module:gez-utilities
-- Author: Weshyaunt
-- Based on Module:sem-arb-utilities by Fenakhay, Sept 2023

-- Table of functions exported by this module (returned at the end of the file).
local export = {}

-- Language object for Ge'ez; declared once (a second identical declaration
-- would only shadow this one and repeat the lookup).
local lang = require("Module:languages").getByCode("gez")
local m_utilities = require("Module:utilities")
local m_links = require("Module:links")
local m_headword = require("Module:headword")
local m_str_utils = require("Module:string utilities")

-- Short aliases for the string helpers used throughout the module.
local u = m_str_utils.char
local ulen = m_str_utils.len
local usub = m_str_utils.sub
local rmatch = m_str_utils.match
local rsplit = mw.text.split

-- geez diacritics
local XD = u(0x135F) -- combining gemination points above the character

-- letters and numerals
-- The leading digit in each comment is the series (vowel order) of the letter.
local QF6    = u(0x1245) -- 6kˀāf ቅ, 1s 2s 2p preterite assimilation
local QWF6   = u(0x124D) -- 6kˀʷāf ቍ, 1s 2s 2p preterite assimilation (U+124A is the 3rd-series form ቊ)
local KF6    = u(0x12AD) -- 6kāf ክ, 1s 2s 2p preterite assimilation
local KWF6   = u(0x12B5) -- 6kʷāf ኵ, 1s 2s 2p preterite assimilation
local GL6    = u(0x130D) -- 6gaməl ግ, 1s 2s 2p preterite assimilation
local GWL6   = u(0x1315) -- 6gʷaməl ጕ, 1s 2s 2p preterite assimilation (own name so it no longer shadows GL6)
-- NOTE(review): U+0640 is ARABIC TATWEEL, not an Ethiopic character; this looks
-- inherited from the Arabic module this file is based on — confirm it is wanted.
local HYPHEN = u(0x0640)
local NS6    = u(0x1295) -- 6nahās = ን, 1p preterite assimilation
local WW6    = u(0x12CD) -- 6wāw = ው, weak consonant
local YN6    = u(0x12ED) -- 6yaman = ይ, weak consonant
local HY6    = u(0x1205) -- 6hoy = ህ, velar continuant or post-velar, vowel changes
local HT6    = u(0x1215) -- 6ħawt = ሕ, velar continuant or post-velar, vowel changes
local XM6    = u(0x1285) -- 6xam  = ኅ, velar continuant or post-velar, vowel changes
local XWM6   = u(0x128D) -- 6xʷam = ኍ, velar continuant or post-velar, vowel changes
local AF6    = u(0x12A5) -- 6ʔalf = እ, velar continuant or post-velar, vowel changes
local AN6    = u(0x12D5) -- 6ʕayn = ዕ, velar continuant or post-velar, vowel changes
local AF1    = u(0x12A0) -- 1ʔalf = አ, velar continuant or post-velar, vowel changes
local TW1    = u(0x1270) -- 1taw = ተ, premorph for medio-passive stems
local TW4    = u(0x1273) -- 4taw = ታ, variant premorph for medio-passive stems before 6th-series velar cont/post-velar
local ST6    = u(0x1235) -- 6sat = ስ, for S stem in asta-, astaa-
local NN6    = u(0x1295) -- 6nun = ን, for Nt stem (same codepoint as NS6)

-- Each SERn string lists the syllabograms of one series (vowel order).
-- The labiovelar letters appear only in SER1, SER3, SER4, SER5 and SER6.
local SER1	 = "ሀለሐመሠረሰቀቈበተኀኈነአከኰወዐዘየደገጐጠጰጸፀፈፐ" -- radicals with series 1 vowel a
local SER2	 = "ሁሉሑሙሡሩሱቁቡቱኁኑኡኩዉዑዙዩዱጉጡጱጹፁፉፑ" -- radicals with series 2 vowel u
local SER3	 = "ሂሊሒሚሢሪሲቂቊቢቲኂኊኒኢኪኲዊዒዚዪዲጊጒጢጲጺፂፊፒ" -- radicals with series 3 vowel i
local SER4	 = "ሃላሓማሣራሳቃቋባታኃኋናኣካኳዋዓዛያዳጋጓጣጳጻፃፋፓ" -- radicals with series 4 vowel ā
local SER5   = "ሄሌሔሜሤሬሴቄቌቤቴኄኌኔኤኬኴዌዔዜዬዴጌጔጤጴጼፄፌፔ" -- radicals with series 5 vowel e
local SER6   = "ህልሕምሥርስቅቍብትኅኍንእክኵውዕዝይድግጕጥጵጽፅፍፕ" -- radicals with series 6 vowel ə or no vowel
local SER7   = "ሆሎሖሞሦሮሶቆቦቶኆኖኦኮዎዖዞዮዶጎጦጶጾፆፎፖ" -- radicals with series 7 vowel o
local ALL	 = XD .. SER1 .. SER2 .. SER3 .. SER4 .. SER5 .. SER6 .. SER7 -- all the allowable letters (gemination mark plus every series)

-- syllabogram tables arranged by consonant
-- Full consonants have seven entries, indexed by series number (1-7).
-- Labiovelars (QWAF, HWARM, KWAF, GWAML) have five entries, holding series
-- 1, 3, 4, 5 and 6 at indices 1-5 (they have no series 2 or 7 forms).
local HOY    = {"ሀ", "ሁ", "ሂ", "ሃ", "ሄ", "ህ", "ሆ"} -- hoy
local LAW    = {"ለ", "ሉ", "ሊ", "ላ", "ሌ", "ል", "ሎ"} -- law
local HAWT   = {"ሐ", "ሑ", "ሒ", "ሓ", "ሔ", "ሕ", "ሖ"} -- hawt
local MAY    = {"መ", "ሙ", "ሚ", "ማ", "ሜ", "ም", "ሞ"} -- may
local SAWT   = {"ሠ", "ሡ", "ሢ", "ሣ", "ሤ", "ሥ", "ሦ"} -- sawt
local REES   = {"ረ", "ሩ", "ሪ", "ራ", "ሬ", "ር", "ሮ"} -- rees
local SAT    = {"ሰ", "ሱ", "ሲ", "ሳ", "ሴ", "ስ", "ሶ"} -- sat
local QAF    = {"ቀ", "ቁ", "ቂ", "ቃ", "ቄ", "ቅ", "ቆ"} -- qaf
local QWAF   = {"ቈ", "ቊ", "ቋ", "ቌ", "ቍ"} -- qwaf
-- bet and taw: series 2 (u) must come before series 3 (i), as in SER2/SER3
local BET    = {"በ", "ቡ", "ቢ", "ባ", "ቤ", "ብ", "ቦ"} -- bet
local TAW    = {"ተ", "ቱ", "ቲ", "ታ", "ቴ", "ት", "ቶ"} -- taw
local HARM   = {"ኀ", "ኁ", "ኂ", "ኃ", "ኄ", "ኅ", "ኆ"} -- harm
local HWARM  = {"ኈ", "ኊ", "ኋ", "ኌ", "ኍ"} -- hwarm
local NAHAS  = {"ነ", "ኑ", "ኒ", "ና", "ኔ", "ን", "ኖ"} -- nahas, triggers total assimilation with 1p preterite suffix -na
local ALF    = {"አ", "ኡ", "ኢ", "ኣ", "ኤ", "እ", "ኦ"} -- alf
local KAF    = {"ከ", "ኩ", "ኪ", "ካ", "ኬ", "ክ", "ኮ"} -- kaf
local KWAF   = {"ኰ", "ኲ", "ኳ", "ኴ", "ኵ"} -- kwaf
local WAW    = {"ወ", "ዉ", "ዊ", "ዋ", "ዌ", "ው", "ዎ"} -- waw
local AYN    = {"ዐ", "ዑ", "ዒ", "ዓ", "ዔ", "ዕ", "ዖ"} -- ayn
local ZAY    = {"ዘ", "ዙ", "ዚ", "ዛ", "ዜ", "ዝ", "ዞ"} -- zay
local YAMAN  = {"የ", "ዩ", "ዪ", "ያ", "ዬ", "ይ", "ዮ"} -- yaman
local DENT   = {"ደ", "ዱ", "ዲ", "ዳ", "ዴ", "ድ", "ዶ"} -- dent
local GAML   = {"ገ", "ጉ", "ጊ", "ጋ", "ጌ", "ግ", "ጎ"} -- gaml
local GWAML  = {"ጐ", "ጒ", "ጓ", "ጔ", "ጕ"} -- gwaml
local TAYT   = {"ጠ", "ጡ", "ጢ", "ጣ", "ጤ", "ጥ", "ጦ"} -- tayt
local PAYT   = {"ጰ", "ጱ", "ጲ", "ጳ", "ጴ", "ጵ", "ጶ"} -- payt
local SADAY  = {"ጸ", "ጹ", "ጺ", "ጻ", "ጼ", "ጽ", "ጾ"} -- saday
local DAPPA  = {"ፀ", "ፁ", "ፂ", "ፃ", "ፄ", "ፅ", "ፆ"} -- dappa
local AF     = {"ፈ", "ፉ", "ፊ", "ፋ", "ፌ", "ፍ", "ፎ"} -- af
local PA     = {"ፐ", "ፑ", "ፒ", "ፓ", "ፔ", "ፕ", "ፖ"} -- pa

-------------------------------------------------------------------------------
--                                Utility functions                          --
-------------------------------------------------------------------------------
-- Checks the series (vowel order) to which a syllabogram belongs.
-- @param letter  a single syllabogram (string)
-- @return the series number 1-7, or 0 if the letter is in none of SER1..SER7
local function series_check(letter)
	local series = {SER1, SER2, SER3, SER4, SER5, SER6, SER7}
	for sernum, members in ipairs(series) do
		-- Each SERn string is a list of letters, not a pattern. Used bare as a
		-- pattern it would only match if `letter` contained the entire list,
		-- so wrap it in an anchored character class to test membership.
		if rmatch(letter, "^[" .. members .. "]$") then
			return sernum
		end
	end
	return 0
end
-- Given a syllabogram, returns the syllabogram of the same consonant in
-- another series, e.g., given yə and 1, get ya.
-- @param letter  the source syllabogram
-- @param number  the wanted series number
-- @return the matching syllabogram; "" when `letter` is found in no consonant
--         table; nil when the consonant is found but has no entry at the
--         requested index (e.g. series 7 of a labiovelar)
local function syllabogram(letter, number)
	local sernum = series_check(letter)

	-- Consonants with a full seven-series table: the table index is the
	-- series number itself.
	local full_consonants = {
		HOY, LAW, HAWT, MAY, SAWT, REES, SAT, QAF, BET, TAW, HARM, NAHAS, ALF,
		KAF, WAW, AYN, ZAY, YAMAN, DENT, GAML, TAYT, PAYT, SADAY, DAPPA, AF, PA,
	}
	for _, consonant in ipairs(full_consonants) do
		if letter == consonant[sernum] then
			return consonant[number]
		end
	end

	-- Labiovelars have five-entry tables (series 1, 3, 4, 5, 6), so any
	-- series above 1 sits one slot lower than its series number. A request
	-- for series 2 therefore resolves to index 1.
	-- NOTE(review): labiovelars have no series 2 form; confirm that falling
	-- back to the series 1 form is the intended result.
	if sernum >= 1 then
		local from_index = (sernum == 1) and 1 or (sernum - 1)
		local to_index = (number == 1) and 1 or (number - 1)
		-- GWAML is the gʷaməl table (the name GWAF is not defined anywhere)
		local labiovelars = {QWAF, HWARM, KWAF, GWAML}
		for _, consonant in ipairs(labiovelars) do
			if letter == consonant[from_index] then
				return consonant[to_index]
			end
		end
	end

	return ""
end

-- Builds a full link to a Ge'ez term via Module:links.
-- @param term  the term to link to
-- @param alt   optional alternative display text
-- @param id    optional sense id
-- @return the formatted link, or `term` itself when it is an empty string or
--         the dash placeholder "—"
local function link(term, alt, id)
	-- Placeholders are passed through unlinked. The check has to be on the
	-- `term` parameter; no variable named `word` exists in this module.
	if term == "" or term == "—" then
		return term
	end

	return m_links.full_link({
		term = term,
		alt = alt,
		lang = lang,
		id = id,
	})
end

-- Checks that every entry of a root is a single letter.
-- @param rootTable    sequence of root letters (strings)
-- @param joined_root  the root as written, used only in the error message
-- Raises an error if `rootTable` is not a table or if any entry is longer
-- than one character.
local function validateRoot(rootTable, joined_root)
	if type(rootTable) ~= "table" then
		error("rootTable is not a table", 2)
	end

	-- Position names for the error message; defined here because the module
	-- has no `ordinal` table of its own.
	local ordinal = {"first", "second", "third", "fourth", "fifth"}

	for i, letter in ipairs(rootTable) do
		local letlength = mw.ustring.len(letter)
		if letlength > 1 then
			error("The letter " .. letter .. ", which is the " ..
				(ordinal[i] or ("#" .. i)) ..
				" letter in the root '" .. tostring(joined_root) ..
				"' should be a single letter.")
		end
	end
end

-- Splits a string into a table of the pieces found between separators.
-- `sep` is placed inside a pattern character class, so it may list several
-- separator characters or use a class such as "%s" (the default when `sep`
-- is nil). Empty pieces are dropped.
function mysplit(inputstr, sep)
  local delimiter = (sep == nil) and "%s" or sep
  local pattern = "([^" .. delimiter .. "]+)"
  local pieces = {}
  for piece in string.gmatch(inputstr, pattern) do
    pieces[#pieces + 1] = piece
  end
  return pieces
end

-- Template entry point: renders the root given as parameter 1.
-- Parameters read from the parent frame:
--   1       the root; on pages in the Template namespace a sample root is
--           used when it is missing
--   nocat   boolean; return only the link
--   plain   boolean; return only the link
--   notext  boolean; accepted but not used by this function
-- @return the bare link when nocat or plain is set; otherwise a floating
--         table holding the link and a term count for the root's category,
--         followed by the formatted category
function export.root(frame)
	local namespace = mw.title.getCurrentTitle().nsText

	local params = {
		[1] = {},
		["nocat"] = { type = "boolean" },
		["plain"] = { type = "boolean" },
		["notext"] = { type = "boolean" },
	}

	local args = require("Module:parameters").process(frame:getParent().args,
			params)

	local root = args[1]
	if not root and namespace == "Template" then
		-- sample root so the template's own page renders something
		root = "ከ-ተ-በ"
	end
	if not root then
		-- fail clearly instead of on a nil concatenation further down
		error("Parameter 1 (the root) is required.")
	end

	local link_text = link(root)
	if args["nocat"] or args["plain"] then
		return link_text
	end

	local category = "Ge'ez terms belonging to the root " .. root
	-- format_categories needs the language object; `lang` is the module-level
	-- one (there is no variable named `gez`)
	local categories = m_utilities.format_categories({category}, lang)
	local term_count = mw.site.stats.pagesInCategory(category, "pages")

	return "<table class=\"wikitable\" style=\"float: right; clear: right; text-align: center;\"><tr><th>[[w:Semitic root|Root]]</th></tr><tr><td>" .. link_text .. "</td></tr><tr><td>[[:Category:" .. lang:getCanonicalName() .. " terms belonging to the root " .. root .. "|" .. term_count .. " term" .. (term_count == 1 and "" or "s") .. "]]</td></tr></table>" .. categories
end

return export