-- Page header captured together with the source (not part of the module code):
--
-- Module:sem-eth-utilities
--
-- From Wiktionary, the free dictionary


-- Module:gez-utilities
-- Author: Weshyaunt
-- Based on Module:sem-arb-utilities by Fenakhay, Sept 2023

local export = {}

local m_utilities = require("Module:utilities")
local m_links = require("Module:links")
local m_headword = require("Module:headword")

local m_str_utils = require("Module:string utilities")
local u = m_str_utils.char
local ulen = m_str_utils.len
local usub = m_str_utils.sub
local rmatch = m_str_utils.match
local rsplit = m_str_utils.split
local rsubn = m_str_utils.gsub

-- geez diacritics
local XD = u(0x135F) -- combining gemination points above the character

-- letters and numerals
-- The leading digit in each gloss is the vowel series of the syllabogram
-- (e.g. "6kāf" is the sixth-series form of kāf).
local QF6    = u(0x1245) -- 6kˀāf ቅ, 1s 2s 2p preterite assimilation
local QWF6   = u(0x124D) -- 6kˀʷāf ቍ, 1s 2s 2p preterite assimilation (was U+124A, the 3rd-series ቊ)
local KF6    = u(0x12AD) -- 6kāf ክ, 1s 2s 2p preterite assimilation
local KWF6   = u(0x12B5) -- 6kʷāf ኵ, 1s 2s 2p preterite assimilation
local GL6    = u(0x130D) -- 6gaməl ግ, 1s 2s 2p preterite assimilation
-- Previously declared as a second `GL6`, which shadowed the plain gaməl above.
local GWL6   = u(0x1315) -- 6gʷaməl ጕ, 1s 2s 2p preterite assimilation
-- NOTE(review): U+0640 is ARABIC TATWEEL, presumably carried over from
-- Module:sem-arb-utilities; confirm whether an Ethiopic module needs it.
local HYPHEN = u(0x0640)
local NS6    = u(0x1295) -- 6nahās = ን, 1p preterite assimilation
local WW6    = u(0x12CD) -- 6wāw = ው, weak consonant
local YN6    = u(0x12ED) -- 6yaman = ይ, weak consonant
local HY6    = u(0x1205) -- 6hoy = ህ, velar continuant or post-velar, vowel changes
local HT6    = u(0x1215) -- 6ħawt = ሕ, velar continuant or post-velar, vowel changes
local XM6    = u(0x1285) -- 6xam  = ኅ, velar continuant or post-velar, vowel changes
local XWM6   = u(0x128D) -- 6xʷam = ኍ, velar continuant or post-velar, vowel changes
local AF6    = u(0x12A5) -- 6ʔalf = እ, velar continuant or post-velar, vowel changes
local AN6    = u(0x12D5) -- 6ʕayn = ዕ, velar continuant or post-velar, vowel changes
local AF1    = u(0x12A0) -- 1ʔalf = አ, velar continuant or post-velar, vowel changes
local TW1    = u(0x1270) -- 1taw = ተ, premorph for medio-passive stems
local TW4    = u(0x1273) -- 4taw = ታ, variant premorph for medio-passive stems before 6th-series velar cont/post-velar
local ST6    = u(0x1235) -- 6sat = ስ, for S stem in asta-, astaa-
local NN6    = u(0x1295) -- 6nun = ን, for Nt stem (same code point as NS6)

-- One string per vowel series; each holds every syllabogram of that series.
-- The labialized consonants (ቈ, ኈ, ኰ, ጐ families) appear only in series
-- 1, 3, 4, 5 and 6, so SER2 and SER7 contain no labialized letters.
local SER1	 = "ሀለሐመሠረሰቀቈበተኀኈነአከኰወዐዘየደገጐጠጰጸፀፈፐ" -- radicals with series 1 vowel a
local SER2	 = "ሁሉሑሙሡሩሱቁቡቱኁኑኡኩዉዑዙዩዱጉጡጱጹፁፉፑ" -- radicals with series 2 vowel u
local SER3	 = "ሂሊሒሚሢሪሲቂቊቢቲኂኊኒኢኪኲዊዒዚዪዲጊጒጢጲጺፂፊፒ" -- radicals with series 3 vowel i
local SER4	 = "ሃላሓማሣራሳቃቋባታኃኋናኣካኳዋዓዛያዳጋጓጣጳጻፃፋፓ" -- radicals with series 4 vowel ā
local SER5   = "ሄሌሔሜሤሬሴቄቌቤቴኄኌኔኤኬኴዌዔዜዬዴጌጔጤጴጼፄፌፔ" -- radicals with series 5 vowel e
local SER6   = "ህልሕምሥርስቅቍብትኅኍንእክኵውዕዝይድግጕጥጵጽፅፍፕ" -- radicals with series 6 vowel ə or no vowel
local SER7   = "ሆሎሖሞሦሮሶቆቦቶኆኖኦኮዎዖዞዮዶጎጦጶጾፆፎፖ" -- radicals with series 7 vowel o
-- The gemination mark followed by the letters of all seven series.
local ALL	 = XD .. SER1 .. SER2 .. SER3 .. SER4 .. SER5 .. SER6 .. SER7 --all the allowable letters

-- syllabogram tables arranged by consonant
-- Plain consonants have seven entries, indexed by vowel series 1..7
-- (a, u, i, ā, e, ə, o).  Labialized consonants (QWAF, HWARM, KWAF, GWAML)
-- have only five entries, holding series 1, 3, 4, 5 and 6 in that order.
local HOY    = {"ሀ", "ሁ", "ሂ", "ሃ", "ሄ", "ህ", "ሆ"} -- hoy
local LAW    = {"ለ", "ሉ", "ሊ", "ላ", "ሌ", "ል", "ሎ"} -- law
local HAWT   = {"ሐ", "ሑ", "ሒ", "ሓ", "ሔ", "ሕ", "ሖ"} -- hawt
local MAY    = {"መ", "ሙ", "ሚ", "ማ", "ሜ", "ም", "ሞ"} -- may
local SAWT   = {"ሠ", "ሡ", "ሢ", "ሣ", "ሤ", "ሥ", "ሦ"} -- sawt
local REES   = {"ረ", "ሩ", "ሪ", "ራ", "ሬ", "ር", "ሮ"} -- rees
local SAT    = {"ሰ", "ሱ", "ሲ", "ሳ", "ሴ", "ስ", "ሶ"} -- sat
local QAF    = {"ቀ", "ቁ", "ቂ", "ቃ", "ቄ", "ቅ", "ቆ"} -- qaf
local QWAF   = {"ቈ", "ቊ", "ቋ", "ቌ", "ቍ"} -- qwaf
-- BET and TAW previously had their 2nd (u) and 3rd (i) entries swapped.
local BET    = {"በ", "ቡ", "ቢ", "ባ", "ቤ", "ብ", "ቦ"} -- bet
local TAW    = {"ተ", "ቱ", "ቲ", "ታ", "ቴ", "ት", "ቶ"} -- taw
local HARM   = {"ኀ", "ኁ", "ኂ", "ኃ", "ኄ", "ኅ", "ኆ"} -- harm
local HWARM  = {"ኈ", "ኊ", "ኋ", "ኌ", "ኍ"} -- hwarm
local NAHAS  = {"ነ", "ኑ", "ኒ", "ና", "ኔ", "ን", "ኖ"} -- nahas, triggers total assimilation with 1p preterite suffix -na
local ALF    = {"አ", "ኡ", "ኢ", "ኣ", "ኤ", "እ", "ኦ"} -- alf
local KAF    = {"ከ", "ኩ", "ኪ", "ካ", "ኬ", "ክ", "ኮ"} -- kaf
local KWAF   = {"ኰ", "ኲ", "ኳ", "ኴ", "ኵ"} -- kwaf
local WAW    = {"ወ", "ዉ", "ዊ", "ዋ", "ዌ", "ው", "ዎ"} -- waw
local AYN    = {"ዐ", "ዑ", "ዒ", "ዓ", "ዔ", "ዕ", "ዖ"} -- ayn
local ZAY    = {"ዘ", "ዙ", "ዚ", "ዛ", "ዜ", "ዝ", "ዞ"} -- zay
local YAMAN  = {"የ", "ዩ", "ዪ", "ያ", "ዬ", "ይ", "ዮ"} -- yaman
local DENT   = {"ደ", "ዱ", "ዲ", "ዳ", "ዴ", "ድ", "ዶ"} -- dent
local GAML   = {"ገ", "ጉ", "ጊ", "ጋ", "ጌ", "ግ", "ጎ"} -- gaml
local GWAML  = {"ጐ", "ጒ", "ጓ", "ጔ", "ጕ"} -- gwaml
local TAYT   = {"ጠ", "ጡ", "ጢ", "ጣ", "ጤ", "ጥ", "ጦ"} -- tayt
local PAYT   = {"ጰ", "ጱ", "ጲ", "ጳ", "ጴ", "ጵ", "ጶ"} -- payt
local SADAY  = {"ጸ", "ጹ", "ጺ", "ጻ", "ጼ", "ጽ", "ጾ"} -- saday
local DAPPA  = {"ፀ", "ፁ", "ፂ", "ፃ", "ፄ", "ፅ", "ፆ"} -- dappa
local AF     = {"ፈ", "ፉ", "ፊ", "ፋ", "ፌ", "ፍ", "ፎ"} -- af
local PA     = {"ፐ", "ፑ", "ፒ", "ፓ", "ፔ", "ፕ", "ፖ"} -- pa

-------------------------------------------------------------------------------
--                                Utility functions                          --
-------------------------------------------------------------------------------
-- checks the series to which a syllabogram belongs
-- Returns the series number (1..7) of the first series string that contains
-- a character of `letter`, or 0 when no series matches.
local function series_check(letter)
	local series = { SER1, SER2, SER3, SER4, SER5, SER6, SER7 }
	for sernum, members in ipairs(series) do
		-- Wrap the series string in a character class: used bare, the whole
		-- string is a literal pattern that a single letter can never match.
		if rmatch(letter, "[" .. members .. "]") then
			return sernum
		end
	end
	return 0
end
-- Consonant tables with a form for every series (index == series number).
local PLAIN_CONSONANTS = {
	HOY, LAW, HAWT, MAY, SAWT, REES, SAT, QAF, BET, TAW, HARM, NAHAS, ALF,
	KAF, WAW, AYN, ZAY, YAMAN, DENT, GAML, TAYT, PAYT, SADAY, DAPPA, AF, PA,
}
-- Labialized consonant tables: five forms, for series 1, 3, 4, 5 and 6.
local LABIALIZED_CONSONANTS = { QWAF, HWARM, KWAF, GWAML }

-- given a syllabogram, returns the desired syllabogram of the same consonant
-- e.g., given yə, get ya
--
-- letter: the syllabogram to convert.
-- number: the vowel series (1..7) of the form wanted.
-- Returns the requested form, or "" when `letter` is not recognised or the
-- consonant has no form at the requested position.
local function syllabogram(letter, number)
	local sernum = series_check(letter)
	if sernum == 0 then
		return ""
	end

	for _, forms in ipairs(PLAIN_CONSONANTS) do
		if letter == forms[sernum] then
			return forms[number] or ""
		end
	end

	-- Labialized tables skip series 2, so series 1 sits at index 1 and every
	-- later series n sits at index n - 1.  A request for series 2 therefore
	-- yields the series 1 form, and a request for series 7 yields "".
	local from = sernum == 1 and 1 or sernum - 1
	local to = number == 1 and 1 or number - 1
	for _, forms in ipairs(LABIALIZED_CONSONANTS) do
		if letter == forms[from] then
			return forms[to] or ""
		end
	end

	return ""
end

-- NOTE(review): these two aliases repeat the identical declarations made near
-- the top of the module; the redeclaration is redundant but harmless.
local rsplit = m_str_utils.split
local rsubn = m_str_utils.gsub

-- Language codes mapped to a truthy value here use " " instead of "-" as the
-- separator between radicals in export.root.
local separator_langs = { }
-- Per-language sample root (keyed by language code) that export.root uses on
-- Template pages when no root argument is given.
local template_preview_per_langcode = {  }
-- Language object for the current invocation; assigned in export.root and
-- read by link().
local lang

-- Builds a full link to `term` in the module-level `lang`, displayed as `alt`
-- and targeting sense `id`.  The empty string and the "—" placeholder are
-- returned untouched instead of being linked.
local function link(term, alt, id)
	local is_placeholder = term == "" or term == "—"
	if is_placeholder then
		return term
	end

	local link_data = { term = term, alt = alt, lang = lang, id = id }
	return m_links.full_link(link_data)
end

-- Validates a root that has already been split into its radicals.
--
-- rootTable:   sequence of radicals, one string per entry.
-- joined_root: the same root as a single string; used only in error messages.
--
-- Raises an error when `rootTable` is not a table, has fewer than three
-- entries, or contains an entry longer than one character.
local function validateRoot(rootTable, joined_root)
	if type(rootTable) ~= "table" then
		error("rootTable is not a table", 2)
	end

	if #rootTable < 3 then
		error("Root must have at least three radicals.")
	end

	-- Ordinal names for the error message.  This table was previously missing,
	-- so the intended message was replaced by a nil-index crash.
	local ordinal = { "first", "second", "third", "fourth", "fifth", "sixth" }

	for i, letter in ipairs(rootTable) do
		if mw.ustring.len(letter) > 1 then
			error("'" .. letter .. "', the " .. (ordinal[i] or (i .. "th")) ..
				" letter in the root '" .. joined_root ..
				"' should be a single letter.")
		end
	end
end

-- Entry point for the root template.
--
-- Invocation arguments (frame.args):
--   lang  - language code (required)
--   plain - if present, forces plain output (links only)
-- Template arguments (parent frame):
--   1..n   - roots, radicals joined by the separator ("-" unless the language
--            is listed in separator_langs); defaults to the page title, or to
--            a sample root on Template pages
--   nocat  - boolean; suppresses the category table and categories
--   plain  - boolean; links only
--   notext - boolean; accepted but not used here
--   sense  - optional sense, appended in parentheses and used as link id
--
-- On the root's own page (title equals the root) the headword line is
-- returned.  Elsewhere the result is either a comma-separated list of links to
-- the root appendix pages (plain/nocat) or a floating table of those links
-- with per-root category counts, followed by the categories.
function export.root(frame)
	local title = mw.title.getCurrentTitle()
	local namespace = title.nsText

	local lang_code = frame.args["lang"]
	if not lang_code then
		error("Please provide a language code.")
	end
	-- `lang` is the module-level variable that link() reads.
	lang = require("Module:languages").getByCode(lang_code)
	if not lang then
		-- getByCode returns nil for an unknown code; fail with a clear message
		-- instead of a nil-index error further down.
		error("The language code '" .. lang_code .. "' is not valid.")
	end

	local canonical_name = lang:getCanonicalName()
	local subpage = "Appendix:" .. canonical_name .. " roots/"
	-- Page title with the appendix prefix removed.  Every non-alphanumeric
	-- character of the prefix is %-escaped so it is matched literally.
	local escaped_subpage = rsubn(subpage, "([^%w])", "%%%1")
	local fulltitle = rsubn(title.fullText, escaped_subpage, "")

	local params = {
		[1] = { list = true },
		["nocat"] = { type = "boolean" },
		["plain"] = { type = "boolean" },
		["notext"] = { type = "boolean" },
		["sense"] = {}
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local roots = args[1]
	local roots_len = #roots

	local plain = args["plain"]
	if frame.args["plain"] then
		plain = true
	end

	local langCode = lang:getCode()
	local separator = separator_langs[langCode] and " " or "-"

	-- Split every root into its radicals.
	local rootLetters = {}
	if roots_len ~= 0 then
		for _, root in ipairs(roots) do
			table.insert(rootLetters, rsplit(root, separator))
		end
	elseif namespace == "Template" then
		-- Template preview: no arguments, so show a sample root.
		if template_preview_per_langcode[langCode] ~= nil then
			table.insert(rootLetters, rsplit(template_preview_per_langcode[langCode], separator))
		else
			table.insert(rootLetters, rsplit("ከ-ተ-በ", separator))
		end
	else
		table.insert(rootLetters, rsplit(fulltitle, separator))
	end

	local joined_roots = {}
	for i, rootLetter in ipairs(rootLetters) do
		joined_roots[i] = table.concat(rootLetter, separator)
		validateRoot(rootLetter, joined_roots[i])
	end

	local sense = args["sense"]
	local sense_formatted = ""
	if sense ~= nil then
		sense_formatted = " (" .. sense .. ") "
	end

	-- Case 1: this is the root's own page, so emit the headword line.
	if fulltitle == joined_roots[1] then
		if namespace == "" then
			error("The root page should be in the Appendix namespace. Please move it to : [[" ..
			subpage .. joined_roots[1] .. "]]")
		end

		if roots_len > 1 then
			error("There should be only one root.")
		end

		-- No categories are collected on this path, so `nocat` makes no
		-- difference to the result.
		return m_headword.full_headword({ lang = lang, pos_category = "roots", categories = {}, heads = { fulltitle }, nomultiwordcat = true, noposcat = true })
	end

	-- Case 2: a page that belongs to the root(s), so link to the appendix pages.
	local link_texts = {}
	for i, joined_root in ipairs(joined_roots) do
		link_texts[i] = link(subpage .. joined_root, joined_root .. sense_formatted, sense)
	end

	if args["nocat"] or plain then
		-- Returning before the category work avoids the expensive
		-- pagesInCategory lookups, whose results this output never uses.
		return table.concat(link_texts, ", ")
	end

	local categories = {}
	local rows = {}
	for i, joined_root in ipairs(joined_roots) do
		local category_name = canonical_name .. " terms belonging to the root " .. joined_root .. sense_formatted
		local term_count = mw.site.stats.pagesInCategory(category_name, "pages")

		categories[i] = m_utilities.format_categories({ category_name }, lang)
		rows[i] = '<tr><td>' ..
			link_texts[i] ..
			"</td></tr><tr><td>[[:Category:" ..
			category_name ..
			"|" ..
			term_count ..
			" term" .. (term_count == 1 and "" or "s") .. "]]</td></tr>"
	end

	return
		'<table class="wikitable" style="float: right; clear: right; text-align: center;"><tr><th>[[w:Semitic root|Root' ..
		(#joined_roots == 1 and "" or "s") .. ']]</th></tr>' .. table.concat(rows) ..
		'</table>' .. table.concat(categories)
end

return export