-- Module:sem-eth-utilities
-- Appearance
-- This module lacks a documentation subpage. Please create it.
-- Useful links: subpage list • links • transclusions • testcases • sandbox
-- Module:gez-utilities
-- Author: Weshyaunt
-- Based on Module:sem-arb-utilities by Fenakhay, Sept 2023
local export = {}
local m_utilities = require("Module:utilities")
local m_links = require("Module:links")
local m_headword = require("Module:headword")
local m_str_utils = require("Module:string utilities")
local u = m_str_utils.char
local ulen = m_str_utils.len
local usub = m_str_utils.sub
local rmatch = m_str_utils.match
local rsplit = m_str_utils.split
local rsubn = m_str_utils.gsub
-- Single-character constants built from code points with `u`.
-- A "6" suffix marks the 6th-series (ə / vowelless) form of a consonant,
-- "1" the 1st-series (a) form and "4" the 4th-series (ā) form.
-- geez diacritics
local XD = u(0x135F) -- combining gemination points above the character
-- letters and numerals
local QF6 = u(0x1245) -- 6kˀāf ቅ, 1s 2s 2p preterite assimilation
-- U+124D is the 6th-series form ቍ (U+124A is ቊ, the 3rd-series form)
local QWF6 = u(0x124D) -- 6kˀʷāf ቍ, 1s 2s 2p preterite assimilation
local KF6 = u(0x12AD) -- 6kāf ክ, 1s 2s 2p preterite assimilation
local KWF6 = u(0x12B5) -- 6kʷāf ኵ, 1s 2s 2p preterite assimilation
local GL6 = u(0x130D) -- 6gaməl ግ, 1s 2s 2p preterite assimilation
-- named GWL6 so that it no longer shadows GL6 declared just above
local GWL6 = u(0x1315) -- 6gʷaməl ጕ, 1s 2s 2p preterite assimilation
-- NOTE(review): U+0640 is ARABIC TATWEEL, presumably carried over from
-- Module:sem-arb-utilities; confirm whether an Ethiopic-appropriate character is wanted.
local HYPHEN = u(0x0640)
local NS6 = u(0x1295) -- 6nahās = ን, 1p preterite assimilation
local WW6 = u(0x12CD) -- 6wāw = ው, weak consonant
local YN6 = u(0x12ED) -- 6yaman = ይ, weak consonant
local HY6 = u(0x1205) -- 6hoy = ህ, velar continuant or post-velar, vowel changes
local HT6 = u(0x1215) -- 6ħawt = ሕ, velar continuant or post-velar, vowel changes
local XM6 = u(0x1285) -- 6xam = ኅ, velar continuant or post-velar, vowel changes
local XWM6 = u(0x128D) -- 6xʷam = ኍ, velar continuant or post-velar, vowel changes
local AF6 = u(0x12A5) -- 6ʔalf = እ, velar continuant or post-velar, vowel changes
local AN6 = u(0x12D5) -- 6ʕayn = ዕ, velar continuant or post-velar, vowel changes
local AF1 = u(0x12A0) -- 1ʔalf = አ, velar continuant or post-velar, vowel changes
local TW1 = u(0x1270) -- 1taw = ተ, premorph for medio-passive stems
local TW4 = u(0x1273) -- 4taw = ታ, variant premorph for medio-passive stems before 6th-series velar cont/post-velar
local ST6 = u(0x1235) -- 6sat = ስ, for S stem in asta-, astaa-
local NN6 = u(0x1295) -- 6nun = ን, for Nt stem (same code point as NS6)
-- Syllabograms grouped by series (vowel order). Each string lists, consonant by
-- consonant, the form carrying that series' vowel. The labialized consonants
-- (ቈ, ኈ, ኰ, ጐ) appear only in series 1, 3, 4, 5 and 6; they have no entry in
-- SER2 or SER7.
local SER1 = "ሀለሐመሠረሰቀቈበተኀኈነአከኰወዐዘየደገጐጠጰጸፀፈፐ" -- radicals with series 1 vowel a
local SER2 = "ሁሉሑሙሡሩሱቁቡቱኁኑኡኩዉዑዙዩዱጉጡጱጹፁፉፑ" -- radicals with series 2 vowel u
local SER3 = "ሂሊሒሚሢሪሲቂቊቢቲኂኊኒኢኪኲዊዒዚዪዲጊጒጢጲጺፂፊፒ" -- radicals with series 3 vowel i
local SER4 = "ሃላሓማሣራሳቃቋባታኃኋናኣካኳዋዓዛያዳጋጓጣጳጻፃፋፓ" -- radicals with series 4 vowel ā
local SER5 = "ሄሌሔሜሤሬሴቄቌቤቴኄኌኔኤኬኴዌዔዜዬዴጌጔጤጴጼፄፌፔ" -- radicals with series 5 vowel e
local SER6 = "ህልሕምሥርስቅቍብትኅኍንእክኵውዕዝይድግጕጥጵጽፅፍፕ" -- radicals with series 6 vowel ə or no vowel
local SER7 = "ሆሎሖሞሦሮሶቆቦቶኆኖኦኮዎዖዞዮዶጎጦጶጾፆፎፖ" -- radicals with series 7 vowel o
-- Gemination mark followed by every syllabogram from the seven series strings.
local ALL = XD .. SER1 .. SER2 .. SER3 .. SER4 .. SER5 .. SER6 .. SER7 --all the allowable letters
-- syllabogram tables arranged by consonant
-- Plain consonants have seven entries, indexed by series number (1 = a, 2 = u,
-- 3 = i, 4 = ā, 5 = e, 6 = ə, 7 = o).
-- Labialized consonants (QWAF, HWARM, KWAF, GWAML) have only five entries, for
-- series 1, 3, 4, 5 and 6 in that order, so their index is NOT the series number.
local HOY = {"ሀ", "ሁ", "ሂ", "ሃ", "ሄ", "ህ", "ሆ"} -- hoy
local LAW = {"ለ", "ሉ", "ሊ", "ላ", "ሌ", "ል", "ሎ"} -- law
local HAWT = {"ሐ", "ሑ", "ሒ", "ሓ", "ሔ", "ሕ", "ሖ"} -- hawt
local MAY = {"መ", "ሙ", "ሚ", "ማ", "ሜ", "ም", "ሞ"} -- may
local SAWT = {"ሠ", "ሡ", "ሢ", "ሣ", "ሤ", "ሥ", "ሦ"} -- sawt
local REES = {"ረ", "ሩ", "ሪ", "ራ", "ሬ", "ር", "ሮ"} -- rees
local SAT = {"ሰ", "ሱ", "ሲ", "ሳ", "ሴ", "ስ", "ሶ"} -- sat
local QAF = {"ቀ", "ቁ", "ቂ", "ቃ", "ቄ", "ቅ", "ቆ"} -- qaf
local QWAF = {"ቈ", "ቊ", "ቋ", "ቌ", "ቍ"} -- qwaf
-- series 2 is ቡ (bu) and series 3 is ቢ (bi), matching the series strings
local BET = {"በ", "ቡ", "ቢ", "ባ", "ቤ", "ብ", "ቦ"} -- bet
-- series 2 is ቱ (tu) and series 3 is ቲ (ti), matching the series strings
local TAW = {"ተ", "ቱ", "ቲ", "ታ", "ቴ", "ት", "ቶ"} -- taw
local HARM = {"ኀ", "ኁ", "ኂ", "ኃ", "ኄ", "ኅ", "ኆ"} -- harm
local HWARM = {"ኈ", "ኊ", "ኋ", "ኌ", "ኍ"} -- hwarm
local NAHAS = {"ነ", "ኑ", "ኒ", "ና", "ኔ", "ን", "ኖ"} -- nahas, triggers total assimilation with 1p preterite suffix -na
local ALF = {"አ", "ኡ", "ኢ", "ኣ", "ኤ", "እ", "ኦ"} -- alf
local KAF = {"ከ", "ኩ", "ኪ", "ካ", "ኬ", "ክ", "ኮ"} -- kaf
local KWAF = {"ኰ", "ኲ", "ኳ", "ኴ", "ኵ"} -- kwaf
local WAW = {"ወ", "ዉ", "ዊ", "ዋ", "ዌ", "ው", "ዎ"} -- waw
local AYN = {"ዐ", "ዑ", "ዒ", "ዓ", "ዔ", "ዕ", "ዖ"} -- ayn
local ZAY = {"ዘ", "ዙ", "ዚ", "ዛ", "ዜ", "ዝ", "ዞ"} -- zay
local YAMAN = {"የ", "ዩ", "ዪ", "ያ", "ዬ", "ይ", "ዮ"} -- yaman
local DENT = {"ደ", "ዱ", "ዲ", "ዳ", "ዴ", "ድ", "ዶ"} -- dent
local GAML = {"ገ", "ጉ", "ጊ", "ጋ", "ጌ", "ግ", "ጎ"} -- gaml
local GWAML = {"ጐ", "ጒ", "ጓ", "ጔ", "ጕ"} -- gwaml
local TAYT = {"ጠ", "ጡ", "ጢ", "ጣ", "ጤ", "ጥ", "ጦ"} -- tayt
local PAYT = {"ጰ", "ጱ", "ጲ", "ጳ", "ጴ", "ጵ", "ጶ"} -- payt
local SADAY = {"ጸ", "ጹ", "ጺ", "ጻ", "ጼ", "ጽ", "ጾ"} -- saday
local DAPPA = {"ፀ", "ፁ", "ፂ", "ፃ", "ፄ", "ፅ", "ፆ"} -- dappa
local AF = {"ፈ", "ፉ", "ፊ", "ፋ", "ፌ", "ፍ", "ፎ"} -- af
local PA = {"ፐ", "ፑ", "ፒ", "ፓ", "ፔ", "ፕ", "ፖ"} -- pa
-------------------------------------------------------------------------------
-- Utility functions --
-------------------------------------------------------------------------------
-- Returns the series (vowel order) to which a syllabogram belongs.
-- @param letter  string to classify, normally a single syllabogram
-- @return  1-7 for the first series string (SER1..SER7, tested in that order)
--          containing a character of `letter`, or 0 when none does
local function series_check(letter)
	local series_members = { SER1, SER2, SER3, SER4, SER5, SER6, SER7 }
	for sernum, members in ipairs(series_members) do
		-- The series string must be wrapped in [...] to act as a character set;
		-- used bare it is a literal multi-character pattern that a single
		-- letter can never match.
		if rmatch(letter, "[" .. members .. "]") then
			return sernum
		end
	end
	return 0
end
-- Given a syllabogram, returns the syllabogram of the same consonant in the
-- requested series; e.g., given yə (ይ) and 1, get ya (የ).
-- @param letter  a single syllabogram
-- @param number  target series, 1-7
-- @return  the requested syllabogram, or "" when `letter` is not found in any
--          consonant table or the consonant has no form in that series
--          (labialized consonants have none in series 2 and 7)
local function syllabogram(letter, number)
	-- Plain consonants: the table index is the series number.
	local plain_consonants = {
		HOY, LAW, HAWT, MAY, SAWT, REES, SAT, QAF, BET, TAW, HARM, NAHAS, ALF,
		KAF, WAW, AYN, ZAY, YAMAN, DENT, GAML, TAYT, PAYT, SADAY, DAPPA, AF, PA,
	}
	for _, forms in ipairs(plain_consonants) do
		for i = 1, 7 do
			if forms[i] == letter then
				return forms[number] or ""
			end
		end
	end

	-- Labialized consonants: five entries holding series 1, 3, 4, 5, 6, so the
	-- series number has to be translated into a table slot.
	local labialized_consonants = { QWAF, HWARM, KWAF, GWAML }
	local slot_for_series = { [1] = 1, [3] = 2, [4] = 3, [5] = 4, [6] = 5 }
	for _, forms in ipairs(labialized_consonants) do
		for i = 1, 5 do
			if forms[i] == letter then
				local slot = slot_for_series[number]
				return slot and forms[slot] or ""
			end
		end
	end

	return ""
end
-- Language codes (as keys) whose roots separate radicals with a space instead
-- of a hyphen; currently empty.
local separator_langs = { }
-- Optional per-language-code sample root, used by export.root when it is
-- invoked with no roots in the Template namespace; currently empty.
local template_preview_per_langcode = { }
-- Language object of the current invocation: assigned by export.root and read by link().
local lang
-- Builds a link to `term` in the module-level language via m_links.full_link.
-- The empty string and the em dash placeholder are handed back untouched.
-- @param term  page/term to link to
-- @param alt   display text passed through as `alt`
-- @param id    sense id passed through as `id`
local function link(term, alt, id)
	local is_placeholder = (term == "") or (term == "—")
	if not is_placeholder then
		local link_data = {
			term = term,
			alt = alt,
			lang = lang,
			id = id,
		}
		return m_links.full_link(link_data)
	end
	return term
end
-- Checks that a root split into radicals is well formed; raises an error otherwise.
-- @param rootTable    sequence of radicals (one string per radical)
-- @param joined_root  the same root joined with its separator, used in messages
-- Raises when `rootTable` is not a table, has fewer than three radicals, or
-- contains a radical longer than one character.
local function validateRoot(rootTable, joined_root)
	if type(rootTable) ~= "table" then
		-- level 2: report the error at the caller's position
		error("rootTable is not a table", 2)
	end
	if #rootTable < 3 then
		error("Root must have at least three radicals.")
	end
	-- Ordinal words for the error message; positions past the list fall back to "<n>th".
	local ordinal = {
		"first", "second", "third", "fourth", "fifth",
		"sixth", "seventh", "eighth", "ninth", "tenth",
	}
	for i, letter in ipairs(rootTable) do
		if mw.ustring.len(letter) > 1 then
			error("'" .. letter .. "', the " .. (ordinal[i] or (tostring(i) .. "th")) ..
				" letter in the root '" .. joined_root ..
				"' should be a single letter.")
		end
	end
end
-- Entry point for a root template.
--
-- Invocation arguments (frame.args):
--   lang   language code (required)
--   plain  if present, forces the plain comma-separated output
-- Template arguments (parent frame):
--   1, 2, ...  roots, radicals separated by the language's separator
--   nocat      boolean; with `plain`, selects the plain output without categories
--   plain      boolean
--   notext     boolean; accepted but not used in this function
--   sense      optional sense, appended in parentheses to link text and category names
--
-- When no root is given, the root is taken from the page title (with the
-- "Appendix:<language> roots/" prefix removed), or a sample root is used in the
-- Template namespace.
--
-- Returns a headword line when the page title is itself the (first) root;
-- otherwise either a comma-separated list of root links, or a floating table of
-- root links with per-root category links, term counts and categories.
-- Raises an error for a missing/invalid language code, a malformed root, a root
-- page in the main namespace, or several roots on a root page.
function export.root(frame)
	local title = mw.title.getCurrentTitle()
	local namespace = title.nsText

	local lang_arg = frame.args["lang"]
	if not lang_arg then
		error("Please provide a language code.")
	end
	lang = require("Module:languages").getByCode(lang_arg)
	if not lang then
		-- fail with a clear message instead of a nil-index error further down
		error("The language code '" .. lang_arg .. "' is not valid.")
	end
	local lang_name = lang:getCanonicalName()

	local subpage = "Appendix:" .. lang_name .. " roots/"
	-- escape every non-alphanumeric character so the prefix is matched literally
	local escaped_subpage = rsubn(subpage, "([^%w])", "%%%1")
	local fulltitle = rsubn(title.fullText, escaped_subpage, "")

	local params = {
		[1] = { list = true },
		["nocat"] = { type = "boolean" },
		["plain"] = { type = "boolean" },
		["notext"] = { type = "boolean" },
		["sense"] = {}
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)
	local roots = args[1]
	local plain = args["plain"]
	if frame.args["plain"] then
		plain = true
	end

	local langCode = lang:getCode()
	local separator = separator_langs[langCode] and " " or "-"

	-- Split every root into its radicals.
	local rootLetters = {}
	if #roots ~= 0 then
		for _, root in ipairs(roots) do
			table.insert(rootLetters, rsplit(root, separator))
		end
	elseif namespace == "Template" then
		-- template page viewed directly: show a sample root
		table.insert(rootLetters, rsplit(template_preview_per_langcode[langCode] or "ከ-ተ-በ", separator))
	else
		table.insert(rootLetters, rsplit(fulltitle, separator))
	end

	local joined_roots = {}
	for i, rootLetter in ipairs(rootLetters) do
		joined_roots[i] = table.concat(rootLetter, separator)
		validateRoot(rootLetter, joined_roots[i])
	end

	local sense = args["sense"]
	local sense_formatted = ""
	if sense ~= nil then
		sense_formatted = " (" .. sense .. ") "
	end

	-- Case 1: we are on the root's own page; emit a headword line.
	-- No categories are produced here, so `nocat` makes no difference.
	if fulltitle == joined_roots[1] then
		if namespace == "" then
			error("The root page should be in the Appendix namespace. Please move it to : [[" ..
				subpage .. joined_roots[1] .. "]]")
		end
		if #roots > 1 then
			error("There should be only one root.")
		end
		local headword = m_headword.full_headword({ lang = lang, pos_category = "roots", categories = {}, heads = { fulltitle }, nomultiwordcat = true, noposcat = true })
		return headword
	end

	-- Case 2: an entry page referring to one or more roots.
	local plain_output = args["nocat"] or plain
	local link_texts = {}
	local categories = {}
	local rows = {}
	for i, joined_root in ipairs(joined_roots) do
		local link_text = link(subpage .. joined_root, joined_root .. sense_formatted, sense)
		link_texts[i] = link_text
		if not plain_output then
			-- Category formatting and the (expensive) page count are only
			-- needed for the table output.
			local category = lang_name .. " terms belonging to the root " .. joined_root .. sense_formatted
			categories[i] = m_utilities.format_categories({ category }, lang)
			local term_count = mw.site.stats.pagesInCategory(category, "pages")
			rows[i] = '<tr><td>' ..
				link_text ..
				"</td></tr><tr><td>[[:Category:" ..
				category ..
				"|" ..
				term_count ..
				" term" .. (term_count == 1 and "" or "s") .. "]]</td></tr>"
		end
	end

	if plain_output then
		return table.concat(link_texts, ", ")
	end

	return
		'<table class="wikitable" style="float: right; clear: right; text-align: center;"><tr><th>[[w:Semitic root|Root' ..
		(#joined_roots == 1 and "" or "s") .. ']]</th></tr>' .. table.concat(rows) ..
		'</table>' .. table.concat(categories)
end
return export