Jump to content

Module:grc-utilities/betacode

From Wiktionary, the free dictionary


local export = {}

local m_str_utils = require("Module:string utilities")

local concat = table.concat
local explode = m_str_utils.explode_utf8
local insert = table.insert
local sort = table.sort
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local u = m_str_utils.char
local ulower = m_str_utils.lower
local umatch = mw.ustring.match
local uupper = m_str_utils.upper

-- Lookup table from a Beta Code unit (one character plus any digits that
-- directly follow it) to the text that replaces it in the Greek output.
-- `u` is `m_str_utils.char`; presumably it turns a code point into a string
-- (TODO confirm against Module:string utilities).
local beta_chars = {
	-- Plain letters.
	a = "α",
	b = "β",
	c = "ξ",
	d = "δ",
	e = "ε",
	f = "φ",
	g = "γ",
	h = "η",
	i = "ι",
	k = "κ",
	l = "λ",
	m = "μ",
	n = "ν",
	o = "ο",
	p = "π",
	q = "θ",
	r = "ρ",
	-- Sigma: bare "s" plus the numbered variants.
	s = "σ",
	s1 = "σ",
	s2 = "ς",
	s3 = "ϲ",
	t = "τ",
	u = "υ",
	v = "ϝ",
	w = "ω",
	x = "χ",
	y = "ψ",
	z = "ζ",
	-- "#" followed by a number.
	["#1"] = "ϟ",
	["#2"] = "ϛ",
	["#3"] = "ϙ",
	["#5"] = "ϡ",
	["#400"] = "ⱶ",
	["#401"] = "ϳ",
	["#711"] = "ϻ",
	-- Signs that map to combining code points.
	[")"] = u(0x313),
	["("] = u(0x314),
	["/"] = u(0x301),
	["\\"] = u(0x300),
	["="] = u(0x342),
	["+"] = u(0x308),
	["|"] = u(0x345),
	["?"] = u(0x323),
}

-- Diacritic signs listed in the order they are sorted into after a letter.
-- `invert` presumably turns this list into a sign -> position lookup
-- (TODO confirm against Module:table); `sort_trail` compares the results.
local trail_weights = require("Module:table").invert({
	")",
	"(",
	"+",
	"/",
	"\\",
	"=",
	"|",
})

-- Comparator for `sort`: orders two diacritic signs by their entries in
-- `trail_weights`. Strict `<`, so equal signs never compare as "less".
local function sort_trail(a, b)
	local weight_a, weight_b = trail_weights[a], trail_weights[b]
	return weight_a < weight_b
end

--- Converts a Beta Code string into Greek script.
--
-- The input is consumed left to right in units of "one character plus any
-- digits that follow it" (e.g. "a", "s2", "#711"), each optionally followed
-- by a run of diacritic signs drawn from `)(+/\=|`. Units found in
-- `beta_chars` (looked up in lower case) are replaced; anything else is
-- copied through unchanged. A "*" marks the following unit as a capital;
-- diacritic signs written between the "*" and the letter are attached to
-- that letter. A bare "s" is turned into a final sigma when it ends a word.
--
-- @param text string: the Beta Code input.
-- @return string: the converted text, passed through `toNFC`.
function export.beta_to_Grek(text)
	-- `output` holds one entry per converted unit; `start` is the byte offset
	-- of the next unit; `s` (created lazily) records which `output` indices
	-- came from a bare "s".
	local output, start, s = {}, 1
	while true do
		-- ch: base character plus digits; trail: diacritic signs after it;
		-- loc: offset just past the match (nil once the input is exhausted).
		local ch, trail, loc, beta_ch = text:match("^(.%d*)([)(+/\\=|]*)()", start)
		if not loc then
			break
		elseif #trail > 1 then
			-- Put multiple diacritic signs into the fixed `trail_weights`
			-- order, regardless of the order they were typed in.
			trail = explode(trail)
			sort(trail, sort_trail)
			trail = concat(trail)
		end
		if ch == "*" then
			-- Capital marker: the letter itself is the next unit.
			ch, loc = text:match("^(.%d*)()", loc)
			-- `.%d*` must consume at least one character, so when nothing
			-- follows the "*" the match fails and `ch` is nil (it can never
			-- be ""). Emit the literal "*" with any converted signs and stop,
			-- since the input has been fully consumed.
			if not ch then
				insert(output, (("*" .. trail):gsub(".%d*", beta_chars)))
				break
			end
			beta_ch = beta_chars[ulower(ch)]
			-- Upper-case only the first UTF-8 character (lead byte plus its
			-- continuation bytes) of the replacement.
			beta_ch = beta_ch and beta_ch:gsub("^.[\128-\191]*", uupper)
		else
			beta_ch = beta_chars[ulower(ch)]
			if ch == "s" then
				if not s then
					s = {}
				end
				-- Remember the index this sigma is about to occupy.
				s[#output + 1] = true
			end
		end
		-- Unknown units fall back to the raw input; the diacritic signs are
		-- converted via the same table.
		insert(output, (beta_ch or ch) .. trail:gsub(".%d*", beta_chars))
		start = loc
	end
	-- Convert any final sigmas from "σ" to "ς".
	-- `s` is used to keep track of these, because only sigmas encoded as "s"
	-- should be converted (i.e. not those encoded as "s1", which are static).
	if s then
		for loc in pairs(s) do
			-- A final sigma must be preceded by a word character, but not
			-- followed by one (e.g. "λόγος", but not "σῖγμα" or a lone "σ").
			local prev = output[loc - 1]
			if prev and umatch(prev, "%w") then
				local nxt = output[loc + 1]
				if not (nxt and umatch(nxt, "%w")) then
					output[loc] = output[loc]:gsub("σ", "ς")
				end
			end
		end
	end
	return toNFC(concat(output))
end

return export