Jump to content

Module:R:Perseus/polytonic-to-perseus-betacode

From Wiktionary, the free dictionary

This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.

-- Module table; the public conversion function is attached to it further down
-- and the table is returned at the end of the file.
local export = {}

-- Local aliases for the mw.ustring functions this module uses.
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
-- `u` is called below with numeric code points to build the table keys.
local u = mw.ustring.char
local ulower = mw.ustring.lower

-- Lookup table from a single (decomposed) input character to its betacode
-- replacement. Keys are lowercase letters, diacritics and punctuation;
-- capitals are handled by `convert`, which lowercases before looking up.
-- An empty string means the character is dropped from the output.
local letters = {
	-- Letters, including archaic letters and numeral signs.
	["α"] = "a",
	["β"] = "b",
	["γ"] = "g",
	["δ"] = "d",
	["ε"] = "e",
	["ϝ"] = "v",
	["ϛ"] = "#2",
	["ζ"] = "z",
	["η"] = "h",
	["θ"] = "q",
	["ι"] = "i",
	["κ"] = "k",
	["λ"] = "l",
	["μ"] = "m",
	["ν"] = "n",
	["ξ"] = "c",
	["ο"] = "o",
	["π"] = "p",
	["ϟ"] = "#1",
	["ϙ"] = "#3",
	["ρ"] = "r",
	["σ"] = "s",
	["ς"] = "s",
	["ϲ"] = "s3",
	["τ"] = "t",
	["υ"] = "u",
	["φ"] = "f",
	["χ"] = "x",
	["ψ"] = "y",
	["ω"] = "w",
	["ϡ"] = "#5",

	-- Psili (smooth breathing) and look-alikes.
	[u(0x313)] = ")",
	[u(0x343)] = ")",
	[u(0x2BC)] = ")",
	[u(0x1FBD)] = ")",
	[u(0x1FBF)] = ")",

	-- Dasia (rough breathing) and look-alikes.
	[u(0x314)] = "(",
	[u(0x2BD)] = "(",
	[u(0x1FFE)] = "(",

	-- Acute accent.
	[u(0x301)] = "/",
	[u(0x341)] = "/",
	[u(0xB4)] = "/",
	[u(0x384)] = "/",
	[u(0x1FFD)] = "/",

	-- Perispomeni (circumflex).
	[u(0x342)] = "=",
	[u(0x1FC0)] = "=",

	-- Grave accent.
	[u(0x300)] = "\\",
	[u(0x340)] = "\\",
	[u(0x60)] = "\\",
	[u(0x1FEF)] = "\\",

	-- Dialytika (diaeresis).
	[u(0x308)] = "+",
	[u(0xA8)] = "+",

	-- Ypogegrammeni (iota subscript).
	[u(0x345)] = "|",
	[u(0x1FBE)] = "|",

	-- Combining dot below.
	[u(0x323)] = "?",

	-- Ano teleia (raised dot).
	[u(0xB7)] = ":",
	[u(0x387)] = ":",

	-- Erotimatiko (Greek question mark).
	[u(0x37E)] = ";",

	-- Length marks and hyphens are removed from the output.
	[u(0x304)] = "", -- macron
	[u(0x306)] = "", -- breve
	["-"] = "",
	[u(0x2010)] = "",

	-- Apostrophe.
	[u(0x2019)] = "'",
}

-- Translate one input character to betacode using `letters`.
--
-- A character found directly in the table yields its mapped value. Otherwise
-- the character is lowercased; if that changes it and the lowercase form is in
-- the table, the mapped value is returned prefixed with "*" (capital marker).
-- Returns nil when no mapping applies, so a gsub using this function as its
-- replacement leaves the original character in place.
local function convert(ch)
	local direct = letters[ch]
	if direct then
		return direct
	end

	local lowered = ulower(ch)
	if lowered == ch then
		-- Lowercasing changed nothing, so there is nothing else to try.
		return nil
	end

	local mapped = letters[lowered]
	if mapped then
		return "*" .. mapped
	end
	return nil
end

-- Normalise the diacritics attached to one betacode letter.
--
-- base:       the letter code (e.g. "a" or "#2").
-- diacritics: a run of betacode diacritic characters in arbitrary order.
-- pre:        truthy for capitals (the capital patterns pass a position
--             capture here); nil for lowercase letters.
--
-- Each distinct diacritic present is emitted once, always in the fixed order
-- ) ( / \ = | +. For lowercase the marks follow the base; for capitals the
-- result is "*", then the marks, then the base.
local function handle_char(base, diacritics, pre)
	local canonical_order = { ")", "(", "/", "\\", "=", "|", "+" }

	local present = {}
	for _, mark in ipairs(canonical_order) do
		-- Plain-text search: several of these marks are magic in Lua patterns.
		if diacritics:find(mark, 1, true) then
			present[#present + 1] = mark
		end
	end

	local marks = table.concat(present)
	if pre then
		return "*" .. marks .. base
	end
	return base .. marks
end

-- Convert a polytonic Greek string to betacode.
--
-- The input is decomposed to NFD so that each diacritic is a separate
-- character, every UTF-8 character is translated through `convert`, and then
-- `handle_char` puts the diacritics of each letter into a fixed order. The
-- result is passed through toNFC before being returned.
function export.polytonic_to_perseus_betacode(polytonic)
	-- ".[\128-\191]*" matches one lead byte plus its UTF-8 continuation bytes,
	-- i.e. one whole character at a time with the byte-oriented string library.
	local text = toNFD(polytonic):gsub(".[\128-\191]*", convert)

	-- Capital forms ("*" prefix) are handled before the lowercase forms; the
	-- trailing "()" position capture is what marks a match as a capital for
	-- handle_char.
	local reorder_passes = {
		"%*(%l)([)(/\\=|+]+)()",
		"%*(#%d+)([)(/\\=|+]+)()",
		"(%l)([)(/\\=|+]+)",
		"(#%d+)([)(/\\=|+]+)",
	}
	for _, pattern in ipairs(reorder_passes) do
		text = text:gsub(pattern, handle_char)
	end

	return toNFC(text)
end

return export