Module:grc-sandhi

From Wiktionary, the free dictionary
Jump to navigation Jump to search

Testcases

[edit]
  • τῑμώντων
  • τῑμώ
  • τῑμᾱ́
  • τῑμᾱ́ͅ

local export = {}

local m_languages = require("Module:languages")
local m_scripts = require("Module:scripts")
local m_script_utils = require("Module:script utilities")
local m_utils_data = require("Module:grc-utilities/data")

-- Language and script objects handed to tag_text() by tag() below.
local lang = m_languages.getByCode("grc")
local sc = m_scripts.getByCode("Polyt")

-- Short aliases for the Unicode-aware string functions.
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local U = mw.ustring.char

-- Values pulled from Module:grc-utilities/data. They are spliced directly
-- into patterns below, so they are presumably pattern fragments / combining
-- characters -- verify against the data module.
local macron = m_utils_data.named.macron
local breve = m_utils_data.named.breve
local circumflex = m_utils_data.named.circum
-- NOTE(review): the name suggests groups[2] matches a diaeresis or a breathing
-- mark; confirm against the data module.
local diaeresisOrBreathing = m_utils_data.groups[2]
local subscript = m_utils_data.named.subscript
-- Character class listing the seven Greek vowel letters.
local vowel = "[αεηιοωυ]"
-- Used in process() as a pattern matching a single diacritic.
local diacritic = m_utils_data.all
-- U+25CC; process() puts it in front of diacritics when showing a rule in an
-- error message.
local dotted_circle = U(0x25CC)

-- Vowel-contraction rules, keyed by dialect code ("att" is the only dialect
-- defined here).
--
-- Each dialect holds a numbered list of rule tables; connect() walks that list
-- in order with ipairs, so every rule in [1] is applied before any rule in
-- [2]. Inside one rule table the rules are visited with pairs, so their
-- relative order is unspecified.
--
-- A key has the form "<first vowel> <second vowel>" (separated by one space);
-- the value is the text that replaces the pair.
local data = {
	["att"] = {
		-- Rules whose second element is a two-letter sequence.
		[1] = {
			-- NOTE(review): "ᾰ ει" used to be listed twice in this table, first
			-- with "ᾳ" and then with "ᾱ". A table can hold only one value per
			-- key, so the first entry was dead. The entry that took effect
			-- ("ᾱ") is kept; the shadowed one is preserved below as a comment.
			-- If both outcomes are needed, the two keys have to be made
			-- distinguishable.
--			["ᾰ ει"] = "ᾳ",
			["ᾰ ει"] = "ᾱ",
			["ᾰ οι"] = "ῳ",
			["ᾰ ου"] = "ω",
			["ᾰ αι"] = "αι",
		},
		-- Rules whose second element is a single letter.
		[2] = {
			["ᾰ ᾰ"] = "ᾱ",
			["ᾱ ᾰ"] = "ᾱ",
			["ᾰ ε"] = "ᾱ",
			["ᾱ ε"] = "η",
			["ᾰ ᾳ"] = "ᾳ",
			["ᾰ η"] = "ᾱ",
			["ᾰ ῃ"] = "ᾳ",
			["α ι"] = "αι",
			["ᾰ ο"] = "ω",
			["ᾰ ω"] = "ω",
--			[""] = "",
		},
	},
}

-- Hands `text` to Module:script utilities' tag_text together with the
-- module-level `lang` and `sc` objects, and returns whatever tag_text returns.
local function tag(text)
	return m_script_utils.tag_text(
		text,
		lang,
		sc,
		nil
	)
end

-- Turns one rule from `data` into a pattern/replacement pair for
-- mw.ustring.gsub.
--
-- regex:       rule key of the form "<vowel1> <vowel2>" (two parts separated
--              by a whitespace character).
-- replacement: text that replaces the matched vowel pair.
-- err:         optional table. Messages about a malformed key are appended to
--              it. When it is omitted, a malformed key raises an error.
--
-- Returns two strings: the pattern and the replacement. For a malformed key
-- the pair is (decomposed key, "%0"), i.e. a substitution that changes nothing.
local function process(regex, replacement, err)
	regex = mw.ustring.toNFD(regex)

	-- mw.text.split always returns a table, so a key without whitespace shows
	-- up as a missing second element rather than as a nil result.
	local vowels = mw.text.split(regex, "%s")

	if vowels[1] and vowels[2] then
		-- Capture any diacritics that follow the first vowel (and allow an
		-- optional space before the second vowel) so the captured diacritics
		-- can be re-attached after the replacement via %1.
		return vowels[1] .. "(" .. diacritic .. "*) ?" .. vowels[2], replacement .. "%1"
	end

	-- Malformed rule. Build a display form of the key in which every diacritic
	-- sits on a dotted circle; the pattern itself is left untouched.
	local shown = tag((gsub(regex, "(" .. diacritic .. ")", dotted_circle .. "%1")))

	local messages = {}
	for index = 2, 1, -1 do
		if not vowels[index] then
			messages[#messages + 1] = "Regex for vowels[" .. index
				.. "] didn't find a match in \"" .. shown .. "\"."
		end
	end

	if type(err) == "table" then
		for _, message in ipairs(messages) do
			table.insert(err, "<br>" .. message)
		end
	else
		error(table.concat(messages, " "))
	end

	-- "%0" re-inserts each match unchanged, so the broken rule is reported
	-- but does not alter the text.
	return regex, "%0"
end

-- Applies the contraction rules of one dialect to `text`.
--
-- text:    non-empty string to transform.
-- dialect: key into `data`; defaults to "att" when nil or false.
--
-- Returns the transformed text (in decomposed form, NFD) and a string holding
-- the concatenated messages collected in the local `err` list.
-- Raises an error when the dialect is unknown or `text` is not a non-empty
-- string.
local function connect(text, dialect)
	local err = {}

	local dialect_code = dialect or "att"
	local rule_tables = data[dialect_code] or error('Dialect "' .. dialect_code .. '" not recognized')

	if type(text) ~= "string" or text == "" then
		error("Text is invalid.")
	end

	text = mw.ustring.toNFD(text)
	-- Insert a macron after α, ι or υ when the letter is followed by a
	-- diaeresisOrBreathing match and then a circumflex.
	text = gsub(text, "([αιυ])(" .. diaeresisOrBreathing .. ")" .. circumflex, "%1" .. macron .. "%2" .. circumflex)

	-- Rule tables are applied in numeric order; the order of the rules inside
	-- one table is whatever pairs yields.
	for _, replacement_table in ipairs(rule_tables) do
		for regex, replacement in pairs(replacement_table) do
			-- The message list is offered to process as a third argument so
			-- that malformed rules can be reported in the returned string.
			text = gsub(text, process(regex, replacement, err))
		end
	end

	return text, table.concat(err)
end

-- Template entry point.
--
-- With two positional arguments, the first is the dialect and the second is
-- the text; with one, it is the text and connect() picks its default dialect.
--
-- Returns the result of tag() on the transformed text, followed by any
-- messages returned by connect().
function export.show(frame)
	local args = frame.args
	local dialect = args[2] and args[1]
	local text = args[2] or args[1]

	-- `err` is declared local here; assigning it without a declaration would
	-- create a global that outlives this call.
	local connected, err = connect(text, dialect)

	return tag(connected) .. err
end

return export