Module:User:Erutuon/grc/Latin to Greek
Appearance
- The following documentation is located at Module:User:Erutuon/grc/Latin to Greek/documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
A function that converts from ASCII to Ancient Greek. Based on Module:typing-aids and Module:typing-aids/data/grc, but faster.
local reorder_diacritics = require("Module:grc-utilities").reorderDiacritics
-- Library shortcuts: the Unicode-aware Ustring functions for building and
-- classifying characters, and the byte-oriented gsub for bulk replacement.
local ustring = mw.ustring
local U, ufind = ustring.char, ustring.find
local str_gsub = string.gsub

-- Byte-level patterns for the plain string library.
-- One lead (or ASCII) byte followed by any continuation bytes: roughly "."
-- in a Ustring pattern.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
-- Same, but the lead byte is optional: roughly ".?" in a Ustring pattern.
local one_UTF8_char_or_none = "[%z\1-\127\194-\244]?[\128-\191]*"

-- Vowel-length marks: the combining forms plus the spacing look-alikes that
-- are accepted as input.
local macron, breve = U(0x304), U(0x306)
local spacing_macron = U(0xAF)
local modifier_macron = U(0x2C9) -- modifier letter macron
local spacing_breve = "˘"

-- Breathings: combining forms and their spacing counterparts.
local rough = U(0x314) -- reversed comma above
local smooth = U(0x313) -- comma above
local spacing_rough = "῾"
local spacing_smooth = "᾿"

-- Accents.
local acute, grave = U(0x301), U(0x300)
local circumflex = U(0x342) -- Greek circumflex (perispomeni)

-- Remaining combining marks and punctuation.
local diaeresis = U(0x308)
local subscript = U(0x345) -- iota subscript (ypogegrammeni)
local question_mark = U(0x37E) -- Greek question mark
-- Input sequences longer than one character. Each key is handed to
-- string.gsub as a pattern, and all of them are applied before the
-- one-character lookup, so keys must not contain unescaped pattern magic
-- characters and should not overlap one another (pairs() order is unspecified).
local multiple = {
	["_i"] = subscript,
	-- "{{=}}" has to be replaced as a whole sequence: a lookup that proceeds
	-- one character at a time never sees a five-character key, and would turn
	-- only the "=" into a circumflex while leaving the braces behind.
	["{{=}}"] = circumflex,
}
-- Lookup table for one input character at a time: Latin letters map to Greek
-- letters, ASCII symbols map to combining diacritics or Greek punctuation.
-- Characters without an entry are left as they are by the gsub table lookup.
local single = {
	-- letters (lowercase, then uppercase)
	a = "α", A = "Α",
	b = "β", B = "Β",
	c = "ξ", C = "Ξ",
	d = "δ", D = "Δ",
	e = "ε", E = "Ε",
	f = "φ", F = "Φ",
	g = "γ", G = "Γ",
	h = "η", H = "Η",
	i = "ι", I = "Ι",
	k = "κ", K = "Κ",
	l = "λ", L = "Λ",
	m = "μ", M = "Μ",
	n = "ν", N = "Ν",
	o = "ο", O = "Ο",
	p = "π", P = "Π",
	q = "θ", Q = "Θ",
	r = "ρ", R = "Ρ",
	s = "σ", S = "Σ",
	t = "τ", T = "Τ",
	u = "υ", U = "Υ",
	v = "ϝ", V = "Ϝ",
	w = "ω", W = "Ω",
	x = "χ", X = "Χ",
	y = "ψ", Y = "Ψ",
	z = "ζ", Z = "Ζ",

	-- long vowel: underscore or either spacing macron
	["_"] = macron,
	[spacing_macron] = macron,
	[modifier_macron] = macron,

	-- short vowel: caret or spacing breve
	["^"] = breve,
	[spacing_breve] = breve,

	-- diaeresis, then rough and smooth breathing
	["+"] = diaeresis,
	["("] = rough,
	[")"] = smooth,

	-- acute and grave
	["/"] = acute,
	["\\"] = grave,

	-- circumflex, under several spellings
	["="] = circumflex,
	["{{=}}"] = circumflex,
	["~"] = circumflex,

	-- punctuation
	["'"] = "’",
	["?"] = question_mark,
	[";"] = "·",

	-- "*" is dropped from the output; typed after an s, it keeps that s
	-- from becoming a final sigma
	["*"] = "",

	-- "!" stands in for the pipe character
	["!"] = "|",
}
--- Replace every Latin "s" with a Greek sigma, choosing between the
-- word-final form (ς) and the ordinary form (σ) by looking at the character
-- that comes next.
--
-- An "s" becomes ς when nothing follows it, or when the next character
-- matches the Ustring class [%s%p] and is neither "*" nor "-". In every other
-- case it becomes σ.
--
-- The next character is only inspected, never consumed by the match. If it
-- were consumed, the second of two consecutive "s" would be skipped by gsub
-- and never checked, so "ss" at the end of a word would come out as σσ
-- instead of σς.
--
-- @param text UTF-8 string to convert
-- @return text with each "s" replaced by σ or ς; other characters untouched
local function convert_s_to_sigma(text)
	-- Anchored at the position just past an "s"; it can match the empty
	-- string, so string.match always returns a string here.
	local next_char_pattern = "^" .. one_UTF8_char_or_none

	-- "s()" matches only the "s"; the position capture says where to peek.
	local converted = str_gsub(text, "s()", function (next_pos)
		local following = string.match(text, next_char_pattern, next_pos)
		local is_final = following == ""
			or (following ~= "*" and following ~= "-"
				and ufind(following, "[%s%p]"))
		return is_final and "ς" or "σ"
	end)
	return converted
end
--- Swap a combining breathing for its spacing counterpart when the match
-- finds no character in front of it.
--
-- For the rough breathing and then the smooth breathing, every occurrence of
-- the combining mark is matched together with the optional UTF-8 character
-- before it. When that captured character is empty the mark is replaced by
-- the spacing form; otherwise the character and its combining mark are kept.
--
-- @param text UTF-8 string that may contain combining breathings
-- @return the string with unattached breathings made spacing
local function combining_to_spacing(text)
	-- each entry: combining mark first, spacing equivalent second
	local breathings = {
		{ rough, spacing_rough },
		{ smooth, spacing_smooth },
	}
	for _, pair in ipairs(breathings) do
		local combining_mark, spacing_mark = pair[1], pair[2]
		local pattern = "(" .. one_UTF8_char_or_none .. ")" .. combining_mark
		text = str_gsub(text, pattern, function (before)
			if before ~= "" then
				return before .. combining_mark
			end
			return spacing_mark
		end)
	end
	return text
end
--- Module entry point: convert ASCII-typed text to Ancient Greek.
--
-- Steps, in order:
--   1. turn each "s" into σ or ς (must happen while "s" is still Latin);
--   2. replace the multi-character sequences listed in `multiple`;
--   3. map each remaining character through the `single` lookup table;
--   4. make unattached combining breathings spacing;
--   5. pass the result to reorderDiacritics from Module:grc-utilities
--      (presumably puts the combining marks in canonical order; see that
--      module for the exact contract).
--
-- @param text string to convert
-- @return whatever reorderDiacritics returns for the converted text
return function(text)
	local converted = convert_s_to_sigma(text)
	for sequence, replacement in pairs(multiple) do
		converted = str_gsub(converted, sequence, replacement)
	end
	-- extra parentheses drop gsub's second result (the substitution count)
	converted = combining_to_spacing((str_gsub(converted, UTF8_char, single)))
	return reorder_diacritics(converted)
end