Module:User:Sarri.greek/grk-translit-modern
Appearance
- The following documentation is located at Module:User:Sarri.greek/grk-translit-modern/documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
User:Sarri.greek (CAT) » grk-translit-modern doc » test?
- grk-translit-classic is Module:grc-translit |tr= for grc, grc-koi & their dialects, Katharevousa and learned texts
- grk-translit-modern is |tr= for gkm, el, their dialects -- See Module:el-translit
- gkm-transcript & el-transcript are |ts= for gkm, el with IPA symbols, accent-on-vowel.
Used OUTSIDE Module:el-IPA or gkm-IPA as e.g. at inflectional tables.
-- 2024.03.06. [[wikt:en:User:Sarri.greek]]
-- tests at [[Module talk:User:Sarri.greek/grk-translit-modern]]
-- This is a version of [[Module:grc-translit]]
-- See [[Module:el-translit]]
--[=[
* grk-translit-classic = for Ancient Greek grc, Koine, grc-koi, learned Medieval & their dialects, Katharevousa el-kth
-- script polytonic Greek
* grk-translit-modern = for any Medieval Greek gkm, Modern Greek & their dialects,
-- script monotonic or polytonic Greek (any script may be found in quotations)
* Learned Medieval Greek is transliterated exactly as Ancient Greek script (rho with daseia/rough, hypogegrammeni)
* Main Medieval Greek (vulgar) rho was written with or without daseia.
Transliterate like Modern Greek ISO843 (TypeB, slightly more phonemic than TypeA,
i macron ī for eta, o macron ō for omega
with corrections γ=gh, δ=dh, χ=kh as proposed for a mixed type C)
Pronunciation as at [[Template:R:gkm:Grammar Cambrdige]]
In modern, prosody marks are not needed, but are kept for possible examples of metrics in poetry.
CORRECTIONS - PROBLEMS
* add ligatures for quotations only? -- no, we can use param substitute at Template:quote
HOW it is USED?? [[Template:xlit]] has:
<onlyinclude>{{{{{|safesubst:}}}#invoke:languages/templates|getByCode|{{{1|und}}}|transliterate|{{{{{|safesubst:}}}#invoke:links|remove_links|{{{2}}}}}|{{{sc|}}}|{{{module|}}}}}</onlyinclude>
]=]--
local export = {}
local m_data = require('Module:grc-utilities/data')
-- Break Greek text into units of a single consonant or monophthong letter, or diphthong, with any diacritics
local tokenize = require('Module:grc-utilities').tokenize
--local ufind = mw.ustring.find --
--local ugsub = mw.ustring.gsub --
--local U = mw.ustring.char --
--local ulower = mw.ustring.lower --
--local uupper = mw.ustring.upper --
-- Lua pattern matching one UTF-8 encoded character: a lead byte (NUL via %z,
-- ASCII \1-\127, or a multi-byte lead byte \194-\244) followed by any number
-- of continuation bytes (\128-\191).
local UTF8char = '[%z\1-\127\194-\244][\128-\191]*'
-- Diacritics, looked up by name in Module:grc-utilities/data
--[=[ that data module says:
local U = require("Module:string/char")
]=]--
local diacritics = m_data.named
-- Greek
local acute = diacritics.acute -- U(0x301) oxia ´, also used as the general tonos
local grave = diacritics.grave -- U(0x300) varia `
local circumflex = diacritics.circum -- U(0x342) perispomeni ῀
-- Latin_circum = U(0x302)
local diaeresis = diacritics.diaeresis -- U(0x308) dialytika ¨
local smooth = diacritics.smooth -- U(0x313) psili ᾿
local rough = diacritics.rough -- U(0x314) dasia ῾
local macron = diacritics.macron -- U(0x304) macron ˉ , normally not needed, needed exceptionally in quotations
-- spacing_macron = U(0xAF)
-- modifier_macron = U(0x2C9)
local breve = diacritics.breve -- U(0x306) brachy ˘ , normally not needed
-- spacing_breve = U(0x2D8)
local subscript = diacritics.subscript -- U(0x345) iota subscript (hypogegrammeni)
-- Open question: is the adscript (prosgegrammeni) written out as a plain i? See a_subscript below.
-- The data module ALSO has
-- coronis = U(0x343)
-- undertie = U(0x35C) -- actually "combining double breve below"
-- Latin
local hat = diacritics.Latin_circum -- Latin_circum = U(0x302)
-- Pattern: a macron, an optional diaeresis, then the Latin circumflex.
-- export.translit uses it to find a transliteration carrying both a macron
-- and a circumflex, so that the macron can be removed.
local macron_diaeresis = macron .. diaeresis .. "?" .. hat
-- Pattern: a token that starts with alpha (either case) and ends with the iota subscript.
local a_subscript = '^[αΑ].*' .. subscript .. '$'
-- The velar letters; in the code visible here this is referenced only from commented-out code.
local velar = 'κγχξ'
-- Character-by-character transliteration map. Keys are single lowercase Greek
-- letters or combining diacritics; values are their Latin replacements.
-- export.translit applies it with gsub over every UTF-8 character of a
-- lowercased token; characters that have no entry are left unchanged.
local tt = {
-- Vowels
["α"] = "a",
["ε"] = "e",
["η"] = "i" .. macron, -- author's note: 'ī' with macron looks bad, like a perispomeni; the classic ē is more reminiscent of eta
["ι"] = "i",
["ο"] = "o",
["υ"] = "u",
["ω"] = "o" .. macron, -- ō
-- Consonants
["β"] = "v", -- instead of the ancient b
["γ"] = "gh", -- instead of g
["δ"] = "dh", -- instead of d
["ζ"] = "z",
["θ"] = "th",
["κ"] = "k",
["λ"] = "l",
["μ"] = "m",
["ν"] = "n",
["ξ"] = "ks", -- open question: instead of x
["π"] = "p",
["ρ"] = "r",
["σ"] = "s",
["ς"] = "s",
["τ"] = "t",
["φ"] = "f", -- open question: instead of the latinization ph
["χ"] = "kh",
["ψ"] = "ps",
-- Archaic letters (Ancient Greek); in modern text they may appear in quotations and, some of them, in the numbering system.
-- They are deliberately kept as Greek letters rather than transliterated.
["ϝ"] = "Ϝ", -- not transliterated to "w"; always output as the capital Ϝ
["ϻ"] = "ϻ", -- not transliterated to "ś"
["ϙ"] = "Ϙ", -- not transliterated to "q"; always output as the capital Ϙ
["ϡ"] = "ϡ", -- not transliterated to "š"
["ͷ"] = "ͷ", -- not transliterated to "v" (number)
-- Special characters, for quotations only
-- Incorrect characters: see [[Wiktionary:About Ancient Greek#Miscellaneous]].
-- These are tracked by [[Module:script utilities]].
["ϐ"] = "v", -- instead of 'b'
["ϑ"] = "th",
["ϰ"] = "k",
["ϱ"] = "r",
["ϲ"] = "s",
["ϕ"] = "f", -- instead of ph
-- Diacritics
-- macron, diaeresis, grave and acute have no entry, so they pass through unchanged
[breve] = '', -- brachy: dropped
[smooth] = '', -- psili: dropped
[rough] = '', -- dasia: dropped
[circumflex] = hat, -- perispomeni becomes the Latin circumflex
[subscript] = 'i', -- iota subscript (hypogegrammeni) is written out as i
}
-- change name from export.tr to export.translit
-- Transliterates Medieval/Modern Greek text (monotonic or polytonic) into
-- Latin script. The text is split into tokens by `tokenize`; each token is
-- mapped character by character through `tt`, and a few digraphs are then
-- handled specially.
--
-- Parameters:
--   text (string)  the Greek text to transliterate.
--   lang, sc       not used by this function.
-- Returns:
--   the transliterated string.
--
-- Digraph rules, following the notes kept in this module:
--   μπ      -> "b" at the start of a word, "mb" elsewhere
--   γκ      -> "g" at the start of a word (Γκάνα), "nk" elsewhere
--   γγ      -> "ng" (never "ngh")
--   γχ, γξ  -> "nkh", "nks" (γ becomes n, the second letter is mapped normally)
-- TODO: ντ is not handled yet.
-- NOTE(review): medial γκ -> "nk" follows the note "γ before a velar should be
-- <n> but not at the beginning of a word"; confirm this is the wanted output.
function export.translit(text, lang, sc)
	-- A lone rough breathing (dasia): Ancient Greek gives "h", but from
	-- Medieval Greek onwards it is not rendered at all.
	if text == '῾' then
		return ''
	end

	-- Replace semicolon or Greek question mark with a regular question mark,
	-- except after an ASCII alphanumeric character (to avoid converting
	-- semicolons in HTML entities).
	text = mw.ustring.gsub(text, "([^A-Za-z0-9])[;" .. mw.ustring.char(0x37E) .. "]", "%1?")
	-- The middle dot is rendered as a semicolon.
	text = text:gsub("·", ";")

	-- tokenize is expected to return a sequence (tokens 1..n without holes).
	local tokens = tokenize(text)

	-- True when token i starts a word, i.e. there is no previous token or the
	-- previous token contains no letter (space, hyphen, apostrophe, ...).
	-- Diacritics are part of their letter's token, so a token without any
	-- letter cannot belong to the same word.
	local function is_word_initial(i)
		local previous = tokens[i - 1]
		return previous == nil or not mw.ustring.find(previous, "%a")
	end

	local output = {}
	-- Set when the current token has already produced the output for the
	-- token that follows it (second letter of a digraph).
	local skip_next = false

	-- A numeric loop guarantees left-to-right order; pairs() does not.
	for i = 1, #tokens do
		if skip_next then
			skip_next = false
		else
			local token = tokens[i]
			-- All comparisons are done on the lowercased token, so capitals
			-- follow the same rules; the case is restored at the end.
			local lowered = mw.ustring.lower(token)
			local following = tokens[i + 1] and mw.ustring.lower(tokens[i + 1])

			local translit
			if mw.ustring.find(lowered, a_subscript) then
				-- Alpha with iota subscript: the subscript is not written out.
				-- Removing only the subscript keeps any accent on the alpha.
				translit = lowered:gsub(subscript, ''):gsub(UTF8char, tt)
			else
				-- Substitute each character for its transliteration.
				translit = lowered:gsub(UTF8char, tt)
			end

			if lowered == "μ" and following == "π" then
				translit = is_word_initial(i) and "b" or "mb"
				skip_next = true
			elseif lowered == "γ" then
				if following == "γ" then
					translit = "ng"
					skip_next = true
				elseif following == "κ" then
					if is_word_initial(i) then
						translit = "g"
						skip_next = true
					else
						-- κ is transliterated normally by the next iteration.
						translit = "n"
					end
				elseif following == "χ" or following == "ξ" then
					-- χ / ξ are transliterated normally by the next iteration.
					translit = "n"
				end
			end
			-- Unlike the Ancient Greek module, ρ after ρ and the rough
			-- breathing need no special case: ρ is always "r" and the rough
			-- breathing is dropped by tt.

			-- Remove the macron from a vowel that also has a circumflex.
			if mw.ustring.find(translit, macron_diaeresis) then
				translit = translit:gsub(macron, '')
			end

			-- Capitalize the first character if the token was not all lowercase.
			if token ~= lowered then
				translit = translit:gsub("^" .. UTF8char, mw.ustring.upper)
			end

			output[#output + 1] = translit
		end
	end

	return table.concat(output)
end
-- Module_talk:User:Sarri.greek/grk-translit-modern
-- ============= use it with arguments =============== --
-- Entry point for {{#invoke:}}: transliterates the first argument.
-- Reads the argument keyed '1' from the invoking frame (frame.args, not the
-- parent template's arguments). Returns '' when the argument is missing or
-- empty; otherwise returns the result of export.translit for it.
function export.get_tr(frame)
	local lemma = frame.args['1']
	if lemma == nil or lemma == '' then
		return lemma or ''
	end
	local transliterated = export.translit(lemma)
	return transliterated
end
return export
-- check [[Module:el-translit]] for αυ, ευ, ηυ, μπ initial,
-- add ντ initial, γκ initial (we have delta = dh, and gamma = gh)
--[=[
text = gsub(text, "([αεηΑΕΗ])([υύ])()",
function (vowel, upsilon, position)
-- Find next character that is not whitespace or punctuation.
local following = ""
while true do
local next = mw.ustring.sub(text, position, position)
if next == "" then -- reached end of string
break
elseif next:find "[%s%p]" then
position = position + 1
else
following = next
break
end
end
return tt[vowel]
.. (upsilon == "ύ" and acute or "")
.. ((following == "" or ("θκξπσςτφχψ"):find(following, 1, true)) and "f" or "v")
end)
text = gsub(text, "([αεοωΑΕΟΩ])([ηή])",
function (vowel, ita)
if ita == "ή" then
return tt[vowel] .. "i" .. diaeresis .. acute
else
return tt[vowel] .. "i" .. diaeresis
end
end)
text = gsub(text, "[ωΩ][ιί]",
{["ωι"] = "oï", ["ωί"] = "oḯ",
["Ωι"] = "Oï", ["Ωί"] = "Oḯ"})
text = gsub(text, "[οΟ][υύ]",
{["ου"] = "ou", ["ού"] = "oú",
["Ου"] = "Ou", ["Ού"] = "Oú"})
text = gsub(text, "(.?)([μΜ])π",
function (before, mi)
if before == "" or before == " " or before == "-" then
if mi == "Μ" then
return before .. "B"
else
return before .. "b"
end
end
end)
]=]--