Module:User:Sarri.greek/grk-translit-modern

The following documentation is located at Module:User:Sarri.greek/grk-translit-modern/documentation. ^[edit]
Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
User:Sarri.greek (CAT) » grk-translit-modern ^doc » test?
grk-translit-classic is Module:grc-translit |tr= for grc, grc-koi & their dialects, Katharevousa and learned texts
grk-translit-modern is |tr= for gkm, el, their dialects -- See Module:el-translit
gkm-transcript & el-transcript are |ts= for gkm, el with IPA symbols, accent-on-vowel.
Used OUTSIDE Module:el-IPA or gkm-IPA as e.g. at inflectional tables.
-- 2024.03.06. [[wikt:en:User:Sarri.greek]] 
-- tests at [[Module talk:User:Sarri.greek/grk-translit-modern]]
-- This is version of [[Module:grc-translit]]
-- See [[Module:el-translit]]
--[=[
* grk-translit-classic = for Ancient Greek grc, Koine, grc-koi, learned Medieval & their dialects, Katharevousa el-kth
	-- script polytonic Greek
* grk-translit-modern = for any Medieval Greek gkm, Modern Greek & their dialects, 
	-- script monotonic or polytonic Greek (any script may be found in quotations)

* Learned Medieval Greek is transliterated exactly as Ancient Greek script (rho with daseia/rough, hypogegrammeni)
* Main Medieval Greek (vulgar) rho was written with or without daseia.
	Transliterate like Modern Greek ISO843 (TypeB, slightly more phonemic than TypeA, 
	i macron ī for eta, o macron ō for omega
	with corrections  γ=gh, δ=dh, χ=kh as proposed for a mixed type C)
	Pronunciation as at [[Template:R:gkm:Grammar Cambrdige]]
In modern, prosody marks are not needed, but are kept for possible examples of metrics in poetry.

CORRECTIONS - PROBLEMS
* add ligatures for quotations only? -- no, we can use param substitute at Template:quote

HOW it is USED?? [[Template:xlit]] has:
<onlyinclude>{{{{{|safesubst:}}}#invoke:languages/templates|getByCode|{{{1|und}}}|transliterate|{{{{{|safesubst:}}}#invoke:links|remove_links|{{{2}}}}}|{{{sc|}}}|{{{module|}}}}}</onlyinclude>
]=]--

-- Table of functions returned by this module.
local export = {}

-- Shared Greek data; only its `named` table of combining diacritics is used below.
local m_data = require('Module:grc-utilities/data')

-- Break Greek text into units of a single consonant or monophthong letter, or diphthong, with any diacritics
local tokenize = require('Module:grc-utilities').tokenize

-- Short aliases used by the classic (Ancient Greek) code; kept commented out because
-- this module spells out mw.ustring.* in full.
--local ufind = mw.ustring.find
--local ugsub = mw.ustring.gsub
--local U = mw.ustring.char
--local ulower = mw.ustring.lower
--local uupper = mw.ustring.upper

-- Byte-level Lua pattern that matches exactly one UTF-8 encoded character:
-- one lead byte (NUL, 1-127 for ASCII, or 194-244 for a multi-byte sequence)
-- followed by any number of continuation bytes (128-191).
-- It lets the plain string.gsub walk a string character by character.
local UTF8char = '[%z\1-\127\194-\244][\128-\191]*'

--  Diacritics from Module:grc-utilities/data
--[=[ it says:
local U = require("Module:string/char")
]=]--
local diacritics = m_data.named
-- Greek
local acute = diacritics.acute -- U(0x301) this is okseia ´ and the overall tonos
local grave = diacritics.grave -- U(0x300) this is bareia `
local circumflex = diacritics.circum -- U(0x342) this is perispomeni ῀
	-- Latin_circum = U(0x302)
local diaeresis = diacritics.diaeresis -- U(0x308) these are the dialytika ¨
local smooth = diacritics.smooth -- U(0x313) this is psile ᾿
local rough = diacritics.rough -- U(0x314) this is daseia ῾
local macron = diacritics.macron -- U(0x304) this is macron ˉ , normally not needed, needed exceptionally in quotations
	-- spacing_macron = U(0xAF)
	-- modifier_macron = U(0x2C9)
local breve = diacritics.breve -- U(0x306) this is brachy ˘  , normally not needed
	-- spacing_breve = U(0x2D8)
local subscript = diacritics.subscript -- U(0x345) this is hypogegrammene
-- NOTE(review): an adscript (prosgegrammene) iota is presumably typed as a normal ι; see a_subscript below.
-- Module:grc-utilities/data ALSO has
	-- coronis = U(0x343)
	-- undertie = U(0x35C) -- actually "combining double breve below"
	
-- Latin
local hat = diacritics.Latin_circum -- Latin_circum = U(0x302)

-- mw.ustring pattern: a macron, then an optional diaeresis, then a Latin circumflex.
-- export.translit uses it to spot a transliterated vowel that carries both a macron
-- (from eta/omega) and a circumflex, so that the macron can be removed.
local macron_diaeresis = macron .. diaeresis .. "?" .. hat
-- mw.ustring pattern: a token that starts with alpha and ends with the iota subscript (ᾳ).
local a_subscript = '^[αΑ].*' .. subscript .. '$'
-- Velar consonants; only referenced by the commented-out classic code in this file.
local velar = 'κγχξ'

-- Transliteration table: maps one lowercase Greek letter or one combining
-- diacritic to its Latin replacement. export.translit passes it to gsub as the
-- replacement table, so any character without an entry here (macron,
-- diaeresis, grave, acute, punctuation, ...) is left unchanged.
local tt = {
	-- Vowels
	["α"] = "a",
	["ε"] = "e",
	["η"] = "i" .. macron,  -- the 'ī' with macron looks bad, like perispomeni / The classic ē reminds more of 'eta'
	["ι"] = "i",
	["ο"] = "o",
	["υ"] = "u",
	["ω"] = "o" .. macron, -- ō

	-- Consonants
	["β"] = "v", -- instead of ancient = b
	["γ"] = "gh", -- instead of g
	["δ"] = "dh", -- instead of d
	["ζ"] = "z",
	["θ"] = "th",
	["κ"] = "k",
	["λ"] = "l",
	["μ"] = "m",
	["ν"] = "n",
	["ξ"] = "ks", -- instead of the classic x
	["π"] = "p",
	["ρ"] = "r",
	["σ"] = "s",
	["ς"] = "s",
	["τ"] = "t",
	["φ"] = "f", -- instead of the latinization ph
	["χ"] = "kh",
	["ψ"] = "ps",
	
	-- Archaic letters (AncGr) -- at modern, may be found in quotations and some, for numbering system
	["ϝ"] = "Ϝ", -- do not transliterate to "w" -- this is always the capital Ϝ
	["ϻ"] = "ϻ", -- do not transliterate to "ś"
	["ϙ"] = "Ϙ", -- do not transliterate to "q" -- this is always the capital Ϙ
	["ϡ"] = "ϡ", -- do not transliterate to "š"
	["ͷ"] = "ͷ", -- do not transliterate to "v" number

-- special characters, for quotations only	
	-- Incorrect characters: see [[Wiktionary:About Ancient Greek#Miscellaneous]].
	-- These are tracked by [[Module:script utilities]].
	["ϐ"] = "v", -- instead of 'b'
	["ϑ"] = "th",
	["ϰ"] = "k",
	["ϱ"] = "r",
	["ϲ"] = "s",
	["ϕ"] = "f", -- instead of ph
	
	-- Diacritics
	-- unchanged: macron, diaeresis, grave, acute
	[breve] = '', -- brachy: dropped
	[smooth] = '', --  psile: dropped
	[rough] = '', --  daseia: dropped (no "h" from Medieval Greek onwards)
	[circumflex] = hat, --  perispomene becomes the Latin circumflex
	[subscript] = 'i', -- hypogegrammene is written out as i
}

-- change name from export.tr to export.translit
-- True when tokens[i] begins a word: it is the first token, or the token
-- before it does not start with a letter (space, hyphen, apostrophe, ...).
local function is_word_initial(tokens, i)
	local previous = tokens[i - 1]
	return previous == nil or not mw.ustring.find(previous, "^%a")
end

--- Transliterate Medieval/Modern Greek text (monotonic or polytonic) to Latin.
--
-- Every token produced by tokenize() is lowercased and mapped character by
-- character through the table `tt`; the rules below then override the result
-- for the modern digraphs:
--   μπ  word-initial = b, elsewhere = mb
--   γκ  word-initial = g, elsewhere = nk (γ before a velar is n)
--   γγ  = ng (not ngh)
--   γχ  = nkh, γξ = nks (γ = n, second letter transliterated normally)
--   ᾳ   = a (the iota subscript is not written out)
-- The rough breathing never yields "h" here (unlike [[Module:grc-translit]]):
-- `tt` simply drops it, so ῥ = r and ρρ = rr need no special handling.
--
-- @param text  string to transliterate
-- @param lang  language code; accepted for interface compatibility, unused
-- @param sc    script code; accepted for interface compatibility, unused
-- @return      the transliterated string
function export.translit(text, lang, sc)
	-- A lone daseia: "h" in Ancient Greek, nothing from Medieval Greek onwards.
	if text == '῾' then
		return ''
	end

	--[[
		Replace semicolon or Greek question mark with regular question mark,
		except after an ASCII alphanumeric character (to avoid converting
		semicolons in HTML entities).
	]]
	text = mw.ustring.gsub(text, "([^A-Za-z0-9])[;" .. mw.ustring.char(0x37E) .. "]", "%1?")

	-- The middle dot (ano teleia) becomes a semicolon. This must run after the
	-- replacement above, otherwise the new semicolon would turn into "?".
	text = text:gsub("·", ";")

	local tokens = tokenize(text)
	local output = {}

	-- Walk the tokens in order with an explicit index (pairs() does not
	-- guarantee order); a digraph advances the index by two instead of
	-- blanking the following token.
	local i = 1
	while tokens[i] ~= nil do
		local token = tokens[i]
		local next_token = tokens[i + 1]
		-- Compare lowercase forms so that capitals follow the same rules.
		local lower = mw.ustring.lower(token)
		local next_lower = next_token and mw.ustring.lower(next_token)
		local consumed = 1

		-- Default: substitute each character for its transliteration.
		local translit = lower:gsub(UTF8char, tt)

		if lower == "μ" and next_lower == "π" then
			-- μπ: [[μπαίνω]] = b..., [[κάμπος]] = ...mb...
			translit = is_word_initial(tokens, i) and "b" or "mb"
			consumed = 2
		elseif lower == "γ" then
			if next_lower == "κ" then
				if is_word_initial(tokens, i) then
					-- [[Γκάνα]] = G...
					translit = "g"
					consumed = 2
				else
					-- NOTE(review): medial γκ is rendered "nk" (γ before a
					-- velar = n, κ transliterated normally) -- confirm.
					translit = "n"
				end
			elseif next_lower == "γ" then
				-- γγ = ng, also for the apocopated ['γγίζω]
				translit = "ng"
				consumed = 2
			elseif next_lower == "χ" or next_lower == "ξ" then
				-- [[άγχος]], [[ελέγξω]]: only γ changes, χ/ξ follow as kh/ks
				translit = "n"
			end
		elseif mw.ustring.find(token, a_subscript) then
			-- ᾳ: `tt` would give "ai"; modern transliteration is plain a.
			-- NOTE(review): this also discards any accent on the alpha.
			translit = 'a'
		end

		-- Remove macron from a vowel that has a circumflex (ῆ, ῶ).
		if mw.ustring.find(translit, macron_diaeresis) then
			translit = translit:gsub(macron, '')
		end

		-- Restore capitalization.
		if token ~= lower then
			if consumed == 2 and next_token ~= next_lower then
				-- Both letters of a digraph are capitals (all-caps text).
				translit = mw.ustring.upper(translit)
			else
				-- Capitalize first character of transliteration.
				translit = translit:gsub("^" .. UTF8char, mw.ustring.upper)
			end
		end

		output[#output + 1] = translit
		i = i + consumed
	end

	return table.concat(output)
end

-- Module_talk:User:Sarri.greek/grk-translit-modern
-- ============= use it with arguments  =============== --
--- Entry point for {{#invoke:}}: transliterate the first positional argument.
-- Reads the arguments of the #invoke call itself (frame.args), not those of
-- a calling template (that would be frame:getParent().args).
-- @param frame  the frame object supplied by Scribunto
-- @return       the transliteration, or '' when the argument is missing or empty
function export.get_tr(frame)
	local lemma = frame.args['1']

	-- Nothing to transliterate.
	if lemma == nil or lemma == '' then
		return ''
	end

	local result = export.translit(lemma)
	return result
end

-- Return the table of exported functions as the value of this module.
return export




-- check [[Module:el-translit]] for αυ, ευ, ηυ, μπ initial,
-- add ντ initial, γκ initial (we have delta = dh, and gamma =  gh)
--[=[
	text = gsub(text, "([αεηΑΕΗ])([υύ])()",
				function (vowel, upsilon, position)
					-- Find next character that is not whitespace or punctuation.
					local following = ""
					while true do
						local next = mw.ustring.sub(text, position, position)
						if next == "" then -- reached end of string
							break
						elseif next:find "[%s%p]" then
							position = position + 1
						else
							following = next
							break
						end
					end
					return tt[vowel]
						.. (upsilon == "ύ" and acute or "")
						.. ((following == "" or ("θκξπσςτφχψ"):find(following, 1, true)) and "f" or "v")
				end)

	text = gsub(text, "([αεοωΑΕΟΩ])([ηή])",
				function (vowel, ita)
					if ita == "ή" then
						return tt[vowel] .. "i" .. diaeresis .. acute
					else
						return tt[vowel] .. "i" .. diaeresis
					end
				end)

	text = gsub(text, "[ωΩ][ιί]",
				{["ωι"] = "oï", ["ωί"] = "oḯ",
				 ["Ωι"] = "Oï", ["Ωί"] = "Oḯ"})

	text = gsub(text, "[οΟ][υύ]",
				{["ου"] = "ou", ["ού"] = "oú",
				 ["Ου"] = "Ou", ["Ού"] = "Oú"})

	text = gsub(text, "(.?)([μΜ])π",
				function (before, mi)
					if before == "" or before == " " or before == "-" then
						if mi == "Μ" then
							return before .. "B"
						else
							return before .. "b"
						end
					end
				end)

]=]--