Jump to content

Module:rki-pron

From Wiktionary, the free dictionary

This module is in beta stage.
Its interface has been stabilised, but the module may still contain errors. Do not deploy widely until the module has been tested.

Rakhine (Arakanese) pronunciation and romanisation module. See {{rki-IPA}}.

Testcases

[edit]

See Module:rki-pron/testcases.


local export = {}
local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local match = mw.ustring.match

-- Transcription systems produced by this module.
-- The leading integer of each entry is the sort key used by the `table.sort`
-- calls in `export.generate_tests` and `export.make`; after sorting, an
-- entry's position in this list is the `system_index` used to pick a column
-- out of the data tables below, i.e. every data row is laid out as
-- { IPA, MLCTS, ALA-LC }.
-- "type" selects which input spelling feeds the system ("phonetic" respelling
-- vs. "orthographic" spelling) and "name" is the label printed in the output.
local system_list = {
	{ 1, ["type"] = "phonetic",		["name"] = "IPA" },
	{ 2, ["type"] = "orthographic",	["name"] = "MLCTS" },
	{ 3, ["type"] = "orthographic",	["name"] = "ALA-LC" },
}

-- Syllable-initial data, keyed by the initial consonant (plus any medials).
-- Each value is { IPA, MLCTS, ALA-LC }, indexed by `system_index` in `process`.
-- Keys may carry a leading marker character ("+", "-" or "*") that selects an
-- alternative reading; when a marked key is missing, `process` retries the
-- lookup with the markers stripped.
-- The last three rows are bare medial signs; they are only reached through
-- `initial_by_char` for orthographic systems, hence no IPA value.
-- NOTE(review): "ပြွ" and "မြွှ" have no "ɹ" in their IPA while "ကြွ" and
-- "မြွ" do -- confirm whether that is intentional.
local initial_table = {
	["က"]	=	{ "k", "k", "k" },
	["ကျ"]	=	{ "t͡ɕ", "ky", "ky" },
	["ကြ"]	=	{ "kɹ", "kr", "kr" },
	["ကျွ"]	=	{ "t͡ɕw", "kyw", "kyv" },
	["ကြွ"]	=	{ "kɹw", "krw", "krv" },
	["ကွ"]	=	{ "kw", "kw", "kv" },
	["ခ"]	=	{ "kʰ", "hk", "kh" },
	["ချ"]	=	{ "t͡ɕʰ", "hky", "khy" },
	-- romanisation follows the spelling (ya-pin = y), same as the unmarked key
	["*ချ"]	=	{ "ʃ", "hky", "khy" },
	["ခြ"]	=	{ "t͡ɕʰ", "hkr", "khr" },
	["*ခြ"]	=	{ "ʃ", "hkr", "khr" },
	["ချွ"]	=	{ "t͡ɕʰw", "hkyw", "khyv" },
	["ခြွ"]	=	{ "kʰɹw", "hkrw", "khrv" },
	["ခွ"]	=	{ "kʰw", "hkw", "khv" },
	["ဂ"]	=	{ "ɡ", "g", "g" },
	["ဂျ"]	=	{ "d͡ʑ", "gy", "gy" },
	-- IPA uses the script g (U+0261) like every other voiced-velar entry
	["ဂြ"]	=	{ "ɡɹ", "gr", "gr" },
	["ဂျွ"]	=	{ "d͡ʑw", "gyw", "gyv" },
	["ဂွ"]	=	{ "ɡw", "gw", "gv" },
	["ဃ"]	=	{ "ɡ", "gh", "gh" },
	["င"]	=	{ "ŋ", "ng", "ṅ" },
	["ငှ"]	=	{ "ŋ̊", "hng", "ṅh" },
	["ငြ"]	=	{ "ɲ", "ngr", "ṅr" },
	["ငြှ"]	=	{ "ɲ̊", "hngr", "ṅrh" },
	["ငွ"]	=	{ "ŋw", "ngw", "ṅv" },
	["ငွှ"]	=	{ "ŋ̊w", "hngw", "ṅvh" },
	["စ"]	=	{ "s", "c", "c" },
	["စွ"]	=	{ "sw", "cw", "cv" },
	["ဆ"]	=	{ "sʰ", "hc", "ch" },
	["ဆွ"]	=	{ "sʰw", "hcw", "chv" },
	["ဇ"]	=	{ "z", "j", "j" },
	["ဇွ"]	=	{ "zw", "jw", "jv" },
	["ဈ"]	=	{ "z", "jh", "jh" },
	["ဉ"]	=	{ "ɲ", "ny", "ñ" },
	["ည"]	=	{ "ɲ", "ny", "ññ" },
	["ဉှ"]	=	{ "ɲ̊", "hny", "ñh" },
	["ညှ"]	=	{ "ɲ̊", "hny", "ññh" },
	["ညွ"]	=	{ "ɲw", "nyw", "ñv" },
	["ညွှ"]	=	{ "ɲ̊w", "hnyw", "ñvh" },
	["ဋ"]	=	{ "t", "t", "ṭ" },
	["ဌ"]	=	{ "tʰ", "ht", "ṭh" },
	["ဍ"]	=	{ "d", "d", "ḍ" },
	["ဎ"]	=	{ "d", "dh", "ḍh" },
	["ဏ"]	=	{ "n", "n", "ṇ" },
	["ဏှ"]	=	{ "n̥", "hn", "ṇh" },
	["တ"]	=	{ "t", "t", "t" },
	["တျ"]	=	{ "tj", "ty", "ty" },
	["တြ"]	=	{ "tɹ", "tr", "tr" },
	["တွ"]	=	{ "tw", "tw", "tv" },
	["ထ"]	=	{ "tʰ", "ht", "th" },
	["ထွ"]	=	{ "tʰw", "htw", "thv" },
	["ဒ"]	=	{ "d", "d", "d" },
	["ဒျ"]	=	{ "dj", "dy", "dy" },
	["ဒြ"]	=	{ "dɹ", "dr", "dr" },
	["ဒွ"]	=	{ "dw", "dw", "dv" },
	["ဓ"]	=	{ "d", "dh", "dh" },
	["န"]	=	{ "n", "n", "n" },
	["နှ"]	=	{ "n̥", "hn", "nh" },
	["နျ"]	=	{ "nj", "ny", "ny" },
	["နွ"]	=	{ "nw", "nw", "nv" },
	["နွှ"]	=	{ "n̥w", "hnw", "nvh" },
	["ပ"]	=	{ "p", "p", "p" },
	["ပျ"]	=	{ "pj", "py", "py" },
	["ပြ"]	=	{ "pɹ", "pr", "pr" },
	["ပြွ"]	=	{ "pw", "prw", "prv" },
	["ပွ"]	=	{ "pw", "pw", "pv" },
	["ဖ"]	=	{ "pʰ", "hp", "ph" },
	["ဖျ"]	=	{ "pʰj", "hpy", "phy" },
	["ဖြ"]	=	{ "pʰɹ", "hpr", "phr" },
	["ဖွ"]	=	{ "pʰw", "hpw", "phv" },
	["ဗ"]	=	{ "b", "b", "b" },
	["ဗျ"]	=	{ "bj", "by", "by" },
	["ဗြ"]	=	{ "bɹ", "br", "br" },
	["ဗွ"]	=	{ "bw", "bw", "bv" },
	["ဘ"]	=	{ "b", "bh", "bh" },
	["-ဘ"]	=	{ "pʰ", "bh", "bh" },
	["ဘွ"]	=	{ "bw", "bhw", "bhv" },
	-- ALA-LC writes wa-hswe as "v", as in the unmarked key above
	["-ဘွ"]	=	{ "pʰw", "bhw", "bhv" },
	["မ"]	=	{ "m", "m", "m" },
	["မှ"]	=	{ "m̥", "hm", "mh" },
	["မျ"]	=	{ "mj", "my", "my" },
	["မျှ"]	=	{ "m̥j", "hmy", "myh" },
	["မြ"]	=	{ "mɹ", "mr", "mr" },
	["မြှ"]	=	{ "m̥ɹ", "hmr", "mrh" },
	["မြွ"]	=	{ "mɹw", "mrw", "mrv" },
	["မြွှ"]	=	{ "m̥w", "hmrw", "mrvh" },
	["မွ"]	=	{ "mw", "mw", "mv" },
	["မွှ"]	=	{ "m̥w", "hmw", "mvh" },
	["ယ"]	=	{ "j", "y", "y" },
	["ယှ"]	=	{ "ʃ", "hy", "yh" },
	["သျှ"]	=	{ "ʃ", "hsy", "syh" },
	["ယွ"]	=	{ "jw", "yw", "yv" },
	["ရ"]	=	{ "ɹ", "r", "r" },
	["ရှ"]	=	{ "hɹ", "hr", "rh" },
	["*ရှ"]	=	{ "ʃ", "hr", "rh" },
	["ရွ"]	=	{ "ɹw", "rw", "rv" },
	["ရွှ"]	=	{ "hɹw", "hrw", "rvh" },
	["လ"]	=	{ "l", "l", "l" },
	["လှ"]	=	{ "l̥", "hl", "lh" },
	["လျ"]	=	{ "j", "ly", "ly" },
	["+သျှ"]	=	{ "j", "hsy", "syh" },
	["*လျ"]	=	{ "lj", "ly", "ly" },
	["လျှ"]	=	{ "ʃ", "hly", "lyh" },
	["*လျှ"]	=	{ "l̥j", "hly", "lyh" },
	["လွ"]	=	{ "lw", "lw", "lv" },
	["လွှ"]	=	{ "l̥w", "hlw", "lvh" },
	["ဝ"]	=	{ "w", "w", "v" },
	["ဝှ"]	=	{ "ʍ", "hw", "vh" },
	["သ"]	=	{ "θ", "s", "s" },
	["+သ"]	=	{ "ð", "s", "s" },
	["သွ"]	=	{ "θw", "sw", "sv" },
	-- ALA-LC writes wa-hswe as "v", as in the unmarked key above
	["+သွ"]	=	{ "ðw", "sw", "sv" },
	["ဟ"]	=	{ "h", "h", "h" },
	["ဟွ"]	=	{ "hw", "hw", "hv" },
	["ဠ"]	=	{ "l", "l", "ḷ" },
	["အ"]	=	{ "ʔ", "", "ʼ" },
	-- only appears after a vowel in the same word
	["ဿ"]	=	{ "ʔθ", "ss", "ss" },
	[""]	=	{ "ʔ", "", "" },
	["-"]	=	{ "", "", "" },

	-- bare medials, used character-by-character for clusters not listed above
	["ျ"]	=	{ nil, "y", "y" },
	["ြ"]	=	{ nil, "r", "r" },
	-- ALA-LC writes wa-hswe as "v" everywhere else in this table
	["ွ"]	=	{ nil, "w", "v" },
}

-- Maps a marker-prefixed initial letter to the letter whose reading should be
-- used instead. Passed as the replacement table to `gsub` in `process`
-- (pattern "%+.", phonetic systems only) and in `respelling_format`
-- (pattern "[%+%-]."); keys that are absent leave the matched text unchanged.
-- The "-ဘ" entry can only be reached through the second pattern.
local initial_voicing = {
	["+က"]	=	"ဂ", 
	["+ခ"]	=	"ဂ", 
	["+စ"]	=	"ဇ", 
	["+ဆ"]	=	"ဇ", 
	["+ဋ"]	=	"ဍ", 
	["+ဌ"]	=	"ဍ", 
	["+တ"]	=	"ဒ", 
	["+ထ"]	=	"ဒ", 
	["+ပ"]	=	"ဗ", 
	["+ဖ"]	=	"ဗ", 
	["-ဘ"]	=	"ဖ",
}

-- Rime (vowel + coda) data, keyed by the spelling of everything after the
-- initial. Each value is { IPA, MLCTS, ALA-LC }, indexed by `system_index`.
-- A trailing "2" or "3" in a key selects an alternative reading; `syllabify`
-- glues such digits onto the preceding syllable. The "'" key is the reduced
-- (schwa) rime used by `process` for phonetic systems, and the bare asat key
-- at the end covers a vowel-less final.
-- IPA values already carry their inherent tone; `process` strips and
-- replaces it when an explicit tone mark follows.
local final_table = {
	[""]	=	{ "a̰", "a.", "a" },
	["က်"]	=	{ "ɔʔ", "ak", "akʻ" },
	["င်"]	=	{ "ɔ̀ɴ", "ang", "aṅʻ" },
	["စ်"]	=	{ "aɪʔ", "ac", "acʻ" },
	["ည်"]	=	{ "è", "any", "aññʻ" },
	["ည်2"]	=	{ "ì", "any", "aññʻ" },
	["ည်3"]	=	{ "ɛ̀", "any", "aññʻ" },
	["ဉ်"]	=	{ "aɪ̀ɴ", "any", "añʻ" },
	["တ်"]	=	{ "ɛʔ", "at", "atʻ" },
	["န်"]	=	{ "ɛ̀ɴ", "an", "anʻ" },
	["ပ်"]	=	{ "ɛʔ", "ap", "apʻ" },
	["မ်"]	=	{ "ɛ̀ɴ", "am", "amʻ" },
	["ယ်"]	=	{ "è", "ai", "ayʻ" },
	["ံ"]	=	{ "ɛ̀ɴ", "am", "aṃ" },
	["ာ"]	=	{ "à", "a", "ā" },
	["ါ"]	=	{ "à", "a", "ā" },
	["ိ"]	=	{ "ḭ", "i.", "i" },
	["ိတ်"]	=	{ "eɪʔ", "it", "itʻ" },
	["ိန်"]	=	{ "èɪɴ", "in", "inʻ" },
	["ိပ်"]	=	{ "eɪʔ", "ip", "ipʻ" },
	["ိမ်"]	=	{ "èɪɴ", "im", "imʻ" },
	["ိံ"]	=	{ "èɪɴ", "im", "iṃ" },
	["ီ"]	=	{ "ì", "i", "ī" },
	["ု"]	=	{ "ṵ", "u.", "u" },
	["ုတ်"]	=	{ "oʊʔ", "ut", "utʻ" },
	["ုန်"]	=	{ "òʊɴ", "un", "unʻ" },
	["ုပ်"]	=	{ "oʊʔ", "up", "upʻ" },
	["ုမ်"]	=	{ "òʊɴ", "um", "umʻ" },
	["ုံ"]	=	{ "òʊɴ", "um", "uṃ" },
	["ူ"]	=	{ "ù", "u", "ū" },
	["ေ"]	=	{ "ì", "e", "e" },
	["ေ2"]	=	{ "è", "e", "e" },
	["ဲ"]	=	{ "é", "ai:", "ai" },
	["ော"]	=	{ "ɔ́", "au:", "o" },
	["ောက်"]	=	{ "aʊʔ", "auk", "okʻ" },
	["ောင်"]	=	{ "àʊɴ", "aung", "oṅʻ" },
	["ော်"]	=	{ "ɔ̀", "au", "oʻ" },
	["ို"]	=	{ "ò", "ui", "ui" },
	["ိုက်"]	=	{ "aɪʔ", "uik", "uikʻ" },
	["ိုင်"]	=	{ "àɪɴ", "uing", "uiṅʻ" },
	["ွတ်"]	=	{ "wɛʔ", "wat", "vatʻ" },
	["ွန်"]	=	{ "wɛ̀ɴ", "wan", "vanʻ" },
	-- checked (ʔ) rimes carry no low-tone grave, same as "ွတ်" above
	["ွပ်"]	=	{ "wɛʔ", "wap", "vapʻ" },
	["ွမ်"]	=	{ "wɛ̀ɴ", "wam", "vamʻ" },
	["ွံ"]	=	{ "wɛ̀ɴ", "wam", "vaṃ" },
	["'"]	=	{ "ə", "a", "a" },
	["်"]	=	{ "", "", "ʻ" },
}

-- Vowel values used by the fallback branch of `process` when a rime is not
-- listed in `final_table`: the rime is split into a vowel part and a final
-- consonant, and the vowel part is looked up here first.
-- Values are { IPA, MLCTS, ALA-LC }.
local nucleus_table = {
	[""]	=	{ "à", "a", "a" }, 
	["ိ"]	=	{ "ì", "i", "i" },
	["ု"]	=	{ "ù", "u", "u" },
	["ော"]	=	{ "ɔ̀", "au", "o" },
	["ေါ"]	=	{ "ɔ̀", "au", "o" },
	["ွ"]	=	{ "ʊ̀", "wa", "va" },
}

-- Letters that form a syllable on their own, without a separate initial.
-- `process` treats a syllable whose initial is one of these keys specially:
-- the letter is moved into the rime and the initial becomes "" (or "-" for
-- "၌" and "၍"). Values are { IPA, MLCTS, ALA-LC }.
local indep_letter_table = {
	["ဣ"]	=	{ "ḭ", "i.", "i" },
	["ဤ"]	=	{ "ì", "i", "ī" },
	["ဥ"]	=	{ "ṵ", "u.", "u" },
	["ဦ"]	=	{ "ù", "u", "ū" },
	["ဧ"]	=	{ "ì", "e", "e" },
	["၏"]	=	{ "ḭ", "e", "e*" },
	["ဩ"]	=	{ "ɔ́", "au:", "o" },
	["ဪ"]	=	{ "ɔ̀", "au", "oʻ" },
	["၌"]	=	{ "n̥aɪʔ", "hnai.", "n*" },
	["၍"]	=	{ "ɹwḭ", "rwe", "r*" },
}

-- Explicit tone marks and the symbol each system uses for them, laid out as
-- { IPA, MLCTS, ALA-LC }. The IPA values are combining diacritics that
-- `process` inserts after the first vowel of the rime; the other systems get
-- the symbol appended to the rime.
local tone_table = {
	["း"] = { "́", ":", "ʺ" },
	["့"] = { "̰", ".", "ʹ" },
}

-- Per-system sets of two-letter sequences that would be misread if two
-- adjacent syllables were written together. `concatenate` looks up
-- "last letter of one syllable" .. "first letter of the next" in the set for
-- the current `system_index` and inserts a hyphen on a hit.
-- NOTE(review): only indices 1-3 correspond to entries of `system_list` in
-- this file; sets 4 and 5 are kept as they were.
local ambig_intersyl = {
	[1] = {
	},

	[2] = {
		["ky"] = 1, ["kr"] = 1, ["kw"] = 1, 
		["gy"] = 1, ["gr"] = 1, ["gw"] = 1, 
		["ng"] = 1, ["ny"] = 1, 
		["cw"] = 1, ["tw"] = 1, ["nw"] = 1, 
		["py"] = 1, ["pr"] = 1, ["pw"] = 1, 
		["my"] = 1, ["mr"] = 1, ["mw"] = 1, 
	},

	[3] = {
	},

	[4] = {
		["ky"] = 1, ["kr"] = 1, ["kw"] = 1, 
		["gy"] = 1, ["gr"] = 1, ["gw"] = 1, 
		["ng"] = 1, ["ny"] = 1, 
		["cw"] = 1, ["tw"] = 1, ["nw"] = 1, 
		-- "tw" is already listed on the previous line; a key may appear only
		-- once per constructor
		["tr"] = 1, 
		["py"] = 1, ["pr"] = 1, ["pw"] = 1, 
		["my"] = 1, ["mr"] = 1, ["mw"] = 1, 
	},

	[5] = {
		["ou"] = 1,
	},
}

-- Latin-respelling -> Burmese-script lookup used by `generate_respelling`.
-- The first group of entries maps Latin initials to initial letters, the
-- second group maps Latin rimes to rime spellings. In rime keys "\\" (a
-- single backslash at runtime), "/", "?" and "~" are part of the respelling
-- notation; the Burmese values show what each combination stands for.
local reverse_table = {
	["hm"] = "မှ", ["m"] = "မ", 
	["hn"] = "နှ", ["n"] = "န", 
	["hny"] = "ညှ", ["ny"] = "ည", 
	["hng"] = "ငှ", ["ng"] = "င", 
	["p"] = "ပ", ["hp"] = "ဖ", ["b"] = "ဗ", 
	["t"] = "တ", ["ht"] = "ထ", ["d"] = "ဒ", 
	["c"] = "ကျ", ["hc"] = "ချ", ["j"] = "ဂျ", 
	["k"] = "က", ["hk"] = "ခ", ["g"] = "ဂ", 
	[""] = "အ", 
	["th"] = "သ", ["+th"] = "+သ", 
	["s"] = "စ", ["hs"] = "ဆ", ["z"] = "ဇ", 
	["hr"] = "ရှ", 
	["h"] = "ဟ", 
	["r"] = "ရ", 
	["y"] = "ယ", 
	["hw"] = "ဝှ", ["w"] = "ဝ", 
	["hl"] = "လှ", ["l"] = "လ", 
	["hmw"] = "မွှ", ["mw"] = "မွ", ["hmy"] = "မျှ", ["my"] = "မျ", 
	["hnw"] = "နွှ", ["nw"] = "နွ", 
	["hnyw"] = "ညွှ", ["nyw"] = "ညွ", 
	["hngw"] = "ငွှ", ["ngw"] = "ငွ", 
	["pw"] = "ပွ", ["hpw"] = "ဖွ", ["bw"] = "ဗွ", 
	["py"] = "ပျ", ["hpy"] = "ဖျ", ["by"] = "ဗျ", 
	["tw"] = "တွ", ["htw"] = "ထွ", ["dw"] = "ဒွ", 
	["cw"] = "ကျွ", ["hcw"] = "ချွ", ["jw"] = "ဂျွ", 
	["kw"] = "ကွ", ["hkw"] = "ခွ", ["gw"] = "ဂွ", 
	["thw"] = "သွ", 
	["sw"] = "စွ", ["hsw"] = "ဆွ", ["zw"] = "ဇွ", 
	["hrw"] = "ရွှ", 
	-- NOTE(review): "hw" is also defined above (as "ဝှ"). The Lua manual
	-- leaves the assignment order inside a constructor undefined, so which of
	-- the two values survives is not guaranteed -- decide which reading "hw"
	-- should have and give the other one a distinct key.
	["hw"] = "ဟွ", 
	["yw"] = "ယွ", 
	["hlw"] = "လွှ", ["lw"] = "လွ", ["hly"] = "*လျှ", ["ly"] = "*လျ", 
	
	["i"] = "ီ", ["i\\"] = "ီး", ["i/"] = "ိ", ["i?"] = "စ်", 
	["i~"] = "င်", ["i\\~"] = "င်း", ["i/~"] = "င့်", 
	["ei"] = "ေ", ["ei\\"] = "ေး", ["ei/"] = "ေ့", ["ei?"] = "ိတ်", 
	["ei~"] = "ိန်", ["ei\\~"] = "ိန်း", ["ei/~"] = "ိန့်", 
	["e"] = "ယ်", ["e\\"] = "ဲ", ["e/"] = "ယ့်", ["e?"] = "က်", 
	["ai~"] = "ိုင်", ["ai\\~"] = "ိုင်း", ["ai/~"] = "ိုင့်", 
	["ai?"] = "ိုက်", 
	["a"] = "ာ", ["a\\"] = "ား", ["a/"] = "", ["a?"] = "တ်", 
	["a~"] = "န်", ["a\\~"] = "န်း", ["a/~"] = "န့်", 
	["o"] = "ော်", ["o\\"] = "ော", ["o/"] = "ော့", ["au?"] = "ောက်", 
	["au~"] = "ောင်", ["au\\~"] = "ောင်း", ["au/~"] = "ောင့်", 
	["ou"] = "ို", ["ou\\"] = "ိုး", ["ou/"] = "ို့", ["ou?"] = "ုပ်", 
	["ou~"] = "ုန်", ["ou\\~"] = "ုန်း", ["ou/~"] = "ုန့်", 
	["u"] = "ူ", ["u\\"] = "ူး", ["u/"] = "ု", ["u?"] = "ွတ်", 
	["u~"] = "ွန်", ["u\\~"] = "ွန်း", ["u/~"] = "ွန့်", 
	["a'"] = "'",
}

-- Matches "<open syllable><start of the next syllable>": capture 1 is a
-- consonant with optional medials and vowel/tone signs, capture 2 is the next
-- consonant (optionally with a dot-below) followed by a character that is not
-- a dot-below, asat or virama, i.e. a consonant that does not close capture 1.
local repl_string = "([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဿ][ျြွှ]*[ံ့းွာါါိီုူေဲ]*)([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဿ][့]?[^့်္])"

-- Inserts single spaces between the syllables of `text` and returns the
-- result with leading/trailing spaces removed.
function syllabify(text)
	-- A schwa apostrophe and/or a run of marker characters starts a new
	-- syllable: put a space between them. Empty matches are left untouched
	-- (the callback returns nothing).
	local function split_at_markers(apostrophe, markers)
		if apostrophe .. markers == "" then
			return nil
		end
		return apostrophe .. " " .. markers
	end

	-- An independent letter (plus an optional mark) starts a syllable; where
	-- the break after it goes depends on the second character that follows.
	local function split_at_independent(letter, following, second)
		if second == "္" then
			return " " .. letter .. following .. " " .. second
		elseif second == "်" then
			return " " .. letter .. following .. second
		end
		return " " .. letter .. " " .. following .. second
	end

	text = gsub(text, "('?)([%+%-%*]*)", split_at_markers)
	text = gsub(text, "([ဣဤဥဦဧဩဪ၏၌၍][့း်]?)(.?)(.?)", split_at_independent) .. " "

	-- Break after every asat (with optional tone mark / schwa apostrophe).
	text = gsub(text, "(်း?'?)", "%1 ")
	-- Move a tone mark written before a final consonant to after it.
	text = gsub(text, "([း့])([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအ]်)", "%2%1")

	-- Matches cannot overlap within one pass, so repeat until none is left.
	while match(text, repl_string) do
		text = gsub(text, repl_string, "%1 %2")
	end

	-- Virama becomes a syllable break; the comma placeholder keeps it apart
	-- from ordinary breaks until the spaces have been collapsed and trimmed.
	text = gsub(text, "္", " , ")
	text = gsub(text, " +", " ")
	text = gsub(text, "^ ?(.*[^ ]) ?$", "%1")
	text = gsub(text, " , ", " ")
	-- Reading selectors (2 / 3) belong to the syllable before them.
	text = gsub(text, " ([23])", "%1")
	return text
end

-- Builds a value for `initial_string` one character at a time.
-- Each character is looked up in `ref_table`; when the entry has a value at
-- `system_index` that value is used, otherwise the entry itself is used (the
-- case for plain-string tables). The pieces are concatenated in order.
-- Raises "Initial data not found." if any character has no entry.
function initial_by_char(initial_string, system_index, ref_table)
	local pieces = {}
	for char in mw.text.gsplit(initial_string, "") do
		local entry = ref_table[char]
		if not entry then
			error("Initial data not found.")
		end
		pieces[#pieces + 1] = entry[system_index] or entry
	end
	return table.concat(pieces)
end

-- Converts a Latin-letter respelling into Burmese script using
-- `reverse_table`. Text that already contains Burmese-script characters is
-- returned as is (after the two normalisations below).
-- Raises "Initial data not found." / "Final data not found." when a Latin
-- initial or rime has no entry in `reverse_table`.
function generate_respelling(text)
	-- NOTE(review): the two whitespace literals are kept exactly as found;
	-- confirm which space characters they are meant to contain (compare
	-- `remove_wide_space`).
	text = gsub(text, " ", "   ")
	text = gsub(text, "ါ", "ာ")
	if match(text, "[က-႟ꩠ-ꩻ]") then return text end
	text = gsub(text, "(%+?)([^%?%+'/\\~aeiou ]*)(/?)([%?'/\\~aeiou]+)", function(voicing_mark, latin_initial, opt_sep, latin_final)
		-- Whole-cluster lookup first, then letter by letter.
		local burmese_initial = reverse_table[latin_initial]
			or initial_by_char(latin_initial, nil, reverse_table)
		-- Fail with a readable message instead of the "attempt to concatenate
		-- a nil value" an unknown rime used to cause.
		local burmese_final = reverse_table[latin_final]
			or error("Final data not found.")
		return voicing_mark .. burmese_initial .. opt_sep .. burmese_final
	end)
	return text
end

-- Romanises a single syllable.
--   initial       initial consonant with medials, optional leading markers
--                 (+ - *) and an optional trailing "/" separator
--   final         rime spelling (without tone mark)
--   tone          "" or one of the keys of `tone_table`
--   schwa         "'" for a reduced syllable, otherwise ""
--   system        entry of `system_list`
--   system_index  column to read from the data tables
-- Returns the NFC-normalised string initial .. rime (with tone applied).
-- Raises "Initial data not found." / "Tone data not found." on missing data.
-- All working variables are local so that nothing leaks into, or is picked
-- up from, the global environment between calls.
function process(initial, final, tone, schwa, system, system_index)
	-- For phonetic output, a medial "w" before one of these codas belongs to
	-- the rime (final_table has dedicated "ွ…" rimes), so move it over.
	if match(initial .. final, "ွှ?[တနပမံ]") and system["type"] == "phonetic" then
		initial = gsub(initial, "[ွ/]", "")
		final = "ွ" .. final
	else
		initial = gsub(initial, "/", "")
	end
	
	-- Resolve "+X" markers to the substitute letter (phonetic systems only).
	local initial_new = system["type"] == "phonetic" and gsub(initial, "%+.", initial_voicing) or initial
	
	-- Independent letters carry their own vowel: treat them as the rime.
	if indep_letter_table[initial_new] then
		initial_new = match(initial_new, "[၌၍]") and "-" or ""
		final = initial .. final
	end
	if initial_new == "မြွ" then require('Module:debug').track('rki-pron/mrw') end

	-- Exact key, then key without markers, then (orthographic systems only)
	-- a character-by-character build, which yields a string, not a table.
	local initial_data = 
		initial_table[initial_new]
		or initial_table[gsub(initial_new, "[%+%-%*]", "")]
		or (system["type"] == "orthographic" 
			and initial_by_char(initial_new, system_index, initial_table)
			or error("Initial data not found."))
		
	local initial_value = initial_data[system_index] or initial_data

	if match(initial, "^%+") and system_index == 5 then
		initial_value = initial_table[gsub(initial, "%+", "")][system_index]
		initial_value = gsub(initial_value, "^([^rwy]+)", "<u>%1</u>")
	end

	-- Rime lookup: schwa (phonetic only) or the rime itself; then the rime
	-- with an asat added / an independent letter; finally split the rime into
	-- vowel + final consonant and assemble it from the individual tables.
	local final_data =
		final_table[system["type"] .. schwa == "phonetic'" and schwa or final]
		or (system["type"] == "phonetic" 
			and (final_table[final .. "်"] or indep_letter_table[final]) 
			or indep_letter_table[final])
		or gsub(final, "^([^်]*)([^်])(်?)$", function(first, second, third) 
			local first_data = nucleus_table[first] or final_table[first] or indep_letter_table[first] or first
			local second_data = initial_table[second] or second
			first = first_data ~= first and first_data[system_index] or first
			second = second_data ~= second
				and second_data[system_index] .. ((system_index == 3 and third ~= "") and "ʻ" or "")
				or second
			-- Drop a tone symbol (. or :) left over from the vowel part.
			return (gsub(first .. second, "([%.:])(.*)", "%2"))
			end)
		
	local final_value = type(final_data) == "table" and final_data[system_index] or final_data
	-- Decompose so that tone diacritics can be matched as separate characters.
	final_value = mw.ustring.toNFD(final_value)

	local tone_value
	if tone == "" then
		tone_value = ""
	else
		-- An explicit tone mark replaces the rime's inherent tone.
		if system_index ~= 4 then final_value = gsub(final_value, "̀", "") end
		final_value = gsub(final_value, "[́:%.]", "")
		if system["type"] .. schwa == "phonetic'" then
			tone_value = ""
		else
			local tone_data = tone_table[tone] or error("Tone data not found.")
			tone_value = tone_data[system_index]
		end
	end

	if system_index == 1 then
		-- IPA: the tone diacritic sits on the first vowel of the rime.
		final_value = gsub(final_value, "^([aeəɛiɪoɔuʊ])", "%1" .. tone_value)
	elseif system_index == 5 then
		final_value = gsub(final_value, "([aeiou])([^aeiou]*)$", "%1" .. tone_value .. "%2")
	else
		final_value = final_value .. tone_value
	end
	
	return mw.ustring.toNFC(initial_value .. final_value)
end

-- Returns `text` with every occurrence of the space literal below removed
-- (only the string, not gsub's match count).
-- NOTE(review): going by the function name the literal is meant to be a wide
-- space; it is kept exactly as found -- confirm the character.
function remove_wide_space(text)
	local stripped = gsub(text, " ", "")
	return stripped
end

-- Joins the romanised syllables in `set` into one word.
-- For system 1 the syllables are simply concatenated. For the other systems
-- they are joined with spaces, and each space is then either dropped or
-- replaced by a hyphen where writing the two syllables together would be
-- ambiguous (see `ambig_intersyl` and the letter patterns below).
function concatenate(set, system_index)
	if system_index == 1 then return remove_wide_space(table.concat(set)) end
	local result_text = remove_wide_space(table.concat(set, " "))
	
	-- One gsub pass consumes the characters around each space, so a boundary
	-- directly after another one can be skipped; presumably that is why the
	-- substitution is repeated a fixed three times.
	for _ = 1, 3 do
		result_text = gsub(result_text, "(.) (.)([^ ]?)",
			function(previous, following, after_next)
				if ambig_intersyl[system_index][previous .. following] 
				or ((system_index == 2 or system_index == 4)
					and (match(previous .. " " .. following, "[ptkgmngy] [aeiou]")
					or (match(previous .. following .. after_next, "[aeiou][ptkmn][rwyg]") and not match(after_next, "[aeiou]")))) then
						return previous .. "-" .. following .. after_next
				else
					return previous .. following .. after_next
				end
			end)
	end
		
	return result_text
end

-- Romanises `word` in the given system.
-- The text is split into phrases at spaces and into syllables by
-- `syllabify`; every syllable is handed to `process` and the results are
-- joined by `concatenate`.
-- With mode == "translit_module" the romanised string is returned directly.
-- Otherwise it is appended to pronunciations[system_index] and that list is
-- returned.
function export.get_romanisation(word, pronunciations, system, system_index, mode)
	local is_phonetic = system["type"] == "phonetic"
	local sentences = {}

	-- Phrase boundaries are protected from syllabification as "|".
	word = gsub(word, " ", "|")
	if is_phonetic then
		word = gsub(word, "ဿ", "တ်သ")
	end
	word = syllabify(word)
	word = gsub(word, "ါ", "ာ")
	if is_phonetic then
		word = gsub(word, "ဝ([တနပမံ])", "ဝွ%1")
	end

	-- Receives the four captures of the syllable pattern below.
	local function romanise(initial, final, tone, schwa)
		return process(initial, final, tone, schwa, system, system_index)
	end

	for phrase in mw.text.gsplit(word, "|", true) do
		local romanised = {}
		for index, syllable in ipairs(mw.text.split(phrase, " ", true)) do
			-- Normalise "tone mark + asat" to "asat + tone mark".
			local reordered = gsub(syllable, "([း့])(်)", "%2%1")
			-- Captures: initial (markers, consonant, medials, "/"), rime,
			-- tone mark, schwa apostrophe. Non-matching text is left as is.
			romanised[index] = gsub(
				reordered,
				"^([%+%-%*]*[ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဣဤဥဦဧဩဪ၏၌၍ဿ][ျြ]?ွ?ှ?/?)([^း့']*)([း့]?)('?)$",
				romanise)
		end
		sentences[#sentences + 1] = concatenate(romanised, system_index)
	end

	local joined = table.concat(sentences, " ")
	if mode == "translit_module" then
		return joined
	end
	table.insert(pronunciations[system_index], joined)
	return pronunciations[system_index]
end

-- Formats the phonetic respellings for display.
--   phonetic    list of respellings in Burmese script
--   page_title  the entry's spelling
-- When a respelling differs from the title only by marker characters, the
-- syllables that differ are wrapped in a highlighting <span>. The
-- respellings are then joined with ", ", marker-prefixed initials are
-- replaced through `initial_voicing`, and "ာ" is written as "ါ" after the
-- listed consonants. Returns the resulting wikitext string.
function respelling_format(phonetic, page_title)
	local page_title_set = mw.text.split(syllabify(page_title), " ")
	-- The title does not change per respelling, so normalise it once.
	local normalised_title = gsub(table.concat(page_title_set), "ါ", "ာ")
	local new_respellings = {}
	for _, respelling in ipairs(phonetic) do
		local respelling_set = mw.text.split(syllabify(respelling), " ")
		if gsub(table.concat(respelling_set), "[%+%-%*']", "") == normalised_title then
			for index, element in ipairs(respelling_set) do
				if element ~= page_title_set[index] then
					respelling_set[index] = '<span style="font-size:110%; color:#A32214; font-weight: bold">' .. element .. '</span>'
				end
			end
		end
		table.insert(new_respellings, table.concat(respelling_set))
	end
	-- `text` is local: it used to be written to the global environment.
	local text = table.concat(new_respellings, ", ")
	text = remove_wide_space(text)
	text = gsub(text, "[%+%-].", initial_voicing)
	text = gsub(text, "([ခဂငဒပဝ]ေ?)ာ", "%1ါ")
	return text
end

-- Test helper: romanises `word` (orthographic systems) and `respelling`
-- (phonetic systems) and returns the results of five result slots joined
-- with " | ". Slots without a matching entry in `system_list` contribute an
-- empty string. "<u>"/"</u>" in the output are rewritten to "(" and ")".
function export.generate_tests(word, respelling)
	-- Same evaluation order as before: respelling first, then word.
	local phonetic_input = generate_respelling(respelling)
	local orthographic_input = generate_respelling(word)

	local pronunciations = {}
	for slot = 1, 5 do
		pronunciations[slot] = {}
	end

	local inputs = {
		["orthographic"] = orthographic_input,
		["phonetic"] = phonetic_input or orthographic_input,
	}

	table.sort(system_list, function(first, second) return first[1] < second[1] end)
	for system_index, system in ipairs(system_list) do
		pronunciations[system_index] = export.get_romanisation(inputs[system["type"]], pronunciations, system, system_index)
	end

	local columns = {}
	for slot = 1, 5 do
		columns[slot] = table.concat(pronunciations[slot])
	end

	local joined = table.concat(columns, " | ")
	joined = gsub(joined, "<u>", "(")
	joined = gsub(joined, "</u>", ")")
	return joined
end

-- Template entry point (see {{rki-IPA}}).
-- Reads the calling template's arguments: `word` overrides the page title as
-- the orthographic input, and the numbered arguments are phonetic
-- respellings (defaulting to the title). Returns wikitext consisting of an
-- optional "Phonetic respelling" line, the IPA line and the romanisation
-- line.
function export.make(frame)
	local args = frame:getParent().args
	local page_title = mw.title.getCurrentTitle().text
	local title = generate_respelling(args["word"] or page_title)
	
	local p, result = { ["orthographic"] = { title }, ["phonetic"] = {} }, {}
	local pronunciations = {
		[1] = {},
		[2] = {},
		[3] = {},
		[4] = {},
		[5] = {},
	}

	if not args[1] then args = { title } end
	for _, item in ipairs(args) do
		-- Empty arguments are skipped (inserting nil adds nothing).
		table.insert(p["phonetic"], (item ~= "") and generate_respelling(item) or nil)
	end
	
	table.sort(system_list, function(first, second) return first[1] < second[1] end)
	for system_index, system in ipairs(system_list) do
		for _, word in ipairs(p[system["type"]]) do
		 	pronunciations[system_index] = export.get_romanisation(word, pronunciations, system, system_index)
		end
	end
	
	if title ~= table.concat(args) then
		table.insert(result, 
			"* Phonetic respelling" .. (#p["phonetic"] > 1 and "s" or "") .. ": " ..
			tostring( mw.html.create( "span" )
				:attr( "lang", "rki" )
				:attr( "class", "Mymr" )
				:wikitext( respelling_format( p["phonetic"], page_title ))) .. "\n" )
	end

	table.insert(result,
		'* [[Wiktionary:International Phonetic Alphabet|IPA]]' ..
		'<sup>([[Appendix:Burmese pronunciation|key]])</sup>: ' ..
		
		-- Adjacent glottal stops are separated by a syllable dot.
		(tostring( mw.html.create( "span" )
			:attr( "class", "IPA" )
			:wikitext( "/" .. gsub(table.concat(pronunciations[1], "/, /"), "ʔʔ", "ʔ.ʔ") .. "/" ))) ..
		
		'\n* [[Wiktionary:Burmese transliteration|Romanization:]] ')
		
	-- List every romanisation system after IPA. The upper bound follows
	-- `system_list`: a fixed bound of 5 indexed entries that do not exist and
	-- raised "attempt to index a nil value".
	for system_index = 2, #system_list do
		table.insert(result, 
			(system_index ~= 2 and " • " or "") ..
			"''" .. system_list[system_index]["name"] .. ":'' " .. 
			table.concat(pronunciations[system_index], "/"))
	end
	
	return table.concat(result)
end

return export