-- Module:User:Verdy p/my-pron
--
-- From Wiktionary, the free dictionary
--
-- Burmese pronunciation and romanisation module. See {{my-IPA}}.
--
-- Testcases: see Module:User:Verdy p/my-pron.


local export = {}
local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local match = mw.ustring.match

-- Transcription/romanisation systems, in output order. An entry's position in
-- this list is the `system_index` used to select a column of the helper
-- tables; `type` tells which kind of respelling the system is fed with and
-- whether the phonetic-only rules apply.
local system_list = {
	{ name = 'IPA',      type = 'phonetic'     }, -- index 1
	{ name = 'MLCTS',    type = 'orthographic' }, -- index 2
	{ name = 'ALA-LC',   type = 'orthographic' }, -- index 3
	{ name = 'BGN/PCGN', type = 'phonetic'     }, -- index 4
	{ name = 'Okell',    type = 'phonetic'     }, -- index 5
}
-- This sort order is assumed in both export.generate_tests and export.make functions, so do it only once
-- but actually the table is already sorted by its first element which is also its index position,
-- so this is not even needed (and storing the system_index in table is not even needed, so it was commented out):
--table.sort(system_list, function(a, b) return a[1] < b[1] end)

--[[
Helper tables for phonetic transcriptions or orthographic romanisations:
- initial_voicing:    from Myanmar initial consonants to voiced/devoiced Myanmar consonants
- initial_table:      from Myanmar consonants to phonetic/orthographic initial clusters
- nucleus_table:      from Myanmar central vowels to phonetic/orthographic central vowels
- final_table:        from Myanmar vowel+trailers to phonetic/orthographic final clusters
- indep_letter_table: from Myanmar independent vowels to phonetic/orthographic vowels with diacritics or syllables
- tone_table:         from Myanmar tone marks to phonetic/orthographic diacritics or modifiers
- ambig_intersyl:     pairs of letters that must be hyphen-separated in romanizations to preserve Myanmar syllables
]]
-- Replacement table for initials carrying a voicing mark in a respelling:
-- the key is the mark ('+' or '-') followed by the written consonant, the
-- value is the consonant whose sound is to be used instead. It is passed to
-- gsub as the replacement table.
local initial_voicing = {
	-- '+' mark: plain or aspirated stop/affricate -> voiced counterpart
	['+က'] = 'ဂ',
	['+ခ'] = 'ဂ',
	['+စ'] = 'ဇ',
	['+ဆ'] = 'ဇ',
	['+ဋ'] = 'ဍ',
	['+ဌ'] = 'ဍ',
	['+တ'] = 'ဒ',
	['+ထ'] = 'ဒ',
	['+ပ'] = 'ဗ',
	['+ဖ'] = 'ဗ',
	-- '-' mark: the only devoicing entry
	['-ဘ'] = 'ဖ',
}
-- Syllable-initial lookup: Myanmar consonant (optionally with medials, and
-- optionally prefixed with a '+', '-' or '*' respelling mark) -> one value per
-- system, in system_list order. A nil cell means the key is only meaningful
-- for the orthographic systems. The last three single-medial keys are used
-- by the character-by-character fallback for clusters that have no entry.
-- The ALA-LC column writes medial wa as 'v' throughout; the three marked
-- entries below were brought in line with that.
local initial_table = {
	--[[	   		IPA     MLCTS    ALA-LC  BGN/PCGN Okell  ]]
	['က']		= { 'k',    'k',     'k',    'k',     'k'     },
	['ကျ']		= { 't͡ɕ',   'ky',    'ky',   'ky',    'c'     },
	['ကြ']		= { 't͡ɕ',   'kr',    'kr',   'ky',    'c'     },
	['ကျွ']		= { 't͡ɕw',  'kyw',   'kyv',  'kyw',   'cw'    },
	['ကြွ']		= { 't͡ɕw',  'krw',   'krv',  'kyw',   'cw'    },
	['ကွ']		= { 'kw',   'kw',    'kv',   'kw',    'kw'    },
	['ခ']		= { 'kʰ',   'hk',    'kh',   'hk',    'hk'    },
	['ချ']		= { 't͡ɕʰ',  'hky',   'khy',  'ch',    'hc'    },
	['ခြ']		= { 't͡ɕʰ',  'hkr',   'khr',  'ch',    'hc'    },
	['ချွ']		= { 't͡ɕʰw', 'hkyw',  'khyv', 'chw',   'hcw'   },
	['ခြွ']		= { 't͡ɕʰw', 'hkrw',  'khrv', 'chw',   'hcw'   },
	['ခွ']		= { 'kʰw',  'hkw',   'khv',  'hkw',   'hkw'   },
	['ဂ']		= { 'ɡ',    'g',     'g',    'g',     'g'     },
	['ဂျ']		= { 'd͡ʑ',   'gy',    'gy',   'gy',    'j'     },
	['ဂြ']		= { 'd͡ʑ',   'gr',    'gr',   'gy',    'j'     },
	['ဂျွ']		= { 'd͡ʑw',  'gyw',   'gyv',  'gyw',   'jw'    },
	['ဂွ']		= { 'ɡw',   'gw',    'gv',   'gw',    'gw'    },
	['ဃ']		= { 'ɡ',    'gh',    'gh',   'g',     'g'     },
	['င']		= { 'ŋ',    'ng',    'ṅ',    'ng',    'ng'    },
	['ငှ']		= { 'ŋ̊',    'hng',   'ṅh',   'hng',   'hng'   },
	['ငြ']		= { 'ɲ',    'ngr',   'ṅr',   'ny',    'ny'    },
	['ငြှ']		= { 'ɲ̊',    'hngr',  'ṅrh',  'hny',   'hny'   },
	['ငွ']		= { 'ŋw',   'ngw',   'ṅv',   'ngw',   'ngw'   },
	['ငွှ']		= { 'ŋ̊w',   'hngw',  'ṅvh',  'hngw',  'hngw'  },
	['စ']		= { 's',    'c',     'c',    's',     's'     },
	['စွ']		= { 'sw',   'cw',    'cv',   'sw',    'sw'    },
	['ဆ']		= { 'sʰ',   'hc',    'ch',   'hs',    'hs'    },
	['ဆွ']		= { 'sʰw',  'hcw',   'chv',  'hsw',   'hsw'   },
	['ဇ']		= { 'z',    'j',     'j',    'z',     'z'     },
	['ဇွ']		= { 'zw',   'jw',    'jv',   'zw',    'zw'    },
	['ဈ']		= { 'z',    'jh',    'jh',   'z',     'z'     },
	['ဉ']		= { 'ɲ',    'ny',    'ñ',    'ny',    'ny'    },
	['ည']		= { 'ɲ',    'ny',    'ññ',   'ny',    'ny'    },
	['ဉှ']		= { 'ɲ̊',    'hny',   'ñh',   'hny',   'hny'   },
	['ညှ']		= { 'ɲ̊',    'hny',   'ññh',  'hny',   'hny'   },
	['ညွ']		= { 'ɲw',   'nyw',   'ñv',   'nyw',   'nyw'   },
	['ညွှ']		= { 'ɲ̊w',   'hnyw',  'ñvh',  'hnyw',  'hnyw'  },
	['ဋ']		= { 't',    't',     'ṭ',    't',     't'     },
	['ဌ']		= { 'tʰ',   'ht',    'ṭh',   'ht',    'ht'    },
	['ဍ']		= { 'd',    'd',     'ḍ',    'd',     'd'     },
	['ဎ']		= { 'd',    'dh',    'ḍh',   'd',     'd'     },
	['ဏ']		= { 'n',    'n',     'ṇ',    'n',     'n'     },
	['ဏှ']		= { 'n̥',    'hn',    'ṇh',   'hn',    'hn'    },
	['တ']		= { 't',    't',     't',    't',     't'     },
	['တျ']		= { 'tj',   'ty',    'ty',   'ty',    'ty'    },
	['တြ']		= { 'tɹ',   'tr',    'tr',   'tr',    'tr'    },
	['တွ']		= { 'tw',   'tw',    'tv',   'tw',    'tw'    },
	['ထ']		= { 'tʰ',   'ht',    'th',   'ht',    'ht'    },
	['ထွ']		= { 'tʰw',  'htw',   'thv',  'htw',   'htw'   },
	['ဒ']		= { 'd',    'd',     'd',    'd',     'd'     },
	['ဒျ']		= { 'dj',   'dy',    'dy',   'dy',    'dy'    },
	['ဒြ']		= { 'dɹ',   'dr',    'dr',   'dr',    'dr'    },
	['ဒွ']		= { 'dw',   'dw',    'dv',   'dw',    'dw'    },
	['ဓ']		= { 'd',    'dh',    'dh',   'd',     'd'     },
	['န']		= { 'n',    'n',     'n',    'n',     'n'     },
	['နှ']		= { 'n̥',    'hn',    'nh',   'hn',    'hn'    },
	['နွ']		= { 'nw',   'nw',    'nv',   'nw',    'nw'    },
	['နွှ']		= { 'n̥w',   'hnw',   'nvh',  'hnw',   'hnw'   },
	['ပ']		= { 'p',    'p',     'p',    'p',     'p'     },
	['ပျ']		= { 'pj',   'py',    'py',   'py',    'py'    },
	['ပြ']		= { 'pj',   'pr',    'pr',   'py',    'py'    },
	['ပြွ']		= { 'pw',   'prw',   'prv',  'pw',    'pw'    },
	['ပွ']		= { 'pw',   'pw',    'pv',   'pw',    'pw'    },
	['ဖ']		= { 'pʰ',   'hp',    'ph',   'hp',    'hp'    },
	['ဖျ']		= { 'pʰj',  'hpy',   'phy',  'hpy',   'hpy'   },
	['ဖြ']		= { 'pʰj',  'hpr',   'phr',  'hpy',   'hpy'   },
	['ဖွ']		= { 'pʰw',  'hpw',   'phv',  'hpw',   'hpw'   },
	['ဗ']		= { 'b',    'b',     'b',    'b',     'b'     },
	['ဗျ']		= { 'bj',   'by',    'by',   'by',    'by'    },
	['ဗြ']		= { 'bj',   'br',    'br',   'by',    'by'    },
	['ဗွ']		= { 'bw',   'bw',    'bv',   'bw',    'bw'    },
	['ဘ']		= { 'b',    'bh',    'bh',   'b',     'b'     },
	['-ဘ']		= { 'pʰ',   'bh',    'bh',   'hp',    'hp'    },
	['ဘွ']		= { 'bw',   'bhw',   'bhv',  'bw',    'bw'    },
	['-ဘွ']		= { 'pʰw',  'bhw',   'bhv',  'hpw',   'hpw'   },	-- ALA-LC was 'bhw'
	['မ']		= { 'm',    'm',     'm',    'm',     'm'     },
	['မှ']		= { 'm̥',    'hm',    'mh',   'hm',    'hm'    },
	['မျ']		= { 'mj',   'my',    'my',   'my',    'my'    },
	['မျှ']		= { 'm̥j',   'hmy',   'myh',  'hmy',   'hmy'   },
	['မြ']		= { 'mj',   'mr',    'mr',   'my',    'my'    },
	['မြှ']		= { 'm̥j',   'hmr',   'mrh',  'hmy',   'hmy'   },
	['မြွ']		= { 'mjw',  'mrw',   'mrv',  'myw',   'myw'   },
	['မြွှ']		= { 'm̥w',   'hmrw',  'mrvh', 'hmw',   'hmw'   },
	['မွ']		= { 'mw',   'mw',    'mv',   'mw',    'mw'    },
	['မွှ']		= { 'm̥w',   'hmw',   'mvh',  'hmw',   'hmw'   },
	['ယ']		= { 'j',    'y',     'y',    'y',     'y'     },
	['ယှ']		= { 'ʃ',    'hy',    'yh',   'sh',    'hy'    },
	['သျှ']		= { 'ʃ',    'hsy',   'syh',  'sh',    'hy'    },
	['ယွ']		= { 'jw',   'yw',    'yv',   'yw',    'yw'    },
	['ရ']		= { 'j',    'r',     'r',    'y',     'y'     },
	['*ရ']		= { 'ɹ',    'r',     'r',    'r',     'r'     },
	['ရှ']		= { 'ʃ',    'hr',    'rh',   'sh',    'hy'    },
	['ရွ']		= { 'jw',   'rw',    'rv',   'yw',    'yw'    },
	['ရွှ']		= { 'ʃw',   'hrw',   'rvh',  'shw',   'hyw'   },
	['လ']		= { 'l',    'l',     'l',    'l',     'l'     },
	['လှ']		= { 'l̥',    'hl',    'lh',   'hl',    'hl'    },
	['လျ']		= { 'j',    'ly',    'ly',   'y',     'y'     },
	['+သျှ']		= { 'j',    'hsy',   'syh',  'y',     'y'     },
	['*လျ']		= { 'lj',   'ly',    'ly',   'ly',    'ly'    },
	['လျှ']		= { 'ʃ',    'hly',   'lyh',  'sh',    'hy'    },
	['*လျှ']		= { 'l̥j',   'hly',   'lyh',  'hly',   'hly'   },
	['လွ']		= { 'lw',   'lw',    'lv',   'lw',    'lw'    },
	['လွှ']		= { 'l̥w',   'hlw',   'lvh',  'hlw',   'hlw'   },
	['ဝ']		= { 'w',    'w',     'v',    'w',     'w'     },
	['ဝှ']		= { 'ʍ',    'hw',    'vh',   'hw',    'hw'    },
	['သ']		= { 'θ',    's',     's',    'th',    'th'    },
	['+သ']		= { 'ð',    's',     's',    'dh',    'th'    },
	['သွ']		= { 'θw',   'sw',    'sv',   'thw',   'thw'   },
	['+သွ']		= { 'ðw',   'sw',    'sv',   'dhw',   'thw'   },	-- ALA-LC was 'sw'
	['ဟ']		= { 'h',    'h',     'h',    'h',     'h'     },
	['ဟွ']		= { 'hw',   'hw',    'hv',   'hw',    'hw'    },
	['ဠ']		= { 'l',    'l',     'ḷ',    'l',     'l'     },
	['အ']		= { 'ʔ',    '',      '’',    '',      ''      },
	['ဿ']		= { nil,    'ss',    'ss',   nil,     nil     },
	['']		= { 'ʔ',    '',      '',     '',      ''      },
	['-']		= { '',     '',      '',     '',      ''      },
	['ျ']		= { nil,    'y',     'y',    nil,     nil     },
	['ြ']		= { nil,    'r',     'r',    nil,     nil     },
	['ွ']		= { nil,    'w',     'v',    nil,     nil     },	-- ALA-LC was 'w'
}
-- Rhyme lookup: everything written after the initial (vowel signs plus an
-- optional final consonant with asat) -> one value per system, in
-- system_list order. Special keys:
--   ''      no written vowel (inherent vowel);
--   'ည်2', 'ည်3'  alternative readings, selected by a digit in the respelling;
--   "'"     the schwa mark of a respelling (looked up for phonetic systems);
--   '်'     a bare asat.
-- Several values carry their own tone diacritic or tone punctuation, which
-- is stripped again by the caller when an explicit tone mark is present.
local final_table = {
	--[[	    	IPA     MLCTS    ALA-LC  BGN/PCGN Okell  ]]
	['']		= { 'a̰',    'a.',    'a',    'a.',    'á'     },
	['က်']		= { 'ɛʔ',   'ak',    'ak‘',  'et',    'eʔ'    },
	['င်']		= { 'ɪ̀ɴ',   'ang',   'aṅ‘',  'in',    'iñ'    },
	['စ်']		= { 'ɪʔ',   'ac',    'ac‘',  'it',    'iʔ'    },
	['ည်']		= { 'ì',    'any',   'aññ‘', 'i',     'i'     },
	['ည်2']		= { 'è',    'any',   'aññ‘', 'e',     'ei'    },
	['ည်3']		= { 'ɛ̀',    'any',   'aññ‘', 'è',     'e'     },
	['ဉ်']		= { 'ɪ̀ɴ',   'any',   'añ‘',  'in',    'iñ'    },
	['တ်']		= { 'aʔ',   'at',    'at‘',  'at',    'aʔ'    },
	['န်']		= { 'àɴ',   'an',    'an‘',  'an',    'añ'    },
	['ပ်']		= { 'aʔ',   'ap',    'ap‘',  'at',    'aʔ'    },
	['မ်']		= { 'àɴ',   'am',    'am‘',  'an',    'añ'    },
	['ယ်']		= { 'ɛ̀',    'ai',    'ay‘',  'è',     'e'     },
	['ံ']		= { 'àɴ',   'am',    'aṃ',   'an',    'añ'    },
	['ာ']		= { 'à',    'a',     'ā',    'a',     'a'     },
	['ါ']		= { 'à',    'a',     'ā',    'a',     'a'     },
	['ိ']		= { 'ḭ',    'i.',    'i',    'i.',    'í'     },
	['ိတ်']		= { 'eɪʔ',  'it',    'it‘',  'eik',   'eiʔ'   },
	['ိန်']		= { 'èɪɴ',  'in',    'in‘',  'ein',   'eiñ'   },
	['ိပ်']		= { 'eɪʔ',  'ip',    'ip‘',  'eik',   'eiʔ'   },
	['ိမ်']		= { 'èɪɴ',  'im',    'im‘',  'ein',   'eiñ'   },
	['ိံ']		= { 'èɪɴ',  'im',    'iṃ',   'ein',   'eiñ'   },
	['ီ']		= { 'ì',    'i',     'ī',    'i',     'i'     },
	['ု']		= { 'ṵ',    'u.',    'u',    'u.',    'ú'     },
	['ုတ်']		= { 'oʊʔ',  'ut',    'ut‘',  'ok',    'ouʔ'   },
	['ုန်']		= { 'òʊɴ',  'un',    'un‘',  'on',    'ouñ'   },
	['ုပ်']		= { 'oʊʔ',  'up',    'up‘',  'ok',    'ouʔ'   },
	['ုမ်']		= { 'òʊɴ',  'um',    'um‘',  'on',    'ouñ'   },
	['ုံ']		= { 'òʊɴ',  'um',    'uṃ',   'on',    'ouñ'   },
	['ူ']		= { 'ù',    'u',     'ū',    'u',     'u'     },
	['ေ']		= { 'è',    'e',     'e',    'e',     'ei'    },
	['ဲ']		= { 'ɛ́',    'ai:',   'ai',   'è:',    'è'     },
	['ော']		= { 'ɔ́',    'au:',   'o',    'aw:',   'ò'     },
	['ောက်']	= { 'aʊʔ',  'auk',   'ok‘',  'auk',   'auʔ'   },
	['ောင်']		= { 'àʊɴ',  'aung',  'oṅ‘',  'aung',  'auñ'   },
	['ော်']		= { 'ɔ̀',    'au',    'o‘',   'aw',    'o'     },
	['ို']		= { 'ò',    'ui',    'ui',   'o',     'ou'    },
	['ိုက်']		= { 'aɪʔ',  'uik',   'uik‘', 'aik',   'aiʔ'   },
	['ိုင်']		= { 'àɪɴ',  'uing',  'uiṅ‘', 'aing',  'aiñ'   },
	['ွတ်']		= { 'ʊʔ',   'wat',   'vat‘', 'ut',    'uʔ'    },
	['ွန်']		= { 'ʊ̀ɴ',   'wan',   'van‘', 'un',    'uñ'    },
	['ွပ်']		= { 'ʊʔ',   'wap',   'vap‘', 'ut',    'uʔ'    },
	['ွမ်']		= { 'ʊ̀ɴ',   'wam',   'vam‘', 'un',    'uñ'    },
	['ွံ']		= { 'ʊ̀ɴ',   'wam',   'vaṃ',  'un',    'uñ'    },
	["'"]		= { 'ə',    'a',     'a',    'ă',     'ă'     },
	['်']		= { '',     '',      '‘',    '',      ''      },
}
-- Vowel nucleus lookup, one value per system in system_list order. It is
-- consulted first for the vowel part of a rhyme that has no entry of its
-- own in final_table and is therefore assembled as "nucleus + consonant".
local nucleus_table = {
	--[[		    IPA     MLCTS    ALA-LC  BGN/PCGN Okell  ]]
	['']		= { 'à',    'a',     'a',    'a',     'a'     },
	['ိ']		= { 'ì',    'i',     'i',    'i',     'i'     },
	['ု']		= { 'ù',    'u',     'u',    'u',     'u'     },
	['ော']		= { 'ɔ̀',    'au',    'o',    'aw',    'o'     },
	['ေါ']		= { 'ɔ̀',    'au',    'o',    'aw',    'o'     },
	['ွ']		= { 'ʊ̀',    'wa',    'va',   'u',     'u'     },
}
-- Independent vowel letters and the symbols ၏ ၌ ၍: each stands for a whole
-- rhyme (or, for ၌ and ၍, a whole syllable) and maps to one value per
-- system in system_list order. When such a letter is found in the initial
-- slot, the caller moves it into the rhyme and looks it up here.
local indep_letter_table = {
	--[[		    IPA     MLCTS    ALA-LC  BGN/PCGN Okell  ]]
	['ဣ']		= { 'ḭ',    'i.',    'i',    'i.',    'í'     },
	['ဤ']		= { 'ì',    'i',     'ī',    'i',     'i'     },
	['ဥ']		= { 'ṵ',    'u.',    'u',    'u.',    'ú'     },
	['ဦ']		= { 'ù',    'u',     'ū',    'u',     'u'     },
	['ဧ']		= { 'è',    'e',     'e',    'e',     'ei'    },
	['၏']		= { 'ɛ̰',    'e',     'e*',   'è.',    'é'     },
	['ဩ']		= { 'ɔ́',    'au:',   'o',    'aw:',   'ò'     },
	['ဪ']	= { 'ɔ̀',    'au',    'o‘',   'aw',    'o'     },
	['၌']		= { 'n̥aɪʔ', 'hnai.', 'n*',   'hnaik', 'hnaiʔ' },
	['၍']		= { 'jwḛ',  'rwe',   'r*',   'ywe.',  'yweí'  },
}
-- Tone mark lookup: Myanmar tone sign -> the diacritic or punctuation that
-- represents it in each system (system_list order). The IPA and Okell
-- values are bare combining characters that the caller attaches to a vowel
-- of the rhyme; the other systems get the value appended to the rhyme.
local tone_table = {
	--[[		    IPA     MLCTS    ALA-LC  BGN/PCGN Okell  ]]
	['း']		= { '́',     ':',     '″',    ':',     '̀'      },
	['့']		= { '̰',     '.',     '′',    '.',     '́'      },
}
-- Per-system sets (indexed by system_index) of two-letter sequences which,
-- when they straddle a syllable boundary (last letter of one syllable, first
-- letter of the next), get a hyphen so the boundary stays recoverable.
-- The value 1 is only a truthy set marker.
local ambig_intersyl = {
	[1]	= {},	--[[IPA phonetic]]
	[2]	= {		--[[MLCTS orthographic]]
		ky = 1, kr = 1, kw = 1,
		gy = 1, gr = 1, gw = 1,
		ny = 1, ng = 1,
		cw = 1, tw = 1, nw = 1,
		py = 1, pr = 1, pw = 1,
		my = 1, mr = 1, mw = 1,
	},
	[3]	= {},	--[[ALA-LC orthographic]]
	[4]	= {		--[[BGN/PCGN phonetic]]
		ky = 1, kr = 1, kw = 1,
		gy = 1, gr = 1, gw = 1,
		ny = 1, ng = 1,
		cw = 1, nw = 1,
		tr = 1, tw = 1,	-- 'tw' used to be listed twice in this set
		py = 1, pr = 1, pw = 1,
		my = 1, mr = 1, mw = 1,
	},
	[5]	= {		--[[Okell phonetic]]
		ou = 1,
	},
}
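--[[
Helper table for reverse transcription from a romanisation to Myanmar script.
Used to exhibit cases where phonetic transcriptions or romanizations are not reversible.
The following was described as made for the BGN/PCGN romanization.
NOTE(review): its keys (c, hc, j, hy, ei, ou, ...) match the Okell column of
initial_table/final_table rather than the BGN/PCGN column (ky, ch, gy, sh, ...),
so it appears to reverse an Okell-style respelling - confirm which is intended.
]]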
-- Latin respelling -> Myanmar script. Initials come first, then rhymes.
-- In rhyme keys '\' and '/' are tone marks, '~' marks a nasal final, '?' a
-- glottal stop and "'" the schwa; '' is the initial used when a syllable
-- starts with a vowel.
local reverse_table = {
	k		= 'က',		hk		= 'ခ',	g		= 'ဂ',
	kw		= 'ကွ',		hkw		= 'ခွ',	gw		= 'ဂွ',
	c		= 'ကျ',		hc		= 'ချ',	j		= 'ဂျ',
	cw		= 'ကျွ',		hcw		= 'ချွ',	jw		= 'ဂျွ',
	ng		= 'င',		hng		= 'ငှ',
	ngw		= 'ငွ',		hngw	= 'ငွှ',
	s		= 'စ',		hs		= 'ဆ', z		= 'ဇ',
	sw		= 'စွ',		hsw		= 'ဆွ',	zw		= 'ဇွ',
	ny		= 'ည',		hny		= 'ညှ',
	nyw		= 'ညွ',		hnyw	= 'ညွှ',
	t		= 'တ',		ht		= 'ထ',	d		= 'ဒ',
	tw		= 'တွ',		htw		= 'ထွ',	dw		= 'ဒွ',
	n		= 'န',		hn		= 'နှ',
	nw		= 'နွ',		hnw		= 'နွှ',
	p		= 'ပ',		hp		= 'ဖ',	b		= 'ဗ',
	py		= 'ပျ',		hpy		= 'ဖျ',	by		= 'ဗျ',
	pw		= 'ပွ',		hpw		= 'ဖွ',	bw		= 'ဗွ',
	m		= 'မ',		hm		= 'မှ',
	my		= 'မျ',		hmy		= 'မျှ',
	mw		= 'မွ',		hmw		= 'မွှ',
	y		= 'ယ',		hy		= 'ရှ',
	yw		= 'ယွ',		hyw		= 'ရွှ',
	r		= '*ရ',
	th		= 'သ',		['+th']	= '+သ',
	thw		= 'သွ',
	l		= 'လ',		hl		= 'လှ',
	ly		= '*လျ',	hly		= '*လျှ',
	lw		= 'လွ',		hlw		= 'လွှ',
	w		= 'ဝ',
	-- FIXME: 'hw' used to be given twice in this constructor ('ဝှ' on the 'w'
	-- row, 'ဟွ' on the 'h' row). Lua leaves the assignment order of repeated
	-- constructor keys undefined; the later entry is the one that took effect
	-- in practice, so it is the one kept. Which mapping is wanted still needs
	-- to be decided.
	-- hw	= 'ဝှ',
	h		= 'ဟ',		hw		= 'ဟွ',
	['']	= 'အ',
	  a		= 'ာ',		['a\\']		= 'ား',		['a/']		= '',		['a?']	= 'တ်',
	['a~']	= 'န်',		['a\\~']	= 'န်း',		['a/~']		= 'န့်',
	  i		= 'ီ',		['i\\']		= 'ီး',		['i/']		= 'ိ',		['i?']	= 'စ်',
	['i~']	= 'င်',		['i\\~']	= 'င်း',		['i/~']		= 'င့်',
	  ei	= 'ေ',		['ei\\']	= 'ေး',		['ei/']		= 'ေ့',		['ei?']	= 'ိတ်',
	['ei~'] = 'ိန်',		['ei\\~']	= 'ိန်း',		['ei/~']	= 'ိန့်',
	  e		= 'ယ်',		['e\\']		= 'ဲ',		['e/']		= 'ယ့်',		['e?']	= 'က်',
	['ai~']	= 'ိုင်',		['ai\\~']	= 'ိုင်း',		['ai/~']	= 'ိုင့်',		['ai?']	= 'ိုက်',
	  o		= 'ော်',		['o\\']		= 'ော',		['o/']		= 'ော့',	['au?']	= 'ောက်',
	['au~']	= 'ောင်',	['au\\~']	= 'ောင်း',	['au/~']	= 'ောင့်',
	  ou	= 'ို',		['ou\\']	= 'ိုး',		['ou/']		= 'ို့',		['ou?']	= 'ုပ်',
	['ou~']	= 'ုန်',		['ou\\~']	= 'ုန်း',		['ou/~']	= 'ုန့်',
	  u		= 'ူ',		['u\\']		= 'ူး',		['u/']		= 'ု',		['u?']	= 'ွတ်',
	['u~']	= 'ွန်',		['u\\~']	= 'ွန်း',		['u/~']		= 'ွန့်',
	["a'"]	= "'",
}

--[[
Split a Myanmar-script word (possibly carrying the respelling marks
+ - * ' and the reading digits 2/3) into syllables.

All substitutions go through the Unicode-aware `gsub`: the patterns contain
character classes made of Myanmar letters and `.` wildcards meant to match
one character, which the byte-oriented string library would apply to the
individual UTF-8 bytes instead.

@param text  string to split
@return the same text with one separator between syllables
]]
function syllabify(text)
	-- Start a new syllable before any run of respelling marks (a schwa
	-- mark stays attached to what precedes it).
	text = gsub(text, "('?)([%+%-%*]*)", function(a, b)
		if a .. b ~= '' then return a .. ' ' .. b end
	end)
	-- Isolate independent vowel letters and symbols, keeping a following
	-- consonant with them when it is stacked or carries asat.
	text = gsub(text, '([ဣဤဥဦဧဩဪ၏၌၍][့း်]?)(.?)(.?)', function(a, b, c)
		return
			c == '္' and ' ' .. a ..        b .. ' ' .. c or
			c == '်' and ' ' .. a ..        b ..        c or
						' ' .. a .. ' ' .. b ..        c
	end) .. ' '
	-- A syllable ends after asat (plus optional tone and schwa marks).
	text = gsub(text, "(်း?'?)", '%1 ')
	-- Put a tone mark written before the final consonant after it.
	text = gsub(text, '([း့])([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအ]်)', '%2%1')
	-- Break between an open syllable and a following consonant that starts
	-- a syllable of its own (i.e. is not followed by asat/virama). Matches
	-- cannot overlap, so repeat until no boundary is left.
	local breaker = '([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဿ][ျြွှ]*[ံ့းွာါါိီုူေဲ]*)([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဿ][့]?[^့်္])'
	while match(text, breaker) do
		text = gsub(text, breaker, '%1 %2')
	end
	-- Virama becomes a temporary ' , ' marker so that the surrounding
	-- separators survive the clean-up, then it is reduced to a separator.
	text = gsub(text, '္',              ' , ')
	text = gsub(text, ' +',             ' '  )
	text = gsub(text, '^ ?(.*[^ ]) ?$', '%1' )
	text = gsub(text, ' , ',            ' '  )
	-- Reading digits belong to the syllable before them.
	text = gsub(text, ' ([23])',        '%1' )
	return text
end

--[[
Convert an initial one character at a time, for clusters that have no entry
of their own.

@param initial_string  the initial to convert
@param system_index    column to take from each entry; when the entry has no
                       such column (or is a plain string) the entry itself is used
@param ref_table       lookup table, keyed by single characters
@return the concatenated per-character values
Raises 'Initial data not found.' when a character has no entry.
]]
function initial_by_char(initial_string, system_index, ref_table)
	local pieces = {}
	for ch in mw.text.gsplit(initial_string, '') do
		local entry = ref_table[ch]
		if not entry then
			error('Initial data not found.')
		end
		pieces[#pieces + 1] = entry[system_index] or entry
	end
	return table.concat(pieces)
end

--[[
Transcribe or romanise one syllable.

@param initial       initial consonant cluster, possibly prefixed with the
                     respelling marks + - * and possibly ending in '/'
@param final         everything between the initial and the tone mark
@param tone          tone mark ('း' or '့') or ''
@param schwa         "'" when the syllable is marked as reduced, else ''
@param system        entry of system_list
@param system_index  position of `system` in system_list
@return the syllable in the requested system, NFC-normalised
Raises 'Initial data not found.' / 'Tone data not found.' on unknown input.

All working variables are local; they used to leak into the global table.
]]
function process(initial, final, tone, schwa, system, system_index)
	-- Phonetic systems: medial wa before one of these finals belongs to
	-- the rhyme (see the 'ွ...' keys of final_table), so move it there.
	-- A '/' in the initial is only a separator and is always dropped.
	if system.type == 'phonetic' and match(initial .. final, 'ွှ?[တနပမံ]') then
		initial = gsub(initial, '[ွ/]', '')
		final = 'ွ' .. final
	else
		initial = gsub(initial, '/', '')
	end

	-- Phonetic systems read a '+'-marked initial as its voiced counterpart.
	local initial_new = system.type == 'phonetic' and gsub(initial, '%+.', initial_voicing) or initial
	-- An independent letter is a rhyme (or whole syllable), not an initial:
	-- move it into `final`. ၌ and ၍ take the empty '-' initial, the others
	-- the default '' initial.
	if indep_letter_table[initial_new] then
		initial_new = match(initial_new, '[၌၍]') and '-' or ''
		final = initial .. final
	end
	if initial_new == 'မြွ' then require('Module:debug').track('my-pron/mrw') end

	-- Look the initial up as given, then without marks, then (orthographic
	-- systems only) character by character.
	local initial_data =
		initial_table[initial_new]
	or 	initial_table[gsub(initial_new, '[%+%-%*]', '')]
	or	system.type == 'orthographic' and initial_by_char(initial_new, system_index, initial_table)
	or	error('Initial data not found.')

	-- `initial_data` is a per-system table, or already a string when it
	-- comes from initial_by_char.
	local initial_value =
		initial_data[system_index]
	or	initial_data
	-- System 5 (Okell) keeps the unvoiced spelling of a '+'-marked initial
	-- and underlines its leading consonant letters instead.
	if match(initial, '^%+') and system_index == 5 then
		initial_value = initial_table[gsub(initial, '%+', '')][system_index]
		initial_value = gsub(initial_value, '^([^rwy]+)', '<u>%1</u>')
	end

	-- Rhyme: the schwa entry for a reduced syllable in a phonetic system,
	-- else a direct hit in final_table, else (phonetic) the same rhyme with
	-- asat added, else an independent letter, else assemble it from a
	-- vowel part and one trailing consonant.
	local final_value =
			final_table[system.type .. schwa == "phonetic'" and schwa or final]
	or	system.type == 'phonetic' and
			final_table[final .. '်']
	or		indep_letter_table[final]
	or		gsub(final, '^([^်]*)([^်])(်?)$', function(first, second, third)
				local first_data =
					nucleus_table[first]
				or	final_table[first]
				or	indep_letter_table[first]
				or	first
				local second_data =
					initial_table[second]
				or	second
				first =
					first_data ~= first and first_data[system_index]
				or	first
				second = second_data ~= second
					and second_data[system_index] .. ((system_index == 3 and third ~= '') and '‘' or '')
					or second
				-- drop a tone punctuation mark left inside the assembled rhyme
				return (gsub(first .. second, '([%.:])(.*)', '%2'))
			end)
	-- Decompose so that tone diacritics can be handled as separate characters.
	final_value = mw.ustring.toNFD(
		type(final_value) == 'table' and final_value[system_index] or final_value
		)

	local tone_value
	if tone == '' then
		tone_value = ''
	else
		-- An explicit tone mark replaces the tone built into the rhyme.
		-- System 4 (BGN/PCGN) keeps the grave accent, where it is part of
		-- the vowel letter rather than a tone.
		if system_index ~= 4 then final_value = gsub(final_value, '̀', '') end
		final_value = gsub(final_value, '[́:%.]', '')
		if system.type .. schwa == "phonetic'" then
			tone_value = ''
		else
			local tone_data = tone_table[tone] or error('Tone data not found.')
			tone_value = tone_data[system_index]
		end
	end

	if system_index == 1 then
		-- IPA: the tone diacritic goes on the first vowel of the rhyme.
		final_value = gsub(final_value, '^([aeəɛiɪoɔuʊ])', '%1' .. tone_value)
	elseif system_index == 5 then
		-- Okell: the tone diacritic goes on the last vowel letter.
		final_value = gsub(final_value, '([aeiou])([^aeiou]*)$', '%1' .. tone_value .. '%2')
	else
		final_value = final_value .. tone_value
	end
	
	return mw.ustring.toNFC(initial_value .. final_value)
end

--[[
Remove every occurrence of the separator character from `text`.

Returns the string only: the substitution count that gsub also returns is
dropped, so the result can be forwarded safely as a function argument or
return value.

NOTE(review): the function name suggests the literal in the pattern is a
wide (ideographic) space rather than a plain one - confirm that the literal
has not been normalised in this copy of the module.
]]
function remove_wide_space(text)
	return (gsub(text, ' ', ''))
end

--[[
Join the syllables of one phrase.

@param set           list of converted syllables
@param system_index  position of the target system in system_list
@return exactly one value, the joined phrase. System 1 (IPA) is joined
        without separators; the other systems get a hyphen at syllable
        boundaries that would otherwise be ambiguous.

The IPA branch used to forward both results of remove_wide_space, which
made `table.insert(list, concatenate(...))` fail in the caller.
]]
function concatenate(set, system_index)
	if system_index == 1 then
		return (remove_wide_space(table.concat(set)))
	end

	local result = remove_wide_space(table.concat(set, ' '))
	-- Matches cannot overlap and each one consumes the letter after the
	-- boundary (and one more), so a boundary right behind another one is
	-- missed in a single pass; presumably that is why three passes are made.
	for _ = 1, 3 do
		result = gsub(result, '(.) (.)([^ ]?)',
			function(previous, following, after_next)
				if	ambig_intersyl[system_index][previous .. following]
				or	(system_index == 2 or system_index == 4)
					and	(	match(previous .. ' ' .. following, '[ptkgmngy] [aeiou]')
						or	match(previous .. following .. after_next, '[aeiou][ptkmn][rwyg]')
							and not match(after_next, '[aeiou]')
						)
				then
					return previous .. '-' .. following .. after_next
				else
					return previous .. following .. after_next
				end
			end)
	end
	return result
end

--[[
Convert Myanmar-script text into one transcription/romanisation system.

@param word            text to convert; spaces separate phrases
@param pronunciations  table indexed by system_index whose entries are lists
                       of results; may be nil. Unless `mode` is
                       'translit_module', the result is appended to
                       pronunciations[system_index] (the list is created
                       when missing).
@param system          entry of system_list
@param system_index    position of `system` in system_list
@param mode            'translit_module' to only return the result
@return the converted text (string)
]]
function export.get_romanisation(word, pronunciations, system, system_index, mode)
	-- Phrases are separated with '|' while the text is split in syllables.
	word = gsub(syllabify((word:gsub(' ', '|'))), 'ါ', 'ာ')
	if system.type == 'phonetic' then
		-- Unicode-aware substitution: the class lists Myanmar letters, which
		-- the byte-oriented string library would match byte by byte.
		word = gsub(word, 'ဝ([တနပမံ])', 'ဝွ%1')
	end
	local sentences = {}
	for phrase in mw.text.gsplit(word, '|', true) do
		local temp, syllable = {}, mw.text.split(phrase, ' ', true)
		for syllable_index = 1, #syllable do
			-- tone mark after asat, as expected by the pattern below
			syllable[syllable_index] = gsub(syllable[syllable_index], '([း့])(်)', '%2%1')
			-- split into initial / rhyme / tone / schwa mark and convert;
			-- a syllable that does not fit the pattern is kept as it is
			temp[syllable_index] = gsub(
				syllable[syllable_index],
				"^([%+%-%*]*[ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဣဤဥဦဧဩဪ၏၌၍ဿ][ျြ]?ွ?ှ?/?)([^း့']*)([း့]?)('?)$",
				function(initial, final, tone, schwa)
					return process(initial, final, tone, schwa, system, system_index)
				end)
		end
		-- Keep a single value: anything extra returned by concatenate would
		-- be taken by table.insert as a position argument.
		local sentence = concatenate(temp, system_index)
		table.insert(sentences, sentence)
	end
	sentences = table.concat(sentences, ' ')
	if mode ~= 'translit_module' and pronunciations then
		local collected = pronunciations[system_index]
		if type(collected) ~= 'table' then
			collected = {}
			pronunciations[system_index] = collected
		end
		table.insert(collected, sentences)
	end
	return sentences
end

--[[
Normalise a respelling. Text that already contains Myanmar script is
returned after normalising its spaces and the tall/short AA sign; a Latin
respelling is converted to Myanmar script with reverse_table.

@param text  respelling in Myanmar script or in Latin letters
@return the respelling in Myanmar script
Raises 'Initial data not found.' / 'Final data not found.' when a Latin
syllable cannot be converted.
]]
function generate_respelling(text)
	text = text:gsub(' ', '  '):gsub('ါ', 'ာ')
	if match(text, '[က-႟ꩠ-ꩻ]') then return text end
	-- Latin syllable: optional '+' voicing mark, initial (anything but
	-- vowels and rhyme symbols), optional '/' separator, then the rhyme.
	text = text
		:gsub("(%+?)([^%?%+'/\\~aeiou ]*)(/?)([%?'/\\~aeiou]+)",
			function(voicing_mark, latin_initial, opt_sep, latin_final)
				local final = reverse_table[latin_final]
				if not final then
					-- used to surface as "attempt to concatenate a nil value"
					error('Final data not found.')
				end
				return
					voicing_mark ..
					(	reverse_table[latin_initial] or
						initial_by_char(latin_initial, nil, reverse_table)
					) ..
					opt_sep ..
					final
			end)
	return text
end

--[[
Test helper: convert a word into every system of system_list.

@param word        the word as written; fed to the orthographic systems
@param respelling  optional phonetic respelling, fed to the phonetic
                   systems; defaults to `word`
@return one string holding the results in system_list order, separated by
        ' | ', with the <u>...</u> markup shown as (...)
]]
function export.generate_tests(word, respelling)
	-- generate_respelling is a plain function, not a string method.
	word = generate_respelling(word)
	local p = {
		orthographic	= word,
		phonetic		= respelling and generate_respelling(respelling) or word
	}
	local result = {}
	for system_index, system in ipairs(system_list) do
		-- 'translit_module' mode returns the result without needing a
		-- collecting table.
		result[#result + 1] = export.get_romanisation(
			p[system.type], nil, system, system_index, 'translit_module')
	end
	-- parentheses: return the string only, not the substitution count
	return (table.concat(result, ' | '):gsub('<u>', '('):gsub('</u>', ')'))
end

--[[
Format the phonetic respellings for display.

When a respelling has the same letters as the page title once its marks are
removed, the syllables that differ from the title's are highlighted.
Voicing marks are then replaced by the consonant they stand for and the
tall AA sign is restored after the letters that take it.

@param phonetic    list of respellings in Myanmar script
@param page_title  title of the current page
@return the respellings as one wikitext string, separated by ', '
        (a single value, so that it can be passed on as an argument)
]]
function respelling_format(phonetic, page_title)
	local page_title_set = mw.text.split(syllabify(page_title), ' ')
	local page_title_key = gsub(table.concat(page_title_set), 'ါ', 'ာ')
	local new_respellings = {}
	for _, respelling in ipairs(phonetic) do
		local respelling_set = mw.text.split(syllabify(respelling), ' ')
		if gsub(table.concat(respelling_set), "[%+%-%*']", '') == page_title_key then
			for index, element in ipairs(respelling_set) do
				if element ~= page_title_set[index] then
					respelling_set[index] = tostring(mw.html.create('b'):attr('style', 'font-size:110%;color:#A32214'):wikitext(element))
				end
			end
		end
		table.insert(new_respellings, table.concat(respelling_set))
	end
	-- remove_wide_space is a plain function, not a string method; the two
	-- substitutions below need the Unicode-aware gsub because their patterns
	-- match whole Myanmar characters.
	local text = remove_wide_space(table.concat(new_respellings, ', '))
	text = gsub(text, '[%+%-].', initial_voicing)
	text = gsub(text, '([ခဂငဒပဝ]ေ?)ာ', '%1ါ')
	return text
end

--[[
Template entry point (see {{my-IPA}}): build the pronunciation section.

Reads from the parent frame:
  word       optional spelling to use instead of the page title
  1, 2, ...  optional phonetic respellings (Myanmar script or Latin)

@param frame  the Scribunto frame object
@return wikitext: an optional "Phonetic respelling" line, the IPA line and
        the romanisation line, each one a '*' list item
]]
function export.make(frame)
	local parent_args = frame:getParent().args

	local page_title = mw.title.getCurrentTitle().text
	local title = generate_respelling(parent_args.word or page_title)

	-- Copy the positional arguments into a plain table: frame arguments are
	-- fetched on demand through a metatable, so the table library (used
	-- below) does not see them.
	local args = {}
	for _, item in ipairs(parent_args) do
		args[#args + 1] = item
	end
	if not args[1] then args = { title } end

	-- Orthographic systems work on the spelling, phonetic systems on the
	-- respellings.
	local p = {
		phonetic = {},
		orthographic = { title },
	}
	for _, item in ipairs(args) do
		if item ~= ''  then
			table.insert(p.phonetic, generate_respelling(item))
		end
	end
	
	-- One result list per system. get_romanisation appends to the list; its
	-- return value is deliberately not stored, which would replace the list.
	local pronunciations = {}
	for system_index = 1, #system_list do
		pronunciations[system_index] = {}
	end
	for system_index, system in ipairs(system_list) do
		for _, word in ipairs(p[system.type]) do
			export.get_romanisation(word, pronunciations, system, system_index)
		end
	end
	
	-- Each element of `lines` is one list item; '*' has to start a line.
	local lines = {}
	if title ~= table.concat(args) then
		table.insert(lines,
			'* Phonetic respelling' .. (#p.phonetic > 1 and 's' or '') ..
			': ' ..
			tostring(mw.html.create('span')
				:attr('lang', 'my')
				:attr('class', 'Mymr')
				:wikitext(
					respelling_format(p.phonetic, page_title)
				))
		)
	end
	table.insert(lines,
		'* [[Wiktionary:International Phonetic Alphabet|IPA]]' ..
		tostring(mw.html.create('sup'):wikitext(
			'([[Appendix:Burmese pronunciation|key]])'
		))
		.. ': ' ..
		tostring(mw.html.create('span')
			:attr('class', 'IPA')
			:wikitext(
				'/' ..
				gsub(table.concat(pronunciations[1], '/, /'), 'ʔʔ', 'ʔ.ʔ') ..
				'/'
		))
	)
	-- The ' • ' separator only goes between the romanisations.
	local romanisations = {}
	for system_index = 2, #system_list do
		table.insert(romanisations,
			'<em>' .. system_list[system_index].name .. ':</em> ' ..
			table.concat(pronunciations[system_index], '/')
		)
	end
	table.insert(lines,
		'* [[Wiktionary:Burmese transliteration|Romanization:]] ' ..
		table.concat(romanisations, ' • ')
	)
	return table.concat(lines, '\n')
end

return export