Module:User:Verdy p/my-pron
Appearance
- The following documentation is located at Module:User:Verdy p/my-pron/documentation. [edit] Categories were auto-generated by Module:documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
Burmese pronunciation and romanisation module. See {{my-IPA}}.
Testcases
[edit]See Module:User:Verdy p/my-pron.
local export = {}
local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local match = mw.ustring.match
-- Transcription systems handled by this module, listed in output order.
-- An entry's position in this list is its system index, i.e. the column that
-- is read from each row of the data tables below.
local system_list = {
	{ name = 'IPA',      type = 'phonetic' },     -- index 1
	{ name = 'MLCTS',    type = 'orthographic' }, -- index 2
	{ name = 'ALA-LC',   type = 'orthographic' }, -- index 3
	{ name = 'BGN/PCGN', type = 'phonetic' },     -- index 4
	{ name = 'Okell',    type = 'phonetic' },     -- index 5
}
-- This sort order is assumed in both export.generate_tests and export.make functions, so do it only once
-- but actually the table is already sorted by its first element which is also its index position,
-- so this is not even needed (and storing the system_index in table is not even needed, so it was commented out):
--table.sort(system_list, function(a, b) return a[1] < b[1] end)
--[[
Helper tables for phonetic transcriptions or orthographic romanisations:
- initial_voicing: from Myanmar initial consonants to voiced/devoiced Myanmar consonants
- initial_table: from Myanmar consonants to phonetic/orthographic initial clusters
- nucleus_table: from Myanmar central vowels to phonetic/orthographic central vowels
- final_table: from Myanmar vowel+trailers to phonetic/orthographic final clusters
- indep_letter_table: from Myanmar independent vowels to phonetic/orthographic vowels with diacritics or syllables
- tone_table: from Myanmar tone diacritics to phonetic/orthographic diacritics or modifiers
- ambig_intersyl: pairs of letters that must be hyphen-separated in romanisations to preserve Myanmar syllables
]]
-- Replacement consonant for an initial carrying a voicing marker.
-- Keys are the marker ('+' or '-') followed by the written consonant; values
-- are the consonant whose row is looked up instead.
local initial_voicing = {
	['+က'] = 'ဂ',
	['+ခ'] = 'ဂ',
	['+စ'] = 'ဇ',
	['+ဆ'] = 'ဇ',
	['+ဋ'] = 'ဍ',
	['+ဌ'] = 'ဍ',
	['+တ'] = 'ဒ',
	['+ထ'] = 'ဒ',
	['+ပ'] = 'ဗ',
	['+ဖ'] = 'ဗ',
	['-ဘ'] = 'ဖ',
}
-- Initial consonants and consonant clusters, one row per written cluster.
-- Each row lists the value for every system, in system_list order.
-- Keys prefixed with '+', '-' or '*' are variants selected by a marker in the
-- respelling. A nil cell means the row has no value for that system.
-- The ALA-LC column writes wa-hswe as 'v' throughout; the marker-prefixed rows
-- and the bare medial row follow the same convention, so that a marker never
-- changes an orthographic romanisation.
local initial_table = {
	--[[ IPA MLCTS ALA-LC BGN/PCGN Okell ]]
	['က'] = { 'k', 'k', 'k', 'k', 'k' },
	['ကျ'] = { 't͡ɕ', 'ky', 'ky', 'ky', 'c' },
	['ကြ'] = { 't͡ɕ', 'kr', 'kr', 'ky', 'c' },
	['ကျွ'] = { 't͡ɕw', 'kyw', 'kyv', 'kyw', 'cw' },
	['ကြွ'] = { 't͡ɕw', 'krw', 'krv', 'kyw', 'cw' },
	['ကွ'] = { 'kw', 'kw', 'kv', 'kw', 'kw' },
	['ခ'] = { 'kʰ', 'hk', 'kh', 'hk', 'hk' },
	['ချ'] = { 't͡ɕʰ', 'hky', 'khy', 'ch', 'hc' },
	['ခြ'] = { 't͡ɕʰ', 'hkr', 'khr', 'ch', 'hc' },
	['ချွ'] = { 't͡ɕʰw', 'hkyw', 'khyv', 'chw', 'hcw' },
	['ခြွ'] = { 't͡ɕʰw', 'hkrw', 'khrv', 'chw', 'hcw' },
	['ခွ'] = { 'kʰw', 'hkw', 'khv', 'hkw', 'hkw' },
	['ဂ'] = { 'ɡ', 'g', 'g', 'g', 'g' },
	['ဂျ'] = { 'd͡ʑ', 'gy', 'gy', 'gy', 'j' },
	['ဂြ'] = { 'd͡ʑ', 'gr', 'gr', 'gy', 'j' },
	['ဂျွ'] = { 'd͡ʑw', 'gyw', 'gyv', 'gyw', 'jw' },
	['ဂွ'] = { 'ɡw', 'gw', 'gv', 'gw', 'gw' },
	['ဃ'] = { 'ɡ', 'gh', 'gh', 'g', 'g' },
	['င'] = { 'ŋ', 'ng', 'ṅ', 'ng', 'ng' },
	['ငှ'] = { 'ŋ̊', 'hng', 'ṅh', 'hng', 'hng' },
	['ငြ'] = { 'ɲ', 'ngr', 'ṅr', 'ny', 'ny' },
	['ငြှ'] = { 'ɲ̊', 'hngr', 'ṅrh', 'hny', 'hny' },
	['ငွ'] = { 'ŋw', 'ngw', 'ṅv', 'ngw', 'ngw' },
	['ငွှ'] = { 'ŋ̊w', 'hngw', 'ṅvh', 'hngw', 'hngw' },
	['စ'] = { 's', 'c', 'c', 's', 's' },
	['စွ'] = { 'sw', 'cw', 'cv', 'sw', 'sw' },
	['ဆ'] = { 'sʰ', 'hc', 'ch', 'hs', 'hs' },
	['ဆွ'] = { 'sʰw', 'hcw', 'chv', 'hsw', 'hsw' },
	['ဇ'] = { 'z', 'j', 'j', 'z', 'z' },
	['ဇွ'] = { 'zw', 'jw', 'jv', 'zw', 'zw' },
	['ဈ'] = { 'z', 'jh', 'jh', 'z', 'z' },
	['ဉ'] = { 'ɲ', 'ny', 'ñ', 'ny', 'ny' },
	['ည'] = { 'ɲ', 'ny', 'ññ', 'ny', 'ny' },
	['ဉှ'] = { 'ɲ̊', 'hny', 'ñh', 'hny', 'hny' },
	['ညှ'] = { 'ɲ̊', 'hny', 'ññh', 'hny', 'hny' },
	['ညွ'] = { 'ɲw', 'nyw', 'ñv', 'nyw', 'nyw' },
	['ညွှ'] = { 'ɲ̊w', 'hnyw', 'ñvh', 'hnyw', 'hnyw' },
	['ဋ'] = { 't', 't', 'ṭ', 't', 't' },
	['ဌ'] = { 'tʰ', 'ht', 'ṭh', 'ht', 'ht' },
	['ဍ'] = { 'd', 'd', 'ḍ', 'd', 'd' },
	['ဎ'] = { 'd', 'dh', 'ḍh', 'd', 'd' },
	['ဏ'] = { 'n', 'n', 'ṇ', 'n', 'n' },
	['ဏှ'] = { 'n̥', 'hn', 'ṇh', 'hn', 'hn' },
	['တ'] = { 't', 't', 't', 't', 't' },
	['တျ'] = { 'tj', 'ty', 'ty', 'ty', 'ty' },
	['တြ'] = { 'tɹ', 'tr', 'tr', 'tr', 'tr' },
	['တွ'] = { 'tw', 'tw', 'tv', 'tw', 'tw' },
	['ထ'] = { 'tʰ', 'ht', 'th', 'ht', 'ht' },
	['ထွ'] = { 'tʰw', 'htw', 'thv', 'htw', 'htw' },
	['ဒ'] = { 'd', 'd', 'd', 'd', 'd' },
	['ဒျ'] = { 'dj', 'dy', 'dy', 'dy', 'dy' },
	['ဒြ'] = { 'dɹ', 'dr', 'dr', 'dr', 'dr' },
	['ဒွ'] = { 'dw', 'dw', 'dv', 'dw', 'dw' },
	['ဓ'] = { 'd', 'dh', 'dh', 'd', 'd' },
	['န'] = { 'n', 'n', 'n', 'n', 'n' },
	['နှ'] = { 'n̥', 'hn', 'nh', 'hn', 'hn' },
	['နွ'] = { 'nw', 'nw', 'nv', 'nw', 'nw' },
	['နွှ'] = { 'n̥w', 'hnw', 'nvh', 'hnw', 'hnw' },
	['ပ'] = { 'p', 'p', 'p', 'p', 'p' },
	['ပျ'] = { 'pj', 'py', 'py', 'py', 'py' },
	['ပြ'] = { 'pj', 'pr', 'pr', 'py', 'py' },
	['ပြွ'] = { 'pw', 'prw', 'prv', 'pw', 'pw' },
	['ပွ'] = { 'pw', 'pw', 'pv', 'pw', 'pw' },
	['ဖ'] = { 'pʰ', 'hp', 'ph', 'hp', 'hp' },
	['ဖျ'] = { 'pʰj', 'hpy', 'phy', 'hpy', 'hpy' },
	['ဖြ'] = { 'pʰj', 'hpr', 'phr', 'hpy', 'hpy' },
	['ဖွ'] = { 'pʰw', 'hpw', 'phv', 'hpw', 'hpw' },
	['ဗ'] = { 'b', 'b', 'b', 'b', 'b' },
	['ဗျ'] = { 'bj', 'by', 'by', 'by', 'by' },
	['ဗြ'] = { 'bj', 'br', 'br', 'by', 'by' },
	['ဗွ'] = { 'bw', 'bw', 'bv', 'bw', 'bw' },
	['ဘ'] = { 'b', 'bh', 'bh', 'b', 'b' },
	['-ဘ'] = { 'pʰ', 'bh', 'bh', 'hp', 'hp' },
	['ဘွ'] = { 'bw', 'bhw', 'bhv', 'bw', 'bw' },
	['-ဘွ'] = { 'pʰw', 'bhw', 'bhv', 'hpw', 'hpw' },
	['မ'] = { 'm', 'm', 'm', 'm', 'm' },
	['မှ'] = { 'm̥', 'hm', 'mh', 'hm', 'hm' },
	['မျ'] = { 'mj', 'my', 'my', 'my', 'my' },
	['မျှ'] = { 'm̥j', 'hmy', 'myh', 'hmy', 'hmy' },
	['မြ'] = { 'mj', 'mr', 'mr', 'my', 'my' },
	['မြှ'] = { 'm̥j', 'hmr', 'mrh', 'hmy', 'hmy' },
	['မြွ'] = { 'mjw', 'mrw', 'mrv', 'myw', 'myw' },
	['မြွှ'] = { 'm̥w', 'hmrw', 'mrvh', 'hmw', 'hmw' },
	['မွ'] = { 'mw', 'mw', 'mv', 'mw', 'mw' },
	['မွှ'] = { 'm̥w', 'hmw', 'mvh', 'hmw', 'hmw' },
	['ယ'] = { 'j', 'y', 'y', 'y', 'y' },
	['ယှ'] = { 'ʃ', 'hy', 'yh', 'sh', 'hy' },
	['သျှ'] = { 'ʃ', 'hsy', 'syh', 'sh', 'hy' },
	['ယွ'] = { 'jw', 'yw', 'yv', 'yw', 'yw' },
	['ရ'] = { 'j', 'r', 'r', 'y', 'y' },
	['*ရ'] = { 'ɹ', 'r', 'r', 'r', 'r' },
	['ရှ'] = { 'ʃ', 'hr', 'rh', 'sh', 'hy' },
	['ရွ'] = { 'jw', 'rw', 'rv', 'yw', 'yw' },
	['ရွှ'] = { 'ʃw', 'hrw', 'rvh', 'shw', 'hyw' },
	['လ'] = { 'l', 'l', 'l', 'l', 'l' },
	['လှ'] = { 'l̥', 'hl', 'lh', 'hl', 'hl' },
	['လျ'] = { 'j', 'ly', 'ly', 'y', 'y' },
	['+သျှ'] = { 'j', 'hsy', 'syh', 'y', 'y' },
	['*လျ'] = { 'lj', 'ly', 'ly', 'ly', 'ly' },
	['လျှ'] = { 'ʃ', 'hly', 'lyh', 'sh', 'hy' },
	['*လျှ'] = { 'l̥j', 'hly', 'lyh', 'hly', 'hly' },
	['လွ'] = { 'lw', 'lw', 'lv', 'lw', 'lw' },
	['လွှ'] = { 'l̥w', 'hlw', 'lvh', 'hlw', 'hlw' },
	['ဝ'] = { 'w', 'w', 'v', 'w', 'w' },
	['ဝှ'] = { 'ʍ', 'hw', 'vh', 'hw', 'hw' },
	['သ'] = { 'θ', 's', 's', 'th', 'th' },
	['+သ'] = { 'ð', 's', 's', 'dh', 'th' },
	['သွ'] = { 'θw', 'sw', 'sv', 'thw', 'thw' },
	['+သွ'] = { 'ðw', 'sw', 'sv', 'dhw', 'thw' },
	['ဟ'] = { 'h', 'h', 'h', 'h', 'h' },
	['ဟွ'] = { 'hw', 'hw', 'hv', 'hw', 'hw' },
	['ဠ'] = { 'l', 'l', 'ḷ', 'l', 'l' },
	['အ'] = { 'ʔ', '', '’', '', '' },
	['ဿ'] = { nil, 'ss', 'ss', nil, nil },
	[''] = { 'ʔ', '', '', '', '' },
	['-'] = { '', '', '', '', '' },
	-- Bare medials: used when an orthographic cluster is romanised one
	-- character at a time because the whole cluster has no row of its own.
	['ျ'] = { nil, 'y', 'y', nil, nil },
	['ြ'] = { nil, 'r', 'r', nil, nil },
	['ွ'] = { nil, 'w', 'v', nil, nil },
}
-- Rhymes (vowel plus any written final), one row per written sequence.
-- Each row lists the value for every system, in system_list order.
-- Special keys:
--   ''   the rhyme of a syllable with no written vowel or final;
--   "'"  the row used by process() for phonetic systems when the syllable
--        carries the schwa marker;
--   keys ending in '2' or '3' are alternative rows for the same written final,
--        reached when the respelling carries that digit after the syllable.
local final_table = {
--[[ IPA MLCTS ALA-LC BGN/PCGN Okell ]]
[''] = { 'a̰', 'a.', 'a', 'a.', 'á' },
['က်'] = { 'ɛʔ', 'ak', 'ak‘', 'et', 'eʔ' },
['င်'] = { 'ɪ̀ɴ', 'ang', 'aṅ‘', 'in', 'iñ' },
['စ်'] = { 'ɪʔ', 'ac', 'ac‘', 'it', 'iʔ' },
['ည်'] = { 'ì', 'any', 'aññ‘', 'i', 'i' },
['ည်2'] = { 'è', 'any', 'aññ‘', 'e', 'ei' },
['ည်3'] = { 'ɛ̀', 'any', 'aññ‘', 'è', 'e' },
['ဉ်'] = { 'ɪ̀ɴ', 'any', 'añ‘', 'in', 'iñ' },
['တ်'] = { 'aʔ', 'at', 'at‘', 'at', 'aʔ' },
['န်'] = { 'àɴ', 'an', 'an‘', 'an', 'añ' },
['ပ်'] = { 'aʔ', 'ap', 'ap‘', 'at', 'aʔ' },
['မ်'] = { 'àɴ', 'am', 'am‘', 'an', 'añ' },
['ယ်'] = { 'ɛ̀', 'ai', 'ay‘', 'è', 'e' },
['ံ'] = { 'àɴ', 'am', 'aṃ', 'an', 'añ' },
['ာ'] = { 'à', 'a', 'ā', 'a', 'a' },
['ါ'] = { 'à', 'a', 'ā', 'a', 'a' },
['ိ'] = { 'ḭ', 'i.', 'i', 'i.', 'í' },
['ိတ်'] = { 'eɪʔ', 'it', 'it‘', 'eik', 'eiʔ' },
['ိန်'] = { 'èɪɴ', 'in', 'in‘', 'ein', 'eiñ' },
['ိပ်'] = { 'eɪʔ', 'ip', 'ip‘', 'eik', 'eiʔ' },
['ိမ်'] = { 'èɪɴ', 'im', 'im‘', 'ein', 'eiñ' },
['ိံ'] = { 'èɪɴ', 'im', 'iṃ', 'ein', 'eiñ' },
['ီ'] = { 'ì', 'i', 'ī', 'i', 'i' },
['ု'] = { 'ṵ', 'u.', 'u', 'u.', 'ú' },
['ုတ်'] = { 'oʊʔ', 'ut', 'ut‘', 'ok', 'ouʔ' },
['ုန်'] = { 'òʊɴ', 'un', 'un‘', 'on', 'ouñ' },
['ုပ်'] = { 'oʊʔ', 'up', 'up‘', 'ok', 'ouʔ' },
['ုမ်'] = { 'òʊɴ', 'um', 'um‘', 'on', 'ouñ' },
['ုံ'] = { 'òʊɴ', 'um', 'uṃ', 'on', 'ouñ' },
['ူ'] = { 'ù', 'u', 'ū', 'u', 'u' },
['ေ'] = { 'è', 'e', 'e', 'e', 'ei' },
['ဲ'] = { 'ɛ́', 'ai:', 'ai', 'è:', 'è' },
['ော'] = { 'ɔ́', 'au:', 'o', 'aw:', 'ò' },
['ောက်'] = { 'aʊʔ', 'auk', 'ok‘', 'auk', 'auʔ' },
['ောင်'] = { 'àʊɴ', 'aung', 'oṅ‘', 'aung', 'auñ' },
['ော်'] = { 'ɔ̀', 'au', 'o‘', 'aw', 'o' },
['ို'] = { 'ò', 'ui', 'ui', 'o', 'ou' },
['ိုက်'] = { 'aɪʔ', 'uik', 'uik‘', 'aik', 'aiʔ' },
['ိုင်'] = { 'àɪɴ', 'uing', 'uiṅ‘', 'aing', 'aiñ' },
['ွတ်'] = { 'ʊʔ', 'wat', 'vat‘', 'ut', 'uʔ' },
['ွန်'] = { 'ʊ̀ɴ', 'wan', 'van‘', 'un', 'uñ' },
['ွပ်'] = { 'ʊʔ', 'wap', 'vap‘', 'ut', 'uʔ' },
['ွမ်'] = { 'ʊ̀ɴ', 'wam', 'vam‘', 'un', 'uñ' },
['ွံ'] = { 'ʊ̀ɴ', 'wam', 'vaṃ', 'un', 'uñ' },
["'"] = { 'ə', 'a', 'a', 'ă', 'ă' },
['်'] = { '', '', '‘', '', '' },
}
-- Vowel nuclei used when a rhyme has no row of its own in final_table and is
-- assembled from a nucleus followed by a final consonant.
-- Each row lists the value for every system, in system_list order.
local nucleus_table = {
	--[[ IPA MLCTS ALA-LC BGN/PCGN Okell ]]
	['']   = { 'à', 'a', 'a', 'a', 'a' },
	['ိ']  = { 'ì', 'i', 'i', 'i', 'i' },
	['ု']  = { 'ù', 'u', 'u', 'u', 'u' },
	['ော'] = { 'ɔ̀', 'au', 'o', 'aw', 'o' },
	['ေါ'] = { 'ɔ̀', 'au', 'o', 'aw', 'o' },
	['ွ']  = { 'ʊ̀', 'wa', 'va', 'u', 'u' },
}
-- Independent vowel letters and symbols that stand for a whole syllable.
-- Each row lists the value for every system, in system_list order.
-- process() checks this table first: when the "initial" of a syllable is one
-- of these keys, the letter is treated as the rhyme instead.
local indep_letter_table = {
--[[ IPA MLCTS ALA-LC BGN/PCGN Okell ]]
['ဣ'] = { 'ḭ', 'i.', 'i', 'i.', 'í' },
['ဤ'] = { 'ì', 'i', 'ī', 'i', 'i' },
['ဥ'] = { 'ṵ', 'u.', 'u', 'u.', 'ú' },
['ဦ'] = { 'ù', 'u', 'ū', 'u', 'u' },
['ဧ'] = { 'è', 'e', 'e', 'e', 'ei' },
['၏'] = { 'ɛ̰', 'e', 'e*', 'è.', 'é' },
['ဩ'] = { 'ɔ́', 'au:', 'o', 'aw:', 'ò' },
['ဪ'] = { 'ɔ̀', 'au', 'o‘', 'aw', 'o' },
['၌'] = { 'n̥aɪʔ', 'hnai.', 'n*', 'hnaik', 'hnaiʔ' },
['၍'] = { 'jwḛ', 'rwe', 'r*', 'ywe.', 'yweí' },
}
-- Written tone marks and what each system appends or inserts for them.
-- Each row lists the value for every system, in system_list order.
-- Several cells are bare combining diacritics: process() places them on a
-- vowel for systems 1 and 5 and simply appends them for the others.
local tone_table = {
--[[ IPA MLCTS ALA-LC BGN/PCGN Okell ]]
['း'] = { '́', ':', '″', ':', '̀' },
['့'] = { '̰', '.', '′', '.', '́' },
}
-- For each system index, the set of two-letter sequences that would be misread
-- as a single cluster if two syllables were simply run together.
-- concatenate() looks up the last letter of one syllable plus the first letter
-- of the next and inserts a hyphen when the pair is present here.
local ambig_intersyl = {
	[1] = {}, --[[IPA phonetic]]
	[2] = { --[[MLCTS orthographic]]
		ky = 1, kr = 1, kw = 1,
		gy = 1, gr = 1, gw = 1,
		ny = 1, ng = 1,
		cw = 1, tw = 1, nw = 1,
		py = 1, pr = 1, pw = 1,
		my = 1, mr = 1, mw = 1,
	},
	[3] = {}, --[[ALA-LC orthographic]]
	[4] = { --[[BGN/PCGN phonetic]]
		ky = 1, kr = 1, kw = 1,
		gy = 1, gr = 1, gw = 1,
		ny = 1, ng = 1,
		cw = 1, tw = 1, nw = 1,
		tr = 1,
		py = 1, pr = 1, pw = 1,
		my = 1, mr = 1, mw = 1,
	},
	[5] = { --[[Okell phonetic]]
		ou = 1,
	},
}
--[[
Helper tables for reverse transcriptions from romanisations to Myanmar script:
Used to exhibit cases where phonetic transcriptions or romanizations are not reversible.
The following is made for the BGN/PCGN romanization.
]]
-- Latin respelling fragment -> Burmese-script fragment.
-- generate_respelling() looks up the consonant part and the vowel part of each
-- Latin syllable separately in this one table, so it holds both initials
-- (k, hk, ...) and rhymes (a, a\, a/, a?, a~, ...).
-- In rhyme keys the suffixes \ / ? ~ and ' select different Burmese finals;
-- presumably they stand for tones, a stop final, a nasal final and schwa
-- respectively -- TODO confirm against the template documentation.
-- NOTE(review): the key `hw` appears twice below. A Lua table constructor
-- keeps only one of the two values, so one of these mappings is silently lost.
local reverse_table = {
k = 'က', hk = 'ခ', g = 'ဂ',
kw = 'ကွ', hkw = 'ခွ', gw = 'ဂွ',
c = 'ကျ', hc = 'ချ', j = 'ဂျ',
cw = 'ကျွ', hcw = 'ချွ', jw = 'ဂျွ',
ng = 'င', hng = 'ငှ',
ngw = 'ငွ', hngw = 'ငွှ',
s = 'စ', hs = 'ဆ', z = 'ဇ',
sw = 'စွ', hsw = 'ဆွ', zw = 'ဇွ',
ny = 'ည', hny = 'ညှ',
nyw = 'ညွ', hnyw = 'ညွှ',
t = 'တ', ht = 'ထ', d = 'ဒ',
tw = 'တွ', htw = 'ထွ', dw = 'ဒွ',
n = 'န', hn = 'နှ',
nw = 'နွ', hnw = 'နွှ',
p = 'ပ', hp = 'ဖ', b = 'ဗ',
py = 'ပျ', hpy = 'ဖျ', by = 'ဗျ',
pw = 'ပွ', hpw = 'ဖွ', bw = 'ဗွ',
m = 'မ', hm = 'မှ',
my = 'မျ', hmy = 'မျှ',
mw = 'မွ', hmw = 'မွှ',
y = 'ယ', hy = 'ရှ',
yw = 'ယွ', hyw = 'ရွှ',
r = '*ရ',
th = 'သ', ['+th'] = '+သ',
thw = 'သွ',
l = 'လ', hl = 'လှ',
ly = '*လျ', hly = '*လျှ',
lw = 'လွ', hlw = 'လွှ',
w = 'ဝ', hw = 'ဝှ', --[[FIXME: duplicate mapping, which one is correct?]]
h = 'ဟ', hw = 'ဟွ', --[[FIXME: duplicate mapping, which one is correct?]]
[''] = 'အ',
a = 'ာ', ['a\\'] = 'ား', ['a/'] = '', ['a?'] = 'တ်',
['a~'] = 'န်', ['a\\~'] = 'န်း', ['a/~'] = 'န့်',
i = 'ီ', ['i\\'] = 'ီး', ['i/'] = 'ိ', ['i?'] = 'စ်',
['i~'] = 'င်', ['i\\~'] = 'င်း', ['i/~'] = 'င့်',
ei = 'ေ', ['ei\\'] = 'ေး', ['ei/'] = 'ေ့', ['ei?'] = 'ိတ်',
['ei~'] = 'ိန်', ['ei\\~'] = 'ိန်း', ['ei/~'] = 'ိန့်',
e = 'ယ်', ['e\\'] = 'ဲ', ['e/'] = 'ယ့်', ['e?'] = 'က်',
['ai~'] = 'ိုင်', ['ai\\~'] = 'ိုင်း', ['ai/~'] = 'ိုင့်', ['ai?'] = 'ိုက်',
o = 'ော်', ['o\\'] = 'ော', ['o/'] = 'ော့', ['au?'] = 'ောက်',
['au~'] = 'ောင်', ['au\\~'] = 'ောင်း', ['au/~'] = 'ောင့်',
ou = 'ို', ['ou\\'] = 'ိုး', ['ou/'] = 'ို့', ['ou?'] = 'ုပ်',
['ou~'] = 'ုန်', ['ou\\~'] = 'ုန်း', ['ou/~'] = 'ုန့်',
u = 'ူ', ['u\\'] = 'ူး', ['u/'] = 'ု', ['u?'] = 'ွတ်',
['u~'] = 'ွန်', ['u\\~'] = 'ွန်း', ['u/~'] = 'ွန့်',
["a'"] = "'",
}
--- Split Burmese text into syllables.
-- Spaces are inserted at every syllable boundary that the patterns below
-- recognise, and the resulting space-separated string is returned.
--
-- Every substitution goes through the module-level `gsub`
-- (mw.ustring.gsub). The patterns contain Burmese character classes and `.`
-- wildcards that must match whole code points; the byte-oriented
-- string.gsub reached through `text:gsub(...)` would match single UTF-8 bytes
-- instead, and the boundary loop below (which tests with mw.ustring.match)
-- could then spin forever without making a replacement.
-- @param text string  Burmese text, optionally with the markers + - * ' 2 3
-- @return string      the same text with single spaces between syllables
function syllabify(text)
	-- Detach a schwa marker and/or a run of + - * markers from what precedes
	-- them; the markers belong to the syllable that follows.
	text = gsub(text, "('?)([%+%-%*]*)", function(a, b)
		if a .. b ~= '' then return a .. ' ' .. b end
	end)

	-- Independent vowel letters and symbols start a syllable of their own.
	-- How the two following characters are attached depends on whether the
	-- second of them is a virama or an asat.
	text = gsub(text, '([ဣဤဥဦဧဩဪ၏၌၍][့း်]?)(.?)(.?)', function(a, b, c)
		return
			c == '္' and ' ' .. a .. b .. ' ' .. c or
			c == '်' and ' ' .. a .. b .. c or
			' ' .. a .. ' ' .. b .. c
	end) .. ' '

	-- An asat (optionally followed by a tone mark and a schwa marker) closes
	-- a syllable.
	text = gsub(text, "(်း?'?)", '%1 ')
	-- Move a tone mark written before a final consonant to after it.
	text = gsub(text, '([း့])([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအ]်)', '%2%1')

	-- Break between an open syllable and a following consonant that starts a
	-- new syllable (i.e. one not followed by asat or virama). Matches cannot
	-- overlap within a single gsub pass, so repeat until none is left.
	local breaker = '([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဿ][ျြွှ]*[ံ့းွာါါိီုူေဲ]*)([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဿ][့]?[^့်္])'
	while match(text, breaker) do
		text = gsub(text, breaker, '%1 %2')
	end

	text = gsub(text, '္', ' , ')            -- a virama becomes a temporary ' , ' separator
	text = gsub(text, ' +', ' ')              -- collapse runs of spaces
	text = gsub(text, '^ ?(.*[^ ]) ?$', '%1') -- trim one leading/trailing space
	text = gsub(text, ' , ', ' ')             -- drop the temporary separator
	text = gsub(text, ' ([23])', '%1')        -- re-attach a variant digit to its syllable
	return text
end
--- Convert a string one character at a time through a lookup table.
-- For each character the table entry is fetched; when the entry has a value
-- at `system_index` that value is used, otherwise the entry itself is used
-- (this is what happens for tables whose values are plain strings).
-- @param initial_string string  text to convert character by character
-- @param system_index           column to read from each entry (may be nil)
-- @param ref_table table        lookup table keyed by single characters
-- @return string                the concatenated converted pieces
-- Raises 'Initial data not found.' when a character has no entry.
function initial_by_char(initial_string, system_index, ref_table)
	local pieces = {}
	for glyph in mw.text.gsplit(initial_string, '') do
		local entry = ref_table[glyph]
		if not entry then
			error('Initial data not found.')
		end
		pieces[#pieces + 1] = entry[system_index] or entry
	end
	return table.concat(pieces)
end
--- Transcribe one syllable for one system.
-- All working variables are local: the previous version assigned
-- first_data, second_data, tone_value and tone_data as globals.
-- @param initial string   initial cluster, possibly prefixed by + - * markers
--                         and possibly followed by '/'
-- @param final string     written rhyme without its tone mark
-- @param tone string      written tone mark, or ''
-- @param schwa string     "'" when the syllable carries the schwa marker, else ''
-- @param system table     entry of system_list (its `type` field is read)
-- @param system_index number  column to read from the data tables
-- @return string          NFC-normalised transcription of the syllable
-- Raises 'Initial data not found.' / 'Tone data not found.' on missing data.
function process(initial, final, tone, schwa, system, system_index)
	-- Phonetic systems: wa-hswe before one of these finals is part of the
	-- rhyme rather than the initial, so move it across.
	if system.type == 'phonetic' and match(initial .. final, 'ွှ?[တနပမံ]') then
		initial = gsub(initial, '[ွ/]', '')
		final = 'ွ' .. final
	else
		initial = gsub(initial, '/', '')
	end

	-- Phonetic systems: substitute the voiced consonant for a '+'-marked one.
	local initial_new = system.type == 'phonetic' and gsub(initial, '%+.', initial_voicing) or initial

	-- An independent vowel letter is a rhyme in itself, so the initial
	-- becomes one of the two special keys '' or '-' and the letter joins
	-- the final.
	if indep_letter_table[initial_new] then
		initial_new = match(initial_new, '[၌၍]') and '-' or ''
		final = initial .. final
	end

	if initial_new == 'မြွ' then require('Module:debug').track('my-pron/mrw') end

	-- Look the initial up as written, then without its markers, and for
	-- orthographic systems finally character by character.
	local initial_data =
		initial_table[initial_new]
		or initial_table[gsub(initial_new, '[%+%-%*]', '')]
		or system.type == 'orthographic' and initial_by_char(initial_new, system_index, initial_table)
		or error('Initial data not found.')
	-- initial_data is a plain string when it came from initial_by_char;
	-- indexing it then yields nil and the string itself is kept.
	local initial_value =
		initial_data[system_index]
		or initial_data

	-- System 5 keeps the unvoiced spelling of a '+'-marked initial and
	-- underlines its consonant part instead.
	if match(initial, '^%+') and system_index == 5 then
		initial_value = initial_table[gsub(initial, '%+', '')][system_index]
		initial_value = gsub(initial_value, '^([^rwy]+)', '<u>%1</u>')
	end

	-- Rhyme: the schwa row (phonetic systems only), a direct row, a row with
	-- an implied asat (phonetic systems only), an independent letter, or else
	-- a rhyme assembled from nucleus + final consonant.
	local final_value =
		final_table[system.type .. schwa == "phonetic'" and schwa or final]
		or system.type == 'phonetic' and
			final_table[final .. '်']
		or indep_letter_table[final]
		or gsub(final, '^([^်]*)([^်])(်?)$', function(first, second, third)
			local first_data =
				nucleus_table[first]
				or final_table[first]
				or indep_letter_table[first]
				or first
			local second_data =
				initial_table[second]
				or second
			first =
				first_data ~= first and first_data[system_index]
				or first
			second = second_data ~= second
				and second_data[system_index] .. ((system_index == 3 and third ~= '') and '‘' or '')
				or second
			-- Drop the first '.' or ':' of the assembled rhyme; the
			-- parentheses discard gsub's count.
			return (gsub(first .. second, '([%.:])(.*)', '%2'))
		end)
	-- Decompose so that tone diacritics can be stripped and re-added as
	-- separate combining characters.
	final_value = mw.ustring.toNFD(
		type(final_value) == 'table' and final_value[system_index] or final_value
	)

	local tone_value
	if tone == '' then
		tone_value = ''
	else
		-- An explicit tone mark replaces the tone built into the rhyme.
		if system_index ~= 4 then final_value = gsub(final_value, '̀', '') end
		final_value = gsub(final_value, '[́:%.]', '')
		if system.type .. schwa == "phonetic'" then
			tone_value = ''
		else
			local tone_data = tone_table[tone] or error('Tone data not found.')
			tone_value = tone_data[system_index]
		end
	end

	if system_index == 1 then
		-- Tone mark goes right after a leading vowel letter.
		final_value = gsub(final_value, '^([aeəɛiɪoɔuʊ])', '%1' .. tone_value)
	elseif system_index == 5 then
		-- Tone mark goes on the last vowel letter of the rhyme.
		final_value = gsub(final_value, '([aeiou])([^aeiou]*)$', '%1' .. tone_value .. '%2')
	else
		final_value = final_value .. tone_value
	end

	return mw.ustring.toNFC(initial_value .. final_value)
end
--- Remove every occurrence of the separator character from `text`.
-- Returns exactly one value. gsub also returns a replacement count, and
-- letting it escape made callers such as
-- `table.insert(list, concatenate(...))` receive a stray third argument.
-- NOTE(review): the pattern below is a plain space, yet the function name and
-- its callers (which go on to match on ordinary spaces in the result) suggest
-- a wide space character was intended here -- confirm against the live page.
-- @param text string
-- @return string
function remove_wide_space(text)
	return (gsub(text, ' ', ''))
end
--- Join the transcribed syllables of one phrase.
-- System 1 simply runs the syllables together. Every other system joins them
-- and puts a hyphen wherever dropping the boundary would be ambiguous.
-- Always returns exactly one value, so the result can be passed straight to
-- table.insert.
-- @param set table            list of transcribed syllables
-- @param system_index number  index into system_list / ambig_intersyl
-- @return string
function concatenate(set, system_index)
	if system_index == 1 then
		-- Parentheses: never forward more than one value from the helper.
		return (remove_wide_space(table.concat(set)))
	end

	local result = remove_wide_space(table.concat(set, ' '))

	-- A boundary is ambiguous when the two letters around it form a listed
	-- pair, or (systems 2 and 4 only) when a consonant would attach to the
	-- wrong vowel.
	local function join(previous, following, after_following)
		if ambig_intersyl[system_index][previous .. following]
			or (system_index == 2 or system_index == 4)
			and ( match(previous .. ' ' .. following, '[ptkgmngy] [aeiou]')
				or match(previous .. following .. after_following, '[aeiou][ptkmn][rwyg]')
				and not match(after_following, '[aeiou]')
			)
		then
			return previous .. '-' .. following .. after_following
		else
			return previous .. following .. after_following
		end
	end

	-- Each pass consumes the characters around a boundary, so adjacent
	-- boundaries need further passes; the pass count is fixed at three.
	for _ = 1, 3 do
		result = gsub(result, '(.) (.)([^ ]?)', join)
	end
	return result
end
--- Transcribe a word or phrase in one system.
-- @param word string            Burmese text (respelling markers allowed)
-- @param pronunciations table   optional; when given and `mode` is not
--                               'translit_module', the result is appended to
--                               pronunciations[system_index] (created on demand)
-- @param system table           entry of system_list
-- @param system_index number    index of `system` in system_list
-- @param mode string            optional; 'translit_module' suppresses recording
-- @return string                the transcription
function export.get_romanisation(word, pronunciations, system, system_index, mode)
	-- Spaces separate phrases; they are turned into '|' first because
	-- syllabify uses spaces for syllable boundaries.
	-- The module-level gsub (mw.ustring.gsub) is used throughout: the
	-- patterns contain Burmese character classes that the byte-oriented
	-- string.gsub cannot match.
	word = syllabify((gsub(word, ' ', '|')))
	word = gsub(word, 'ါ', 'ာ')
	if system.type == 'phonetic' then
		-- Insert a wa-hswe after wa before one of these finals, so that the
		-- syllable is read with the wa-hswe rhymes of final_table.
		word = gsub(word, 'ဝ([တနပမံ])', 'ဝွ%1')
	end

	local function transcribe(initial, final, tone, schwa)
		return process(initial, final, tone, schwa, system, system_index)
	end

	local sentences = {}
	for phrase in mw.text.gsplit(word, '|', true) do
		local temp, syllable = {}, mw.text.split(phrase, ' ', true)
		for syllable_index = 1, #syllable do
			-- Put the asat before the tone mark so the tone is captured last.
			syllable[syllable_index] = gsub(syllable[syllable_index], '([း့])(်)', '%2%1')
			-- Captures: initial (markers + consonant + medials + optional '/'),
			-- rhyme, tone mark, schwa marker.
			temp[syllable_index] = gsub(
				syllable[syllable_index],
				"^([%+%-%*]*[ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဣဤဥဦဧဩဪ၏၌၍ဿ][ျြ]?ွ?ှ?/?)([^း့']*)([း့]?)('?)$",
				transcribe)
		end
		-- Take only the first return value: a second one would be read by
		-- table.insert as the value to insert at a (non-numeric) position.
		local joined = concatenate(temp, system_index)
		table.insert(sentences, joined)
	end

	local romanised = table.concat(sentences, ' ')
	if mode ~= 'translit_module' and pronunciations then
		local recorded = pronunciations[system_index]
		if recorded == nil then
			recorded = {}
			pronunciations[system_index] = recorded
		end
		table.insert(recorded, romanised)
	end
	return romanised
end
--- Turn a Latin-letter respelling into Burmese script.
-- Text that already contains a character in the ranges checked below is
-- returned after normalisation only. Otherwise each Latin syllable is split
-- into voicing mark, consonant part, optional '/' and vowel part, and the
-- parts are converted through reverse_table.
-- NOTE(review): the first substitution below replaces a space with what
-- displays as the same space; one of the two may originally have been a
-- different space character -- confirm against the live page.
-- @param text string
-- @return string
function generate_respelling(text)
	local normalised = text:gsub(' ', ' ')
	normalised = normalised:gsub('ါ', 'ာ')
	if match(normalised, '[က-႟ꩠ-ꩻ]') then
		return normalised
	end

	-- Converts one Latin syllable; the consonant part falls back to a
	-- character-by-character lookup when it has no entry as a whole.
	local function to_burmese(voicing_mark, latin_initial, opt_sep, latin_final)
		local burmese_initial = reverse_table[latin_initial]
		if not burmese_initial then
			burmese_initial = initial_by_char(latin_initial, nil, reverse_table)
		end
		return voicing_mark .. burmese_initial .. opt_sep .. reverse_table[latin_final]
	end

	local converted = normalised:gsub(
		"(%+?)([^%?%+'/\\~aeiou ]*)(/?)([%?'/\\~aeiou]+)",
		to_burmese)
	return converted
end
--- Produce one line of test output: the transcription of a word in every
-- system, separated by ' | ', with underlining shown as parentheses.
-- Orthographic systems transcribe `word`; phonetic systems transcribe
-- `respelling` when one is given and `word` otherwise.
-- @param word string        headword (Burmese or Latin respelling)
-- @param respelling string  optional phonetic respelling
-- @return string
function export.generate_tests(word, respelling)
	-- generate_respelling is a plain function, not a string method.
	word = generate_respelling(word)
	local p = {
		orthographic = word,
		phonetic = respelling and generate_respelling(respelling) or word,
	}
	local result = {}
	for system_index, system in ipairs(system_list) do
		-- get_romanisation appends to the list it is handed, so give it a
		-- throwaway one, and use the string it returns.
		local scratch = { [system_index] = {} }
		local romanised = export.get_romanisation(p[system.type], scratch, system, system_index)
		table.insert(result, romanised)
	end
	local line = table.concat(result, ' | ')
	line = line:gsub('<u>', '(')
	line = line:gsub('</u>', ')')
	return line
end
--- Format the phonetic respellings for display.
-- When a respelling has the same letters as the page title (ignoring
-- markers), the syllables that differ from the title are wrapped in a bold,
-- coloured <b> element. The respellings are then joined with ', ', voicing
-- markers are resolved to the voiced letters, and the tall form of the aa
-- vowel sign is restored after the consonants that take it.
-- @param phonetic table     list of respellings
-- @param page_title string  title of the current page
-- @return string
function respelling_format(phonetic, page_title)
	local page_title_set = mw.text.split(syllabify(page_title), ' ')
	local new_respellings = {}
	for _, respelling in ipairs(phonetic) do
		local respelling_set = mw.text.split(syllabify(respelling), ' ')
		local bare_respelling = gsub(table.concat(respelling_set), "[%+%-%*']", '')
		local bare_title = gsub(table.concat(page_title_set), 'ါ', 'ာ')
		if bare_respelling == bare_title then
			for index, element in ipairs(respelling_set) do
				if element ~= page_title_set[index] then
					respelling_set[index] = tostring(mw.html.create('b'):attr('style', 'font-size:110%;color:#A32214'):wikitext(element))
				end
			end
		end
		table.insert(new_respellings, table.concat(respelling_set))
	end

	-- remove_wide_space and the module-level gsub (mw.ustring.gsub) are plain
	-- functions; they cannot be called as string methods. The two patterns
	-- below need code-point matching ('.' after a marker, a Burmese
	-- character class), so the byte-oriented string.gsub would not match them.
	-- NOTE(review): if remove_wide_space really strips ordinary spaces it
	-- also strips the one inside the <b style=...> tag built above -- confirm
	-- which character it is meant to remove.
	local formatted = remove_wide_space(table.concat(new_respellings, ', '))
	formatted = gsub(formatted, '[%+%-].', initial_voicing)
	formatted = gsub(formatted, '([ခဂငဒပဝ]ေ?)ာ', '%1ါ')
	return formatted
end
--- Template entry point: build the pronunciation section for an entry.
-- Positional template arguments are phonetic respellings; without any, the
-- headword (parameter `word`, else the page title) is used. Output is three
-- wikitext list items: the respelling(s) when they differ from the headword,
-- the IPA transcription(s), and the romanisations in the remaining systems.
-- @param frame  Scribunto frame; the arguments of its parent frame are read
-- @return string  wikitext
function export.make(frame)
	local args = frame:getParent().args
	local page_title = mw.title.getCurrentTitle().text
	local title = generate_respelling(args.word or page_title)

	-- Copy the positional arguments into an ordinary list: the argument
	-- table provided by Scribunto is a proxy that table.concat cannot read.
	local items = {}
	for _, item in ipairs(args) do
		items[#items + 1] = item
	end
	if #items == 0 then items = { title } end

	local p = {
		phonetic = {},
		-- Orthographic systems always transcribe the headword itself.
		orthographic = { title },
	}
	for _, item in ipairs(items) do
		if item ~= '' then
			table.insert(p.phonetic, generate_respelling(item))
		end
	end

	-- pronunciations[system_index] is the list of transcriptions for that
	-- system. get_romanisation appends to it, so its return value (a string)
	-- must not be stored over the list.
	local pronunciations = {}
	for system_index, system in ipairs(system_list) do
		pronunciations[system_index] = {}
		for _, word in ipairs(p[system.type]) do
			export.get_romanisation(word, pronunciations, system, system_index)
		end
	end

	-- One wikitext list item per entry; they are joined with newlines so that
	-- every '*' starts a line.
	local lines = {}

	if title ~= table.concat(items) then
		table.insert(lines,
			'* Phonetic respelling' .. (#p.phonetic > 1 and 's' or '') ..
			': ' ..
			tostring(mw.html.create('span')
				:attr('lang', 'my')
				:attr('class', 'Mymr')
				:wikitext(
					respelling_format(p.phonetic, page_title)
				))
		)
	end

	-- Two adjacent glottal stops get a syllable dot between them.
	local ipa = gsub(table.concat(pronunciations[1], '/, /'), 'ʔʔ', 'ʔ.ʔ')
	table.insert(lines,
		'* [[Wiktionary:International Phonetic Alphabet|IPA]]' ..
		tostring(mw.html.create('sup'):wikitext(
			'([[Appendix:Burmese pronunciation|key]])'
		))
		.. ': ' ..
		tostring(mw.html.create('span')
			:attr('class', 'IPA')
			:wikitext('/' .. ipa .. '/'))
	)

	local romanisations = {}
	for system_index = 2, #system_list do
		table.insert(romanisations,
			'<em>' .. system_list[system_index].name .. ':</em> ' ..
			table.concat(pronunciations[system_index], '/')
		)
	end
	table.insert(lines,
		'* [[Wiktionary:Burmese transliteration|Romanization:]] ' ..
		table.concat(romanisations, ' • ')
	)

	return table.concat(lines, '\n')
end
return export