Module:mnw-translit
Appearance
- The following documentation is located at Module:mnw-translit/documentation. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
Mark-up
[edit] Traditionally, transliteration of Mon distinguishes the various readings of anusvara. By immediately following it with one of the circled letters Ⓐ, Ⓗ, Ⓞ and Ⓜ, the meaning can be selected as follows:
Combination | Interpretation | Mnemonic |
---|---|---|
ံⒶ | Equivalent to အ် | အ is the independent vowel <a>. |
ံⒽ | Equivalent to ဟ် | ဟ is the letter <h> |
ံⓄ | The vowel is not affected by the coda being a velar consonant | The resulting vowel sound (in the clear register) is /ɔ/ |
ံⓂ | The anusvara represents a final /m/. This is the default interpretation. | /m/ |
This mark-up can be used in the citation and usage example template {{mnw-quote}}.
In all environments, word boundaries in Burmese-script text can be marked up using the HTML tag <wbr>. These tags will be converted to single spaces as part of transliteration. Note that only all-lowercase tags will be recognised for conversion. This may be useful in some obscure circumstances.
-- Module table; export.tr is attached to it below and it is returned at the end of the file.
local export = {}
-- Short alias for the Scribunto Unicode-aware gsub used for every substitution in this module.
local gsub = mw.ustring.gsub
-- Called in this module with numeric code points to build strings, e.g. u(0xa0, 0xa0).
local u = require("Module:string/char")
-- Pattern: any one character, optionally followed by one character in U+0300..U+036F.
-- NOTE(review): not referenced anywhere else in the visible module; confirm whether it is still needed.
local letter_with_mark = "(.["..u(0x0300).."-"..u(0x036F).."]?)"
-- Pre-processing table: expands each single-character medial sign into "္"
-- followed by a consonant letter, so that it is later transliterated through
-- the same tt1 entries as an explicit "္" + consonant sequence.
-- export.tr applies it one character at a time with gsub(text, ".", pre).
local pre = {
["ျ"] = "္ယ", ["ြ"] = "္ရ", ["ွ"] = "္ဝ", ["ှ"] = "္ဟ",
["ၞ"] = "္န", ["ၟ"] = "္မ", ["ၠ"] = "္လ",
}
-- Per-character transliteration table (one source character -> Latin text).
-- export.tr applies it with gsub(text, ".", tt1) after the pre and tt2 passes.
-- Placeholders resolved by later passes in export.tr:
--   "ᵃ" = inherent vowel attached to every consonant,
--   "ʸ" = the sign ဲ, "¡" = the sign ္, "¤" = the sign ်.
local tt1 = {
-- consonants ; Unicode doesn't have exclusive great nya, that looks like ည with another curve, so use ည္ည as it should be.
["က"] = "kᵃ", ["ခ"] = "khᵃ", ["ဂ"] = "gᵃ", ["ဃ"] = "ghᵃ", ["င"] = "ṅᵃ", ["ၚ"] = "ṅᵃ",
["စ"] = "cᵃ", ["ဆ"] = "chᵃ", ["ဇ"] = "jᵃ", ["ၛ"] = "jhᵃ", ["ဉ"] = "ñᵃ", ["ည"] = "ññᵃ", -- ññ -> ñ later (only when lang == "mnw")
["ဋ"] = "ṭᵃ", ["ဌ"] = "ṭhᵃ", ["ဍ"] = "ḍᵃ", ["ဎ"] = "ḍhᵃ", ["ဏ"] = "ṇᵃ",
["တ"] = "tᵃ", ["ထ"] = "thᵃ", ["ဒ"] = "dᵃ", ["ဓ"] = "dhᵃ", ["န"] = "nᵃ",
["ပ"] = "pᵃ", ["ဖ"] = "phᵃ", ["ဗ"] = "bᵃ", ["ဘ"] = "bhᵃ", ["မ"] = "mᵃ",
["ယ"] = "yᵃ", ["ရ"] = "rᵃ", ["လ"] = "lᵃ", ["ဝ"] = "wᵃ", ["သ"] = "sᵃ", ["ဿ"] = "ssᵃ",
["ဟ"] = "hᵃ", ["ဠ"] = "ḷᵃ", ["ၜ"] = "ṗᵃ", ["အ"] = "ʼᵃ", ["ၝ"] = "ḅᵃ",
-- independent vowels (1 char)
["ဣ"] = "ʼi", ["ဥ"] = "ʼu",
["ဨ"] = "ʼe", ["ဩ"] = "ʼo",
-- dependent vowels and diacritics (1 char)
["ါ"] = "ā", ["ာ"] = "ā", ["ိ"] = "i", ["ီ"] = "iṃ", ["ဳ"] = "ī", ["ု"] = "u", ["ူ"] = "ū", ["ဲ"] = "ʸ",
["ဴ"] = "ao", ["ေ"] = "e", ["ဵ"] = "e",
["ံ"] = "ṃ", ["း"] = "ḥ", ["္"] = "¡", ["်"] = "¤",
-- punctuation marks
["၊"] = ",", ["။"] = ".",
-- numerals
["၀"] = "0", ["၁"] = "1", ["၂"] = "2", ["၃"] = "3", ["၄"] = "4",
["၅"] = "5", ["၆"] = "6", ["၇"] = "7", ["၈"] = "8", ["၉"] = "9",
-- zero-width characters U+200B, U+200C and U+200D: rendered as "‼" so that
-- they become visible in the output (display it if it hides in a word)
[u(0x200B)] = "‼", [u(0x200C)] = "‼", [u(0x200D)] = "‼",
}
-- ⒶⒽⓄⓂ markup for anusvara.
-- Lookup table for an anusvara (ံ) immediately followed by one of the circled
-- letters. It is the replacement table for the "ံ[ⒶⒽⓄⓂ]" pattern in tt2, so
-- each key is the full matched text and each value is what replaces it.
local ahom = {
["ံⒶ"] = "အ်", ["ံⒽ"] = "ĥ", -- tt1 has no entry for "ĥ", so it is left untouched by the tt1 pass
["ံⓄ"] = "ံ", -- default action, at least for now.
["ံⓂ"] = "ံ", -- default action
}
-- Multi-character substitutions, applied by export.tr before the
-- per-character table tt1. Each key is passed to gsub as a pattern and each
-- value as the replacement: either a plain string or, for the anusvara
-- markup, a lookup table indexed by the matched text.
-- export.tr walks this table with pairs(), so the order of application is
-- unspecified.
local tt2 = {
["ံ[ⒶⒽⓄⓂ]"] = ahom, -- CAUTION: ahom is a table.
-- vowels (2 chars)
["ဣဳ"] = "ʼī", ["ဥု"] = "ʼū",
["ေါ"] = "o", ["ော"] = "o", ["ဵု"] = "iuw",
}
--- Transliterate Burmese-script Mon text into Latin script.
-- The work is a fixed sequence of substitutions: whitespace/markup handling,
-- expansion of medial signs (pre), multi-character rules (tt2), the
-- per-character table (tt1), and finally resolution of the placeholder
-- characters that tt1 emits. The order of these passes matters.
-- @param text        string to transliterate, or a frame-like table whose
--                    args[1] holds the string (direct template invocation)
-- @param lang        language code; when it is "mnw" the doubled "ññ" is
--                    reduced to "ñ"
-- @param sc          not used by this function; kept for the caller-facing signature
-- @param debug_mode  not used by this function; kept for the caller-facing signature
-- @return the transliterated string, or nil when no text was supplied
function export.tr(text, lang, sc, debug_mode)
	if type(text) == "table" then -- called directly from a template
		text = text.args and text.args[1]
	end
	-- Nothing to transliterate (for example the template was invoked without
	-- an argument): return nil instead of letting gsub raise on a non-string.
	if text == nil then
		return nil
	end
	--Punctuation
	text = gsub(text, "( +)", u(0xa0, 0xa0).."%1") -- 2 NBSP before spaces to widen them.
	text = gsub(text, "<wbr/?>", " ") -- Insert spaces between words.
	-- ှ, optional vowel signs, ်, optional ေ: keep the vowel signs and put a
	-- literal "h" in place of the ှ ... ် pair. This must run before the pre
	-- pass, which would otherwise expand ှ.
	text = gsub(text, "([ှ])(ေ?ါ?ာ?)([်])(ေ?)",
		function(_ha, before, _asat, after) return before..after.."h" end)
	-- Expand single-character medial signs, one character at a time.
	text = gsub(text, ".", pre)
	text = gsub(text, "ဲါ", "ါဲ") -- fixed ay+aa to aa+ay; it often occurs
	-- Multi-character rules. pairs() order is unspecified; no key of tt2 can
	-- match text produced by another entry, so the order does not affect the result.
	for pattern, replacement in pairs(tt2) do
		text = gsub(text, pattern, replacement)
	end
	-- Per-character transliteration; emits the placeholders ᵃ, ʸ, ¡ and ¤.
	text = gsub(text, ".", tt1)
	-- Resolve placeholders.
	text = gsub(text, "([aeiuoāīū])ʸ", "%1y") -- ʸ after an explicit vowel letter -> y
	text = gsub(text, "ᵃʸ", "oa") -- ʸ directly after the inherent vowel -> oa
	text = gsub(text, "ᵃ([¡¤]+)", "") -- drop the inherent vowel together with the ¡/¤ markers that follow it
	text = gsub(text, "([aeiuoāīū])¤", "%1k") -- ¤ left directly after a vowel letter -> k
	text = gsub(text, "ᵃ([aeiuoāīū])", "%1") -- an explicit vowel replaces the inherent vowel
	text = gsub(text, "ᵃ", "a") -- any remaining inherent vowel is written a
	text = gsub(text, "iṃu", "iuṃ") -- move ṃ (from the "iṃ" entry of tt1) after a following u
	if lang == "mnw" then --Modern Mon
		text = gsub(text, "ññ", "ñ")
	end
	return text
end
-- Return the module table (holding tr) to whoever loads this module.
return export