Jump to content

Module:mnw-translit

From Wiktionary, the free dictionary

This module is in beta stage.
Its interface has been stabilised, but the module may still contain errors. Do not deploy widely until the module has been tested.

Mark-up

[edit]

Traditionally, transliteration of Mon distinguishes the various readings of the anusvara. By immediately following it with one of the circled letters Ⓐ, Ⓗ, Ⓞ or Ⓜ, the intended reading can be selected as follows:

Combination Interpretation Mnemonic
ံⒶ Equivalent to အ် is the independent vowel <a>.
ံⒽ Equivalent to ဟ် is the letter <h>
ံⓄ The vowel is not affected by the coda being a velar consonant. The resulting vowel sound (in the clear register) is /ɔ/.
ံⓂ The anusvara represents a final /m/. This is the default interpretation. /m/

This mark-up can be used in the citation and usage example template {{mnw-quote}}.

In all environments, word boundaries in Burmese script text can be marked up using the HTML tag <wbr>. These tags are converted to single spaces as part of transliteration. Note that only tags written entirely in lowercase are recognised for conversion. This may be useful in some obscure circumstances.


-- Table of public functions; it is returned at the end of the file.
local export = {}
-- Short alias for the ustring substitution function used throughout this module.
local gsub = mw.ustring.gsub
-- Presumably builds a string from one or more code points; it is called below
-- as u(0x0300), u(0xa0, 0xa0), u(0x200B), etc. -- TODO confirm against Module:string/char.
local u = require("Module:string/char")
-- Pattern fragment: captures any single character optionally followed by one
-- character in the range U+0300..U+036F.
-- NOTE(review): not referenced anywhere else in this file -- confirm whether it is still needed.
local letter_with_mark = "(.["..u(0x0300).."-"..u(0x036F).."]?)"

-- Pre-processing map, applied character by character in export.tr before the
-- main tables: each medial sign is rewritten as virama (္) followed by a full
-- consonant letter, so the later tables only have to deal with full consonants.
local pre = {
	["ျ"] = "္ယ",
	["ြ"] = "္ရ",
	["ွ"] = "္ဝ",
	["ှ"] = "္ဟ",
	["ၞ"] = "္န",
	["ၟ"] = "္မ",
	["ၠ"] = "္လ",
}

-- Single-character transliteration table, applied with gsub(text, ".", tt1)
-- in export.tr after the multi-character table tt2 has been processed.
-- Several values contain placeholder characters that export.tr resolves afterwards:
--   ᵃ  inherent vowel of a consonant (removed, replaced by a written vowel, or spelled "a")
--   ʸ  output of ဲ (becomes "y" after a written vowel, "oa" on the inherent vowel)
--   ¡  output of the virama ္
--   ¤  output of the asat ်
local tt1 = {
	-- consonants ; Unicode has no separate code point for great nya (which looks
	-- like ည with another curve), so it is expected to be written as ည္ည.
	["က"] = "kᵃ", ["ခ"] = "khᵃ", ["ဂ"] = "gᵃ", ["ဃ"] = "ghᵃ", ["င"] = "ṅᵃ", ["ၚ"] = "ṅᵃ",
	["စ"] = "cᵃ", ["ဆ"] = "chᵃ", ["ဇ"] = "jᵃ", ["ၛ"] = "jhᵃ", ["ဉ"] = "ñᵃ", ["ည"] = "ññᵃ",  -- ññ -> ñ later (in export.tr, only when lang == "mnw")
	["ဋ"] = "ṭᵃ", ["ဌ"] = "ṭhᵃ", ["ဍ"] = "ḍᵃ", ["ဎ"] = "ḍhᵃ", ["ဏ"] = "ṇᵃ",
	["တ"] = "tᵃ", ["ထ"] = "thᵃ", ["ဒ"] = "dᵃ", ["ဓ"] = "dhᵃ", ["န"] = "nᵃ",
	["ပ"] = "pᵃ", ["ဖ"] = "phᵃ", ["ဗ"] = "bᵃ", ["ဘ"] = "bhᵃ", ["မ"] = "mᵃ",
	["ယ"] = "yᵃ", ["ရ"] = "rᵃ", ["လ"] = "lᵃ", ["ဝ"] = "wᵃ", ["သ"] = "sᵃ", ["ဿ"] = "ssᵃ",
	["ဟ"] = "hᵃ", ["ဠ"] = "ḷᵃ", ["ၜ"] = "ṗᵃ", ["အ"] = "ʼᵃ", ["ၝ"] = "ḅᵃ",
	-- independent vowels (1 char)
	["ဣ"] = "ʼi", ["ဥ"] = "ʼu",
	["ဨ"] = "ʼe", ["ဩ"] = "ʼo",
	-- dependent vowels and diacritics (1 char)
	["ါ"] = "ā", ["ာ"] = "ā", ["ိ"] = "i", ["ီ"] = "iṃ", ["ဳ"] = "ī", ["ု"] = "u", ["ူ"] = "ū", ["ဲ"] = "ʸ",
	["ဴ"] = "ao", ["ေ"] = "e", ["ဵ"] = "e",
	-- anusvara, visarga, and the virama/asat placeholders described above
	["ံ"] = "ṃ", ["း"] = "ḥ", ["္"] = "¡", ["်"] = "¤",
	-- punctuation marks
	["၊"] = ",", ["။"] = ".",
	-- numerals
	["၀"] = "0", ["၁"] = "1", ["၂"] = "2", ["၃"] = "3", ["၄"] = "4",
	["၅"] = "5", ["၆"] = "6", ["၇"] = "7", ["၈"] = "8", ["၉"] = "9",
	-- zero-width characters U+200B, U+200C and U+200D are rendered as a visible
	-- "‼" so that one hiding inside a word shows up in the transliteration
	[u(0x200B)] = "‼", [u(0x200C)] = "‼", [u(0x200D)] = "‼",
}

-- Resolution of the ⒶⒽⓄⓂ mark-up: an anusvara immediately followed by one of
-- the circled letters is replaced by the value listed here.
local ahom = {
	["ံⒶ"] = "အ်",
	["ံⒽ"] = "ĥ",
	["ံⓄ"] = "ံ", -- default action, at least for now.
	["ံⓂ"] = "ံ", -- default action
}

-- Multi-character replacements. export.tr passes every key to gsub as a
-- pattern, and does so before the single-character table tt1 is applied.
local tt2 = {
	-- CAUTION: this key is a real pattern (character class) and its replacement
	-- is the ahom table, which gsub indexes with the matched text.
	["ံ[ⒶⒽⓄⓂ]"] = ahom,
	-- vowels (2 chars)
	["ဣဳ"] = "ʼī",
	["ဥု"] = "ʼū",
	["ေါ"] = "o",
	["ော"] = "o",
	["ဵု"] = "iuw",
}

-- Clean-up rules run, in exactly this order, after every character has been
-- mapped through tt1. Each entry is { pattern, replacement }.
local post_rules = {
	{ "([aeiuoāīū])ʸ", "%1y" },  -- ʸ after a written vowel becomes y
	{ "ᵃʸ", "oa" },               -- ʸ directly on the inherent vowel becomes oa
	{ "ᵃ([¡¤]+)", "" },           -- inherent vowel and the virama/asat markers after it are removed
	{ "([aeiuoāīū])¤", "%1k" },   -- an asat marker remaining after a vowel becomes k
	{ "ᵃ([aeiuoāīū])", "%1" },    -- a written vowel replaces the inherent vowel
	{ "ᵃ", "a" },                 -- any inherent vowel still left is spelled out
	{ "iṃu", "iuṃ" },             -- reorder so that ṃ follows the u
}

-- Transliterates Mon text written in Burmese script into Latin script.
--   text        the string to transliterate, or a table whose args[1] holds it
--               (the case where the function is called directly from a template)
--   lang        language code; when it is "mnw", "ññ" is reduced to "ñ"
--   sc          not used by this function
--   debug_mode  not used by this function
-- Returns the transliterated string.
function export.tr(text, lang, sc, debug_mode)
	if type(text) == "table" then
		text = text.args[1]
	end

	-- Spacing: every run of spaces gets two NBSP in front of it to widen it,
	-- and only afterwards are <wbr> / <wbr/> tags turned into single spaces,
	-- so the spaces that come from tags are not widened.
	local widened_space = u(0xa0, 0xa0) .. "%1"
	text = gsub(text, "( +)", widened_space)
	text = gsub(text, "<wbr/?>", " ")

	-- Medial ha carrying an asat: keep only the surrounding vowel signs and
	-- append a plain "h".
	text = gsub(text, "([ှ])(ေ?ါ?ာ?)([်])(ေ?)", function(_medial, leading, _asat, trailing)
		return leading .. trailing .. "h"
	end)

	-- Expand the remaining medial signs, then put ါ in front of ဲ.
	text = gsub(text, ".", pre)
	text = gsub(text, "ဲါ", "ါဲ") -- fixed ay+aa to aa+ay; it often occurs

	-- Multi-character sequences first, then the per-character table.
	for pattern, replacement in pairs(tt2) do
		text = gsub(text, pattern, replacement)
	end
	text = gsub(text, ".", tt1)

	-- Resolve the placeholder characters produced by tt1.
	for _, rule in ipairs(post_rules) do
		text = gsub(text, rule[1], rule[2])
	end

	if lang ~= "mnw" then
		return text
	end
	-- Modern Mon: the doubled ññ is written as a single ñ.
	-- The parentheses drop gsub's second return value (the match count).
	return (gsub(text, "ññ", "ñ"))
end
 
return export