User:Sbb1413/bn-translit
The following is my personal copy of the code I use for Bengali transliteration.
-- Transliteration for Bengali
-- Module table; export.tr is attached to it below and the table is returned
-- at the end of the file.
local export = {}
-- Local aliases for the mw.ustring string functions used throughout this file.
local gsub = mw.ustring.gsub
local match = mw.ustring.match
-- Lookup table from a Bengali character (or a base letter followed by a
-- nukta) to its Latin transliteration. export.tr passes it to gsub as the
-- replacement table, so a matched string with no entry here is left as is.
local conv = {
-- consonants
["ক"] = "k", ["খ"] = "kh", ["গ"] = "g", ["ঘ"] = "gh", ["ঙ"] = "ṅ",
["চ"] = "c", ["ছ"] = "ch", ["জ"] = "j", ["ঝ"] = "jh", ["ঞ"] = "ñ",
["ট"] = "ṭ", ["ঠ"] = "ṭh", ["ড"] = "ḍ", ["ঢ"] = "ḍh", ["ণ"] = "ṇ",
["ত"] = "t", ["থ"] = "th", ["দ"] = "d", ["ধ"] = "dh", ["ন"] = "n",
["প"] = "p", ["ফ"] = "ph", ["ব"] = "b", ["ভ"] = "bh", ["ম"] = "m",
["য"] = "ĵ", ["র"] = "r", ["ল"] = "l",
["শ"] = "ś", ["ষ"] = "ṣ", ["স"] = "s", ["হ"] = "h",
-- consonants written with a nukta
["য়"] = "ẏ", ["ড়"] = "ṛ", ["ঢ়"] = "ṛh", ["জ়"] = "z",
-- vowel diacritics
["ি"] = "i", ["ু"] = "u",
["ৃ"] = "r̥", ["ে"] = "e", ["ো"] = "ō",
["া"] = "a", ["ী"] = "ī", ["ূ"] = "ū", ["ৈ"] = "ōi", ["ৌ"] = "ōu",
-- archaic vowel diacritics
["ৄ"] = "r̥̄", ["ৢ"] = "l̥", ["ৣ"] = "l̥̄",
-- visarga
["ঃ"] = "ḥ",
-- independent vowels (note that অ is written "o", the same letter export.tr
-- inserts for the inherent vowel)
["অ"] = "o", ["ই"] = "i", ["উ"] = "u",
["ঋ"] = "r̥", ["এ"] = "e", ["ও"] = "ō",
["আ"] = "a", ["ঈ"] = "ī", ["ঊ"] = "ū", ["ঐ"] = "ōi", ["ঔ"] = "ōu",
-- archaic independent vowels
["ৠ"] = "r̥̄", ["ঌ"] = "l̥", ["ৡ"] = "l̥̄",
-- virama: maps to the empty string, so it produces no output of its own
["্"] = "",
-- chandrabindu: becomes a combining tilde on the preceding letter
["ঁ"] = "̃",
-- avagraha
['ঽ']='’',
-- anusvara
["ং"] = "ṁ",
-- khanda ta
["ৎ"] = "t",
-- numerals
["০"] = "0", ["১"] = "1", ["২"] = "2", ["৩"] = "3", ["৪"] = "4",
["৫"] = "5", ["৬"] = "6", ["৭"] = "7", ["৮"] = "8", ["৯"] = "9",
-- punctuation
["।"] = ".", -- dãri (danda)
}
-- Building blocks for the patterns used by export.tr.
-- consonant:  contents of a character class covering the consonant letters.
-- vowel:      the inserted inherent vowel "o", the vowel diacritics and the
--             apostrophe.
-- vowel_sign: the independent vowel letters.
local consonant = "ক-হড়-য়"
local vowel = "oা-ৌ’"
local vowel_sign = "অ-ঔ"

-- c matches one consonant; v matches one vowel of either kind.
local c = string.format("[%s]", consonant)
-- cc is a consonant with an optional nukta in front of it. The nukta comes
-- first because export.tr applies these patterns to a reversed word.
local cc = "়?" .. c
local v = string.format("[%s%s]", vowel, vowel_sign)

-- Matches (on the reversed word) an "o" that sits between a
-- vowel-consonant-vowel-consonant run and a consonant plus vowel (with an
-- optional chandrabindu); export.tr removes that "o".
local syncope_pattern = table.concat({
	"(", v, cc, v, cc, ")o(", cc, "ঁ?", v, ")",
})
--- Reverse a UTF-8 string character by character (not byte by byte).
-- The previous implementation appended one character at a time with `..`
-- and fetched each character through mw.ustring.sub, which made the cost
-- grow quadratically with the length of the input. This version walks the
-- string once and joins the pieces with table.concat.
-- @param text string, expected to be valid UTF-8
-- @return string with the characters of `text` in reverse order
local function rev_string(text)
	local chars, count = {}, 0
	-- A UTF-8 character is one non-continuation byte followed by any number
	-- of continuation bytes (0x80-0xBF), so this byte-level pattern yields
	-- exactly one character per match.
	for ch in string.gmatch(text, "[^\128-\191][\128-\191]*") do
		count = count + 1
		chars[count] = ch
	end
	-- Swap the two ends towards the middle to reverse the list in place.
	for i = 1, math.floor(count / 2) do
		local j = count - i + 1
		chars[i], chars[j] = chars[j], chars[i]
	end
	return table.concat(chars)
end
-- Strings that get a virama inserted in front of every occurrence before
-- anything else happens. The virama keeps the consonant in front of it from
-- receiving an inherent "o" and produces no output itself. They are applied
-- in this order. (Presumably these are common endings and second elements of
-- compounds; confirm with the author.) "কেই" is kept from the original list
-- even though the earlier "কে" already matches it; the doubled virama this
-- produces is dropped during conversion.
local virama_prefixed = {
	"টি", "কার", "খানা", "ডাল", "খানি", "জন", "সকল", "কে", "ফল", "কেই", "মান",
}

-- A "word" for the vowel-deletion step: a run of characters in the range
-- ঁ-৽ together with the inserted "o" and the apostrophe.
local word_pattern = "[ঁ-৽o’]+"

-- Applied to a reversed word: a leading "o" (the word-final inherent vowel)
-- followed by a consonant and then a vowel, with optional nukta and
-- chandrabindu. The "o" is dropped.
local final_o_pattern = "^o(়?" .. c .. ")(ঁ?" .. v .. ")"

-- Inserts the inherent vowel: a consonant (with optional nukta) optionally
-- followed by a vowel diacritic, apostrophe or virama.
local inherent_vowel_pattern = "(" .. c .. "়?)([" .. vowel .. "’?্]?)"

--- Transliterate Bengali text into Latin script.
-- @param text string to transliterate
-- @param lang unused here; kept for the standard transliteration-module signature
-- @param sc unused here; kept for the standard transliteration-module signature
-- @param mode pass "debug" to get the result back even when some characters
--   could not be converted
-- @return the NFC-normalised transliteration, or nil when characters in the
--   range ঁ-৽ are left over and mode is not "debug"
function export.tr(text, lang, sc, mode)
	-- Marker characters: the first becomes virama + অ, the other two become
	-- a plain virama.
	text = gsub(text, "݁", "্অ")
	text = gsub(text, "[࣪ܿ]", "্")
	-- The visarga is converted right away and separated from the preceding
	-- consonant by a virama, so that consonant gets no inherent vowel.
	text = gsub(text, "ঃ", "্ḥ")
	for _, ending in ipairs(virama_prefixed) do
		text = gsub(text, ending, "্" .. ending)
	end

	-- Give every consonant that is not followed by a vowel diacritic,
	-- apostrophe or virama an explicit inherent vowel "o".
	text = gsub(text, inherent_vowel_pattern, function(a, b)
		return a .. (b == "" and "o" or b)
	end)

	-- Remove inherent vowels word by word. Each match is rewritten in place
	-- through the callback. Substituting the rewritten word back with a
	-- text-wide gsub, as was done before, also altered longer words that
	-- merely contain the same sequence.
	text = gsub(text, word_pattern, function(word)
		-- Work on the reversed word so that "^" anchors at the word end.
		local reversed = rev_string(word)
		reversed = gsub(reversed, final_o_pattern, "%1%2")
		-- One removal can expose another, so repeat until nothing matches.
		while match(reversed, syncope_pattern) do
			reversed = gsub(reversed, syncope_pattern, "%1%2")
		end
		return rev_string(reversed)
	end)

	-- Virama + য becomes "y" unless it follows র; virama + ব becomes "v".
	text = gsub(text, "([^র])্য", "%1y")
	text = gsub(text, "্ব", "v")
	-- First pass converts a character together with a following nukta or
	-- apostrophe; the second pass converts whatever single characters remain.
	text = gsub(text, ".[়’]?", conv)
	text = gsub(text, ".", conv)

	-- Adjustments on the Latin output; the order matters.
	text = gsub(text, "([bgmr])v", "%1b")
	text = gsub(text, "hv", "hb")
	text = gsub(text, "kṣ", "ḳh")
	text = gsub(text, "jñ", "ġy")
	text = gsub(text, "ry", "rĵ")
	-- The "$" and "^" rules below anchor to the whole text, not to each word.
	text = gsub(text, "nḍo$", "nḍ")
	text = gsub(text, "([aæeiīoōuū])h$", "%1ho")
	text = gsub(text, "([lś])aho$", "%1ah")
	text = gsub(text, "^oya", "æ")
	text = gsub(text, "^eya", "æ")
	text = gsub(text, "ẏo([ln])([aeiīoōuū])", "ẏ%1%2")
	text = gsub(text, "oō$", "ō")
	text = gsub(text, "([iī])ẏ", "%1ẏo")

	-- Anything still in the range ঁ-৽ could not be converted.
	if mode ~= "debug" and match(text, "[ঁ-৽]") then
		return nil
	end
	return mw.ustring.toNFC(text)
end
return export