Module:Deva-Beng-translit
Appearance
- The following documentation is generated by Module:documentation/functions/translit. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module will transliterate text in the Devanagari script into the Bengali script.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:Deva-Beng-translit/testcases.
Functions
tr(text, lang, sc)
- Transliterates a given piece of `text` written in the script specified by the code `sc`, and language specified by the code `lang`.
- When the transliteration fails, returns `nil`.
-- Module table; the public transliteration function is attached to it below.
local export = {}

-- Two-character Devanagari sequences (a vowel letter followed by the sign "ॕ")
-- and their Bengali spellings. These are substituted before the
-- character-by-character pass so that the pair is treated as one unit.
local twoChars = {
	["अॕ"] = "অ্যা",
	["एॕ"] = "এ্যা",
}
-- Devanagari-to-Bengali mapping used for the character-by-character pass.
-- Every key is looked up as one whole UTF-8 character; a value may expand to
-- several Bengali characters.
local oneChar = {
	-- Consonants
	["क"] = "ক", ["ख"] = "খ", ["ग"] = "গ", ["घ"] = "ঘ", ["ङ"] = "ঙ",
	["च"] = "চ", ["छ"] = "ছ", ["ज"] = "জ", ["झ"] = "ঝ", ["ञ"] = "ঞ",
	["ट"] = "ট", ["ठ"] = "ঠ", ["ड"] = "ড", ["ढ"] = "ঢ", ["ण"] = "ণ",
	["त"] = "ত", ["थ"] = "থ", ["द"] = "দ", ["ध"] = "ধ", ["न"] = "ন",
	["प"] = "প", ["फ"] = "ফ", ["ब"] = "ব", ["भ"] = "ভ", ["म"] = "ম",
	["य"] = "য়", ["र"] = "র", ["ल"] = "ল", ["ळ"] = "ল়", ["व"] = "ব",
	["श"] = "শ", ["ष"] = "ষ", ["स"] = "স", ["ह"] = "হ",

	-- Independent vowels
	["अ"] = "অ", ["आ"] = "আ", ["ॲ"] = "অ্য", ["इ"] = "ই",
	["ई"] = "ঈ", ["उ"] = "উ", ["ऊ"] = "ঊ", ["ऋ"] = "ঋ",
	["ॠ"] = "ৠ", ["ऌ"] = "ঌ", ["ॡ"] = "ৡ", ["ऍ"] = "এ্য",
	["ए"] = "এ", ["ऐ"] = "ঐ", ["ओ"] = "ও", ["औ"] = "ঔ",

	-- Dependent vowel signs and the virama
	["ा"] = "া", ["ि"] = "ি", ["ी"] = "ী", ["ु"] = "ু",
	["ू"] = "ূ", ["ृ"] = "ৃ", ["ॄ"] = "ৄ", ["ॢ"] = "ৢ",
	["ॣ"] = "ৣ", ["ॅ"] = "্য", ["ॕ"] = "্যা", ["े"] = "ে",
	["ै"] = "ৈ", ["ो"] = "ো", ["ौ"] = "ৌ", ["्"] = "্",

	-- Other signs
	["ं"] = "ং", ["ः"] = "ঃ", ["ँ"] = "ঁ", ["़"] = "়",
	["ऽ"] = "ঽ", ["॰"] = "৽", ["꣼"] = "ঀ", ["ॐ"] = "ওঁ",

	-- Digits
	["०"] = "০", ["१"] = "১", ["२"] = "২", ["३"] = "৩", ["४"] = "৪",
	["५"] = "৫", ["६"] = "৬", ["७"] = "৭", ["८"] = "৮", ["९"] = "৯",
}
-- Transliterates Devanagari text into the Bengali script.
--
-- Parameters:
--   text        The string to transliterate.
--   lang        Language code. When it equals "sa" the khanda-ta
--               substitution is skipped (khanda ta is not used in Sanskrit).
--   sc          Script code. Accepted for interface compatibility; it is not
--               read by this function.
--   override    If truthy, the text is returned even if some characters
--               cannot be transliterated.
--   noKhandaTa  If truthy, "ৎ" will not be contextually substituted for "ত্",
--               which is suitable (e.g.) for Sanskrit transliteration.
--
-- Returns the transliterated string, or nil when `override` is not set and
-- the validation at the end of the function fails.
function export.tr(text, lang, sc, override, noKhandaTa)
	-- Byte-level pattern matching one UTF-8 encoded character: a lead byte
	-- followed by any number of continuation bytes.
	local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
	local Beng = require("Module:scripts").getByCode("Beng")

	-- Decompose first so that the tables are matched against decomposed text.
	text = mw.ustring.toNFD(text)

	-- Two-character sequences must be replaced before the per-character pass.
	for digraph, replacement in pairs(twoChars) do
		text = string.gsub(text, digraph, replacement)
	end

	-- "य" at the very start of the text, and "य" after a virama, become the
	-- plain "য"; every other "य" goes through oneChar below.
	-- NOTE(review): "^" anchors to the start of the whole string only, so a
	-- "य" beginning a later word is not covered by this rule - confirm that
	-- this is intended.
	text = string.gsub(text, "^य", "য")
	text = string.gsub(text, "्य", "্য")

	-- Characters that have no entry in oneChar are left unchanged.
	text = string.gsub(text, UTF8_char, oneChar)

	-- Khanda Ta is not used in Sanskrit.
	if not noKhandaTa and lang ~= "sa" then
		-- "ত্" is kept as it is before the consonants listed in the set and
		-- turned into "ৎ" everywhere else. "য" belongs in the set: the
		-- substitution above has already produced "্য" for every "्य", and
		-- without it a "ত্য" conjunct would be rewritten to "ৎয".
		text = mw.ustring.gsub(text, "ত্([^তথনবমযরল])", "ৎ%1")
		text = string.gsub(text, "ত্$", "ৎ")
	end

	text = mw.ustring.toNFC(text)

	-- Validation: drop "<...>" spans, whitespace and punctuation, then compare
	-- the length of what is left with Beng:countCharacters (presumably the
	-- number of characters belonging to the Bengali script - verify against
	-- Module:scripts). A doubled nukta in the output also counts as a failure.
	local reducedText = mw.ustring.gsub(mw.ustring.gsub(text, "<.->", ""), "[%s%p\n]+", "")
	local allBengali = mw.ustring.len(reducedText) == Beng:countCharacters(reducedText)
	local doubledNukta = mw.ustring.find(text, "়়")

	if (allBengali and not doubledNukta) or override then
		return text
	end
	return nil
end
-- Return the module table so that callers can reach export.tr.
return export