Module:bho-Kthi-translit
Jump to navigation
Jump to search
- The following documentation is generated by Module:documentation/functions/translit. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module will transliterate Bhojpuri language text. It is also used to transliterate Angika, Magahi, Maithili, and Sadri.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:bho-Kthi-translit/testcases.
Functions
tr(text, lang, sc)
- Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
- When the transliteration fails, returns nil.
-- Transliteration for Bhojpuri
-- Module table; it is returned at the end of the file and carries the
-- public tr function.
local export = {}
-- Local aliases for the mw.ustring functions used throughout this module.
local gsub = mw.ustring.gsub
local match = mw.ustring.match
-- Lookup table passed to gsub as the replacement in export.tr: each key is
-- a source-script sequence and each value is its Latin transliteration.
-- Keys mapped to '' (the virama and the '+' compound separator) are removed
-- from the output.
-- NOTE(review): several keys and values in this copy look mis-encoded (some
-- keys appear identical, and a few literals are broken across two lines);
-- verify against the canonical module text before editing entries.
local conv = {
-- consonants
['๐'] = 'k', ['๐'] = 'kh', ['๐'] = 'g', ['๐'] = 'gh', ['๐'] = 'แน
',
['๐'] = 'c', ['๐'] = 'ch', ['๐'] = 'j', ['๐'] = 'jh', ['๐'] = 'รฑ',
['๐'] = 'แนญ', ['๐'] = 'แนญh', ['๐'] = 'แธ', ['๐'] = 'แธh', ['๐'] = 'แน',
['๐'] = 't', ['๐'] = 'th', ['๐ '] = 'd', ['๐ก'] = 'dh', ['๐ข'] = 'n',
['๐ฃ'] = 'p', ['๐ค'] = 'ph', ['๐ฅ'] = 'b', ['๐ฆ'] = 'bh', ['๐ง'] = 'm',
['๐จ'] = 'y', ['๐ฉ'] = 'r', ['๐ช'] = 'l', ['๐ซ'] = 'v', ['๐ซ'] = 'v', ['เคณ'] = 'แธท',
['๐ฌ'] = 'ล', ['๐ญ'] = 'แนฃ', ['๐ฎ'] = 's', ['๐ฏ'] = 'h',
['๐'] = 'แน', ['๐'] = 'แน', ['๐'] = 'แนh', ['๐'] = 'แนh',
-- ['๐๐น๐'] = 'gy',
-- vowel diacritics
['๐ฑ'] = 'i', ['๐ณ'] = 'u', ['๐ต'] = 'e', ['๐ท'] = 'o',
['๐ฐ'] = 'ฤ', ['๐ฒ'] = 'ฤซ', ['๐ด'] = 'ลซ',
['๐ถ'] = 'ai', ['๐ธ'] = 'au',
-- vowel signs
['๐'] = 'a', ['๐
'] = 'i', ['๐'] = 'u', ['๐'] = 'e', ['๐'] = 'o',
['๐'] = 'ฤ', ['๐'] = 'ฤซ', ['๐'] = 'ลซ',
['๐'] = 'ai', ['๐'] = 'au',
['เฅ'] = 'om',
-- chandrabindu
['๐'] = 'ฬ',
-- anusvara
['๐'] = 'แน',
-- visarga
['๐'] = 'แธฅ',
-- virama
['๐น'] = '',
-- numerals
['เฅฆ'] = '0', ['เฅง'] = '1', ['เฅจ'] = '2', ['เฅฉ'] = '3', ['เฅช'] = '4',
['เฅซ'] = '5', ['เฅฌ'] = '6', ['เฅญ'] = '7', ['เฅฎ'] = '8', ['เฅฏ'] = '9',
-- punctuation
['๐'] = '.', -- danda
['๐'] = '.', -- double danda
['+'] = '', -- compound separator
-- abbreviation sign
['๐ป'] = '.',
}
-- Nasal assimilation table used by export.tr: the key is the character
-- captured next to the nasal mark (as `succ`, taken from the reversed word,
-- so it follows the mark in the original order), and the value is the nasal
-- letter substituted for the mark. Characters not listed fall back to "n"
-- in export.tr.
local nasal_assim = {
['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐',
['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐',
['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐',
['๐ฃ'] = '๐ง', ['๐ค'] = '๐ง', ['๐ฅ'] = '๐ง', ['๐ฆ'] = '๐ง', ['๐ง'] = '๐ง',
}
-- Set of three-character sequences consulted by export.tr when deciding
-- whether a word-final 'a' is kept. Keys are compared against
-- first..second..third as captured from the reversed word, so they are
-- written in reversed order. A sequence listed here stops the
-- "special consonant + virama" condition from retaining the final 'a'.
local perm_cl = {
['๐ง๐น๐ช'] = true, ['๐ซ๐น๐ช'] = true, ['๐ซ๐น๐ช'] = true, ['๐ข๐น๐ช'] = true,
}
-- Character lists that are spliced into bracketed pattern sets below and in
-- export.tr. all_cons is the full consonant list; special_cons is the subset
-- tested for the word-final 'a' rule in export.tr.
local all_cons, special_cons = '๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐ ๐ก๐ฃ๐ค๐ฅ๐ฆ๐ฌ๐ญ๐ฎ๐จ๐ฉ๐ช๐ซ๐ฏ๐๐ข๐ง', '๐จ๐ฉ๐ช๐ฅ๐ซ๐ฏ๐ข๐ง'
-- vowel holds the Latin 'a' (inserted by export.tr) plus the dependent vowel
-- marks; vowel_sign holds the characters listed under "vowel signs" in conv.
-- NOTE(review): the vowel_sign literal is split across two lines in this
-- copy; verify against the canonical module text.
local vowel, vowel_sign = 'a๐ฐ๐ฑ๐ฒ๐ณ๐ด๐ต๐ถ๐ท๐ธ', '๐๐๐
๐๐๐๐๐๐๐'
-- Pattern for deleting a medial 'a': vowel, consonant, 'a', consonant, vowel
-- (the last optionally preceded by one of two marks). It is applied to the
-- reversed word in export.tr, which is why the optional mark sits before each
-- consonant. The second consonant set is all_cons with one letter removed
-- via gsub (the key that conv maps to 'y', as far as this copy shows).
local syncope_pattern = '([' .. vowel .. vowel_sign .. '])(๐บ?[' .. all_cons .. '])a(๐บ?[' .. gsub(all_cons, "๐จ", "") .. '])([๐๐]?[' .. vowel .. vowel_sign .. '])'
-- Returns `text` with its characters in reverse order. Characters are counted
-- and sliced with mw.ustring, so multi-byte characters are moved as units.
local function rev_string(text)
	local total = mw.ustring.len(text)
	local chars = {}
	-- Walk from the last character to the first, appending each in turn.
	for pos = total, 1, -1 do
		chars[#chars + 1] = mw.ustring.sub(text, pos, pos)
	end
	return table.concat(chars)
end
-- Transliterates `text` into Latin script and returns the result in NFC.
--
-- Parameters:
--   text  string to transliterate.
--   lang  language code; accepted but not read anywhere in this function.
--   sc    script code; accepted but not read anywhere in this function.
--
-- Steps, in order:
--   1. insert an 'a' after every consonant not followed by a vowel mark or
--      virama;
--   2. for each matched word, work on the reversed word to decide whether the
--      word-final 'a' stays, delete medial 'a' per syncope_pattern, and
--      rewrite the nasal mark;
--   3. map the remaining characters through conv and apply final fix-ups.
function export.tr(text, lang, sc)
-- Step 1: c is a consonant plus an optional following mark; d is the vowel
-- mark or virama after it, or "" when none follows, in which case 'a' is
-- supplied.
text = gsub(
text,
'([' .. all_cons .. ']๐บ?)([' .. vowel .. '๐น]?)',
function(c, d)
return c .. (d == "" and 'a' or d)
end
)
-- Step 2: process each run of characters matched by the range-or-'a' set.
for word in mw.ustring.gmatch(text, "[๐-๐a]+") do
local orig_word = word
-- Everything up to the write-back operates on the reversed word, so '^'
-- below anchors at the original end of the word.
word = rev_string(word)
-- Word-final 'a': the pattern consumes the leading 'a' of the reversed word,
-- and the callback re-emits it only when one of the conditions holds.
word = gsub(
word,
'^a(๐บ?)([' .. all_cons .. '])(.)(.?)',
function(opt, first, second, third)
local a = ""
-- Operator precedence groups this as (A and B and not C) or D:
--   A/B/C: first is a special consonant, second is the virama, and the
--          three-character sequence is not listed in perm_cl;
--   D:     first..second matches the two-character pattern on the last line.
if match(first, '[' .. special_cons .. ']')
and match(second, '๐น')
and not perm_cl[first..second..third]
or match(first .. second, '๐จ[๐ฒ๐ต๐ถ]') then
a = "a"
end
return a .. opt .. first .. second .. third
end
)
-- Medial 'a' deletion: repeat until the pattern no longer matches, since one
-- replacement can expose another match.
while match(word, syncope_pattern) do
word = gsub(word, syncope_pattern, '%1%2%3%4')
end
-- Nasal mark: succ is the character captured before the mark in the reversed
-- word (so it follows the mark in the original order); prev is the one
-- captured after it.
word = gsub(
word,
'(.?)๐(.)',
function(succ, prev)
-- Default: the nasal listed for succ in nasal_assim, otherwise "n".
local mid = nasal_assim[succ] or "n"
-- prev is always one character, so this holds only when succ is "" and
-- prev is "a".
if succ..prev == "a" then
mid = "๐บ๐ง"
-- Nothing captured before the mark and prev is in the vowel set.
elseif succ == "" and match(prev, '[' .. vowel .. ']') then
mid = "ฬ"
end
return succ .. mid .. prev
end
)
-- Write the processed word back in its original order. orig_word is used as
-- a gsub pattern, so every occurrence of it in text is replaced.
text = gsub(text, orig_word, rev_string(word))
end
-- Step 3: map each character (with an optional following mark) through conv;
-- sequences without a conv entry are left unchanged by gsub.
text = gsub(text, '.๐บ?', conv)
-- Fix-up for 'a' followed by i or u and a combining mark.
text = gsub(text, 'a([iu])ฬ', 'aอ %1')
-- Looks up any remaining occurrence of this three-character sequence in conv.
-- NOTE(review): the only conv entry that appears to use this key is commented
-- out, so this call may currently leave the text unchanged; confirm.
text = gsub(text, '๐๐น๐', conv)
-- A literal '*' in the text becomes 'a'.
text = gsub(text, '%*', 'a')
return mw.ustring.toNFC(text)
end
return export