Module:mr-Modi-translit
Jump to navigation
Jump to search
- The following documentation is located at Module:mr-Modi-translit/documentation. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module will transliterate Marathi language text per WT:MR TR. It is also used to transliterate Varhadi.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:mr-Modi-translit/testcases.
Functions
tr(text, lang, sc)
- Transliterates a given piece of `text` written in the script specified by the code `sc`, and language specified by the code `lang`.
- When the transliteration fails, returns `nil`.
local export = {}
local u = require("Module:string/char")
local gsub = mw.ustring.gsub
local find = mw.ustring.find
local ZWJ = u(0x200D)
-- Character-to-Latin replacement table.
-- export.tr applies it with gsub(text, '.', conv), i.e. one character at a
-- time; characters that have no entry here are left unchanged by that call.
-- NOTE(review): because the lookup pattern is a single '.', entries whose key
-- is longer than one character (e.g. the key built with ZWJ, and the
-- multi-character keys in the "vowel signs" group) look unreachable — confirm.
local conv = {
-- consonants
['๐']='k', ['๐']='kh', ['๐']='g', ['๐']='gh', ['๐']='แน
',
['๐']='c', ['๐']='ch', ['๐']='j', ['๐']='jh', ['๐']='รฑ',
['๐']='แนญ', ['๐']='แนญh', ['๐']='แธ', ['๐']='แธh', ['๐']='แน',
['๐']='t', ['๐']='th', ['๐']='d', ['๐ ']='dh', ['๐ก']='n',
['๐ข']='p', ['๐ฃ']='ph', ['๐ค']='b', ['๐ฅ']='bh', ['๐ฆ']='m',
['๐ง']='y', ['๐จ']='r', ['๐ฉ']='l', ['๐ช']='v', ['๐ฏ']='แธท',
['๐ซ']='ล', ['๐ฌ']='แนฃ', ['๐ญ']='s', ['๐ฎ']='h',
['๐จ๐ฟ'..ZWJ] = 'r',
-- ['๐๐ฟ๐'] = 'dny',
-- vowel diacritics
---- only in script charts: ['๐ฑ'] = 'i', ['๐ด'] ='ลซ',
['๐ณ'] = 'u', ['๐น'] = 'e', ['๐ป'] = 'o',
['๐ฐ'] = 'ฤ', ['๐ฒ'] = 'ฤซ',
['๐ต'] = 'ru',
['๐บ'] = 'ai', ['๐ผ'] = 'au',
-- ['๐ฐ๐'] = 'ล',
['๐'] = 'ฤ',
-- vowel signs
---- only in script charts: ['๐'] = 'i', ['๐
'] ='ลซ',
['๐'] = 'a', ['๐'] = 'u', ['๐'] = 'e', ['๐'] = 'o',
['๐'] = 'ฤ', ['๐'] = 'ฤซ',
['๐'] = 'ล',
['๐'] = 'ai', ['๐'] = 'au',
['๐๐'] = 'ล',
['๐๐'] = 'ฤ', ['๐๐'] = 'ฤ',
['๐๐ฆ๐ฟ'] = 'om',
-- chandrabindu
--- ['๐๐ฝ'] = 'ฬ',
-- anusvara
['๐ฝ'] = 'แน',
-- visarga
['๐พ'] = 'แธฅ',
-- virama
['๐ฟ'] = '',
-- numerals
['๐'] = '0', ['๐'] = '1', ['๐'] = '2', ['๐'] = '3', ['๐'] = '4',
['๐'] = '5', ['๐'] = '6', ['๐'] = '7', ['๐'] = '8', ['๐'] = '9',
-- punctuation
['๐'] = '.', -- danda
['๐'] = '.', -- double danda
['+'] = '', -- compound separator
-- abbreviation sign
['๐'] = '.',
}
-- Lookup used by export.tr when it rewrites an anusvara: the key is the
-- character that follows the anusvara in the word, the value is what the
-- anusvara is replaced with. Characters without an entry fall back to "n"
-- in export.tr.
local nasal_assim = {
['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐',
['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐',
['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐', ['๐'] = '๐',
['๐ข'] = '๐ฆ', ['๐ฃ'] = '๐ฆ', ['๐ค'] = '๐ฆ', ['๐ฅ'] = '๐ฆ', ['๐ฆ'] = '๐ฆ',
['๐ง'] = 'i', ['๐จ'] = '๐', ['๐ฉ'] = '๐ฉ', ['๐ช'] = '๐',
['๐ซ'] = '๐', ['๐ฌ'] = '๐', ['๐ญ'] = '๐', ['๐ฎ'] = '๐',
}
-- NOTE(review): perm_cl is not referenced anywhere else in this file — confirm
-- whether it is still needed.
local perm_cl = {
['๐ฆ๐ฟ๐ฉ'] = true, ['๐ช๐ฟ๐ฉ'] = true, ['๐ก๐ฟ๐ฉ'] = true,
}
-- Character lists that are spliced into [...] pattern classes below and in
-- export.tr. NOTE(review): special_cons is not referenced anywhere else in
-- this file — confirm.
local all_cons, special_cons = '๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐ ๐ข๐ฃ๐ค๐ฅ๐ซ๐ฌ๐ญ๐ง๐จ๐ฉ๐ช๐ฎ๐๐ก๐ฆ๐ฏ', '๐๐๐ง๐จ๐ฉ๐ช๐ฎ๐ก๐ฆ'
-- `vowel` starts with an escaped '*' and a literal 'a', so both count as
-- members of the class wherever it is used.
local vowel, vowel_sign = '%*a๐ฑ๐ณ๐ต๐น๐ป๐ฐ๐ฒ๐ด๐บ๐ผ๐', '๐๐๐๐๐๐๐๐
๐๐๐๐๐'
-- Matches vowel + consonant + 'a' + consonant + (optional mark) vowel.
-- export.tr runs it on the reversed word and substitutes '%1%2%3%4', which
-- drops the 'a' between the two consonants.
local syncope_pattern = '([' .. vowel .. vowel_sign .. '])([' .. all_cons .. '])a([' .. all_cons .. '])([เค]?[' .. vowel .. vowel_sign .. '])'
--- Reverse a UTF-8 string character by character (not byte by byte).
-- Bytes that do not fit the lead-byte/continuation-byte pattern below are
-- not captured and therefore do not appear in the result.
-- @param text string to reverse
-- @return a new string with the characters of `text` in reverse order
local function rev_string(text)
	local chars, n = {}, 0
	-- One lead byte followed by its continuation bytes is one character.
	for char in string.gmatch(text, "[%z\1-\127\194-\244][\128-\191]*") do -- UTF-8 character pattern
		n = n + 1
		chars[n] = char
	end
	-- Swap in place: this helper runs twice per word, so avoid loading
	-- another module and allocating a second array on every call.
	for i = 1, math.floor(n / 2) do
		local j = n - i + 1
		chars[i], chars[j] = chars[j], chars[i]
	end
	return table.concat(chars)
end
--- Transliterate a piece of text into Latin characters.
-- Steps: rewrite a word-final anusvara that follows a non-vowel, give bare
-- consonants an explicit 'a', run per-word 'a'-dropping and anusvara
-- rewriting on the reversed word, then map characters through `conv` and
-- apply a few Latin-side fix-ups.
-- @param text string to transliterate
-- @param lang language code (not used by this function)
-- @param sc script code (not used by this function)
-- @return the transliterated string, NFC-normalised
function export.tr(text, lang, sc)
	-- text = gsub(text, 'เคพเค', 'เฅ' .. 'เค')
	-- text = gsub(text, 'เค', 'เฅ' .. 'เค')
	local any_vowel = vowel .. vowel_sign

	-- An anusvara directly after a non-vowel, at the end of a word (before a
	-- space or at the end of the text), is replaced by the character given
	-- in the replacement string.
	text = gsub(text, '([^' .. any_vowel .. '])๐ฝ ', '%1๐ ')
	text = gsub(text, '([^' .. any_vowel .. '])๐ฝ$', '%1๐')

	-- Every consonant not followed by a vowel diacritic or virama gets an
	-- explicit 'a'.
	text = gsub(text, '([' .. all_cons .. '])([' .. vowel .. '๐ฟ]?)', function(c, d)
		return c .. (d == "" and 'a' or d)
	end)

	-- Process each word where it stands. The previous code collected words
	-- from a snapshot of the text and then ran gsub(text, orig_word, ...),
	-- which also rewrote that word wherever it occurred inside longer words
	-- or inside already-processed output, so those longer words no longer
	-- matched and lost their own processing.
	text = gsub(text, "[๐-๐a]+", function(orig_word)
		-- Work on the reversed word so that word-final context can be
		-- anchored with '^'.
		local word = rev_string(orig_word)
		-- Drop a word-final 'a' that follows consonant + vowel.
		word = gsub(word, '^a([' .. all_cons .. '][' .. any_vowel .. '])', '%1')
		-- Repeat until stable: one pass can expose a new match.
		while find(word, syncope_pattern) do
			word = gsub(word, syncope_pattern, '%1%2%3%4')
		end
		-- In the reversed word `succ` is the character that follows the
		-- anusvara in reading order and `prev` the one that precedes it.
		word = gsub(word, '(.?)๐ฝ(.)', function(succ, prev)
			local nasal
			if succ == "" and prev == "a" then
				nasal = "๐ฟ๐ฆ"
			elseif succ == "" and find(prev, '[' .. vowel .. ']') then
				nasal = "ฬ"
			else
				nasal = nasal_assim[succ] or "n"
			end
			return succ .. nasal .. prev
		end)
		return rev_string(word)
	end)

	-- Character-by-character mapping to Latin.
	text = gsub(text, '.', conv)
	-- Latin-side fix-ups.
	text = gsub(text, 'a([iu])ฬ', 'aอ %1')
	text = gsub(text, 'aa', 'a')
	text = gsub(text, 'รฑjรฑ', 'ndny')
	text = gsub(text, 'jรฑ', 'dny')
	return mw.ustring.toNFC(text)
end
return export