Module:sa-Kthi-translit
Appearance
- The following documentation is generated by Module:documentation/functions/translit. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module will transliterate Sanskrit language text per WT:SA TR. It is also used to transliterate Old Awadhi.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:sa-Kthi-translit/testcases.
Functions
[edit] tr(text, lang, sc)
- Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
- When the transliteration fails, returns nil.
-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}
-- Shared string helpers, loaded from Module:string utilities.
local m_str_utils = require("Module:string utilities")
-- Pattern substitution; used for every rewriting pass in export.tr.
local gsub = m_str_utils.gsub
-- NOTE(review): `match` is not referenced anywhere in the visible code.
local match = m_str_utils.match
-- Unicode NFC normalisation; applied to the final result of export.tr.
local toNFC = mw.ustring.toNFC
-- Alias of m_str_utils.char.
local u = m_str_utils.char
-- NOTE(review): the non-ASCII literals of this section had been mis-decoded
-- (mojibake), which also broke several string literals across lines. The Kaithi
-- code points below were reconstructed from the surviving bytes and from the
-- romanisation each key maps to. They are written as code points through u()
-- so that a wrong charset cannot damage them again.

-- Kaithi consonant letters (U+1108D..U+110AF) -> romanisation without the
-- inherent vowel; export.tr appends 'a' when no vowel sign follows.
local consonants = {
	[u(0x1108D)] = 'k', [u(0x1108E)] = 'kh', [u(0x1108F)] = 'g', [u(0x11090)] = 'gh', [u(0x11091)] = 'ṅ',
	[u(0x11092)] = 'c', [u(0x11093)] = 'ch', [u(0x11094)] = 'j', [u(0x11095)] = 'jh', [u(0x11096)] = 'ñ',
	[u(0x11097)] = 'ṭ', [u(0x11098)] = 'ṭh', [u(0x11099)] = 'ḍ', [u(0x1109B)] = 'ḍh', [u(0x1109D)] = 'ṇ',
	[u(0x1109E)] = 't', [u(0x1109F)] = 'th', [u(0x110A0)] = 'd', [u(0x110A1)] = 'dh', [u(0x110A2)] = 'n',
	[u(0x110A3)] = 'p', [u(0x110A4)] = 'ph', [u(0x110A5)] = 'b', [u(0x110A6)] = 'bh', [u(0x110A7)] = 'm',
	-- The Devanagari key 'ळ' is kept from the original table; the consonant
	-- pattern used by export.tr only covers Kaithi letters and never matches it.
	[u(0x110A8)] = 'y', [u(0x110A9)] = 'r', [u(0x110AA)] = 'l', [u(0x110AB)] = 'v', ['ळ'] = 'ḷ',
	[u(0x110AC)] = 'ś', [u(0x110AD)] = 'ṣ', [u(0x110AE)] = 's', [u(0x110AF)] = 'h',
	-- DDDA and RHA. The original listed each of these keys twice, as it did the
	-- 'v' entry; the exact duplicates were dropped.
	-- NOTE(review): the last byte of both romanisations was lost; 'ṛ'/'ṛh' assumed.
	[u(0x1109A)] = 'ṛ', [u(0x1109C)] = 'ṛh',
	-- [JA + virama + NYA] = 'gy',  (presumably U+11094 U+110B9 U+11096 -- confirm)
}

-- Dependent vowel signs -> romanisation. The virama maps to the empty string,
-- which suppresses the inherent 'a' of the preceding consonant.
local diacritics = {
	[u(0x110B1)] = 'i', [u(0x110B3)] = 'u', [u(0x110B5)] = 'e', [u(0x110B7)] = 'o',
	[u(0x110B0)] = 'ā', [u(0x110B2)] = 'ī', [u(0x110B4)] = 'ū',
	-- vowel sign vocalic R: 'r' followed by COMBINING RING BELOW
	[u(0x110C2)] = 'r' .. u(0x0325),
	[u(0x110B6)] = 'ai', [u(0x110B8)] = 'au',
	-- virama
	[u(0x110B9)] = '',
}

-- Independent I / U written with a diaeresis. Used when the letter directly
-- follows an inherent or independent 'a' (presumably so that the result is not
-- read as the diphthong ai / au).
local diatrema = {
	[u(0x11085)] = 'ï', [u(0x11087)] = 'ü',
}

-- Character-by-character replacements applied after the consonant pass.
local tt = {
	-- vowels
	[u(0x11083)] = 'a', [u(0x11085)] = 'i', [u(0x11087)] = 'u', [u(0x11089)] = 'e', [u(0x1108B)] = 'o',
	[u(0x11084)] = 'ā', [u(0x11086)] = 'ī', [u(0x11088)] = 'ū',
	[u(0x1108A)] = 'ai', [u(0x1108C)] = 'au',
	['ॐ'] = 'om',
	-- chandrabindu: 'm' followed by COMBINING CANDRABINDU
	[u(0x11080)] = 'm' .. u(0x0310),
	-- anusvara
	-- NOTE(review): the last byte of the romanisation was lost; 'ṃ' assumed -- confirm.
	[u(0x11081)] = 'ṃ',
	-- visarga
	[u(0x11082)] = 'ḥ',
	-- numerals (Devanagari digits)
	['०'] = '0', ['१'] = '1', ['२'] = '2', ['३'] = '3', ['४'] = '4',
	['५'] = '5', ['६'] = '6', ['७'] = '7', ['८'] = '8', ['९'] = '9',
	-- punctuation
	[u(0x110C0)] = '.', -- danda
	[u(0x110C1)] = '.', -- double danda
	['+'] = '', -- compound separator
	-- reconstructed
	['*'] = '',
}

-- Character classes used by export.tr.
local consonant_class = '[' .. u(0x1108D) .. '-' .. u(0x110AF) .. ']'
-- Vowel signs AA..AU plus virama, and vowel sign vocalic R (U+110C2). The
-- original class stopped at the virama, so the vocalic-R entry in `diacritics`
-- could never be reached.
local vowel_sign_class = '[' .. u(0x110B0) .. '-' .. u(0x110B9) .. u(0x110C2) .. ']'
-- Independent I and U, the two keys of `diatrema`.
local hiatus_class = '[' .. u(0x11085) .. u(0x11087) .. ']'

--- Transliterates Kaithi-script text into Latin script.
-- @param text  the string to transliterate
-- @param lang  language code; not used by this implementation
-- @param sc    script code; not used by this implementation
-- @return the romanised string, NFC-normalised
function export.tr(text, lang, sc)
	-- Pass 1: a consonant, an optional vowel sign or virama, and an optional
	-- independent I/U directly after it.
	text = gsub(
		text,
		'(' .. consonant_class .. ')' ..
		'(' .. vowel_sign_class .. '?)' ..
		'(' .. hiatus_class .. '?)',
		function(c, d, e)
			-- No vowel sign means the consonant carries its inherent 'a'.
			local syllable = consonants[c] .. (d == '' and 'a' or diacritics[d])
			if e == '' then
				return syllable
			end
			if d == '' then
				-- inherent 'a' + independent I/U: mark the second vowel with a diaeresis
				return syllable .. diatrema[e]
			end
			-- explicit vowel sign or virama + independent I/U: plain vowel
			return syllable .. tt[e]
		end)

	-- Pass 2: adjacent vowel letters needing dieresis.
	-- NOTE(review): the first class was a single garbled letter; LETTER A
	-- (U+11083) is assumed -- confirm.
	text = gsub(
		text,
		'([' .. u(0x11083) .. '])(' .. hiatus_class .. ')',
		function(a, b) return tt[a] .. diatrema[b] end)

	-- Pass 3: everything that is left is replaced one character at a time;
	-- characters without an entry in `tt` are left unchanged.
	text = gsub(text, '.', tt)

	return toNFC(text)
end

return export