Jump to content

Module:sa-Taml-translit

From Wiktionary, the free dictionary

This module transliterates Sanskrit-language text written in the Tamil script, per WT:SA TR. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:sa-Taml-translit/testcases.

Functions

[edit]
tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

local export = {}
--- "Drop carrier": remove the carrier consonants from a string.
-- Elsewhere in this file combining signs are written after a carrier letter
-- (Tamil 'க' or Grantha '𑌕'), presumably so they stay legible in an editor;
-- this strips every occurrence of those two letters and leaves the rest.
-- @param text  string to strip
-- @return exactly one value: `text` with all 'க' and '𑌕' removed
local function dc(text)
	local stripped = string.gsub(text, 'க', '')
	stripped = string.gsub(stripped, '𑌕', '')
	-- string.gsub also returns a match count.  Returning only the string keeps
	-- that count from leaking into a caller's argument list or table
	-- constructor when dc(...) is written in the last position.
	return stripped
end
 
-- Romanisation of consonant letters.
-- export.tr looks a syllable up here twice: first under the key
-- "optional ஃ + consonant + any attached digits", then, failing that, under
-- 'ஃ' and "consonant + digits" separately.  Keys are therefore either a bare
-- letter or a letter followed by one superscript/subscript digit.
local consonants = {
	['க']='k' , ['ங']='ṅ' , ['ச']='c' , ['ஞ']='ñ' , ['ட']='ṭ' , ['ண']='ṇ' , ['த']='t' ,
	['ந']='n' , ['ப']='p', ['ம']='m' , ['ய']='y' , ['ர']='r' , ['ல']='l' , ['வ']='v' ,
	['ழ']='ḻ' , ['ள']='ḷ' , ['ற']='ṟ' ,
	['ன']='n' , -- So only contextual distinction between ந and ன.
	['ஶ']='ś' , ['ஜ']='j' , ['ஷ']='ṣ' , 
	['ஸ']='s' , ['ஹ']='h' , 
-- Disabled: combined "ஃ + consonant" keys (these would be found by the first,
-- prefixed lookup in export.tr if re-enabled).
--	['ஃப']='f' , ['ஃஜ']='z', ['ஃஸ']='ks' , ['ஃக ']='x',
	['ஃ']='ḥ' , ['ௐ']='о̄m',
-- Consonants modified by spacing superscript digit.  Be liberal.
-- Digits 1-4 select the plain / aspirated / voiced / voiced-aspirated value
-- listed for each letter.
	['க¹']='k', ['க²']='kh', ['க³']='g', ['க⁴']='gh',
	['ச¹']='c', ['ச²']='ch', ['ச³']='j', ['ச⁴']='jh',
	                        ['ஜ¹']='j', ['ஜ²']='jh',
	['ட¹']='ṭ', ['ட²']='ṭh', ['ட³']='ḍ', ['ட⁴']='ḍh',
	['த¹']='t', ['த²']='th', ['த³']='d', ['த⁴']='dh',
	['ப¹']='p', ['ப²']='ph', ['ப³']='b', ['ப⁴']='bh',
	-- 'Ⓡ' and 'Ⓛ' are placeholders: export.tr later rewrites them together
	-- with a following u/ū through the syll2 table.
	['ம²']='ṃ', ['ம³']='m̐',  ['ர²']='Ⓡ', ['ல²']='Ⓛ',
-- Consonants modified by spacing subscript digit.  Be liberal.
-- Same values as the superscript rows above.
	['க₁']='k', ['க₂']='kh', ['க₃']='g', ['க₄']='gh',
	['ச₁']='c', ['ச₂']='ch', ['ச₃']='j', ['ச₄']='jh',
	                        ['ஜ₁']='j', ['ஜ₂']='jh',
	['ட₁']='ṭ', ['ட₂']='ṭh', ['ட₃']='ḍ', ['ட₄']='ḍh',
	['த₁']='t', ['த₂']='th', ['த₃']='d', ['த₄']='dh',
	['ப₁']='p', ['ப₂']='ph', ['ப₃']='b', ['ப₄']='bh',
	['ம₂']='ṃ', ['ம₃']='m̐',  ['ர₂']='Ⓡ', ['ல₂']='Ⓛ',
}

-- Romanisation of the dependent sign that follows a consonant.
-- The long mid vowels keep their macron here (ē, ō); export.tr removes that
-- Tamil length contrast in a later pass.
local diacritics = {
	-- no sign at all: the inherent vowel
	[''] = 'a',
	-- pulli: suppresses the inherent vowel "a"
	['்'] = '',

	-- Tamil vowel signs
	['ா'] = 'ā',
	['ி'] = 'i',
	['ீ'] = 'ī',
	['ு'] = 'u',
	['ூ'] = 'ū',
	['ெ'] = 'e',
	['ே'] = 'ē',
	['ை'] = 'ai',
	['ொ'] = 'o',
	['ோ'] = 'ō',
	['ௌ'] = 'au',

	-- Grantha signs for the syllabic consonants, which also get used
	['𑍃'] = 'ṛ',
	['𑍄'] = 'ṝ',
	['𑍢'] = 'ḷ',
	['𑍣'] = 'ḹ',
}

-- Romanisation of everything that is not a consonant syllable.
-- export.tr applies this table to every single character left over after the
-- consonant pass (gsub with the pattern '.').
local nonconsonants = {
	-- vowels
	-- The leading ’ marks an independent vowel; export.tr strips it at the
	-- start of the text and after whitespace or punctuation.
	['அ']='’a' , ['ஆ']='’ā' , ['இ']='’i' , ['ஈ']='’ī' , ['உ']='’u' , ['ஊ']='’ū' , 
	['எ']='’e' , ['ஏ']='’ē' , ['ஐ']='’ai' , ['ஒ']='’o' , ['ஓ']='’ō' , ['ஔ']='’au' , ['ௐ']='о̄m',
	-- other symbols
	-- dc() removes the carrier 'க', so the middle key is the bare sign only.
	['ஃ']='ḥ', [dc('கஂ')] = 'ṃ', ['𑌃'] = 'ḥ',
-- syllabic consonants (Grantha):
	['𑌋']='ṛ', ['𑍠']='ṝ', ['𑌌']='ḷ', ['𑍡']='ḹ',
}

-- Second-stage rewrite for the syllabic consonants: a placeholder (Ⓡ or Ⓛ)
-- followed by u/ū collapses into the corresponding syllabic letter.
local syll2 = {
	['Ⓡu'] = 'ṛ',
	['Ⓡū'] = 'ṝ',
	['Ⓛu'] = 'ḷ',
	['Ⓛū'] = 'ḹ',
}

-- translit any words or phrases
--- Transliterate a word or phrase into Latin script.
-- Works in passes: vowel-killing signs, consonant syllables, leftover single
-- characters, then clean-up of vowel length, syllabic consonants and the
-- independent-vowel marker.
-- @param text  string to transliterate
-- @param lang  language code (not consulted in this function)
-- @param sc    script code (not consulted in this function)
-- @return the transliterated string
function export.tr(text, lang, sc)
	-- Pass 1: special vowel-killing diacritics on ம ய ல வ.
	-- NOTE(review): "dc(" and ")" are inside the string literal, so dc is not
	-- called here and those characters (and the carriers) are literal members
	-- of the class.  Pairs missing from the table below are left unchanged by
	-- gsub, so only the five listed pairs are ever rewritten.
	local killer_pattern = "[மயலவ][dc(கஂ⃰𑌕𑌁)]" -- gsub on gsub arguments tends to fail!
	local killer_map = {
		['மஂ'] = "ṃ", -- Desirable to get more and independent examples.
		['ம⃰'] = " ṃ ", 
		['ய𑌁'] = "y̐", ['ல𑌁'] = "l̐", ['வ𑌁'] = "v̐", 
	}
	text = mw.ustring.gsub(text, killer_pattern, killer_map)

	-- Pass 2: consonant syllables.  A digit may turn up after the consonant,
	-- after either vowel sign, or after the anusvara, so the same optional
	-- capture is repeated in every one of those slots.
	local digit = '([¹²³⁴₁₂₃₄]?)'
	local nasal_sign = dc('([கஂ𑌕𑌂]?)')
	local vowel_sign = dc('([கா-க்𑌕𑍃𑌕𑍄𑌕𑍢𑌕𑍣]?)')
	local syllable = '(ஃ?)([க-ஹ])' .. digit
		.. vowel_sign .. digit
		.. '([ாௗ]?)' .. digit
		.. nasal_sign .. digit

	local function romanise(prefix, base, m1, sign, m2, sign2, m3, nasal, m4)
		-- Gather the digits onto the consonant, wherever they were written.
		local letter = base .. m1 .. m2 .. m3 .. m4
		-- A second vowel sign is fused with the first by normalisation.
		local vowel_key = sign .. sign2
		if sign2 ~= "" then
			vowel_key = mw.ustring.toNFC(vowel_key)
		end
		-- Prefer an entry for the prefixed consonant as a whole; otherwise
		-- romanise the prefix and the consonant independently.
		local head = consonants[prefix .. letter]
		if not head then
			head = (consonants[prefix] or "") .. (consonants[letter] or letter)
		end
		-- The anusvara is passed through raw; the next pass converts it.
		return head .. (diacritics[vowel_key] or vowel_key) .. nasal
	end
	text = mw.ustring.gsub(text, syllable, romanise)

	-- Pass 3: every remaining single character (vowels, signs, Grantha).
	text = mw.ustring.gsub(text, '.', nonconsonants)

	-- Pass 4: drop the length contrast in the mid vowels.
	for _, pair in ipairs({ { 'ē', 'e' }, { 'ō', 'o' } }) do
		text = string.gsub(text, pair[1], pair[2])
	end

	-- Pass 5: resolve the Ⓡ/Ⓛ placeholders plus u/ū into syllabic consonants.
	text = mw.ustring.gsub(text, '[ⓇⓁ][uū]', syll2)

	-- Pass 6: remove the independent-vowel marker at the start of the text
	-- and after whitespace or punctuation.
	text = string.gsub(text, '^’', '')
	text = mw.ustring.gsub(text, '([%s%p])’', '%1')

	return text
end
 
return export