-- Module:sa-utilities/translit/Sinh-to-SLP1
-- From Wiktionary, the free dictionary


-- Module table; public functions are attached to it and it is returned at the
-- end of the file.
local export = {}
-- Shorthand for mw.ustring.char, used below to build a character from its
-- code point.
local u = mw.ustring.char

-- Sinhala consonant letters and the SLP1 text each one produces, WITHOUT any
-- following vowel (the vowel is appended by export.tr). A few letters expand
-- to a two-letter sequence (Ng, Yj, jY, Rq, nd, mb).
local consonants = {
	-- velar row
	["ක"] = "k", ["ඛ"] = "K", ["ග"] = "g", ["ඝ"] = "G", ["ඟ"] = "Ng", ["ඞ"] = "N",
	-- palatal row
	["ච"] = "c", ["ඡ"] = "C", ["ජ"] = "j", ["ඣ"] = "J", ["ඦ"] = "Yj", ["ඤ"] = "Y", ["ඥ"] = "jY",
	-- retroflex row
	["ට"] = "w", ["ඨ"] = "W", ["ඩ"] = "q", ["ඪ"] = "Q", ["ඬ"] = "Rq", ["ණ"] = "R",
	-- dental row
	["ත"] = "t", ["ථ"] = "T", ["ද"] = "d", ["ධ"] = "D", ["ඳ"] = "nd", ["න"] = "n",
	-- labial row
	["ප"] = "p", ["ඵ"] = "P", ["බ"] = "b", ["භ"] = "B", ["ඹ"] = "mb", ["ම"] = "m",
	-- y, r, l, v, L
	["ය"] = "y", ["ර"] = "r", ["ල"] = "l", ["ව"] = "v", ["ළ"] = "L",
	-- sibilants and h
	["ශ"] = "S", ["ෂ"] = "z", ["ස"] = "s", ["හ"] = "h",
}

-- Dependent signs that may follow a consonant, and the SLP1 vowel each one
-- stands for. The first entry maps to the empty string, so a consonant
-- carrying that sign is emitted with no vowel at all.
local diacritics = {
	["්"] = "", ["ා"] = "A",
	["ි"] = "i", ["ී"] = "I",
	["ු"] = "u", ["ූ"] = "U",
	["ෘ"] = "f", ["ෲ"] = "F",
	["ෟ"] = "x", ["ෳ"] = "X",
	-- both e-signs give "e"; both o-signs give "o"
	["ෙ"] = "e", ["ේ"] = "e", ["ෛ"] = "E",
	["ො"] = "o", ["ෝ"] = "o", ["ෞ"] = "O",
}

-- Single-character replacements for everything that is not a consonant
-- (+ sign) cluster: independent vowels, nasalisation/visarga signs,
-- punctuation and a few extension characters.
--
-- This table is used as the replacement argument for the one-character
-- pattern '.', so every key MUST be exactly one code point; a longer key can
-- never be looked up and is silently dead.
local tt = {
	-- vowels
	['අ'] = 'a', ['ආ'] = 'A',
	['ඉ'] = 'i', ['ඊ'] = 'I',
	['උ'] = 'u', ['ඌ'] = 'U',
	['ඍ'] = 'f', ['ඎ'] = 'F',
	-- U+0D8F and U+0D90; these mirror the dependent signs that map to
	-- 'x' and 'X' in the diacritics table.
	['ඏ'] = 'x', ['ඐ'] = 'X',
	['එ'] = 'e', ['ඒ'] = 'e', ['ඓ'] = 'E',
	['ඔ'] = 'o', ['ඕ'] = 'o', ['ඖ'] = 'O',
	-- chandrabindu
	['ඁ'] = '~', -- Probably much more complicated.
	-- anusvara
	['ං'] = 'M',
	-- visarga
	['ඃ'] = 'H',
	-- avagraha
	-- ['ऽ'] = '',
	--numerals -- Modern digits are Western Arabic
--	['0'] = '0', ['1'] = '1', ['2'] = '2', ['३'] = '3', ['४'] = '4', ['५'] = '5', ['६'] = '6', ['७'] = '7', ['८'] = '8', ['९'] = '9',
	--punctuation
	['॥'] = '.', --double danda
	['।'] = '.', --danda
	--Vedic extensions - Probably unsupported.
	['ᳵ'] = 'Z',
	['ᳶ'] = 'V',
	--Om
	['ॐ'] = 'oM',
	-- Combining accent mark U+0951, written as a byte escape so the key is
	-- the bare mark with no dotted-circle (U+25CC) placeholder in front of it.
	['\224\165\145'] = '/',
}

--- Drop Carrier: remove every occurrence of the carrier consonant ක from x.
-- Lets a list of dependent signs be written attached to a visible base
-- letter in the source and then reduced to the bare signs.
-- @param x string to strip
-- @return x with all ක removed (exactly one value)
local function dc(x)
	-- string.gsub returns (result, count); the parentheses keep only the
	-- result so the count cannot leak into a caller's argument list.
	return (string.gsub(x, 'ක', ''))
end
--- Transliterate Sinhala-script text into SLP1.
-- Works in two passes: consonants (with an optional following sign) are
-- converted first, then every remaining character is looked up in `tt`.
-- @param text string to transliterate
-- @param lang not used by this function
-- @param sc not used by this function
-- @return the transliterated string
function export.tr(text, lang, sc)
	-- Drop ZWJ (U+200D) before any matching is done.
	text = string.gsub(text, u(0x200d), '')

	-- The signs are written on the carrier ක so they are legible here; dc()
	-- strips the carrier and leaves only the signs.
	local signs = dc('ක්කාකිකීකෘකෟකෲකෳකුකූකෙකේකෛකොකෝකෞ')
	local consonant_capture = '([කඛගඝඟඞචඡජඣඦඤඥටඨඩඪඬණතථදධඳනපඵබභඹමයරලවළශෂසහ])'
	local sign_capture = '([' .. signs .. ']?)'

	-- A consonant with no sign gets 'a'; otherwise the sign's value from
	-- `diacritics` is used (which may be the empty string).
	local function render(letter, sign)
		local vowel = 'a'
		if sign ~= "" then
			vowel = diacritics[sign]
		end
		return consonants[letter] .. vowel
	end

	text = mw.ustring.gsub(text, consonant_capture .. sign_capture, render)

	-- Characters with no entry in `tt` are left as they are.
	text = mw.ustring.gsub(text, '.', tt)

	return text
end

-- Hand the module table (with export.tr attached) back to whoever loads this file.
return export