-- Module:sa-utilities/translit/SLP1-to-Sinh
-- From Wiktionary, the free dictionary.


-- Table returned at the end of the module; public functions are attached to it.
local export = {}

-- SLP1 letters treated as consonants, plus the same letters wrapped in
-- brackets so they can be spliced into a pattern as a character class.
local consonant_list = "kKgGNcCjJYwWqQRtTdDnpPbBmyrlLvSzsh"
local consonant = ("[%s]"):format(consonant_list)
-- SLP1 letters treated as vowels, and the matching character class.
local vowel_list = "aAiIuUfFxXeEoO"
local vowel = ("[%s]"):format(vowel_list)

-- Shorthand for turning code points into a string.
local U = mw.ustring.char

-- U+0DCA: the sign written after a consonant that has no vowel of its own.
local virAma = U(0x0DCA)

-- Dependent vowel signs, keyed by SLP1 vowel letter. These are the forms a
-- vowel takes when it is attached to a preceding consonant; 'a' maps to the
-- empty string, so a consonant followed by 'a' is written as the bare letter.
local diacritics = {
	a = "",
	A = "ා",
	i = "ි",
	I = "ී",
	u = "ු",
	U = "ූ",
	f = "ෘ",
	F = "ෲ",
	x = "ෟ",
	X = "ෳ",
	e = "ෙ",
	E = "ෛ",
	o = "ො",
	O = "ෞ",
}

-- Per-character lookup from SLP1 to Sinhala. Vowel entries are the
-- independent (stand-alone) letters; the dependent signs live in a separate
-- table. A character with no entry here has nothing to be replaced with.
local tt = {
	-- consonants: velar, palatal, retroflex, dental and labial rows
	k = "ක", K = "ඛ", g = "ග", G = "ඝ", N = "ඞ",
	c = "ච", C = "ඡ", j = "ජ", J = "ඣ", Y = "ඤ",
	w = "ට", W = "ඨ", q = "ඩ", Q = "ඪ", R = "ණ",
	t = "ත", T = "ථ", d = "ද", D = "ධ", n = "න",
	p = "ප", P = "ඵ", b = "බ", B = "භ", m = "ම",
	-- semivowels and liquids
	y = "ය", r = "ර", l = "ල", v = "ව", L = "ළ",
	-- sibilants and h
	S = "ශ", z = "ෂ", s = "ස", h = "හ",

	-- independent vowels
	a = "අ", A = "ආ",
	i = "ඉ", I = "ඊ",
	u = "උ", U = "ඌ",
	f = "ඍ", F = "ඎ",
	x = "ඏ", X = "ඐ",
	e = "එ", E = "ඓ",
	o = "ඔ", O = "ඖ",

	-- chandrabindu, anusvara, visarga
	["~"] = "ඁ",
	M = "ං",
	H = "ඃ",

	-- Disabled entries, kept for reference (avagraha, digits, Vedic signs).
	-- Digits are deliberately left unchanged.
	-- ["'"] = 'ऽ',
	-- ['0'] = '०', ['1'] = '१', ['2'] = '२', ['3'] = '३', ['4'] = '४', ['5'] = '५', ['6'] = '६', ['7'] = '७', ['8'] = '८', ['9'] = '९',
	-- ['Z'] = 'ᳵ',
	-- ['V'] = 'ᳶ',

	-- Mapped to the empty string, i.e. removed from the output.
	-- NOTE(review): presumably the SLP1 accent marks; confirm.
	["/"] = "",
	["\\"] = "",
}

-- local fixes = mw.loadData('Module:sa-utilities/translit/post replace fix/Sinh')
local fixes = require('Module:sa-utilities/translit/post replace fix/Sinh')
--- Convert SLP1-encoded text to Sinhala script.
-- The conversion runs as a fixed sequence of substitutions whose order
-- matters: vowel-less consonants are marked first, then consonant+vowel pairs
-- are rewritten, then every remaining character is looked up in `tt`, and
-- finally the post-replacement fixes are applied.
-- @param text  string to transliterate
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return the transliterated string
function export.tr(text, lang, sc)
	local gsub = mw.ustring.gsub

	local cluster_pattern = "(" .. consonant .. ")" .. "(" .. consonant .. ")"
	local cluster_repl = "%1" .. virAma .. "%2"
	local final_pattern = "(" .. consonant .. ")%f[%s%z]"
	local syllable_pattern = "(" .. consonant .. ")(" .. vowel ..  ")"

	-- Keep the consonant as is and append the dependent sign for the vowel.
	local function attach_sign(cons, vow)
		return cons .. diacritics[vow]
	end

	-- A single pass consumes both consonants of a match, so the pair that
	-- starts on the second one is skipped; running the pass twice catches it.
	for _ = 1, 2 do
		text = gsub(text, cluster_pattern, cluster_repl)
	end

	-- A consonant directly before whitespace or at the end of the string.
	text = gsub(text, final_pattern, "%1" .. virAma)

	-- Consonant + vowel becomes consonant + vowel sign.
	text = gsub(text, syllable_pattern, attach_sign)

	-- Whatever is left is mapped one character at a time.
	text = gsub(text, ".", tt)

	-- Each fix is a {pattern, replacement} pair, applied in list order.
	for _, rule in ipairs(fixes) do
		local pattern, replacement = rule[1], rule[2]
		text = gsub(text, pattern, replacement)
	end

	return text
	-- Disabled alternative that would NFC-normalise the result:
	-- return mw.ustring.toNFC(text)
end

-- Hand the module table (carrying `tr`) back to whatever loads this module.
return export