-- Module:sa-utilities/translit/post replace fix/Sinh
--
-- From Wiktionary, the free dictionary


-- Building blocks for the Sinhala-script post-replacement fixes defined further down.
-- Shorthand: builds a string from Unicode code points.
local U = mw.ustring.char
-- U+0DCA SINHALA SIGN AL-LAKUNA, the vowel-killer (virama) of the Sinhala script.
local virAma = U(0x0DCA)
local s1con = 'කඛගඝඞචඡජඣඤටඨඩඪණතථදධනපඵබභමයරලවළශෂසහ' -- Basic consonants
local s2con = s1con .. 'ඥඟඦඬඳඹ' -- Add in single characters composed of 2 SLP1 characters.
-- Pattern: ර with the virama, followed by a basic consonant (the consonant is captured).
local repha_source = 'ර්(['..s1con..'])'
-- Pattern: two basic consonants separated by the virama; each consonant is a capture.
local pattern1 = '(['..s1con..'])'..virAma..'(['..s1con..'])'
-- Same shape as pattern1, but either consonant may also come from the extended set s2con.
local pattern2 = '(['..s2con..'])'..virAma..'(['..s2con..'])'
-- Virama followed by U+200D ZERO WIDTH JOINER.
local liga = U(0x0DCA, 0x200D) -- 'Conjuncts' in Unicode-speak
-- U+200D ZERO WIDTH JOINER followed by the virama.
local abut = U(0x200D, 0x0DCA) -- Make touching consonants
-- Lookup table of consonant pairs that have a preferred written form.
-- Key:   the two consonants concatenated, without the virama that separated them.
-- Value: the string to write for that cluster.
-- The value literals (other than the single letter for jñ) appear to contain an
-- invisible U+200D ZERO WIDTH JOINER after the virama; take care that an editor
-- does not strip it when this table is modified.
local conj = {
-- Disabled: mapping nasal + stop pairs to the single prenasalised letters.
--			['ඞග'] = 'ඟ', ['ඤජ'] = 'ඦ', ['ණඩ'] = 'ඬ', ['නද'] = 'ඳ', ['මබ'] = 'ඹ', -- prenasalised in Sinhalese
			['ජඤ'] = 'ඥ', -- jñ (single letter)
-- Shared with Pali:
			['කව'] = 'ක්‍ව', -- kv
			['තථ'] = 'ත්‍ථ', -- tth
			['තව'] = 'ත්‍ව', -- tv
			['නථ'] = 'න්‍ථ', -- nth
			['නද'] = 'න්‍ද', -- nd (written as a conjunct, not as the prenasalised letter in the disabled line above)
			['නධ'] = 'න්‍ධ', -- ndh
			['නව'] = 'න්‍ව', -- nv
-- Similar
			['කෂ'] = 'ක්‍ෂ', -- kṣ
			['ගධ'] = 'ග්‍ධ', -- gdh
-- Using sanyaka and not listed above
			['ඤච'] = 'ඤ්‍ච', -- ñc
			['ඤඡ'] = 'ඤ්‍ඡ', -- ñch -- ill-supported.
			['ටඨ'] = 'ට්‍ඨ', -- ṭṭh
-- Disabled as a matter of taste:
--			['දධ'] = 'ද්‍ධ', -- ddh -- Minority taste.
			['දව'] = 'ද්‍ව', -- dv
}
-- Joins two consonants that were separated by a virama.
-- Named join_pair (not select) so that Lua's built-in select() is not shadowed
-- for the rest of this module.
--   a: the first captured consonant
--   b: the second captured consonant
-- Returns the dedicated form from conj when the pair has one; otherwise the
-- two consonants with abut (ZWJ + virama) between them.
local function join_pair(a, b)
	return conj[a..b] or a..abut..b
end

-- The module's return value: a list of {pattern, replacement} pairs.
-- A replacement is either a string (where %1 refers to the first capture) or
-- a function that receives the captures and returns the replacement text.
-- Presumably the parent transliteration module applies the entries in list
-- order with a gsub-style call -- confirm against the caller.
local fixes = {
	-- The replacement literal below differs from the pattern by an invisible
	-- U+200D ZERO WIDTH JOINER; do not let an editor strip it.
	-- NOTE(review): if entries are applied one after another, this entry's
	-- output still contains virama + ය and looks as if it would be matched
	-- again by the next entry -- confirm the intended result with the caller.
	{'ය්ය', 'ය‍්ය'}, -- Pairs of ය touch; triple ය does not occur.
	{virAma..'([යර])', liga..'%1'}, -- ය and ර ligate with preceding.
	{repha_source, 'ර'..liga..'%1'}, -- Form repha.
	-- Remaining consonant pairs: basic consonants first, then the extended set,
	-- which also covers single letters produced by the first pass (e.g. the jñ letter).
	{pattern1, join_pair},
	{pattern2, join_pair},
}
return fixes