Module:tl-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module will transliterate Tagalog language text per WT:TL TR. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:tl-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc and in the language specified by the code lang.
When the transliteration fails, returns nil.

-- Module table; the public entry point is attached to it further down.
local export = {}

-- Latin value of each Tagalog-script consonant letter, without any vowel.
-- The vowel that follows is decided by the caller of this table.
local consonants = {
	['ᜃ'] = 'k',
	['ᜄ'] = 'g',
	['ᜅ'] = 'ng',
	['ᜆ'] = 't',
	['ᜇ'] = 'd',
	['ᜈ'] = 'n',
	['ᜉ'] = 'p',
	['ᜊ'] = 'b',
	['ᜋ'] = 'm',
	['ᜌ'] = 'y',
	['ᜎ'] = 'l',
	['ᜏ'] = 'w',
	['ᜐ'] = 's',
	['ᜑ'] = 'h',
	['ᜍ'] = 'r',
}

-- Marks that attach to a consonant letter.  The first two give the vowel
-- that is written after the consonant; the last one maps to the empty
-- string, so the consonant is written with no vowel at all.
local diacritics = {
	['ᜒ'] = 'i',
	['ᜓ'] = 'u',
	['᜔'] = '',
}

-- One-to-one replacements for characters that stand on their own.
local tt = {
	-- independent vowel letters
	['ᜀ'] = 'a',
	['ᜁ'] = 'i',
	['ᜂ'] = 'u',

	-- punctuation
	['᜶'] = '.', -- kulit and pamudpod
	['᜵'] = ',', -- single kulit and pamudpod
}

--- Transliterates Tagalog-script (Tglg) text into Latin letters.
-- @param text     the string to transliterate
-- @param lang     language code; not consulted by this function
-- @param sc       script code; anything other than "Tglg" yields nil
-- @param override accepted for call compatibility; not consulted here
-- @return the transliterated string, or nil when sc is not "Tglg"
function export.tr(text, lang, sc, override)
	if sc ~= "Tglg" then
		return nil
	end

	-- If the input contains the dedicated R letter, D and R are kept apart
	-- and the D -> R rewriting further down is skipped.  The needle is a
	-- literal character, so search in plain mode rather than as a pattern.
	local separate_dr = string.find(text, 'ᜍ', 1, true) ~= nil

	-- Insert a hyphen before an independent vowel.  NOTE: the bracket set
	-- matches ONE character (any listed consonant or the vowel-less mark),
	-- not a consonant+mark pair.  A hyphen that ends up between two Latin
	-- vowels is removed again below; one after a bare consonant survives.
	text = mw.ustring.gsub(text,'([ᜃ᜔ᜄ᜔ᜅ᜔ᜆ᜔ᜈ᜔ᜉ᜔ᜊ᜔ᜋ᜔ᜌ᜔ᜎ᜔ᜏ᜔ᜐ᜔])'..'([ᜀᜁᜂ])','%1-%2')

	-- Consonant letter, optional attached mark, optional independent vowel.
	text = mw.ustring.gsub(
		text,
		'([ᜃ-ᜑ])'..
		'([ᜒᜓ᜔]?)'..
		'([ᜀ-ᜂ]?)',
		function(c, d, e)
			-- Without a mark the consonant is written with "a"; with one,
			-- the mark supplies the vowel (or nothing, for the '' entry).
			local vowel = d == "" and 'a' or diacritics[d]
			-- e is "" when no independent vowel follows; tt has no such key.
			return consonants[c] .. vowel .. (tt[e] or '')
		end)

	-- Remaining single characters (independent vowels, punctuation) are
	-- looked up in tt; characters without an entry are left untouched.
	text = mw.ustring.gsub(text, '.', tt)

	-- Rewrite D as R: between two vowels, after one of b/k/g/p/t that
	-- itself follows a consonant, and after a word-initial b/k/g/p/t.
	-- Repeated until a full pass changes nothing, because gsub matches do
	-- not overlap and one rewrite can expose another.
	if not separate_dr then
		local before
		repeat
			before = text
			text = mw.ustring.gsub(text,"([aiu])d([aiu])","%1r%2")
			text = mw.ustring.gsub(text,"([bkdghlmnprstwy])([bkgpt])d([aiu])","%1%2r%3")
			text = mw.ustring.gsub(text,"^([bkgpt])d([aiu])","%1r%2")
		until text == before
	end

	-- Remove a hyphen that sits between two vowels.
	text = mw.ustring.gsub(text,"([aiu])-([aiu])","%1%2")

	-- Any mark still present was not consumed by a consonant above; write
	-- it as a hyphen followed by its vowel value.
	text = mw.ustring.gsub(text,
		'([ᜒᜓ᜔])',
		function(mark)
			return '-' .. diacritics[mark]
		end)

	-- A dotted-circle placeholder is written as "-a".
	text = mw.ustring.gsub(text, "◌", "-a")
	-- Drop the space in front of a comma or full stop.
	text = mw.ustring.gsub(text, " ([,.])", "%1")

	return text
end

-- Hand the module table (carrying tr) back to whoever loads this module.
return export