Jump to content

Module:sa-Kthi-translit

From Wiktionary, the free dictionary

This module transliterates Sanskrit text written in the Kaithi script (script code Kthi) into the Latin script per WT:SA TR. It is also used to transliterate Old Awadhi. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:sa-Kthi-translit/testcases.

Functions
tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

local export = {}

local m_str_utils = require("Module:string utilities")
local gsub = m_str_utils.gsub
local match = m_str_utils.match
local toNFC = mw.ustring.toNFC
local u = m_str_utils.char

-- Transliteration tables for the Kaithi script (Unicode block U+11080-U+110CF).
--
-- Every non-Latin key below is written as a code point through u() (the
-- `char` helper taken from Module:string utilities at the top of this file)
-- instead of as a literal glyph. Supplementary-plane literals are easy to
-- damage in a charset round trip: once trailing bytes are lost, distinct
-- letters such as KA, GA, GHA and NNA collapse into one and the same table
-- key. Numeric code points cannot be corrupted that way.

-- Consonant letters -> Latin, WITHOUT the inherent vowel (export.tr adds it).
local consonants = {
	-- U+1108D..U+11091
	[u(0x1108D)] = 'k', [u(0x1108E)] = 'kh', [u(0x1108F)] = 'g', [u(0x11090)] = 'gh', [u(0x11091)] = 'ṅ',
	-- U+11092..U+11096
	[u(0x11092)] = 'c', [u(0x11093)] = 'ch', [u(0x11094)] = 'j', [u(0x11095)] = 'jh', [u(0x11096)] = 'ñ',
	-- U+11097..U+1109D (U+1109A and U+1109C are listed further down)
	[u(0x11097)] = 'ṭ', [u(0x11098)] = 'ṭh', [u(0x11099)] = 'ḍ', [u(0x1109B)] = 'ḍh', [u(0x1109D)] = 'ṇ',
	-- U+1109E..U+110A2
	[u(0x1109E)] = 't', [u(0x1109F)] = 'th', [u(0x110A0)] = 'd', [u(0x110A1)] = 'dh', [u(0x110A2)] = 'n',
	-- U+110A3..U+110A7
	[u(0x110A3)] = 'p', [u(0x110A4)] = 'ph', [u(0x110A5)] = 'b', [u(0x110A6)] = 'bh', [u(0x110A7)] = 'm',
	-- U+110A8..U+110AB
	[u(0x110A8)] = 'y', [u(0x110A9)] = 'r', [u(0x110AA)] = 'l', [u(0x110AB)] = 'v',
	-- DEVANAGARI LETTER LLA; the only non-Kaithi consonant handled here.
	[u(0x0933)] = 'ḷ',
	-- U+110AC..U+110AF
	[u(0x110AC)] = 'ś', [u(0x110AD)] = 'ṣ', [u(0x110AE)] = 's', [u(0x110AF)] = 'h',
	-- Precomposed nukta letters. Each of these (and U+110AB above) used to be
	-- listed twice with the same value; a duplicate key adds nothing, so each
	-- now appears once.
	[u(0x1109A)] = 'ṛ', [u(0x1109C)] = 'ṛh',
	-- [u(0x11094) .. u(0x110B9) .. u(0x11096)] = 'gy',
}

-- Dependent vowel signs (and virama) -> the text that replaces the
-- consonant's inherent "a".
local diacritics = {
	[u(0x110B1)] = 'i', [u(0x110B3)] = 'u', [u(0x110B5)] = 'e', [u(0x110B7)] = 'o',
	[u(0x110B0)] = 'ā', [u(0x110B2)] = 'ī', [u(0x110B4)] = 'ū',
	[u(0x110C2)] = 'r' .. u(0x0325), -- vocalic r: "r" + combining ring below
	[u(0x110B6)] = 'ai', [u(0x110B8)] = 'au',

	-- virama: suppresses the inherent vowel
	[u(0x110B9)] = '',
}

-- Independent I / U written directly after an "a": spelled with a diaeresis
-- so that a+i and a+u are not read as the diphthongs "ai" / "au".
local diatrema = {
	[u(0x11085)] = 'ï', [u(0x11087)] = 'ü',
}

-- Everything that is transliterated one character at a time.
local tt = {

	-- vowels
	[u(0x11083)] = 'a', [u(0x11085)] = 'i', [u(0x11087)] = 'u', [u(0x11089)] = 'e', [u(0x1108B)] = 'o',
	[u(0x11084)] = 'ā', [u(0x11086)] = 'ī', [u(0x11088)] = 'ū',
	[u(0x1108A)] = 'ai', [u(0x1108C)] = 'au',

	-- DEVANAGARI OM
	[u(0x0950)] = 'om',

	-- chandrabindu: "m" + combining candrabindu
	[u(0x11080)] = 'm' .. u(0x0310),

	-- anusvara
	[u(0x11081)] = 'ṃ',

	-- visarga
	[u(0x11082)] = 'ḥ',

	-- punctuation
	[u(0x110C0)] = '.', -- danda
	[u(0x110C1)] = '.', -- double danda

	['+'] = '', -- compound separator

	-- reconstructed
	['*'] = '',
}

-- numerals: Devanagari digits U+0966..U+096F -> '0'..'9'
for digit = 0, 9 do
	tt[u(0x0966 + digit)] = tostring(digit)
end

-- Character classes shared by the patterns in export.tr.
-- Consonants: the contiguous Kaithi range plus Devanagari LLA. LLA has an
-- entry in `consonants`, so it must be matched here to ever be used.
local consonant_class = '[' .. u(0x1108D) .. '-' .. u(0x110AF) .. u(0x0933) .. ']'
-- Vowel signs and virama, plus the vocalic-r sign, which lies outside the
-- contiguous U+110B0..U+110B9 range but has an entry in `diacritics`.
local vowel_sign_class = '[' .. u(0x110B0) .. '-' .. u(0x110B9) .. u(0x110C2) .. ']'
-- Independent I and U, the two letters that can form a hiatus.
local hiatus_class = '[' .. u(0x11085) .. u(0x11087) .. ']'

-- Transliterates Kaithi text into Latin script.
--
-- Parameters:
--   text  string to transliterate.
--   lang  language code; accepted for the standard transliteration-module
--         signature, not consulted here.
--   sc    script code; likewise not consulted here.
-- Returns the transliterated string in NFC. Characters that have no entry
-- in the tables are left in place unchanged.
function export.tr(text, lang, sc)
	-- Pass 1: each consonant, with an optional vowel sign / virama and an
	-- optional directly following independent I or U.
	text = gsub(
		text,
		'(' .. consonant_class .. ')' ..
		'(' .. vowel_sign_class .. '?)' ..
		'(' .. hiatus_class .. '?)',
		function(c, d, e)
			local out = consonants[c]
			if d == '' then
				-- No sign: the inherent vowel surfaces. A following I/U then
				-- needs the diaeresis form to keep it apart from ai/au.
				out = out .. 'a'
				if e ~= '' then
					out = out .. diatrema[e]
				end
			else
				-- Explicit sign (empty string for virama); a following I/U
				-- cannot be misread after it, so the plain vowel is used.
				out = out .. diacritics[d]
				if e ~= '' then
					out = out .. tt[e]
				end
			end
			return out
		end)

	-- Pass 2: adjacent vowel letters needing dieresis (independent A
	-- followed by independent I or U).
	text = gsub(
		text,
		'(' .. u(0x11083) .. ')(' .. hiatus_class .. ')',
		function(a, b) return tt[a] .. diatrema[b] end)

	-- Pass 3: everything that is left, one character at a time.
	text = gsub(text, '.', tt)

	return toNFC(text)

end

return export