Module:mr-Modi-translit

The following documentation is located at Module:mr-Modi-translit/documentation. ^[edit]

Useful links: subpage list • links • transclusions • testcases • sandbox

This module will transliterate Marathi language text per WT:MR TR. It is also used to transliterate Varhadi. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:mr-Modi-translit/testcases.

Functions

tr(text, lang, sc): Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang. When the transliteration fails, returns nil.

-- Table of functions made available to callers of this module; returned at the end of the file.
local export = {}

-- `u` is called below with a code point (0x200D) and its result is concatenated
-- into a string, so it presumably converts code points to a string.
local u = require("Module:string/char")
-- Short local aliases for the mw.ustring functions used throughout the module.
local gsub = mw.ustring.gsub
local find = mw.ustring.find

-- U+200D (ZERO WIDTH JOINER); used as the last character of one key in `conv`.
local ZWJ = u(0x200D)

-- Lookup table from Modi characters (and a few character sequences) to their
-- Latin-script transliteration. export.tr passes this table to gsub as the
-- replacement argument, so a character without an entry is left unchanged.
local conv = {
	-- consonants
	['𑘎']='k', ['𑘏']='kh', ['𑘐']='g', ['𑘑']='gh', ['𑘒']='ṅ',
	['𑘓']='c', ['𑘔']='ch', ['𑘕']='j', ['𑘖']='jh', ['𑘗']='ñ', 
	['𑘘']='ṭ', ['𑘙']='ṭh', ['𑘚']='ḍ', ['𑘛']='ḍh', ['𑘜']='ṇ', 
	['𑘝']='t', ['𑘞']='th', ['𑘟']='d', ['𑘠']='dh', ['𑘡']='n', 
	['𑘢']='p', ['𑘣']='ph', ['𑘤']='b', ['𑘥']='bh', ['𑘦']='m',
	['𑘧']='y', ['𑘨']='r', ['𑘩']='l', ['𑘪']='v', ['𑘯']='ḷ',
	['𑘫']='ś', ['𑘬']='ṣ', ['𑘭']='s', ['𑘮']='h',
	-- consonant + virama + zero-width joiner, transliterated like the plain consonant
	['𑘨𑘿'..ZWJ] = 'r',
	-- ['𑘕𑘿𑘗'] = 'dny',

	-- vowel diacritics
	----  only in script charts: ['𑘱'] = 'i', ['𑘴'] ='ū',
	['𑘳'] = 'u', ['𑘹'] = 'e', ['𑘻'] = 'o', 
	['𑘰'] = 'ā', ['𑘲'] = 'ī',
	['𑘵'] = 'ru',
	['𑘺'] = 'ai', ['𑘼'] = 'au',
	-- ['𑘰𑙀'] = 'ŏ',
	['𑙀'] = 'ĕ',

	-- vowel signs
	----  only in script charts: ['𑘂'] = 'i', ['𑘅'] ='ū',
	['𑘀'] = 'a', ['𑘄'] = 'u', ['𑘊'] = 'e', ['𑘌'] = 'o',
	['𑘁'] = 'ā', ['𑘃'] = 'ī',
	['𑘆'] = 'ŕ', 
	['𑘋'] = 'ai', ['𑘍'] = 'au', 
	-- two-character sequences: a vowel followed by the sign that is 'ĕ' on its own
	['𑘁𑙀'] = 'ŏ',
	['𑘀𑙀'] = 'ĕ', ['𑘊𑙀'] = 'ĕ',
	
	['𑘌𑘦𑘿'] = 'om',
	
	-- chandrabindu
	--- ['𑙀𑘽'] = '̃',
	
	-- anusvara
	['𑘽'] = 'ṁ',
	
	-- visarga
	['𑘾'] = 'ḥ',
	
	-- virama (no output of its own)
	['𑘿'] = '',
	
	-- numerals
	['𑙐'] = '0', ['𑙑'] = '1', ['𑙒'] = '2', ['𑙓'] = '3', ['𑙔'] = '4',
	['𑙕'] = '5', ['𑙖'] = '6', ['𑙗'] = '7', ['𑙘'] = '8', ['𑙙'] = '9',
	
	-- punctuation
	['𑙁'] = '.', -- danda
	['𑙂'] = '.', -- double danda
	['+'] = '', -- compound separator
	
	-- abbreviation sign
	['𑙃'] = '.',
}

-- Replacement for an anusvara, keyed by the character that follows the anusvara
-- in the word. export.tr looks the following character up here and falls back
-- to "n" when it has no entry. The values are later run through `conv` like the
-- rest of the text; the Latin 'i' has no `conv` entry and so passes through as is.
local nasal_assim = {
	['𑘎'] = '𑘒', ['𑘏'] = '𑘒', ['𑘐'] = '𑘒', ['𑘑'] = '𑘒', 
	['𑘓'] = '𑘗', ['𑘔'] = '𑘗', ['𑘕'] = '𑘗', ['𑘖'] = '𑘗',  
	['𑘘'] = '𑘜', ['𑘙'] = '𑘜', ['𑘚'] = '𑘜', ['𑘛'] = '𑘜',
	['𑘢'] = '𑘦', ['𑘣'] = '𑘦', ['𑘤'] = '𑘦', ['𑘥'] = '𑘦', ['𑘦'] = '𑘦',
	['𑘧'] = 'i', ['𑘨'] = '𑘄', ['𑘩'] = '𑘩', ['𑘪'] = '𑘄',
	['𑘫'] = '𑘄', ['𑘬'] = '𑘄', ['𑘭'] = '𑘄', ['𑘮'] = '𑘄',
}

-- Set of three-character sequences (consonant, virama, consonant).
-- NOTE(review): not referenced anywhere in the visible part of this module;
-- presumably a list of permitted clusters kept for a rule that is not implemented here.
local perm_cl = {
	['𑘦𑘿𑘩'] = true, ['𑘪𑘿𑘩'] = true, ['𑘡𑘿𑘩'] = true,
	
}

-- Pattern fragments meant to be placed inside a [...] character set.
--   all_cons     : the consonant letters handled by this module.
--   special_cons : a subset of the consonants; not referenced in the visible code.
local all_cons, special_cons = '𑘎𑘏𑘐𑘑𑘒𑘓𑘔𑘕𑘖𑘗𑘘𑘙𑘚𑘛𑘝𑘞𑘟𑘠𑘢𑘣𑘤𑘥𑘫𑘬𑘭𑘧𑘨𑘩𑘪𑘮𑘜𑘡𑘦𑘯', '𑘟𑘝𑘧𑘨𑘩𑘪𑘮𑘡𑘦'
--   vowel      : a literal '*', the Latin 'a' that export.tr inserts after bare
--                consonants, and the characters listed as "vowel diacritics" in conv.
--   vowel_sign : the characters listed as "vowel signs" in conv.
local vowel, vowel_sign = '%*a𑘱𑘳𑘵𑘹𑘻𑘰𑘲𑘴𑘺𑘼𑙀', '𑘀𑘂𑘄𑘊𑘌𑘁𑘃𑘅𑘆𑘋𑘍𑘀𑙀'
-- Applied to a REVERSED word: vowel, consonant, 'a', consonant, optional anusvara, vowel.
-- export.tr uses it to delete the 'a' between the two consonants.
-- The optional anusvara must be the Modi one (the same character export.tr handles);
-- a Devanagari anusvara here could never match Modi text.
local syncope_pattern = '([' .. vowel .. vowel_sign .. '])([' .. all_cons .. '])a([' .. all_cons .. '])([𑘽]?[' .. vowel .. vowel_sign .. '])'

--- Reverses a UTF-8 string character by character (not byte by byte).
-- The string is split into a lead byte plus its continuation bytes, so the
-- bytes of a multi-byte character stay together and in order.
-- The reversal is done in place on the collected array, which avoids loading
-- another module and allocating a second array on every call.
-- @param text  UTF-8 encoded string
-- @return the string with its characters in reverse order
local function rev_string(text)
	local chars, n = {}, 0
	for char in string.gmatch(text, "[%z\1-\127\194-\244][\128-\191]*") do -- UTF-8 character pattern
		n = n + 1
		chars[n] = char
	end
	-- Swap the ends towards the middle.
	local i, j = 1, n
	while i < j do
		chars[i], chars[j] = chars[j], chars[i]
		i, j = i + 1, j - 1
	end
	return table.concat(chars)
end

-- Keys of `conv` that are longer than one character. The per-character pass in
-- export.tr looks characters up one at a time and can never hit these keys, so
-- they are substituted in a separate pass beforehand.
-- Sorted (longest first, then bytewise) so the order of application does not
-- depend on the unspecified iteration order of pairs().
local multichar_keys = {}
for key in pairs(conv) do
	if mw.ustring.len(key) > 1 then
		multichar_keys[#multichar_keys + 1] = key
	end
end
table.sort(multichar_keys, function(a, b)
	if #a ~= #b then
		return #a > #b
	end
	return a < b
end)

-- Replacement callback for an anusvara inside a REVERSED word.
-- `succ` is the character before the anusvara in the reversed word (i.e. the one
-- that follows it in reading order; may be empty), `prev` the character after it.
local function replace_anusvara(succ, prev)
	local nasal
	if succ .. prev == "a" then
		-- Nothing follows and the inserted 'a' precedes: virama + 𑘦 (reversed), i.e. "m".
		nasal = "𑘿𑘦"
	elseif succ == "" and find(prev, '[' .. vowel .. ']') then
		-- Nothing follows and a vowel precedes: a combining tilde on that vowel.
		nasal = "̃"
	else
		-- Otherwise pick the nasal according to the following character.
		nasal = nasal_assim[succ] or "n"
	end
	return succ .. nasal .. prev
end

-- Applies the word-level rules to one run of Modi characters and inserted 'a's.
-- The run is reversed first so the word end can be anchored with '^', and is
-- reversed back before being returned.
local function process_word(word)
	word = rev_string(word)
	-- Drop a word-final inserted 'a' when its consonant is preceded by a vowel.
	word = gsub(word, '^a([' .. all_cons .. '][' .. vowel .. vowel_sign .. '])', '%1')
	-- Delete the 'a' matched by syncope_pattern until no match is left.
	while find(word, syncope_pattern) do
		word = gsub(word, syncope_pattern, '%1%2%3%4')
	end
	word = gsub(word, '(.?)𑘽(.)', replace_anusvara)
	return rev_string(word)
end

--- Transliterates Modi-script text into Latin script.
-- @param text  the string to transliterate
-- @param lang  language code; not read by this function
-- @param sc    script code; not read by this function
-- @return the transliteration, normalised to NFC
function export.tr(text, lang, sc)
	-- text = gsub(text, 'ाँ', 'ॉ' .. 'ं')
	-- text = gsub(text, 'ँ', 'ॅ' .. 'ं')
	-- An anusvara that follows something other than a vowel character and stands
	-- before a space or at the end of the text becomes the letter that conv maps to 'a'.
	text = gsub(text, '([^' .. vowel .. vowel_sign .. '])𑘽 ', '%1𑘀 ')
	text = gsub(text, '([^' .. vowel .. vowel_sign .. '])𑘽$', '%1𑘀')
	-- Insert 'a' after every consonant that carries neither a vowel diacritic nor a virama.
	text = gsub(text, '([' .. all_cons .. '])([' .. vowel .. '𑘿]?)', function(c, d)
		return c .. (d == "" and 'a' or d) end)
	-- Rewrite each word where it stands. Substituting through a callback touches
	-- exactly the matched run, so a word that also occurs inside a longer word
	-- cannot corrupt that longer word.
	text = gsub(text, "[𑘀-𑙙a]+", process_word)
	-- Multi-character sequences first (the keys contain no pattern-magic
	-- characters, so they can be used as patterns directly) ...
	for _, key in ipairs(multichar_keys) do
		text = gsub(text, key, conv[key])
	end
	-- ... then everything else, one character at a time.
	text = gsub(text, '.', conv)
	-- After 'a' + 'i'/'u', replace the tilde by the mark placed right after the 'a'.
	text = gsub(text, 'a([iu])̃', 'a͠%1')
	-- An inserted 'a' next to a transliterated 'a' collapses into one.
	text = gsub(text, 'aa', 'a')
	text = gsub(text, 'ñjñ', 'ndny')
	text = gsub(text, 'jñ', 'dny')
	return mw.ustring.toNFC(text)
end

return export