Jump to content

Module:te-IPA

From Wiktionary, the free dictionary


-- Module table; the public functions are attached to it further down.
local export = {}

-- Telugu consonant letters -> IPA.
-- Keys are either a bare consonant or a consonant followed by the nukta sign
-- (఼). to_IPA looks up the consonant+nukta string first and falls back to the
-- bare consonant when the combination has no entry.
local consonants = {
	['క']='k', ['ఖ']='kʰ', ['గ']='ɡ', ['ఘ']='ɡʱ', ['ఙ']='ŋ',
	['చ']='t͡ɕ', ['ఛ']='t͡ɕʰ', ['జ']='d͡ʑ', ['ఝ']='d͡ʑʱ', ['ఞ']='ɲ',
	['ట']='ʈ', ['ఠ']='ʈʰ', ['డ']='ɖ', ['ఢ']='ɖʱ', ['ణ']='ɳ',
	['త']='t̪', ['థ']='t̪ʰ', ['ద']='d̪', ['ధ']='d̪ʱ', ['న']='n',
	['ప']='p', ['ఫ']='pʰ', ['బ']='b', ['భ']='bʱ', ['మ']='m',
	['య']='j', ['ర']='ɾ', ['ల']='l', ['వ']='ʋ', ['ళ']='ɭ',
	['శ']='ɕ', ['ష']='ʂ', ['స']='s', ['హ']='h', ['ఱ']='r',
	['ఴ']='ɻ', ['ౘ']='t͡s', ['ౙ']='d͡z', ['ౚ']='d', 
	-- Consonant + nukta combinations
	['క఼']='q', ['ఖ఼']='x', ['గ఼']='ɣ', ['జ఼']='z', ['ఝ఼']='ʒ', ['ఫ఼']='f', ['డ఼']='ɽ', ['ఢ఼']='ɽʱ',
	-- Bare nukta maps to the empty string
	['఼']=''
}

-- Dependent vowel signs -> IPA.
-- to_IPA indexes this table with whatever follows a consonant: a vowel sign,
-- the virama, or the empty string when nothing follows (which yields the
-- inherent vowel).
local vowel_diacritics = {
	['ా']= 'aː', ['ి']='i', ['ీ']='iː', ['ు']='u', ['ూ']='uː',
	['ృ']= 'ɻ̍', ['ౄ']='ɻ̍ː', ['ౢ']='l̩', ['ౣ']='l̩ː',
	['ె']='e', ['ే']='eː', ['ై']='ai', ['ొ']='o', ['ో']='oː', ['ౌ']='au',
	['్']='',	-- Virama - suppresses the inherent vowel "a"
	[''] = 'a'	-- No diacritic; inherent vowel
}

-- Independent vowels and miscellaneous signs -> IPA.
-- Passed to mw.ustring.gsub as a replacement table in to_IPA, so each matched
-- character is replaced by its entry here; characters without an entry are
-- left as they are.
local other = {
	-- Independent vowels
	['అ']='a', ['ఆ']='aː', ['ఇ']='i', ['ఈ']='iː', ['ఉ']='u', ['ఊ']='uː',
	['ఋ']= 'ɻ̍', ['ౠ']='ɻ̍ː', ['ఌ']='l̩', ['ౡ']='l̩ː',
	['ఎ']='e', ['ఏ']='eː', ['ఐ']='ai',
	['ఒ']='o', ['ఓ']='oː', ['ఔ']='au',
	-- Other symbols
	-- Anusvara becomes the placeholder m̃, which adjust1 later rewrites
	-- according to the following consonant
	['ం']='m̃', ['ః']='h',
	['ఁ']='',	-- Chandrabindu - indicates elided nasal; has no effect on pronunciation
	['ౝ']='n',	-- Nakaara pollu - vowelless n
	['ఽ']=''	-- Avagraha - indicates elided vowel due to sandhi; has no effect on pronunciation
--	['఼']=''
}

-- Pattern -> replacement rules that to_IPA applies with mw.ustring.gsub after
-- the script-to-IPA substitution. The table is traversed with pairs(), so the
-- order in which the rules run is not specified.
local adjust1 = {
	-- Assimilate the anusvara
	-- (the placeholder m̃ is replaced by a nasal chosen by the consonant
	-- captured after it; %1 puts that consonant back)
	['m̃([kɡŋ])']='ŋ%1',
	['m̃([td]͡[ɕʑ])']='ɲ%1', ['m̃(ɲ)']='ɲ%1',
	['m̃([ʈɖɳ])']='ɳ%1',
	['m̃([td]̪)']='n̪%1', ['m̃([td]͡[sz])']='n̪%1', ['m̃(n)']='n̪%1',
	['m̃([pbm])']='m%1',
	-- Swap a length mark followed by a combining tilde so the tilde comes first
	['ː̃']='̃ː',
}

-- Phoneme-to-phone rules used to derive the narrow (phonetic) transcription.
-- Kept as an ordered list of { pattern, replacement } pairs because some rules
-- overlap: the retroflex-context rules for the syllabic consonants must run
-- before their catch-all counterparts, which would otherwise consume the same
-- text first. A keyed table walked with pairs() gives no such order guarantee.
local adjust2 = {
	-- Account for differences in phonemes vs. phones
	{ 'ɻ̍(ː?[ʈɖɳʂ])', 'ɾi%1' }, { 'ɻ̍(ː?[^ʈɖɳʂ]?)', 'ɾu%1' },
	{ 'l̩(ː?[ʈɖɳʂ])', 'li%1' }, { 'l̩(ː?[^ʈɖɳʂ]?)', 'lu%1' },
	{ 'ai', 'aj' }, { 'au', 'aw' },
	{ 't͡ɕ', 't͡ʃ' }, { 'd͡ʑ', 'd͡ʒ' },
	{ 'ɕ', 'ʃ' },
	{ 'n̪r', 'nd' }, { 'n̪d', 'nd' },
}

-- Transcribe Telugu-script text into IPA.
--
-- Parameters:
--   text  string of Telugu text.
-- Returns:
--   A list with one element (the phonemic transcription) when the phonetic
--   rules change nothing, otherwise two elements: phonemic, then phonetic.
function export.to_IPA(text)

	-- Consonant, optional nukta, optional vowel sign or virama -> IPA.
	-- The vowel class also lists the vocalic-l signs (ౢ, ౣ): they sit outside
	-- the main range but have entries in vowel_diacritics.
	-- Anything without a table entry is passed through unchanged instead of
	-- raising a concatenation error.
	text = mw.ustring.gsub(
		text,
		'([క-హౘ-ౚ])(఼?)([ా-్ౢౣ]?)',
		function(c, n, d)
			return ((consonants[c..n] or consonants[c]) or c) .. (vowel_diacritics[d] or d)
		end)

	-- Independent vowels and remaining signs; characters with no entry in
	-- `other` are left as they are.
	text = mw.ustring.gsub(text, '[ఁ-ౡ]', other)

	-- Anusvara assimilation. Each m̃ rule requires a different following
	-- consonant, so pairs() order is not relied upon here.
	for k, v in pairs(adjust1) do
		text = mw.ustring.gsub(text, k, v)
	end

	-- Account for consonant gemination
	-- (a consonant immediately repeated is written once with a length mark)
	text = mw.ustring.gsub(text, "([kɡŋɲʈɖɳnpbmjlʋɭɕʂshrɻqxzf][ʰʱ]?)%1", "%1ː")
	text = mw.ustring.gsub(text, "([td]̪[ʰʱ]?)%1", "%1ː")
	text = mw.ustring.gsub(text, "([td]͡[ɕʑ][ʰʱ]?)%1", "%1ː")
	text = mw.ustring.gsub(text, "([td]͡[sz])%1", "%1ː")
	-- A doubled tap is written as r; any remaining pair of rhotics as long r
	text = mw.ustring.gsub(text, "([ɾ])%1", "r")
	text = mw.ustring.gsub(text, "[ɾr]ː?[ɾr]ː?", "rː")
	-- Mixed l/ɭ sequences, in either order, become long ɭ
	text = mw.ustring.gsub(text, "ɭː?lː?", "ɭː")
	text = mw.ustring.gsub(text, "lː?ɭː?", "ɭː")

	-- If an independent vowel is after another vowel, assume diphthong
	-- (drops a • separator that directly follows a vowel)
	text = mw.ustring.gsub(text, "([aeiou]ː?)•", "%1")

	-- Phonetic transcription
	-- Declared local so it does not leak into the global environment.
	local phonetic = text
	for _, rule in ipairs(adjust2) do
		phonetic = mw.ustring.gsub(phonetic, rule[1], rule[2])
	end

	return (text == phonetic and { text } or { text, phonetic })

end

-- Template entry point.
-- Takes the word from the first positional argument of the parent frame,
-- falling back to the current page title, transcribes it with to_IPA and
-- formats the result through Module:IPA. An optional q= argument is rendered
-- as a qualifier in front of the transcription.
function export.show(frame)
	local parent_args = frame:getParent().args
	local title_text = mw.title.getCurrentTitle().text
	local word = parent_args[1] or title_text
	local qual = parent_args["q"]

	local results = export.to_IPA(word)

	-- Always show the phonemic form; add the phonetic form only when to_IPA
	-- returned one.
	local items = { { pron = "/" .. results[1] .. "/" } }
	if results[2] then
		items[2] = { pron = "[" .. results[2] .. "]" }
	end

	local formatted = require("Module:IPA").format_IPA_full {
		lang = require("Module:languages").getByCode("te"),
		items = items,
	}

	if qual then
		return require("Module:qualifier").format_qualifier{qual} .. " " .. formatted
	end
	return "" .. formatted
end

-- Hand the module table (to_IPA, show) back to whoever loads this module.
return export