-- Module:tcy-IPA
--
-- From Wiktionary, the free dictionary.


-- Module table: the public functions are attached to it below and it is
-- returned at the end of the file.
local export = {}

-- Consonant letters mapped to the IPA of the bare consonant.
-- The vowel that follows (inherent or written) is looked up separately in
-- vowel_diacritics.
local consonants = {
	-- velar series
	['ಕ'] = 'k',
	['ಖ'] = 'kʰ',
	['ಗ'] = 'ɡ',
	['ಘ'] = 'ɡʱ',
	['ಙ'] = 'ŋ',
	-- palatal series
	['ಚ'] = 't͡ʃ',
	['ಛ'] = 't͡ʃʰ',
	['ಜ'] = 'd͡ʒ',
	['ಝ'] = 'd͡ʒʱ',
	['ಞ'] = 'ɲ',
	-- retroflex series
	['ಟ'] = 'ʈ',
	['ಠ'] = 'ʈʰ',
	['ಡ'] = 'ɖ',
	['ಢ'] = 'ɖʱ',
	['ಣ'] = 'ɳ',
	-- dental series
	['ತ'] = 't̪',
	['ಥ'] = 't̪ʰ',
	['ದ'] = 'd̪',
	['ಧ'] = 'd̪ʱ',
	['ನ'] = 'n̪',
	-- labial series
	['ಪ'] = 'p',
	['ಫ'] = 'pʰ',
	['ಬ'] = 'b',
	['ಭ'] = 'bʱ',
	['ಮ'] = 'm',
	-- approximants and tap
	['ಯ'] = 'j',
	['ರ'] = 'ɾ',
	['ಲ'] = 'l',
	['ವ'] = 'ʋ',
	-- fricatives
	['ಶ'] = 'ʃ',
	['ಷ'] = 'ʂ',
	['ಸ'] = 's',
	['ಹ'] = 'h',
	-- remaining letters
	['ಳ'] = 'ɭ',
	['ೞ'] = 'ɻ',
	['ಱ'] = 'r',
}

-- Dependent vowel signs (written after a consonant) mapped to IPA.
-- The empty-string key supplies the inherent vowel used when a consonant
-- carries no sign at all.
local vowel_diacritics = {
	['ಾ'] = 'aː',
	['ಿ'] = 'i',
	['ೀ'] = 'iː',
	['ು'] = 'u',
	['ೂ'] = 'uː',
	['ೃ'] = 'ɾi',
	['ೄ'] = 'ɾiː',
	['ೢ'] = 'li',
	['ೣ'] = 'liː',
	['ೆ'] = 'e',
	['ೇ'] = 'eː',
	['ೈ'] = 'ɐi̯',
	['ೊ'] = 'o',
	['ೋ'] = 'oː',
	['ೌ'] = 'ɐu̯',
	-- Virama: suppresses the inherent vowel
	['್'] = '',
	-- No diacritic: inherent vowel
	[''] = 'ɐ',
}

-- Characters handled after the consonant pass: independent vowel letters
-- and a few standalone signs.
local other = {
	-- independent vowels
	['ಅ'] = 'ɐ',
	['ಆ'] = 'aː',
	['ಇ'] = 'i',
	['ಈ'] = 'iː',
	['ಉ'] = 'u',
	['ಊ'] = 'uː',
	['ಋ'] = 'ɾi',
	['ೠ'] = 'ɾiː',
	['ಌ'] = 'li',
	['ೡ'] = 'liː',
	['ಎ'] = 'e',
	['ಏ'] = 'eː',
	['ಐ'] = 'ɐi̯',
	['ಒ'] = 'o',
	['ಓ'] = 'oː',
	['ಔ'] = 'ɐu̯',
	-- other symbols ('m̃' is a placeholder that adjust1 later resolves)
	['ಂ'] = 'm̃',
	['ಃ'] = 'h',
	['ೝ'] = 'n',
	-- dropped from the transcription
	['ಽ'] = '',
}

-- First round of pattern -> replacement rules applied to the transcription.
-- to_IPA walks this table with pairs(), so the rules must not depend on the
-- order in which they run.
local adjust1 = {
	-- Anusvara placeholder 'm̃' becomes the nasal matching the next sound:
	-- before velars
	['m̃([kɡŋ])'] = 'ŋ%1',
	-- before palatal affricates / palatal nasal
	['m̃([td]͡[ʃʒ])'] = 'ɲ%1',
	['m̃(ɲ)'] = 'ɲ%1',
	-- before retroflexes
	['m̃([ʈɖɳ])'] = 'ɳ%1',
	-- before dentals and [ts]/[dz]-type affricates
	['m̃([td]̪)'] = 'n̪%1',
	['m̃([td]͡[sz])'] = 'n̪%1',
	['m̃(n̪)'] = 'n̪%1',
	-- before labials, approximants and fricatives
	['m̃([pbmjɾlʋʃʂshɭ])'] = 'm%1',
	-- before whitespace/punctuation and at the very end
	['m̃([%s%p])'] = 'm%1',
	['m̃$'] = 'm',
	-- Not an anusvara rule: an 'e' followed by the digit 2 is written as ɛ
	['e2'] = 'ɛ',
}

-- Pattern -> replacement rules that turn the phonemic transcription into a
-- phonetic one (differences in phonemes vs. phones). Currently empty, so the
-- two transcriptions produced by to_IPA are always identical.
local adjust2 = {}

-- Move leading consonants of each non-initial syllable into the coda of the
-- preceding syllable (Weerasinghe-Wasala-Gamage method).
--
-- syllables: a list of syllables, each a list of phonemes whose last element
-- is the vowel. The inner lists are modified in place and the same outer
-- list is returned.
--
-- NOTE(review): stop_list and consonant_sonority are not defined in the
-- visible part of this file; they are only touched for syllables with four
-- or more phonemes, and would raise there if they really are missing.
local function shift_to_codas(syllables)
	-- How many phonemes at the front of `cluster` belong to the previous syllable.
	local function count_to_shift(cluster)
		local size = #cluster
		if size < 3 then
			-- a single onset consonant (or none) stays where it is
			return 0
		end
		if size == 3 then
			-- V.CCV => VC.CV
			return 1
		end

		local before_vowel = cluster[size - 1]
		local ends_in_r_or_y = before_vowel == "ɾ" or before_vowel == "j"

		if size == 4 then
			-- V.CCrV or V.CCyV => VC.CrV or VC.CyV
			-- two leading stops => VC.CCV
			if ends_in_r_or_y or (stop_list[cluster[1]] and stop_list[cluster[2]]) then
				return 1
			end
			-- V.CCCV => VCC.CV
			return 2
		end

		-- four consonants or more
		if ends_in_r_or_y then
			return size - 3
		end

		-- otherwise split at the consonant of least sonority, scanning from
		-- the consonant nearest the vowel back to the start
		local count = size - 1
		local lowest = consonant_sonority[before_vowel]
		for idx = size - 1, 1, -1 do
			local sonority = consonant_sonority[cluster[idx]]
			if sonority < lowest then
				count = idx
				lowest = sonority
			end
		end
		return count
	end

	for pos, current in ipairs(syllables) do
		-- the first syllable has no predecessor to receive a coda
		if pos > 1 then
			local previous = syllables[pos - 1]
			for _ = 1, count_to_shift(current) do
				table.insert(previous, table.remove(current, 1))
			end
		end
	end

	return syllables
end

-- Group a list of phonemes into syllables, mark stress, and return the
-- syllables joined with "." as a single string.
--
-- remainder: list of phoneme strings; it is consumed (emptied) by this call.
-- accent:    optional 1-based syllable index whose vowel is replaced through
--            accent_vowel; ignored when nil or larger than the syllable count.
--
-- NOTE(review): vowel_list, accent_vowel, SYLLABIC and ACUTE are not defined
-- in the visible part of this file — confirm they exist before calling this.
local function syllabify(remainder, accent)
	local syllables = {}
	local syll = {}
	
	-- Every vowel closes the syllable being built; consonants accumulate
	-- in front of it.
	while #remainder > 0 do
		local phoneme = table.remove(remainder, 1)
		
		table.insert(syll, phoneme)
		if vowel_list[phoneme] then
			table.insert(syllables, syll)
			syll = {}
		end
	end
	-- store whatever consonants remain after the last vowel
	local final_cons = syll
	
	-- Pitch accent: swap the vowel (last phoneme) of the accented syllable
	-- for its accented form
	if accent ~= nil and accent <= #syllables then
		syll = syllables[accent]
		syllables[accent][#syll] = accent_vowel[syll[#syll]]
	end
	
	syllables = shift_to_codas(syllables)
	
	-- Matches a syllable-final segment that is a short vowel
	local short_vowel_patt = "^[ɐiurl]" .. SYLLABIC .. "?" .. ACUTE .. "?$"

	-- Stress accent; one-syllable words get no stress mark
	local num_sylls = #syllables
	if num_sylls == 2 then
		table.insert(syllables[1], 1, 'ˈ')
	elseif num_sylls == 3 then
		-- if the final segment of the second syllable is not a short vowel, stress the second syllable
		if mw.ustring.match(syllables[2][#syllables[2]], short_vowel_patt) == nil then
			table.insert(syllables[2], 1, 'ˈ')
		-- otherwise stress the first syllable (third from the end)
		else
			table.insert(syllables[1], 1, 'ˈ')
		end
	elseif num_sylls >= 4 then
		-- stress the penultimate if it does not end in a short vowel, else
		-- the antepenultimate under the same test, else the fourth from the end
		if mw.ustring.match(syllables[num_sylls - 1][#syllables[num_sylls - 1]], short_vowel_patt) == nil then
			table.insert(syllables[num_sylls - 1], 1, 'ˈ')
		elseif mw.ustring.match(syllables[num_sylls - 2][#syllables[num_sylls - 2]], short_vowel_patt) == nil then
			table.insert(syllables[num_sylls - 2], 1, 'ˈ')
		else
			table.insert(syllables[num_sylls - 3], 1, 'ˈ')
		end
	end

	-- Input without any vowel produces no syllable at all; create one so the
	-- leftover consonants have somewhere to go instead of indexing
	-- syllables[0] (nil) and raising.
	if #syllables == 0 and #final_cons > 0 then
		table.insert(syllables, {})
	end

	-- If there are phonemes left, then the word ends in a consonant
	-- Add them to the last syllable
	for _, phoneme in ipairs(final_cons) do
		table.insert(syllables[#syllables], phoneme)
	end
	
	for i, _ in ipairs(syllables) do
		syllables[i] = table.concat(syllables[i], "")
	end
	
	return table.concat(syllables, ".")
end

-- Transcribe Kannada-script text into IPA.
--
-- text: the string to transcribe.
-- Returns a list with one element (the phonemic transcription) when the
-- adjust2 rules leave it unchanged, otherwise two elements
-- { phonemic, phonetic }.
function export.to_IPA(text)
	local VIRAMA = '್'	-- final virama rules
	-- A virama at the end of the text or before a space (optionally followed
	-- by one punctuation mark) is pronounced with a following ɯ.
	text = mw.ustring.gsub(text, VIRAMA .. "([%,%.%!%?%:%;]?)$", VIRAMA .. "ɯ%1")
	text = mw.ustring.gsub(text, VIRAMA .. "([%,%.%!%?%:%;]?) ", VIRAMA .. "ɯ%1 ")
	
	-- Consonant + optional nukta + optional vowel sign. The classes list ೞ
	-- and ೢ/ೣ explicitly because they lie outside the ಕ-ಹ and ಾ-್ ranges,
	-- yet have entries in `consonants` / `vowel_diacritics`; without them
	-- those characters were left untranscribed.
	text = mw.ustring.gsub(
		text,
		'([ಕ-ಹೞ])(಼?)([ಾ-್ೢೣ]?)',
		function(c, n, d)
			-- d is '' when no sign follows, which selects the inherent vowel
			return ((consonants[c..n] or consonants[c]) or c) .. vowel_diacritics[d]
		end)

	-- Independent vowels and other signs; characters without an entry in
	-- `other` are left as they are.
	text = mw.ustring.gsub(text, '[ಂ-ೡ]', other)

	for k, v in pairs(adjust1) do
		text = mw.ustring.gsub(text, k, v)
	end

	-- If an independent vowel is after another vowel, assume diphthong
	text = mw.ustring.gsub(text, "([ɐaeiou]ː?)•", "%1")

	-- Phonetic transcription (kept local so no global is created)
	local text2 = text
	for k, v in pairs(adjust2) do
		text2 = mw.ustring.gsub(text2, k, v)
	end

	return (text == text2 and { text } or { text, text2 })

end

-- Template entry point: formats the IPA transcription as a bulleted line.
--
-- Reads from the parent frame's arguments:
--   1 - text to transcribe (falls back to the current page title)
--   q - optional qualifier shown before the transcription
-- Returns the wikitext string "* [qualifier ]<formatted IPA>".
function export.show(frame)
	local params = frame:getParent().args
	local title_text = mw.title.getCurrentTitle().text
	local input = params[1] or title_text
	local qual = params["q"]

	local results = export.to_IPA(input)

	-- The phonemic form is always shown between slashes; the phonetic form,
	-- when to_IPA returned one, follows in square brackets.
	local items = { { pron = "/" .. results[1] .. "/" } }
	if results[2] then
		items[2] = { pron = "[" .. results[2] .. "]" }
	end

	local formatted = require("Module:IPA").format_IPA_full {
		lang = require("Module:languages").getByCode("tcy"),
		items = items,
	}

	local prefix = ""
	if qual then
		prefix = require("Module:qualifier").format_qualifier{qual} .. " "
	end

	return "* " .. prefix .. formatted
end

-- Hand the module table (to_IPA, show) back to whoever loads this module.
return export