-- Module:sa-pronunc
-- From Wiktionary, the free dictionary.
--
-- Powers {{sa-IPA}}.


local export = {}

local u = require("Module:string/char")
local gsub = mw.ustring.gsub
local match = mw.ustring.match

-- Combining marks used when building and matching the IPA output.
local ACUTE     = u(0x0301) -- U+0301 combining acute accent (accent mark on vowels)
local COARTIC   = u(0x0361) -- U+0361 combining double inverted breve (affricate tie bar)
local DENTAL    = u(0x032A) -- U+032A combining bridge below (dental articulation)
local FLAP      = u(0x0306) -- U+0306 combining breve
local NORELEASE = u(0x031A) -- U+031A combining left angle above (no audible release)
local SYLLABIC  = u(0x0329) -- U+0329 combining vertical line below (syllabic consonant)
local NASAL     = u(0x0303) -- U+0303 combining tilde (nasalization)

local m_IPA = require("Module:IPA")
local lang = require("Module:languages").getByCode("sa")
local m_a = require("Module:accent qualifier")

-- Devanagari consonant letters mapped to their IPA values.
-- convert_word appends the inherent vowel "ɐ" after a consonant unless the
-- next character is a key of `diacritics`.
local consonants = {
	["क"] = "k", ["ग"] = "ɡ", ["ख"] = "kʰ", ["घ"] = "ɡʱ", ["ङ"] = "ŋ",
	["च"] = "t͡ɕ", ["ज"] = "d͡ʑ", ["छ"] = "t͡ɕʰ", ["झ"] = "d͡ʑʱ", ["ञ"] = "ɲ",
	["त"] = "t", ["द"] = "d", ["थ"] = "tʰ", ["ध"] = "dʱ", ["न"] = "n",
	["ट"] = "ʈ", ["ड"] = "ɖ", ["ठ"] = "ʈʰ", ["ढ"] = "ɖʱ", ["ण"] = "ɳ",
	["प"] = "p", ["ब"] = "b", ["फ"] = "pʰ", ["भ"] = "bʱ", ["म"] = "m",
	["य"] = "j", ["र"] = "ɾ", ["ल"] = "l", ["व"] = "ʋ", ["ळ"] = "ɭ̆",  -- ["ळ्ह"] = "ɭ̆ʱ",
	["श"] = "ɕ", ["ष"] = "ʂ", ["स"] = "s", ["ह"] = "ɦ",
}

-- Dependent vowel signs mapped to their IPA values. The virama ("्") maps to
-- the empty string: its presence after a consonant suppresses the inherent vowel.
local diacritics = {
	["ा"] = "ɑː", ["ि"] = "i", ["ी"] = "iː", ["ु"] = "u", ["ू"] = "uː", ["ृ"] = "r̩", ["ॄ"] = "r̩ː",
	["ॢ"] = "l̩", ["ॣ"] = "l̩ː", ["े"] = "ɐj", ["ै"] = "ɑːj", ["ो"] = "ɐw", ["ौ"] = "ɑːw", ["्"] = "",
}

-- Set of vowel phonemes (as produced by `diacritics`, `tt` and the inherent
-- vowel). syllabify closes a syllable whenever it meets one of these.
local vowel_list = {
	["ɐ"] = true, ["ɑː"] = true, ["i"] = true, ["iː"] = true, ["u"] = true, ["uː"] = true, ["r̩"] = true, ["r̩ː"] = true,
	["l̩"] = true, ["l̩ː"] = true, ["ɐj"] = true, ["ɑːj"] = true, ["ɐw"] = true, ["ɑːw"] = true, 
}

-- Set of stop and affricate phonemes. shift_to_codas consults it to decide how
-- a three-consonant onset is split when its first two members are both stops.
local stop_list = {
	["k"] = true, ["ɡ"] = true, ["kʰ"] = true, ["ɡʱ"] = true,
	["t͡ɕ"] = true, ["d͡ʑ"] = true, ["t͡ɕʰ"] = true, ["d͡ʑʱ"] = true,
	["t"] = true, ["d"] = true, ["tʰ"] = true, ["dʱ"] = true,
	["ʈ"] = true, ["ɖ"] = true, ["ʈʰ"] = true, ["ɖʱ"] = true, 
	["p"] = true, ["b"] = true, ["pʰ"] = true, ["bʱ"] = true,
}

-- Sonority rank of every consonant phoneme; a larger number means more sonorous.
-- shift_to_codas compares these ranks to locate the least sonorous consonant
-- of a long onset cluster. A phoneme missing from this table yields nil there.
local consonant_sonority = {
	-- voiceless stops and affricates
	["k"] = 1, ["kʰ"] = 1,
	["t͡ɕ"] = 1, ["t͡ɕʰ"] = 1,
	["t"] = 1, ["tʰ"] = 1,
	["ʈ"] = 1, ["ʈʰ"] = 1,
	["p"] = 1, ["pʰ"] = 1,
	-- voiceless fricatives
	["ɕ"] = 2, ["ʂ"] = 2, ["s"] = 2, ["h"] = 2, ["x"] = 2, ["ɸ"] = 2,
	-- voiced stops and affricates
	["ɡ"] = 3, ["ɡʱ"] = 3,
	["d͡ʑ"] = 3, ["d͡ʑʱ"] = 3,
	["d"] = 3, ["dʱ"] = 3,
	["ɖ"] = 3, ["ɖʱ"] = 3,
	["b"] = 3, ["bʱ"] = 3,
	-- voiced fricatives
	["ɦ"] = 4,
	-- nasals
	["ŋ"] = 5, ["ɲ"] = 5, ["n"] = 5, ["ɳ"] = 5, ["m"] = 5, ["m̐"] = 5, ["ṃ"] = 5,
	-- flaps
	["ɾ"] = 6,
	-- laterals
	["l"] = 7, ["ɭ̆"] = 7, ["ɭ̆ʱ"] = 7,
	-- glides
	["j"] = 8, ["ʋ"] = 8,
}

-- Remaining characters handled by convert_word: independent vowel letters and
-- the signs that are neither consonants nor dependent vowel signs.
-- Note that the avagraha maps to the empty string.
local tt = {
	-- vowels
	["अ"] = "ɐ", ["आ"] = "ɑː", ["इ"] = "i", ["ई"] = "iː", ["उ"] = "u", ["ऊ"] = "uː", ["ऋ"] = "r̩", ["ॠ"] = "r̩ː",
	["ऌ"] = "l̩", ["ॡ"] = "l̩ː", ["ए"] = "ɐj", ["ऐ"] = "ɑːj", ["ओ"] = "ɐw", ["औ"] = "ɑːw", 
	-- visarga    
	["ः"] = "h",
	-- chandrabindu
	["ँ"] = "m̐",
	-- anusvara
	["ं"] = "ṃ",
	-- avagraha
	['ऽ'] = "",
    --Vedic extensions
    ['ᳵ'] = "x", ['ᳶ'] = "ɸ",
}

-- Maps each vowel phoneme to its accented form. syllabify uses it to mark the
-- vowel of the syllable selected by the accent index.
local accent_vowel = {
	["ɐ"] = "ɐ́", ["ɑː"] = "ɑ́ː", ["i"] = "í", ["iː"] = "íː", ["u"] = "ú", ["uː"] = "úː", ["r̩"] = "ŕ̩", ["r̩ː"] = "ŕ̩ː",
	["l̩"] = "ĺ̩", ["l̩ː"] = "ĺ̩ː", ["ɐj"] = "ɐ́j", ["ɑːj"] = "ɑ́ːj", ["ɐw"] = "ɐ́w", ["ɑːw"] = "ɑ́ːw", 
}

-- Move leading consonants of each non-initial syllable into the coda of the
-- preceding syllable (Weerasinghe-Wasala-Gamage method).
--
-- `syllables` is a list of phoneme lists, each ending in its vowel. The lists
-- are modified in place and the same outer table is returned.
local function shift_to_codas(syllables)
	for idx, syll in ipairs(syllables) do
		local size = #syll
		local moves = 0

		-- The first syllable has no predecessor, and a syllable of fewer than
		-- three phonemes has at most one onset consonant: nothing to shift.
		if idx > 1 and size >= 3 then
			local last_cons = syll[size - 1]
			local before_r_or_y = last_cons == "ɾ" or last_cons == "j"

			if size == 3 then
				-- V.CCV => VC.CV
				moves = 1
			elseif size == 4 then
				if before_r_or_y or (stop_list[syll[1]] and stop_list[syll[2]]) then
					-- V.CCrV / V.CCyV => VC.CrV / VC.CyV
					-- two leading stops: VC.CCV
					moves = 1
				else
					-- V.CCCV => VCC.CV
					moves = 2
				end
			elseif before_r_or_y then
				-- four or more consonants ending in r/y: keep the last two as onset
				moves = size - 3
			else
				-- four or more consonants: shift up to and including the
				-- consonant of least sonority (scanning from the vowel backwards)
				moves = size - 1
				local lowest = consonant_sonority[last_cons]
				for pos = size - 1, 1, -1 do
					local rank = consonant_sonority[syll[pos]]
					if rank < lowest then
						moves = pos
						lowest = rank
					end
				end
			end
		end

		for _ = 1, moves do
			table.insert(syllables[idx - 1], table.remove(syll, 1))
		end
	end
	return syllables
end

-- Group a flat list of phonemes into syllables and return them joined by ".".
--
-- remainder: list of phoneme strings (read only; it is no longer drained).
-- accent:    optional 1-based index of the syllable carrying the pitch
--            accent; values that do not name an existing syllable are ignored.
-- Returns the syllabified word as a string. A word that contains no vowel at
-- all is returned as its consonants concatenated, without any separator.
local function syllabify(remainder, accent)
	local syllables = {}
	local syll = {}

	-- Every vowel closes the syllable being built; consonants seen before it
	-- form that syllable's provisional onset.
	for _, phoneme in ipairs(remainder) do
		table.insert(syll, phoneme)
		if vowel_list[phoneme] then
			table.insert(syllables, syll)
			syll = {}
		end
	end
	-- whatever consonants remain after the last vowel
	local final_cons = syll

	-- No vowel means no syllable to attach the consonants to.
	if #syllables == 0 then
		return table.concat(final_cons, "")
	end

	-- Vedic pitch accent: replace the vowel (always the last phoneme of its
	-- syllable at this point) with its accented form. Looking the syllable up
	-- directly rejects 0, negative, fractional and too-large indices alike.
	local accented = accent ~= nil and syllables[accent]
	if accented then
		accented[#accented] = accent_vowel[accented[#accented]]
	end

	syllables = shift_to_codas(syllables)

	-- If there are phonemes left, then the word ends in a consonant:
	-- add them to the last syllable.
	for _, phoneme in ipairs(final_cons) do
		table.insert(syllables[#syllables], phoneme)
	end

	for i, parts in ipairs(syllables) do
		syllables[i] = table.concat(parts, "")
	end

	return table.concat(syllables, ".")
end

-- Nasal that an anusvara turns into before each stop or affricate (the nasal
-- sharing that consonant's place of articulation). Used by anusvara().
local anu_to_nasals = {
	["k"] = "ŋ", ["ɡ"] = "ŋ",
	["t͡ɕ"] = "ɲ", ["d͡ʑ"] = "ɲ",
	["t"] = "n", ["d"] = "n",
	["ʈ"] = "ɳ", ["ɖ"] = "ɳ",
	["p"] = "m", ["b"] = "m",
}

-- Resolve every anusvara placeholder ("ṃ") in `text` and return the result.
--
-- In order:
--   1. "ṃ" at the very end of the text becomes "m";
--   2. "ṃ" before a stop or affricate (optionally across a space, "." or "ˈ")
--      becomes the nasal listed for that consonant in `anu_to_nasals`;
--   3. "ṃ" directly after a vowel nasalizes that vowel;
--   4. any "ṃ" still left becomes "ɴ".
local function anusvara(text)
	text = gsub(text, "ṃ$", "m")
	text = gsub(
		text,
		"ṃ([ %.ˈ]?)([kɡtdʈɖpb])([" .. DENTAL .. COARTIC .. "]?)([ɕʑ]?)",
		function(div, cons, mark, fric)
			-- The full key only exists for the affricates; for any other
			-- combination (e.g. a stop directly followed by ɕ/ʑ, or a stop
			-- carrying a dental mark) fall back to the bare stop, which is
			-- always a key, instead of concatenating nil.
			local nasal = anu_to_nasals[cons .. mark .. fric] or anu_to_nasals[cons]
			return nasal .. div .. cons .. mark .. fric
		end
	)
	text = gsub(
		text,
		"([ɐɑiurleo])(" .. SYLLABIC .. "?)(" .. ACUTE .. "?)(ː?)([jw]?)ṃ",
		"%1%2" .. NASAL .. "%3%4%5"
	)
	text = gsub(text, "ṃ", "ɴ")
	return text
end

-- Convert one Devanagari word to a syllabified IPA string.
--
-- word:   the word in Devanagari.
-- accent: optional index of the accented syllable, passed on to syllabify.
-- Characters that appear in none of the lookup tables are ignored.
local function convert_word(word, accent)
	local chars = {}
	local t = {}

	-- split the word into individual characters
	gsub(word, ".", function(c) table.insert(chars, c) end)

	for i, c in ipairs(chars) do
		if consonants[c] then
			table.insert(t, consonants[c])
			-- a consonant carries the inherent vowel unless a vowel sign or
			-- virama follows it
			if not diacritics[chars[i + 1]] then
				table.insert(t, "ɐ")
			end
		else
			-- Virama and avagraha map to the empty string. They must not be
			-- emitted as phonemes: the syllabifier would count an empty
			-- string as a consonant when measuring onset clusters.
			local phoneme = diacritics[c] or tt[c]
			if phoneme and phoneme ~= "" then
				table.insert(t, phoneme)
			end
		end
	end

	word = syllabify(t, accent)

	word = gsub(word, "%.ˈ", "ˈ")

	-- correction for ळ्ह = ɭ̆ʱ
	word = gsub(word, "ɭ̆([.']?)ɦ", "%1ɭ̆ʱ")

	-- chandrabindu: nasalize the preceding vowel
	word = gsub(
		word,
		"([ɐɑiurleo])(" .. SYLLABIC .. "?)(" .. ACUTE .. "?)(ː?)([jw]?)m̐",
		"%1%2" .. NASAL .. "%3%4%5"
	)
	return word
end

-- Convert a space-separated string of Devanagari words to IPA.
--
-- words:   the words, separated by single spaces.
-- accents: table indexed by word position giving the accented syllable of
--          each word (entries may be nil).
-- Returns the converted words joined by single spaces.
local function convert_words(words, accents)
	local result = {}

	local word_num = 1
	for word in mw.text.gsplit(words, " ") do
		table.insert(result, convert_word(word, accents[word_num]))
		word_num = word_num + 1
	end

	-- Returned directly: the original stored this in an undeclared `text`,
	-- which created a global variable.
	return table.concat(result, " ")
end

-- Apply the phonological processes shared by all dialects; at present this is
-- only the anusvāra resolution.
local function phon_procs(text)
	return (anusvara(text))
end

-- De-aspirate and de-affricate a consonant that stands immediately before a
-- stop (optionally across a space, "." or "ˈ"). Returns the modified text.
local function abhinidhana_phonemic(text)
	local stop = "([kɡtdʈɖpb])"
	local boundary = "([ %.ˈ]?)"

	-- stop (+ optional dental mark) loses its aspiration before another stop
	local aspirated_patt = stop .. "(" .. DENTAL .. "?)[ʰʱ]?" .. boundary .. stop
	local deaspirated = gsub(text, aspirated_patt, "%1%2%3%4")

	-- affricate keeps only its stop component before a stop
	local affricate_patt = "([td])" .. COARTIC .. "[ɕʑ][ʰʱ]?" .. boundary .. stop
	local deaffricated = gsub(deaspirated, affricate_patt, "%1%2%3")

	return deaffricated
end

-- Mark a stop as unreleased when another stop follows it (optionally across a
-- space, "." or "ˈ"). Returns the modified text.
local function abhinidhana_phonetic(text)
	local stop = "([kɡtdʈɖpb])"
	local stop_before_stop = stop .. "(" .. DENTAL .. "?)([ %.ˈ]?)" .. stop
	local unreleased = "%1%2" .. NORELEASE .. "%3%4"
	return (gsub(text, stop_before_stop, unreleased))
end

-- Maps a vowel to the same vowel carrying a breve. make_dialects appends this
-- form after a visarga ("h") in the Classical phonetic transcription.
local superscript = {
	["ɐ"] = "ɐ̆",
	["ɑ"] = "ɑ̆",
	["e"] = "ĕ",
	["o"] = "ŏ",
	["i"] = "ĭ",
	["u"] = "ŭ",
}

-- Derive the per-dialect transcriptions from the common IPA string.
--
-- text: syllabified IPA as produced by convert_words and phon_procs.
-- Returns a table with the keys 'rig' and 'cla'; each value is a table with
-- the fields `label`, `phonemic` and `phonetic`.
local function make_dialects(text)
	local dialects = {}
	
	text = abhinidhana_phonemic(text)

	-- Rigvedic Sanskrit
	local rig_phnm = text
	-- normalise any stress marks into plain syllable breaks
	rig_phnm = gsub(rig_phnm, "^ˈ", "")
	rig_phnm = gsub(rig_phnm, "ˈ", ".")
	rig_phnm = gsub(rig_phnm, " %.", " ")
	
	local rig_phnt = abhinidhana_phonetic(rig_phnm)
	-- visarga alternation
	rig_phnt = gsub(rig_phnt, "h([ %.ˈ]?)([p])", "ɸ%1%2")
	rig_phnt = gsub(rig_phnt, "h([ %.ˈ]?)([k])", "x%1%2")
	-- nasalized semivowels
	-- (a nasal before l/ɭ/j/ʋ is replaced by a nasalized copy of that consonant)
	rig_phnt = gsub(
		rig_phnt,
		"([ŋɲnɳm])(" .. DENTAL .. "?)([ %.ˈ]?)([lɭjʋ])([" .. DENTAL .. FLAP .. "]?)(ʱ?)",
		"%4%5" .. NASAL .. "%3%4%5%6"
	)
	
	dialects['rig'] = {
		label = "Vedic",
		phonemic = rig_phnm,
		phonetic = rig_phnt,
	}
	
	-- Classical Sanskrit
	local cla_phnm = text
	-- strip the pitch accent: precomposed accented letters go back to their
	-- base letter and a free-standing combining acute is deleted
	cla_phnm = gsub(cla_phnm, "[éóíúŕ" .. ACUTE .. "]", {["é"] = "e", ["ó"] = "o", ["í"] = "i", ["ú"] = "u", [ACUTE] = "", ["ŕ"] = "r"})
	-- ɐj / ɐw become the long monophthongs eː / oː
	cla_phnm = gsub(cla_phnm, "ɐ(" .. NASAL .. "?)j", "e%1ː")
	cla_phnm = gsub(cla_phnm, "ɐ(" .. NASAL .. "?)w", "o%1ː")
	-- ɑːj / ɑːw lose their length mark
	cla_phnm = gsub(cla_phnm, "ɑ(" .. NASAL .. "?)ː([jw])", "ɑ%1%2")
	
	-- Add dental diacritic to t, d, tʰ, dʱ, n, l, s.
	-- The callback returns nil for affricates, which leaves that match unchanged.
	cla_phnm = gsub(
	    cla_phnm,
	    "([td]" .. COARTIC .. "?[ɕʑ]?)([ʰʱ]?)",
	    function(base_consonant, aspiration)
	        if base_consonant == "t" or base_consonant == "d" then
	            return base_consonant .. DENTAL .. aspiration
	        end
	    end
    )
	-- NOTE(review): an optional NASAL mark directly before n/l/s is matched
	-- here but is not part of the replacement, so it is dropped — confirm
	-- that this is intended.
	cla_phnm = gsub(cla_phnm, NASAL .. "?([nls])", "%1" .. DENTAL)
	
	local cla_phnt = abhinidhana_phonetic(cla_phnm)
	-- cla_pron = gsub(cla_pron, "r̩(" .. NASAL .. "?)(" .. ACUTE .. "?)(ː?)", "ɾi%1%2%3")
	-- cla_pron = gsub(cla_pron, "l̩(" .. NASAL .. "?)(" .. ACUTE .. "?)(ː?)", "l̪i%1%2%3")
	
	-- after a visarga at the end of the text, append the breve form of the
	-- preceding vowel
	cla_phnt = gsub(
		cla_phnt,
		"([ɐɑeoiu])(" .. NASAL .. "?)(ː?)([jw]?)h$",
		function (vow, nas, length, glide)
			return vow .. nas .. length .. glide .. "h" .. superscript[vow]
		end
	)
	-- the same for a visarga followed by a space
	cla_phnt = gsub(
		cla_phnt,
		"([ɐɑeoiu])(" .. NASAL .. "?)(ː?)([jw]?)h ",
		function (vow, nas, length, glide)
			return vow .. nas .. length .. glide .. "h" .. superscript[vow] .. " "
		end
	)
	
	dialects['cla'] = {
		label = "Classical Sanskrit",
		phonemic = cla_phnm,
		phonetic = cla_phnt,
	}
	
	return dialects
end

-- Format the dialect transcriptions as wikitext list items.
--
-- dialects:    table returned by make_dialects (keys 'rig' and 'cla').
-- novedic:     when truthy, the Vedic line is omitted.
-- noclassical: when truthy, the Classical Sanskrit line is omitted.
-- Returns one "\n* <qualifier> <IPA>" line per remaining dialect, concatenated;
-- the empty string when both dialects are suppressed.
local function make_table(dialects, novedic, noclassical)
	-- Build the list by inclusion. Removing entries by fixed position made
	-- `noclassical` a no-op whenever `novedic` had already shortened the list.
	local dial_types = {}
	if not novedic then
		table.insert(dial_types, 'rig')
	end
	if not noclassical then
		table.insert(dial_types, 'cla')
	end

	local full = {}
	for _, dial in ipairs(dial_types) do
		local entry = dialects[dial]
		local items = {{pron = '/' .. entry.phonemic .. '/'}}
		-- show the phonetic form only when it adds information
		if entry.phonemic ~= entry.phonetic then
			table.insert(items, {pron = '[' .. entry.phonetic .. ']'})
		end
		table.insert(full, table.concat{
			'\n* ',
			m_a.format_qualifiers(lang, {entry.label}),
			' ',
			m_IPA.format_IPA_full { lang = lang, items = items },
		})
	end

	return table.concat(full, "")
end

-- Template entry point for {{sa-IPA}}.
--
-- Reads the parent frame's arguments: the word(s) (`w`, also accepted as the
-- first positional parameter, defaulting to the page title), the per-word
-- accent positions (`a`), and the `novedic` / `noclassical` switches.
-- Returns the formatted pronunciation lines.
function export.show(frame)
	local param_spec = {
		[1] = {alias_of = 'w'},
		w = {default = mw.title.getCurrentTitle().text},
		a = {list = true, allow_holes = true, type = 'number'},
		novedic = {type = 'boolean'},
		noclassical = {type = 'boolean'}
	}
	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, param_spec)

	-- Devanagari -> IPA -> shared phonological processes -> per-dialect forms
	local ipa = phon_procs(convert_words(args.w, args.a))
	local by_dialect = make_dialects(ipa)

	return make_table(by_dialect, args.novedic, args.noclassical)
end

return export