-- Module:sa-pronunc/sandbox2

-- From Wiktionary, the free dictionary
-- Jump to navigation Jump to search
-- This module needs documentation.
-- Please document this module by describing its purpose and usage on the documentation page.

-- Table returned at the end of the module; public entry points are attached to it.
local export = {}

-- Short aliases for the Unicode-aware Scribunto string helpers.
local u = mw.ustring.char
local gsub = mw.ustring.gsub

-- Combining marks, built from their Unicode code points, that are appended to
-- base IPA letters elsewhere in this module.
local HIGH      = u(0x0301) -- combining acute accent; substituted for RISING in the Vedic phonemic output
local RISING    = u(0x030C) -- combining caron; placed on the accented syllable's vowel
local LOW       = u(0x0300) -- combining grave accent; placed on the vowel before the accented syllable
local PEAKING   = u(0x1DC8) -- combining grave-acute-grave; placed on the vowel after the accented syllable
local FALLING   = u(0x0302) -- combining circumflex; not referenced in the code visible here
local COARTIC   = u(0x0361) -- combining double inverted breve; not referenced in the code visible here
local DENTAL    = u(0x032A) -- combining bridge below (t̪, d̪, n̪, l̪, s̪)
local FLAP      = u(0x0306) -- combining breve (as in ɭ̆)
local NORELEASE = u(0x031A) -- combining left angle above; added by abhinidhana()
local SYLLABIC  = u(0x0329) -- combining vertical line below (r̩, l̩)
local NASAL     = u(0x0303) -- combining tilde; marks nasalised vowels/semivowels

local m_IPA = require("Module:IPA")
local lang = require("Module:languages").getByCode("sa")
local m_a = require("Module:accent qualifier")

-- Devanagari consonant letters mapped to their IPA value (without the inherent
-- vowel, which convert_word adds separately).
-- NOTE(review): the key "ळ्ह" is a three-code-point sequence, while convert_word
-- looks this table up one character at a time, so that entry is never reached
-- by that lookup.
local consonants = {
	-- velars
	["क"] = "k", ["ग"] = "ɡ", ["ख"] = "kʰ", ["घ"] = "ɡʱ", ["ङ"] = "ŋ",
	-- palatals
	["च"] = "c", ["ज"] = "ɟ", ["छ"] = "cʰ", ["झ"] = "ɟʱ", ["ञ"] = "ɲ",
	-- dentals
	["त"] = "t̪", ["द"] = "d̪", ["थ"] = "t̪ʰ", ["ध"] = "d̪ʱ", ["न"] = "n̪",
	-- retroflexes
	["ट"] = "ʈ", ["ड"] = "ɖ", ["ठ"] = "ʈʰ", ["ढ"] = "ɖʱ", ["ण"] = "ɳ",
	-- labials
	["प"] = "p", ["ब"] = "b", ["फ"] = "pʰ", ["भ"] = "bʱ", ["म"] = "m",
	-- semivowels and liquids
	["य"] = "j", ["र"] = "ɽ", ["ल"] = "l̪", ["व"] = "w", ["ळ"] = "ɭ̆", ["ळ्ह"] = "ɭ̆ʱ",
	-- sibilants and h
	["श"] = "ɕ", ["ष"] = "ʂ", ["स"] = "s̪", ["ह"] = "ɦ",
}

-- Dependent vowel signs (matras) mapped to IPA. The virama "्" maps to the empty
-- string: its presence in this table is what stops convert_word from adding the
-- inherent vowel "ɐ" after the preceding consonant.
local diacritics = {
	["ा"] = "ɑː", ["ि"] = "i", ["ी"] = "iː", ["ु"] = "u", ["ू"] = "uː", ["ृ"] = "r̩", ["ॄ"] = "r̩ː",
	["ॢ"] = "l̩", ["ॣ"] = "l̩ː", ["े"] = "ɐɪ", ["ै"] = "ɑːɪ", ["ो"] = "ɐʊ", ["ौ"] = "ɑːʊ", ["्"] = "",
}

-- Set of IPA vowel nuclei (as produced by `diacritics` and `tt`, plus the
-- inherent "ɐ"). syllabify() closes a syllable whenever it meets one of these.
local vowel_list = {
	["ɐ"] = true, ["ɑː"] = true, ["i"] = true, ["iː"] = true, ["u"] = true, ["uː"] = true, ["r̩"] = true, ["r̩ː"] = true,
	["l̩"] = true, ["l̩ː"] = true, ["ɐɪ"] = true, ["ɑːɪ"] = true, ["ɐʊ"] = true, ["ɑːʊ"] = true, 
}

-- Set of oral stops (plain, aspirated and breathy-voiced). shift_to_codas()
-- consults it to detect an onset cluster that begins with two stops.
local stop_list = {
	["k"] = true, ["ɡ"] = true, ["kʰ"] = true, ["ɡʱ"] = true,
	["c"] = true, ["ɟ"] = true, ["cʰ"] = true, ["ɟʱ"] = true,
	["t̪"] = true, ["d̪"] = true, ["t̪ʰ"] = true, ["d̪ʱ"] = true,
	["ʈ"] = true, ["ɖ"] = true, ["ʈʰ"] = true, ["ɖʱ"] = true, 
	["p"] = true, ["b"] = true, ["pʰ"] = true, ["bʱ"] = true,
}

-- Sonority rank of each consonant phoneme (1 = least sonorous, 8 = most).
-- shift_to_codas() uses the ranks to locate the least sonorous consonant in
-- clusters of four or more consonants. Phonemes absent from this table have no
-- rank (lookup yields nil).
local consonant_sonority = {
	-- voiceless stops and affricates
	["k"] = 1, ["kʰ"] = 1,
	["c"] = 1, ["cʰ"] = 1,
	["t̪"] = 1, ["t̪ʰ"] = 1,
	["ʈ"] = 1, ["ʈʰ"] = 1,
	["p"] = 1, ["pʰ"] = 1,
	-- voiceless fricatives
	["ɕ"] = 2, ["ʂ"] = 2, ["s̪"] = 2, ["h"] = 2, ["x"] = 2, ["ɸ"] = 2,
	-- voiced stops and affricates
	["ɡ"] = 3, ["ɡʱ"] = 3,
	["ɟ"] = 3, ["ɟʱ"] = 3,
	["d̪"] = 3, ["d̪ʱ"] = 3,
	["ɖ"] = 3, ["ɖʱ"] = 3,
	["b"] = 3, ["bʱ"] = 3,
	-- voiced fricatives
	["ɦ"] = 4,
	-- nasals
	["ŋ"] = 5, ["ɲ"] = 5, ["n̪"] = 5, ["ɳ"] = 5, ["m"] = 5, ["m̐"] = 5, ["ṃ"] = 5,
	-- flaps
	["ɽ"] = 6,
	-- laterals
	["l̪"] = 7, ["ɭ̆"] = 7, ["ɭ̆ʱ"] = 7,
	-- glides
	["j"] = 8, ["w"] = 8,
}

-- Remaining Devanagari characters that are neither consonant letters nor
-- dependent vowel signs: independent vowels and the special signs. "ṃ" and "m̐"
-- are placeholder symbols that later passes (anusvara(), the chandrabindu
-- substitution in convert_word()) rewrite. Avagraha maps to the empty string.
local tt = {
	-- vowels
	["अ"] = "ɐ", ["आ"] = "ɑː", ["इ"] = "i", ["ई"] = "iː", ["उ"] = "u", ["ऊ"] = "uː", ["ऋ"] = "r̩", ["ॠ"] = "r̩ː",
	["ऌ"] = "l̩", ["ॡ"] = "l̩ː", ["ए"] = "ɐɪ", ["ऐ"] = "ɑːɪ", ["ओ"] = "ɐʊ", ["औ"] = "ɑːʊ", 
	-- visarga    
	["ः"] = "h",
	-- chandrabindu
	["ँ"] = "m̐",
	-- anusvara
	["ं"] = "ṃ",
	-- avagraha
	['ऽ'] = "",
    --Vedic extensions
    ['ᳵ'] = "x", ['ᳶ'] = "ɸ",
}

-- Each vowel nucleus mapped to the same nucleus carrying the RISING mark.
-- The mark is inserted directly after the base letter, i.e. before any
-- syllabic mark, length mark or offglide. syllabify() applies this table to
-- the syllable selected by the `accent` argument.
local rising_vowel = {
	["ɐ"] = "ɐ" .. RISING, ["ɑː"] = "ɑ" .. RISING .. "ː",
	["i"] = "i" .. RISING, ["iː"] = "i" .. RISING .. "ː",
	["u"] = "u" .. RISING, ["uː"] = "u" .. RISING .. "ː",
	["r̩"] = "r" .. RISING .. "̩", ["r̩ː"] = "r" .. RISING .. "̩ː",
	["l̩"] = "l" .. RISING .. "̩", ["l̩ː"] = "l" .. RISING .. "̩ː",
	["ɐɪ"] = "ɐ" .. RISING .. "ɪ", ["ɑːɪ"] = "ɑ" .. RISING .. "ːɪ",
	["ɐʊ"] = "ɐ" .. RISING .. "ʊ", ["ɑːʊ"] = "ɑ" .. RISING .. "ːʊ", 
}

-- Each vowel nucleus mapped to the same nucleus carrying the LOW mark.
-- The mark is inserted directly after the base letter, i.e. before any
-- syllabic mark, length mark or offglide. syllabify() applies this table to
-- the syllable immediately before the accented one.
local low_vowel = {
	["ɐ"] = "ɐ" .. LOW, ["ɑː"] = "ɑ" .. LOW .. "ː",
	["i"] = "i" .. LOW, ["iː"] = "i" .. LOW .. "ː",
	["u"] = "u" .. LOW, ["uː"] = "u" .. LOW .. "ː",
	["r̩"] = "r" .. LOW .. "̩", ["r̩ː"] = "r" .. LOW .. "̩ː",
	["l̩"] = "l" .. LOW .. "̩", ["l̩ː"] = "l" .. LOW .. "̩ː",
	["ɐɪ"] = "ɐ" .. LOW .. "ɪ", ["ɑːɪ"] = "ɑ" .. LOW .. "ːɪ",
	["ɐʊ"] = "ɐ" .. LOW .. "ʊ", ["ɑːʊ"] = "ɑ" .. LOW .. "ːʊ", 
}

-- Each vowel nucleus mapped to the same nucleus carrying the PEAKING mark.
-- The mark is inserted directly after the base letter, i.e. before any
-- syllabic mark, length mark or offglide. syllabify() applies this table to
-- the syllable immediately after the accented one.
local peaking_vowel = {
	["ɐ"] = "ɐ" .. PEAKING, ["ɑː"] = "ɑ" .. PEAKING .. "ː",
	["i"] = "i" .. PEAKING, ["iː"] = "i" .. PEAKING .. "ː",
	["u"] = "u" .. PEAKING, ["uː"] = "u" .. PEAKING .. "ː",
	["r̩"] = "r" .. PEAKING .. "̩", ["r̩ː"] = "r" .. PEAKING .. "̩ː",
	["l̩"] = "l" .. PEAKING .. "̩", ["l̩ː"] = "l" .. PEAKING .. "̩ː",
	["ɐɪ"] = "ɐ" .. PEAKING .. "ɪ", ["ɑːɪ"] = "ɑ" .. PEAKING .. "ːɪ",
	["ɐʊ"] = "ɐ" .. PEAKING .. "ʊ", ["ɑːʊ"] = "ɑ" .. PEAKING .. "ːʊ", 
}

-- Move leading consonants of every non-initial syllable into the coda of the
-- preceding syllable (the Weerasinghe-Wasala-Gamage method).
--
-- syllables: sequence of syllables; each syllable is a sequence of phoneme
--            strings whose last element is the nucleus. Modified in place.
-- Returns the same `syllables` table.
local function shift_to_codas(syllables)
	-- Phonemes with no sonority entry (for instance the empty string emitted
	-- for avagraha) rank above everything: they are never chosen as the
	-- minimum and never cause a nil comparison error.
	local function sonority(phoneme)
		return consonant_sonority[phoneme] or math.huge
	end

	for idx, syll in ipairs(syllables) do
		local len = #syll
		-- number of leading consonants to hand over to the previous syllable
		local to_move = 0

		if idx == 1 or len < 3 then
			-- The first syllable has no predecessor, and coda movement is
			-- only needed for onset clusters of 2 or more consonants.
		elseif len == 3 then
			-- V.CCV => VC.CV
			to_move = 1
		elseif len == 4 then
			local before_vowel = syll[len - 1]
			if before_vowel == "ɽ" or before_vowel == "j"
				or (stop_list[syll[1]] and stop_list[syll[2]]) then
				-- V.CCrV or V.CCyV => VC.CrV or VC.CyV
				-- if the first two consonants are stops, VC.CCV
				to_move = 1
			else
				-- V.CCCV => VCC.CV
				to_move = 2
			end
		else
			-- 4 consonants or more
			local before_vowel = syll[len - 1]
			if before_vowel == "ɽ" or before_vowel == "j" then
				to_move = len - 3
			else
				-- Scan from the consonant next to the vowel back to the first
				-- one and remember the index of the least sonorous consonant;
				-- on ties the one closest to the vowel wins.
				to_move = len - 1
				local min_son = sonority(syll[len - 1])
				for pos = len - 1, 1, -1 do
					local son = sonority(syll[pos])
					if son < min_son then
						to_move = pos
						min_son = son
					end
				end
			end
		end

		for _ = 1, to_move do
			table.insert(syllables[idx - 1], table.remove(syll, 1))
		end
	end
	return syllables
end

-- Group a phoneme sequence into syllables, apply the optional Vedic pitch
-- accent, redistribute onset consonants into codas, insert the stress mark
-- and join everything into one string.
--
-- remainder: sequence of phoneme strings; it is consumed (emptied) by the call.
-- accent:    1-based index of the syllable bearing the pitch accent, or nil.
--            Values that are not an integer in 1..#syllables are ignored.
-- Returns the syllables concatenated with "." between them; the stressed
-- syllable is prefixed with "ˈ".
local function syllabify(remainder, accent)
	local syllables = {}
	local syll = {}

	-- Every vowel closes the syllable under construction.
	while #remainder > 0 do
		local phoneme = table.remove(remainder, 1)
		table.insert(syll, phoneme)
		if vowel_list[phoneme] then
			table.insert(syllables, syll)
			syll = {}
		end
	end
	-- whatever consonants remain follow the last vowel
	local final_cons = syll

	-- A word without any vowel (e.g. a lone consonant with virama) has no
	-- syllable to attach the consonants to; return them as they are.
	if #syllables == 0 then
		return table.concat(final_cons, "")
	end

	-- Vedic pitch accent. At this point every syllable still ends in its vowel.
	if accent ~= nil and accent >= 1 and accent <= #syllables
		and accent == math.floor(accent) then
		local function retone(index, tone_map)
			local target = syllables[index]
			target[#target] = tone_map[target[#target]]
		end
		retone(accent, rising_vowel)
		if accent > 1 then -- sannatara on the preceding syllable
			retone(accent - 1, low_vowel)
		end
		if accent < #syllables then -- svarita on the following syllable
			retone(accent + 1, peaking_vowel)
		end
	end

	syllables = shift_to_codas(syllables)

	-- A syllable is light when its last segment is a short vowel. The tone
	-- tables put the tone mark before the syllabic mark (r + tone + syllabic),
	-- so the marks are accepted in any order.
	local short_vowel_patt = "^[ɐiurl][" .. SYLLABIC .. RISING .. LOW .. PEAKING .. "]*$"
	local function is_light(index)
		local segments = syllables[index]
		return mw.ustring.match(segments[#segments], short_vowel_patt) ~= nil
	end

	-- Classic stress accent
	local num_sylls = #syllables
	local stressed
	if num_sylls == 2 then
		stressed = 1
	elseif num_sylls == 3 then
		-- stress the second syllable unless it is light, in which case the
		-- first (third from the end) is stressed
		if is_light(2) then
			stressed = 1
		else
			stressed = 2
		end
	elseif num_sylls >= 4 then
		-- penultimate if heavy, else antepenultimate if heavy, else the
		-- fourth syllable from the end
		if not is_light(num_sylls - 1) then
			stressed = num_sylls - 1
		elseif not is_light(num_sylls - 2) then
			stressed = num_sylls - 2
		else
			stressed = num_sylls - 3
		end
	end
	if stressed then
		table.insert(syllables[stressed], 1, 'ˈ')
	end

	-- If there are phonemes left, then the word ends in a consonant:
	-- add them to the last syllable.
	for _, phoneme in ipairs(final_cons) do
		table.insert(syllables[#syllables], phoneme)
	end

	for i, segments in ipairs(syllables) do
		syllables[i] = table.concat(segments, "")
	end

	return table.concat(syllables, ".")
end

-- Realisation of the anusvara placeholder "ṃ" keyed by the consonant that
-- follows it. anusvara() builds the key from the base letter plus an optional
-- dental mark, so aspirated/breathy stops share the entry of their plain stop.
local anu_to_nasals = {
	--earlier
	["s̪"] = "ŋ̊",
	["ɕ"] = "ŋ̊",
	["ʂ"] = "ŋ̊",
	["h"] = "ŋ̊",
	["ɦ"] = "ŋ",
	["ɽ"] = "ŋ",
	--later
	-- before a stop: the nasal of the same place of articulation
	["k"] = "ŋ", ["ɡ"] = "ŋ",
	["c"] = "ɲ", ["ɟ"] = "ɲ",
	["t̪"] = "n̪", ["d̪"] = "n̪",
	["ʈ"] = "ɳ", ["ɖ"] = "ɳ",
	["p"] = "m", ["b"] = "m",
}

-- Resolve the anusvara placeholder "ṃ" in an IPA string.
--
--   1. "ṃ" at the very end of the text becomes "m".
--   2. "ṃ" before a stop, sibilant, h, ɦ or ɽ (optionally across a space,
--      syllable dot or stress mark) becomes the nasal listed in anu_to_nasals.
--   3. Any "ṃ" still directly following a vowel nasalises that vowel and is
--      dropped.
--
-- text: IPA string as produced by convert_words().
-- Returns the rewritten string.
local function anusvara(text)
	text = gsub(text, "ṃ$", "m")
	text = gsub(
		text,
		"ṃ([ %.ˈ]?)([kɡtdʈɖcɟpbsɕʂhɦɽ])(" .. DENTAL .. "?)",
		function(div, cons, mark)
			local nasal = anu_to_nasals[cons .. mark]
			if nasal == nil then
				-- No entry for this consonant/mark combination: leave the
				-- match untouched instead of failing on a nil concatenation.
				return "ṃ" .. div .. cons .. mark
			end
			return nasal .. div .. cons .. mark
		end
	)
	-- The syllabic and tone marks form a character class and may appear in
	-- any order (the tone tables emit tone before the syllabic mark). The
	-- nasal mark goes after them so that a tone mark stays adjacent to its
	-- base letter, which the later tone-stripping patterns rely on.
	text = gsub(
		text,
		"([ɐɑiurleo])([" .. SYLLABIC .. RISING .. LOW .. PEAKING .. "]*)(ː?)([ɪʊ]?)ṃ",
		"%1%2" .. NASAL .. "%3%4"
	)
	return text
end

-- Convert one Devanagari word to a syllabified IPA string.
--
-- word:   the word in Devanagari.
-- accent: 1-based index of the pitch-accented syllable, or nil; passed on to
--         syllabify().
-- Returns the IPA string with "." between syllables and "ˈ" before the
-- stressed syllable. The anusvara placeholder "ṃ" is left for anusvara().
local function convert_word(word, accent)
	-- split into Unicode characters
	local chars = {}
	gsub(word, ".", function(c) table.insert(chars, c) end)

	local phonemes = {}
	for i, c in ipairs(chars) do
		if consonants[c] then
			table.insert(phonemes, consonants[c])
			-- A consonant carries the inherent vowel unless a vowel sign or
			-- virama follows it.
			local following = chars[i + 1]
			if not (following and diacritics[following]) then
				table.insert(phonemes, "ɐ")
			end
		elseif c == "्" then
			-- virama: already accounted for when the consonant was read
		elseif diacritics[c] then
			table.insert(phonemes, diacritics[c])
		elseif tt[c] then
			table.insert(phonemes, tt[c])
		end
	end

	word = syllabify(phonemes, accent)

	-- the stress mark doubles as a syllable boundary
	word = gsub(word, "%.ˈ", "ˈ")

	-- chandrabindu: nasalise the preceding vowel. The syllabic and tone marks
	-- form a character class and may appear in any order (the tone tables
	-- emit tone before the syllabic mark); the nasal mark is placed after them.
	word = gsub(
		word,
		"([ɐɑiurleo])([" .. SYLLABIC .. RISING .. LOW .. PEAKING .. "]*)(ː?)([ɪʊ]?)m̐",
		"%1%2" .. NASAL .. "%3%4"
	)
	return word
end

-- Convert a space-separated phrase to IPA, one word at a time.
--
-- words:   phrase in Devanagari; words are separated by single spaces.
-- accents: table indexed by word position giving the accented syllable of
--          that word (entries may be nil).
-- Returns the converted words joined by single spaces.
local function convert_words(words, accents)
	local result = {}

	local word_num = 1
	for word in mw.text.gsplit(words, " ") do
		table.insert(result, convert_word(word, accents[word_num]))
		word_num = word_num + 1
	end

	-- Return directly: the original assigned to an undeclared `text`, which
	-- leaked a global variable.
	return table.concat(result, " ")
end

-- Run the phonological post-processing passes over a converted IPA string.
-- The only pass at present is anusvāra resolution.
--
-- text: IPA string produced by convert_words().
-- Returns the processed string.
local function phon_procs(text)
	local processed = anusvara(text)
	return processed
end

-- Mark a stop as unreleased when it is followed by another stop, optionally
-- across a space, syllable dot or stress mark.
--
-- text: IPA string.
-- Returns the string with NORELEASE inserted after each such first stop
-- (after its dental mark, if it has one).
local function abhinidhana(text)
	local stop_pair_patt = "([kɡtdʈɖcɟpb])(" .. DENTAL .. "?)([ %.ˈ]?)([kɡtdʈɖcɟpb])"
	local unreleased_repl = "%1%2" .. NORELEASE .. "%3%4"
	local marked = gsub(text, stop_pair_patt, unreleased_repl)
	return marked
end

-- Superscript form of each vowel letter. make_dialects() appends the
-- superscript of the preceding vowel after a word-final "h" in the Classical
-- phonetic transcription.
local superscript = {
	["ɐ"] = "ᵄ",
	["ɑ"] = "ᵅ",
	["e"] = "ᵉ",
	["o"] = "ᵒ",
	["i"] = "ⁱ",
	["u"] = "ᵘ",
}

-- Derive the Vedic and Classical transcriptions from the processed IPA text.
--
-- text: IPA string after phon_procs().
-- Returns a table with the keys 'rig' and 'cla'; each value has the fields
-- `label`, `phonemic` and `phonetic`.
local function make_dialects(text)
	-- Insert "ⁿ" after a stop (or ɦ) that is followed by a nasal, optionally
	-- across a space, syllable dot or stress mark.
	local function nasal_yama(ipa)
		ipa = gsub(
			ipa,
			"([kɡtdʈɖcɟpb])(" .. DENTAL .. "?)([ʰʱ]?)([ %.ˈ]?)([nŋɲɳm])",
			"%1%2%3ⁿ%4%5"
		)
		ipa = gsub(ipa, "(ɦ)([ %.ˈ]?)([nɳm])", "%1ⁿ%2%3")
		return ipa
	end

	-- Rebuild "vowel + h" with the superscript echo of the vowel appended.
	local function with_echo(vow, nas, length, glide)
		return vow .. nas .. length .. glide .. "h" .. superscript[vow]
	end

	---------------------------------------------------------------------
	-- Rigvedic Sanskrit: no stress mark, so "ˈ" degrades to a plain
	-- syllable boundary (and disappears at the start of a word).
	---------------------------------------------------------------------
	local vedic_phonemic = gsub(text, "^ˈ", "")
	vedic_phonemic = gsub(vedic_phonemic, "ˈ", ".")
	vedic_phonemic = gsub(vedic_phonemic, " %.", " ")

	-- The phonetic form is derived before the tone marks are simplified,
	-- so it keeps all three of them.
	local vedic_phonetic = abhinidhana(vedic_phonemic)
	-- visarga alternation
	vedic_phonetic = gsub(vedic_phonetic, "h([ %.ˈ]?)([p])", "ɸ%1%2")
	vedic_phonetic = gsub(vedic_phonetic, "h([ %.ˈ]?)([k])", "x%1%2")
	-- nasalized semivowels: the nasal is replaced by a nasalised copy of the
	-- following segment
	vedic_phonetic = gsub(
		vedic_phonetic,
		"([ŋɲnɳm])(" .. DENTAL .. "?)([ %.ˈ]?)([lɭɪʊ])([" .. DENTAL .. FLAP .. "]?)(ʱ?)",
		"%4%5" .. NASAL .. "%3%4%5%6"
	)
	-- nasalized yama
	vedic_phonetic = nasal_yama(vedic_phonetic)

	-- remove sannatara and svarita from phonemic, and show the accent as HIGH
	vedic_phonemic = gsub(vedic_phonemic, "[" .. LOW .. PEAKING .. "]", "")
	vedic_phonemic = gsub(vedic_phonemic, RISING, HIGH)

	---------------------------------------------------------------------
	-- Classical Sanskrit: no pitch accent, monophthongised e/o, w => ʋ.
	---------------------------------------------------------------------
	local classical_phonemic = gsub(text, "([ɐɑeoiurl])[" .. RISING .. LOW .. PEAKING .. "]", "%1")
	classical_phonemic = gsub(classical_phonemic, "ɐ(" .. NASAL .. "?)ɪ", "e%1ː")
	classical_phonemic = gsub(classical_phonemic, "ɐ(" .. NASAL .. "?)ʊ", "o%1ː")
	classical_phonemic = gsub(classical_phonemic, "ɑ(" .. NASAL .. "?)ː([ɪʊ])", "ɑ%1%2")
	classical_phonemic = gsub(classical_phonemic, "w", "ʋ")

	local classical_phonetic = abhinidhana(classical_phonemic)
	-- nasalized yama
	classical_phonetic = nasal_yama(classical_phonetic)
	-- echo vowel after visarga, first at the end of the text, then before a space
	classical_phonetic = gsub(
		classical_phonetic,
		"([ɐɑeoiu])(" .. NASAL .. "?)(ː?)([ɪʊ]?)h$",
		with_echo
	)
	classical_phonetic = gsub(
		classical_phonetic,
		"([ɐɑeoiu])(" .. NASAL .. "?)(ː?)([ɪʊ]?)h ",
		function (vow, nas, length, glide)
			return with_echo(vow, nas, length, glide) .. " "
		end
	)

	return {
		rig = {
			label = "Vedic",
			phonemic = vedic_phonemic,
			phonetic = vedic_phonetic,
		},
		cla = {
			label = "Classical Sanskrit",
			phonemic = classical_phonemic,
			phonetic = classical_phonetic,
		},
	}
end

-- Render the transcriptions as wikitext.
--
-- dialects: table produced by make_dialects().
-- novedic:  when truthy only the Classical line is produced; otherwise a
--           collapsible box shows the Classical transcription inline and
--           both dialects, labelled, in its collapsed part.
-- Returns the wikitext string.
local function make_table(dialects, novedic)
	-- IPA items for one dialect: always the phonemic form, plus the phonetic
	-- form when it differs.
	local function ipa_items(dial)
		local entry = dialects[dial]
		local items = {{pron = '/' .. entry.phonemic .. '/'}}
		if entry.phonemic ~= entry.phonetic then
			items[#items + 1] = {pron = '[' .. entry.phonetic .. ']'}
		end
		return items
	end

	-- One bullet line: accent qualifier followed by the formatted IPA.
	local function labelled_line(dial)
		local items = ipa_items(dial)
		return table.concat{
			'\n* ',
			m_a.format_qualifiers(lang, {dialects[dial].label}),
			' ',
			m_IPA.format_IPA_full { lang = lang, items = items },
		}
	end

	if novedic then
		return labelled_line('cla')
	end

	-- always-visible part: unlabelled Classical transcription
	local inline_items = ipa_items('cla')
	local inline = table.concat{
		'\n* ',
		m_IPA.format_IPA_full { lang = lang, items = inline_items },
	}

	-- collapsed part: every dialect with its label
	local full = {'\n<div class="mw-collapsible-content">\n----\n'}
	for _, dial in ipairs({'rig', 'cla'}) do
		full[#full + 1] = labelled_line(dial)
	end
	full[#full + 1] = '</div>'

	return table.concat{
		'<div class="toccolours mw-collapsible mw-collapsed" style="width:600px; font-size:100%">',
		inline,
		table.concat(full, ""),
		'</div>',
	}
end

-- Template entry point.
--
-- Reads the arguments of the parent frame:
--   w / 1    text to transcribe (defaults to the current page title)
--   a        list of numbers handed to convert_words() as per-word accents
--   novedic  boolean handed to make_table()
-- Returns the wikitext produced by make_table().
function export.show(frame)
	local param_spec = {
		[1] = {alias_of = 'w'},
		w = {default = mw.title.getCurrentTitle().text},
		a = {list = true, allow_holes = true, type = 'number'},
		novedic = {type = 'boolean'}
	}

	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, param_spec)

	-- Devanagari -> syllabified IPA -> phonological passes -> dialect forms
	local ipa = convert_words(args.w, args.a)
	ipa = phon_procs(ipa)
	local by_dialect = make_dialects(ipa)

	return make_table(by_dialect, args.novedic)
end

return export