-- Module:es-pronunc/sandbox
--
-- From Wiktionary, the free dictionary.


-- Table that collects the functions this module makes available to callers.
local export = {}

-- Converts a Spanish word in standard orthography to an IPA transcription.
--
-- Internal one-character placeholders, expanded only at the very end:
--   ɟ  represents [ʝ⁓ɟ͡ʝ]
--   ʃ  represents the affricate t͡ʃ (spelled "ch")
--   ï  represents word-final "y"
-- "ch" must stay a single character until the end: the syllable-division,
-- syllable-final voicing, dental-diacritic and assimilation patterns below
-- all treat one character as one sound, so an early expansion to "t͡ʃ" gets
-- split and altered by them.
--
-- Parameters:
--   word          the word to transcribe, or a frame-like table whose
--                 args[1] is the word and args[4] the debug switch. When no
--                 word is given the current page title is used.
--   LatinAmerica  truthy: "c" before e/i and "z" come out as s, "ll" as ɟ;
--                 otherwise θ and ʎ are used.
--   phonetic      truthy: additionally apply the allophonic rules
--                 (nasalization, voicing, stop/fricative alternation,
--                 nasal and lateral assimilation, semivowel marks).
--   do_debug      'yes': append the recorded intermediate stages to the result.
--
-- Returns the transcription as a string. Syllable dots are used internally
-- only; they are dropped when the syllables are joined again.
function export.show(word, LatinAmerica, phonetic, do_debug)
	-- intermediate stages, only emitted when do_debug == 'yes'
	-- (named "steps" so that the standard "debug" library is not shadowed)
	local steps = {}
	
	if type(word) == 'table' then
		do_debug = word.args[4]
		word = word.args[1]
	end
	word = mw.ustring.lower(word or mw.title.getCurrentTitle().text)
	-- keep only Spanish letters and "." (which acts as a syllable break)
	word = mw.ustring.gsub(word, "[^abcdefghijklmnopqrstuvwxyzáéíóúüñ.]", "")
	
	table.insert(steps, word)
	
	local V = "[aeiouáéíóú]" -- vowel
	local W = "[jw]"
	local C = "[^aeiouáéíóú.]" -- consonant
	--determining whether "y" is a consonant or a vowel + diphthongs, "-mente" suffix
	word = mw.ustring.gsub(word, "y(" .. C .. ")", "i%1")
	word = mw.ustring.gsub(word, "y(" .. V .. ")", "ɟ%1") -- not the real sound
	word = mw.ustring.gsub(word, "hi(" .. V .. ")", "ɟ%1")
	word = mw.ustring.gsub(word, "y$", "ï")
	-- an accent on the suffix makes the accentuation step mark it as stressed
	word = mw.ustring.gsub(word, "mente$", "ménte")
	
	--x
	word = mw.ustring.gsub(word, "x", "ks")
	
	--"c" & "g" before "i" and "e" and all that stuff
	word = mw.ustring.gsub(word, "c([ieíé])", (LatinAmerica and 's' or 'θ') .. "%1")
	word = mw.ustring.gsub(word, "gü([ieíé])", "ɡw%1")
	word = mw.ustring.gsub(word, "ü", "")
	word = mw.ustring.gsub(word, "gu([ieíé])", "ɡ%1")
	word = mw.ustring.gsub(word, "g([ieíé])", "x%1")

	table.insert(steps, word)
	
	--alphabet-to-phoneme
	word = mw.ustring.gsub(word, "qu", "c")
	-- not the real sound: ʃ is a one-character stand-in for t͡ʃ; this has to
	-- run before the remaining "c" is turned into "k" below
	word = mw.ustring.gsub(word, "ch", "ʃ")
	word = mw.ustring.gsub(word, '[cgjñrv]',
		--['g']='ɡ':  U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
		{['c']='k', ['g']='ɡ', ['j']='x', ['ñ']='ɲ', ['r']='ɾ', ['v']='b'})
	
	-- trill in #r, lr, nr, rr (and before the consonants listed below)
	local match_count = 0
	word = mw.ustring.gsub(
		word,
		'(.?)ɾ(.?)',
		function (before, after)
			match_count = match_count + 1
			-- Only the first match can be word-initial: a later empty
			-- "before" just means the preceding character was consumed as
			-- the "after" of the previous match.
			-- "after" is used as a Lua pattern here, so a literal "."
			-- (which matches any character) also satisfies the test.
			if (match_count == 1 and before == '') or before == 'l' or before == 'n'
					or (after ~= '' and ('bdfɡklʎmnɲpstxzʃɟ'):match(after)) then
				return before .. 'r' .. after
			elseif before == 'ɾ' then
				return 'r' .. after
			elseif after == 'ɾ' then
				return before .. 'r'
			end -- else no change
		end)
	
	word = mw.ustring.gsub(word, 'n([bm])', 'm%1')
	word = mw.ustring.gsub(word, 'll', LatinAmerica and 'ɟ' or 'ʎ')
	-- Latin American "z" stays "z" for now so that the accentuation test on
	-- the word-final letter does not see an "s"; it becomes "s" further down
	word = mw.ustring.gsub(word, 'z', LatinAmerica and 'z' or 'θ') -- not the real LatAm sound
	
	table.insert(steps, word)
	
	--syllable division
	-- each pattern is applied twice because one gsub pass cannot reuse a
	-- vowel it has just consumed as the start of the next match
	for _ = 1, 2 do
		word = mw.ustring.gsub(word,
			"(" .. V .. ")(" .. C .. W .. "?" .. V .. ")",
			"%1.%2")
	end
	for _ = 1, 2 do
		word = mw.ustring.gsub(word,
			"(" .. V .. C .. ")(" .. C .. V .. ")",
			"%1.%2")
	end
	for _ = 1, 2 do
		word = mw.ustring.gsub(word,
			"(" .. V .. C .. ")(" .. C .. C .. V .. ")",
			"%1.%2")
	end
	-- stop + l/ɾ belong to the same syllable: move the break in front of the stop
	word = mw.ustring.gsub(word, "([pbktdɡ])%.([lɾ])", ".%1%2")
	-- consonant + s + consonant: the s stays with the preceding syllable
	word = mw.ustring.gsub(word, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
	-- adjacent vowels that form a hiatus
	word = mw.ustring.gsub(word, "([aeoáéíóú])([aeoáéíóú])", "%1.%2")
	word = mw.ustring.gsub(word, "([ií])([ií])", "%1.%2")
	word = mw.ustring.gsub(word, "([uú])([uú])", "%1.%2")

	table.insert(steps, word)
	
	--diphthongs
	word = mw.ustring.gsub(word, 'ih?([aeouáéóú])', 'j%1')
	word = mw.ustring.gsub(word, 'uh?([aeioáéíó])', 'w%1')
	
	table.insert(steps, word)
	
	--accentuation
	local syllables = mw.text.split(word, "%.")
	if mw.ustring.find(word, "[áéíóú]") then
		-- written accent(s): every syllable carrying one is stressed
		for i = 1, #syllables do
			if mw.ustring.find(syllables[i], "[áéíóú]") then
				syllables[i] = "ˈ"..syllables[i]
			end
		end
	else
		if mw.ustring.find(word, "[^aeiouns]$") then
			-- does not end in a vowel, n or s: stress the last syllable
			syllables[#syllables] = "ˈ" .. syllables[#syllables]
		else
			-- otherwise stress the penultimate syllable, if there is one
			if #syllables > 1 then
				syllables[#syllables-1] = "ˈ" .. syllables[#syllables-1]
			end
		end
	end

	-- note: "word" has not changed since the previous entry; the stress marks
	-- so far live only in "syllables"
	table.insert(steps, word)
	
	--syllables nasalized if ending with "n", voiceless consonants in syllable-final position to voiced
	local remove_accent = { ['á'] = 'a', ['é'] = 'e', ['í'] = 'i', ['ó'] = 'o', ['ú'] = 'u'}
	local nasalize = { ['a'] = 'ã', ['e'] = 'ẽ', ['i'] = 'ĩ', ['o'] = 'õ', ['u'] = 'ũ' }
	for i = 1, #syllables do
		syllables[i] = mw.ustring.gsub(syllables[i], '[áéíóú]', remove_accent)
		if phonetic and mw.ustring.find(syllables[i], '[mnɲ]' .. C .. '?$') then
			syllables[i] = mw.ustring.gsub(syllables[i], '[aeiou]', nasalize)
		end
		syllables[i] = mw.ustring.gsub(syllables[i], '[ptk]$', { ['p'] = 'b', ['t'] = 'd', ['k'] = 'ɡ' })
	end
	-- joined without a separator: the syllable dots are dropped here
	word = table.concat(syllables)
	
	--real sound of LatAm Z
	word = mw.ustring.gsub(word, 'z', 's')
	--secondary stress
	-- where several syllables were marked, the earlier mark(s) become
	-- secondary and the last stays primary (these three passes cover words
	-- with up to three marks)
	word = mw.ustring.gsub(word, 'ˈ(.+)ˈ', 'ˌ%1ˈ')
	word = mw.ustring.gsub(word, 'ˈ(.+)ˌ', 'ˌ%1ˌ')
	word = mw.ustring.gsub(word, 'ˌ(.+)ˈ(.+)ˈ', 'ˌ%1ˌ%2ˈ')

	--phonetic transcription
	if phonetic then
		--θ, s, f before voiced consonants
		local voiced = 'mnɲbdɟɡʎ'
		local r = 'ɾr'
		local tovoiced = {
			['θ'] = 'θ̬',
			['s'] = 'z',
			['f'] = 'v',
		}
		local function voice(sound, following)
			return tovoiced[sound]..following
		end
		word = mw.ustring.gsub(word, '([θs])([ˈˌ]?['..voiced..r..'])', voice)
		word = mw.ustring.gsub(word, '(f)([ˈˌ]?['..voiced..'])', voice)
		
		local stop_to_fricative = {['b']='β', ['d']='ð', ['ɟ']='ʝ', ['ɡ']='ɣ'}
		local fricative_to_stop = {['β']='b', ['ð']='d', ['ʝ']='ɟ', ['ɣ']='ɡ'}
		--lots of allophones going on
		-- first make every voiced stop a fricative, then restore the stop
		-- word-initially and after the consonants checked below
		word = mw.ustring.gsub(word, '[bdɟɡ]', stop_to_fricative)
		word = mw.ustring.gsub(
			word,
			'()([ˈˌ]?)([βðɣʝ])',
			function (pos, stress, fricative)
				-- Matching the character before the fricative in the pattern
				-- doesn't work because sometimes there are two fricatives in
				-- a row.
				-- "word" is still the string being scanned, since the
				-- assignment only happens once gsub returns.
				local before = pos > 1 and mw.ustring.sub(word, pos - 1, pos - 1)
				if not before or (fricative == 'ɣ' or fricative == 'β') and ('mnɲ'):find(before)
						or (fricative == 'ð' or fricative == 'ʝ') and ('lʎmnɲ'):find(before) then
					return stress .. fricative_to_stop[fricative]
				end -- else no change
			end)
		-- dental diacritic; "ch" is still the placeholder ʃ at this point, so
		-- the t of the affricate is not affected
		word = mw.ustring.gsub(word, '[td]', {['t']='t̪', ['d']='d̪'})
		--nasal assimilation before consonants
		local labiodental, dentialveolar, dental, alveolopalatal, palatal, velar =
			'ɱ', 'n̪', 'n̟', 'nʲ', 'ɲ', 'ŋ'
		-- keyed by the single character that follows the n
		local nasal_assimilation = {
			['f'] = labiodental,
			['t'] = dentialveolar, ['d'] = dentialveolar,
			['θ'] = dental,
			['ʃ'] = alveolopalatal, -- placeholder for t͡ʃ
			['ɟ'] = palatal, ['ʎ'] = palatal,
			['k'] = velar, ['x'] = velar, ['ɡ'] = velar,
		}
		
		word = mw.ustring.gsub(
			word,
			'n([ˈˌ]?)(.)',
			function (stress, following)
				return (nasal_assimilation[following] or 'n') .. stress .. following
			end)
		--lateral assimilation before consonants
		word = mw.ustring.gsub(
			word,
			'l([ˈˌ]?)(.)',
			function (stress, following)
				local l = 'l'
				if following == 't' or following == 'd' then -- dentialveolar
					l = 'l̪'
				elseif following == 'θ' then -- dental
					l = 'l̟'
				elseif following == 'ʃ' then -- alveolopalatal (placeholder for t͡ʃ)
					l = 'lʲ'
				end
				return l .. stress .. following
			end)
		--semivowels
		word = mw.ustring.gsub(word, '([aeouãẽõũ][iïĩ])', '%1̯')
		word = mw.ustring.gsub(word, '([aeioãẽĩõ][uũ])', '%1̯')
	end
	
	table.insert(steps, word)
	
	word = mw.ustring.gsub(word, 'h', '') --silent "h"
	word = mw.ustring.gsub(word, 'ɟ', 'ɟ͡ʝ') --fake "y" to real "y"
	word = mw.ustring.gsub(word, 'ʃ', 't͡ʃ') --fake "ch" to real "ch"
	word = mw.ustring.gsub(word, 'ï', 'i') --fake "y$" to real "y$"
	
	if do_debug == 'yes' then
		return word .. table.concat(steps, "")
	else
		return word
	end
end

-- Entry point that runs export.show with the LatinAmerica flag set and the
-- phonetic flag left unset. `frame` is passed through unchanged.
function export.LatinAmerica(frame)
	local latin_america = true
	return export.show(frame, latin_america)
end

-- Entry point that runs export.show with the phonetic flag set and the
-- LatinAmerica flag cleared. `frame` is passed through unchanged.
function export.phonetic(frame)
	local latin_america, phonetic = false, true
	return export.show(frame, latin_america, phonetic)
end

-- Entry point that runs export.show with both the LatinAmerica and the
-- phonetic flags set. `frame` is passed through unchanged.
function export.phoneticLatinAmerica(frame)
	local latin_america, phonetic = true, true
	return export.show(frame, latin_america, phonetic)
end

-- Hand the table of exported functions to whoever loads this module.
return export