-- Jump to content
--
-- Module:hy-pronunciation/sandbox
--
-- From Wiktionary, the free dictionary


-- Table of functions exported by this module.
local export = {}

-- Single characters that map to IPA sounds, keyed by pronunciation system
-- ("east" or "west"). The inner table for the selected system is used as the
-- replacement table of a gsub over the pattern '.', so every character of the
-- input is looked up here individually.
-- NOTE(review): the key "-" appears to be listed twice in each system (once
-- mapping to " " and once to ""). If both keys really are the same character,
-- only one of the two mappings can take effect -- confirm whether one of them
-- was meant to be a different hyphen-like character.
local phonetic_chars_map = {
	-- Eastern Armenian
	east = {
		["ա"]="ɑ", ["բ"]="b", ["գ"]="ɡ", ["դ"]="d", ["ե"]="ɛ", ["զ"]="z",
		["է"]="ɛ", ["ը"]="ə", ["թ"]="tʰ", ["ժ"]="ʒ", ["ի"]="i", ["լ"]="l",
		["խ"]="χ", ["ծ"]="t͡s", ["կ"]="k", ["հ"]="h", ["ձ"]="d͡z", ["ղ"]="ʁ", 
		["ճ"]="t͡ʃ", ["մ"]="m", ["յ"]="j", ["ն"]="n", ["շ"]="ʃ", ["ո"]="ɔ",
		["չ"]="t͡ʃʰ", ["պ"]="p", ["ջ"]="d͡ʒ", ["ռ"]="r", ["ս"]="s", ["վ"]="v", 
		["տ"]="t", ["ր"]="ɾ", ["ց"]="t͡sʰ", ["ւ"]="v", ["փ"]="pʰ", ["ք"]="kʰ",
		["օ"]="ɔ", ["ֆ"]="f", ["-"]=" ", ["՚"]="", ["-"]=""
	},
	-- Western Armenian (same keys as "east"; several plosive/affricate letters
	-- and ռ map to different IPA values here)
	west = {
		["ա"]="ɑ", ["բ"]="pʰ", ["գ"]="kʰ", ["դ"]="tʰ", ["ե"]="ɛ", ["զ"]="z",
		["է"]="ɛ", ["ը"]="ə", ["թ"]="tʰ", ["ժ"]="ʒ", ["ի"]="i", ["լ"]="l",
		["խ"]="χ", ["ծ"]="d͡z", ["կ"]="ɡ", ["հ"]="h", ["ձ"]="t͡sʰ", ["ղ"]="ʁ", 
		["ճ"]="d͡ʒ", ["մ"]="m", ["յ"]="j", ["ն"]="n", ["շ"]="ʃ", ["ո"]="ɔ",
		["չ"]="t͡ʃʰ", ["պ"]="b", ["ջ"]="t͡ʃʰ", ["ռ"]="ɾ", ["ս"]="s", ["վ"]="v", 
		["տ"]="d", ["ր"]="ɾ", ["ց"]="t͡sʰ", ["ւ"]="v", ["փ"]="pʰ", ["ք"]="kʰ",
		["օ"]="ɔ", ["ֆ"]="f", ["-"]=" ", ["՚"]="", ["-"]=""
	},
}

-- Multi-character sequences that map to IPA sounds, keyed by pronunciation
-- system ("east" or "west"). Each key is a ustring pattern and each value is a
-- gsub replacement: either a plain string or a function that receives the
-- pattern's capture. These rules run on text that is still mostly in Armenian
-- script (the single-character mapping is applied afterwards).
local phonetic_2chars_map = {
	east = {
		['ու'] = 'u',
		-- diphthongization in the following combinations: [իե] = [jɛ], [իա] = [jɑ]
		['իե'] = 'jɛ',
		['իա'] = 'jɑ'
	},
	west = {
		['ու'] = 'u',
		['էօ'] = 'œ',
		-- յու is rendered as ʏ unless it is in initial position or preceded by
		-- a vowel. `before` is the single character captured in front of յու
		-- ('' when there is none).
		-- This will not catch initial position in a word that follows a
		-- vowel-initial word.
		-- NOTE(review): if `before` is the ւ of a not-yet-replaced ու digraph
		-- it is treated as a consonant -- confirm whether that can occur.
		['(.?)յու'] = function(before)
			-- The text is still in Armenian script at this point, so the
			-- preceding vowel may be an Armenian letter as well as an IPA
			-- symbol produced by another rule in this table.
			if before == '' or mw.ustring.find(before, '[%sաեէըիոօɑɛəɔiuœ]') then
				-- Returning nil makes gsub keep the original match untouched.
				return nil
			end
			-- The match includes the captured character, so it has to be put
			-- back in front of the vowel or it would be dropped from the output.
			return before .. 'ʏ'
		end,
		-- diphthongization in the following combinations: [իե] = [jɛ], [իա] = [jɑ]
		['իե'] = 'jɛ',
		['իա'] = 'jɑ'
	},
}

-- Convert an Armenian word or phrase to an IPA transcription.
--   word   (string): the text in Armenian script; it is lower-cased first.
--   system (string): key into the phonetic maps, "east" or "west".
-- Returns the transcription as a string, with an IPA stress mark inserted in
-- every whitespace-separated word that contains more than one vowel.
-- Raises an error if `system` has no entry in both phonetic maps.
-- Note: the word-initial rules for ե and ո are anchored with ^ and therefore
-- only apply at the very start of the input, not to later words of a phrase.
function export._pronunciation(word, system)
	local chars_map = phonetic_chars_map[system]
	local sequence_map = phonetic_2chars_map[system]
	if not (chars_map and sequence_map) then
		error("Invalid system " .. tostring(system))
	end

	local ugsub = mw.ustring.gsub

	local phonetic = mw.ustring.lower(word)

	-- Orthographically doubled characters become one long sound.
	phonetic = ugsub(phonetic, "(.)%1", "%1ː")

	-- Apply the multi-character rules in a fixed order, longest pattern first.
	-- pairs() order is unspecified, and a shorter rule (e.g. ու) must not
	-- consume part of a longer one (e.g. յու) before the longer one has run.
	local patterns = {}
	for pat in pairs(sequence_map) do
		patterns[#patterns + 1] = pat
	end
	table.sort(patterns, function(a, b)
		if #a ~= #b then
			return #a > #b
		end
		return a < b
	end)
	for _, pat in ipairs(patterns) do
		phonetic = ugsub(phonetic, pat, sequence_map[pat])
	end

	-- ե and ո are pronounced as jɛ and vɔ word-initially.
	phonetic = ugsub(phonetic, "^ե", "յէ")
	phonetic = ugsub(phonetic, "^ո", "վօ")
	-- except when followed by another վ.
	phonetic = ugsub(phonetic, "^վօվ", "օվ")

	-- Map every remaining single character through the per-system table.
	phonetic = ugsub(phonetic, '.', chars_map)

	-- assimilation: nasal + velar plosives = velar nasal + velar plosives
	phonetic = ugsub(phonetic, "n([ɡkχ]+)", "ŋ%1")

	-- pseudo-palatalization under the influence of Russian [COLLOQUIAL, NOT STANDARD]
	--phonetic = mw.ustring.gsub(phonetic, "tj", "t͡sj")
	--phonetic = mw.ustring.gsub(phonetic, "tʰj", "t͡sʰj")
	--phonetic = mw.ustring.gsub(phonetic, "dj", "d͡zj")

	-- trilling of ɾ in some positions [COLLOQUIAL, NOT STANDARD]
	--phonetic = mw.ustring.gsub(phonetic, "ɾt", "rt")

	-- Place the stress mark word by word. The callback works only on the chunk
	-- it is given, so each word of a phrase is handled independently and the
	-- `$` anchors below refer to the end of that word.
	phonetic = ugsub(phonetic, "%S+", function(chunk)
		-- Do not add a stress mark for monosyllabic words, i.e. words with at
		-- most one of [ɑɛəɔiu]. Returning nil keeps the word unchanged.
		local _, vowel_count = ugsub(chunk, "[ɑɛəɔiu]", "%0")
		if vowel_count <= 1 then
			return nil
		end

		-- If polysyllabic, add the IPA stress mark using the following rules.
		-- The stress is always on the last syllable not formed by schwa [ə].
		-- In some rare cases the stress is not on the last syllable; then the
		-- stressed vowel is marked by the Armenian stress character <՛>,
		-- e.g. մի՛թե. So:
		--   1) Find the vowel followed by <՛> and put the stress mark before
		--      it and any consonants directly preceding it.
		--   2) If there is none, find the last non-schwa vowel [ɑɛɔiu],
		--      allowing for one trailing schwa syllable.
		--   3) If a vowel precedes the stress mark and more than one consonant
		--      follows it, move the mark so that only the last consonant of
		--      the cluster belongs to the stressed syllable.
		local stressed, explicit_marks = ugsub(chunk, "([^ɑɛɔiuə]*[ɑɛɔiuə])՛", "ˈ%1")
		if explicit_marks == 0 then
			stressed = ugsub(stressed, "([^ɑɛɔiuə]*[ɑɛɔiu][^ɑɛɔiuə]*)$", "ˈ%1")
			stressed = ugsub(stressed, "([^ɑɛɔiuə]*[ɑɛəɔiu]?[ɑɛɔiu][^ɑɛɔiuə]*ə[^ɑɛɔiuə]*)$", "ˈ%1")
		end
		stressed = ugsub(stressed, "([ɑɛəɔiu])ˈ([^ɑɛɔiuə]+)([^ɑɛɔiuəːˈʰ])", "%1%2ˈ%3")
		-- Never leave the stress mark inside a tie-bar affricate.
		stressed = ugsub(stressed, "(.)͡ˈ", "ˈ%1͡")
		return stressed
	end)

	return phonetic
end

-- Entry point usable both from wikitext (#invoke) and from other Lua modules.
--   word:   either a frame object (table) or the word as a string.
--           For a frame, the word is the first positional argument of the
--           frame or, failing that, of its parent frame, and the system is
--           read from the frame's `system` argument.
--   system: pronunciation system when called with a string word.
-- A missing or empty system falls back to "east" on both call paths.
-- Returns the result of export._pronunciation(word, system).
-- Raises an error if no word was supplied.
function export.pronunciation(word, system)
	if type(word) == "table" then
		local frame = word
		word = frame.args[1] or frame:getParent().args[1]
		system = frame.args.system
	end
	-- An empty string is truthy in Lua, so `x or "east"` alone would let an
	-- empty `|system=` argument through; check for it explicitly.
	if system == nil or system == "" then
		system = "east"
	end
	if not word or (word == "") then
		error("Please put the word as the first positional parameter!")
	end

	return export._pronunciation(word, system)
end
 
-- Hand the table of exported functions back to whatever loads this module.
return export