Module:tyz-pron

From Wiktionary, the free dictionary
Jump to navigation Jump to search


local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local match = mw.ustring.match
local u = require("Module:string/char")

-- Table of public functions; it is returned at the end of the module.
local export = {}

--àằầèềìòồờùừỳ áắấéếíóốớúứý ảẳẩẻểỉỏổởủửỷ ãẵẫẽễĩõỗỡũữỹ ạặậẹệịọộợụựỵ a̱ă̱â̱e̱ê̱i̱o̱ô̱ơ̱u̱ư̱y̱
-- Maps each combining tone diacritic to its tone number (2-7). export.ipa
-- searches for these marks in the NFD-decomposed syllable; a syllable that
-- carries none of them starts out as tone 1.
local tone_diacritics = { ["̀"] = 2, ["́"] = 3, ["̉"] = 4, ["̃"] = 5, ["̣"] = 6, ["̱"] = 7 }

-- Tone letters appended to each transcribed syllable. The outer key is the
-- variety code (the same codes that key `varieties`); the inner list is
-- indexed by tone number, 1 through 7.
local tone_contour = {
	tatd = { "˧˧", "˧˨", "˧˥", "˨˩˧", "˦ˀ˥", "˧˨ʔ", "˩˩" },
	tk = { "˦˥", "˩", "˦", "˨˦", "˦ˀ˥", "˩˧", "˧" },
}

-- IPA for every recognised syllable initial, keyed by its spelling.
-- Each value is a pair indexed by the variety's column number in `varieties`:
-- [1] is the "tatd" variety, [2] is the "tk" variety.
-- The empty spelling stands for a syllable that begins with a vowel.
local initial_ipa = {
	b = { "ɓ", "ɓ" },
	bh = { "p", "bʱ" },
	bhj = { "pʲ", "bʱʲ" },
	bj = { "ɓʲ", "ɓʲ" },
	c = { "k", "k" },
	ch = { "c", "c" },
	d = { "z", "j" },
	dh = { "t", "dʱ" },
	["đ"] = { "ɗ", "ɗ" },
	f = { "f", "f" },
	g = { "ɣ", "ɣ" },
	gh = { "k", "gʱ" },
	h = { "h", "h" },
	k = { "k", "k" },
	kh = { "kʰ", "kʰ" },
	l = { "l", "l" },
	m = { "m", "m" },
	mj = { "mʲ", "mʲ" },
	n = { "n", "n" },
	ng = { "ŋ", "ŋ" },
	ngh = { "ŋ", "ŋ" },
	nh = { "ɲ", "ɲ" },
	p = { "p", "p" },
	ph = { "pʰ", "pʰ" },
	phj = { "pʰʲ", "pʰʲ" },
	pj = { "pʲ", "pʲ" },
	q = { "k", "k" },
	qu = { "kw", "kw" },
	r = { "r", "r" },
	s = { "ʂ", "ʂ" },
	sl = { "ɬ", "θ" },
	t = { "t", "t" },
	th = { "tʰ", "tʰ" },
	tr = { "ʈ", "ʈ" },
	v = { "v", "w" },
	vh = { "f", "v" },
	x = { "s", "s" },
	z = { "ð", "ð" },
	zh = { "c", "z" },
	[""] = { "ʔ", "ʔ" },
	["-"] = { "", "" },
}

-- IPA for every recognised final (everything after the initial), keyed by its
-- spelling with the tone diacritic removed.
-- Each value is a pair indexed by the variety's column number in `varieties`:
-- [1] is the "tatd" variety, [2] is the "tk" variety.
-- export.ipa raises "Unrecognised final" for a spelling that has no entry here.
local final_ipa = {
	["a"] = { "aː", "aː" },
	["ac"] = { "aːk̚", "aːk̚" },
	["ach"] = { "ajk̟̚", "ajk̟̚" },
	["ai"] = { "aːj", "aːj" },
	["am"] = { "aːm", "aːm" },
	["an"] = { "aːn", "aːn" },
	["ang"] = { "aːŋ", "aːŋ" },
	["anh"] = { "ajŋ̟", "ajŋ̟" },
	["ao"] = { "aːw", "aːw" },
	["ap"] = { "aːp̚", "aːp̚" },
	["at"] = { "aːt̚", "aːt̚" },
	["au"] = { "aw", "aw" },
	["ay"] = { "aj", "aj" },
	["ăc"] = { "ak̚", "ak̚" },
	["ăm"] = { "am", "am" },
	["ăn"] = { "an", "an" },
	["ăng"] = { "aŋ", "aŋ" },
	["ăp"] = { "ap̚", "ap̚" },
	["ăt"] = { "at̚", "at̚" },
	["â"] = { "ə", "ə" },
	["âc"] = { "ək̚", "ək̚" },
	["âm"] = { "əm", "əm" },
	["ân"] = { "ən", "ən" },
	["âng"] = { "əŋ", "əŋ" },
	["âp"] = { "əp̚", "əp̚" },
	["ât"] = { "ət̚", "ət̚" },
	["âu"] = { "əw", "əw" },
	["âư"] = { "əɯ", "əɯ" },
	["ây"] = { "əj", "əj" },
	["e"] = { "ɛ", "ɛ" },
	["ec"] = { "ɛk̚", "ɛk̚" },
	["em"] = { "ɛm", "ɛm" },
	["en"] = { "ɛn", "ɛn" },
	["eng"] = { "ɛŋ", "ɛŋ" },
	["eo"] = { "ɛw", "ɛw" },
	["ep"] = { "ɛp̚", "ɛp̚" },
	["et"] = { "ɛt̚", "ɛt̚" },
	["ê"] = { "e", "e" },
	["êc"] = { "ek̚", "ek̚" },
	["êch"] = { "əjk̟̚", "əjk̟̚" },
	["êm"] = { "em", "em" },
	["ên"] = { "en", "en" },
	["êng"] = { "eŋ", "eŋ" },
	["ênh"] = { "əjŋ̟", "əjŋ̟" },
	["êp"] = { "ep̚", "ep̚" },
	["êt"] = { "et̚", "et̚" },
	["êu"] = { "ew", "ew" },
	["i"] = { "i", "i" },
	["ia"] = { "iə", "iə" },
	["ic"] = { "ïk̟̚", "ïk̟̚" },
	["ich"] = { "ïk̟̚", "ïk̟̚" },
	["iêc"] = { "iək̚", "iək̚" },
	["iêm"] = { "iəm", "iəm" },
	["iên"] = { "iən", "iən" },
	["iêng"] = { "iəŋ", "iəŋ" },
	["iêp"] = { "iəp̚", "iəp̚" },
	["iêt"] = { "iət̚", "iət̚" },
	["iêu"] = { "iəw", "iəw" },
	["im"] = { "im", "im" },
	["in"] = { "in", "in" },
	["inh"] = { "ïŋ", "ïŋ" },
	["ip"] = { "ip̚", "ip̚" },
	["it"] = { "it̚", "it̚" },
	["iu"] = { "iw", "iw" },
	["o"] = { "ɔ", "ɔ" },
	["oa"] = { "waː", "waː" },
	["oac"] = { "waːk̚", "waːk̚" },
	["oach"] = { "wajk̟̚", "wajk̟̚" },
	["oai"] = { "waːj", "waːj" },
	["oam"] = { "waːm", "waːm" },
	["oan"] = { "waːn", "waːn" },
	["oang"] = { "waːŋ", "waːŋ" },
	["oanh"] = { "wajŋ̟", "wajŋ̟" },
	["oao"] = { "waːw", "waːw" },
	["oap"] = { "waːp̚", "waːp̚" },
	["oat"] = { "waːt̚", "waːt̚" },
	["oay"] = { "waj", "waj" },
	["oă"] = { "wa", "wa" },
	["oăc"] = { "wak̚", "wak̚" },
	["oăm"] = { "wam", "wam" },
	["oăn"] = { "wan", "wan" },
	["oăng"] = { "waŋ", "waŋ" },
	["oăt"] = { "wat̚", "wat̚" },
	["oc"] = { "awk͡p̚", "awk͡p̚" },
	["oe"] = { "wɛ", "wɛ" },
	["oem"] = { "wɛm", "wɛm" },
	["oen"] = { "wɛn", "wɛn" },
	["oeo"] = { "wɛw", "wɛw" },
	["oet"] = { "wɛt̚", "wɛt̚" },
	["oi"] = { "ɔj", "ɔj" },
	["om"] = { "ɔm", "ɔm" },
	["on"] = { "ɔn", "ɔn" },
	["ong"] = { "awŋ͡m", "awŋ͡m" },
	["ooc"] = { "ɔk̚", "ɔk̚" },
	["oong"] = { "ɔŋ", "ɔŋ" },
	["op"] = { "ɔp̚", "ɔp̚" },
	["ot"] = { "ɔt̚", "ɔt̚" },
	["ô"] = { "o", "o" },
	["ôc"] = { "əwk͡p̚", "əwk͡p̚" },
	["ôi"] = { "oj", "oj" },
	["ôm"] = { "om", "om" },
	["ôn"] = { "on", "on" },
	["ông"] = { "əwŋ͡m", "əwŋ͡m" },
	["ôôc"] = { "ok̚", "ok̚" },
	["ôông"] = { "oŋ", "oŋ" },
	["ôp"] = { "op̚", "op̚" },
	["ôt"] = { "ot̚", "ot̚" },
	["ơ"] = { "əː", "əː" },
	["ơi"] = { "əːj", "əːj" },
	["ơm"] = { "əːm", "əːm" },
	["ơn"] = { "əːn", "əːn" },
	["ơng"] = { "əːŋ", "əːŋ" },
	["ơp"] = { "əːp̚", "əːp̚" },
	["ơt"] = { "əːt̚", "əːt̚" },
	["u"] = { "u", "u" },
	["ua"] = { "uə", "uə" },
	["uac"] = { "waːk̚", "waːk̚" },
	["uach"] = { "wajk̟̚", "wajk̟̚" },
	["uai"] = { "waːj", "waːj" },
	["uan"] = { "waːn", "waːn" },
	["uang"] = { "waːŋ", "waːŋ" },
	["uanh"] = { "wajŋ̟", "wajŋ̟"},
	["uao"] = { "waːw", "waːw" },
	["uap"] = { "waːp̚", "waːp̚" },
	["uat"] = { "waːt̚", "waːt̚" },
	["uau"] = { "waw", "waw" },
	["uay"] = { "waj", "waj" },
	["uă"] = { "wa", "wa" },
	["uăc"] = { "wak̚", "wak̚" },
	["uăm"] = { "wam", "wam" },
	["uăn"] = { "wan", "wan" },
	["uăng"] = { "waŋ", "waŋ" },
	["uăp"] = { "wap̚", "wap̚" },
	["uăt"] = { "wat̚", "wat̚" },
	["uâ"] = { "wə", "wə" },
	["uâc"] = { "wək̚", "wək̚" },
	["uân"] = { "wən", "wən" },
	["uâng"] = { "wəŋ", "wəŋ" },
	["uât"] = { "wət̚", "wət̚" },
	["uây"] = { "wəj", "wəj" },
	["uc"] = { "ʊwk͡p̚", "ʊwk͡p̚" },
	["ue"] = { "wɛ", "wɛ" },
	["uec"] = {"wɛk̚", "wɛk̚"},
	["uen"] = { "wɛn", "wɛn" },
	["ueo"] = { "wɛw", "wɛw" },
	["uep"] = { "wɛp̚", "wɛp̚" },
	["uet"] = { "wɛt̚", "wɛt̚" },
	["uê"] = { "we", "we" },
	["uêch"] = { "wəjk̟̚", "wəjk̟̚" },
	["uên"] = { "wen", "wen" },
	["uênh"] = { "wəjŋ̟", "wəjŋ̟" },
	["uêt"] = { "wet̚", "wet̚" },
	["uêu"] = { "weu", "weu" }, -- NOTE(review): "êu" is "ew" and "ueo" is "wɛw"; confirm "weu" (not "wew") is intended
	["ui"] = { "uj", "uj" },
	["uin"] = { "win", "win" },
	["uit"] = { "wit̚", "wit̚" },
	["um"] = { "um", "um" },
	["un"] = { "un", "un" },
	["ung"] = { "ʊwŋ͡m", "ʊwŋ͡m" },
	["uô"] = { "uə", "uə" }, 
	["uôc"] = { "uək̚", "uək̚" },
	["uôi"] = { "uəj", "uəj" },
	["uôm"] = { "uəm", "uəm" },
	["uôn"] = { "uən", "uən" },
	["uông"] = { "uəŋ", "uəŋ" },
	["uôt"] = { "uət̚", "uət̚" },
	["uơ"] = { "wəː", "wəː" },
	["uơi"] = { "wəːj", "wəːj" },
	["uơn"] = { "uən", "uən" }, -- NOTE(review): identical to "uôn", while "uơ"/"uơi" use "wəː"; confirm
	["uơt"] = { "uət̚", "uət̚" }, -- NOTE(review): identical to "uôt", while "uơ"/"uơi" use "wəː"; confirm
	["up"] = { "up̚", "up̚" },
	["ut"] = { "ut̚", "ut̚" },
	["uy"] = { "wi", "wi" },
	["uya"] = { "wiə", "wiə" },
	["uych"] = { "wïk̟̚", "wïk̟̚" },
	["uyn"] = { "win", "win" },
	["uich"] = { "wïk̟̚", "wïk̟̚" },
	["uyê"] = { "wiə", "wiə" },
	["uyên"] = { "wiən", "wiən" },
	["uyênh"] = { "wiəŋ̟", "wəŋ" }, -- NOTE(review): the two columns differ here, unlike the surrounding entries; confirm "wəŋ"
	["uyêt"] = { "wiət̚", "wiət̚" },
	["uynh"] = { "wïŋ̟", "wïŋ̟" },
	["uyp"] = { "wip̚", "wip̚" },
	["uyt"] = { "wit̚", "wit̚" },
	["uyu"] = { "wiw", "wiw" },
	["ư"] = { "ɨ", "ɨ" },
	["ưa"] = { "ɨə", "ɨə" },
	["ưc"] = { "ɨk̚", "ɨk̚" },
	["ưi"] = { "ɨj", "ɨj" },
	["ưm"] = { "ɨm", "ɨm" },
	["ưn"] = { "ɨn", "ɨn" },
	["ưng"] = { "ɨŋ", "ɨŋ" },
	["ươc"] = { "ɨək̚", "ɨək̚" },
	["ươi"] = { "ɨəj", "ɨəj" },
	["ươm"] = { "ɨəm", "ɨəm" },
	["ươn"] = { "ɨən", "ɨən" },
	["ương"] = { "ɨəŋ", "ɨəŋ" },
	["ươp"] = { "ɨəp̚", "ɨəp̚" },
	["ươt"] = { "ɨət̚", "ɨət̚" },
	["ươu"] = { "ɨəw", "ɨəw" },
	["ưt"] = { "ɨt̚", "ɨt̚" },
	["ưu"] = { "ɨw", "ɨw" },
	["y"] = { "i", "i" },
	["yêc"] = { "iək̚", "iək̚" },
	["yêm"] = { "iəm", "iəm" },
	["yên"] = { "iən", "iən" },
	["yêng"] = { "iəŋ", "iəŋ" },
	["yêp"] = { "iəp̚", "iəp̚" },
	["yêt"] = { "iət̚", "iət̚" },
	["yêu"] = { "iəw", "iəw" },
}

-- Spelling substitutions for initials: when a syllable has tone 2, 6 or 7,
-- export.ipa swaps an initial listed here for the spelling on the right
-- before looking it up in `initial_ipa`.
local voicing = {
	pj = "bhj",
	p = "bh",
	ch = "zh",
	t = "dh",
	c = "gh",
	k = "gh",
	f = "vh",
}

-- The varieties that get a transcription, keyed by the code that also serves
-- as the name of the template's override parameter.
-- [1] is the wikitext label shown in the output; [2] is the column to read
-- from the IPA tables and the position of the variety's line in the output.
local varieties = {
	tatd = {
		"[[:en:w:Thạch An district|Thạch An]] – [[:en:w:Tràng Định district|Tràng Định]]",
		1,
	},
	tk = {
		"[[:en:w:Trùng Khánh district|Trùng Khánh]]",
		2,
	},
}

-- Tones whose syllables take the substitute initial listed in `voicing`.
local voiced_tones = { [2] = true, [6] = true, [7] = true }

-- "qua" and "qui" (initial "q" + final "ua"/"ui") are read with the rime of
-- "oa" and "uy" respectively instead of the rime their spelling suggests.
local q_rime_override = { qua = "oa", qui = "uy" }

-- Length in characters of the longest spelling in `initial_ipa`; bounds the
-- look-ahead when an initial cluster is split into known spellings.
local longest_initial = 0
for spelling in pairs(initial_ipa) do
	longest_initial = math.max(longest_initial, mw.ustring.len(spelling))
end

-- Returns the varieties as a list sorted by column number, so that the output
-- never depends on the unspecified traversal order of pairs().
local function ordered_varieties()
	local list = {}
	for loc, location in pairs(varieties) do
		list[#list + 1] = { loc = loc, label = location[1], seq = location[2] }
	end
	table.sort(list, function(a, b) return a.seq < b.seq end)
	return list
end

-- Lower-cases a page title or respelling, turns hyphens into the spaces that
-- separate syllables, and drops commas.
local function normalise(text)
	local spaced = gsub(text, "%-", " ")
	local cleaned = gsub(spaced, "%,", "")
	return mw.ustring.lower(cleaned)
end

-- Transcribes an initial for the variety column `seq`.
-- A spelling with its own entry in `initial_ipa` is looked up directly.
-- Anything else is treated as a cluster and split, left to right, into the
-- longest spellings that do have entries (e.g. "khl" is "kh" + "l"), so
-- digraphs and trigraphs inside a cluster keep their own values.
-- Raises "Unrecognised initial" when some part of the cluster has no entry.
local function initial_to_ipa(initial, seq)
	local whole = initial_ipa[initial]
	if whole then
		return whole[seq]
	end
	local pieces = {}
	local pos, len = 1, mw.ustring.len(initial)
	while pos <= len do
		local entry, size = nil, math.min(longest_initial, len - pos + 1)
		while size >= 1 do
			entry = initial_ipa[sub(initial, pos, pos + size - 1)]
			if entry then
				break
			end
			size = size - 1
		end
		if not entry then
			error(("Unrecognised initial: \"%s\""):format(initial))
		end
		pieces[#pieces + 1] = entry[seq]
		pos = pos + size
	end
	return table.concat(pieces)
end

-- Splits one written syllable into the spelling of its initial, the spelling
-- of its final, and its tone number. None of this depends on the variety.
local function parse_syllable(syllable)
	-- Decompose so that the tone mark becomes a separate combining character,
	-- record which one is present, remove it, and recompose what is left.
	local tone = 1
	syllable = mw.ustring.toNFD(syllable)
	for diacritic, tone_num in pairs(tone_diacritics) do
		local stripped, hits = gsub(syllable, diacritic, "")
		if hits > 0 then
			tone = tone_num
			syllable = stripped
		end
	end
	syllable = mw.ustring.toNFC(syllable)

	local initial = match(syllable, "^g[bcdđfgjklmnpqrstvx]+")
		or match(syllable, "^(g[hiy])[^cmnpt]")
		or match(syllable, "^g")
		or match(syllable, "^[bcdđfghjklmnpqrstvxz]+")
		or ""
	if match(syllable, "^giê.") and syllable ~= "giên" then
		initial = "d"
	end
	if match(syllable, "qu$") then
		initial = "qu"
	end
	-- The final is measured against the written initial, before any
	-- substitution from `voicing` below.
	local final = sub(syllable, mw.ustring.len(initial) + 1, -1)

	if voiced_tones[tone] and voicing[initial] then
		initial = voicing[initial]
	end
	-- A syllable without a tone mark takes the tone-3 contour when its final
	-- contains one of these letters.
	-- NOTE(review): the character class also covers the nasal codas
	-- (m, n, ng, nh), not only the stops; confirm that this is intended.
	if tone == 1 and match(final, "[chmngpt]") then
		tone = 3
	end
	return initial, final, tone
end

--- Entry point for the pronunciation template.
-- Reads the arguments of the parent frame:
--   * positional arguments are respellings to transcribe; when none is usable
--     the current page name is transcribed instead;
--   * `tatd=` / `tk=` replace the generated transcription of that variety,
--     and the value "-" suppresses that variety's line altogether.
-- @param frame the Scribunto frame object of the #invoke call
-- @return wikitext with one bullet line per variety, followed by a
--   "Phonetic" line when the respelling differs from the page name
function export.ipa(frame)
	local args = frame:getParent().args
	local pagename = normalise(mw.title.getCurrentTitle().subpageText)
	local order = ordered_varieties()

	-- Collect the respellings; blank ones are ignored so that an empty
	-- parameter falls back to the page name instead of producing "[]".
	local p = {}
	for _, item in ipairs(args) do
		local word = normalise(item)
		if match(word, "%S") then
			p[#p + 1] = word
		end
	end
	if #p == 0 then
		p[1] = pagename
	end

	-- output[loc] holds one transcription per respelling.
	local output = {}
	for _, variety in ipairs(order) do
		output[variety.loc] = {}
	end

	for _, word in ipairs(p) do
		local syllables = {}
		for _, variety in ipairs(order) do
			syllables[variety.loc] = {}
		end
		for syllable in mw.text.gsplit(word, " ", true) do
			-- Consecutive or stray spaces produce empty pieces; skip them.
			if syllable ~= "" then
				local initial, final, tone = parse_syllable(syllable)
				if not final_ipa[final] then
					error(("Unrecognised final: \"%s\""):format(final))
				end
				local rime = final_ipa[q_rime_override[initial .. final] or final]
				for _, variety in ipairs(order) do
					local list = syllables[variety.loc]
					list[#list + 1] = initial_to_ipa(initial, variety.seq)
						.. rime[variety.seq]
						.. tone_contour[variety.loc][tone]
				end
			end
		end
		for _, variety in ipairs(order) do
			local list = output[variety.loc]
			list[#list + 1] = table.concat(syllables[variety.loc], " ")
		end
	end

	local output_text = {}
	for _, variety in ipairs(order) do
		local override = args[variety.loc]
		if override ~= "-" then
			-- An absent or empty override means "use the generated value".
			local shown = (override and override ~= "") and override
				or table.concat(output[variety.loc], "], [")
			output_text[#output_text + 1] = "\n* (''" .. variety.label .. "'') " ..
				"[[Wiktionary:International Phonetic Alphabet|IPA]]<sup>([[w:Tày_language|key]])</sup>: <span class=\"IPA\">[" ..
				shown .. "]</span>"
		end
	end
	if table.concat(p, "") ~= pagename then
		output_text[#output_text + 1] = "\n* ''Phonetic'': " .. (gsub(table.concat(p, ", "), "ŕ", "R"))
	end
	-- Drop the newline in front of the first bullet.
	return (gsub(table.concat(output_text, ""), "^\n(.)", "%1"))
end

-- Hand the table of public functions back to whoever loads this module.
return export