Module:yue-pron/sandbox

From Wiktionary, the free dictionary
Jump to navigation Jump to search


-- Table of functions returned by this module (see the final `return export`).
local export = {}
local m_string_utils = require("Module:string utilities")

-- Local aliases for the string helpers used by the conversion functions below.
local gsub = m_string_utils.gsub
local lower = m_string_utils.lower
local split = mw.text.split

-- Tone digit substituted when tone 1, 3 or 6 directly follows a p/t/k coda
-- (used by the "([ptk])([136])" rewrite rules in this module).
local entering_tones = {
	["1"] = "7",
	["3"] = "8",
	["6"] = "9",
}

-- Ordered {pattern, replacement} pairs applied one after another to a syllable
-- before its IPA lookup. Order matters: rule 1 turns every "a" into "ă" and
-- rule 3 then turns the resulting "ăă" (original "aa") back into "a".
-- The breve-marked letters produced here are keys of ipa_nucleus.
local ipa_preprocess = {
	{"a", "ă"},
	{"yu", "y"},
	{"ăă", "a"},
	{"uk", "ŭk"},
	{"ik", "ĭk"},
	{"ou", "ŏu"},
	{"eoi", "eoy"},
	{"ung", "ŭng"},
	{"ing", "ĭng"},
	{"ei", "ĕi"},
}

-- IPA for each syllable-initial consonant spelling; the empty string covers
-- syllables without an initial. "zh"/"ch"/"sh" are not accepted as input by
-- jyutping_to_ipa but are generated internally by it before the lookup.
local ipa_initial = {
	b = "p",
	p = "pʰ",
	m = "m",
	f = "f",
	d = "t",
	t = "tʰ",
	n = "n",
	l = "l",
	g = "k",
	k = "kʰ",
	ng = "ŋ",
	gw = "kʷ",
	kw = "kʷʰ",
	zh = "t͡ʃ",
	ch = "t͡ʃʰ",
	sh = "ʃ",
	z = "t͡s",
	c = "t͡sʰ",
	s = "s",
	h = "h",
	w = "w",
	j = "j",
	[""] = "",
}

-- IPA for each vowel nucleus. Keys with a breve are the markers produced by
-- ipa_preprocess; plain keys map to the long (ː) vowels.
local ipa_nucleus = {
	["a"] = "aː",
	["ă"] = "ɐ",
	["e"] = "ɛː",
	["ĕ"] = "e",
	["i"] = "iː",
	["ĭ"] = "ɪ",
	["o"] = "ɔː",
	["ŏ"] = "o",
	["oe"] = "œː",
	["eo"] = "ɵ",
	["u"] = "uː",
	["ŭ"] = "ʊ",
	["y"] = "yː",
}

-- IPA for each syllable coda; the empty string covers open syllables.
local ipa_coda = {
	i = "i̯",
	u = "u̯",
	y = "y̯",
	m = "m",
	n = "n",
	ng = "ŋ",
	p = "p̚",
	t = "t̚",
	k = "k̚",
	[""] = "",
}

-- Tone letters (superscript digits) for each tone digit. Tones 1 and 4 are
-- wrapped in a hover <span> whose title gives the alternative contour.
-- 7-9 are the entering-tone digits from entering_tones; "" means "no tone".
local ipa_tone = {
	["1"] = '<span style="cursor:help" title="or 53">⁵⁵</span>',
	["2"] = "³⁵",
	["3"] = "³³",
	["4"] = '<span style="cursor:help" title="or 11">²¹</span>',
	["5"] = "¹³",
	["6"] = "²²",
	["7"] = "⁵",
	["8"] = "³",
	["9"] = "²",
	[""] = "",
}

-- Separator written between an original tone and its changed tone; both "*"
-- and "-" in the input produce the same mark, and "" means no tone change.
local ipa_tone_sandhi = {
	["*"] = "⁻",
	["-"] = "⁻",
	[""] = "",
}

-- IPA for the consonants that can form a syllable on their own.
local ipa_syllabic = {
	m = "m̩",
	ng = "ŋ̍",
}

-- Vowel -> accented vowel lookups used by yale_tone as gsub replacement tables.

-- Acute accent (applied for tones 2 and 5).
local acute_accents = {
	a = "á",
	e = "é",
	i = "í",
	o = "ó",
	u = "ú",
}

-- Grave accent (applied for tone 4).
local grave_accents = {
	a = "à",
	e = "è",
	i = "ì",
	o = "ò",
	u = "ù",
}

-- Macron (applied for tone 1).
local macrons = {
	a = "ā",
	e = "ē",
	i = "ī",
	o = "ō",
	u = "ū",
}

-- Jyutping finals whose Yale spelling differs from the Jyutping one. Finals
-- mapped to the sentinel string have no Yale spelling; jyutping_to_yale looks
-- for that sentinel in its output and reports the reading as undefined.
-- Finals absent from this table are used unchanged by the caller.
local yale_final
do
	local no_yale = "沒有耶魯拼音"
	yale_final = {
		a = no_yale,
		aa = "a",
		eu = no_yale,
		em = no_yale,
		en = no_yale,
		ep = no_yale,
		et = no_yale,
		om = no_yale,
		op = no_yale,
		um = no_yale,
		up = no_yale,
		oe = "eu",
		oem = no_yale,
		oen = no_yale,
		oeng = "eung",
		oep = no_yale,
		oet = no_yale,
		oek = "euk",
		eoi = "eui",
		eon = "eun",
		eot = "eut",
		[""] = "",
	}
end

-- Rewrite rules for jyutping_to_cantonese_pinyin: each key is a pattern and
-- each value is its gsub replacement (a string or a function).
-- NOTE: this is a hash table, so iterating it with pairs() visits the rules in
-- an unspecified order, even though some rules overlap ("eo" vs. "eoi", and
-- the two tone rules below).
local canton_pinyin_regex = {
	["yu"] = "y", ["eo"] = "oe", ["eoi"] = "oey", ["z(h?)"] = "dz%1", ["c(h?)"] = "ts%1", 
	-- p/t/k immediately followed by tone 1/3/6: swap the tone via entering_tones.
	["([ptk])([136])"]=function(a,b) return a .. entering_tones[b] end,
	-- Same swap for the tone after a "-" or "*" marker at the very end of the string.
	["([ptk][1-6][%-%*])([136])$"]=function(a,b) return a .. entering_tones[b] end
}

-- Applies Yale tone marking to a final.
--   final: the final (rime) to mark.
--   tone:  the tone digit as a string.
-- Tones 4, 5 and 6 insert "h" before the trailing consonant(s), or at the very
-- end when there are none. Tone 1 then puts a macron, tone 4 a grave accent and
-- tones 2 and 5 an acute accent on the first plain vowel.
-- Returns the marked final.
local function yale_tone(final, tone)
	local needs_h = tone == "4" or tone == "5" or tone == "6"
	if needs_h then
		-- The limit of 1 keeps the "$"-anchored, possibly empty match from firing twice.
		final = gsub(final, "([ptkmn]?g?)$", "h%1", 1)
	end

	local accent_map
	if tone == "1" then
		accent_map = macrons
	elseif tone == "4" then
		accent_map = grave_accents
	elseif tone == "2" or tone == "5" then
		accent_map = acute_accents
	end

	if accent_map then
		final = gsub(final, "[aeiou]", accent_map, 1)
	end
	return final
end

-- Converts a Jyutping transcription to IPA.
--   text: the Jyutping string, or a table (e.g. a frame) whose args[1] holds it.
--         Syllables are separated by spaces; a comma that is not followed by a
--         space separates alternative readings.
-- Returns the IPA of every reading joined with "/, /" (the outermost slashes
-- are not added here).
-- Raises an error when the input fails validation or contains a syllable part
-- that has no IPA mapping.
function export.jyutping_to_ipa(text)
	if type(text) == "table" then text = text.args[1] end

	if text:find("[7-9]") then error("Invalid tone in Jyutping.") end
	if text:find("[A-Z]") then error("Please do not capitalize the Jyutping.") end
	if text:find("%-[A-Za-z]") then error("Please do not hyphenate the Jyutping.") end
	if text:find("[0-9][a-z]") then error("Error in the Jyutping transcription.") end
	if text:find("[zcs]h") then error("'zh'/'ch'/'sh' are non-valid Jyutping, use 'z'/'c'/'s' instead.") end
	if text:find("y[^u]") then error("Wrong usage of 'y' in Jyutping.") end
	if text:find("oei") then error("Invalid rime oei in Jyutping.") end
	if text:find("eong") then error("Invalid rime eong in Jyutping") end
	if text:find("eok") then error("Invalid rime eok in Jyutping") end

	text = text:lower()
	text = gsub(text, "jy([^u])", "j%1")
	text = gsub(text, "%.%.%.", " ")
	-- Mark reading boundaries: "," becomes a placeholder, but ", " is restored
	-- so that only a comma without a following space splits readings.
	text = gsub(gsub(text, ",", "隔"), "隔 ", ", ")
	local reading = split(text, "隔")

	-- Syllabic nasal: (h?)(m|ng)(tone)(sandhi mark)(changed tone).
	local function retrieve_ipa1(a,b,c,d,e)
		-- The patterns below also let "n" and "mg" through, which ipa_syllabic
		-- does not define; report them instead of failing on a nil concatenation.
		return ipa_initial[a] ..
			(ipa_syllabic[b] or error(("Unrecognised syllabic consonant: \"%s\""):format(b))) ..
			ipa_tone[c] .. ipa_tone_sandhi[d] .. ipa_tone[e]
	end

	-- Ordinary syllable: (initial)(nucleus)(coda)(tone)(sandhi mark)(changed tone).
	local function retrieve_ipa2(a,b,c,d,e,f)
		return (ipa_initial[a] or error(("Unrecognised initial: \"%s\""):format(a))) ..
			(ipa_nucleus[b] or error(("Unrecognised nucleus: \"%s\""):format(b))) ..
			(ipa_coda[c] or error(("Unrecognised coda: \"%s\""):format(c))) ..
			(ipa_tone[d] or error(("Unrecognised tone: \"%s\""):format(d))) ..
			ipa_tone_sandhi[e] ..
			(ipa_tone[f] or error(("Unrecognised tone: \"%s\""):format(f)))
	end

	local function get_entering_tone(a,b) return a .. entering_tones[b] end

	for i = 1,#reading do
		-- Put a space between a tone digit and a directly following letter
		-- (keeping the letter), then drop every character that is not part of
		-- the transcription.
		reading[i] = gsub(gsub(reading[i], "([1-6])([a-z])", "%1 %2"), "[^a-z1-6%-%* ]", "")
		local syllable = split(reading[i]:lower(), " ")
		if #syllable == 1 then
			require('Module:debug').track('yue-pron/'..(syllable[1]))
		end
		for j = 1,#syllable do
			local syl = syllable[j]
			-- Select the "zh"/"ch"/"sh" entries of ipa_initial in these contexts.
			syl = gsub(syl, "([zcs])yu", "%1hyu")
			syl = gsub(syl, "([zc])oe", "%1hoe")
			syl = gsub(syl, "([zc])eo", "%1heo")
			if (not syl:find("^[bcdfghjklmnpqrstvwxyz]?[bcdfghjklmnpqrstvwxyz]?[aeiouy]+[mnptk]?g?[1-9][%-%*]?[1-9]?$") and not syl:find("^h?[mn]g?[1-9][%-%*]?[1-9]?$")) and syl:find("[a-z1-9]") then
				error("Incorrect Jyutping format. Please check!")
			end
			syl = gsub(syl, "^(h?)([mn]g?)([1-6])([%-%*]?)([1-6]?)$", retrieve_ipa1)
			syl = gsub(syl, "([ptk])([136])", get_entering_tone)
			syl = gsub(syl, "([ptk][1-9][%-%*])([136])$", get_entering_tone)
			for _,regex_pair in ipairs(ipa_preprocess) do
				syl = gsub(syl,regex_pair[1],regex_pair[2])
			end
			-- The "a" -> "ă" rule also hits the "span" tags that retrieve_ipa1
			-- may already have inserted; undo that.
			syl = gsub(syl, "spăn", "span")
			syl = gsub(syl,
				"([bcdfghjklmnpqrstvwxyz]?[bcdfghjklmnpqrstvwxyz]?)([aăeĕiĭoŏuŭy][eo]?)([iuymngptk]?g?)([1-9])([%-%*]?)([1-9]?)",
				retrieve_ipa2)
			syllable[j] = syl
		end
		reading[i] = table.concat(syllable, " ")
	end
	return table.concat(reading, "/, /")
end

-- Converts a Jyutping transcription to Yale romanisation.
--   text: the Jyutping string, or a table (e.g. a frame) whose args[1] holds it.
--         A comma that is not followed by a space separates alternative readings.
-- Returns the readings joined with ", ". A reading containing a final with no
-- Yale spelling (see yale_final) is replaced by "colloquial sounds not defined".
function export.jyutping_to_yale(text)
	if type(text) == "table" then text = text.args[1] end

	text = gsub(text, "jy([^u])", "j%1")
	-- Only a comma without a following space splits readings.
	text = gsub(gsub(text, ",", "隔"), "隔 ", ", ")
	local reading = split(text, "隔")
	
	-- Maps the final through yale_final (unchanged when not listed), then adds the tone.
	local function yale_tone2(a, b) return yale_tone((yale_final[a] or a), b) end
	
	for i = 1,#reading do
		-- For "X-Y" tone changes keep only the changed tone Y.
		reading[i] = gsub(reading[i], "[1-6]%-", "")
		-- Separate syllables written without a space. The letter after the tone
		-- digit is captured and kept; it was previously swallowed by the match.
		reading[i] = gsub(reading[i], "([1-9])([a-z])", "%1 %2")
		local syllables = split(lower(reading[i]), " ",true)
		for j = 1,#syllables do
			local syl = syllables[j]
			
			-- Fold the entering-tone digits back to tones 1/3/6.
			syl = gsub(syl, "[789]",{["7"] = "1", ["8"] = "3", ["9"] = "6"})
			-- Syllabic nasals; "{{懸停|或X|Y}}" is a placeholder expanded to a hover span below.
			syl = gsub(syl, "^m[1-6]$",{["m1"] = "{{懸停|或m̀|m̄}}", ["m2"] = "ḿ", ["m3"] = "m", ["m4"] = "m̀h", ["m5"] = "ḿh", ["m6"] = "mh"})
			syl = gsub(syl, "^ng[1-6]$",{["ng1"] = "{{懸停|或ǹg|n̄g}}", ["ng2"] = "ńg", ["ng3"] = "ng", ["ng4"] = "ǹgh", ["ng5"] = "ńgh", ["ng6"] = "ngh"})
			syl = gsub(syl, "jy?", "y")
			syl = gsub(syl, "[cz]",{["z"] = "j", ["c"] = "ch"})
			syl = gsub(syl, "([aeiou][aeiou]?[iumngptk]?[g]?)([1-6])", yale_tone2)
			syl = gsub(syl, "(yu[tn]?)([1-6])", yale_tone)
			syl = gsub(syl, "[āēīōū]",{["ā"] = "{{懸停|或à|ā}}", ["ē"] = "{{懸停|或è|ē}}", ["ī"] = "{{懸停|或ì|ī}}", ["ō"] = "{{懸停|或ò|ō}}", ["ū"] = "{{懸停|或ù|ū}}"})
			syl = gsub(syl, "{{懸停|或([^|]+)|([^}]+)}}", '<span style="cursor:help" title="or %1">%2</span>')
			
			syllables[j] = syl
		end
		reading[i] = table.concat(syllables, " ")
		if reading[i]:find("沒有耶魯拼音") then
			reading[i] = "colloquial sounds not defined"
		end
	end
	return table.concat(reading, ", ")
end

-- Order in which the rules of canton_pinyin_regex are applied. pairs() gives no
-- ordering guarantee, and the rules are order-sensitive:
--   * "eoi" must run before "eo", otherwise "eoi" becomes "oei" instead of "oey";
--   * the tone-change rule matches an original tone in [1-6], so it must run
--     before the plain rule rewrites that tone to 7/8/9.
-- A rule added to canton_pinyin_regex must also be listed here to take effect.
local canton_pinyin_rule_order = {
	"eoi", "eo", "yu", "z(h?)", "c(h?)",
	"([ptk][1-6][%-%*])([136])$", "([ptk])([136])",
}

-- Converts a Jyutping transcription to Cantonese Pinyin.
--   text: the Jyutping string, or a table (e.g. a frame) whose args[1] holds it.
--         A comma that is not followed by a space separates alternative readings.
-- Returns the readings joined with ", ". A reading containing "oe" followed by
-- m/n/p/t (and not "ng") is replaced by "colloquial sounds not defined".
function export.jyutping_to_cantonese_pinyin(text)
	if type(text) == "table" then text = text.args[1] end
	
	text = gsub(text, "jy([^u])", "j%1")
	text = gsub(gsub(text, ",", "隔"), "隔 ", ", ")
	local reading = split(text, "隔")
	for i=1,#reading do
		if reading[i]:find("oe[mnpt][^g]") then
			reading[i] = "沒有教院式拼音"
		end
		for _, regex in ipairs(canton_pinyin_rule_order) do
			reading[i] = gsub(reading[i], regex, canton_pinyin_regex[regex])
		end
		if reading[i]:find("沒有教院式拼音") then
			reading[i] = "colloquial sounds not defined"
		end
	end
	return table.concat(reading, ", ")
end

-- Converts a Jyutping transcription to Guangdong Romanization.
--   text: the Jyutping string, or a table (e.g. a frame) whose args[1] holds it.
--         A comma that is not followed by a space separates alternative readings.
-- Returns the readings joined with ", ". A reading in which any rule produced
-- the "undefined" sentinel is replaced by 'colloquial sounds not defined'.
function export.jyutping_to_guangdong(text)
	if type(text) == 'table' then text = text.args[1] end

	local undefined = '沒有廣東拼音'
	local palatal_for = { ['z']='j', ['c']='q', ['s']='x' }
	local final_letter = { ['p']='b', ['k']='g', ['t']='d' }

	-- {pattern, replacement} pairs, applied strictly in this order: later
	-- rules rely on the output of earlier ones.
	local steps = {
		{ 'yu', 'ü' },
		{ 'eoi', 'êü' },
		{ 'j', 'y' },

		{ 'oe[mnpt][^g]', undefined },
		{ '[oe][oe]', 'ê' },

		{ 'o[mp]', undefined },
		{ 'e[un][^g]', undefined },
		{ 'e', 'é' },

		{ 'u[mp]', undefined },

		-- z/c/s before i or ü are written j/q/x.
		{ '([zcs])([iü])', function(initial, vowel) return palatal_for[initial] .. vowel end },

		{ '([jqxyê])ü', '%1u' },

		-- Keyed by the whole match; a run that is neither "a" nor "aa" is left alone.
		{ 'a+', { ['aa']='a', ['a']='e' } },

		{ '([kg])w', '%1u' },
		{ '([ae])u', '%1o' },

		-- p/k/t directly before a digit are written b/g/d.
		{ '([pkt])(%d)', function(stop, digit) return final_letter[stop] .. digit end },

		-- An "e" still standing directly before the tone at this point has no spelling.
		{ 'e[1-9]', undefined },
	}

	local reading = split(gsub(gsub(text, ",", "隔"), "隔 ", ", "), "隔")
	for idx, item in ipairs(reading) do
		for _, step in ipairs(steps) do
			item = gsub(item, step[1], step[2])
		end
		if item:find(undefined) then
			item = 'colloquial sounds not defined'
		end
		reading[idx] = item
	end
	return table.concat(reading, ", ")
end

-- Splits a Jyutping string into readings and collapses "X-Y" tone changes to
-- the changed tone Y.
--   text: the Jyutping string, or a table (e.g. a frame) whose args[1] holds it.
-- Returns a table of readings, or a fixed placeholder string when the input
-- contains "[[".
function export.jyutping_format(text)
	if type(text) == "table" then text = text.args[1] end

	if text:find("%[%[") then
		return "just a lengthy text to ensure it works"
	end

	-- Only a comma without a following space splits readings.
	local marked = gsub(text, ",", "隔")
	marked = gsub(marked, "隔 ", ", ")
	local readings = split(marked, "隔")

	for idx = 1, #readings do
		readings[idx] = gsub(readings[idx], "[1-6]%-([1-6])", "%1")
	end

	return readings
end

-- Converts a romanised transcription to IPA using the hsv_* tables below.
--   text: words separated by ","; syllables within a word separated by " ".
--         Each syllable is initial + final + tone digit (1-5), optionally
--         followed by a tone-change suffix ("*", "-N", "-N*", ...).
-- Returns "/word/, /word/..." with the syllables of a word joined by spaces.
-- Raises an error for any syllable that cannot be converted.
function export.hoisanva_to_ipa(text)
	local hsv_initial = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "ᵐb", ["f"] = "f", ["v"] = "v",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "ⁿd", ["l"] = "l", ["lh"] = "ɬ",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ᵑɡ", 
		["z"] = "t͡s", ["c"] = "t͡sʰ",
		["y"] = "j", ["s"] = "s", ["h"] = "h", [""] = ""
	}
	
	local hsv_final = {
		["a"] = "a", ["ai"] = "ai", ["au"] = "au", ["am"] = "am",
		["an"] = "an", ["ang"] = "aŋ", ["ap"] = "ap̚", ["at"] = "at̚",
		["ak"] = "ak̚", 

		["i"] = "i", ["iu"] = "iu", ["im"] = "im", ["in"] = "in", 
		["ip"] = "ip̚", ["it"] = "it̚",

		["ie"] = "iɛ", ["iau"] = "iau", ["iam"] = "iam", ["iang"] = "iaŋ", 
		["iap"] = "iap̚", ["iak"] = "iak̚",

		["u"] = "u", ["ui"] = "ui", ["un"] = "un", ["ut"] = "ut̚", 

		["e"] = "ə", ["ei"] = "ei", ["eu"] = "eu", ["em"] = "em", ["en"] = "en", 
		["uung"] = "ɵŋ", ["ep"] = "ep̚", ["et"] = "et̚", ["uuk"] = "ɵk̚", ["uut"] = "ɵt̚",

		["o"] = "ᵘɔ", ["oi"] = "ᵘɔi", ["on"] = "ᵘɔn", ["ong"] = "ɔŋ", 
		["ot"] = "ᵘɔt̚", ["ok"] = "ɔk̚", 
		
		["m"] = "m̩"
	}
	
	-- Indexed by tone number 1-5.
	local hsv_tone = { "³³", "⁵⁵", "²²", "²¹", "³²" }
	
	local gsplit = mw.text.gsplit

	local words_ipa = {}
	for word in gsplit(text, ",") do
		local syllables_ipa = {}
		for syllable in gsplit(word, " ") do
			-- All four captures are nil when the syllable does not match; the
			-- table lookups below then yield nil and the error is raised.
			local onset, rime, tone, change = syllable:match("^([^aeiou]*)([^1-5]*)([1-5])([%*%-]?[1-5]?%*?)$")
			if rime == "" then
				-- No vowel: the consonant run is the final (e.g. syllabic "m").
				onset, rime = "", onset
			end

			local onset_ipa = hsv_initial[onset]
			local rime_ipa = hsv_final[rime]
			local tone_ipa = tone and hsv_tone[tonumber(tone)]
			local convertible = onset_ipa and rime_ipa and tone_ipa and not (tone == "2" and change == "*")
			if not convertible then
				error("Syllable '" .. syllable .. "' is not a valid syllable for IPA conversion.")
			end

			-- Tone-change suffix: separator, then the target tone, then a final
			-- "⁵" when the suffix ends in "*".
			local suffix = ""
			if change ~= "" then
				suffix = "⁻"
			end
			if change == "*" then
				suffix = suffix .. tone_ipa
			elseif change:find("^%-[1-5]") then
				suffix = suffix .. hsv_tone[tonumber(change:sub(2, 2))]
			end
			if change:sub(-1, -1) == "*" then
				suffix = suffix .. "⁵"
			end

			syllables_ipa[#syllables_ipa + 1] = onset_ipa .. rime_ipa .. tone_ipa .. suffix
		end
		words_ipa[#words_ipa + 1] = table.concat(syllables_ipa, " ")
	end
	return "/" .. table.concat(words_ipa, "/, /") .. "/"
end

-- Builds the headword line for a Jyutping entry.
--   frame: the invoking frame; the optional "head" parameter is read from the
--          parent frame's arguments and defaults to the current page title.
-- Tone digits (with an optional "-"/"*" tone change) are wrapped in <sup>.
-- Returns the result of Module:headword's full_headword.
function export.jyutping_headword(frame)
	local m_parameters = require("Module:parameters")
	local args = m_parameters.process(
		frame:getParent().args,
		{ ["head"] = {} },
		nil, "yue-pron", "jyutping_headword"
	)

	local head = args.head
	if not head then
		head = mw.title.getCurrentTitle().text
	end

	local yue = require("Module:languages").getByCode("yue")
	local Latn = require("Module:scripts").getByCode("Latn")

	-- Keep only the first result of gsub (the string, not the match count).
	local marked_head = head:gsub("%d[%-%*]?%d?", "<sup>%0</sup>")

	return require("Module:headword").full_headword{
		lang = yue,
		sc = Latn,
		heads = {marked_head},
		pos_category = "jyutping",
	}
end

return export