Jump to content

Module:Bopo-convert

From Wiktionary, the free dictionary

This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.
  • Automatic conversion of bopomofo (ㄅㄆㄇㄈ), also known as Zhuyin Fuhao (注音符號), to Hanyu Pinyin

For testcases, see Module:Bopo-convert/testcases.


-- Converts bopomofo (Zhuyin Fuhao) text to Hanyu Pinyin.
-- `export` collects the functions this module makes public; it is returned
-- at the end of the file.
local export = {}

-- Zhuyin initial (consonant) symbols mapped to their Hanyu Pinyin spelling.
local Bopo2pin_initials = {
	-- labials
	["ㄅ"] = "b",
	["ㄆ"] = "p",
	["ㄇ"] = "m",
	["ㄈ"] = "f",

	-- dentals
	["ㄉ"] = "d",
	["ㄊ"] = "t",
	["ㄋ"] = "n",
	["ㄌ"] = "l",

	-- velars / gutturals
	["ㄍ"] = "g",
	["ㄎ"] = "k",
	["ㄏ"] = "h",

	-- palatals
	["ㄐ"] = "j",
	["ㄑ"] = "q",
	["ㄒ"] = "x",

	-- retroflexes
	["ㄓ"] = "zh",
	["ㄔ"] = "ch",
	["ㄕ"] = "sh",
	["ㄖ"] = "r",

	-- dental sibilants
	["ㄗ"] = "z",
	["ㄘ"] = "c",
	["ㄙ"] = "s",

	-- symbols used for other languages
	["ㄪ"] = "v",
	["ㄫ"] = "ng",
	["ㄬ"] = "gn",
}

-- Spelling of a syllable-leading glide when the syllable has no initial
-- consonant (ㄧ -> y, ㄨ -> w, ㄩ -> yu).
local Bopo2pin_medials_naked = { ["ㄧ"] = "y", ["ㄨ"] = "w", ["ㄩ"] = "yu" }

-- Glides and simple vowels as spelled after an initial consonant.
local Bopo2pin_medials = {
	-- glides
	["ㄧ"] = "i",
	["ㄨ"] = "u",
	["ㄩ"] = "ü",

	-- simple vowels
	["ㄚ"] = "a",
	["ㄛ"] = "o",
	["ㄜ"] = "e",
	["ㄝ"] = "e", -- "ye" ?
}

-- Compound finals. ㄟ, ㄣ, ㄥ and ㄦ are stored without their "e"; the
-- converter prepends it when the syllable has no medial (or the medial is "w").
local Bopo2pin_finals = {
	-- diphthongs
	["ㄞ"] = "ai",
	["ㄟ"] = "i",
	["ㄠ"] = "ao",
	["ㄡ"] = "ou",

	-- nasal endings
	["ㄢ"] = "an",
	["ㄣ"] = "n",
	["ㄤ"] = "ang",
	["ㄥ"] = "ng",

	-- rhotic
	["ㄦ"] = "r",
}

-- Zhuyin tone marks mapped to the UTF-8 bytes of the combining diacritic that
-- is appended to the Pinyin vowel. The empty key is the entry used when a
-- syllable carries no tone mark.
local Bopo2pin_tones = {
	[""]  = "\204\132", -- U+0304 combining macron
	["ˊ"] = "\204\129", -- U+0301 combining acute accent
	["ˇ"] = "\204\140", -- U+030C combining caron
	["ˋ"] = "\204\128", -- U+0300 combining grave accent
	["˙"] = "",         -- neutral tone: no diacritic
}

-- Pattern for one Zhuyin syllable. Captures, in order:
--   1. optional neutral-tone dot written BEFORE the syllable
--   2. optional initial consonant
--   3. zero or more glides / simple vowels
--   4. optional compound final
--   5. optional tone mark written AFTER the syllable
--   6. optional erhua ㄦ
-- Every part is optional, so the pattern also matches the empty string; the
-- replacement callback rejects matches that contain no sound symbol.
-- NOTE(review): ㄪ, ㄫ and ㄬ have entries in Bopo2pin_initials but are not in
-- the initial class below, so they are currently never converted.
local Bopo_syllable_pattern =
	'(˙?)'
	.. '([ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙ]?)'
	.. '([ㄧㄨㄩㄚㄛㄜㄝ]*)'
	.. '([ㄞㄟㄠㄡㄢㄣㄤㄥㄦ]?)'
	.. '([ˊˇˋ˙]?)'
	.. '(ㄦ?)'

-- Converts the captured parts of one syllable to Pinyin.
-- Returns nil (which makes gsub keep the matched text unchanged) when the
-- match contains no initial, medial or final.
local function Bopo_convert_syllable(lead, initial, medial, final, tone, erhua)
	if initial == "" and medial == "" and final == "" then
		return nil
	end

	-- A dot in front of the syllable is the neutral-tone mark unless the
	-- syllable also carries its own trailing tone mark; in that case the dot
	-- is passed through untouched in front of the result.
	local prefix = ""
	if lead ~= "" then
		if tone == "" then
			tone = lead
		else
			prefix = lead
		end
	end

	initial = Bopo2pin_initials[initial]
	final   = Bopo2pin_finals[final]
	local palatal = (initial == "j") or (initial == "q") or (initial == "x")

	if medial == "" then
		medial = nil
	else
		if palatal then
			-- after j/q/x the ü is written without its dots
			medial = mw.ustring.gsub(medial, '^ㄩ', "u")
		elseif not initial then
			-- no initial: a leading glide is spelled y / w / yu
			medial = mw.ustring.gsub(medial, '^.', Bopo2pin_medials_naked)
		end
		-- the extra parentheses drop gsub's second return value
		medial = (mw.ustring.gsub(medial, '.', Bopo2pin_medials))
	end

	if final == "ng" or final == "n" or final == "i" or final == "r" then
		if not medial or medial == "w" then
			-- en / eng / ei / er, and wen / weng / wei
			final = "e" .. final
		elseif medial == "y" then
			-- yin / ying
			medial = "yi"
		elseif final == "ng" then
			if medial == "u" then
				-- -ong, or -iong after j/q/x
				medial = palatal and "io" or "o"
			elseif medial == "yu" then
				-- yong
				medial = "yo"
			end
		end
	elseif final == "ou" and medial == "i" then
		-- -iou is abbreviated to -iu
		final = "u"
	end

	if not final then
		if medial == "y" then
			medial = "yi"
		elseif medial == "w" then
			medial = "wu"
		elseif not medial then
			-- bare initial (zhi, chi, shi, ...) gets the syllabic "i"
			medial = "i"
		end
	end

	if erhua ~= "" then
		final = (final or "") .. "r"
	end

	-- Place the tone diacritic after the first vowel of the final; when the
	-- final has no vowel, put it at the end of the medial instead.
	local mark = Bopo2pin_tones[tone]
	local placed
	final, placed = string.gsub(final or "", "^(.-[aeiou])", "%1" .. mark)
	if placed == 0 then
		medial = (medial or "") .. mark
	end

	return prefix .. (initial or "") .. (medial or "") .. final
end

-- Converts every Zhuyin syllable found in `text` to Hanyu Pinyin with tone
-- diacritics; all other characters are left as they are.
--
-- text:    a string, or a table with an `args` field (e.g. the frame passed
--          by #invoke) whose first positional argument is the string.
-- returns: the converted text in Unicode NFC form, or "" when no text was
--          supplied.
--
-- The neutral-tone dot is accepted both after and before a syllable. A dot
-- directly between two syllables is read as the trailing mark of the first.
function export.Bopo_to_pinyin(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	if text == nil or text == "" then
		return ""
	end

	text = mw.ustring.gsub(text, Bopo_syllable_pattern, Bopo_convert_syllable)

	return mw.ustring.toNFC(text)
end

-- Hand the table of public functions back to whoever loads this module.
return export