-- Module:User:Theknightwho/sandbox2
-- From Wiktionary, the free dictionary


local concat = table.concat
local explode = require("Module:string utilities").explode_utf8
local insert = table.insert

local data = require("Module:User:Theknightwho/cmn-pron2/data")
local pinyin_tones = data.pinyin_tones
local pinyin_tone_letters = data.pinyin_tone_letters

-- Syllable initials. The generation loop at the end of the file pairs each of
-- these, in this order, with the post-initial spelling of every final.
local initials = {
	"b", "p", "m", "f", "v",
	"d", "t", "n", "l",
	"g", "k", "ng", "h",
	"j", "q", "gn", "x",
	"zh", "ch", "sh", "r",
	"z", "c", "s",
}

-- Finals, stored as {spelling used after an initial, standalone spelling}.
-- The generation loop at the end of the file passes the second element to
-- `add` by itself and the first element to `iteration` with each initial.
-- Rows whose standalone spelling starts with "on" ("on" and the first "ong"
-- row) are emitted standalone only; that loop skips their initial combinations.
-- Row order matters: syllables are appended to `output` in table order.
local finals = {
	-- Rows whose two spellings are identical.
	{"a", "a"},
	{"o", "o"},
	{"e", "e"},
	{"ê", "ê"},
	{"ai", "ai"},
	{"ei", "ei"},
	{"ao", "ao"},
	{"ou", "ou"},
	{"an", "an"},
	{"on", "on"},
	{"en", "en"},
	{"ên", "ên"},
	{"ang", "ang"},
	{"ong", "ong"},
	{"eng", "eng"},
	{"êng", "êng"},
	{"ain", "ain"},
	{"ein", "ein"},
	{"aon", "aon"},
	{"oun", "oun"},
	{"aing", "aing"},
	{"eing", "eing"},
	{"aong", "aong"},
	{"oung", "oung"},
	-- i-series: the standalone spelling starts with "y".
	{"i", "yi"},
	{"ia", "ya"},
	{"io", "yo"},
	{"ie", "ye"},
	{"iai", "yai"},
	{"iei", "yei"},
	{"iao", "yao"},
	{"iu", "you"},
	{"ian", "yan"},
	{"in", "yin"},
	{"iên", "yên"},
	{"iang", "yang"},
	{"ing", "ying"},
	{"iêng", "yêng"},
	{"iain", "yain"},
	{"iein", "yein"},
	{"iaon", "yaon"},
	{"ioun", "youn"},
	{"iaing", "yaing"},
	{"ieing", "yeing"},
	{"iaong", "yaong"},
	{"ioung", "young"},
	-- u-series: the standalone spelling starts with "w".
	{"u", "wu"},
	{"ua", "wa"},
	{"uo", "wo"},
	{"ue", "we"},
	{"uai", "wai"},
	{"ui", "wei"},
	{"uao", "wao"},
	{"uou", "wou"},
	{"uan", "wan"},
	{"un", "wen"},
	{"uên", "wên"},
	{"uang", "wang"},
	-- Second "ong" row: its standalone spelling is "weng", which does not
	-- start with "on", so this is the row that gets combined with initials.
	{"ong", "weng"},
	{"uêng", "wêng"},
	{"uain", "wain"},
	{"uein", "wein"},
	{"uaon", "waon"},
	{"uoun", "woun"},
	{"uaing", "waing"},
	{"ueing", "weing"},
	{"uaong", "waong"},
	{"uoung", "woung"},
	-- ü-series: the standalone spelling starts with "yu" (except "yong").
	{"ü", "yu"},
	{"üa", "yua"},
	{"üo", "yuo"},
	{"üe", "yue"},
	{"üai", "yuai"},
	{"üi", "yui"},
	{"üao", "yuao"},
	{"üou", "yuou"},
	{"üan", "yuan"},
	{"ün", "yun"},
	{"üên", "yuên"},
	{"üang", "yuang"},
	-- Contains no "ü", so `iteration` does not filter it out after j/q/x.
	{"iong", "yong"},
	{"üêng", "yuêng"},
	{"üain", "yuain"},
	{"üein", "yuein"},
	{"üaon", "yuaon"},
	{"üoun", "yuoun"},
	{"üaing", "yuaing"},
	{"üeing", "yueing"},
	{"üaong", "yuaong"},
	{"üoung", "yuoung"},
	-- Consonant-only rows; both spellings are identical.
	{"m", "m"},
	{"n", "n"},
	{"ng", "ng"},
}

-- Set of initials for which `iteration` refuses the final "uo".
local bpmfv = {}
for _, letter in ipairs({"b", "p", "m", "f", "v"}) do
	bpmfv[letter] = true
end

-- Set of initials for which `iteration` refuses any final containing "ü".
local jqx = {}
for _, letter in ipairs({"j", "q", "x"}) do
	jqx[letter] = true
end

-- Flat list of generated syllables; `add` appends to it and the module
-- returns it at the end of the file.
local output = {}

--- Append five tone variants of a toneless syllable to `output`.
-- The string is split with `explode` (presumably into one UTF-8 character per
-- element - confirm against Module:string utilities). The element with the
-- highest `pinyin_tone_letters` score is chosen to carry the tone mark;
-- elements not listed there score 0 and ties go to the later element.
-- @param str toneless syllable
local function add(str)
	local chars = explode(str)

	-- Locate the element that receives the tone mark. `>=` (not `>`) makes
	-- the last of several equally-scored elements win.
	local top_score, target = 0
	for index = 1, #chars do
		local score = pinyin_tone_letters[chars[index]] or 0
		if score >= top_score then
			top_score = score
			target = index
		end
	end

	-- Put a "\1" placeholder directly after the first UTF-8 character of the
	-- chosen element (lead byte followed by any continuation bytes).
	local marked = chars[target]:gsub(
		"^[%z\1-\127\194-\244][\128-\191]*",
		"%0\1"
	)
	chars[target] = marked
	local template = concat(chars)

	-- One entry per tone index 1..5; an index with no entry in `pinyin_tones`
	-- simply drops the placeholder.
	for tone = 1, 5 do
		local syllable = template:gsub("\1", pinyin_tones[tone] or "")
		insert(output, syllable)
	end
end
	

--- Emit the tone variants of `initial .. final` unless the pair is excluded.
-- Two combinations are skipped: the final "uo" after an initial in `bpmfv`,
-- and any final containing "ü" after an initial in `jqx`.
-- @param initial initial spelling
-- @param final post-initial spelling of the final
local function iteration(initial, final)
	local blocked_uo = bpmfv[initial] and final == "uo"
	local blocked_umlaut = jqx[initial] and final:match("ü")
	if not (blocked_uo or blocked_umlaut) then
		add(initial .. final)
	end
end

-- Build the syllable list: for every final, first the standalone spelling,
-- then (unless that spelling starts with "on") each initial joined to the
-- post-initial spelling, subject to the filters in `iteration`.
for row = 1, #finals do
	local after_initial, standalone = finals[row][1], finals[row][2]
	add(standalone)
	local initials_allowed = standalone:sub(1, 2) ~= "on"
	if initials_allowed then
		for column = 1, #initials do
			iteration(initials[column], after_initial)
		end
	end
end

return output