-- Module:yue-pron/check
-- From Wiktionary, the free dictionary


-- Character class used to recognise Han characters in the headword.
-- The ranges are multi-byte literals, so the class is only meaningful with
-- the Unicode-aware mw.ustring functions (it is passed to mw.ustring.gsub
-- below), not with the byte-oriented string library.
-- The disabled line is the dynamic alternative that would fetch the class
-- from [[Module:scripts]]; the literal on the next line is used instead.
-- NOTE(review): presumably a hard-coded snapshot of the "Hani" character set
-- to avoid loading Module:scripts -- confirm it is kept in sync.
--local Han_pattern = "[" .. require("Module:scripts").getByCode("Hani"):getCharacters() .. "]"
local Han_pattern = "[一-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩𠀀-𪛟𪜀-𮹟𰀀-𲎯]"
-- Lookup set of the syllable onsets accepted by check_jyutping.
-- Each accepted spelling is a key mapped to 1, so membership is tested
-- with a plain index (initials[x]).
local initials = {}
for _, onset in ipairs({
	"b", "p", "m", "f", "d", "t", "n", "l",
	"g", "k", "ng", "h", "gw", "kw",
	"z", "c", "s", "j", "w",
}) do
	initials[onset] = 1
end
-- Lookup set of the syllable rimes accepted by check_jyutping.
-- Each accepted spelling is a key mapped to 1; the list below is grouped
-- one row per leading vowel spelling (aa, a, e, i, o, u, eo, oe, yu).
local finals = {}
for _, rime in ipairs({
	"aa", "aai", "aau", "aam", "aan", "aang", "aap", "aat", "aak",
	"a", "ai", "au", "am", "an", "ang", "ap", "at", "ak",
	"e", "ei", "eu", "em", "eng", "ep", "ek",
	"i", "iu", "im", "in", "ing", "ip", "it", "ik",
	"o", "oi", "ou", "on", "ong", "ot", "ok",
	"u", "ui", "un", "ung", "ut", "uk",
	"eoi", "eon", "eot",
	"oe", "oeng", "oet", "oek",
	"yu", "yun", "yut",
}) do
	finals[rime] = 1
end

--- Decide whether a single syllable is spelled as valid Jyutping.
-- The syllable must be lowercase letters, one tone digit 1-6, and an
-- optional tone-change suffix "-1" or "-2". The letters must be either an
-- entry of `initials` followed by an entry of `finals`, a bare entry of
-- `finals`, or one of the vowel-less syllables "m" / "ng".
-- @param syl string: one space-free syllable, e.g. "nei5" or "min6-2"
-- @return boolean: true when the syllable is accepted, false otherwise
local function check_jyutping(syl)
	local letters, tone_change = syl:match("^(%l+)[1-6](%-?[12]?)$")
	if letters == nil then
		return false
	end
	-- The suffix capture may be "", "-", "1", "2", "-1" or "-2"; a length
	-- of one means a lone hyphen or a stray second digit, both malformed.
	if tone_change:len() == 1 then
		return false
	end

	-- Syllabic nasals and onset-less syllables need no initial.
	if letters == "m" or letters == "ng" or finals[letters] then
		return true
	end

	-- Otherwise split into onset + rime and look both halves up.
	local onset, rime = letters:match("^([bpmfdtnlgknhzcsjw][gw]?)([aeiouy]+[mnptk]?g?)$")
	if onset and initials[onset] and finals[rime] then
		return true
	end
	return false
end

--- Module export: validate a comma-separated list of pronunciations
-- against a headword.
-- The check passes only when `word` consists solely of characters matched
-- by Han_pattern and every pronunciation in `prons` has exactly one
-- check_jyutping-valid syllable per character, with syllables separated by
-- single spaces and no leading or trailing space.
-- If `prons` contains no non-comma text, there is nothing to reject and the
-- result is true.
-- @param word string: the headword to check
-- @param prons string: pronunciations separated by ","; syllables by " "
-- @return boolean: true when everything validates, false otherwise
return function(word, prons)
	-- Remove every Han character; the second result of gsub is the number
	-- of replacements, i.e. the number of Han characters in the word.
	local leftover, word_len = mw.ustring.gsub(word, Han_pattern, "")
	if leftover ~= "" then
		return false
	end
	for pron in prons:gmatch("[^,]+") do
		-- check spacing: exactly one space between syllables and none at
		-- either end. Rejecting edge spaces explicitly matters: inferring
		-- the syllable count from the space count would let e.g. "nei5 "
		-- pass for a two-character word.
		if pron:find("^ ") or pron:find(" $") or pron:find("  ", 1, true) then
			return false
		end
		-- check against [[Module:zh/data/Jyutping character]]?
		-- check jyutping validity, counting the syllables actually present
		local syl_count = 0
		for syl in pron:gmatch("[^ ]+") do
			if not check_jyutping(syl) then
				return false
			end
			syl_count = syl_count + 1
		end
		-- check length: one syllable per Han character
		if syl_count ~= word_len then
			return false
		end
	end
	return true
end