-- Module:yo-utilities
--
-- From Wiktionary, the free dictionary.


-- Table of functions this module exposes to its callers.
local export = {}

-- Shorthand for turning Unicode code points into a string.
local U = mw.ustring.char

-- Combining diacritics, kept as separate code points so they can be matched
-- individually in decomposed text.
local acute = U(0x301)
local grave = U(0x300)
local macron = U(0x304)
local underdot = U(0x323)

-- Character class matching a single acute, grave or macron mark.
local tone = "[" .. acute .. grave .. macron .. "]"

-- One of a/e/i/o/u (capture 1), then any run of tone marks, an optional
-- underdot (capture 2, empty when absent), any further tone marks, and "n".
local Vn_pattern = "([aeiou])" .. tone .. "*(" .. underdot .. "?)" .. tone .. "*n"

-- Reports whether a vowel is accepted as the vowel of a vowel + "n" sequence.
--   vowel:             base vowel letter (may be nil when a match failed)
--   possible_underdot: the underdot capture, i.e. the combining underdot or ""
-- Returns true for "a", "i" and "u" unconditionally, and for "e" and "o" only
-- when they carry the underdot; false for anything else (including nil).
local function can_be_nasal(vowel, possible_underdot)
	if vowel == "a" or vowel == "i" or vowel == "u" then
		return true
	end
	if vowel == "e" or vowel == "o" then
		-- These two qualify only in their underdotted form.
		return possible_underdot == underdot
	end
	return false
end

-- Decides whether a term should be treated as consisting of several words.
--
-- Any whitespace or punctuation character makes the term multiword, with one
-- exception: a hyphen is tolerated when the text before it ends in a
-- vowel + "n" sequence, the text after it begins with one, both vowels pass
-- can_be_nasal, and the two base vowels are identical (a term shaped like
-- "gan-an").
--
-- Adégbé-n-ró for Adégbén̄ró not supported (considered multiword).
--
--   term: the string to inspect
-- Returns true if the term counts as multiword, false otherwise.
function export.is_multiword(term)
	local ustring = mw.ustring

	-- Lowercase, then decompose so that diacritics become separate code
	-- points that the patterns can match.
	local decomposed = ustring.toNFD(ustring.lower(term))

	-- Anchored variants of the vowel + "n" pattern for either side of a hyphen.
	local ends_with_Vn = Vn_pattern .. "$"
	local starts_with_Vn = "^" .. Vn_pattern

	for break_start, separator, break_end in ustring.gmatch(decomposed, "()([%s%p])()") do
		-- Every separator other than a hyphen settles the question at once.
		if separator ~= "-" then
			return true
		end

		local left_part = ustring.sub(decomposed, 1, break_start - 1)
		local left_vowel, left_dot = ustring.match(left_part, ends_with_Vn)
		local right_vowel, right_dot = ustring.match(decomposed, starts_with_Vn, break_end)

		local joins_same_nasal = left_vowel == right_vowel
			and can_be_nasal(left_vowel, left_dot)
			and can_be_nasal(right_vowel, right_dot)
		if not joins_same_nasal then
			return true
		end
	end

	return false
end

-- Hand the table of exported functions back to whoever loads this module.
return export