Jump to content

Module:xh-common

From Wiktionary, the free dictionary

This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.

-- Table holding everything this module exposes to callers.
local export = {}

-- Shorthand: builds a string from Unicode code points.
local u = mw.ustring.char

-- Combining marks referenced throughout the module.
local ACUTE = u(0x0301) -- U+0301 COMBINING ACUTE ACCENT
local CIRC = u(0x0302) -- U+0302 COMBINING CIRCUMFLEX ACCENT
local SYLL = u(0x0324) -- U+0324 COMBINING DIAERESIS BELOW

-- Vowel letters written with an acute or circumflex accent.
local accented_vowels = "áéíóúâêîôûÁÉÍÓÚÂÊÎÔÛ"

-- The strings below are spliced into [...] character classes by the
-- functions of this module, so they are lists of characters, not words.
export.diacritic = ACUTE .. CIRC
export.toneless_vowel = "aeiouAEIOU." .. SYLL
export.vowel = export.toneless_vowel .. accented_vowels .. export.diacritic

-- Split a word into its syllables.
--
-- A syllable is a (possibly empty) run of characters that are not in
-- export.vowel, followed by one or more characters that are. Use '.' to
-- denote syllabic m, e.g. um.ntu: the '.' counts as the nucleus of the
-- syllable it ends, and one trailing '.' is dropped from each syllable.
-- Whatever follows the last nucleus is appended to the final syllable.
--
-- @param word  string to split
-- @return      array of syllable strings. An empty word yields an empty
--              array; a non-empty word without any vowel or '.' is returned
--              as a single syllable.
function export.split_syllables(word)
	local consonants = "[^" .. export.vowel .. "]"
	local nucleus = "[" .. export.vowel .. "%.]"
	local syllables = {}

	for syll in mw.ustring.gmatch(word, consonants .. "*" .. nucleus .. "+") do
		-- The '.' is input notation only; it must not reach the output.
		if mw.ustring.sub(syll, -1) == "." then
			syll = mw.ustring.sub(syll, 1, -2)
		end

		syllables[#syllables + 1] = syll
	end

	-- No nucleus at all: there is no last syllable to attach the remainder
	-- to, so indexing syllables[#syllables] would raise on a nil value.
	if #syllables == 0 then
		if word == "" then
			return syllables
		end
		return { word }
	end

	-- Trailing non-vowel characters belong to the final syllable.
	syllables[#syllables] = syllables[#syllables] .. mw.ustring.match(word, consonants .. "*$")

	return syllables
end


-- Write a tone pattern onto a word.
--
-- The word is split with export.split_syllables and the pattern is read one
-- character per syllable: "H" puts an acute accent and "F" a circumflex on
-- every character of the syllable that is in export.toneless_vowel, while
-- "L" leaves the syllable unchanged.
--
-- @param word     string to mark
-- @param pattern  string of "H", "F" and "L", one per syllable; when nil,
--                 every syllable is treated as "L"
-- @return         the marked word in NFC, with syllabic markers removed
-- Raises an error when the pattern length differs from the syllable count
-- or when the pattern contains any other character.
function export.apply_tone(word, pattern)
	local syllables = export.split_syllables(word)
	-- Splitting on the empty string yields one entry per character.
	local tones = mw.text.split(pattern or mw.ustring.rep("L", #syllables), "")

	if #syllables ~= #tones then
		error("The word \"" .. table.concat(syllables) .. "\" and the tone pattern " .. table.concat(tones) .. " have different numbers of syllables.")
	end

	local toneless = "([" .. export.toneless_vowel .. "])"

	for i, tone in ipairs(tones) do
		if tone == "F" then
			syllables[i] = mw.ustring.gsub(syllables[i], toneless, "%1" .. CIRC)
		elseif tone == "H" then
			syllables[i] = mw.ustring.gsub(syllables[i], toneless, "%1" .. ACUTE)
		elseif tone ~= "L" then
			error("Invalid character \"" .. tone .. "\" in tone pattern string.")
		end
	end

	local composed = mw.ustring.toNFC(table.concat(syllables))

	-- Drop the syllabic marker from the result. SYLL is the mark that the
	-- syllable splitter recognises, so it is removed here as well as the
	-- literal mark this function has always removed.
	-- NOTE(review): confirm whether SYLL and the literal are meant to be
	-- the same code point.
	-- The outer parentheses discard gsub's replacement count.
	return (mw.ustring.gsub(composed, "[" .. SYLL .. "̩]", ""))
end

-- Separate a tone-marked word into its bare spelling and its tone pattern.
--
-- The word is split with export.split_syllables. Each syllable contributes
-- one letter to the pattern: "H" when its last tone mark is an acute accent,
-- "F" when it is a circumflex, and "L" when it carries neither. All acute
-- and circumflex marks, and every '.', are removed from the spelling.
--
-- @param word  string to analyse
-- @return      two-element array: { spelling in NFC, tone pattern string }
function export.split_tone(word)
	local tone_marks = export.diacritic
	-- Captures the last tone mark of a syllable, wherever it sits.
	local last_mark = "([" .. tone_marks .. "])[^" .. tone_marks .. "]*$"
	local any_mark = "[" .. tone_marks .. "]"

	local tones = {}
	local stripped = {}

	for i, syll in ipairs(export.split_syllables(word)) do
		-- Remove any '.' char and convert to NFD so that accents become
		-- separate combining marks. The inner parentheses keep gsub's
		-- replacement count from being passed on to toNFD.
		syll = mw.ustring.toNFD((mw.ustring.gsub(syll, "%.", "")))

		local mark = mw.ustring.match(syll, last_mark)
		if mark == ACUTE then
			tones[i] = "H"
		elseif mark == CIRC then
			tones[i] = "F"
		else
			tones[i] = "L"
		end

		-- Strip every tone mark, not just a final one: a syllable may hold
		-- several marked vowels or end in unmarked consonants.
		stripped[i] = (mw.ustring.gsub(syll, any_mark, ""))
	end

	return {mw.ustring.toNFC(table.concat(stripped)), table.concat(tones)}
end

-- Hand the module table back to whoever loads this module.
return export