Module:zu-common

From Wiktionary, the free dictionary
Jump to navigation Jump to search
This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.

-- Module table: everything attached to `export` is returned to callers.
local export = {}

-- Shorthand for building a string from Unicode code points.
local u = mw.ustring.char

-- Combining diacritics used by the functions below.
local ACUTE     = u(0x0301) -- U+0301; apply_tone adds it for tone "H"
local CIRC      = u(0x0302) -- U+0302; apply_tone adds it for tone "F"
local MACRON    = u(0x0304) -- U+0304; removed again when a circumflex is added
-- NOTE(review): U+0324 is COMBINING DIAERESIS BELOW; the usual IPA
-- syllabicity mark is U+0329. Confirm which code point is intended here.
local SYLL      = u(0x0324)

-- The three strings below are used as the *contents* of pattern character
-- classes (they are spliced between "[" and "]"), not as standalone patterns.

-- All combining diacritics that may sit on a vowel.
export.diacritic = MACRON .. ACUTE .. CIRC
-- Vowels without a tone mark (plain and with macron), plus "." and SYLL,
-- which are treated like vowels when syllables are matched.
export.toneless_vowel = "aeiouāēīōūAEIOUĀĒĪŌŪ." .. SYLL
-- Everything in toneless_vowel, plus precomposed tone-marked vowels and the
-- bare combining diacritics.
export.vowel = export.toneless_vowel .. "áéíóúâêîôûḗṓÁÉÍÓÚÂÊÎÔÛḖṒ" .. export.diacritic


--- Split a word into syllables.
-- A syllable is an optional run of non-vowel characters followed by a run of
-- vowel characters, where "vowel" means any character in export.vowel.
-- Non-vowel characters left over at the end of the word are attached to the
-- last syllable, so no character of the input is dropped.
-- Use '.' to denote syllabic m. e.g. um.ndeni
-- @param word  the string to split
-- @return      array of syllable strings; {} for an empty word, and {word}
--              for a word that contains no vowel at all
function export.split_syllables(word)
	local syllables = {}
	local nonvowel_run = "[^" .. export.vowel .. "]*"

	for syll in mw.ustring.gmatch(word, nonvowel_run .. "[" .. export.vowel .. "%.]+") do
		table.insert(syllables, syll)
	end

	-- Trailing non-vowel characters (the empty string when the word ends in a vowel).
	local tail = mw.ustring.match(word, nonvowel_run .. "$")

	if #syllables == 0 then
		-- No vowel was found, so there is no syllable to attach the tail to.
		-- Indexing syllables[0] here would fail with a nil concatenation,
		-- so return the whole word as a single syllable instead.
		if tail ~= "" then
			table.insert(syllables, tail)
		end
	else
		syllables[#syllables] = syllables[#syllables] .. tail
	end

	return syllables
end

-- Lookup set of depressor consonants, each also accepted with a following "w".
-- Built once at module load instead of on every call.
local DEPRESSOR_ONSETS = {}
for _, onset in ipairs({"bh", "d", "dl", "g", "gc", "gq", "gx", "hh", "j", "mb", "mv", "nd", "ndl", "ng", "ngc", "ngq", "ngx", "nj", "nz", "v", "z"}) do
	DEPRESSOR_ONSETS[onset] = true
	DEPRESSOR_ONSETS[onset .. "w"] = true
end

--- Shift high tones off syllables that begin with a depressor consonant.
-- For every syllable marked "H" whose consonant is a depressor while the next
-- syllable's consonant is not, and which has at least two syllables after it,
-- the syllable becomes "L". If the next syllable was "L" it becomes "H" when
-- it lies before the penult, or "F" when it is the penult; otherwise it is
-- left as it was.
-- @param syllables  array of syllable strings
-- @param pattern    array of tone letters, one per syllable; modified in place
-- @return           the same pattern table
-- Raises an error when the two arrays differ in length.
local function depressor_shift(syllables, pattern)
	if #pattern ~= #syllables then
		error("Number of syllables and number of tones do not match.")
	end

	-- The consonant of a syllable is everything except its last character.
	-- The cut-off must be counted in code points (mw.ustring.len), not bytes
	-- (#), because mw.ustring.sub indexes by code point.
	local onsets = {}
	for i, syll in ipairs(syllables) do
		onsets[i] = mw.ustring.sub(syll, 1, mw.ustring.len(syll) - 1)
	end

	local count = #onsets
	for i, onset in ipairs(onsets) do
		-- Number of syllables after this one; 2 means the next is the penult.
		local remaining = count - i
		if remaining >= 2
			and pattern[i] == "H"
			and DEPRESSOR_ONSETS[onset]
			and not DEPRESSOR_ONSETS[onsets[i + 1]]
		then
			pattern[i] = "L"
			if pattern[i + 1] == "L" then
				if remaining == 2 then
					pattern[i + 1] = "F" -- next syllable is penultimate
				else
					pattern[i + 1] = "H" -- next syllable is before the penult
				end
			end
		end
	end

	return pattern
end

--- Add tone diacritics to a word.
-- @param word     the word to mark; it is split with export.split_syllables
-- @param pattern  string of tone letters, one per syllable: "H" adds an acute,
--                 "F" adds a circumflex, "L" leaves the syllable unmarked.
--                 When nil, every syllable is treated as "L".
-- @param shift    when nil or truthy, the pattern is first adjusted by
--                 depressor_shift; pass false to use the pattern as given
-- @return         the marked word in NFC form
-- Raises an error when the pattern length differs from the syllable count or
-- when the pattern contains a letter other than H, F or L.
function export.apply_tone(word, pattern, shift)
	local syllables = export.split_syllables(word)
	local tones = mw.text.split(pattern or mw.ustring.rep("L", #syllables), "")

	if #syllables ~= #tones then
		error("The word \"" .. table.concat(syllables) .. "\" and the tone pattern " .. table.concat(tones) .. " have different numbers of syllables.")
	end

	-- A missing `shift` argument means "shift".
	if shift == nil or shift then
		tones = depressor_shift(syllables, tones)
	end

	-- Captures each vowel that does not yet carry a tone mark.
	local bare_vowel = "([" .. export.toneless_vowel .. "])"

	for i = 1, #tones do
		local tone = tones[i]
		if tone == "H" then
			local marked = mw.ustring.gsub(syllables[i], bare_vowel, "%1" .. ACUTE)
			syllables[i] = marked
		elseif tone == "F" then
			local marked = mw.ustring.gsub(syllables[i], bare_vowel, "%1" .. CIRC)
			-- Delete macron under circumflex: decompose so the macron is a
			-- separate code point, drop it, then recompose.
			local without_macron = mw.ustring.gsub(mw.ustring.toNFD(marked), MACRON, "")
			syllables[i] = mw.ustring.toNFC(without_macron)
		elseif tone ~= "L" then
			error("Invalid character \"" .. tone .. "\" in tone pattern string.")
		end
	end

	local joined = mw.ustring.toNFC(table.concat(syllables))
	-- Remove the combining mark in the literal below from the final result
	-- (presumably a syllabicity mark — verify against SYLL).
	local cleaned = mw.ustring.gsub(joined, "̩", "")
	return cleaned
end

--- Strip the tone diacritics from a word.
-- The word is split with export.split_syllables; each syllable has any '.'
-- removed and is decomposed (NFD) so tone marks are separate code points.
-- The tone of a syllable is taken from its last acute ("H") or circumflex
-- ("F"); a syllable with neither is "L". The mark may be followed by other
-- characters (e.g. consonants attached to the final syllable), and every
-- acute/circumflex in the syllable is removed from the stripped output.
-- @param word  the tone-marked word
-- @return      a two-element table {stripped, tones}: the word without tone
--              marks (NFC), and a tone pattern consisting of H, L, and F
function export.split_tone(word)
	local syllables = export.split_syllables(word)
	local tone_mark = "[" .. ACUTE .. CIRC .. "]"
	local tones = {}
	local stripped = {}

	for i, syll in ipairs(syllables) do
		-- remove any '.' char and convert to NFD
		syll = mw.ustring.toNFD((mw.ustring.gsub(syll, "%.", "")))

		-- Greedy ".*" makes the capture the LAST tone mark in the syllable.
		local mark = mw.ustring.match(syll, ".*(" .. tone_mark .. ")")
		if mark == ACUTE then
			tones[i] = "H"
		elseif mark == CIRC then
			tones[i] = "F"
		else
			tones[i] = "L"
		end

		stripped[i] = (mw.ustring.gsub(syll, tone_mark, ""))
	end

	return {mw.ustring.toNFC(table.concat(stripped)), table.concat(tones)}
end

return export