Jump to content

Module:User:Manishearth/wuu-pron

From Wiktionary, the free dictionary

This is a private module sandbox of Manishearth, for their own experimentation. Items in this module may be added and removed at Manishearth's discretion; do not rely on this module's stability.


-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}

-- Romanised syllable initial -> IPA string.
-- Handed to generic_syl_conv by export.ipa_syl_conv. Keys beginning with an
-- apostrophe map to values prefixed with "ʔ"; the empty-string key covers
-- syllables that have no initial at all.
local ipa_initial = {
	["p"] = "p", ["ph"] = "pʰ", ["b"] = "b̥", ["m"] = "m", ["'m"] = "ʔm", ["f"] = "f", ["v"] = "v̥",
	["t"] = "t", ["th"] = "tʰ", ["d"] = "d̥", ["n"] = "n", ["'n"] = "ʔn", ["l"] = "l", ["'l"] = "ʔl",
	["ts"] = "t͡s", ["tsh"] = "t͡sʰ", ["s"] = "s", ["z"] = "z̥", ["j"] = "t͡ɕ", ["q"] = "t͡ɕʰ",
	["jj"] = "d̥͡ʑ", ["ny"] = "n̠ʲ", ["'ny"] = "ʔn̠ʲ", ["x"] = "ɕ", ["xx"] = "ʑ̥", ["k"] = "k",
	["kh"] = "kʰ", ["g"] = "ɡ̊", ["ng"] = "ŋ", ["'ng"] = "ʔŋ", ["h"] = "h", ["'"] = "ʔ", ["hh"] = "ɦ", [""] = ""
}

-- Romanised syllable final (everything after the initial) -> IPA string.
-- Handed to generic_syl_conv by export.ipa_syl_conv; a final missing from this
-- table makes generic_syl_conv raise "Unrecognised final".
local ipa_final = {
	["a"] = "a̱", ["o"] = "o", ["au"] = "ɔ", ["eu"] = "ɜ", ["e"] = "e̞", ["oe"] = "ø", ["i"] = "i",
	["ia"] = "ia̱", ["io"] = "io", ["iau"] = "iɔ", ["ieu"] = "iɜ", ["u"] = "v̩ʷ", ["ua"] = "ɯa̱",
	["ue"] = "ɯe̞", ["uoe"] = "v̩ʷø", ["y"] = "y", ["yoe"] = "yø", ["an"] = "ã", ["aan"] = "ɑ̃",
	["en"] = "əɲ", ["on"] = "ʊŋ", ["aq"] = "a̱ʔ", ["oq"] = "ʊʔ", ["eq"] = "əʔ", ["ian"] = "iã",
	["iaan"] = "iɑ̃", ["in"] = "ɪɲ", ["ion"] = "i̯ʊŋ", ["iaq"] = "ia̱ʔ", ["ioq"] = "i̯ʊʔ", ["iq"] = "i̯ɪʔ",
	["uan"] = "ɯã", ["uaan"] = "ɯɑ̃", ["un"] = "ɯə̯ɲ", ["uaq"] = "ɯa̱ʔ", ["ueq"] = "ɯə̯ʔ", ["yn"] = "ʏɲ",
	["yq"] = "ɥ̯ɪʔ", ["er"] = "əɻ", ["r"] = "z̩"
}

-- Syllabic nasals -> IPA string.
-- Consulted by generic_syl_conv for syllables that consist only of a nasal
-- (optionally preceded by an apostrophe and/or "h"). The lookup tries the bare
-- nasal first and the prefix+nasal spelling second.
local ipa_syllabic = {
	["mm"] = "m̩", ["ngg"] = "ŋ̍",
	["'mm"] = "ʔm̩", ["'ngg"] = "ʔŋ̍"
}

-- Romanised syllable initial -> Wugniu spelling.
-- Handed to generic_syl_conv by wugniu_conv. The key set must mirror
-- ipa_initial exactly, otherwise a syllable that converts to IPA raises
-- "Unrecognised initial" when converted to Wugniu.
--
-- The glottalised sonorants ('m, 'n, 'l, 'ny, 'ng) are spelled like their
-- plain counterparts, in line with the bare "'" initial mapping to "" and
-- wugniu_syllabic dropping the apostrophe as well.
local wugniu_initial = {
	["p"] = "p", ["ph"] = "ph", ["b"] = "b", ["m"] = "m", ["'m"] = "m", ["f"] = "f", ["v"] = "v",
	["t"] = "t", ["th"] = "th", ["d"] = "d", ["n"] = "n", ["'n"] = "n", ["l"] = "l", ["'l"] = "l",
	["ts"] = "ts", ["tsh"] = "tsh", ["s"] = "s", ["z"] = "z", ["j"] = "c", ["q"] = "ch",
	["jj"] = "j", ["ny"] = "gn", ["'ny"] = "gn", ["x"] = "sh", ["xx"] = "zh", ["k"] = "k",
	["kh"] = "kh", ["g"] = "g", ["ng"] = "ng", ["'ng"] = "ng", ["h"] = "h", ["'"] = "", ["hh"] = "gh", [""] = ""
}

-- Romanised syllable final -> Wugniu spelling, grouped by the leading vowel
-- of the romanised final. Handed to generic_syl_conv by wugniu_conv.
-- ("in" is a Lua keyword, so that one key keeps the bracketed form.)
local wugniu_final = {
	-- plain vowels
	a = "a", o = "o", au = "au", eu = "eu", e = "e", oe = "oe",
	-- i-
	i = "i", ia = "ia", io = "io", iau = "iau", ieu = "ieu",
	ian = "ian", iaan = "iaon", ["in"] = "in", ion = "ion",
	iaq = "iaq", ioq = "ioq", iq = "iq",
	-- u-
	u = "u", ua = "ua", ue = "ue", uoe = "uoe",
	uan = "uan", uaan = "uaon", un = "uen", uaq = "uaq", ueq = "ueq",
	-- y-
	y = "iu", yoe = "ioe", yn = "iun", yq = "iuq",
	-- nasal and checked finals without a medial
	an = "an", aan = "aon", en = "en", on = "on",
	aq = "aq", oq = "oq", eq = "eq",
	-- rhotic / apical
	er = "ei", r = "y",
}
-- Syllabic nasals -> Wugniu spelling. Handed to generic_syl_conv by
-- wugniu_conv; the apostrophe-prefixed spellings map to the same output as
-- the plain ones.
local wugniu_syllabic = {
	mm = "m",
	ngg = "ng",
	["'mm"] = "m",
	["'ngg"] = "ng",
}

-- Tone marker used in the input (a one-character string) -> the tone number
-- that wugniu_tone_conv prints in superscript.
local wugniu_tone_map = {
	["1"] = 1,
	["2"] = 5,
	["3"] = 6,
	["4"] = 7,
	["5"] = 8,
}

-- Tone contour lookup, written as superscript digits.
--
-- Keys of the form "<syllable count>-<tone marker>" are read by ipa_tone_conv;
-- the value holds one contour per syllable, separated by single spaces (the
-- single-syllable entries "1-0" and "1--" use the markers "0" and "-").
--
-- Keys of the form "<class>-single" / "<class>-multiple" are read by
-- convert_by_parts, where <class> is the letter returned by tone_determ and
-- the suffix depends on whether the word has more than one syllable.
local tone_contours = {
	["1-0"] = "", ["1--"] = "³³",
	["1-1"] = "⁵³", ["1-2"] = "³⁴", ["1-3"] = "²³", ["1-4"] = "⁵⁵", ["1-5"] = "¹²", 
	["2-1"] = "⁵⁵ ²¹", ["2-2"] = "³³ ⁴⁴", ["2-3"] = "²² ⁴⁴", ["2-4"] = "³³ ⁴⁴", ["2-5"] = "¹¹ ²³",
	["3-1"] = "⁵⁵ ³³ ²¹", ["3-2"] = "³³ ⁵⁵ ²¹", ["3-3"] = "²² ⁵⁵ ²¹", ["3-4"] = "³³ ⁵⁵ ²¹", ["3-5"] = "¹¹ ²² ²³",
	["4-1"] = "⁵⁵ ³³ ³³ ²¹", ["4-2"] = "³³ ⁵⁵ ³³ ²¹", ["4-3"] = "²² ⁵⁵ ³³ ²¹", ["4-4"] = "³³ ⁵⁵ ³³ ²¹", ["4-5"] = "²² ⁵⁵ ³³ ²¹",
	["5-1"] = "⁵⁵ ³³ ³³ ³³ ²¹", ["5-2"] = "³³ ⁵⁵ ³³ ³³ ²¹", ["5-3"] = "²² ⁵⁵ ³³ ³³ ²¹", ["5-4"] = "³³ ⁵⁵ ³³ ³³ ²¹", ["5-5"] = "²² ⁵⁵ ³³ ³³ ²¹",

	-- Replacement contours for the last syllable of a word that is followed
	-- by another "+"-joined word (applied in convert_by_parts).
	["A-single"] = "⁴⁴", ["B-single"] = "³³", ["C-single"] = "⁴⁴", ["D-single"] = "²²",
	["A-multiple"] = "³³", ["B-multiple"] = "³³", ["C-multiple"] = "³³", ["D-multiple"] = "³³",
}


-- "<voicing>-<coda>" (the two values returned by determ_syl, joined with a
-- hyphen) -> class letter used in the "<class>-single" / "<class>-multiple"
-- keys of tone_contours.
local tone_table = {
	-- unchecked syllables
	["voiceless-unchecked"] = "A", ["voiced-unchecked"] = "B",
	-- checked syllables
	["voiceless-checked"] = "C", ["voiced-checked"] = "D",
}

--- Classify a romanised syllable by the voicing of its initial and by its coda.
-- @param text  romanised syllable without a tone marker
-- @return "voiced" or "voiceless"
-- @return "checked" when the syllable ends in "q", otherwise "unchecked"
local function determ_syl(text)
	-- Any one of these patterns marks the syllable as voiced. The first three
	-- are anchored to the start; "jj", "xx" and "hh" may match anywhere.
	local voiced_patterns = { "^[bvdlzg]", "^m[^m]", "^n[^n]", "jj", "xx", "hh" }

	local voicing = "voiceless"
	for _, pattern in ipairs(voiced_patterns) do
		if text:find(pattern) then
			voicing = "voiced"
			break
		end
	end

	local coda = text:find("q$") and "checked" or "unchecked"
	return voicing, coda
end

--- Look up the class letter for a syllable.
-- Joins the two results of determ_syl with a hyphen and uses that as the key
-- into tone_table.
-- @param text  romanised syllable without a tone marker
-- @return the matching tone_table entry
local function tone_determ(text)
	local voicing, coda = determ_syl(text)
	local key = voicing .. "-" .. coda
	return tone_table[key]
end

--- Validate one word of romanised input; raises an error on the first problem.
-- The first character of `text` is taken as the tone marker and only the
-- first space-separated syllable after it is inspected.
-- @param text  tone marker followed by space-separated syllables
-- @return nil (the function is called for its errors only)
local function rom_check(text)
	local tone = text:sub(1, 1)
	local first_syllable = mw.text.split(text:sub(2, -1), " ")[1]
	local voicing, coda = determ_syl(first_syllable)

	-- Spellings that should have been written with a palatal initial.
	local unpalatalised = first_syllable:find("[kgs]h?[iy]")
		or first_syllable:find("^z[iy]")
		or first_syllable:find("^ni")
	if unpalatalised then
		error("Invalid syllable: " .. first_syllable .. ". Palatalisation expected.")
	end

	-- The voicing of the initial restricts which tone markers are allowed.
	if voicing == "voiced" then
		if tone:find("[124]") then
			error("Invalid syllable: " .. first_syllable .. tone .. ". Voiced initials only occur in tones 3 and 5.")
		end
	elseif voicing == "voiceless" then
		if tone:find("[35]") then
			error("Invalid syllable: " .. first_syllable .. tone .. ". Voiceless initials only occur in tones 1, 2 and 4.")
		end
	end

	-- So does the coda.
	local is_checked = (coda == "checked")
	if is_checked then
		if tone:find("[123]") then
			error("Checked syllables only occur in tones 4 and 5.")
		end
	elseif tone:find("[45]") then
		error("Unchecked syllables only occur in tones 1, 2 and 3.")
	end

	return nil
end


--- Convert a romanised pronunciation string word by word.
--
-- The input is split on "," into readings, each reading on "&" into
-- components, each component on "+" into independent words, and each word on
-- " " into syllables. Every word starts with a one-character tone marker.
-- Converted syllables are re-joined with " ", words with two spaces and
-- components with " ".
--
-- @param original_text  the input string, or a frame-like table whose
--                       `args[1]` holds it
-- @param syl_conv       function(syllable) -> converted syllable
-- @param tone_conv      function(no_syllables, tone_marker) -> array with one
--                       tone string per syllable
-- @param apply_ipa_tone_change  when truthy, the last syllable of a word that
--                       is followed by another "+"-joined word gets its tone
--                       replaced by the "<class>-single/multiple" contour
-- @return array of converted readings (one string per ","-separated reading)
-- Raises whatever rom_check, tone_conv or syl_conv raise on invalid input.
local function convert_by_parts(original_text, syl_conv, tone_conv, apply_ipa_tone_change)
	if type(original_text) == "table" then original_text = original_text.args[1] end
	original_text = mw.ustring.lower(original_text)

	local reading = mw.text.split(original_text, ",", true)
	for reading_index = 1, #reading do
		local components = mw.text.split(reading[reading_index], "&", true)
		for component_index = 1, #components do
			local indep_words = mw.text.split(components[component_index], "+", true)
			for indep_index = 1, #indep_words do
				local text = indep_words[indep_index]
				-- Syllables are space-separated: count = number of spaces + 1.
				local _, no_spaces = text:gsub(" ", "")
				local no_syllables = no_spaces + 1
				rom_check(text)

				local syl_tone = tone_conv(no_syllables, text:sub(1, 1))
				-- Snapshot of the whole word's contour before any syllable is
				-- altered. A word whose contour is exactly "³³" keeps it even
				-- when another word follows. (This guard used to read an
				-- undefined global `tone`, so it was always true.)
				local citation_contour = apply_ipa_tone_change and table.concat(syl_tone, " ")

				local syllable = mw.text.split(text:sub(2, -1), " ", true)
				for i = 1, no_syllables do
					-- Later entries of indep_words are still unconverted here;
					-- only their existence matters.
					if apply_ipa_tone_change and i == no_syllables
						and indep_words[indep_index + 1] and citation_contour ~= "³³" then
						syl_tone[i] = tone_contours[tone_determ(syllable[i]) .. "-" ..
							(no_syllables > 1 and "multiple" or "single")]
					end
					syllable[i] = syl_conv(syllable[i]) .. syl_tone[i]
				end
				indep_words[indep_index] = table.concat(syllable, " ")
			end
			components[component_index] = table.concat(indep_words, "  ")
		end
		reading[reading_index] = table.concat(components, " ")
	end
	return reading
end

--- Look up the IPA tone contours for a word.
-- Declared local so the helper no longer leaks into the global environment;
-- it is defined before export.ipa_conv, its only user in this file.
-- @param no_syllables   number of syllables in the word
-- @param citation_tone  the word's one-character tone marker
-- @return array of contour strings, obtained by splitting the
--         tone_contours entry for "<no_syllables>-<citation_tone>" on spaces
-- Raises an error when tone_contours has no such entry.
local function ipa_tone_conv(no_syllables, citation_tone)
	local tones = tone_contours[no_syllables.."-"..citation_tone] or error("Tone notation is incorrect. See [[WT:WUU]].")
	return mw.text.split(tones, " ", true)
end

--- Convert romanised input to IPA.
-- Runs convert_by_parts with the IPA syllable and tone converters and with
-- the tone change between "+"-joined words enabled, then joins the readings
-- with "/, /".
-- @param original_text  input string, or a frame-like table (see convert_by_parts)
-- @return a single string
function export.ipa_conv(original_text)
	return table.concat(
		convert_by_parts(original_text, export.ipa_syl_conv, ipa_tone_conv, true),
		"/, /"
	)
end

--- Convert one romanised syllable (no tone marker) to IPA.
-- Delegates to generic_syl_conv with the three IPA lookup tables.
-- @param text  romanised syllable
-- @return the IPA string
function export.ipa_syl_conv(text)
	local ipa = generic_syl_conv(text, ipa_initial, ipa_final, ipa_syllabic)
	return ipa
end

--- Convert one romanised syllable using the supplied lookup tables.
--
-- Three shapes are recognised, tried in this order:
--   1. a syllabic nasal, optionally preceded by "'" and/or up to two "h";
--   2. a syllable starting with "ny" or "'ny";
--   3. any other initial followed by a final.
--
-- This function stays global on purpose: export.ipa_syl_conv is defined
-- above it and resolves the name at call time.
--
-- @param text      romanised syllable without a tone marker
-- @param initial   table: romanised initial -> output string
-- @param final     table: romanised final -> output string
-- @param syllabic  table: syllabic nasal -> output string
-- @return the converted syllable
-- Raises an error when the syllable has none of the shapes above or when a
-- required table entry is missing.
function generic_syl_conv(text, initial, final, syllabic)
	-- 1. Syllabic nasal. An unknown prefix contributes nothing to the output.
	local prefix, nasal = text:match("^(%'?h?h?)([mn][mg]?g?)$")
	if prefix then
		local nucleus = syllabic[nasal] or syllabic[prefix .. nasal]
			or error(("Invalid syllable: \"%s\""):format(text))
		return (initial[prefix] or '') .. nucleus
	end

	local onset, rime
	if text:find("^%'?ny") then
		-- 2. "ny" must be split off as a unit before the final is read.
		onset, rime = text:match("^([\']?ny)([aeiouyr][aeou]?[aeu]?[nqr]?)$")
	else
		-- 3. Everything else.
		onset, rime = text:match("^([\']?[pbmfvtdnlszjqxkghr%']?[sjgx]?[h]?)([aeiouyr][aeou]?[aeu]?[nqr]?)$")
	end

	-- A "ny" syllable with a malformed remainder used to be returned
	-- unchanged; it is rejected like any other invalid syllable now.
	if not onset then
		return error(("Invalid syllable: \"%s\""):format(text))
	end

	return (initial[onset] or error(("Unrecognised initial: \"%s\""):format(onset))) ..
		(final[rime] or error(("Unrecognised final: \"%s\""):format(rime)))
end

--- Reformat romanised input for display.
-- Readings are separated by "," in the input and joined with "; " in the
-- output; the "+"-separated parts of a reading are joined with " + ". A part
-- that begins with a digit or "-" has that first character moved to the end
-- as " (T<character>)".
-- @param text  input string, or a frame-like table whose `args[1]` holds it
-- @return the formatted string
function export.rom(text)
	if type(text) == 'table' then text = text.args[1] end

	local readings = mw.text.split(text, ",", true)
	for r, reading in ipairs(readings) do
		local words = mw.text.split(reading, '+', true)
		for w, word in ipairs(words) do
			local tone, rest = word:match('^([0-9%-])(.*)$')
			if tone then
				words[w] = rest .. ' (T' .. tone .. ')'
			end
		end
		readings[r] = table.concat(words, ' + ')
	end

	return table.concat(readings, "; ")
end

--- Build the per-syllable tone strings for Wugniu output.
-- Only the first syllable carries a tone: the number from wugniu_tone_map
-- wrapped in <sup> tags. Every later syllable gets an empty string.
-- Declared local so the helper no longer leaks into the global environment;
-- it is defined before export.wugniu, its only user in this file.
-- @param no_syllables   number of syllables in the word
-- @param citation_tone  the word's one-character tone marker
-- @return array with no_syllables strings
-- Raises an error when the marker has no entry in wugniu_tone_map (this
-- used to fail with a bare "attempt to concatenate a nil value").
local function wugniu_tone_conv(no_syllables, citation_tone)
	local tones = {}
	if no_syllables >= 1 then
		local tone_number = wugniu_tone_map[citation_tone]
			or error("Tone notation is incorrect. See [[WT:WUU]].")
		tones[1] = "<sup>" .. tone_number .. "</sup>"
	end
	for i = 2, no_syllables do
		tones[i] = ""
	end
	return tones
end

--- Convert one romanised syllable (no tone marker) to its Wugniu spelling.
-- Delegates to generic_syl_conv with the three Wugniu lookup tables.
-- Declared local so the helper no longer leaks into the global environment;
-- it is defined before export.wugniu, its only user in this file.
-- @param text  romanised syllable
-- @return the Wugniu string
local function wugniu_conv(text)
	return generic_syl_conv(text, wugniu_initial, wugniu_final, wugniu_syllabic)
end

--- Convert romanised input to Wugniu romanisation.
-- Runs convert_by_parts with the Wugniu syllable and tone converters and
-- without the IPA-only tone change, then joins the readings with ", ".
-- @param original_text  input string, or a frame-like table (see convert_by_parts)
-- @return a single string
function export.wugniu(original_text)
	return table.concat(
		convert_by_parts(original_text, wugniu_conv, wugniu_tone_conv, false),
		", "
	)
end

return export