Module:hnm-pron

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module is experimental.
The details of its operation have not yet been fully decided upon. Do not deploy widely until the module is finished.

This module does romanisation conversion, IPA conversion, etc. for Hainanese. See {{zh-pron}}.


-- Mostly based on 《海南方言研究》 by 陳波
local export = {}
local m_string_utils = require("Module:string utilities")

local sub = mw.ustring.sub
local len = m_string_utils.len
local match = m_string_utils.match
local split = mw.text.split
local gsplit = mw.text.gsplit

-- Lookup from a romanised initial to its IPA transcription.
-- The empty-string key is used for syllables written without any initial.
local initial_ipa = {
	-- b / p / m / v
	["b"] = "ɓ",
	["p"] = "pʰ",
	["m"] = "m",
	["v"] = "b",
	-- d / dd / dh / n / l
	["d"] = "t",
	["dd"] = "ɗ",
	["dh"] = "d",
	["n"] = "n",
	["l"] = "l",
	-- g / gh / ng / h / hh
	["g"] = "k",
	["gh"] = "ɡ",
	["ng"] = "ŋ",
	["h"] = "x",
	["hh"] = "ɦ",
	-- z / s / y
	["z"] = "t͡ɕ",
	["s"] = "ɕ",
	["y"] = "d͡ʑ",
	-- no written initial
	[""] = "ʔ",
}

-- Lookup from a romanised final to its IPA transcription.
-- Keys stay in bracketed string form because "in" is a reserved word in Lua.
local final_ipa = {
	-- finals ending in a vowel
	["i"] = "i",
	["u"] = "u",
	["a"] = "a",
	["ia"] = "ia",
	["ua"] = "ua",
	["o"] = "o",
	["io"] = "io",
	["e"] = "e",
	["ue"] = "ue",
	["ai"] = "ai",
	["uai"] = "uai",
	["oi"] = "oi",
	["ui"] = "ui",
	["ao"] = "au",
	["iao"] = "iau",
	["ou"] = "ou",
	["iu"] = "iu",
	-- finals ending in -m
	["am"] = "am",
	["iam"] = "iam",
	["om"] = "om",
	["im"] = "iom",
	-- finals ending in -n
	["an"] = "an",
	["uan"] = "uan",
	["in"] = "ien",
	["un"] = "uon",
	-- finals ending in -ng
	["ang"] = "aŋ",
	["iang"] = "iaŋ",
	["uang"] = "uaŋ",
	["eng"] = "eŋ",
	["ong"] = "oŋ",
	["iong"] = "ioŋ",
	-- finals ending in -b
	["ab"] = "ap",
	["iab"] = "iap",
	["ob"] = "op",
	["ib"] = "iop",
	-- finals ending in -d
	["ad"] = "at",
	["uad"] = "uat",
	["id"] = "iet",
	["ud"] = "uot",
	-- finals ending in -g
	["ag"] = "ak",
	["iag"] = "iak",
	["uag"] = "uak",
	["eg"] = "ek",
	["og"] = "ok",
	["iog"] = "iok",
	-- finals ending in -h
	["ih"] = "iʔ",
	["uh"] = "uʔ",
	["ah"] = "aʔ",
	["iah"] = "iaʔ",
	["uah"] = "uaʔ",
	["oh"] = "oʔ",
	["ioh"] = "ioʔ",
	["eh"] = "eʔ",
	["ueh"] = "ueʔ",
	["oih"] = "oiʔ",
}

-- Chao tone values, written as superscript digits, keyed by tone label.
-- export.ipa appends "A" or "B" to tones 7 and 8 before looking them up, so
-- those tones are found under "7A"/"7B"/"8A"/"8B". The bare "8" key is kept
-- only for backward compatibility. "4S" and "8S" are labels returned by
-- tone_sandhi.
local tone_chao = {
	["1"] = "³³", ["2"] = "²²", ["3"] = "³¹",
	["4"] = "¹¹", ["5"] = "⁴²", ["6"] = "⁵³",
	["7A"] = "⁵", ["7B"] = "⁵³",
	["8"] = "³", ["8A"] = "³", ["8B"] = "⁴²",
	["4S"] = "⁵⁵", ["8S"] = "¹"
}

-- Find the sandhi tone of the first syllable in a two-syllable word.
--
-- tone1 and tone2 are tone labels such as "3", "7A" or "8B". export.ipa passes
-- tones 7 and 8 with an "A"/"B" suffix, so the suffix is stripped from tone1
-- before it is compared with a bare digit; otherwise the tone-7 and tone-8
-- rules could never fire. tone2 is only tested for the digits it contains, so
-- its suffix is harmless.
-- NOTE(review): applying the tone-7/8 rules to both the A and B variants is an
-- assumption based on the rules being written per base tone -- confirm
-- against the source description.
--
-- Returns the label of the changed tone (a key usable with the Chao tone
-- table), or nil if the tone of the first syllable does not change.
local function tone_sandhi(tone1, tone2)
	-- Parentheses drop gsub's second result (the substitution count).
	local base1 = (string.gsub(tone1, "[AB]$", ""))

	-- True when tone2 contains a character from the given pattern set.
	local function second_has(set)
		return string.find(tone2, set) ~= nil
	end

	if base1 == "1" or ((base1 == "4" or base1 == "6") and second_has("[67]")) then
		return "5"
	elseif (base1 == "2" or base1 == "5") and tone2 ~= "4" then
		return "4"
	elseif base1 == "3" and second_has("[1-48]") then
		return "6"
	elseif (base1 == "3" and second_has("[5-7]")) or (base1 == "7" and second_has("[67]")) then
		return "1"
	elseif base1 == "4" and second_has("[1-4]") then
		return "4S"
	elseif base1 == "4" and second_has("[58]") then
		return "7B"
	elseif base1 == "8" then
		return "8S"
	end
	return nil
end

-- Convert Hainanese Pinyin to IPA.
--
-- `text` is either the romanisation string or a table whose `args[1]` holds
-- that string. Alternative readings are separated by "/", the syllables of a
-- reading by a single space, and each syllable is written as
-- initial + final + tone digit (1-8).
--
-- For a two-syllable reading, the changed tone of the first syllable (if
-- tone_sandhi reports one) is appended after "⁻".
--
-- Returns the readings, each wrapped in slashes, joined with ", ".
-- Raises an error naming the offending syllable when a syllable does not fit
-- the expected shape or uses an initial, final or tone that has no entry in
-- the lookup tables.
function export.ipa(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local result = {}

	for word in gsplit(text, "/") do
		local syllables = split(word, " ")
		-- segments[i]: IPA of initial + final; tone[i]: tone label of syllable i
		local segments, tone, ipa = {}, {}, {}

		for i, syllable in ipairs(syllables) do
			local initial, final, digit = match(syllable, "^([bpmvdnlghzsy]?[dhg]?)([aeiou][aeioumnbdgh]?[iomnbdgh]?g?)([1-8])$")
			if not initial then
				error("Invalid Hainanese syllable: \"" .. syllable .. "\"")
			end

			-- The pattern is looser than the tables (e.g. it accepts "bd" as
			-- an initial), so the lookups still have to be checked.
			local initial_sound = initial_ipa[initial]
			local final_sound = final_ipa[final]
			if not initial_sound then
				error("Unknown initial \"" .. initial .. "\" in Hainanese syllable \"" .. syllable .. "\"")
			end
			if not final_sound then
				error("Unknown final \"" .. final .. "\" in Hainanese syllable \"" .. syllable .. "\"")
			end

			-- Tones 7 and 8 are split in two: "B" when the final ends in
			-- "h", "A" otherwise.
			if match(digit, "[78]") then
				digit = digit .. (match(final, "h$") and "B" or "A")
			end
			if not tone_chao[digit] then
				error("No tone value defined for tone " .. digit .. " in Hainanese syllable \"" .. syllable .. "\"")
			end

			segments[i] = initial_sound .. final_sound
			tone[i] = digit
		end

		-- Sandhi is only computed for two-syllable words, and only the first
		-- syllable can change.
		local first_sandhi
		if #syllables == 2 then
			first_sandhi = tone_sandhi(tone[1], tone[2])
		end

		for i = 1, #syllables do
			local actual_tone = tone_chao[tone[i]]
			if i == 1 and first_sandhi then
				actual_tone = actual_tone .. "⁻" .. tone_chao[first_sandhi]
			end
			ipa[i] = segments[i] .. actual_tone
		end
		table.insert(result, table.concat(ipa, " "))
	end

	return "/" .. table.concat(result, "/, /") .. "/"
end

-- Format a Hainanese Pinyin string for display.
--
-- `text` is the romanisation string, or a table whose `args[1]` holds it (the
-- same calling convention export.ipa accepts).
--
-- Every "/" is padded with a space on each side, and each run of the digits
-- 1-9 and hyphens is wrapped in <sup>...</sup>.
-- Returns the formatted string.
function export.rom(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	-- Both patterns are pure ASCII, so the byte-oriented string.gsub is safe
	-- on UTF-8 input. Assigning to a single variable discards gsub's second
	-- result (the substitution count).
	text = string.gsub(text, "/", " / ")
	text = string.gsub(text, '([1-9-]+)', '<sup>%1</sup>')
	return text
end

return export