Jump to content

Module:cpx-pron

From Wiktionary, the free dictionary


local export = {}
local m_string_utils = require("Module:string utilities")
local m_table = require("Module:table")
local m_data = require("Module:cpx-pron/data")

local sub = m_string_utils.sub
local find = m_string_utils.find
local gsub = m_string_utils.gsub
local match = m_string_utils.match
local toNFD = mw.ustring.toNFD
local toNFC = mw.ustring.toNFC

-- Marker characters recognised inside a single syllable of the input string.
-- They are stripped by get_syllable_markers / split_syllable before the
-- syllable is split into initial, final and tone.
local SPECIAL_MARKERS = {
	NO_ASSIMILATION = "*", -- leading: do not apply initial assimilation to this syllable
	NO_SANDHI = "#", -- trailing: keep the original tone (no tone sandhi)
	MANUAL_CHANGE = ">", -- separates the original form from a manually given changed form
	CAPITALIZATION = "^", -- leading: capitalise the generated BUC syllable
	SPACE_AFTER = "\\" -- trailing: join the next BUC syllable with a space instead of a hyphen
}

-- Names of the output formats.
-- NOTE(review): not referenced in the visible part of the file; presumably
-- selects between format_debug_output, format_brief_output and
-- format_complete_output -- confirm against the caller.
local FORMAT_MODES = {
	DEBUG = "debug",
	BRIEF = "brief",
	COMPLETE = "complete"
}

-- Supported dialect codes mapped to the wikilink displayed for them.
-- split_dialect_codes rejects any code that is not a key of this table, so
-- the commented-out entries below are currently unsupported.
local dialects = {
	pt = "[[w:Putian dialect|Putian]]",
	-- nr = "[[w:Nanri Island|Nanri]]",
	-- jk = "[[w:zh:江口鎮 (莆田市)|Jiangkou]]",
	xy = "[[w:Xianyou dialect|Xianyou]]",
	-- ft = "[[w:zh:楓亭鎮|Fengting]]",
	-- yy = "[[w:zh:游洋鎮|Youyang]]",
}

-- Romanised initial -> IPA, per dialect. The empty string is the zero initial.
-- Looked up through get_ipa_value with type "initials".
local initials = {
	pt = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "ɬ",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		[""] = ""
	},
	xy = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "ɬ",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
		[""] = "",
		-- "bh" is what b/p become after a non-nasal, non-glottal final in the
		-- xy assimilation rules (see initial_assimilation_rules.xy.other_final).
		["bh"] = "β",
	},
}

-- Romanised final -> IPA, per dialect. Looked up through get_ipa_value with
-- type "finals"; a final missing from the dialect's table makes that lookup
-- raise an "Invalid final" error.
local finals = {
	pt = {
		["a"] = "a", ["ae"] = "ɛ", ["e"] = "e", ["i"] = "i", ["o"] = "o",
		["oe"] = "ø", ["or"] = "ɒ", ["u"] = "u", ["y"] = "y",
		["ai"] = "ai", ["ao"] = "au", ["ia"] = "ia", ["ieo"] = "ieu", ["iu"] = "iu",
		["ou"] = "ɔu", ["ua"] = "ua", ["ue"] = "uei", ["ui"] = "ui", ["yo"] = "yɒ",
		["ang"] = "aŋ", ["orng"] = "ɒŋ", ["eng"] = "ɛŋ", ["oeng"] = "œŋ", ["ong"] = "ɔŋ",
		["ing"] = "iŋ", ["ieng"] = "iɛŋ", ["ung"] = "uŋ", ["uang"] = "uaŋ", ["yng"] = "yŋ",
		["yong"] = "yɒŋ", ["ng"] = "ŋ̍",
		["ah"] = "aʔ", ["orh"] = "ɒʔ", ["eh"] = "ɛʔ", ["oeh"] = "œʔ", ["oh"] = "ɔʔ",
		["ih"] = "iʔ", ["iah"] = "iaʔ", ["ieh"] = "iɛʔ", ["uh"] = "uʔ", ["uah"] = "uaʔ",
		["uoh"] = "uoʔ", ["yh"] = "yʔ", ["yoh"] = "yɒʔ"
	},
	xy = {
		["a"] = "a", ["ae"] = "ɛ", ["e"] = "e", ["i"] = "i", ["o"] = "ɵ",
		["oe"] = "ø", ["or"] = "ɒ", ["u"] = "u", ["y"] = "y",
		["ai"] = "ai", ["ao"] = "au", ["ia"] = "ia", ["ieo"] = "ieu", ["iu"] = "iu",
		["ou"] = "ɔu", ["ua"] = "ua", ["ue"] = "uei", ["ui"] = "ui", ["ya"] = "ya",
		["ang"] = "aŋ", ["orng"] = "ɒŋ", ["eng"] = "ɛŋ",
		["ing"] = "iŋ", ["ieng"] = "iɛŋ", ["yng"] = "yŋ",
		["yeng"] = "yøŋ", ["uong"] = "uoŋ", ["ng"] = "ŋ̍",
		["ah"] = "aʔ", ["orh"] = "ɒʔ", ["eh"] = "ɛʔ",
		["ih"] = "iʔ", ["ieh"] = "iɛʔ", ["uh"] = "uʔ",
		["uoh"] = "uoʔ", ["yh"] = "yʔ", ["yeh"] = "yøʔ",
		["iah"] = "iaʔ", ["uah"] = "uaʔ", -- iah, uah only occur with the pronoun checked tone (S3)
		-- Nasalised finals (written with a trailing "ⁿ"); only defined for xy.
		["aⁿ"] = "ã", ["iⁿ"] = "ĩ", ["yⁿ"] = "ỹ", ["orⁿ"] = "ɒ̃", ["aiⁿ"] = "ãĩ", 
		["aoⁿ"] = "ãũ", ["iaⁿ"] = "ĩã", ["iuⁿ"] = "ĩũ", ["uaⁿ"] = "ũã", ["uiⁿ"] = "ũĩ", 
		["yaⁿ"] = "ỹã"
	},
}

-- Tone label -> superscript tone-contour string, per dialect. Looked up
-- through get_ipa_value with type "tones".
-- 1 ~ 7 correspond to yinping, yangping, yinshang, yinqu, yangqu, yinru and
-- yangru; the "S" labels are "special tones".
-- S1, S4, S7: tones that sound a bit like 1, 4, 7 after tone sandhi
--             (according to the Puxian dialect dictionary, "Puxian Fangyan Da Cidian")
-- S3: the pronoun checked tone; behaves like yinshang in both Putian and
--     Xianyou after tone sandhi
-- S5: historical yinru; labelled as yangqu in dictionaries but has its own
--     tone sandhi rule
local tones = {
	pt = {
		["1"] = "⁵³³", ["2"] = "¹³", ["3"] = "⁴⁵³", ["4"] = "⁴²",
		["5"] = "²¹", ["6"] = "¹", ["7"] = "⁴",
		["S1"] = "⁵⁵", ["S3"] = "³²", ["S4"] = "⁴²", ["S5"] = "²¹", ["S7"] = "⁴⁵"
	},
	xy = {
		["1"] = "⁵³³", ["2"] = "¹³", ["3"] = "³³²", ["4"] = "⁴²",
		["5"] = "²¹", ["6"] = "²", ["7"] = "²⁴",
		["S1"] = "⁵⁵", ["S3"] = "³²", ["S5"] = "²¹"
	},
}
 
-- Tone sandhi table, read by get_sandhi_tone as
--   sandhi_rules[dialect][tone of current syllable][tone of following syllable]
-- and yielding the sandhi tone of the current syllable.
-- Rows exist only for current tones 1-7, S3 and S5; the following tone is
-- always one of 1-7 (get_sandhi_tone maps a following S5 to 5 before lookup).
local sandhi_rules = {
	pt = {
		["1"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="2", ["5"]="2", ["6"]="2", ["7"]="5"},
		["2"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="5"},
		["3"] = {["1"]="5", ["2"]="2", ["3"]="5", ["4"]="5", ["5"]="2", ["6"]="2", ["7"]="5"},
		["4"] = {["1"]="S1", ["2"]="4", ["3"]="S1", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="S1"},
		["5"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="5"},
		["6"] = {["1"]="S7", ["2"]="S7", ["3"]="S7", ["4"]="S7", ["5"]="S4", ["6"]="S4", ["7"]="S7"},
		["7"] = {["1"]="6", ["2"]="6", ["3"]="6", ["4"]="7", ["5"]="S4", ["6"]="S4", ["7"]="6"},
		["S3"] = {["1"]="7", ["2"]="7", ["3"]="7", ["4"]="7", ["5"]="7", ["6"]="7", ["7"]="7"},
		["S5"] = {["1"]="S1", ["2"]="S1", ["3"]="S1", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="S1"}
	},
	xy = {
		["1"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="2", ["5"]="2", ["6"]="2", ["7"]="5"},
		["2"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="5"},
		["3"] = {["1"]="5", ["2"]="S1", ["3"]="5", ["4"]="5", ["5"]="2", ["6"]="2", ["7"]="5"},
		["4"] = {["1"]="S1", ["2"]="S1", ["3"]="S1", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="S1"},
		["5"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="5"},
		["6"] = {["1"]="7", ["2"]="7", ["3"]="7", ["4"]="7", ["5"]="7", ["6"]="7", ["7"]="7"},
		["7"] = {["1"]="6", ["2"]="6", ["3"]="6", ["4"]="7", ["5"]="7", ["6"]="7", ["7"]="6"},
		["S3"] = {["1"]="7", ["2"]="7", ["3"]="7", ["4"]="7", ["5"]="7", ["6"]="7", ["7"]="7"},
		["S5"] = {["1"]="S1", ["2"]="S1", ["3"]="S1", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="S1"}
	}
}

-- Initial assimilation table, read by apply_initial_assimilation as
--   initial_assimilation_rules[dialect][final type of previous syllable][original initial]
-- and yielding the assimilated initial of the current syllable. The final
-- type is one of the strings returned by get_final_type. An initial with no
-- entry is left unchanged by the caller.
-- Note that only xy defines a "nasalized_final" table.
local initial_assimilation_rules = {
	pt = {
		nasal_final = {
			["b"] = "m", ["p"] = "m", ["m"] = "m",
			["d"] = "n", ["t"] = "n", ["n"] = "n", ["l"] = "n", ["z"] = "n", ["c"] = "n", ["s"] = "n",
			["g"] = "ng", ["k"] = "ng", ["h"] = "ng", ["ng"] = "ng", [""] = "ng"
		},
		glottal_final = {}, -- remain unchanged
		other_final = {
			["b"] = "", ["p"] = "",
			["m"] = "m", ["n"] = "n", ["l"] = "l", ["ng"] = "ng",
			["d"] = "l", ["t"] = "l", ["z"] = "l", ["c"] = "l", ["s"] = "l",
			["g"] = "", ["k"] = "", ["h"] = "", [""] = ""
		}
	},
	xy = {
		nasal_final = {
			["b"] = "m", ["p"] = "m", ["m"] = "m",
			["d"] = "n", ["t"] = "n", ["n"] = "n", ["l"] = "n", ["z"] = "n", ["c"] = "n", ["s"] = "n",
			["g"] = "ng", ["k"] = "ng", ["h"] = "ng", ["ng"] = "ng", [""] = "ng"
		},
		nasalized_final = {
			["b"] = "m", ["m"] = "m", ["p"] = "m",
			["d"] = "n", ["t"] = "n", ["n"] = "n", ["l"] = "n", ["z"] = "n", ["c"] = "n", ["s"] = "n",
			["g"] = "", ["k"] = "", ["h"] = "",
			["ng"] = "ng",
			[""] = ""
		},
		glottal_final = {}, -- remain unchanged
		other_final = {
			["b"] = "bh", ["p"] = "bh",
			["m"] = "m", ["n"] = "n", ["l"] = "l", ["ng"] = "ng",
			["d"] = "l", ["t"] = "l", ["z"] = "l", ["c"] = "l", ["s"] = "l",
			["g"] = "", ["k"] = "", ["h"] = "", [""] = ""
		}
	}
}

-- Initial of the input romanisation (called PSP in this file's comments)
-- mapped to the corresponding BUC initial. Used by convert_to_buc_syllable;
-- an initial missing from this table (e.g. "bh") makes the conversion fail.
local buc_initials = {
	["b"] = "b",
	["p"] = "p",
	["m"] = "m",
	["d"] = "d",
	["t"] = "t",
	["n"] = "n",
	["l"] = "l",
	["z"] = "c",
	["c"] = "ch",
	["s"] = "s",
	["g"] = "g",
	["k"] = "k",
	["ng"] = "ng",
	["h"] = "h",
	[""] = ""
}

-- PSP final -> list of candidate BUC finals. Each candidate is
-- {buc_final, tone_position}, where tone_position is the 1-based index
-- (counted in code points by combine_buc_syllable) of the character that the
-- tone diacritic is appended to. When a final has several candidates,
-- convert_to_buc_syllable needs the character reading table to pick one.
local buc_finals = {
	["a"] = {{"a", 1}, {"aⁿ", 1}, {"ah", 1}},
	["ae"] = {{"e", 1}},
	["ah"] = {{"ah", 1}},
	["ai"] = {{"ai", 1}},
	["ang"] = {{"ang", 1}},
	["ao"] = {{"au", 1}},
	["e"] = {{"a̤", 1}, {"a̤ⁿ", 1}, {"a̤h", 1}},
	["eh"] = {{"eh", 1}},
	["eng"] = {{"eng", 1}},
	["i"] = {{"i", 1}, {"ih", 1}},
	["ia"] = {{"ia", 2}, {"iaⁿ", 2}, {"iah", 2}},
	["iah"] = {{"iah", 2}},
	["ieh"] = {{"iah", 2}},
	["ieng"] = {{"iang", 2}},
	["ieo"] = {{"a̤u", 2}, {"a̤uⁿ", 2}, {"a̤uh", 2}}, -- on `u`
	["ih"] = {{"ih", 1}},
	["ing"] = {{"ing", 1}},
	["iu"] = {{"iu", 2}},
	["ng"] = {{"ng", 1}}, -- actually in the middle of `n` and `g`
	["o"] = {{"eo", 2}, {"eoh", 2}},
	["oe"] = {{"e̤", 1}, {"e̤ⁿ", 1}},
	["oeh"] = {{"e̤h", 1}},
	["oeng"] = {{"e̤ng", 1}},
	["oh"] = {{"eoh", 2}},
	["ong"] = {{"eong", 2}},
	["or"] = {{"o̤", 1}, {"o̤ⁿ", 1}, {"o̤h", 1}},
	["orh"] = {{"o̤h", 1}},
	["orng"] = {{"o̤ng", 1}},
	["ou"] = {{"o", 1}},
	["u"] = {{"u", 1}},
	["ua"] = {{"ua", 2}, {"uaⁿ", 2}, {"uah", 2}},
	["uah"] = {{"uah", 2}},
	["uang"] = {{"uang", 2}},
	["ue"] = {{"oi", 1}, {"oiⁿ", 1}, {"oih", 1}}, -- on `o`
	["uh"] = {{"uh", 1}},
	["ui"] = {{"ui", 1}}, -- on `u`
	["ung"] = {{"ng", 1}}, -- actually in the middle of `n` and `g`
	["y"] = {{"ṳ", 1}},
	["yh"] = {{"ṳh", 1}},
	["yng"] = {{"ṳng", 1}},
	["yo"] = {{"io̤", 2}, {"io̤ⁿ", 2}, {"io̤h", 2}},
	["yoh"] = {{"io̤h", 2}},
	["yong"] = {{"io̤ng", 2}}
}

-- Tone label -> combining diacritic appended to the tone-bearing character of
-- a BUC syllable. Tones without an entry here (S1, S3, S4, S7) make
-- combine_buc_syllable raise "Invalid tone"; convert_to_buc_syllable maps S3
-- to 3 before calling it.
local buc_tones = {
	["1"] = "",	  -- yinping: no diacritic
	["2"] = "́",	  -- yangping: U+0301
	["3"] = "̂",	  -- shang tone: U+0302
	["4"] = "̍",	  -- yinqu: U+030D
	["5"] = "̄",	  -- yangqu: U+0304
	["S5"] = "̄",	  -- same as above
	["6"] = "",	  -- yinru: final -h, no diacritic
	["7"] = "̍",	  -- yangru: final -h plus U+030D
}

--- Split a comma-separated list of dialect codes and validate every code.
-- @param code_string string such as "pt,xy"
-- @return array of the codes, in input order
-- Raises "Unsupported dialect: <code>" for the first code that is not a key
-- of `dialects`.
local function split_dialect_codes(code_string)
	local accepted = {}
	for dialect_code in code_string:gmatch("[^,]+") do
		local is_known = dialects[dialect_code]
		if not is_known then
			error("Unsupported dialect: " .. dialect_code)
		end
		accepted[#accepted + 1] = dialect_code
	end
	return accepted
end

--- Strip the BUC-related markers from one input syllable.
-- Checked in this order: a leading capitalisation marker, a trailing
-- space-after marker, a trailing comma, and finally the first "{...}" group
-- (a manually specified BUC form).
-- @param syllable raw syllable string
-- @return markers table {capitalize, space_after, comma_after, manual_buc}
-- @return the syllable with those markers removed
local function get_syllable_markers(syllable)
	local capitalize = false
	local space_after = false
	local comma_after = false
	local manual_buc = nil

	if syllable:sub(1, 1) == SPECIAL_MARKERS.CAPITALIZATION then
		capitalize = true
		syllable = syllable:sub(2)
	end

	if syllable:sub(-1) == SPECIAL_MARKERS.SPACE_AFTER then
		space_after = true
		syllable = syllable:sub(1, -2)
	end

	if syllable:sub(-1) == "," then
		comma_after = true
		syllable = syllable:sub(1, -2)
	end

	-- Manual BUC: text between the first "{" and its closing "}"
	local brace_open, brace_close = syllable:find("{[^}]+}")
	if brace_open then
		manual_buc = syllable:sub(brace_open + 1, brace_close - 1)
		syllable = syllable:sub(1, brace_open - 1) .. syllable:sub(brace_close + 1)
	end

	local markers = {
		capitalize = capitalize,
		space_after = space_after,
		comma_after = comma_after,
		manual_buc = manual_buc
	}
	return markers, syllable
end

--- Split a toneless syllable form into its initial and final.
-- @param options table with field `form` (string)
-- @return initial (possibly ""), final
-- Raises an error when `form` is missing or when nothing is left for the
-- final. A form starting with "bh" is additionally recorded via the tracking
-- module.
local function split_initial_final(options)
	local form = options and options.form
	if not form then
		error("split_initial_final: form is required")
	end

	local head = form:sub(1, 2)
	local initial, final

	if head == "bh" then
		initial = "bh"
		final = form:sub(3)
		require("Module:debug").track('cpx-pron/entries using bh')
	elseif form == "ng" then
		-- syllabic nasal: the whole form is the final
		initial = ""
		final = form
	elseif head == "ng" and #form > 2 then
		initial = "ng"
		final = form:sub(3)
	else
		initial = form:match("^[bpmnltdzcsghk]h?") or ""
		final = form:sub(#initial + 1)
	end

	if not final or final == "" then
		error("Invalid form: " .. form .. " (unable to extract final)")
	end

	return initial, final
end

-- Phonological rule application functions
--- Classify a final for the purpose of initial assimilation.
-- @param options table {initial, final, dialect}
-- @return "nasal_final" (ends in "ng"), "glottal_final" (ends in "h"),
--         "nasalized_final" (ends in "ⁿ", or, for dialect "xy", an initial
--         matching [mn]g? followed by a final that would otherwise be
--         "other_final"), or "other_final"
-- Raises an error when options is not a table, when final is nil, or when
-- the final ends in a plain "n".
local function get_final_type(options)
	if not options or type(options) ~= "table" then
		error("get_final_type: options must be a table")
	end

	local final = options.final
	if not final then
		error("get_final_type: final cannot be nil")
	end

	if sub(final, -2) == "ng" then
		return "nasal_final"
	end

	local last_char = sub(final, -1)
	if last_char == "h" then
		return "glottal_final"
	end
	if last_char == "ⁿ" then
		return "nasalized_final"
	end

	-- A nasal initial nasalises an otherwise plain final (xy only). The
	-- recursive call uses an empty initial and no dialect, so it cannot
	-- take this branch again.
	if match(options.initial, "[mn]g?") and
		get_final_type({initial = "", final = final}) == "other_final" and
		options.dialect == "xy" then
		return "nasalized_final"
	end

	if last_char == "n" then
		error('Please replace the syllable-final "n" with "ⁿ"')
	end

	return "other_final"
end

--- Record a BUC conversion problem under the tracking key "cpx-pron/<reason>".
-- @param reason short description appended to the tracking key
local function track_buc_issue(reason)
	local debug_module = require("Module:debug")
	debug_module.track('cpx-pron/' .. reason)
end

--- Assemble one BUC syllable from its initial, final and tone.
-- The diacritic for `tone` (from buc_tones) is appended to the code point at
-- index `tone_position` of the final, and the result is NFC-normalised.
-- @param options table {initial, final, tone, tone_position}
-- @return the composed syllable
-- Raises an error when the tone has no entry in buc_tones or when
-- tone_position lies beyond the end of the final.
local function combine_buc_syllable(options)
	local tone = options.tone
	local diacritic = buc_tones[tone]
	if not diacritic then
		error("Invalid tone: " .. tone)
	end

	-- One table entry per code point of the final
	local codepoints = {}
	for codepoint in mw.ustring.gmatch(options.final, ".") do
		codepoints[#codepoints + 1] = codepoint
	end

	local position = options.tone_position
	if #codepoints < position then
		error("Invalid tone position: " .. position)
	end
	codepoints[position] = codepoints[position] .. diacritic

	return mw.ustring.toNFC(options.initial .. table.concat(codepoints))
end

--- Fetch the BUC readings recorded for a character in the data module.
-- @param char key into m_data.buc
-- @return the stored entry, or nil when there is none
local function lookup_char_readings(char)
	local readings = m_data.buc[char]
	if readings then
		return readings
	end
	return nil
end

-- Convert single PSP syllable to BUC
-- options.syllable_info: syllable info table (reads manual_buc,
--                        original_initial, original_final, original_tone)
-- options.char:          character used to look up recorded readings when
--                        several BUC finals are possible, or nil when no
--                        character can be paired with this syllable
-- Returns the BUC syllable, or nil (after recording a tracking reason) when
-- the syllable cannot be converted to exactly one BUC form.
local function convert_to_buc_syllable(options)
	local syllable_info = options.syllable_info
	local char = options.char

	-- mw.log("convert_to_buc_syllable - original_initial: " .. syllable_info.original_initial .. 
	--	   ", original_final: " .. syllable_info.original_final .. 
	--	   ", original_tone: " .. syllable_info.original_tone)

	-- If BUC is manually specified, first verify
	-- NOTE(review): validate_manual_buc is not defined in the part of the file
	-- above this function; confirm it is in scope when this branch runs.
	if syllable_info.manual_buc then
		local is_valid = validate_manual_buc(syllable_info.manual_buc)
		if not is_valid then
			track_buc_issue("manual form incorrect")
			return nil
		end
		return syllable_info.manual_buc
	end

	-- Special handling for S3 tone: look it up as tone 3 without the final h
	local lookup_tone = syllable_info.original_tone
	local lookup_final = syllable_info.original_final
	if syllable_info.original_tone == "S3" then
		lookup_tone = "3"
		if lookup_final:sub(-1) == "h" then
			lookup_final = lookup_final:sub(1, -2)
		end
	end

	-- Get possible BUC finals
	local possible_finals = buc_finals[lookup_final]
	if not possible_finals then
		track_buc_issue("no final found")
		return nil
	end

	-- Get BUC initial
	local initial = buc_initials[syllable_info.original_initial]
	if not initial then
		track_buc_issue("no initial found")
		return nil
	end

	-- No need to look up Hanzi-BUC table if hanzi's and PSP's counts don't match:
	-- without a character only an unambiguous final can be converted
	if not char then
		if #possible_finals > 1 then
			track_buc_issue("contraction and multiple final found")
			return nil
		end
		return combine_buc_syllable({
			initial = initial,
			final = possible_finals[1][1],
			tone = lookup_tone,
			tone_position = possible_finals[1][2]
		})
	end

	-- Generate all possible word forms.
	-- Special check for BUC tone 7B which merged into tone 2: a BUC final in
	-- -h can only correspond to a PSP final without -h when the PSP tone is 2;
	-- such a candidate is built with tone 7 and marked with a trailing "*".
	local psp_has_h = syllable_info.original_final:match("h$")
	local candidates = {}
	for _, final_info in ipairs(possible_finals) do
		local final, tone_position = final_info[1], final_info[2]
		local use_tone = lookup_tone
		local should_keep = true

		if final:match("h$") and not psp_has_h then
			if lookup_tone == "2" then
				use_tone = "7"
				final = final .. "*"
			else
				-- Impossible to correspond to this final
				should_keep = false
			end
		end

		if should_keep then
			table.insert(candidates, combine_buc_syllable({
				initial = initial,
				final = final,
				tone = use_tone,
				tone_position = tone_position
			}))
		end
	end

	if #candidates == 1 then
		return candidates[1]
	end

	-- Several (or no) candidates: keep those recorded as readings of the character
	local char_readings = lookup_char_readings(char)

	if not char_readings then
		track_buc_issue("cannot look up table")
		return nil
	end

	local matches = {}
	for _, candidate in ipairs(candidates) do
		for _, reading in ipairs(char_readings) do
			if candidate == reading then
				table.insert(matches, candidate)
			end
		end
	end

	if #matches == 0 then
		track_buc_issue("no matching reading found")
		return nil
	elseif #matches > 1 then
		track_buc_issue("multiple matching readings found")
		return nil
	end

	return matches[1]
end

--- Build the BUC transcription of a whole word (Putian only).
-- Each syllable uses its manual BUC form if one was given, otherwise it is
-- converted with convert_to_buc_syllable. The page title, with whitespace and
-- commas removed, supplies one character per syllable for that conversion,
-- but only when its length equals the number of syllables.
-- Syllables are joined with ", " after a comma marker, " " after a space
-- marker, and "-" otherwise.
-- @param options table {syllable_infos, dialect, word}
-- @return the BUC string, or nil when the dialect is not "pt" or any
--         syllable could not be converted
local function generate_buc(options)
	local syllable_infos = options.syllable_infos
	if not syllable_infos then
		error("Missing required syllable_infos in generate_buc")
	end

	if options.dialect ~= "pt" then
		return nil
	end

	local title_text = mw.title.getCurrentTitle().text
	local hanzi = mw.ustring.gsub(title_text, "[\n\r\t ,]", "")

	-- Check if #PSP syllables == #hanzi
	local total = #syllable_infos
	local use_char_table = (total == mw.ustring.len(hanzi))

	local pieces = {}
	for index, info in ipairs(syllable_infos) do
		local buc = info.manual_buc

		if not buc then
			local char = nil
			if use_char_table then
				char = mw.ustring.sub(hanzi, index, index)
			end

			buc = convert_to_buc_syllable({
				syllable_info = info,
				char = char,
				word = options.word
			})

			-- If any syllable cannot be uniquely identified
			if not buc then
				return nil
			end

			if info.capitalize then
				-- Decompose first so that only the base letter is upper-cased
				local decomposed = mw.ustring.toNFD(buc)
				local head = mw.ustring.upper(mw.ustring.sub(decomposed, 1, 1))
				buc = mw.ustring.toNFC(head .. mw.ustring.sub(decomposed, 2))
			end
		end

		pieces[#pieces + 1] = buc

		-- Separator towards the next syllable
		if index < total then
			local separator = "-"
			if info.comma_after then
				separator = ", "
			elseif info.space_after then
				separator = " "
			end
			pieces[#pieces + 1] = separator
		end
	end

	return table.concat(pieces)
end

--- Parse one input syllable into its components.
-- Handles, in order: the BUC markers (via get_syllable_markers), a leading
-- no-assimilation marker, a trailing no-sandhi marker, an optional
-- "original>changed" manual change, and the tone part (optionally
-- "tone-manualSandhiTone").
-- @param syllable raw syllable string
-- @return table with orig_form, changed_form, tone_part, orig_initial,
--         orig_final, changed_initial, changed_final, orig_tone,
--         manual_sandhi_tone, no_sandhi, no_assimilation, capitalize,
--         space_after, comma_after, manual_buc
-- Raises an error when the syllable is empty or cannot be parsed.
local function split_syllable(syllable)
	if not syllable or syllable == "" then
		error("Invalid syllable: " .. tostring(syllable))
	end

	local markers, body = get_syllable_markers(syllable)

	local no_assimilation = body:sub(1, 1) == SPECIAL_MARKERS.NO_ASSIMILATION
	if no_assimilation then
		body = body:sub(2)
	end

	local no_sandhi = body:sub(-1) == SPECIAL_MARKERS.NO_SANDHI
	if no_sandhi then
		body = body:sub(1, -2)
	end

	-- The tone part starts at the first digit 1-7 or "S"
	local orig_form, changed_form, tone_part
	if body:find(SPECIAL_MARKERS.MANUAL_CHANGE) then
		orig_form, changed_form, tone_part = body:match("(.-)>(.-)([1-7S]+.*)$")
	else
		orig_form, tone_part = body:match("(.-)([1-7S]+.*)$")
		changed_form = orig_form
	end

	-- If the segmentation is not correct
	if not orig_form or not tone_part then
		error("Invalid syllable format: " .. body)
	end

	local orig_initial, orig_final = split_initial_final({form = orig_form})
	local changed_initial, changed_final = split_initial_final({form = changed_form})

	local orig_tone, manual_sandhi_tone
	if tone_part:find("-") then
		orig_tone, manual_sandhi_tone = tone_part:match("^([1-7S]+)%-([1-7S]+)$")
		require("Module:debug").track('cpx-pron/manual sandhi tone')
	else
		orig_tone = tone_part
	end

	-- Tone 3 on a final in -h is the special tone S3
	if orig_tone == '3' and changed_final:sub(-1) == 'h' then
		orig_tone = 'S3'
	end

	-- final validation
	if not orig_initial or not orig_final or not orig_tone then
		error("Unable to parse syllable: " .. body)
	end

	return {
		orig_form = orig_form,
		changed_form = changed_form,
		tone_part = tone_part,
		orig_initial = orig_initial,
		orig_final = orig_final,
		changed_initial = changed_initial,
		changed_final = changed_final,
		orig_tone = orig_tone,
		manual_sandhi_tone = manual_sandhi_tone,
		no_sandhi = no_sandhi,
		no_assimilation = no_assimilation,
		-- BUC (only for Putian)
		capitalize = markers.capitalize,
		space_after = markers.space_after,
		comma_after = markers.comma_after,
		manual_buc = markers.manual_buc
	}
end

--- Build the working record for one syllable.
-- @param options table {syllable, is_first_syllable}
-- @return syllable info table; `changed_tone` starts out equal to the
--         original tone and is updated later by apply_sandhi
local function create_syllable_info(options)
	local parts = split_syllable(options.syllable)
	local info = {}

	-- as entered
	info.original_initial = parts.orig_initial
	info.original_final = parts.orig_final
	info.original_tone = parts.orig_tone

	-- after manual changes; tone defaults to the original tone
	info.changed_initial = parts.changed_initial
	info.changed_final = parts.changed_final
	info.changed_tone = parts.orig_tone

	-- processing flags
	info.no_sandhi = parts.no_sandhi
	info.no_assimilation = parts.no_assimilation
	info.is_first_syllable = options.is_first_syllable
	info.manual_sandhi_tone = parts.manual_sandhi_tone

	-- BUC
	info.capitalize = parts.capitalize
	info.space_after = parts.space_after
	info.comma_after = parts.comma_after
	info.manual_buc = parts.manual_buc

	return info
end

-- Syllable processing functions
--- Split a word on whitespace and build a syllable info table per syllable.
-- @param options table with field `word` (string)
-- @return array of syllable info tables; only the first one has
--         is_first_syllable set to true
local function create_syllable_infos(options)
	local infos = {}
	local count = 0
	for token in options.word:gmatch("%S+") do
		count = count + 1
		infos[count] = create_syllable_info({
			syllable = token,
			is_first_syllable = (count == 1)
		})
	end
	return infos
end

--- Tidy up a syllable after initial assimilation (modifies it in place).
-- 1. An initial matching [mn]g? already carries the nasality, so a trailing
--    "ⁿ" on the final is dropped.
-- 2. Initial "ng" followed by final "ng" collapses to the bare final "ng".
-- @param options table with field `syllable` (syllable info table)
local function post_process_nasalization(options)
	local info = options.syllable

	-- Remove duplicate nasalization
	local has_nasal_initial = info.changed_initial:match("^[mn]g?")
	if has_nasal_initial and info.changed_final:match("ⁿ$") then
		local stripped = info.changed_final:gsub("ⁿ$", "")
		info.changed_final = stripped
	end

	-- Simplify ng-initial syllables
	if info.changed_initial == "ng" and info.changed_final == "ng" then
		info.changed_initial = ""
	end
end

--- Determine the sandhi tone of a syllable from the tone that follows it.
-- @param options table {curr_syllable, next_syllable, dialect}
-- @return the manually specified sandhi tone if there is one; otherwise the
--         tone given by sandhi_rules for (current tone, following tone);
--         otherwise the original tone (no following syllable, or no rule)
local function get_sandhi_tone(options)
	local curr_syllable = options.curr_syllable
	local next_syllable = options.next_syllable
	local dialect = options.dialect

	-- Handle manual tone specification
	if curr_syllable.manual_sandhi_tone then
		return curr_syllable.manual_sandhi_tone
	end

	local current_tone = curr_syllable.original_tone

	-- Handle final syllable
	if not next_syllable then
		return current_tone
	end

	-- S5 is treated as regular 5 in non-sandhi position
	local next_tone = gsub(next_syllable.original_tone, "S5", "5")

	-- Apply sandhi rules. Not every tone has a row (S1, S4 and S7 do not), so
	-- the row itself must be checked before it is indexed; a missing row or a
	-- missing entry both mean "keep the original tone".
	local tone_rules = sandhi_rules[dialect][current_tone]
	return tone_rules and tone_rules[next_tone] or current_tone
end

--- Set `changed_tone` on every syllable (modifies the syllable infos in place).
-- A syllable keeps its original tone when it carries the no-sandhi marker,
-- is followed by a comma, or is the last syllable; otherwise the tone comes
-- from get_sandhi_tone. A resulting tone 3 on a final in -h is relabelled S3.
-- @param options table {dialect, syllable_infos}
local function apply_sandhi(options)
	local dialect = options.dialect
	local infos = options.syllable_infos

	for index, current in ipairs(infos) do
		local following = infos[index + 1]

		local keeps_original_tone = current.no_sandhi or
			current.comma_after or
			-- current.space_after or
			not following

		if keeps_original_tone then
			current.changed_tone = current.original_tone
		else
			current.changed_tone = get_sandhi_tone({
				curr_syllable = current,
				next_syllable = following,
				dialect = dialect
			})
		end

		-- Special tone adjustment for glottal finals
		local ends_in_h = current.changed_final:sub(-1) == 'h'
		if current.changed_tone == '3' and ends_in_h then
			current.changed_tone = 'S3'
		end
	end
end

--- Apply initial assimilation across a word (modifies the syllable infos in place).
-- From the second syllable on, the initial is replaced according to
-- initial_assimilation_rules, keyed by the final type of the preceding
-- (already processed) syllable. The first syllable is never changed.
-- @param options table {dialect, syllable_infos}; syllable_infos must
--        contain at least one syllable
-- @return array holding the same syllable info tables, in order
local function apply_initial_assimilation(options)
	local dialect = options.dialect
	local syllable_infos = options.syllable_infos
	local result = {}

	-- Handle first syllable
	result[1] = syllable_infos[1]
	result[1].is_first_syllable = true

	-- Process subsequent syllables
	for i = 2, #syllable_infos do
		local prev_syllable = result[i-1]
		local curr_syllable = syllable_infos[i]

		-- No initial assimilation if one of the following conditions are met:
		-- 1. there is a no_assimilation mark
		-- 2. the previous syllable is followed by a comma
		-- 3. the initial was already changed manually
		if not curr_syllable.no_assimilation and 
			not prev_syllable.comma_after and
			curr_syllable.changed_initial == curr_syllable.original_initial then
			-- Get the type of the previous syllable's final
			local final_type = get_final_type({
				initial = prev_syllable.changed_initial,
				final = prev_syllable.changed_final,
				dialect = dialect
			})

			-- Special rule for nasalized finals: after a plain final, an
			-- obstruent initial whose own syllable is nasalized assimilates
			-- as if it followed a nasal final
			local should_apply_nasal_rule = 
				final_type == "other_final" and
				curr_syllable.original_initial:match("^[bpdtzcs]") and
				get_final_type({
					initial = curr_syllable.original_initial,
					final = curr_syllable.original_final,
					dialect = dialect
				}) == "nasalized_final"

			if should_apply_nasal_rule then
				final_type = "nasal_final"
			end

			-- Apply assimilation rules. A dialect may lack a table for this
			-- final type (pt has no "nasalized_final" table); the initial is
			-- then left unchanged instead of indexing nil, so that the IPA
			-- lookup can report the unsupported final with a clear message.
			local rules = initial_assimilation_rules[dialect][final_type]
			curr_syllable.changed_initial = 
				rules and rules[curr_syllable.original_initial] or 
				curr_syllable.original_initial
		end

		-- Post-process nasalization
		local post_process_options = {
			syllable = curr_syllable,
			dialect = dialect
		}
		post_process_nasalization(post_process_options)

		table.insert(result, curr_syllable)
	end

	return result
end

--- Render the processed syllables back into romanised form.
-- @param syllable_infos array of syllable info tables
-- @return changed initial + changed final + changed tone of every syllable,
--         joined with single spaces
local function generate_actual_pronunciation(syllable_infos)
	local rendered = {}

	for index = 1, #syllable_infos do
		local info = syllable_infos[index]
		rendered[index] = info.changed_initial .. info.changed_final .. info.changed_tone
	end

	return table.concat(rendered, " ")
end

--- Look up the IPA value of an initial, final or tone.
-- @param options table {type = "initials"|"finals"|"tones", dialect, value}
-- @return the IPA string from the matching per-dialect table
-- Raises an error when a parameter is missing, the type is unknown, or the
-- value has no entry. For a handful of commonly mistyped finals the error
-- message names the spelling to use instead.
local function get_ipa_value(options)
	local kind = options.type
	local dialect = options.dialect
	local value = options.value

	-- Validation
	if not (kind and dialect and value) then
		error("Missing required parameter for IPA lookup")
	end

	-- Get the appropriate lookup table
	local sources = {
		initials = initials,
		finals = finals,
		tones = tones
	}
	local source = sources[kind]
	if not source then
		error("Invalid lookup type: " .. kind)
	end

	-- Get the result
	local per_dialect = source[dialect]
	local result = per_dialect and per_dialect[value]
	if result then
		return result
	end

	-- Handle variant finals
	if kind == "finals" then
		local final_variants = {
			["au"] = "ao",
			["iang"] = "ieng",
			["ieu"] = "ieo",
			["iau"] = "ieo",
			["iao"] = "ieo",
			["uai"] = "ue",
			["uei"] = "ue",
			["yoeh"] = "yeh",
			["yoeng"] = "yeng",
			["yor"] = "yo",
			["yorh"] = "yoh",
			["yorng"] = "yong"
		}

		local suggestion = final_variants[value]
		if suggestion then
			error(string.format(
				'Invalid final: %s. Please use "%s" instead.',
				value,
				suggestion
			))
		end
	end

	-- Error if no result found ("initials" -> "initial", etc.)
	error(string.format(
		"Invalid %s: %s",
		kind:sub(1, -2),
		value
	))
end

--- Look up the IPA initial, final and tone of one processed syllable.
-- The tone is the original tone; when sandhi changed it, the sandhi tone is
-- appended after a superscript minus sign.
-- @param options table {syllable_info, dialect}
-- @return table {initial, final, tone}
local function get_ipa_components(options)
	local info = options.syllable_info
	local dialect = options.dialect

	-- Small helper so the three lookups read alike
	local function lookup(kind, value)
		return get_ipa_value({type = kind, dialect = dialect, value = value})
	end

	-- Get basic components
	local ipa_initial = lookup("initials", info.changed_initial)
	local ipa_final = lookup("finals", info.changed_final)
	local ipa_tone = lookup("tones", info.original_tone)

	-- Handle tone change
	if info.changed_tone ~= info.original_tone then
		local sandhi_tone = lookup("tones", info.changed_tone)

		if not sandhi_tone then
			error("Invalid sandhi tone: " .. info.changed_tone .. 
				  " for dialect: " .. dialect)
		end

		ipa_tone = ipa_tone .. "⁻" .. sandhi_tone
	end

	return {
		initial = ipa_initial,
		final = ipa_final,
		tone = ipa_tone
	}
end

--- Build the superscript note that shows a syllable's pre-assimilation initial.
-- @param options table {syllable_info, dialect}
-- @return "" for the first syllable or when the initial did not change;
--         "<sup>(Ø-)</sup>" when the original initial was empty; otherwise
--         "<sup>(X-)</sup>" with X the IPA of the original initial
local function get_original_initial_display(options)
	local syllable_info = options.syllable_info
	local dialect = options.dialect

	-- Only show original initial for non-first syllables with changes
	if syllable_info.is_first_syllable or
		syllable_info.original_initial == syllable_info.changed_initial then
		return ""
	end

	-- Handle empty initial case
	if syllable_info.original_initial == "" then
		return "<sup>(Ø-)</sup>"
	end

	-- Get IPA for original initial
	local ipa_initial = get_ipa_value({
		type = "initials",
		dialect = dialect,
		value = syllable_info.original_initial
	})

	return "<sup>(" .. ipa_initial .. "-)</sup>"
end

--- Render one processed syllable as IPA.
-- @param options table {syllable_info, dialect}
-- @return original-initial note (if any) followed by IPA initial, final and tone
local function syllable_to_ipa(options)
	local lookup_args = {
		syllable_info = options.syllable_info,
		dialect = options.dialect
	}

	-- IPA pieces first, then the note for a changed initial
	local parts = get_ipa_components(lookup_args)
	local initial_note = get_original_initial_display(lookup_args)

	return table.concat({
		initial_note,
		parts.initial,
		parts.final,
		parts.tone
	})
end

-- Generate IPA for the syllables
-- options.syllable_infos: array of processed syllable info tables (required)
-- options.dialect:        dialect code used for every lookup
-- Returns the per-syllable IPA strings joined with single spaces.
local function generate_ipa(options)
	local infos = options and options.syllable_infos
	if not infos then
		error("Missing required syllable_infos in generate_ipa")
	end

	local dialect = options.dialect
	local rendered = {}
	for index, info in ipairs(infos) do
		rendered[index] = syllable_to_ipa({
			syllable_info = info,
			dialect = dialect
		})
	end

	return table.concat(rendered, " ")
end

-- Process a single pronunciation entry
-- options.dialect_codes: comma-separated dialect codes, e.g. "pt,xy"
-- options.word:          whitespace-separated syllables
-- options.index:         passed through unchanged to the results
-- Returns {dialect_codes, word, index, processed}, where `processed` holds
-- one entry per dialect with fields dialect, original, actual, ipa, index
-- and, for "pt" only, buc (which may be nil).
local function process_pronunciation(options)
	local word = options.word
	local index = options.index
	local processed = {}

	local result = {
		dialect_codes = options.dialect_codes,
		word = word,
		processed = processed,
		index = index
	}

	local dialect_list = split_dialect_codes(options.dialect_codes)

	-- Parse the word once; every dialect works on its own deep copy
	local parsed_infos = create_syllable_infos({
		word = word,
		is_first_syllable = true
	})

	for _, dialect in ipairs(dialect_list) do
		-- Apply phonological rules
		local infos = apply_initial_assimilation({
			dialect = dialect,
			syllable_infos = m_table.deepCopy(parsed_infos)
		})
		apply_sandhi({
			dialect = dialect,
			syllable_infos = infos
		})

		-- Generate IPA and collect results
		local entry = {
			dialect = dialect,
			original = word,
			index = index
		}
		entry.actual = generate_actual_pronunciation(infos)
		entry.ipa = generate_ipa({
			syllable_infos = infos,
			dialect = dialect
		})

		-- Generate BUC only for Putian
		if dialect == "pt" then
			entry.buc = generate_buc({
				syllable_infos = infos,
				dialect = dialect,
				word = word
			})
		end

		processed[#processed + 1] = entry
	end

	return result
end

-- Formatting helper functions
--- Wrap text in the span used for monospace display (class "zhpron-monospace").
local function font_consolas(text)
	local opening_tag = '<span class="zhpron-monospace">'
	return opening_tag .. text .. '</span>'
end

--- Wrap text in slashes inside a span of class "IPA".
local function font_ipa(text)
	local transcription = '/' .. text .. '/'
	return '<span class="IPA">' .. transcription .. '</span>'
end

--- Turn an input pronunciation string into its display form.
-- Removes manual sandhi tones ("-5", "-S1"), manual sound changes (">..."),
-- the marker characters # * ^ \ and manual BUC groups ("{...}"), wraps every
-- digit in <sup> tags and finally drops the "S" of special tones.
-- @param text input string, or nil
-- @return exactly one value: the cleaned string ("" when text is nil)
local function clear_pinging_format(text)
	if not text then
		return ""
	end

	-- Each assignment keeps only the string result of gsub; returning the
	-- gsub chain directly would also hand the substitution count to callers.
	local cleaned = text:gsub("%-S?%d", "") -- remove tone sandhi
	cleaned = cleaned:gsub(">[a-zⁿ]+", "") -- remove irregular sound change
	cleaned = cleaned:gsub("[#*^\\]+", "") -- remove special symbols
	cleaned = cleaned:gsub("{[^}]+}", "") -- remove manual BUC
	cleaned = cleaned:gsub("(%d)", "<sup>%1</sup>") -- superscript tone numbers
	cleaned = cleaned:gsub("S", "") -- remove "S" in special tones
	return cleaned
end

-- Output formatting functions
--- Format results for debugging.
-- Every processed entry becomes "<dialect>: <original>[ → <actual>] /<ipa>/",
-- where the arrow part only appears when the actual pronunciation differs
-- from the input. Entries are joined with ", ".
-- @param options table with field `results` (array of result tables, each
--        with a `processed` array)
-- @return the formatted string
local function format_debug_output(options)
	local lines = {}

	for _, result in ipairs(options.results) do
		for _, entry in ipairs(result.processed) do
			local line = entry.dialect .. ": " .. entry.original

			-- Add actual pronunciation if different
			if entry.original ~= entry.actual then
				line = line .. " → " .. entry.actual
			end

			-- Add IPA
			line = line .. " /" .. entry.ipa .. "/"

			lines[#lines + 1] = line
		end
	end

	return table.concat(lines, ", ")
end

--- Build the label line that introduces a romanisation in the output.
-- @param options table {dialect_codes = array of codes, include_full_title}
-- @return wikitext: a list-item prefix, optionally the dialect name (only
--         with include_full_title and exactly one dialect), the link to the
--         romanisation's description page, and a closing suffix
local function format_dialect_info(options)
	local names = {}
	for position, code in ipairs(options.dialect_codes) do
		names[position] = dialects[code] or code
	end

	local prefix, suffix
	local dialect_str = ""

	if options.include_full_title then
		prefix = "\n*: <small>(<i>"
		-- NOTE(review): this suffix closes one more ")" than the prefix opens
		-- (format_brief_output uses "</i>): </small>") -- confirm intended.
		suffix = ")</i>)</small>: "
		if #names == 1 then
			dialect_str = names[1] .. ", "
		end
	else
		prefix = "\n** <small>(''"
		suffix = "'')</small>"
	end

	return prefix .. dialect_str .. "[[Wiktionary:About Chinese/Puxian Min|Pouseng Ping'ing]]" .. suffix
end

-- Render the one-line summary: a small label linking to the Pouseng Ping'ing
-- page followed by the distinct display romanisations joined with " / ".
-- Only the first processed record of each result supplies the romanisation;
-- duplicates (after markup removal) are shown once, in input order. The
-- dialect name is shown only when all records belong to a single dialect.
-- Declared `local` so the helper no longer leaks into the global table.
-- @param options  table whose `results` field is the list of results
-- @return         wikitext/HTML string
local function format_brief_output(options)
	local seen_pronunciations = {}
	local order = {}
	local seen_dialects = {}
	local dialect_codes = {}

	-- Collect pronunciations and dialect codes in their original order
	for _, result in ipairs(options.results) do
		local processed_list = result.processed
		if processed_list and #processed_list > 0 then
			local cleared_text = clear_pinging_format(processed_list[1].original)

			-- Record each cleaned pronunciation the first time it appears
			if not seen_pronunciations[cleared_text] then
				seen_pronunciations[cleared_text] = true
				order[#order + 1] = cleared_text
			end

			for _, processed in ipairs(processed_list) do
				if not seen_dialects[processed.dialect] then
					seen_dialects[processed.dialect] = true
					dialect_codes[#dialect_codes + 1] = processed.dialect
				end
			end
		end
	end

	-- Leading space: "Puxian Min" is already written by zh-pron
	local pieces = { " <small>(<i>" }
	if #dialect_codes == 1 then
		local code = dialect_codes[1]
		-- Fall back to the raw code instead of raising on an unknown dialect
		pieces[#pieces + 1] = (dialects[code] or code) .. ", "
	end
	pieces[#pieces + 1] = "[[Wiktionary:About Chinese/Puxian Min|Pouseng Ping'ing]]</i>): </small>"

	if #order > 0 then
		pieces[#pieces + 1] = font_consolas(table.concat(order, " / "))
	end

	return table.concat(pieces)
end

-- Render the full multi-line listing. Records that share the same
-- original/actual/IPA triple are merged into one group whose heading lists
-- all their dialects; each group then shows the Pouseng Ping'ing form, an
-- optional phonetic form (when it differs after markup removal), the BUC
-- form (only when the group's first record is Putian and has one) and IPA.
-- Declared `local` so the helper no longer leaks into the global table.
-- @param options  table whose `results` field is the list of results
-- @return         wikitext/HTML string ("" when there are no records)
local function format_complete_output(options)
	-- Flatten the records, remembering where each one came from
	local ordered_pronunciations = {}
	for _, result in ipairs(options.results) do
		for _, processed in ipairs(result.processed) do
			ordered_pronunciations[#ordered_pronunciations + 1] = {
				original = processed.original,
				actual = processed.actual,
				ipa = processed.ipa,
				buc = processed.buc,
				dialect = processed.dialect,
				index = result.index,
				input_order = #ordered_pronunciations + 1
			}
		end
	end

	-- table.sort is not stable, so records sharing an index (the dialects of
	-- one input segment) need the insertion order as an explicit tie-break.
	table.sort(ordered_pronunciations, function(a, b)
		if a.index ~= b.index then
			return a.index < b.index
		end
		return a.input_order < b.input_order
	end)

	-- Group by pronunciation, but keep the original order
	local grouped_by_pron = {}
	local order = {}
	for _, pron in ipairs(ordered_pronunciations) do
		local key = pron.original .. "|" .. pron.actual .. "|" .. pron.ipa
		local group = grouped_by_pron[key]
		if not group then
			group = { data = pron, dialects = {}, index = pron.index }
			grouped_by_pron[key] = group
			order[#order + 1] = key
		end
		group.dialects[#group.dialects + 1] = pron.dialect
	end

	-- Output in original order
	local output = {}
	for _, key in ipairs(order) do
		local group = grouped_by_pron[key]
		local data = group.data

		-- Dialect names (unknown codes are shown verbatim rather than dropped)
		local dialect_names = {}
		for _, code in ipairs(group.dialects) do
			dialect_names[#dialect_names + 1] = dialects[code] or code
		end
		output[#output + 1] = "\n** <small>(<i>" .. table.concat(dialect_names, ", ") .. "</i>)</small>"

		-- Pouseng Ping'ing
		local shown = clear_pinging_format(data.original)
		output[#output + 1] = "\n*** <small><i>[[Wiktionary:About Chinese/Puxian Min|Pouseng Ping'ing]]</i></small>: " ..
			font_consolas(shown)

		-- Phonetic form; the closing bracket now sits inside the same
		-- monospace span as the opening one.
		local phonetic = clear_pinging_format(data.actual)
		if shown ~= phonetic then
			output[#output + 1] = font_consolas(
				" [<small>Phonetic</small>: " .. phonetic .. "]")
		end

		-- BUC
		if data.dialect == "pt" and data.buc then
			-- Assign to a local so gsub's count is not passed on as an argument
			local buc = data.buc:gsub("%*", "")
			output[#output + 1] = "\n*** <small>''[[w:Hinghwa Romanized|Báⁿ-uā-ci̍]]''</small>: " ..
				font_consolas(buc)
		end

		-- IPA
		output[#output + 1] = '\n*** <small>Sinological [[Wiktionary:International Phonetic Alphabet|IPA]] ' ..
			'<sup>([[w:Pu–Xian Min|key]])</sup></small>: ' ..
			font_ipa(data.ipa)
	end

	return table.concat(output)
end

-- Main entry point
-- Parse a pronunciation specification and render it in the requested mode.
-- The specification is split on "/"; every segment must have the form
-- "<dialect codes>:<word>" and is handed to process_pronunciation together
-- with its 1-based position.
-- @param text  specification string, or a table whose `args[1]` holds it
-- @param mode  one of the FORMAT_MODES values; defaults to the debug mode
-- @return      the formatted output string
-- Raises an error for empty input, a malformed segment or an unknown mode.
function export.rom_display(text, mode)
	if type(text) == "table" then
		text = text.args[1]
	end

	-- Parameter validation
	if not text or text == "" then
		error("Invalid input: text must be a non-empty string")
	end

	mode = mode or FORMAT_MODES.DEBUG

	-- Process each "/"-separated pronunciation in the input
	local collected = {}
	local position = 0
	for segment in text:gmatch("[^/]+") do
		local codes, word = segment:match("^(.+):(.+)$")
		if not (codes and word) then
			error("Invalid input format: " .. segment)
		end

		position = position + 1
		table.insert(collected, process_pronunciation({
			dialect_codes = codes,
			word = word,
			index = position
		}))
	end

	local pronunciation_data = {
		results = collected,
		mode = mode
	}

	-- Pick the formatter only after processing, so input errors are reported
	-- before an unsupported mode is
	local formatters = {
		[FORMAT_MODES.DEBUG] = format_debug_output,
		[FORMAT_MODES.BRIEF] = format_brief_output,
		[FORMAT_MODES.COMPLETE] = format_complete_output,
	}
	local formatter = formatters[mode]
	if not formatter then
		error("Unsupported mode: " .. mode)
	end
	return formatter(pronunciation_data)
end

-- Lookup tables for the BUC -> PSP syllable conversion. They are built once
-- at load time instead of on every call.
local BUC_TO_PSP_INITIALS = {
	["b"] = "b", ["ch"] = "c", ["c"] = "z",
	["d"] = "d", ["g"] = "g", ["h"] = "h",
	["k"] = "k", ["l"] = "l", ["m"] = "m",
	["ng"] = "ng", ["n"] = "n", ["p"] = "p",
	["s"] = "s", ["t"] = "t", [""] = ""
}

local BUC_TO_PSP_FINALS = {
	["a"] = "a",
	["aⁿ"] = "a",
	["ah"] = "ah",
	["ah*"] = "a",
	["ai"] = "ai",
	["ang"] = "ang",
	["au"] = "ao",
	["a̤"] = "e",
	["a̤ⁿ"] = "e",
	["a̤h"] = "eh",
	["a̤h*"] = "e",
	["e"] = "ae",
	["eh"] = "eh",
	["eng"] = "eng",
	["e̤"] = "oe",
	["e̤ⁿ"] = "oe",
	["e̤h"] = "oeh",
	["e̤ng"] = "oeng",
	["i"] = "i",
	["ih"] = "ih",
	["ih*"] = "i",
	["ing"] = "ing",
	["ia"] = "ia",
	["iaⁿ"] = "ia",
	["iah"] = "iah",
	["iah*"] = "ia",
	["iang"] = "ieng",
	["iu"] = "iu",
	["o"] = "ou",
	["o̤"] = "or",
	["o̤ⁿ"] = "or",
	["o̤h"] = "orh",
	["o̤h*"] = "or",
	["o̤ng"] = "orng",
	["eo"] = "o",
	["eoh"] = "oh",
	["eoh*"] = "o",
	["eong"] = "ong",
	["u"] = "u",
	["uh"] = "uh",
	["ua"] = "ua",
	["uaⁿ"] = "ua",
	["uah"] = "uah",
	["uah*"] = "ua",
	["uang"] = "uang",
	["ui"] = "ui",
	["oi"] = "ue",
	["oiⁿ"] = "ue",
	["oih"] = "ue",
	["oih*"] = "ue",
	["ṳ"] = "y",
	["ṳh"] = "yh",
	["ṳng"] = "yng",
	["io̤"] = "yo",
	["io̤ⁿ"] = "yo",
	["io̤h"] = "yoh",
	["io̤h*"] = "yo",
	["io̤ng"] = "yong",
	["ng"] = "ng",
	["a̤u"] = "ieo",
	["a̤uⁿ"] = "ieo",
	["a̤uh"] = "ieoh",
	["a̤uh*"] = "ieo"
}

-- UTF-8 byte sequences of the combining tone marks, written as escapes so
-- the otherwise invisible characters cannot be lost or confused in an editor.
local BUC_TONE_MARKS = {
	acute = "\204\129",         -- U+0301 COMBINING ACUTE ACCENT
	circumflex = "\204\130",    -- U+0302 COMBINING CIRCUMFLEX ACCENT
	vertical_line = "\204\141", -- U+030D COMBINING VERTICAL LINE ABOVE
	macron = "\204\132"         -- U+0304 COMBINING MACRON
}

-- Convert single BUC syllable to PSP.
-- The tone digit is derived from the combining tone mark and the ending:
--   acute -> 2, circumflex -> 3, macron -> 5,
--   vertical line -> 2 with "-h*", 7 with "-h", otherwise 4,
--   no mark -> 6 with "-h", otherwise 1.
-- Letter case is ignored and the result is always lower case.
-- @param input  BUC syllable, or a table whose `args[1]` holds it
-- @return       the PSP syllable; the input syllable unchanged when it is
--               nil/empty, has an unknown initial or final, or when any
--               error occurs during conversion
local function syllable_to_psp(input)
	local syllable
	if type(input) == "table" then
		syllable = input.args[1]
	else
		syllable = input
	end

	if not syllable or syllable == "" then
		return syllable
	end

	-- Best effort by design: any runtime error falls back to the original
	local success, result = pcall(function()
		local decomposed = mw.ustring.toNFD(syllable)
		if not decomposed then
			return syllable
		end

		-- Fold case before any lookup. Previously only the initial was
		-- lower-cased, so capitalised zero-initial syllables such as "Ang"
		-- or "Ng" never matched a final and were returned unconverted.
		decomposed = mw.ustring.lower(decomposed)

		-- Endings are inspected before the tone mark is removed
		local ends_in_h_star = decomposed:find("h%*$") ~= nil
		local ends_in_h = decomposed:find("h$") ~= nil

		-- Determine the tone and which mark (if any) has to be stripped
		local tone, mark
		if decomposed:find(BUC_TONE_MARKS.acute, 1, true) then
			tone, mark = "2", BUC_TONE_MARKS.acute
		elseif decomposed:find(BUC_TONE_MARKS.circumflex, 1, true) then
			tone, mark = "3", BUC_TONE_MARKS.circumflex
		elseif decomposed:find(BUC_TONE_MARKS.vertical_line, 1, true) then
			mark = BUC_TONE_MARKS.vertical_line
			if ends_in_h_star then
				tone = "2"
			elseif ends_in_h then
				tone = "7"
			else
				tone = "4"
			end
		elseif decomposed:find(BUC_TONE_MARKS.macron, 1, true) then
			tone, mark = "5", BUC_TONE_MARKS.macron
		elseif ends_in_h then
			-- A string ending in "h*" does not end in "h", so "-h*" without
			-- a tone mark falls through to tone 1 below
			tone = "6"
		else
			tone = "1"
		end
		if mark then
			-- The mark bytes contain no pattern magic characters
			decomposed = (decomposed:gsub(mark, ""))
		end

		local normalized = mw.ustring.toNFC(decomposed)
		if not normalized then
			return syllable
		end

		-- Special case: standalone `ng` syllable after tone removal
		if normalized == "ng" then
			return "ng" .. tone
		end

		-- Extract the initial: digraphs first, then a single known letter.
		-- A letter that is not a BUC initial (e.g. "f") is left in place so
		-- the final lookup fails and the syllable is returned unchanged,
		-- instead of the letter being silently dropped.
		local initial = normalized:sub(1, 2)
		if initial ~= "ch" and initial ~= "ng" then
			initial = normalized:sub(1, 1)
			if not BUC_TO_PSP_INITIALS[initial] then
				initial = ""
			end
		end
		local rest = normalized:sub(#initial + 1)

		-- The -h* marker only affects the tone, not the final lookup
		local final = (rest:gsub("h%*$", ""))

		local psp_final = BUC_TO_PSP_FINALS[final]
		if not psp_final then
			return syllable
		end

		return BUC_TO_PSP_INITIALS[initial] .. psp_final .. tone
	end)

	-- Return original syllable if conversion failed
	return success and result or syllable
end

-- Convert BUC to PSP (both single syllable and text)
-- Convert BUC text to PSP. The text is split on whitespace, hyphens and
-- ASCII/CJK punctuation; every run between delimiters is converted with
-- syllable_to_psp and the delimiters are copied through unchanged.
-- @param input  BUC text, or a table whose `args[1]` holds it
-- @return       converted text; nil or "" is returned as is
function export.buc_to_psp(input)
	local text
	if type(input) == "table" then
		text = input.args[1]
	else
		text = input
	end

	if not text or text == "" then
		return text
	end

	-- Syllables are converted while splitting. The earlier second pass
	-- re-identified delimiters with byte-oriented string.match, which can
	-- never match the multibyte CJK punctuation in this class, so those
	-- delimiters were fed to the syllable converter.
	local delimiter_class = "[%s%-%.,;:!%?,。;:!?「」『』、]"
	local parts = {}
	local last_pos = 1

	-- `pos` and all offsets below are character positions (mw.ustring)
	for pos, delimiter in mw.ustring.gmatch(text, "()(" .. delimiter_class .. ")") do
		if pos > last_pos then
			parts[#parts + 1] = syllable_to_psp(mw.ustring.sub(text, last_pos, pos - 1))
		end
		parts[#parts + 1] = delimiter
		-- The class matches exactly one character
		last_pos = pos + 1
	end

	-- Trailing syllable after the last delimiter
	if last_pos <= mw.ustring.len(text) then
		parts[#parts + 1] = syllable_to_psp(mw.ustring.sub(text, last_pos))
	end

	return table.concat(parts)
end

return export