Jump to content

Module:zh/data/nan-pron/list

From Wiktionary, the free dictionary

Generates lists of items in Min Nan pronunciation data modules, such as Module:zh/data/nan-pron/000.


-- Module table: export.show is attached to it below, and the table itself is
-- returned on the last line of the file.
local export = {}

-- Pair iterator from [[Module:table]]; used below both with and without a
-- comparison function. NOTE(review): presumably iterates in sorted key order,
-- using the comparison function when one is given -- confirm in Module:table.
local sortedPairs = require("Module:table").sortedPairs
-- Lua pattern for a single UTF-8 encoded character: one lead byte (NUL via %z,
-- ASCII \1-\127, or a multi-byte lead byte \194-\244) followed by any number of
-- continuation bytes (\128-\191). It always consumes at least one byte.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"

-- Based on tone_to_mark in [[Module:nan-pron]], but omits final stops.
-- Maps a combining tone diacritic (as found in NFD-decomposed text) to the
-- tone number, as a string, that replaces it in prepare().
-- NOTE(review): the "" key cannot be reached through the gsub lookup in
-- prepare(), because UTF8_char never matches an empty string; syllables with
-- no tone mark therefore get no digit added. Confirm this is intended.
local tone_to_number = {
	[""] = "1",   -- no diacritic
	["́"] = "2",  -- combining acute accent
	["̀"] = "3",  -- combining grave accent
	["̂"] = "5",  -- combining circumflex accent
	["̌"] = "6",  -- combining caron
	["̄"] = "7",  -- combining macron
	["̍"] = "8",  -- combining vertical line above
	["̋"] = "9",  -- combining double acute accent
}

local decompose = mw.ustring.toNFD

--- Normalizes an item into the string used for sorting and grouping.
-- Steps, in order:
--   1. Decompose to NFD so tone diacritics become separate combining characters.
--   2. Lowercase with the byte-oriented string method.
--   3. Replace every character that is a key of tone_to_number with its tone
--      digit; characters not in the table are left unchanged.
--   4. Move a digit past the ASCII letters that follow it, and past a following
--      "ⁿ", so the tone number hopefully ends up at the end of the syllable.
-- The result is memoized through [[Module:memoize]].
-- NOTE(review): the second argument `true` presumably selects that module's
-- simple single-argument mode -- confirm against Module:memoize.
-- @param item string to normalize
-- @return the normalized string (exactly one value)
local prepare = require("Module:memoize")(function(item)
	-- Assign to a local first: string.gsub returns (string, count), and
	-- returning the call directly would leak the count as a second result.
	local normalized = decompose(item)
		:lower()
		:gsub(UTF8_char, tone_to_number)
		:gsub("([0-9])([a-z]+)", "%2%1")
		:gsub("([0-9])ⁿ", "ⁿ%1")
	return normalized
end, true)

--- Ordering function for sortedPairs: true when item1 sorts before item2.
-- Both items are normalized with prepare() and the results are compared with
-- Lua's `<` operator.
local function compare(item1, item2)
	local left = prepare(item1)
	local right = prepare(item2)
	return left < right
end

--- Appends v to the array part of t, using t.n as the element counter.
-- t.n must already be a number; it is incremented and the value is stored at
-- the new index.
local function add(t, v)
	local index = t.n + 1
	t[index] = v
	t.n = index
end

--- Builds a grouped wikitext list of the entries in one Min Nan pronunciation
-- data module.
-- The data module "Module:zh/data/nan-pron/" .. frame.args[1] is loaded and its
-- entries are visited via sortedPairs with `compare`. Entries are grouped by
-- prepare() applied to the first UTF-8 character of the key (suffix "-019") or
-- of the value (every other suffix). Groups are emitted in sortedPairs order
-- of their group key.
-- @param frame Scribunto frame; frame.args[1] is the data module suffix
-- @return wikitext string: one line of links per group, with groups separated
--         by a blank line
function export.show(frame)
	local number = frame.args[1]
	if number == nil then
		-- Fail with a readable message instead of a nil-concatenation error.
		error("Parameter 1 (data module suffix) is required.")
	end
	local list = require("Module:zh/data/nan-pron/"..number)
	local results = {}
	local old_module = number == "-019"
	if old_module then
		-- Only the keys are used here; each becomes a plain link.
		for key in sortedPairs(list, compare) do
			local current = prepare(key:match(UTF8_char))
			-- Append unconditionally: creating the group and appending only in
			-- an `else` branch would drop the first key of every group.
			results[current] = (results[current] or "") .. "[[" .. key .. "]] "
		end
	else
		-- Key is Han characters, value is Pe̍h-ōe-jī.
		for key, val in sortedPairs(list, compare) do
			local current = prepare(val:match(UTF8_char))
			local group = results[current]
			if not group then
				group = { n = 0 }
				results[current] = group
			end
			-- Always add, including the entry that created the group.
			add(group, "[[" .. key .. "|" .. val .. " " .. key .. "]]")
		end
	end

	local result = {}
	local i = 0
	for _, val in sortedPairs(results) do
		i = i + 1
		if old_module then
			-- Groups are already finished strings.
			result[i] = val
		else
			-- Groups are arrays of links; join them with single spaces.
			result[i] = table.concat(val, " ")
		end
	end

	return table.concat(result, "\n\n")
end

return export