Jump to content

Module:tl-sortkey

From Wiktionary, the free dictionary

This module will sort Tagalog language text. It is also used to sort Bikol Central, Cebuano, Hiligaynon, Hanunoo, Ilocano, Kankanaey, Kapampangan, and Waray-Waray. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{sortkey}}. Within a module, use Module:languages#Language:makeSortKey.

For testcases, see Module:tl-sortkey/testcases.

Functions

makeSortKey(text, lang, sc)
Generates a sortkey for a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the sort fails, returns nil.

Alphabetic order: a b c d e ë f g h i j k l m n ñ ng o p q r s t u v w x y z.

Examples

  • ANDAR¹
andar
  • AN₃¹
ang
  • AN₃AL¹
angal
  • AN₃AL¹
ang̃al
  • ANTALA¹
antala
  • AN₂O¹
Año
  • BAMBAN¹
Bamban
  • BAMBAN₃¹
Bambang
  • BANAL¹
banal
  • BAN₃A¹
banga
  • BAN₃GA¹
bangga
  • BATANES¹
Batanes
  • BATAN₃AS¹
Batangas
  • BIN₂AN¹
Biñan
  • BINIBINI¹
binibini
  • BINYAG¹
binyag
  • BIN₃I¹
bingi

-- Table of functions returned by this module.
local export = {}

-- Shorthand for building a string from Unicode code points.
local u = mw.ustring.char

-- Marker characters (U+F000 and U+F001, both in the Private Use Area) that
-- are appended to a base letter in the replacement tables below.
local a = u(0xF000)
local b = u(0xF001)

-- Single characters that are rewritten as a base letter followed by marker `a`.
local oneChar = {
	["ë"] = "e" .. a,
	["ñ"] = "n" .. a,
}

-- Letter sequences that are all rewritten as "n" followed by marker `b`.
local twoChars = {
	["ng"] = "n" .. b,
	["ng̃"] = "n" .. b,
	["ñg"] = "n" .. b,
}

--- Generates the sort key for a piece of text.
-- The text is normalised to NFC and case-folded, a leading character in the
-- range "!" to "&" is moved behind the run of non-space characters it
-- prefixes, the sequences listed in `twoChars` and the characters listed in
-- `oneChar` are replaced, and the result is uppercased.
-- @param text string to generate the sort key for
-- @param lang language code; accepted but not used by this implementation
-- @param sc script code; accepted but not used by this implementation
-- @return the uppercased sort key
function export.makeSortKey(text, lang, sc)
	-- Normalise before matching so that a decomposed "n" + combining tilde is
	-- seen as "ñ", and fold case so that "Ng", "NG", "Ñ" and "Ë" are matched
	-- by the lowercase keys of the replacement tables.
	text = mw.ustring.lower(mw.ustring.toNFC(text))

	text = mw.ustring.gsub(text, "([!-&])([^%s]+)", "%2%1")

	-- pairs() visits keys in an unspecified order, and "ng̃" contains "ng":
	-- apply the longest sequences first so that a shorter key can never
	-- consume part of a longer one and leave a stray combining mark behind.
	local digraphs = {}
	for from, to in pairs(twoChars) do
		digraphs[#digraphs + 1] = { mw.ustring.toNFC(from), to }
	end
	table.sort(digraphs, function(x, y)
		if #x[1] ~= #y[1] then
			return #x[1] > #y[1]
		end
		return x[1] < y[1]
	end)

	for _, digraph in ipairs(digraphs) do
		text = text:gsub(digraph[1], digraph[2])
	end

	-- The extra parentheses drop gsub's second return value (the match count).
	return mw.ustring.upper((mw.ustring.gsub(text, ".", oneChar)))
end

-- Language object for Tagalog, used for script detection and tagging below.
local tl = require("Module:languages").getByCode("tl")

-- Wraps `text` using tag_text from Module:script utilities, passing the
-- Tagalog language object.
local function tag(text)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, tl)
end

-- Visible stand-ins for the marker characters when a sort key is displayed.
local showsubst = {}
showsubst[a] = "₂"
showsubst[b] = "₃"

--- Builds a wikitext list showing the sort key of each positional argument.
-- Each entry is a list item with the displayable sort key in <code> tags,
-- followed by a definition line containing the tagged term.
-- @param frame frame object whose positional args are the terms to show
-- @return the concatenated wikitext for all terms
function export.showSortkey(frame)
	local items = {}

	for _, entry in ipairs(frame.args) do
		local script = tl:findBestScript(entry):getCode()
		local raw = export.makeSortKey(entry, "tl", script)

		-- Replace the marker characters with their visible stand-ins.
		local shown = mw.ustring.gsub(raw, ".", showsubst)
		-- Add "¹" after the final character, and after any character that is
		-- followed by whitespace, unless that character is an ASCII digit or "²".
		shown = mw.ustring.gsub(shown, "([^0-9²])$", "%1¹")
		shown = mw.ustring.gsub(shown, "([^0-9²])(%s)", "%1¹%2")

		items[#items + 1] = "\n* <code>" .. shown .. "</code>\n: " .. tag(entry)
	end

	return table.concat(items)
end

--- Sorts the positional arguments by sort key and lists them as wikitext.
-- Each list item shows the tagged term followed by its displayable sort key
-- in parentheses.
-- @param frame frame object whose positional args are the terms to sort
-- @return the concatenated wikitext for the sorted terms
function export.showSorting(frame)
	local sorted = {}
	for _, arg in ipairs(frame.args) do
		sorted[#sorted + 1] = arg
	end

	-- Wrapped with Module:memoize; presumably this caches the key per term so
	-- the comparator does not recompute it on every comparison.
	local cachedKey = require("Module:memoize")(export.makeSortKey)
	table.sort(sorted, function(left, right)
		return cachedKey(left) < cachedKey(right)
	end)

	-- Replace each term in place with its formatted list item.
	for index = 1, #sorted do
		local entry = sorted[index]
		local script = tl:findBestScript(entry):getCode()
		local raw = export.makeSortKey(entry, "tl", script)

		local shown = mw.ustring.gsub(raw, ".", showsubst)
		shown = mw.ustring.gsub(shown, "([^0-9²])$", "%1¹")
		shown = mw.ustring.gsub(shown, "([^0-9²])(%s)", "%1¹%2")

		sorted[index] = "\n* " .. tag(entry) .. " (<code>" .. shown .. "</code>)"
	end

	return table.concat(sorted)
end

-- Expose the module's functions to callers.
return export