-- Module:Tibt-sortkey
-- From Wiktionary, the free dictionary.


local export = {}

local m_str_utils = require("Module:string utilities")

local gsub = m_str_utils.gsub
local len = m_str_utils.len
local match = m_str_utils.match
local sub = m_str_utils.sub
local toNFC = mw.ustring.toNFC
local u = m_str_utils.char

local Tibt = require("Module:Tibt-common")
local a, b, c, d = u(0xE000), u(0xE001), u(0xE002), u(0xE003)

local letters = {
	{"ཀ", "ྐ"}, {"ཀ༹", "ྐ༹", ""}, {"ཫ", "ཫ"}, {"ཫ༹", "ཫ༹", ""}, {"ཁ", "ྑ"}, {"ཁ༹", "ྑ༹", ""}, {"ག", "ྒ"}, {"ག༹", "ྒ༹", ""}, {"ང", "ྔ"}, {"ང༹", "ྔ༹", ""}, {"ཅ", "ྕ"}, {"ཆ", "ྖ"}, {"ཇ", "ྗ"}, {"ཉ", "ྙ"}, {"ཉ༹", "ྙ༹", ""}, {"ཊ", "ྚ"}, {"ཊ༹", "ྚ༹", ""}, {"ཋ", "ྛ"}, {"ཋ༹", "ྛ༹", ""}, {"ཌ", "ྜ"}, {"ཌ༹", "ྜ༹", ""}, {"ཎ", "ྞ"}, {"ཎ༹", "ྞ༹", ""}, {"ཏ", "ྟ"}, {"ཏ༹", "ྟ༹", ""}, {"ཐ", "ྠ"}, {"ཐ༹", "ྠ༹", ""}, {"ད", "ྡ"}, {"ད༹", "ྡ༹", ""}, {"ན", "ྣ"}, {"ན༹", "ྣ༹", ""}, {"པ", "ྤ"}, {"པ༹", "ྤ༹", ""}, {"ཕ", "ྥ"}, {"ཕ༹", "ྥ༹", ""}, {"བ", "ྦ"}, {"བ༹", "ྦ༹", ""}, {"མ", "ྨ"}, {"མ༹", "ྨ༹", ""}, {"ཙ", "ྩ"}, {"ཚ", "ྪ"}, {"ཛ", "ྫ"}, {"ཝ", "ྭ"}, {"ཝ༹", "ྭ༹", ""}, {"ཞ", "ྮ"}, {"ཞ༹", "ྮ༹", ""}, {"ཟ", "ྯ"}, {"ཟ༹", "ྯ༹", ""}, {"འ", "ྰ"}, {"འ༹", "ྰ༹", ""}, {"ཡ", "ྱ"}, {"ཡ༹", "ྱ༹", ""}, {"ར", "ྲ"}, {"ར༹", "ྲ༹", ""}, {"ཬ", "ཬ"}, {"ཬ༹", "ཬ༹", ""}, {"ལ", "ླ"}, {"ལ༹", "ླ༹", ""}, {"ཤ", "ྴ"}, {"ཤ༹", "ྴ༹", ""}, {"ཥ", "ྵ"}, {"ཥ༹", "ྵ༹", ""}, {"ས", "ྶ"}, {"ས༹", "ྶ༹", ""}, {"ཧ", "ྷ"}, {"ཧ༹", "ྷ༹", ""}, {"ཨ", "ྸ"}, {"ཨ༹", "ྸ༹", ""}, {"ཱ", "ཱ"}, {"ི", "ི"}, {u(0xF73), "ཱི"}, {"ུ", "ུ"}, {u(0xF75), "ཱུ"}, {u(0xF76), "ྲྀ"}, {u(0xF77), "ྲཱྀ"}, {u(0xF78), "ླྀ"}, {u(0xF79), "ླཱྀ"}, {"ེ", "ེ"}, {"ཻ", "ཻ"}, {"ོ", "ོ"}, {"ཽ", "ཽ"}
}

-- Split a syllable into the text before and the text after its main stack.
-- Both patterns start with a greedy ".*", so (assuming `gsub` follows normal Lua
-- pattern semantics) the split happens around the LAST occurrence of
-- `mainStack`. When `mainStack` does not occur in `text`, neither pattern
-- matches and both results are `text` unchanged.
-- `mainStack` is spliced into the patterns as-is, without escaping.
-- Returns: prefix, suffix.
local function findAffixes(text, mainStack)
	local before = gsub(text, "(.*)" .. mainStack .. ".*", "%1")
	local after = gsub(text, ".*" .. mainStack .. "(.*)", "%1")
	return before, after
end

-- Separate the vowel signs from a main stack.
-- Returns two values: `mainStack` with every run of characters from the vowel
-- class removed, and the first such run (or "" when there is none).
local function findVowel(mainStack)
	local vowelRun = "[ཱ-ཽྀ]+"
	local vowel = match(mainStack, vowelRun)
	local stripped = gsub(mainStack, vowelRun, "")
	return stripped, vowel or ""
end

-- Break a (vowel-less) main stack into superjoined letter, radical and
-- subjoined remainder.
-- Returns: superjoined ("" when none is recognised), radical, subjoined.
-- NOTE(review): if `mainStack` is empty the radical match yields nil and the
-- following concatenation raises an error; callers are presumably expected to
-- pass a non-empty stack.
local function mainStackParts(mainStack)
	-- A head letter only counts as superjoined when it is followed by one of
	-- the subjoined letters listed for it.
	local superjoined = match(mainStack, "(ར)[ྐྒྔྗྙྟྡྣྦྨྩྫ]")
		or match(mainStack, "(ལ)[ྐྒྔྕྗྟྡྤྦྷ]")
		or match(mainStack, "(ས)[ྐྒྔྙྟྡྣྤྦྨྩ]")
		or ""

	-- Certain three-part stacks disqualify the head letter again, depending on
	-- which letter sits between it and the trailing subjoined letter.
	local rejected = false
	if superjoined == "ར" then
		rejected = match(mainStack, "ར[^ྐྒྨ]ྱ") ~= nil
	elseif superjoined == "ས" then
		rejected = match(mainStack, "ས[^ྐྒྤྦྨ]ྱ") ~= nil
			or match(mainStack, "ས[^ྐྒྣྤྦྨ]ྲ") ~= nil
	end
	if rejected then
		superjoined = ""
	end

	-- The radical is the single character right after the superjoined letter
	-- (or the first character when there is none); the rest is subjoined.
	local head = "^" .. superjoined
	local radical = match(mainStack, head .. "(.)")
	local subjoined = match(mainStack, head .. radical .. "(.*)")

	-- Rewrite the radical from its second-column form in `letters` to the
	-- first-column form.
	for i = 1, #letters do
		local letter = letters[i]
		radical = gsub(radical, letter[2], letter[1])
	end
	return superjoined, radical, subjoined
end

-- Build the radical's portion of the sort key.
-- The result is the radical's first-column letter followed by one of the
-- private-use sentinels `a`..`d`, which act as a tie-breaking suffix.
local function sortRadical(radical)
	-- Map any third-column form in `letters` back to the first-column letter.
	for i = 1, #letters do
		local letter = letters[i]
		local alt = letter[3]
		if alt then
			radical = gsub(radical, alt, letter[1])
		end
	end

	-- The mark "༹" becomes sentinel `b`.
	radical = gsub(radical, "༹", b)

	-- The two special letters are filed under other letters: marked forms get
	-- sentinel `d`, unmarked forms get sentinel `c`. The marked forms must be
	-- handled first, while the trailing `b` is still present.
	radical = gsub(radical, "ཫ" .. b, "ཀ" .. d)
	radical = gsub(radical, "ཬ" .. b, "ར" .. d)
	radical = gsub(radical, ".", {["ཫ"] = "ཀ" .. c, ["ཬ"] = "ར" .. c})

	-- Anything that does not already end in `b`..`d` gets sentinel `a`.
	local unmarkedTail = "([^" .. b .. "-" .. d .. "])$"
	return (gsub(radical, unmarkedTail, "%1" .. a))
end

-- Pack a list of sort values into a string, two values per character, to
-- reduce the length of the sort key. Each pair (hi, lo) is encoded as the
-- single code point 0x4E00 + hi * (#letters + 1) + lo, i.e. one digit in base
-- (#letters + 1)^2 (6724 with the current 81-entry `letters` table).
--
-- `value` is a sequence of numbers or numeric strings (callers in this module
-- pass `table.concat` results as entries; Lua's arithmetic coerces them).
-- When the sequence has an odd number of entries, the first entry is paired
-- with an implicit leading 0. Returns "" for an empty sequence.
--
-- Declared `local` so it does not leak into the global environment, and the
-- padding is applied while reading so the caller's table is not modified.
local function baseConvert(value)
	local radix = #letters + 1
	local count = #value
	-- 1 when an implicit leading 0 is needed to make the count even, else 0.
	local shift = count % 2
	local packed = {}
	for i = 1, (count + shift) / 2 do
		local hiIndex = i * 2 - 1 - shift
		local hi = 0
		if hiIndex >= 1 then
			hi = value[hiIndex]
		end
		local lo = value[i * 2 - shift]
		packed[i] = u(0x4E00 + hi * radix + lo)
	end
	return table.concat(packed)
end

-- Turn the leading characters of `part` into their positions in `letters`.
-- `partType` fixes how many positions are produced: 1 for "superjoined",
-- "prefix" and "vowel", 9 for "subjoined", 6 for "suffix". Any other value
-- leaves the width nil and the numeric `for` below raises an error.
-- Returns a table of exactly that many entries. Positions past the end of
-- `part`, and characters not found in `letters`, yield 0. (A character that is
-- not in `letters` but consists only of an ASCII digit passes the digits-only
-- check and is left as that one-character string.)
local function sortValue(part, partType)
	local widths = {superjoined = 1, prefix = 1, vowel = 1, subjoined = 9, suffix = 6}
	local width = widths[partType]

	local values = {}
	for pos = 1, width do
		local char = ""
		if pos <= len(part) then
			char = sub(part, pos, pos)
		end

		-- No early exit: if several entries match, the last one wins.
		local resolved = char
		for index = 1, #letters do
			local letter = letters[index]
			if char == letter[1] or char == letter[2] or char == letter[3] then
				resolved = index
			end
		end

		-- Still a non-numeric string (or empty): no entry matched.
		if resolved == "" or match(tostring(resolved), "[^0-9]") then
			resolved = 0
		end
		values[pos] = resolved
	end
	return values
end

-- Apply the `letters`-table rewrites used by makeSortKey to one string:
-- first every first- and second-column form of an entry that has a third
-- column is replaced by that third-column form, then second-column forms of
-- the entries from index 42 onwards are replaced by their first-column forms.
-- NOTE(review): the start index 42 is hard-coded; confirm it still matches
-- the intended position in `letters`.
local function normaliseLetterForms(str)
	for _, letter in ipairs(letters) do
		local alt = letter[3]
		if alt then
			str = gsub(str, letter[1], alt)
			str = gsub(str, letter[2], alt)
		end
	end
	for i = 42, #letters do
		local letter = letters[i]
		str = gsub(str, letter[2], letter[1])
	end
	return str
end

-- Build a sort key for Tibetan-script text.
--   text: the term to be sorted.
--   lang: language code (required; an error is raised when missing).
--   sc:   script code; when absent it is taken from the language object's
--         best script for `text`.
-- Text whose script code is not "Tibt" is returned unchanged. Otherwise the
-- key is the concatenation, syllable by syllable, of: the radical's key
-- (sortRadical), the packed superjoined/prefix values, the packed
-- subjoined/vowel values, and the packed suffix values; the whole key is
-- returned in NFC.
function export.makeSortKey(text, lang, sc)
	if not lang then
		error("Language code required.")
	end
	local langObj = require("Module:languages").getByCode(lang)

	sc = sc or langObj:findBestScript(text):getCode()
	if sc ~= "Tibt" then
		return text
	end

	text = (langObj:makeEntryName(text))

	-- Fold variant letter forms into the forms the rest of the code expects.
	text = gsub(text, ".", {["ཪ"] = "ར", ["ྺ"] = "ྭ", ["ྻ"] = "ྱ", ["ྼ"] = "ྲ"})

	local keys = {}
	for word in Tibt.getWords(text) do
		for rawSyllable in Tibt.getSyllables(word) do
			-- The main stack is located on the syllable as found; both
			-- strings are then normalised the same way.
			local mainStack = normaliseLetterForms(Tibt.findMainStack(rawSyllable, lang))
			local syllable = normaliseLetterForms(rawSyllable)

			local prefix, suffix = findAffixes(syllable, mainStack)
			local stackBase, vowel = findVowel(mainStack)
			local superjoined, radical, subjoined = mainStackParts(stackBase)

			local headValues = {
				table.concat(sortValue(superjoined, "superjoined")),
				table.concat(sortValue(prefix, "prefix"))
			}
			local stackValues = sortValue(subjoined, "subjoined")
			table.insert(stackValues, table.concat(sortValue(vowel, "vowel")))
			local suffixValues = sortValue(suffix, "suffix")

			keys[#keys + 1] = sortRadical(radical)
				.. baseConvert(headValues)
				.. baseConvert(stackValues)
				.. baseConvert(suffixValues)
		end
	end

	local key = table.concat(keys)

	-- Drop syllable separators unless the key consists of one alone.
	if match(key, ".[་༌]") or match(key, "[་༌].") then
		key = gsub(key, "[་༌]", "")
	end

	return toNFC(key)
end

-- Language object for "bo", looked up once when the module loads.
local bo = require("Module:languages").getByCode("bo")

-- Wrap `text` using Module:script utilities' tag_text with the "bo" language
-- object.
local function tag(text)
	local scriptUtilities = require("Module:script utilities")
	return scriptUtilities.tag_text(text, bo)
end

-- Template entry point: sort the positional arguments of `frame` by their
-- Tibetan sort keys and return them as a bulleted wikitext list, each term
-- passed through `tag`.
--
-- The sort keys are computed through a memoized wrapper so each term's key is
-- built once even though the comparator sees it many times.
--
-- The original output loop also looked up a script via Module:scripts and
-- computed a sort key per term, but never used the result (and passed the
-- looked-up script where makeSortKey compares against the string "Tibt");
-- that dead work has been removed. The output is unchanged.
function export.showSorting(frame)
	local terms = {}
	for _, term in ipairs(frame.args) do
		terms[#terms + 1] = term
	end

	local makeSortKey = require("Module:memoize")(export.makeSortKey)
	local function comp(term1, term2)
		return makeSortKey(term1, "bo", "Tibt") < makeSortKey(term2, "bo", "Tibt")
	end

	table.sort(terms, comp)

	-- `terms` is a sequence, so walk it in order with ipairs.
	for i, term in ipairs(terms) do
		terms[i] = "\n* " .. tag(term)
	end

	return table.concat(terms)
end

return export