Jump to content

Module:cop-sortkey

From Wiktionary, the free dictionary

This module generates sort keys for Coptic-language text. It is also used to sort Old Nubian. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{sortkey}}. Within a module, use Module:languages#Language:makeSortKey.

For testcases, see Module:cop-sortkey/testcases.

Functions

makeSortKey(text, lang, sc)
Generates a sortkey for a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the sort fails, returns nil.

Testcases

  • ⲁⲗⲁⲕ (ΑΛΚ1Α )
  • ⲁⲗⲟⲕ (ΑΛΚ1Ο )
  • ⲁⲗⲕⲉ (ΑΛΚ2Ε )
  • ⲁⲗⲓⲕⲓ (ΑΛΚ2ΙΙ )
  • ⲁⲗⲕⲟⲩ (ΑΛΚ2Υ )
  • ⲟⲩⲣ (ΥΡ1 )
  • ⲟⲩⲏⲣ (ΥΡ1Η )
  • ⲟⲩⲱⲣ (ΥΡ1Ω )
  • ⲟⲩⲉⲓⲣⲉ (ΥΡ2ΙΕ )
  • ⲟⲩⲟⲣⲉ (ΥΡ2ΟΕ )
  • ⲟⲩⲣⲱ (ΥΡ2Ω )
  • ⲁⲗⲟⲩ (ΑΛ2Υ )
  • ⲁⲗⲱ (ΑΛ2Ω )
  • ⲁⲗⲧⲏⲁⲥ (ΑΛΤΣ1ΗΑ )
  • ⲁⲗⲁⲩ (ΑΛΥ1Α )
  • ⲁⲗⲏⲟⲩ (ΑΛΥ1Η )
  • ⲁⲗⲓⲟⲩⲓ (ΑΛΥ2ΙΙ )
  • ⲁⲗⲱⲟⲩⲉ (ΑΛΥ2ΩΕ )
  • ⲁⲗϣⲱⲟⲩ (ΑΛϢΥ1Ω )
  • ⲙⲁⲧⲟⲩ (ΜΤ2ΑΥ )
  • ⲙⲁⲩ (ΜΥ1Α )
  • ⲙⲁⲁⲩ (ΜΥ1ΑΑ )
  • ⲙⲟⲟⲩ (ΜΥ1Ο )
  • ⲙⲁⲩⲁⲁ- (ΜΥ2ΑΑΑ )
  • ⲙⲉⲉⲩⲉ (ΜΥ2ΕΕΕ )
  • ⲙⲁⲟⲩⲥⲉ (ΜΥΣ2ΑΕ )
  • ⲙⲟⲩⲟⲩⲧ (ΜΥΤ1Υ )
  • ⲙⲫⲏ (ΜΦ2Η )
  • ⲧⲁⲗ (ΤΛ1Α )
  • ⲧⲏⲗ⸗ (ΤΛ1Η )
  • ⲧⲁⲗⲟ (ΤΛ2ΑΟ )
  • ϯⲗⲓ (ΤΛ2ΙΙ )
  • ⲧⲱⲓⲗⲓ (ΤΛ2ΩΙΙ )
  • ⲕⲱ (Κ2Ω )
  • ⲕⲱ ⲉⲃⲟⲗ (Κ2Ω ΕΒΛ1Ο )
  • ˋϣⲗⲏⲗ (ϢΛΛ1Η )

-- Table of functions exported by this module (returned at the end of the file).
-- Declared local so that loading the module does not create a global variable.
local export = {}

-- Short alias for the Unicode-aware pattern matcher.
local match = mw.ustring.match

-- Unicode-aware gsub that returns only the substituted string.
-- The wrapping parentheses drop the substitution count that
-- mw.ustring.gsub returns as its second value.
local function ugsub(text, regex, replacement)
	return (mw.ustring.gsub(text, regex, replacement))
end

-- Letters recognised by the sorter. The trailing Latin "w" is the placeholder
-- that makeSortKey substitutes for ⲩ when it stands next to a vowel, so that
-- it is classified as a consonant.
local alphabet = "ⲁⲃⲅⲇⲉⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱϣϥⳉϧϩϫϭw"
local vowels = "ⲁⲉⲏⲓⲟⲩⲱ"
-- Character class matching any single vowel.
local vowel = ("[%s]"):format(vowels)
-- Everything in the alphabet that is not a vowel.
local consonants = (mw.ustring.gsub(alphabet, vowel, ""))
-- Character class matching any single consonant.
local consonant = ("[%s]"):format(consonants)

--[[
	Normalisations applied to the lowercased text by makeSortKey before the
	key is built. Each key is used as a string.gsub pattern and each value as
	its replacement; an empty value deletes the character.

	NOTE(review): makeSortKey walks this table with pairs(), whose order is
	unspecified, yet some rules can feed others (e.g. deleting "-" between
	"ⲟ" and "ⲩ" produces a new "ⲟⲩ"). Confirm whether a fixed order is needed.
]]
local replacements = {
	["ⲟⲩ"] = "ⲩ", -- digraph collapses to the single letter ⲩ
	["ⳤ"] = "ⲕⲉ", -- presumably an abbreviation sign spelled out as ⲕⲉ; verify
	["ⲉⲓ"] = "ⲓ", -- digraph collapses to the single letter ⲓ
	["ϯ"] = "ⲧⲓ", -- single letter expanded to ⲧ + ⲓ
	["-"] = "", -- the characters below are removed from the text
	["⸗"] = "",
	["ˋ"] = "",
}

-- Lookup from each Greek-derived Coptic letter to the corresponding Greek
-- lowercase letter. Built by walking two parallel strings, one UTF-8
-- character at a time; the n-th Coptic letter maps to the n-th Greek letter.
local CopticToGreek = {}
do
	local multibyte_char = "[\194-\244][\128-\191]+"
	local coptic_letters = "ⲁⲃⲅⲇⲉⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱ"
	local greek_letters = "αβγδεζηθικλμνξοπρστυφχψω"
	local next_greek = string.gmatch(greek_letters, multibyte_char)
	for coptic_letter in string.gmatch(coptic_letters, multibyte_char) do
		CopticToGreek[coptic_letter] = next_greek()
	end
end

--[[
	Builds a sort key for a piece of text.

	Parameters:
		text	the text to make a key for
		lang	language code (not used by this function)
		sc		script code; if given and not "Copt", the text is simply
				uppercased and returned

	Returns nil when text is nil or false; otherwise the uppercased key.

	For every whitespace-separated word the key contains, in this order:
		1. the initial vowel, if the word starts with one;
		2. the word with all vowels removed (the consonant skeleton);
		3. "1" if the word ends in a consonant, "2" if it ends in a vowel;
		4. the word with its initial vowel and all consonants removed;
		5. a space.
]]
function export.makeSortKey(text, lang, sc)
	if not text then
		return nil
	elseif sc and sc ~= "Copt" then
		return mw.ustring.upper(text)
	end
	
	local str_gsub = string.gsub
	
	text = mw.ustring.lower(text)
	
	-- Normalise digraphs and special letters, and strip punctuation.
	for letter, replacement in pairs(replacements) do
		text = str_gsub(text, letter, replacement)
	end
	
	--[[
		ⲩ next to a vowel is swapped for the placeholder "w", which is
		in the consonant class, so that it is treated as a consonant below.
	]]
	text = ugsub(text, "ⲩ(" .. vowel .. ")", "w%1")
	text = ugsub(text, "(" .. vowel .. ")ⲩ", "%1w")
	
	local parts = {}
	
	for word in mw.ustring.gmatch(text, "%S+") do
		-- Add initial vowel (if any).
		local initial_vowel = match(word, "^" .. vowel)
		if initial_vowel then
			table.insert(parts, initial_vowel)
		end
		
		-- Add consonants (in order).
		table.insert(parts, ugsub(word, vowel .. "+", ""))
		
		--[[
			Add the number "1" if word ends in consonant, "2" if it ends
			in a vowel.
			"1" sorts before Greek–Coptic and Coptic Unicode blocks.
		]]
		if match(word, consonant .. "$") then
			table.insert(parts, "1")
		elseif match(word, vowel .. "$") then
			table.insert(parts, "2")
		end
		
		-- Get non-initial vowels (in order) by removing initial vowel and all consonants.
		table.insert(parts, ugsub(ugsub(word, "^" .. vowel, ""), consonant, ""))
		
		table.insert(parts, " ")
	end
	
	local key = table.concat(parts)
	
	-- Turn the consonantal placeholder back into ⲩ.
	key = str_gsub(key, "w", "ⲩ")
	
	--[[
		Convert Greek-derived Coptic characters to Greek ones.
		Otherwise, the uniquely Coptic letters would sort first, because
		they were added to Unicode earlier.
		ϣϥⳉϧϩϫϭ		ⲁⲃⲅⲇⲉⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱ

		αβγδεζηθικλμνξοπρστυφχψω	ϣϥⳉϧϩϫϭ

		The pattern matches one multi-byte UTF-8 character at a time;
		characters missing from the lookup table are left unchanged.
	]]
	key = str_gsub(key, "[\194-\244][\128-\191]+", CopticToGreek)
	
	return mw.ustring.upper(key)
end

--[[
	Language and script objects used by tag(). They are resolved on the first
	call rather than at load time, so that code which only needs makeSortKey
	does not load the language and script modules.
]]
local lang, sc

-- Wraps text in script tagging via Module:script utilities, using the
-- "cop" language object and the "Copt" script object.
local function tag(text)
	lang = lang or require("Module:languages").getByCode("cop")
	sc = sc or require("Module:scripts").getByCode("Copt")
	return require("Module:script utilities").tag_text(text, lang, sc)
end

--[[
	Sorts the positional arguments of frame by their sort keys and returns
	them as a wikitext bullet list, each term tagged and followed by its
	sort key in <code> tags.
]]
function export.showSorting(frame)
	-- Compute each sort key once, instead of on every comparison and again
	-- when formatting the output.
	local entries = {}
	for _, term in ipairs(frame.args) do
		entries[#entries + 1] = { term = term, key = export.makeSortKey(term) }
	end
	
	table.sort(entries, function(a, b)
		return a.key < b.key
	end)
	
	local lines = {}
	for i, entry in ipairs(entries) do
		lines[i] = "\n* " .. tag(entry.term) .. " (<code>" .. entry.key .. "</code>)"
	end
	
	return table.concat(lines)
end

-- Hand the table of exported functions (makeSortKey, showSorting) to the caller.
return export