Module:cop-sortkey
Appearance
- The following documentation is located at Module:cop-sortkey/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module will sort Coptic language text. It is also used to sort Old Nubian.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{sortkey}}.
Within a module, use Module:languages#Language:makeSortKey.
For testcases, see Module:cop-sortkey/testcases.
Functions
makeSortKey(text, lang, sc)
- Generates a sortkey for a given piece of text written in the script specified by the code sc, and language specified by the code lang.
- When the sort fails, returns nil.
Testcases
- ⲁⲗⲁⲕ (
ΑΛΚ1Α
) - ⲁⲗⲟⲕ (
ΑΛΚ1Ο
) - ⲁⲗⲕⲉ (
ΑΛΚ2Ε
) - ⲁⲗⲓⲕⲓ (
ΑΛΚ2ΙΙ
) - ⲁⲗⲕⲟⲩ (
ΑΛΚ2Υ
)
- ⲟⲩⲣ (
ΥΡ1
) - ⲟⲩⲏⲣ (
ΥΡ1Η
) - ⲟⲩⲱⲣ (
ΥΡ1Ω
) - ⲟⲩⲉⲓⲣⲉ (
ΥΡ2ΙΕ
) - ⲟⲩⲟⲣⲉ (
ΥΡ2ΟΕ
) - ⲟⲩⲣⲱ (
ΥΡ2Ω
)
- ⲁⲗⲟⲩ (
ΑΛ2Υ
) - ⲁⲗⲱ (
ΑΛ2Ω
) - ⲁⲗⲧⲏⲁⲥ (
ΑΛΤΣ1ΗΑ
) - ⲁⲗⲁⲩ (
ΑΛΥ1Α
) - ⲁⲗⲏⲟⲩ (
ΑΛΥ1Η
) - ⲁⲗⲓⲟⲩⲓ (
ΑΛΥ2ΙΙ
) - ⲁⲗⲱⲟⲩⲉ (
ΑΛΥ2ΩΕ
) - ⲁⲗϣⲱⲟⲩ (
ΑΛϢΥ1Ω
)
- ⲙⲁⲧⲟⲩ (
ΜΤ2ΑΥ
) - ⲙⲁⲩ (
ΜΥ1Α
) - ⲙⲁⲁⲩ (
ΜΥ1ΑΑ
) - ⲙⲟⲟⲩ (
ΜΥ1Ο
) - ⲙⲁⲩⲁⲁ- (
ΜΥ2ΑΑΑ
) - ⲙⲉⲉⲩⲉ (
ΜΥ2ΕΕΕ
) - ⲙⲁⲟⲩⲥⲉ (
ΜΥΣ2ΑΕ
) - ⲙⲟⲩⲟⲩⲧ (
ΜΥΤ1Υ
) - ⲙⲫⲏ (
ΜΦ2Η
)
- ⲧⲁⲗ (
ΤΛ1Α
) - ⲧⲏⲗ⸗ (
ΤΛ1Η
) - ⲧⲁⲗⲟ (
ΤΛ2ΑΟ
) - ϯⲗⲓ (
ΤΛ2ΙΙ
) - ⲧⲱⲓⲗⲓ (
ΤΛ2ΩΙΙ
)
- ⲕⲱ (
Κ2Ω
) - ⲕⲱ ⲉⲃⲟⲗ (
Κ2Ω ΕΒΛ1Ο
) - ˋϣⲗⲏⲗ (
ϢΛΛ1Η
)
-- Table of functions returned by this module. Declared `local` so that loading
-- the module does not create (or overwrite) a global variable named `export`.
local export = {}

-- Shorthand for the mw.ustring pattern matcher used throughout the module.
local match = mw.ustring.match
-- Runs mw.ustring.gsub and hands back only its first result (the substituted
-- string), so the call can be nested inside other calls or table constructors
-- without dragging extra return values along.
local function ugsub(text, regex, replacement)
	-- The surrounding parentheses truncate the call to exactly one value.
	return (mw.ustring.gsub(text, regex, replacement))
end
-- Every letter the sort key logic knows about. The final Latin "w" is not a
-- Coptic letter: it is the placeholder makeSortKey substitutes for ⲩ when ⲩ
-- stands next to a vowel, and it is turned back into ⲩ before the key is
-- returned.
local alphabet = "ⲁⲃⲅⲇⲉⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱϣϥⳉϧϩϫϭw"
-- The subset of `alphabet` counted as vowels.
local vowels = "ⲁⲉⲏⲓⲟⲩⲱ"
-- Pattern character class matching exactly one vowel.
local vowel = ("[%s]"):format(vowels)
-- Whatever remains of `alphabet` once the vowels are removed (includes "w").
local consonants = ugsub(alphabet, vowel, "")
-- Pattern character class matching exactly one consonant.
local consonant = ("[%s]"):format(consonants)
-- Literal sequences that makeSortKey rewrites in the lowercased text before it
-- splits each word into vowels and consonants. Each key is the sequence that
-- is searched for and each value is its substitute; an empty value deletes
-- the key from the text.
local replacements = {
-- Two letters replaced by the single letter ⲩ.
["ⲟⲩ"] = "ⲩ",
-- One symbol expanded to the two letters ⲕⲉ.
["ⳤ"] = "ⲕⲉ",
-- Two letters replaced by the single letter ⲓ.
["ⲉⲓ"] = "ⲓ",
-- One letter expanded to the two letters ⲧⲓ.
["ϯ"] = "ⲧⲓ",
-- The remaining three characters are simply deleted.
["-"] = "",
["⸗"] = "",
["ˋ"] = "",
}
-- Lookup table from each Greek-derived Coptic lowercase letter to the Greek
-- lowercase letter at the same position in the alphabet (ⲁ → α … ⲱ → ω).
-- Letters that exist only in Coptic (ϣ, ϥ, …) deliberately have no entry.
local CopticToGreek = {}
do
	-- The two strings are parallel: the n-th Coptic letter maps to the n-th
	-- Greek letter. Both contain 24 letters.
	local coptic_letters = "ⲁⲃⲅⲇⲉⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱ"
	local greek_letters = "αβγδεζηθικλμνξοπρστυφχψω"
	-- Matches one multi-byte UTF-8 character (lead byte + continuation bytes).
	local utf8_char = "[\194-\244][\128-\191]+"

	local next_greek = string.gmatch(greek_letters, utf8_char)
	for coptic in string.gmatch(coptic_letters, utf8_char) do
		CopticToGreek[coptic] = next_greek()
	end
end
-- Builds the order in which the keys of `replacements` are applied.
-- The order matters because one substitution can feed another: "ⳤ" expands to
-- "ⲕⲉ", which followed by "ⲓ" forms "ⲉⲓ", and a deleted mark may sit between
-- the two halves of "ⲟⲩ" or "ⲉⲓ". Iterating the table with pairs() would leave
-- that order unspecified, so it is fixed here:
--   1. delete "-", "⸗" and "ˋ";
--   2. expand the single symbols "ⳤ" and "ϯ";
--   3. collapse the two-letter sequences "ⲟⲩ" and "ⲉⲓ".
-- Any key of `replacements` that is not listed is appended afterwards in
-- byte order, so that new entries are still applied deterministically.
local function orderedReplacementKeys()
	local order = { "-", "⸗", "ˋ", "ⳤ", "ϯ", "ⲟⲩ", "ⲉⲓ" }
	local listed = {}
	for _, key in ipairs(order) do
		listed[key] = true
	end
	local unlisted = {}
	for key in pairs(replacements) do
		if not listed[key] then
			unlisted[#unlisted + 1] = key
		end
	end
	table.sort(unlisted)
	for _, key in ipairs(unlisted) do
		order[#order + 1] = key
	end
	return order
end

local replacement_order = orderedReplacementKeys()

-- Generates the sort key for `text`.
--
-- Parameters:
--   text  the string to sort.
--   lang  language code; accepted but not used by this function.
--   sc    script code; when given and different from "Copt" the text is only
--         uppercased.
--
-- Returns nil when `text` is nil or false. Otherwise returns an uppercase
-- string in which every whitespace-separated word of `text` contributes, in
-- this order: its initial vowel (if any), the word with all vowels removed,
-- "1" if the word ends in a consonant or "2" if it ends in a vowel, its
-- non-initial vowels, and a single space (so the key ends with a space).
function export.makeSortKey(text, lang, sc)
	if not text then
		return nil
	elseif sc and sc ~= "Copt" then
		return mw.ustring.upper(text)
	end

	local str_gsub = string.gsub

	text = mw.ustring.lower(text)
	for _, letter in ipairs(replacement_order) do
		local replacement = replacements[letter]
		if replacement then
			text = str_gsub(text, letter, replacement)
		end
	end

	-- ⲩ directly before or after a vowel is swapped for the placeholder "w".
	-- "w" belongs to `consonant`, so such a ⲩ is filed with the consonants.
	text = ugsub(text, "ⲩ(" .. vowel .. ")", "w%1")
	text = ugsub(text, "(" .. vowel .. ")ⲩ", "%1w")

	-- Patterns that do not change from word to word.
	local initial_vowel = "^" .. vowel
	local vowel_run = vowel .. "+"
	local final_consonant = consonant .. "$"
	local final_vowel = vowel .. "$"

	local parts = {}
	for word in mw.ustring.gmatch(text, "%S+") do
		-- Initial vowel, if the word has one.
		local first = match(word, initial_vowel)
		if first then
			parts[#parts + 1] = first
		end

		-- The word with every vowel removed: its consonants, in order.
		parts[#parts + 1] = ugsub(word, vowel_run, "")

		-- "1" for a final consonant, "2" for a final vowel, nothing when the
		-- word ends in some other character. Both digits sort before the
		-- Greek–Coptic and Coptic Unicode blocks.
		if match(word, final_consonant) then
			parts[#parts + 1] = "1"
		elseif match(word, final_vowel) then
			parts[#parts + 1] = "2"
		end

		-- Non-initial vowels, in order: drop the initial vowel, then drop
		-- every consonant.
		parts[#parts + 1] = ugsub(ugsub(word, initial_vowel, ""), consonant, "")
		parts[#parts + 1] = " "
	end

	-- Turn the placeholder back into ⲩ.
	local key = str_gsub(table.concat(parts), "w", "ⲩ")

	--[[
		Convert Greek-derived Coptic characters to Greek ones.
		Otherwise, the uniquely Coptic letters would sort first, because
		they were added to Unicode earlier.
			ϣϥⳉϧϩϫϭ ⲁⲃⲅⲇⲉⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱ
							⇓
			αβγδεζηθικλμνξοπρστυφχψω ϣϥⳉϧϩϫϭ
		Characters without an entry in CopticToGreek are left unchanged.
	]]
	key = str_gsub(key, "[\194-\244][\128-\191]+", CopticToGreek)

	return mw.ustring.upper(key)
end
-- Coptic language and script objects needed by `tag`. They are created on the
-- first call to `tag` rather than when the module is loaded, so that callers
-- that only want makeSortKey do not pay for loading Module:languages and
-- Module:scripts.
local lang, sc

-- Passes `text` to tag_text from Module:script utilities together with the
-- Coptic language ("cop") and script ("Copt") objects, and returns its result.
local function tag(text)
	lang = lang or require("Module:languages").getByCode("cop")
	sc = sc or require("Module:scripts").getByCode("Copt")
	return require("Module:script utilities").tag_text(text, lang, sc)
end
-- Template-invokable helper that demonstrates the sort order.
--
-- Takes the positional arguments of `frame` as terms, sorts them by their
-- sort keys, and returns one wikitext list item per term of the form
-- "\n* <tagged term> (<code>SORTKEY</code>)", concatenated into one string.
function export.showSorting(frame)
	-- Compute each sort key exactly once; the comparator and the output both
	-- reuse it instead of calling makeSortKey again.
	local entries = {}
	for _, term in ipairs(frame.args) do
		entries[#entries + 1] = { term = term, key = export.makeSortKey(term) }
	end

	table.sort(entries, function(entry1, entry2)
		return entry1.key < entry2.key
	end)

	local lines = {}
	for i, entry in ipairs(entries) do
		lines[i] = "\n* " .. tag(entry.term) .. " (<code>" .. entry.key .. "</code>)"
	end

	return table.concat(lines)
end
return export