-- Module:category tree/poscatboiler/data/scripts/blocks
-- Appearance
-- - The following documentation is located at Module:category tree/poscatboiler/data/scripts/blocks/documentation. [edit]
-- - Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
-- Creates a list of the Unicode blocks in which a script's characters are found. This list is displayed on category pages, such as the category for the Latin script.
local m_str_utils = require("Module:string utilities")
local concat = table.concat
local cp = m_str_utils.codepoint
local floor = math.floor
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local insert = table.insert
local sort = table.sort
local u = m_str_utils.char
local export = {}
local fun = require "Module:fun"
local Array = require "Module:array"
-- Ordering predicate for script codes, suitable for table.sort.
-- Codes exactly four characters long sort ahead of all other codes; within
-- each of those two groups the codes are ordered with the `<` operator.
local function compare_script_codes(code1, code2)
	local first_is_four = #code1 == 4
	local second_is_four = #code2 == 4
	-- Exactly one code is four letters long: that one goes first.
	if first_is_four ~= second_is_four then
		return first_is_four
	end
	-- Same group: plain string comparison.
	return code1 < code2
end
-- Sorts `script_codes` in place using compare_script_codes and returns the
-- same table, so the call can be used inline as an expression.
local function sort_scripts(script_codes)
	table.sort(script_codes, compare_script_codes)
	return script_codes
end
local block_data = require("Module:Unicode data/blocks")

-- Store each block's index within block_data in slot 4 of the block's own
-- table, so that a block table can be mapped back to its position later.
for position, block in ipairs(block_data) do
	block[4] = position
end
-- Finds the entry of block_data whose range (slot 1 = lower bound, slot 2 =
-- upper bound) contains `codepoint`. A binary search is used so that high
-- codepoints do not require walking the whole table.
-- Raises an error if no entry contains the codepoint.
local function binary_lookup_block(codepoint)
	local low = 1
	local high = block_data.length or #block_data
	while low <= high do
		local middle = floor((low + high) / 2)
		local candidate = block_data[middle]
		if codepoint < candidate[1] then
			-- Target lies below this block: discard the upper half.
			high = middle - 1
		else
			if codepoint <= candidate[2] then
				return candidate
			end
			-- Target lies above this block: discard the lower half.
			low = middle + 1
		end
	end
	error(string.format("No block found for codepoint U+%04X.", codepoint))
end
-- Returns the block table (an entry of block_data) containing the first
-- character of `char`.
--
-- Raises an error if no codepoint can be obtained from `char`, or (from
-- binary_lookup_block) if no block contains the codepoint.
function export.lookup_block(char)
	local codepoint = cp(char)
	-- Guard against a missing codepoint (presumably what cp yields for an
	-- empty string -- verify against Module:string utilities); otherwise the
	-- search below would fail with an opaque nil-comparison error.
	if type(codepoint) ~= "number" then
		error("Cannot look up a Unicode block: no codepoint could be read from the given character.")
	end
	-- binary_lookup_block either returns a block table or raises its own
	-- "No block found" error, so no nil check is needed on the result.
	return binary_lookup_block(codepoint)
end
-- Splits `pattern` into its "X-Y" ranges and its remaining single characters.
-- Every match of "(.)%-(.)" is removed from the pattern and recorded as a
-- { lower, higher } pair; each character left over is recorded as a single.
-- Returns two arrays: singles, ranges.
function export.get_singles_and_ranges(pattern)
	local singles, ranges = {}, {}

	-- Collects one range and deletes it from the pattern.
	local function record_range(lower, higher)
		insert(ranges, { lower, higher })
		return ""
	end

	local remainder = gsub(pattern, "(.)%-(.)", record_range)
	for character in gmatch(remainder, ".") do
		insert(singles, character)
	end

	return singles, ranges
end
-- Collects every block touched by `pattern`: the block of each single
-- character, plus all blocks positioned from the block of a range's lower
-- bound to the block of its upper bound. The result is produced by
-- Array.keysToList, with a comparator that orders blocks by their position
-- (slot 4) in block_data.
function export.get_block_arrays(pattern)
	local singles, ranges = export.get_singles_and_ranges(pattern)

	-- Used as a set: keys are block tables, values are always true.
	local seen = {}

	for _, character in ipairs(singles) do
		seen[export.lookup_block(character)] = true
	end

	for _, bounds in ipairs(ranges) do
		local first_block = export.lookup_block(bounds[1])
		local last_block = export.lookup_block(bounds[2])
		for index = first_block[4], last_block[4] do
			seen[block_data[index]] = true
		end
	end

	local function by_position(block1, block2)
		return block1[4] < block2[4]
	end

	return Array.keysToList(seen, by_position)
end
-- Formats one block table as wikitext: a link to the block's appendix page
-- (slot 3 is used as both page name and label) followed by the block's
-- codepoint range (slots 1 and 2) in U+XXXX notation.
local function format_block_info(block_array)
	local first, last, name = block_array[1], block_array[2], block_array[3]
	return string.format("[[Appendix:Unicode/%s|%s]] (U+%04X–U+%04X)", name, name, first, last)
end
-- Renders `block_arrays` as a wikitext bullet list, one "* ..." line per
-- block, joined with newlines. If `prefix` is given it becomes the first line.
-- Note: `block_arrays` is sorted in place by each block's first codepoint.
function export.print_blocks(block_arrays, prefix)
	local function by_first_codepoint(block_array1, block_array2)
		return block_array1[1] < block_array2[1]
	end

	local function as_list_item(block_array)
		return "* " .. format_block_info(block_array)
	end

	sort(block_arrays, by_first_codepoint)

	local lines = fun.map(as_list_item, block_arrays)
	if prefix then
		insert(lines, 1, prefix)
	end

	return concat(lines, "\n")
end
-- Builds the wikitext that lists the Unicode blocks for every script in
-- "Module:scripts/data" whose first data field equals `script_name` and which
-- has a `characters` pattern.
--
-- Returns:
--   * nil if no matching script with a `characters` pattern exists;
--   * a single sentence if exactly one script matched and it has one block;
--   * otherwise a collapsible wikitable with one group of blocks per distinct
--     character pattern.
-- Raises an error if `script_name` is not a string.
-- Side effect: calls the `track` function of "Module:debug" with the script
-- count and the block count.
function export.print_blocks_by_canonical_name(script_name)
	if type(script_name) ~= "string" then
		error("script_name should be a string, not " .. type(script_name) .. ".")
	end

	-- Maps a character pattern to the Array of script codes that use it.
	-- Reading a pattern that has no entry yet creates and stores an empty Array.
	local scripts_by_pattern = setmetatable({}, {
		__index = function(self, key)
			if key == nil then
				return
			end
			local val = Array()
			self[key] = val
			return val
		end
	})

	-- Group the matching script codes by their character pattern.
	local count = 0
	for code, data in pairs(mw.loadData("Module:scripts/data")) do
		if data[1] == script_name and data.characters then
			count = count + 1
			scripts_by_pattern[data.characters]:insert(code)
		end
	end

	if not next(scripts_by_pattern) then
		return nil
	end

	-- Construct arrays of blocks and count the blocks. The keys of
	-- block_arrays_by_scripts are the (sorted) arrays of script codes.
	local block_arrays_by_scripts = {}
	local block_count = 0
	for pattern, scripts in pairs(scripts_by_pattern) do
		local array = export.get_block_arrays(pattern)
		block_arrays_by_scripts[sort_scripts(scripts)] = array
		block_count = block_count + #array
	end

	require("Module:debug").track{
		"scriptcatboiler/blocks/" .. count,
		"scriptcatboiler/blocks/" .. block_count
	}

	if count == 1 and block_count == 1 then
		local scripts, block_arrays = next(block_arrays_by_scripts)
		-- Sanity check: the counts say there is exactly one of each.
		if scripts[2] or block_arrays[2] then
			error("More than one script or more than one block. Something is wrong.")
		end
		return ("The characters of <code>%s</code> are found in the Unicode block %s")
			:format(scripts[1], format_block_info(block_arrays[1]))
	end

	-- Wraps one script code in <code> tags.
	local function format_script_code(script_code)
		return "<code>" .. script_code .. "</code>"
	end

	-- Renders one group: a "; Block(s) in <codes>" heading plus its block list.
	local function format_group(block_arrays, scripts)
		return export.print_blocks(
			block_arrays,
			"; Block" .. (block_arrays[2] and "s" or "") .. " in "
				.. concat(fun.map(format_script_code, scripts), ", "))
	end

	-- Orders the groups by the first script code of each group.
	local function compare_script_arrays(script_array1, script_array2)
		return compare_script_codes(script_array1[1], script_array2[1])
	end

	-- sortedPairs must stay the last argument so that all of the values it
	-- returns are forwarded to mapIter.
	local groups = fun.mapIter(
		format_group,
		require("Module:table").sortedPairs(block_arrays_by_scripts, compare_script_arrays))

	return '{| class="mw-collapsible mw-collapsed wikitable" style="width: 30em;"\n|+ style="font-weight: normal;" | ' .. "'''Unicode block"
		.. (block_count > 1 and "s" or "") .. " for characters in "
		.. (count > 1 and "these scripts" or "this script") .. "'''\n|\n"
		.. concat(groups, "\n")
		.. '\n|}'
end
-- For testing: entry point taking a frame object. The first frame argument is
-- used as the canonical script name.
function export.print_blocks_by_canonical_name_template(frame)
	local script_name = frame.args[1]
	return export.print_blocks_by_canonical_name(script_name)
end
return export