Jump to content

Module:Unicode data/testcases

From Wiktionary, the free dictionary

2 of 89 tests failed. (refresh)

Text | Expected | Actual
test get_block_range:
Passed | Basic Latin | U+0000–U+007F | U+0000–U+007F
Passed | blah | nil | nil
Text | Expected | Actual
test get_entry_title:
Passed | U+0023: # | Unsupported titles/Number sign | Unsupported titles/Number sign
Passed | U+0020:   | Unsupported titles/Space | Unsupported titles/Space
Text | Expected | Actual
test is_assigned:
Passed | U+0061: a | true | true
Passed | U+0378 | false | false
Passed | U+40000 | false | false
Text | Expected | Actual
test is_combining:
Passed | U+0060: ` | false | false
Passed | U+0300: ◌̀ | true | true
Passed | U+0378 | false | false
Passed | U+DC00 | false | false
Text | Expected | Actual
test is_printable:
Passed | U+0000 | false | false
Passed | U+0020:   | true | true
Passed | U+0061: a | true | true
Text | Expected | Actual
test is_valid_pagename:
Passed | # | false | false
Passed |   | false | false
Passed | word | true | true
Text | Expected | Actual
test is_whitespace:
Passed | U+0020:   | true | true
Passed | U+0061: a | false | false
Text | Expected | Actual
test lookup_block:
Passed | U+0064: d | Basic Latin | Basic Latin
Passed | U+030B: ◌̋ | Combining Diacritical Marks | Combining Diacritical Marks
Passed | U+03A3: Σ | Greek and Coptic | Greek and Coptic
Passed | U+3175: ㅵ | Hangul Compatibility Jamo | Hangul Compatibility Jamo
Passed | U+AC01: 각 | Hangul Syllables | Hangul Syllables
Passed | U+10FFFF | Supplementary Private Use Area-B | Supplementary Private Use Area-B
Text | Expected | Actual
test lookup_category:
Passed | U+0009 | Cc | Cc
Passed | U+0020:   | Zs | Zs
Passed | U+005B: [ | Ps | Ps
Passed | U+005D: ] | Pe | Pe
Passed | U+005E: ^ | Sk | Sk
Passed | U+0041: A | Lu | Lu
Passed | U+00AD | Cf | Cf
Passed | U+00BE: ¾ | No | No
Passed | U+00AB: « | Pi | Pi
Passed | U+00BB: » | Pf | Pf
Passed | U+0300: ◌̀ | Mn | Mn
Passed | U+0488: ҈ | Me | Me
Passed | U+0663: ٣ | Nd | Nd
Passed | U+5B50: 子 | Lo | Lo
Passed | U+1FAE: ᾮ | Lt | Lt
Passed | U+1B44: ◌᭄ | Mc | Mc
Passed | U+2208: ∈ | Sm | Sm
Passed | U+203F: ‿ | Pc | Pc
Passed | U+21B9: ↹ | So | So
Passed | U+2E17: ⸗ | Pd | Pd
Passed | U+2167: Ⅷ | Nl | Nl
Passed | U+2028 | Zl | Zl
Passed | U+2029 | Zp | Zp
Passed | U+309E: ゞ | Lm | Lm
Passed | U+D800 | Cs | Cs
Passed | U+FFE1: £ | Sc | Sc
Passed | U+FFFF | Cn | Cn
Passed | U+100000 | Co | Co
Text | Expected | Actual
test lookup_image:
Failed | U+203D: ‽
Failed | U+30A2: ア
Passed | U+0B85: அ
Text | Expected | Actual
test lookup_name:
Passed | U+0000 | <control-0000> | <control-0000>
Passed | U+007F | <control-007F> | <control-007F>
Passed | U+00C1: Á | LATIN CAPITAL LETTER A WITH ACUTE | LATIN CAPITAL LETTER A WITH ACUTE
Passed | U+0300: ◌̀ | COMBINING GRAVE ACCENT | COMBINING GRAVE ACCENT
Passed | U+0378 | <reserved-0378> | <reserved-0378>
Passed | U+1B44: ◌᭄ | BALINESE ADEG ADEG | BALINESE ADEG ADEG
Passed | U+1F71: ά | GREEK SMALL LETTER ALPHA WITH OXIA | GREEK SMALL LETTER ALPHA WITH OXIA
Passed | U+3555: 㕕 | CJK UNIFIED IDEOGRAPH-3555 | CJK UNIFIED IDEOGRAPH-3555
Passed | U+AC01: 각 | HANGUL SYLLABLE GAG | HANGUL SYLLABLE GAG
Passed | U+D5FF: 헿 | HANGUL SYLLABLE HEH | HANGUL SYLLABLE HEH
Passed | U+DC00 | <surrogate-DC00> | <surrogate-DC00>
Passed | U+EEEE | <private-use-EEEE> | <private-use-EEEE>
Passed | U+FDD1 | <noncharacter-FDD1> | <noncharacter-FDD1>
Passed | U+FFFD: � | REPLACEMENT CHARACTER | REPLACEMENT CHARACTER
Passed | U+FFFF | <noncharacter-FFFF> | <noncharacter-FFFF>
Passed | U+1F4A9: 💩 | PILE OF POO | PILE OF POO
Passed | U+E0000 | <reserved-E0000> | <reserved-E0000>
Passed | U+F0F0F | <private-use-F0F0F> | <private-use-F0F0F>
Passed | U+10FFFF | <noncharacter-10FFFF> | <noncharacter-10FFFF>
Text | Expected | Actual
test lookup_script:
Passed | U+005B: [ | Zyyy | Zyyy
Passed | U+0041: A | Latn | Latn
Passed | U+4E00: 一 | Hani | Hani
Passed | U+0300: ◌̀ | Zinh | Zinh
Passed | U+03CE: ώ | Grek | Grek
Passed | U+1F66: ὦ | Grek | Grek
Passed | U+0416: Ж | Cyrl | Cyrl
Passed | U+0478: Ѹ | Cyrl | Cyrl
Passed | U+A651: ꙑ | Cyrl | Cyrl
Passed | U+10D0: ა | Geor | Geor
Passed | U+10A0: Ⴀ | Geor | Geor
Passed | U+2D00: ⴀ | Geor | Geor
Passed | U+0021: ! | Zyyy | Zyyy
Passed | U+2F82B: 北 | Hani | Hani

local tests = require("Module:UnitTests")

local m_str_utils = require("Module:string utilities")
local m_Unicode_data = require("Module:Unicode data")

local cp = m_str_utils.codepoint
local len = m_str_utils.len
local toNFC = mw.ustring.toNFC
local u = m_str_utils.char

-- Makes whitespace visible in the rendered results: every plain space is
-- turned into a &nbsp; entity and the whole string is wrapped in a span
-- with a light-gray background.
local function show_whitespace(whitespace)
	-- Assigning to a single local drops gsub's second result (the count).
	local visible = whitespace:gsub(" ", "&nbsp;")
	return table.concat {
		'<span style="background-color: lightgray;">',
		visible,
		'</span>',
	}
end

-- Builds the label shown in the "Text" column for a code point.
-- Non-printable code points are rendered as a bare "U+XXXX"; printable ones
-- as "U+XXXX: <character>", where the character is
--   * replaced by a hexadecimal character reference when it is not equal to
--     its own NFC form (presumably so the wikitext is not normalized away
--     from the intended character -- confirm),
--   * wrapped by show_whitespace when it is whitespace,
--   * prefixed with a dotted circle when it is a combining character.
local function show(codepoint)
	if not m_Unicode_data.is_printable(codepoint) then
		return ("U+%04X"):format(codepoint)
	end

	local glyph = u(codepoint)
	if toNFC(glyph) ~= glyph then
		glyph = ("&#x%X;"):format(codepoint)
	end
	if m_Unicode_data.is_whitespace(codepoint) then
		glyph = show_whitespace(glyph)
	end
	if m_Unicode_data.is_combining(codepoint) then
		glyph = "◌" .. glyph
	end

	return ("U+%04X: %s"):format(codepoint, glyph)
end

-- Wraps `char` in a <span> whose class attribute is `sc`.
local function tag(char, sc)
	local template = '<span class="%s">%s</span>'
	return template:format(sc, char)
end

-- Identity function: returns all of its arguments unchanged. Used as the
-- default `display` transform in test_lookup.
local function return_all(...)
	return ...
end

-- Registers tests["test_lookup_<what>"], which checks
-- m_Unicode_data["lookup_<what>"] against a list of examples.
--   what:     suffix of the lookup function's name (e.g. "name").
--   examples: list of { codepoint, expected_result } pairs.
--   display:  optional function applied to both the actual and the expected
--             value before they are compared; defaults to return_all.
local function test_lookup(what, examples, display)
	local lookup_name = "lookup_" .. what
	local lookup = m_Unicode_data[lookup_name]
	local render = display or return_all

	-- Invoked through self:iterate; presumably receives the fields of one
	-- example unpacked as (codepoint, expected) -- confirm against
	-- Module:UnitTests.
	local function check_example(self, codepoint, expected)
		self:equals(
			show(codepoint),
			render(lookup(codepoint)),
			render(expected))
	end

	tests["test_" .. lookup_name] = function (self)
		self:iterate(examples, check_example)
	end
end

-- lookup_category examples: { codepoint, expected two-letter category code }.
-- Some code points are written numerically rather than as character literals.
test_lookup("category", {
	{ cp("\t"), "Cc" },
	{ cp(" "),  "Zs" },
	{ cp("["),  "Ps" },
	{ cp("]"),  "Pe" },
	{ cp("^"),  "Sk" },
	{ cp("A"),  "Lu" },
	{ 0x00AD,   "Cf" },
	{ cp("¾"),  "No" },
	{ cp("«"),  "Pi" },
	{ cp("»"),  "Pf" },
	{ 0x0300,   "Mn" },
	{ 0x0488,   "Me" },
	{ cp("٣"),  "Nd" },
	{ cp("子"), "Lo" },
	{ cp("ᾮ"),  "Lt" },
	{ 0x1B44,   "Mc" },
	{ cp("∈"),  "Sm" },
	{ cp("‿"),  "Pc" },
	{ cp("↹"),  "So" },
	{ cp("⸗"),  "Pd" },
	{ cp("Ⅷ"), "Nl" },
	{ 0x2028,   "Zl" },
	{ 0x2029,   "Zp" },
	{ cp("ゞ"), "Lm" },
	{ 0xD800,   "Cs" },
	{ cp("£"), "Sc" },
	{ 0xFFFF,   "Cn" },
	{ 0x100000, "Co" },
})

-- lookup_name examples: { codepoint, expected name }. Angle-bracketed labels
-- are the expected results for control, reserved, surrogate, private-use and
-- noncharacter code points.
test_lookup("name", {
	{ 0x0000,   "<control-0000>" },
	{ 0x007F,   "<control-007F>" },
	{ 0x00C1,   "LATIN CAPITAL LETTER A WITH ACUTE" },
	{ 0x0300,   "COMBINING GRAVE ACCENT" },
	{ 0x0378,   "<reserved-0378>" },
	{ 0x1B44,   "BALINESE ADEG ADEG" },
	{ 0x1F71,   "GREEK SMALL LETTER ALPHA WITH OXIA" },
	{ 0x3555,   "CJK UNIFIED IDEOGRAPH-3555" },
	{ 0xAC01,   "HANGUL SYLLABLE GAG" },
	{ 0xD5FF,   "HANGUL SYLLABLE HEH" },
	{ 0xDC00,   "<surrogate-DC00>" },
	{ 0xEEEE,   "<private-use-EEEE>" },
	{ 0xFDD1,   "<noncharacter-FDD1>" },
	{ 0xFFFD,   "REPLACEMENT CHARACTER" },
	{ 0xFFFF,   "<noncharacter-FFFF>" },
	{ 0x1F4A9,  "PILE OF POO" },
	{ 0xE0000,  "<reserved-E0000>" },
	{ 0xF0F0F,  "<private-use-F0F0F>" },
	{ 0x10FFFF, "<noncharacter-10FFFF>" },
})

-- lookup_script examples: { codepoint, expected four-letter script code }.
test_lookup("script", {
	{ cp("["),  "Zyyy" },
	{ cp("A"),  "Latn" },
	{ cp("一"), "Hani" },
	{ 0x0300,   "Zinh" },
	{ cp("ώ"),  "Grek" },
	{ cp("ὦ"),  "Grek" },
	{ cp("Ж"),  "Cyrl" },
	{ cp("Ѹ"),  "Cyrl" },
	{ cp("ꙑ"),  "Cyrl" },
	{ cp("ა"),  "Geor" },
	{ cp("Ⴀ"),  "Geor" },
	{ cp("ⴀ"),  "Geor" },
	{ cp("!"),  "Zyyy" },
	{ 0x2F82B,  "Hani" },
})

-- lookup_block examples: { codepoint, expected block name }.
test_lookup("block", {
	{ 0x0064,   "Basic Latin" },
	{ 0x030B,   "Combining Diacritical Marks" },
	{ 0x03A3,   "Greek and Coptic" },
	{ 0x3175,   "Hangul Compatibility Jamo" },
	{ 0xAC01,   "Hangul Syllables" },
	{ 0x10FFFF, "Supplementary Private Use Area-B" },
})

-- lookup_image examples: { codepoint, expected image file name }. Both the
-- actual and the expected file name are turned into a small inline image
-- link before being compared and displayed.
-- NOTE(review): the U+203D and U+30A2 examples are shown as failing in the
-- recorded results; the expected file names may no longer match what
-- lookup_image returns -- confirm against the module's image data.
do
	local image_examples = {
		{ 0x203D, "Interrobang.svg" },
		{ 0x30A2, "Japanese Katakana A.svg" },
		{ 0x0B85, "Tamil-alphabet-அஅ.svg" },
	}

	local function as_inline_image(image_title)
		return "[[File:" .. image_title .. "|frameless|14px]]"
	end

	test_lookup("image", image_examples, as_inline_image)
end

-- Registers tests["test_is_<what>"], which checks the predicate
-- m_Unicode_data["is_<what>"] against a list of examples.
--   what:     suffix of the predicate's name (e.g. "combining").
--   examples: list of { codepoint, expected_boolean } pairs.
local function test_is(what, examples)
	local predicate_name = "is_" .. what
	local predicate = m_Unicode_data[predicate_name]

	-- Invoked through self:iterate; presumably receives the fields of one
	-- example unpacked as (codepoint, expected) -- confirm against
	-- Module:UnitTests.
	local function check_example(self, codepoint, expected)
		self:equals(show(codepoint), predicate(codepoint), expected)
	end

	tests["test_" .. predicate_name] = function (self)
		self:iterate(examples, check_example)
	end
end

-- is_assigned examples: { codepoint, expected result }.
test_is("assigned", {
	{ 0x0061,  true },
	{ 0x0378,  false },
	{ 0x40000, false },
})

-- is_combining examples: { codepoint, expected result }.
test_is("combining", {
	{ cp("`"), false },
	{ 0x0300,  true },
	{ 0x0378,  false },
	{ 0xDC00,  false },
})

-- is_printable examples: { codepoint, expected result }.
test_is("printable", {
	{ 0x0000, false },
	{ 0x0020, true },
	{ 0x0061, true },
})

-- is_whitespace examples: { codepoint, expected result }.
test_is("whitespace", {
	{ 0x0020, true },
	{ 0x0061, false },
})

-- Checks get_block_range for a known block name and for an unknown one.
-- Each example is { block_name, { expected_low, expected_high } }; both the
-- actual and the expected range are rendered to a string before comparison.
function tests:test_get_block_range()
	-- Renders a range as "U+XXXX&ndash;U+XXXX", or as "nil" when there is
	-- no lower bound.
	local function display_block_range(low, high)
		if low == nil then
			return "nil"
		end
		return ("U+%04X&ndash;U+%04X"):format(low, high)
	end

	local examples = {
		{ "Basic Latin", { 0x0000, 0x007F } },
		{ "blah", { nil, nil } },
	}

	local function check_example(self, block_name, block_range)
		local actual = display_block_range(
			m_Unicode_data.get_block_range(block_name))
		local expected = display_block_range(unpack(block_range))
		self:equals(block_name, actual, expected)
	end

	self:iterate(examples, check_example)
end

-- Checks is_valid_pagename against { pagename, expected_validity } examples.
-- A pagename that is a single whitespace character is highlighted with
-- show_whitespace so that it is visible in the results.
function tests:test_is_valid_pagename()
	local examples = {
		{ "#", false },
		{ " ", false },
		{ "word", true },
	}

	-- Label for the "Text" column.
	local function label_for(pagename)
		local is_lone_whitespace = len(pagename) == 1
			and m_Unicode_data.is_whitespace(cp(pagename))
		if is_lone_whitespace then
			return show_whitespace(pagename)
		end
		return pagename
	end

	self:iterate(examples, function (self, pagename, validity)
		local label = label_for(pagename)
		self:equals(
			label,
			m_Unicode_data.is_valid_pagename(pagename),
			validity)
	end)
end

-- Checks get_entry_title against { codepoint, expected_subpage } examples.
-- The expected title is "Unsupported titles/" followed by the subpage name;
-- both sides are wrapped in [[...]] before being compared and displayed.
function tests:test_get_entry_title()
	local examples = {
		{ cp("#"), "Number sign" },
		{ cp(" "), "Space" },
	}

	local function check_example(self, codepoint, entry_title)
		local label = show(codepoint)
		local actual = "[[" .. m_Unicode_data.get_entry_title(codepoint) .. "]]"
		local expected = "[[Unsupported titles/" .. entry_title .. "]]"
		self:equals(label, actual, expected)
	end

	self:iterate(examples, check_example)
end

-- Write to the debug log the name of every function exported by the module
-- under test for which no "test_<name>" entry has been registered.
for name, value in pairs(require("Module:Unicode data")) do
	local is_named_function = type(name) == "string" and type(value) == "function"
	if is_named_function and not tests["test_" .. name] then
		mw.log(name)
	end
end

-- Re-register every "test_<name>" entry under the key
-- "test <code><name></code>" and remove the old key.
-- NOTE(review): entries are added and removed while the loop runs;
-- presumably sortedPairs walks a pre-built list of keys, which would make
-- this safe -- confirm against Module:table.
for key, test_func in require("Module:table").sortedPairs(tests) do
	if type(key) == "string" then
		local bare_name = key:match("^test_(.+)$")
		if bare_name then
			tests[key] = nil
			tests["test <code>" .. bare_name .. "</code>"] = test_func
		end
	end
end

return tests