Jump to content

Module:User:Erutuon/grapheme cluster break/testcases

From Wiktionary, the free dictionary

successes: 834; failures: 0


-- Full text of the wiki page "User:Erutuon/GraphemeBreakTest.txt", fetched once
-- when this module is loaded; parse_tests reads it line by line.
-- NOTE(review): the result is used unchecked — if the page is missing,
-- getContent() presumably yields nil and parse_tests will fail when indexing it.
local test_content = mw.title.new("User:Erutuon/GraphemeBreakTest.txt"):getContent()

-- Local alias for mw.ustring.char, used to turn a code point number into a string.
local char = mw.ustring.char

-- Converts a single line of the test data into a test record.
-- Returns nil for lines that are skipped: any line containing "<", and any
-- line with nothing in front of its "#" (i.e. a pure comment line).
local function parse_test_line(line)
	if line:find("<") then
		return nil
	end
	local data, remark = line:match("^([^#]*)#?([^#]*)")
	if not data or #data == 0 then
		return nil
	end

	-- The part before "#" alternates break markers and hexadecimal code points:
	-- "÷" records an expected break (true), "×" an expected non-break (false).
	local expected_breaks = {}
	local pieces = {}
	for token in data:gmatch("%S+") do
		if token == "÷" then
			expected_breaks[#expected_breaks + 1] = true
		elseif token == "×" then
			expected_breaks[#expected_breaks + 1] = false
		else
			-- Anything else must be a hex code point; the whole line is the error message.
			local codepoint = assert(tonumber(token, 16), line)
			pieces[#pieces + 1] = char(codepoint)
		end
	end

	-- The part after "#" carries rule identifiers written as "[...]" tokens;
	-- every other token in the comment is ignored.
	local rule_ids = {}
	for token in remark:gmatch("%S+") do
		local id = token:match("^%[([^%]]+)%]$")
		if id then
			rule_ids[#rule_ids + 1] = id
		end
	end

	return {
		breaks = expected_breaks,
		printed_characters = table.concat(pieces),
		text = line,
		rules = rule_ids,
	}
end

-- Builds the list of test records from the module-level test_content.
-- `path` is accepted but never read; the input always comes from test_content.
-- Each record has the fields:
--   breaks             array of booleans, one per "÷"/"×" marker, in order
--   printed_characters string made of the line's code points
--   text               the original line
--   rules              array of the "[...]" identifiers found in the comment
local function parse_tests(path)
	local parsed = {}
	for line in test_content:gmatch("[^\n]+") do
		local test = parse_test_line(line)
		if test then
			parsed[#parsed + 1] = test
		end
	end
	return parsed
end

-- Runs every parsed test against the grapheme cluster break function and
-- returns a report string wrapped in <pre>...</pre>.
--
-- For each test, a break decision is computed in front of every code point of
-- test.printed_characters (the position before the first code point always
-- counts as a break) and compared with the expected decisions in test.breaks.
-- Expected entries with no computed counterpart (such as the one after the last
-- code point) are not checked. A test with at least one mismatch is counted as
-- a failure and described in the report; otherwise it is counted as a success.
local function run_tests()
	-- parse_tests never reads its argument, so none is passed (the original
	-- passed an undeclared global, which was always nil).
	local tests = parse_tests()

	local new_should_break = require "Module:User:Erutuon/grapheme cluster break"
	local output = require "Module:array"()
	local successes = 0
	local failures = 0

	for _, test in ipairs(tests) do
		-- A new break function is obtained for every test, presumably so that
		-- any state it keeps cannot leak between tests — TODO confirm.
		local should_break = new_should_break()

		local actual = {}
		local codepoints = {}
		local count = 0
		local previous
		for codepoint in mw.ustring.gcodepoint(test.printed_characters) do
			count = count + 1
			codepoints[count] = codepoint
			-- No previous code point means start of text: always a break, and
			-- should_break is not consulted. Otherwise store a plain boolean.
			actual[count] = previous == nil or not not should_break(previous, codepoint)
			previous = codepoint
		end

		local reported = false
		for pos, expected in ipairs(test.breaks) do
			local got = actual[pos]
			if got ~= nil and got ~= expected then
				if not reported then
					-- Print the test's characters and source line once per failing test.
					output:insert(test.printed_characters .. "\n")
					output:insert(test.text .. "\n")
					reported = true
				end
				-- Guard the report itself: a missing rule identifier or a
				-- mismatch at the first position (no preceding code point)
				-- must not raise an error while describing the failure.
				local before = codepoints[pos - 1]
				local before_text = before and ("U+%04X"):format(before) or "start of text"
				output:insert(("rule broken = %s; expected = %s; actual = %s; code points = %s U+%04X\n"):format(
					test.rules[pos] or "?",
					expected and "break" or "no break",
					got and "break" or "no break",
					before_text,
					codepoints[pos]
				))
			end
		end

		if reported then
			output:insert "\n"
			failures = failures + 1
		else
			successes = successes + 1
		end
	end

	return "<pre>successes: " .. successes .. "; failures: " .. failures .. "\n\n" .. output:concat() .. "</pre>"
end

return { run_tests = run_tests }