-- Module:Unicode data/templates/codepoint
--
-- From Wiktionary, the free dictionary


local m_str_utils = require("Module:string utilities")

local codepoint = m_str_utils.codepoint
local find = m_str_utils.find
local gcodepoint = m_str_utils.gcodepoint
local len = m_str_utils.len
local sub = m_str_utils.sub
local u = m_str_utils.char
local yesno = require("Module:yesno")

local m_unicodedata = require("Module:Unicode data")

local export = {}

--- Build the HTML entity for a single code point.
-- If "Module:Unicode data/data/html entities" has an entry for `c`, that
-- entry is used as the entity name; otherwise a decimal numeric character
-- reference is produced.
-- @param c code point number
-- @return entity string of the form "&name;" or "&#NNNN;"
local function get_html_entity_character(c)
	local entity_name = require("Module:Unicode data/data/html entities")[c]
	if not entity_name then
		-- No named entity available: fall back to a decimal reference.
		-- (A hexadecimal reference would be "&#x%x;" instead.)
		return ("&#%u;"):format(c)
	end
	return ("&%s;"):format(entity_name)
end

--- Convert every code point of a string into its HTML entity.
-- @param s string whose code points are converted one by one
-- @param escape if truthy, the joined result is passed through
--   mw.text.encode before being returned
-- @return the entities of all code points of `s`, concatenated in order
function export.get_html_entity(s, escape)
	-- Collect the per-code-point entities and join them once at the end.
	local pieces = {}
	for cp in gcodepoint(s) do
		pieces[#pieces + 1] = get_html_entity_character(cp)
	end

	local joined = table.concat(pieces)
	if not escape then
		return joined
	end
	local encoded = mw.text.encode(joined)
	return encoded
end

--- Work out the link target for a character.
-- Code points that have an entry in the `unsupported_title` table of
-- "Module:Unicode data/data" use the value stored there; every other
-- character is encoded with mw.uri.encode in "PATH" mode.
-- @param ch the character, as a string
-- @return link target string
function export.get_codepoint_link_target(ch)
	local unicode_data = mw.loadData("Module:Unicode data/data")
	local special_title = unicode_data.unsupported_title[codepoint(ch)]
	if not special_title then
		return mw.uri.encode(ch, "PATH")
	end
	return special_title
end

--- Wrap `text` in a piped wikilink whose target is the link target of `ch`.
-- @param ch the character to link to, as a string
-- @param text the text (wikitext/HTML) shown for the link
-- @return wikilink string "[[target|text]]"
local function unicode_link(ch, text)
	local target = export.get_codepoint_link_target(ch)
	local link_open = "[[" .. target
	return link_open .. "|" .. text .. "]]"
end

--- Template entry point: render a description of a single Unicode code point.
--
-- Arguments are read from the parent frame (the template invocation):
--   1        required; a single character, or a hex code of the form U+NNNN
--   display  yes/no value: whether the character itself is printed
--   link     yes/no value: whether a link to the character is produced
--   plain    if set, forces both `display` and `link` off
--   noname   if set, the character name is left out
--   html     if set, the character's HTML entity is appended
-- When `display` or `link` is not given (or yesno cannot interpret it), it
-- defaults to "code point is assigned and printable"; the `display` default
-- additionally requires the code point not to be whitespace.
--
-- @param frame Scribunto frame object
-- @return wikitext string of the form
--   "<character> (U+NNNN <name>[, <extras>])" when the character is
--   displayed, otherwise "U+NNNN <name>[ (<extras>)]"
-- Raises an error when argument 1 is missing, is neither a single character
-- nor a U+NNNN hex code, or names a value above U+10FFFF.
function export.show(frame)
	local args = frame:getParent().args

	if not args[1] then error("The first parameter is required.") end

	-- Resolve argument 1 to a code point number.
	local c
	if len(args[1]) == 1 then
		c = codepoint(args[1], 1, 1)
	elseif find(args[1], "^U%+[0-9A-Fa-f]+$") then
		local hexcode = sub(args[1], 3)
		-- The length cap keeps arbitrarily long digit strings away from
		-- tonumber; the range check then rejects values beyond the last
		-- Unicode code point (U+10FFFF), which no character can have.
		if len(hexcode) <= 7 then
			local value = tonumber(hexcode, 16)
			if value and value <= 0x10FFFF then
				c = value
			end
		end
	end

	if not c then error("Argument 1 is unsupported (must be a single codepoint or a hex code of the form U+NNNN)") end

	-- Explicit display/link preferences; nil means "decide automatically".
	local display
	if args["display"] then
		display = yesno(args["display"], nil)
	end

	local link
	if args["link"] then
		link = yesno(args["link"], nil)
	end

	-- `plain` overrides both preferences.
	if args["plain"] then
		display = false
		link = false
	end

	-- Fill in whichever preference is still undecided.
	if display == nil or link == nil then
		local printable = m_unicodedata.is_assigned(c) and m_unicodedata.is_printable(c)

		if link == nil then
			link = printable
		end

		if display == nil then
			-- Whitespace may still be linked, but is not shown by default.
			display = printable and not m_unicodedata.is_whitespace(c)
		end
	end

	local ch = u(c)
	local unicode_reference = '<span class="nowrap">' .. string.format("U+%04X", c) .. '</span>'
	local unicode_name = m_unicodedata.lookup_name(c)
	local unicode_name_display = '<span class="codepoint-name">' .. unicode_name .. "</span>"

	-- When the character itself is not shown, the U+NNNN reference carries
	-- the link instead.
	if link and not display then
		unicode_reference = unicode_link(ch, unicode_reference)
	end

	local unicode_display = unicode_reference
	if not args["noname"] then
		unicode_display = unicode_display .. " " .. unicode_name_display
	end

	-- Optional extras listed after the reference and name.
	local extra_items = {}
	if args["html"] then
		table.insert(extra_items, 'HTML <code class="nowrap">' .. export.get_html_entity(ch, true) .. '</code>')
	end

	local extra
	if #extra_items > 0 then
		extra = table.concat(extra_items, ", ")
	end

	if display then
		local unicode_print = '<span class="Unicode codepoint-character">' .. ch .. "</span>"

		if link then
			unicode_print = unicode_link(ch, unicode_print)
		end

		if extra then
			unicode_display = unicode_print .. " (" .. unicode_display .. ", " .. extra .. ")"
		else
			unicode_display = unicode_print .. " (" .. unicode_display .. ")"
		end
	elseif extra then
		unicode_display = unicode_display .. " (" .. extra .. ")"
	end

	return unicode_display
end

return export