Jump to content

Module:parser

From Wiktionary, the free dictionary

This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.

local export = {}

local metamethods_data_module = "Module:data/metamethods"
local table_module = "Module:table"

local concat = table.concat
local getmetatable = getmetatable
local insert = table.insert
local next = next
local rawget = rawget
local rawset = rawset
local remove = table.remove
local require = require
local setmetatable = setmetatable
local type = type
local unpack = unpack

local classes = {}

--[==[
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
	-- Self-replacing loader: the first call fetches `deepCopy` from the table
	-- module, rebinds `deep_copy` to it, and forwards the arguments.
	local function deep_copy(...)
		local loaded = require(table_module).deepCopy
		deep_copy = loaded
		return loaded(...)
	end

--[==[
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==]
	local metamethods
	-- One-shot loader: stores the metamethods data in `metamethods`, clears
	-- itself so it cannot be called twice, and returns the loaded data.
	local function get_metamethods()
		-- Use require, since lookup times are much slower with mw.loadData.
		local data = require(metamethods_data_module)
		metamethods = data
		get_metamethods = nil
		return data
	end

------------------------------------------------------------------------------------
--
-- Helper functions
--
------------------------------------------------------------------------------------

-- Follows the key path `k, ...` through nested tables starting at `t`.
-- Returns nil as soon as a table on the path is nil; the path ends at the
-- first key whose successor is nil.
local function get_nested(t, k, ...)
	if t == nil then
		return nil
	end
	local child = t[k]
	if (...) == nil then
		return child
	end
	return get_nested(child, ...)
end

-- Stores a value at the end of a key path, creating intermediate tables as
-- needed. The final argument is the value; everything before it is the path.
local function set_nested(t, k, v, ...)
	if (...) == nil then
		-- No further arguments: `k` is the last key and `v` the value.
		t[k] = v
		return
	end
	local branch = t[k]
	if branch == nil then
		branch = {}
		t[k] = branch
	end
	return set_nested(branch, v, ...)
end

-- Copies into `child` every field of `parent` whose key is listed in the
-- metamethods data and which `child` does not already define.
-- Returns `child`; a nil/false `parent` leaves it untouched.
local function inherit_metamethods(child, parent)
	if not parent then
		return child
	end
	for name, inherited in next, parent do
		if child[name] == nil then
			-- The metamethods data is only loaded once it is actually needed.
			local known = metamethods or get_metamethods()
			if known[name] ~= nil then
				child[name] = inherited
			end
		end
	end
	return child
end

-- Converts a possibly non-positive index into an absolute one: 0 maps to the
-- slot just past the end of `t`, -1 to the last element, and so on.
-- Positive and nil/false values of `n` are returned unchanged.
local function signed_index(t, n)
	if n and n <= 0 then
		return #t + 1 + n
	end
	return n
end

-- Returns true if `value` has a metatable that is registered in `classes`
-- (i.e. it was created by `Node:new_class`), and false otherwise.
local function is_node(value)
	if value == nil then
		return false
	end
	local mt = getmetatable(value)
	if mt == nil then
		return false
	end
	return classes[mt] ~= nil
end

-- Recursively calling tostring() adds to the C stack (limit: 200), whereas
-- calling __tostring metamethods directly does not. Occasionally relevant when
-- dealing with very deep nesting.
local tostring
do
	local native_tostring = _G.tostring

	-- Nodes are stringified by calling their `__tostring` method directly;
	-- everything else goes through the built-in `tostring`.
	function tostring(value)
		if not is_node(value) then
			return native_tostring(value)
		end
		return value:__tostring(value)
	end
end

-- Returns the class name registered in `classes` for the metatable of `value`
-- if there is one, and otherwise falls back to `type(value)`.
local function class_else_type(value)
	if value ~= nil then
		local mt = getmetatable(value)
		if mt ~= nil then
			local class = classes[mt]
			if class ~= nil then
				return class
			end
		end
	end
	return type(value)
end

------------------------------------------------------------------------------------
--
-- Nodes
--
------------------------------------------------------------------------------------

local Node = {}
Node.__index = Node

-- Single step of array-style iteration: given the previous key `i`, returns
-- the value at `i + 1`, the node itself, and the new key.
function Node:next(i)
	local following = i + 1
	return self[following], self, following
end

--[==[
Implements recursive iteration over a node tree.

By default, when a node is encountered (which may contain other nodes), it is returned on the first iteration, and then any child nodes are returned on each subsequent iteration; the same process is then followed if any of those children contain nodes themselves. Once a particular node has been fully traversed, the iterator then continues with any sibling nodes. The iterator will use the `next` method of each node to traverse it, which may differ depending on the node class.

Each iteration returns three values: `value`, `node` and `key`. Together, these can be used to manipulate the node tree at any given point without needing to know the full structure. Note that when the input node is returned on the first iteration, `node` and `key` will be nil.

The optional argument `test` can be used to limit the return values. This should be a function that returns a boolean value, where a return value of true means that the child will be returned by the iterator. If a node is not returned by the iterator, it will still be traversed, as it may contain children that should be returned.

The method `iterate_nodes` is provided as a special instance of iterate which uses `is_node` as the test.]==]
function Node:iterate(test)
	-- Traversal state captured by the returned closure:
	-- `node` and `k` are the node currently being walked and the last key visited in it;
	-- `n` is the depth of the memo stack held in `nodes`/`keys`, which are only created once a child node is first entered;
	-- `returned_self` records whether the root node has been handled yet.
	local node, k, n, nodes, keys, returned_self = self, 0, 0
	-- Special case if `test` is `is_node`.
	local is_node_is_test = test == is_node
	
	return function()
		-- On the first call only, offer the root node itself (returned without `node` and `key` values) if it passes `test`.
		if not returned_self then
			returned_self = true
			if test == nil or test(self) then
				return self
			end
		end
		-- Get `v`, which is the value at the last-returned key of the current node; if `v` is a node, it will be iterated over (i.e. recursive iteration). By default, `v` will be the last-returned value, but looking it up here means that any modifications made to the node during the loop will be taken into account. This makes it possible to swap one node out for something else (e.g. another node), or to remove it entirely, without being locked into recursively iterating over the old node; instead, the new node (if any) will be iterated over. This means node trees can be modified on-the-fly during the course of a single loop.
		local v, node_check = node[k], true
		while true do
			-- If `v` is a node, memoize the current node and key, then iterate over it.
			if node_check and is_node(v) then
				-- `n` is the current memo level.
				n = n + 1
				if nodes then
					nodes[n], keys[n] = node, k
				else
					nodes, keys = {node}, {k}
				end
				node, k = v, 0
			end
			-- Advance using the current node's own `next` method, which returns the value, the node and the new key.
			v, node, k = node:next(k)
			-- If `v` is nil, the current node is exhausted: pop one memo level and continue iterating the memoized node from its memoized key, or terminate the loop if there is no memoized level left.
			if v == nil then
				if n == 0 then
					return nil
				end
				node, k, n = nodes[n], keys[n], n - 1
			elseif test == nil or test(v) then
				return v, node, k
			-- If `test` is `is_node`, there's no point checking it again on the next loop.
			elseif node_check and is_node_is_test then
				node_check = false
			end
		end
	end
end

-- Shorthand for `self:iterate(is_node)`: the returned iterator only yields
-- values for which `is_node` returns true.
function Node:iterate_nodes()
	return self:iterate(is_node)
end

-- Stringifies the node by converting each array element with the module's
-- `tostring` (which handles nested nodes) and joining the results.
function Node:__tostring()
	local parts = {}
	local count = #self
	for i = 1, count do
		parts[#parts + 1] = tostring(self[i])
	end
	return concat(parts)
end

-- Returns a copy of this node produced by `deep_copy` (the lazily loaded
-- `deepCopy` function of the table module).
-- NOTE(review): the meaning of the "keep" and `true` arguments is defined by
-- `deepCopy` itself and is not visible here; confirm against Module:table.
function Node:clone()
	return deep_copy(self, "keep", true)
end

-- Creates a node subclass named `class` that inherits from `self`.
-- The new class table gets `type = class`, indexes itself, inherits the
-- parent's metamethods, and is registered in `classes` so that `is_node`
-- and `class_else_type` recognise its instances.
function Node:new_class(class)
	local subclass = {type = class}
	-- Set before inheriting, so the parent's `__index` is not copied over it.
	subclass.__index = subclass
	inherit_metamethods(subclass, self)
	classes[subclass] = class
	setmetatable(subclass, self)
	return subclass
end

Node.keys_to_remove = {"fail", "handler", "head", "override", "route"}

-- Turns the table `t` into an instance of this node class, in place.
-- Any existing metatable is dropped and every key listed in
-- `self.keys_to_remove` is cleared before the class is attached.
function Node:new(t)
	setmetatable(t, nil)
	local discard = self.keys_to_remove
	local count = #discard
	for idx = 1, count do
		t[discard[idx]] = nil
	end
	setmetatable(t, self)
	return t
end

do
	local Proxy = {}

	-- Lookup order: methods defined on `Proxy` first, then the proxy's
	-- `__chars` table.
	function Proxy:__index(k)
		local method = Proxy[k]
		if method == nil then
			return self.__chars[k]
		end
		return method
	end

	-- Assigns `v` to position `k` of the proxy and writes it through to the
	-- parent table recorded for that position.
	-- `self.__keys[k]` is the key under which the value at position `k` lives
	-- in `self.__parents[k]` (both are recorded per position by `Proxy:build`).
	-- Raises "Character is immutable." if the recorded key is `false`, and
	-- "Invalid key." if nothing was recorded for `k`.
	function Proxy:__newindex(k, v)
		local key = self.__keys[k]
		if key then
			self.__chars[k] = v
			-- `__parents` is indexed by position, so the parent must be looked
			-- up with `k` first; indexing it with `key` would overwrite an
			-- entry of the proxy's own parent list and never reach the parent.
			self.__parents[k][key] = v
		elseif key == false then
			error("Character is immutable.")
		else
			error("Invalid key.")
		end
	end

	-- Appends one entry to the proxy: `a` is stored in `__chars`, `b` in
	-- `__parents` and `c` in `__keys`, all at the next free position, and
	-- `__len` is bumped accordingly.
	function Proxy:build(a, b, c)
		local pos = self.__len + 1
		self.__len = pos
		self.__keys[pos] = c
		self.__parents[pos] = b
		self.__chars[pos] = a
	end

	-- Stateless iterator step. For position `i + 1` it returns the position,
	-- the proxied value, the proxy itself, and the parent and key recorded
	-- for that position; it returns nothing once `__chars` has no entry there.
	function Proxy:iter(i)
		i = i + 1
		if self.__chars[i] == nil then
			return
		end
		return i, self[i], self, self.__parents[i], self.__keys[i]
	end
	
	-- Creates an empty proxy bound to this node: no characters, parents or
	-- keys recorded yet, and a length of 0.
	function Node:new_proxy()
		local proxy = {
			__node = self,
			__chars = {},
			__parents = {},
			__keys = {},
			__len = 0
		}
		return setmetatable(proxy, Proxy)
	end
end

------------------------------------------------------------------------------------
--
-- Parser
--
------------------------------------------------------------------------------------

local Parser = {}
Parser.__index = Parser

-- Returns the item of `self.text` at the head position, offset by `delta`
-- (default 0). Positions with no item yield the empty string.
function Parser:read(delta)
	local char = self.text[self.head + (delta or 0)]
	if char == nil then
		return ""
	end
	return char
end

-- Moves the head forward by `n` positions (1 if `n` is nil).
function Parser:advance(n)
	if n == nil then
		n = 1
	end
	self.head = self.head + n
end

-- Without `n`, returns the current layer (`self[-1]`). With `n`, returns the
-- raw stack entry at `#self + n`, so 0 is the top of the stack and negative
-- values reach further down.
function Parser:layer(n)
	if n == nil then
		return self[-1]
	end
	return rawget(self, #self + n)
end

-- Adds a token to the current layer.
-- `emit(token)` appends it; `emit(index, token)` inserts it at `index`,
-- where non-positive indices are resolved by `signed_index`.
function Parser:emit(a, b)
	local layer = self[-1]
	if b == nil then
		rawset(layer, #layer + 1, a)
		return
	end
	insert(layer, signed_index(layer, a), b)
end

-- Adds several tokens to the current layer.
-- `emit_tokens(tokens)` appends them in order; `emit_tokens(index, tokens)`
-- inserts them in order starting at `index`, where non-positive indices are
-- resolved by `signed_index`.
function Parser:emit_tokens(a, b)
	local layer = self[-1]
	if b == nil then
		local size = #layer
		for i = 1, #a do
			rawset(layer, size + i, a[i])
		end
		return
	end
	local start = signed_index(layer, a)
	for offset = 0, #b - 1 do
		insert(layer, start + offset, b[offset + 1])
	end
end

-- Removes a token from the current layer and returns it.
-- Without `n` the last token is taken; otherwise the token at `n`, where
-- non-positive indices are resolved by `signed_index`.
function Parser:remove(n)
	local layer = self[-1]
	if n == nil then
		local last = #layer
		local token = layer[last]
		layer[last] = nil
		return token
	end
	return remove(layer, signed_index(layer, n))
end

-- Overwrites the token at index `a` of the current layer with `b`;
-- non-positive indices are resolved by `signed_index`.
function Parser:replace(a, b)
	local layer = self[-1]
	local index = signed_index(layer, a)
	layer[index] = b
end

-- Unlike default table.concat, this respects __tostring metamethods.
-- Joins the string forms of tokens from a layer.
-- `a` selects the layer as in `Parser:layer` (0 or negative); `b` and `c`
-- bound the token range and default to the whole layer. If `a` is nil or
-- positive, the arguments are read as `(b, c)` for the top layer instead.
function Parser:concat(a, b, c)
	if a == nil or a > 0 then
		return self:concat(0, a, b)
	end
	local layer, pieces, count = self:layer(a), {}, 0
	local first = b and signed_index(layer, b) or 1
	local last = c and signed_index(layer, c) or #layer
	for i = first, last do
		count = count + 1
		pieces[count] = tostring(layer[i])
	end
	return concat(pieces)
end

-- Looks back through previously emitted tokens. `delta` is a negative offset
-- (default -1, the most recent token); when it reaches past the start of a
-- layer, the search continues in the layer below it on the stack.
-- Returns nil if the stack runs out before the offset is reached.
function Parser:emitted(delta)
	local offset = delta == nil and -1 or delta
	local depth = 0
	local layer = self:layer(depth)
	while layer ~= nil do
		local size = #layer
		if -offset <= size then
			return rawget(layer, size + offset + 1)
		end
		-- Not enough tokens in this layer: consume them and go one layer down.
		offset = offset + size
		depth = depth - 1
		layer = self:layer(depth)
	end
	return nil
end

-- Pushes a fresh layer onto the stack and makes it the current layer.
-- The layer records the head position at the time of the push and `route`.
function Parser:push(route)
	local layer = {}
	layer.head = self.head
	layer.route = route
	local top = #self + 1
	self[top] = layer
	self[-1] = layer
end

-- Pushes a sublayer (marked with `sublayer = true`) using `handler`, and
-- makes it the current layer. If `inherit` is truthy, reads and writes of
-- keys missing from the sublayer are forwarded to the layer that was current
-- before the push, and that layer's metamethods are inherited.
function Parser:push_sublayer(handler, inherit)
	local sublayer = {handler = handler, sublayer = true}
	if inherit then
		local parent = self[-1]
		local mt = inherit_metamethods({
			__index = parent,
			__newindex = parent
		}, getmetatable(parent))
		setmetatable(sublayer, mt)
	end
	local top = #self + 1
	self[top] = sublayer
	self[-1] = sublayer
end

-- Pops layers off the stack until a non-sublayer has been removed, and
-- returns that layer. Each sublayer popped on the way has its tokens emitted
-- into the layer that becomes current after it. When the stack empties, the
-- parser itself becomes the current layer.
function Parser:pop()
	local depth = #self
	local layer
	repeat
		layer = self[depth]
		self[depth] = nil
		depth = depth - 1
		local below = self[depth]
		if below == nil then
			self[-1] = self
		else
			self[-1] = below
		end
		local is_sublayer = layer.sublayer ~= nil
		if is_sublayer then
			self:emit_tokens(layer)
		end
	until not is_sublayer
	return layer
end

-- Removes the current layer from the top of the stack without emitting its
-- tokens, strips its metatable and `sublayer` flag, and returns it. The layer
-- below becomes current, or the parser itself if the stack is now empty.
function Parser:pop_sublayer()
	local depth = #self
	local sublayer = self[-1]
	self[depth] = nil
	local below = self[depth - 1]
	if below == nil then
		self[-1] = self
	else
		self[-1] = below
	end
	setmetatable(sublayer, nil)
	sublayer.sublayer = nil
	return sublayer
end

-- Pushes a new layer for `route` and runs the route function with the extra
-- arguments. If the route returns nil, parsing continues via `traverse`
-- until a layer is produced. Returns the resulting layer.
function Parser:get(route, ...)
	self:push(route)
	local layer = route(self, ...)
	if layer ~= nil then
		return layer
	end
	return (self:traverse())
end

-- Attempts `route` at the current head position.
-- Returns `success, layer`. A route already recorded in `failed_routes` for
-- this head position is not run again: its failed layer is returned with
-- `false`.
function Parser:try(route, ...)
	local cached = get_nested(self.failed_routes, route, self.head)
	if cached == nil then
		local layer = self:get(route, ...)
		return not layer.fail, layer
	end
	return false, cached
end

-- Passes one token to the current layer's handler and returns its result.
-- `this` defaults to the token at the head; the layer's `override` takes
-- precedence over its `handler`.
function Parser:consume(this, ...)
	local layer = self[-1]
	local token = this
	if token == nil then
		token = self:read()
	end
	local handler = layer.override or layer.handler
	return handler(self, token, ...)
end

-- Abandons the current route: pops its layer, flags it as failed, records it
-- in `failed_routes` under its route and starting head position, and rewinds
-- the head to where the route began. Returns the failed layer.
function Parser:fail_route()
	local failed = self:pop()
	local origin = failed.head
	failed.fail = true
	set_nested(self, "failed_routes", failed.route, origin, failed)
	self.head = origin
	return failed
end

-- Consumes tokens one at a time, advancing the head after each, until a call
-- to `consume` returns a non-nil layer; that layer is returned.
function Parser:traverse()
	local layer = self:consume()
	while layer == nil do
		self:advance()
		layer = self:consume()
	end
	return layer
end

-- Converts a handler into a switch table the first time it's called, which avoids creating unnecessary objects, and prevents any scoping issues caused by parser methods being assigned to table keys before they've been declared.
-- false is used as the default key.
do
	local Switch = {}
	
	-- Dispatches on the token `this`: uses the handler stored under that key,
	-- or the default handler stored under `false`.
	function Switch:__call(parser, this)
		local handler = self[this] or self[false]
		return handler(parser, this)
	end
	
	-- Turns the table `t` into a callable switch and returns it. If `func` is
	-- the current layer's handler, the handler is replaced by `t`.
	function Parser:switch(func, t)
		local current = self[-1]
		-- Point handler to the new switch table if the calling function is the current handler.
		if current.handler == func then
			current.handler = t
		end
		setmetatable(t, Switch)
		return t
	end
end

-- Generate a new parser class object, which is used as the template for any parser objects. These should be customized with additional/modified methods as needed.
-- Creates a parser subclass of `self`: a self-indexing table that inherits
-- the parent's metamethods and has the parent as its metatable.
function Parser:new_class()
	local class = {}
	-- Set before inheriting, so the parent's `__index` is not copied over it.
	class.__index = class
	inherit_metamethods(class, self)
	return setmetatable(class, self)
end

-- Generate a new parser object, which is used for a specific parse.
-- Creates a parser instance of this class for `text`, with the head at
-- position 1.
function Parser:new(text)
	local parser = {head = 1, text = text}
	return setmetatable(parser, self)
end

-- Runs a complete parse described by `data`:
--   `data.text`       the input handed to `Parser:new`;
--   `data.route`      array holding the route function followed by its arguments;
--   `data.node`       array holding the node class followed by extra arguments for its `new`;
--   `data.allow_fail` if truthy, a failed route returns instead of raising.
-- Returns `true, node, parser` on success, or `false, nil, parser` on an
-- allowed failure. Raises an error if layers remain on the stack afterwards,
-- or if the route fails and failure is not allowed.
function Parser:parse(data)
	local parser = self:new(data.text)
	local ok, tokens = parser:try(unpack(data.route))
	if #parser > 0 then
		-- This shouldn't happen.
		error("Parser exited with non-empty stack.")
	end
	if ok then
		local spec = data.node
		local class = spec[1]
		return true, class:new(tokens, unpack(spec, 2)), parser
	end
	if data.allow_fail then
		return false, nil, parser
	end
	error("Parser exited with failed route.")
end

export.class_else_type = class_else_type
export.is_node = is_node
export.tostring = tostring

-- Returns a fresh parser class and a fresh node class (named "node"), for
-- the caller to customise.
function export.new()
	local parser_class = Parser:new_class()
	local node_class = Node:new_class("node")
	return parser_class, node_class
end

return export