-- Module:User:Surjection/unpacker
-- Appearance
-- - This module sandbox lacks a documentation subpage. Please create it.
-- - Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
-- Module table; the public functions are attached to it and it is returned
-- at the end of the file.
local export = {}
--local data = "\001fi\004foo\019bar\001"
--description of format:
--\001TOP_LEVEL_KEY\004 -> used to search a top level key
--value is expected to be a table. table format:
-- a key followed by \001, \003, \016-\020.
-- if key is empty, the key is taken as the next available numeric index
-- if the key is only \002, it is taken as the empty string
-- if it starts with \002, the rest of the key is parsed as a number.
-- if it starts with \005, the character code of the next character is used as a numeric index into common_keys
-- otherwise it is taken as a string key.
-- \001 means "end of all tables, a top-level key is up next"
-- \003 means "end of this table only"
-- \016-\020 are data types.
-- \016 is NIL (followed by nothing),
-- \017 is BOOL (followed by one byte, 0 = false, anything else is true)
-- \018 is NUMBER. anything until a \001 (end of top-level table), \002 (next index), \003 (end of this table), \005 (compressed key), \016-\020 (data type), \031 (end of value, if ambiguous otherwise i.e. if a string key follows) is captured and converted into a number.
-- \019 is STRING. anything until a \001 (end of top-level table), \002 (next index), \003 (end of this table), \005 (compressed key), \016-\020 (data type), \031 (end of value, if ambiguous otherwise i.e. if a string key follows) is captured and stored as a string.
-- \020 is TABLE and starts a nested table. go back to step 1, expecting a key. \003 ends a nested table, \001 ends all tables.
-- no \031 should follow, we know when a table ends and don't need a special terminator.
--\001 -> end of top-level keys
--in theory, we could have an escape code to allow these characters in keys and strings. that is not implemented, because there is currently no need.
-- Frequently used string keys. In the packed data a key written as \005
-- followed by a single byte N stands for entry N of this list, so the order
-- of the entries must not change.
local common_keys = {
	"from",              -- 1
	"remove_diacritics", -- 2
	"type",              -- 3
	"ancestors",         -- 4
	"wikimedia_codes",   -- 5
	"wikipedia_article", -- 6
	"translit",          -- 7
	"link_tr",           -- 8
	"display_text",      -- 9
	"entry_name",        -- 10
	"sort_key",          -- 11
	"dotted_dotless_i",  -- 12
	"standardChars",     -- 13
}
-- Packed payload that export.find_key searches. It currently holds a single
-- top-level entry, introduced by \001fi\004 and closed by the final \001;
-- compressed keys inside it (\005 followed by one byte) index into common_keys.
local data = "\001fi\004\019Finnish\0181412\019urj-fin\019Latn\005\010\020\005\001\020\019’\003to\020\019'\003\005\002\019ˣ\003\005\011\020\005\001\020\019ø\019æ\019œ\019ß\003to\020\019o\019ae\019oe\019ss\003\005\002\019̧̀́̂̃̋̌':\003\005\003\019regular\005\009\020to\020\019’\003\005\001\020\019'\003\003\005\013\019AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÄäÖö ',%-–…∅\001"
--- Decode one packed table from `packed`, starting at byte position `index`.
--
-- Entries are read one after another as "key, type marker, value" until a
-- terminator (\001 or \003) or the end of the string is reached. Nested
-- tables (\020) are decoded recursively.
--
-- @param packed       string holding the packed data
-- @param index        1-based byte position of the first key of the table
-- @param common_keys  list used to resolve compressed keys (\005 + one byte)
-- @return the decoded table
-- @return the byte position at which the caller should continue reading
--         (a closing \003 is consumed, a closing \001 is not)
-- Raises an error if the data ends in the middle of an entry or if a
-- compressed key refers to an index that is missing from `common_keys`.
local function unpack_row(packed, index, common_keys)
	local result = {}
	local packed_len = packed:len()
	while index <= packed_len do
		-- find key
		local table_key, table_value
		if packed:byte(index) == 5 then
			-- compressed key: \005, one index byte, then the type marker
			table_key = common_keys[packed:byte(index + 1)]
			index = index + 3
		else
			-- literal key: everything up to the next terminator or type marker.
			-- The pattern can match the empty string, so it always succeeds here.
			table_key = packed:match("([^\001\003\016-\020]*)", index)
			index = index + table_key:len() + 1
		end
		-- the byte just before `index` is the terminator or type marker
		local marker = packed:byte(index - 1)
		if marker == nil then
			error("Packed data ended before a type marker or terminator")
		end
		local table_value_type = marker - 16
		if table_value_type < 0 then -- end of table
			if table_value_type == -15 then
				-- \001 ends the whole top-level value: leave it unconsumed so
				-- that every enclosing table sees it and stops as well
				index = index - 1
			end
			break
		end
		if table_key == nil then
			error("Compressed key index not present in common_keys")
		end
		if table_key:len() == 0 then
			-- empty key: use the next free numeric index
			table_key = #result + 1
		elseif table_key:byte() == 2 then
			-- \002 introduces a numeric key; a lone \002 is the empty string key
			table_key = tonumber(table_key:sub(2)) or ""
		end
		if table_value_type == 0 then -- \016 NIL
			table_value = nil
		elseif table_value_type == 1 then -- \017 BOOL
			-- store in table_value: the shared assignment at the end of the loop
			-- would otherwise overwrite the entry with nil
			table_value = packed:byte(index) > 0
			index = index + 1
		elseif table_value_type == 4 then -- \020 TABLE
			table_value, index = unpack_row(packed, index, common_keys)
		else
			-- scalar payload runs up to the next control byte
			local capture = packed:match("([^\001-\003\005\016-\020\031]*)", index)
			if table_value_type == 2 then -- \018 NUMBER
				table_value = tonumber(capture)
			else -- \019 STRING
				table_value = capture
			end
			index = index + capture:len()
			if packed:byte(index) == 31 then -- skip value separator
				index = index + 1
			end
		end
		result[table_key] = table_value
	end
	return result, index
end
--- Look up a top-level key in the packed data and decode its value.
-- @param key  top-level key to search for (for example "fi"); it is matched
--             literally, byte for byte
-- @return the table decoded from the entry stored under `key`
-- Raises "Key not found" if the packed data has no such top-level key.
function export.find_key(key)
	local needle = "\001" .. key .. "\004"
	-- Plain search (fourth argument `true`): the key is data, not a pattern,
	-- so characters such as "-", "." or "%" in it must not be treated as
	-- pattern magic.
	local _, end_index = data:find(needle, 1, true)
	if not end_index then error("Key not found") end
	-- The parentheses keep only the decoded table and drop the index that
	-- unpack_row returns as its second value.
	return (unpack_row(data, end_index + 1, common_keys))
end
-- Hand the module table (with find_key attached) back to the caller.
return export