-- Module:User:Erutuon/split language data modules
local export = {}
-- Recursive worker for deepcopy.
-- `value` is the value to copy; `on_path` is a set of the tables that are
-- currently being copied higher up the call stack. Meeting one of them
-- again means the input contains itself, so an error is raised instead of
-- recursing until the interpreter runs out of stack.
local function copy_tracking_path(value, on_path)
	if type(value) ~= 'table' then
		-- number, string, boolean, nil, function, etc.: returned as is
		return value
	end
	if on_path[value] then
		error("deepcopy: circular reference detected; recursive tables cannot be copied")
	end
	on_path[value] = true
	local copy = {}
	for key, item in pairs(value) do
		-- Keys are copied as well, so a table used as a key is duplicated.
		copy[copy_tracking_path(key, on_path)] = copy_tracking_path(item, on_path)
	end
	-- Only ancestors are tracked: once this table is finished it may be
	-- met again elsewhere (a shared, non-circular reference) and will then
	-- simply be copied a second time.
	on_path[value] = nil
	return copy
end

-- Version of deepcopy from [[Module:table]] that doesn't preserve
-- references to the same table: a table reachable through several fields
-- is copied once per occurrence, so the result contains no shared tables.
-- Tables that contain themselves cannot be copied this way; they raise a
-- descriptive error.
-- orig:  the value to copy (a non-table is returned unchanged).
-- level: unused; kept so the signature stays the same.
-- Returns the copy.
local function deepcopy(orig, level)
	if type(orig) ~= 'table' then
		return orig
	end
	return copy_tracking_path(orig, {})
end
-- Decode a JSON string holding several language data tables, keyed by
-- language code, and return them as a new table.
-- mw.text.jsonEncode writes integer keys as strings whenever a data table
-- also has string keys, so such keys are turned back into numbers here.
function export.load_json_language_data(json)
	local result = {}
	for lang_code, decoded in pairs(mw.text.jsonDecode(json)) do
		local entry = decoded
		-- The original language data always has key 1 (canonical name),
		-- so a table decoded from a JSON object always has key "1".
		-- Tables without it are stored exactly as decoded.
		if decoded["1"] then
			entry = {}
			for key, value in pairs(decoded) do
				entry[tonumber(key) or key] = value
			end
		end
		result[lang_code] = entry
	end
	return result
end
-- Group every entry of Module:languages/data/all under the key returned by
-- key_maker(code, data) and return the grouped tables encoded as JSON.
function export.split(key_maker)
	local language_data = require "Module:languages/data/all"
	local groups = {}
	for code, data in pairs(language_data) do
		local group_key = key_maker(code, data)
		groups[group_key] = groups[group_key] or {}
		-- mw.text.jsonEncode refuses to encode the language data directly
		-- because of "circular references", which probably means the
		-- scripts fields that point at the same {"Latn"}, {"Cyrl"},
		-- {"Arab"} tables; hence the copy that does not share tables.
		-- Sequence tables (only canonical name, Wikidata item, and family)
		-- are encoded as JSON arrays, the others as objects, so the
		-- number-indexed values end up under either number or string
		-- fields. Consumers should therefore normalise keys with
		--     tonumber(key) or key
		-- or the equivalent on the other side.
		groups[group_key][code] = deepcopy(data)
	end
	return mw.text.jsonEncode(groups)
end
-- Split the language data into groups keyed by the first two characters
-- of each language code and return the result of export.split.
-- The frame argument is not used.
function export.split_by_two_letter_prefix(frame)
	local function two_letter_prefix(code)
		return code:sub(1, 2)
	end
	return export.split(two_letter_prefix)
end
-- Return the data split by two-letter prefix, preceded by a line giving
-- the length of that string as reported by the # operator.
-- The frame argument is not used.
function export.show(frame)
	local json = export.split_by_two_letter_prefix()
	local header = "length: " .. tostring(#json)
	return header .. "\n\n" .. json
end
return export