Module:User:Erutuon/lang stuff
Jump to navigation
Jump to search
- The following documentation is located at Module:User:Erutuon/lang stuff/documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
Generates the tables of information about Wiktionary languages in User:Erutuon/language stuff, User:Erutuon/languages with no scripts, and User:Erutuon/otherNames.
-- Table of the functions this module exposes; it is filled in below and
-- returned at the end of the file.
local export = {}
-- Key under which a language's data table holds its script list; read as
-- data[script_key] throughout this module.
local script_key = 4
-- Contents of [[Module:scripts/data]], loaded on the first call below.
local all_scripts

--- Builds a link to a script's category, displayed as the script code.
-- The category title is the script's first data field, with " script"
-- appended unless the last word of that name is "scripts", "code" or
-- "semaphore" (compared case-insensitively).
-- @param script_code  key to look up in the script data
-- @return wikilink string
-- Raises an error when the script data has no entry for script_code.
local function _link_script(script_code)
    all_scripts = all_scripts or require "Module:scripts/data"
    local script_data = all_scripts[script_code]
    if not script_data then
        error("No script with code " .. tostring(script_code) .. ".")
    end
    local name = script_data[1]
    -- A name that does not end in a letter has no trailing word to inspect;
    -- fall back to "" so it is treated as an ordinary name instead of
    -- failing with "attempt to index a nil value".
    local last_word = (name:match "%a+$" or ""):lower()
    if last_word == "scripts" or last_word == "code" or last_word == "semaphore" then
        return "[[:Category:" .. name .. "|" .. script_code .. "]]"
    else
        return "[[:Category:" .. name .. " script|" .. script_code .. "]]"
    end
end
-- Script code -> link already produced by _link_script.
local cache = {}

--- Caching front end for _link_script: each script code is resolved once
-- and the resulting link is reused on later calls.
local function link_script (script_code)
    local cached = cache[script_code]
    if cached then
        return cached
    end
    local fresh = _link_script(script_code)
    cache[script_code] = fresh
    return fresh
end
--- Turns a list given either as a table or as a string of items separated by
-- commas and/or whitespace into an object built by [[Module:array]].
-- The function is wrapped by the memoize helper of [[Module:fun]].
local array_from_comma_list_or_array = require "Module:fun".memoize(function (list)
    if type(list) == "table" then
        -- Already a table: hand it to the Array constructor as is.
        local Array = require "Module:array"
        return Array(list)
    end
    local Array = require "Module:array"
    local items = Array()
    -- Every maximal run of characters that are neither whitespace nor a
    -- comma is one item.
    for item in string.gmatch(list, "[^%s,]+") do
        items:insert(item)
    end
    return items
end)
--- Wraps a level-2 heading in a zero-height <div>, so the heading is present
-- in the wikitext without taking up space on the page.
-- The result ends with a newline so that whatever the caller appends (in this
-- module, usually table markup beginning with "{|") starts on a line of its
-- own; table markup is only recognised at the start of a line.
-- @param title  heading text
-- @return wikitext string
local function ToC_item(title)
    return '<div style="overflow: hidden; height: 0; margin: 0; padding: 0;">\n=='
        .. title .. '==\n</div>\n'
end
--- Lists canonical names that belong both to a language and to a language
-- family.
-- @param frame  frame object supplied by the caller; not read here.
-- @return wikitext: a hidden heading followed by a sortable table with one
--   row per shared name (language link, language code, family link, family
--   code). The caption states the number of rows.
function export.lang_and_fam_name(frame)
    local language_name_to_code = require "Module:languages/canonical names"
    local family_data = require "Module:families/data"
    local Map = require "Module:User:Erutuon/lang_stuff/map"
    local is_combining = require "Module:Unicode data".is_combining
    local fun = require "Module:fun"

    -- Links to the category of a language, or of a family when `family` is
    -- truthy. " language"/" languages" is appended unless already present.
    local function link_name(name, family)
        if family then
            local catname = name:find "[Ll]anguages$" and name or name .. " languages"
            return "[[:Category:" .. catname .. "|" .. catname .. "]]"
        else
            return "[[:Category:" .. (name:find "[Ll]anguage$" and name or name .. " language") .. "|" .. name .. "]]"
        end
    end

    -- Sort key: the name passed through mw.ustring.toNFD, with every
    -- multi-byte character that is_combining accepts removed.
    local get_sort_value = fun.memoize(function (canonical_name)
        return mw.ustring.toNFD(canonical_name):gsub(
            "[\194-\244][\128-\191]+",
            function (nonASCII_char)
                if is_combining(mw.ustring.codepoint(nonASCII_char)) then
                    return ""
                end
            end)
    end)

    -- Number of families whose name is also a language name. Only matching
    -- families are counted, so the caption agrees with the number of rows.
    local count = 0
    local families_that_share_name_with_language = Map:new(family_data)
        :filter(
            function (data)
                local shares_name = language_name_to_code[data[1]] ~= nil
                if shares_name then
                    count = count + 1
                end
                return shares_name
            end)

    return ToC_item("Languages and language families with the same name")
        .. '\n{| class="wikitable sortable"\n|+ ' .. count
        .. ' pairs of languages and language families have the same canonical name\n! language !! code !! family !! code\n'
        .. families_that_share_name_with_language
            -- Convert to array and add language family code as "code" field in
            -- data table.
            :to_array("code")
            :sort(
                function (family1, family2)
                    return get_sort_value(family1[1]) < get_sort_value(family2[1])
                end)
            :map(
                function (data)
                    local canonicalName = data[1]
                    return ("|-\n| %s || <code>%s</code> || %s || <code>%s</code>\n")
                        :format(link_name(canonicalName), language_name_to_code[canonicalName],
                            link_name(canonicalName, true), data.code)
                end)
            :concat()
        .. '|}'
end
--- Lists the languages whose script list has at least a given number of
-- entries.
-- @param frame   frame object; frame.args[1] supplies the minimum when
--   `number` is not given.
-- @param number  optional minimum number of scripts.
-- @return wikitext: a hidden heading followed by a sortable table (name, code,
--   script count, linked script codes). The codes "und" and "mul" are left
--   out of both the rows and the count in the caption.
-- Raises an error when no minimum can be determined.
function export.number_of_scripts(frame, number)
    local Map = require "Module:User:Erutuon/lang_stuff/map"

    local minimum_number_of_scripts = number or tonumber(frame.args[1])
        or error("Supply a number in parameter 1.")

    -- Exclude "und" and "mul" before counting, so that the count shown in the
    -- caption matches the rows that are actually displayed.
    local languages = Map:new(require "Module:languages/data/all")
        :filter(
            function (data, code)
                if code == "und" or code == "mul" then
                    return false
                end
                return data[script_key] and #array_from_comma_list_or_array(data[script_key]) >= minimum_number_of_scripts
            end)

    local count = languages:size()

    return ToC_item("Number of scripts")
        .. '\n{| class="wikitable sortable"\n|+ ' .. count .. ' languages use ' .. minimum_number_of_scripts
        .. ' or more scripts\n! canonical name !! code !! script<br>count !! style="width: 8em;" | scripts\n'
        .. languages
            :map(
                function (data, code)
                    local canonical_name = data[1]
                    local scripts = array_from_comma_list_or_array(data[script_key])
                    return ('|-\n| [[:Category:%s|%s]] || <code style="white-space: nowrap;">%s</code> || %d || %s\n')
                        :format(
                            -- Same rule as the other category links in this
                            -- module: add " language" unless the name already
                            -- ends in "language" or "Language".
                            canonical_name .. (canonical_name:find("[Ll]anguage$") and "" or " language"),
                            canonical_name,
                            code,
                            #scripts,
                            scripts:map(link_script):concat(", "))
                end)
            :sorted_concat()
        .. "|}"
end
--- Counts the language codes that fall under each language data module.
-- Codes are classified by length: two characters, three characters (grouped
-- by first character, plus an overall three-letter total), or anything else
-- ("exceptional").
-- @param frame  frame object supplied by the caller; not read here.
-- @return wikitext: a hidden heading followed by a sortable table.
function export.census(frame)
    local alldata = require "Module:languages/data/all"
    local Map = require "Module:User:Erutuon/lang_stuff/map"
    local tally = require "Module:count":new()

    for code in pairs(alldata) do
        local length = #code
        local module_key = "exceptional"
        if length == 2 then
            module_key = 2
        elseif length == 3 then
            -- Numeric key 3 holds the total of all three-letter codes.
            tally[3] = tally[3] + 1
            module_key = "3" .. code:sub(1, 1)
        end
        tally[module_key] = tally[module_key] + 1
    end

    -- Formats one table row from a tally entry.
    local function make_row(total, module_key)
        if module_key == 3 then
            return ('|-\n| data-sort-value="%d" | three-letter codes || %d\n')
                :format(module_key, total)
        end
        local module
        if module_key == 2 then
            module = "data/2"
        elseif module_key:sub(1, 1) == "3" then
            module = "data/3/" .. module_key:sub(2, 2)
        else
            module = "data/exceptional"
        end
        return ('|-\n| data-sort-value="%s" | [[Module:languages/%s]] || %d\n')
            :format(tostring(module_key), module, total)
    end

    local rows = Map:new(tally):map(make_row):sorted_concat()

    return ToC_item('Languages in each module')
        .. '\n{| class="wikitable sortable"\n|+ '
        .. 'Total number of codes in each language data module\n! module !! count\n'
        .. rows
        .. '|}'
end
--- Groups the codes of [[Module:languages/data/exceptional]] by their shape.
-- The shape of a code is the code with every character other than "-"
-- replaced by "a" (for example "aaa-aaa").
-- @param frame  frame object supplied by the caller; not read here.
-- @return wikitext: a hidden heading followed by a sortable table with one
--   row per shape, the number of codes and, as a tooltip, the codes.
function export.exceptional_code_formats(frame)
    local Map = require "Module:User:Erutuon/lang_stuff/map"
    local language_data = Map:new(require "Module:languages/data/exceptional")

    -- Appends v to the list stored at t[k], creating the list if needed.
    local function add(t, k, v)
        local bucket = t[k]
        if not bucket then
            bucket = {}
            t[k] = bucket
        end
        bucket[#bucket + 1] = v
    end

    -- Plain table whose methods are looked up in a Map instance carrying `add`.
    local codes_by_format = setmetatable({}, { __index = Map:new{ add = add } })

    for code in language_data:sorted_pairs() do
        local shape = code:gsub("[^-]", "a")
        codes_by_format:add(shape, code)
    end

    -- Sort key of a shape: each run of non-hyphen characters is replaced by
    -- its length.
    local function get_sort_value(shape)
        return (shape:gsub("[^-]+", string.len))
    end

    local function compare(shape1, shape2)
        return get_sort_value(shape1) < get_sort_value(shape2)
    end

    local function make_row(codes, shape)
        local code_map = Map:new(codes)
        return ('|-\n| <code>%s</code> || title="%s" | %d\n'):format(
            shape,
            code_map:sorted_concat(", "),
            #code_map)
    end

    local rows = codes_by_format:map(make_row):sorted_concat("", compare)

    return ToC_item('Exceptional code formats')
        .. '\n{| class="wikitable sortable"\n|+ '
        .. 'Code formats in [[Module:languages/data/exceptional]]\n! format !! count\n'
        .. rows
        .. '|}'
end
--- Groups languages by the exact list of scripts in their data.
-- Languages without a script list are grouped under "None"; the codes "und"
-- and "mul" are skipped.
-- @param frame  frame object supplied by the caller; not read here.
-- @return wikitext: a hidden heading followed by a sortable table with one
--   row per script list (linked script codes, number of scripts, number of
--   languages, and a tooltip naming up to 80 of the languages).
function export.script_combinations(frame)
    local Array = require "Module:array"
    local Map = require "Module:User:Erutuon/lang_stuff/map"
    local language_data = require "Module:languages/data/all"
    local number_of_languages_in_tooltip = 80

    -- Appends lang_code to the list stored at t[k], creating it if needed.
    local function add(t, k, lang_code)
        local bucket = t[k]
        if not bucket then
            bucket = {}
            t[k] = bucket
        end
        bucket[#bucket + 1] = lang_code
    end

    -- Plain table whose methods are looked up in a Map instance carrying `add`.
    local script_combinations = setmetatable({}, { __index = Map:new{ add = add } })

    for code, data in pairs(language_data) do
        if code ~= "und" and code ~= "mul" then
            local script_list = data[script_key]
            local combination = "None"
            if script_list ~= nil then
                combination = array_from_comma_list_or_array(script_list):concat ", "
            end
            script_combinations:add(combination, code)
        end
    end

    local count = script_combinations:size()

    local function display_language(language_code)
        return language_data[language_code][1] .. " (" .. language_code .. ")"
    end

    local function make_row(languages, script_list)
        -- Replace each script code by its link; the second result of gsub is
        -- the number of codes replaced, i.e. the number of scripts.
        local linked_scripts, script_count = script_list:gsub("[^, ]+", link_script)
        local language_count = #languages
        local language_list
        if languages[2] then
            language_list = Array(languages)
                :sort()
                :slice(1, number_of_languages_in_tooltip)
                :map(display_language)
                :concat ", "
        else
            language_list = display_language(languages[1])
        end
        -- Signal that the tooltip was cut short.
        if languages[number_of_languages_in_tooltip + 1] then
            language_list = language_list .. ", ..."
        end
        return ('|-\n| %s || %d || title="%s" | %d\n')
            :format(linked_scripts, script_count, language_list,
                language_count)
    end

    local function compare_ignoring_case(script_list1, script_list2)
        return script_list1:lower() < script_list2:lower()
    end

    return ToC_item('Script combinations')
        .. [[
{| class="wikitable sortable"
|+ ]] .. count .. [[ script combinations (sorted alphabetically) and the number of languages that use them
! style="width: 8em;" | script list !! script<br>count !! languages
]]
        .. script_combinations
            :map(make_row)
            :sorted_concat("", compare_ignoring_case)
        .. '|}'
end
--- Counts, for every key that occurs in a language's data table, how many
-- languages have that key.
-- @param frame  frame object supplied by the caller; not read here.
-- @return wikitext: a hidden heading followed by a sortable table. The total
--   number of languages comes first, then the keys by descending count.
function export.count_data_items(frame)
    local counts = require "Module:count":new()
    local Map = require "Module:User:Erutuon/lang_stuff/map"

    for _, data in pairs(require "Module:languages/data/all") do
        for k in pairs(data) do
            counts[k] = counts[k] + 1
        end
        counts.total = counts.total + 1
    end

    -- Descriptions shown for the numeric keys 1, 2 and 3.
    local info = { "canonical name", "Wikidata item", "family" }

    return ToC_item('Data item census')
        .. [[
{| class="wikitable sortable"
|+ Number of languages with each data item in their table
! data item !! count
|-
]]
        .. Map:new(counts)
            :map(
                function (count, data_key)
                    if data_key == "total" then
                        return ("| total<br>languages || %d"):format(count)
                    elseif info[data_key] then
                        return ("| <code>%s</code> (%s) || %d"):format(data_key, info[data_key], count)
                    else
                        return ("| <code>%s</code> || %d"):format(data_key, count)
                    end
                end)
            :sorted_concat(
                "\n|-\n",
                function (data_key1, data_key2)
                    -- Ensure "total languages" shows at the top. The function
                    -- must stay a strict ordering: it may not report "total"
                    -- as coming before itself, nor anything as coming before
                    -- "total", or sorting can fail with "invalid order
                    -- function".
                    if data_key1 == "total" or data_key2 == "total" then
                        return data_key1 == "total" and data_key2 ~= "total"
                    end
                    return counts[data_key1] > counts[data_key2]
                end)
        .. "\n|}"
end
--- Lists the languages whose data has no script list.
-- @param frame  frame object supplied by the caller; not read here.
-- @return wikitext: a hidden heading followed by a sortable table (code, name
--   linked to a Wikipedia article, data module).
function export.no_scripts(frame)
    local Map = require "Module:User:Erutuon/lang_stuff/map"

    local function has_no_scripts(data)
        return data[script_key] == nil
    end

    local function make_row(data, code)
        local name = data[1]

        -- Article title, in order of preference: explicit field, English
        -- Wikipedia sitelink of the Wikidata item, then the language name
        -- (with " language" added unless the name already contains it).
        local article = data.wikipedia_article
        if not article and data.wikidata_item then
            article = mw.wikibase.sitelink(data.wikidata_item, 'enwiki')
        end
        if not article then
            if name:find("[Ll]anguage") then
                article = name
            else
                article = name .. " language"
            end
        end

        local module
        if #code == 3 then
            module = "data/3/" .. code:sub(1, 1)
        elseif #code == 2 then
            module = "data/2"
        else
            module = "data/exceptional"
        end

        return ('| %s || [[w:%s|%s]] || [[Module:languages/%s|%s]]')
            :format(code, article, name, module, module)
    end

    return ToC_item('Languages with no scripts') ..
[[
{| class="wikitable sortable"
|+ Languages with no scripts
! code !! name !! module
|-
]] .. Map:new(require "Module:languages/data/all")
            :filter(has_no_scripts)
            :map(make_row)
            :sorted_concat("\n|-\n")
        .. "\n|}"
end
--- Lists the languages whose `entry_name` data field is a table, together
-- with the `from` and `to` lists found in its entries.
-- @param frame  frame object supplied by the caller; not read here.
-- @return wikitext: a hidden heading followed by a sortable table (language,
--   script, replacements).
function export.entry_name_replacements(frame)
    local Array = require "Module:array"
    local Map = require "Module:User:Erutuon/lang_stuff/map"
    local add_dotted_circle = require "Module:Unicode data".add_dotted_circle

    local function script_tag(script_code, str)
        return '<span class="' .. script_code .. '">' .. str .. '</span>'
    end

    -- Renders a `from` or `to` list inside a script-tagged span; a missing or
    -- empty list gives "". Lists of two or more items are joined with ", "
    -- and passed through add_dotted_circle.
    local function show_from_or_to(from_or_to, script_code)
        if not (from_or_to and from_or_to[1]) then
            return ""
        end
        local text
        if from_or_to[2] then
            text = add_dotted_circle(Array(from_or_to):concat ", ")
        else
            text = from_or_to[1]
        end
        return script_tag(script_code, text)
    end

    local header = 'Languages with entry name replacements'

    -- The word "header" in the template is replaced by the caption text.
    local table_start = ([[
{| class="wikitable sortable"
|+ header
! language !! script !! replacements
|-
]]):gsub('header', header)

    local function has_replacement_table(data)
        return type(data.entry_name) == "table"
    end

    local function make_rows(data, code)
        local rows = Array()
        for _, replacements in pairs(data.entry_name) do
            if replacements.from then
                -- The script is determined from the concatenated `from`
                -- strings rather than from the key of the entry.
                local best_script = require "Module:languages".getByCode(code)
                    :findBestScript(Array(replacements.from):concat())
                local script_code = best_script:getCode()
                rows:insert(('|-\n| %s (<code>%s</code>) || %s || %s<br>↓<br>%s')
                    :format(data[1], code,
                        link_script(script_code),
                        show_from_or_to(replacements.from, script_code),
                        show_from_or_to(replacements.to, script_code)))
            end
        end
        return rows:concat("\n")
    end

    return ToC_item(header) .. table_start
        .. Map:new(require "Module:languages/data/all")
            :filter(has_replacement_table)
            :map(make_rows)
            :sorted_concat "\n|-\n"
        .. "\n|}"
end
--- Lists, for every Wikimedia language code found in the language data, the
-- Wiktionary languages that name it in their `wikimedia_codes` field.
-- @param frame  frame object supplied by the caller; not read here.
-- @return wikitext: a hidden heading followed by a sortable table.
function export.wikimedia_languages(frame)
    local fun = require "Module:fun"

    -- Wikimedia code -> list of Wiktionary codes; a list is created the first
    -- time a key is read.
    local languages_with_Wikimedia_code = setmetatable({},
        {
            __index = function (self, key)
                local val = {}
                self[key] = val
                return val
            end,
        })

    local language_data = require "Module:languages/data/all"

    for code, data in pairs(language_data) do
        local wikimedia_codes = data.wikimedia_codes
        -- Accept a comma/whitespace-separated string as well as a list
        -- table, as is already done for script lists in this module.
        if type(wikimedia_codes) == "string" then
            for wikimedia_code in wikimedia_codes:gmatch "[^%s,]+" do
                table.insert(languages_with_Wikimedia_code[wikimedia_code],
                    code)
            end
        elseif type(wikimedia_codes) == "table" then
            for _, wikimedia_code in ipairs(wikimedia_codes) do
                table.insert(languages_with_Wikimedia_code[wikimedia_code],
                    code)
            end
        end
    end

    for _, codes in pairs(languages_with_Wikimedia_code) do
        if codes[2] then
            table.sort(codes)
        end
    end

    -- Same rule as the other category links in this module: add " language"
    -- unless the name already ends in "language" or "Language".
    local function get_category_name(canonical_name)
        return canonical_name:find("[Ll]anguage$") and canonical_name
            or canonical_name .. " language"
    end

    return ToC_item("Wiktionary languages by Wikimedia language")
        .. [[
{| class="wikitable sortable"
|+ Languages by their Wikimedia language
! Wikimedia language !! Wiktionary language
]] .. table.concat(
            fun.mapIter(
                -- NOTE(review): the parameter order (value first, key second)
                -- is kept exactly as in the original call; confirm against
                -- the mapIter helper.
                function (Wiktionary_codes, Wikimedia_code)
                    return ("|-\n| [https://%s.wiktionary.org %s] || %s"):format(
                        Wikimedia_code,
                        Wikimedia_code,
                        table.concat(
                            fun.map(
                                function (code)
                                    local canonical_name = language_data[code][1]
                                    return ("%s ([[:Category:%s|%s]])"):format(
                                        code,
                                        get_category_name(canonical_name),
                                        canonical_name)
                                end,
                                Wiktionary_codes),
                            ", "))
                end,
                require "Module:table".sortedPairs(languages_with_Wikimedia_code)),
            "\n")
        .. "\n|}"
end
--- Lists every name (canonical name, entry of `otherNames`, or entry of
-- `aliases`, including nested alias lists) that is attached to more than one
-- language code.
-- @param frame  frame object supplied by the caller; not read here.
-- @return wikitext: a hidden heading followed by a sortable table. Languages
--   for which the name is the canonical name are wrapped in <mark>.
function export.ambiguous_names(frame)
    local Map = require "Module:User:Erutuon/lang stuff/map"
    local Array = require "Module:array"
    local language_data = require "Module:languages/data/all"
    local language_objects = require "Module:languages/cache"

    -- Name -> Array of language codes; an Array is created the first time a
    -- key is read.
    local name_to_object = {}
    setmetatable(name_to_object, {
        __index = function (self, key)
            local val = Array()
            self[key] = val
            return val
        end
    })

    -- seen[name][code] is set once `code` has been recorded for `name`, so a
    -- language that lists the same name twice (for example both as canonical
    -- name and as alias) is not reported as ambiguous with itself.
    local seen = {}
    local function add_name(name, code)
        local codes_seen = seen[name]
        if not codes_seen then
            codes_seen = {}
            seen[name] = codes_seen
        end
        if not codes_seen[code] then
            codes_seen[code] = true
            name_to_object[name]:insert(code)
        end
    end

    -- Records every alias in a possibly nested list of aliases.
    local function add_aliases(aliases, code)
        for _, alias in ipairs(aliases) do
            if type(alias) == "table" then
                add_aliases(alias, code)
            else
                add_name(alias, code)
            end
        end
    end

    for code, data in pairs(language_data) do
        add_name(data[1], code)
        if data.otherNames then
            for _, name in ipairs(data.otherNames) do
                add_name(name, code)
            end
        end
        if data.aliases then
            add_aliases(data.aliases, code)
        end
    end

    return ToC_item("Languages with ambiguous canonical or non-canonical names")
        .. [[
{| class="wikitable sortable"
|+ Canonical or non-canonical names that correspond to more than one language
! name !! languages]]
        .. Map:new(name_to_object)
            :filter(function (languages) return #languages > 1 end)
            :map(
                function (lang_codes, name)
                    local languages = Array(lang_codes)
                        :map(function (lang_code) return language_objects[lang_code] end)
                        :sort(function (lang1, lang2)
                            return lang1:getCanonicalName() < lang2:getCanonicalName()
                        end)
                        :map(function (lang)
                            return (lang:getCanonicalName() == name
                                and "<mark>[[:Category:%s|%s]]</mark> (<code>%s</code>)"
                                or "[[:Category:%s|%s]] (<code>%s</code>)")
                                :format(lang:getCategoryName(), lang:getCanonicalName(), lang:getCode())
                        end)
                        :concat(", ")
                    return ("\n|-\n| %s || %s"):format(name, languages)
                end)
            -- Same method name as every other Map call in this module.
            :sorted_concat()
        .. "\n|}"
end
--- Lists the languages whose canonical name begins with the canonical name of
-- another language (compared word by word, words being separated by spaces).
-- The shared language data tables are only read, never modified.
-- @param frame  frame object supplied by the caller; not read here.
-- @return wikitext: a bulleted list, one item per language, naming the
--   language and the matching prefixes, longest prefix first.
function export.languages_with_prefix_of_another_language(frame)
    local Array = require "Module:array"
    local all_languages = require "Module:languages/data/all"
    local language_name_to_code = require "Module:languages/canonical names"

    local function get_category_name(canonical_name)
        return canonical_name:find("[Ll]anguage$") and canonical_name
            or canonical_name .. " language"
    end

    local function make_category_link(canonical_name)
        return ("[[:Category:%s|%s]]")
            :format(get_category_name(canonical_name), canonical_name)
    end

    -- Language code -> list of proper word-prefixes of its name that are
    -- themselves language names. Kept in a local table so that nothing is
    -- written into the language data, which other functions of this module
    -- read as well.
    local prefixes_by_code = {}
    for code, data in pairs(all_languages) do
        local words = mw.text.split(data[1], " ", true)
        local prefixes
        for i = #words - 1, 1, -1 do
            local prefix = table.concat(words, " ", 1, i)
            if language_name_to_code[prefix] then
                prefixes = prefixes or {}
                prefixes[#prefixes + 1] = prefix
            end
        end
        -- Stays nil (no entry) for languages without a matching prefix.
        prefixes_by_code[code] = prefixes
    end

    return require "Module:User:Erutuon/lang stuff/map":new(prefixes_by_code)
        :map(function (prefixes, code)
            local name = all_languages[code][1]
            return ("* %s (<code>%s</code>): %s")
                :format(make_category_link(name), code,
                    Array(prefixes)
                        :map(make_category_link)
                        :concat(", "))
        end)
        :sorted_concat("\n")
end
--- Lists the languages whose data has an `otherNames` field.
-- @param frame  frame object supplied by the caller; not read here.
-- @return wikitext: a sortable table (name, code, the other names joined with
--   ", ", and a link to the language's data module).
function export.languages_with_otherNames_field(frame)
    local Array = require "Module:array"
    local all_languages = require "Module:languages/data/all"
    local get_data_module = require "Module:languages".getDataModuleName

    -- Category link for a language; " language" is appended to the category
    -- title unless the name already ends in "language" or "Language".
    local function make_category_link(canonical_name)
        local category_name = canonical_name
        if not canonical_name:find("[Ll]anguage$") then
            category_name = canonical_name .. " language"
        end
        return ("[[:Category:%s|%s]]"):format(category_name, canonical_name)
    end

    -- Link to the data module of a code, displayed without "languages/".
    local function module_link(code)
        local module = get_data_module(code)
        local label = module:gsub("languages/", "")
        return "[[Module:" .. module .. "|" .. label .. "]]"
    end

    local function has_other_names(data)
        return data.otherNames ~= nil
    end

    local function make_row(data, code)
        local other_names = Array(data.otherNames):concat(", ")
        return ("|-\n| %s || <code>%s</code> || %s || %s\n")
            :format(make_category_link(data[1]), code, other_names,
                module_link(code))
    end

    return [[
{| class="wikitable sortable"
|+ Languages with <code>otherNames</code> field in their language data
! name !! code !! otherNames !! module
]] .. require "Module:User:Erutuon/lang stuff/map":new(all_languages)
            :filter(has_other_names)
            :map(make_row)
            :sorted_concat()
        .. "|}"
end
--- Lists transliteration modules whose name is not of an expected form.
-- A module name counts as expected when it is "<language code>-translit",
-- is "translit-redirect", or ends in "-translit" and contains one of the
-- language's script codes.
-- @param frame  frame object supplied by the caller; not read here.
-- @return wikitext: a hidden heading followed by a sortable table (name, code,
--   script, transliteration module).
function export.languages_with_odd_translit_modules(frame)
    local Array = require "Module:array"
    local all_languages = require "Module:languages/data/all"

    local function get_category_name(canonical_name)
        return canonical_name:find("[Ll]anguage$") and canonical_name
            or canonical_name .. " language"
    end

    local function make_category_link(canonical_name)
        return ("[[:Category:%s|%s]]")
            :format(get_category_name(canonical_name), canonical_name)
    end

    -- Truthy when `translit` has one of the expected forms for this language.
    local function is_expected_name(translit, data, code)
        if translit == code .. "-translit" or translit == "translit-redirect" then
            return true
        end
        return data[script_key] and array_from_comma_list_or_array(data[script_key])
            :some(function(language_script)
                -- Hyphens in the script code are escaped so they are matched
                -- literally.
                return translit:find("^.*" .. language_script:gsub("%-", "%%-") .. ".*%-translit$")
            end)
    end

    local caption = "Languages with odd transliteration modules (not beginning with language or script code)"

    -- Quoted strings with explicit "\n" are used on purpose: a newline that
    -- directly follows the opening [[ of a long string is dropped by Lua,
    -- which would glue the header row to the caption, and table markup must
    -- start on a line of its own.
    return ToC_item(caption)
        .. '\n{| class="wikitable sortable"\n|+ ' .. caption
        .. '\n! name !! code !! script !! transliteration<br>module\n'
        .. require "Module:User:Erutuon/lang stuff/map":new(all_languages)
            :filter(function (data, code)
                return data.translit ~= nil
            end)
            :map(function (data, code)
                local output
                -- `translit` is either a single value or a table; a single
                -- value is treated as a one-element list with key 1.
                local translit_by_script = type(data.translit) == "table" and data.translit or {data.translit}
                for script_code, translit in pairs(translit_by_script) do
                    if type(translit) == "string" and not is_expected_name(translit, data, code) then
                        local name = data[1]
                        output = output or Array()
                        output:insert(("|-\n| %s || <code>%s</code> || %s || [[Module:%s]]\n")
                            :format(
                                make_category_link(name), code,
                                -- Only string keys are script codes.
                                type(script_code) == "string"
                                    and assert(require "Module:scripts".getByCode(script_code), script_code):makeCategoryLink()
                                    or "",
                                translit))
                    end
                end
                return output and output:concat "\n" or ""
            end)
            :sorted_concat()
        .. "|}"
end
--- Counts how many languages have each combination of keys in their data
-- table. The data is gathered from the modules languages/data/2,
-- languages/data/3/a … /z and languages/data/exceptional.
-- @param frame  frame object supplied by the caller; not read here.
-- @return wikitext: a hidden heading followed by a sortable table, most
--   frequent combination first.
function export.data_item_combinations(frame)
    local Array = require "Module:array"
    local Map = require "Module:User:Erutuon/lang stuff/map"

    local all_languages = {}

    -- Copies every entry of one language data module into all_languages.
    local function transfer(module_subpage)
        for code, data in pairs(require("Module:languages/" .. module_subpage)) do
            all_languages[code] = data
        end
    end

    transfer "data/2"
    for b = ("a"):byte(), ("z"):byte() do
        transfer("data/3/" .. string.char(b))
    end
    transfer "data/exceptional"

    -- Data key -> position of that key in the list of all keys that occur.
    local data_key_order = Array.keys(Map:new(all_languages)
        :values()
        :fold(
            function(set, data)
                for k in pairs(data) do
                    set[k] = true
                end
                return set
            end,
            {})):invert()

    -- Sort key for a list of data keys: one letter per key, "a" standing for
    -- the key at position 1. ipairs is used so that the letters follow the
    -- order of the list; pairs gives no such guarantee.
    local function make_sortkey(data_key_list)
        local sortkey = Array()
        for _, key in ipairs(data_key_list) do
            local order = assert(data_key_order[key])
            local char = string.char(("a"):byte() + order - 1)
            sortkey:insert(char)
        end
        return sortkey:concat()
    end

    local data_keys = Map:new(all_languages)
        :map(
            function (data)
                return Array.keys(data)
            end)
        :values()

    -- One record { count = …, items = … } per distinct ", "-joined key list.
    local data_key_counts = data_keys
        :fold(
            function (counts, data_items)
                local key = data_items:concat ", "
                counts[key] = (counts[key] or {})
                counts[key].count = (counts[key].count or 0) + 1
                counts[key].items = data_items
                return counts
            end,
            Map:new())
        :values()

    data_key_counts = data_key_counts:sort(
        function(counts1, counts2)
            return counts1.count > counts2.count
        end)

    local caption = "Count of each combination of data items"

    -- Quoted strings with explicit "\n" are used on purpose: a newline that
    -- directly follows the opening [[ of a long string is dropped by Lua,
    -- which would glue the header row to the caption, and table markup must
    -- start on a line of its own.
    return ToC_item(caption)
        .. '\n{| class="wikitable sortable"\n|+ ' .. caption
        .. '\n! combination<br>of data keys !! number of languages\n'
        .. data_key_counts
            :map(
                function(count)
                    return ('|-\n| data-sort-value="%s" | %s || %d\n'):format(make_sortkey(count.items), count.items:concat ", ", count.count)
                end)
            :concat()
        .. "|}"
end
--- Runs the exported functions named in the positional arguments of the frame
-- and joins their output with newlines.
-- A function that raises an error contributes nothing to the output; the
-- error message and a traceback are reported through mw.addWarning instead.
-- "number_of_scripts" is called with 3 as its second argument.
-- @param frame  frame object; frame.args lists the function names.
-- @return the concatenated wikitext.
-- Raises an error when a name does not refer to an exported function.
function export.show(frame)
    local out = {}
    for _, function_name in ipairs(frame.args) do
        local func = export[function_name] or error("No exported function " .. function_name)
        xpcall(function()
            -- Append rather than store at the argument's index: a failed call
            -- would otherwise leave a nil hole in `out`, and table.concat
            -- does not accept lists with holes.
            if function_name == "number_of_scripts" then
                out[#out + 1] = func(frame, 3)
            else
                out[#out + 1] = func(frame)
            end
        end, function (err)
            mw.addWarning("Error running export." .. function_name .. ":\n" .. tostring(err) .. "\n" .. debug.traceback())
        end)
    end
    return table.concat(out, "\n")
end
-- Hand the table of exported functions to whoever loads this module.
return export