Module:family tree/etymology languages
Jump to navigation
Jump to search
- The following documentation is located at Module:family tree/etymology languages/documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
Creates a version of Module:etymology languages/data in which there is just one code per language. Codes that have the same language data are reduced to the most language-codiest one. For instance, Austrian German has three codes (Austrian German, AG., de-AT) by which it can be accessed in etymology templates like {{cog}}. de-AT is chosen as the only language code for Austrian German because it looks the most like a language code.
The following criteria are used successively to weed out candidates for language-codiest code:
- The code must consist of letters and hyphens.
- The code must not contain an uppercase letter followed by a lowercase letter.
- The code must be as short as possible.
This list shows those etymology languages that have multiple codes, together with the code chosen by this module and the other codes:
local language_codes = require "Module:languages/code to canonical name"
-- Bonus used when weighting codes: returns 1 if the leading run of lowercase
-- letters in `code` is a key of the code-to-canonical-name table, else 0.
-- The intent is to prefer nrf-grn and nrf-jer over roa-grn and roa-jer
-- (Guernsey and Jersey).
local function isLangCode(code)
	local prefix = code:match("^%l+")
	if language_codes[prefix] then
		return 1
	end
	return 0
end
-- Code shapes in descending order of preference:
-- xx, xxx, xx-xxx, xxx-xxx, xx-xxx-xxx, xxx-xxx-xxx, xx-XX, xxx-XX, xx-XX-xxx, xxx-XX-xxx
-- Each entry is { pattern, base weighting, whether isLangCode's bonus applies }.
-- The bonus makes language codes beat family codes of the same format.
local code_shape_weightings = {
	{ "^%l%l$", 14, false },
	{ "^%l%l%l$", 13, false },
	{ "^%l%l%-%l%l%l$", 12, false },
	{ "^%l%l%l%-%l%l%l$", 10, true },
	{ "^%l%l%-%l%l%l%-%l%l%l$", 9, false },
	{ "^%l%l%l%-%l%l%l%-%l%l%l$", 7, true },
	{ "^%l%l%-%u%u$", 6, false },
	{ "^%l%l%l%-%u%u$", 4, true },
	{ "^%l%l%-%u%u%-%l%l%l$", 3, false },
	{ "^%l%l%l%-%u%u%-%l%l%l$", 1, true },
}

-- Returns whichever of `code1` and `code2` is preferred: the one with the
-- higher weighting, then the shorter one, then the alphabetically earlier one.
local function determine_preferred_etymology_language_code(code1, code2)
	-- Weighting of a single code; 0 when it matches none of the known shapes.
	local function score(code)
		for _, shape in ipairs(code_shape_weightings) do
			if code:find(shape[1]) then
				if shape[3] then
					return shape[2] + isLangCode(code)
				end
				return shape[2]
			end
		end
		return 0
	end

	local score1, score2 = score(code1), score(code2)
	if score1 ~= score2 then
		return score1 > score2 and code1 or code2
	end

	local length1, length2 = #code1, #code2
	if length1 ~= length2 then
		return length1 < length2 and code1 or code2
	end

	-- Last resort: alphabetical order.
	if code1 > code2 then
		return code2
	end
	return code1
end
-- Reduces table `t` to a single value: starting from `accum`, calls
-- func(key, value, running_result) for every pair yielded by pairs(t) and
-- carries the return value forward. Returns the final result (`accum` itself
-- when `t` is empty). Visit order is whatever pairs gives.
local function fold(t, accum, func)
	local result = accum
	for key, value in pairs(t) do
		result = func(key, value, result)
	end
	return result
end
-- Returns a new table with the keys and values of `t` swapped. If several
-- keys share a value, the one visited last by pairs wins.
local function invert(t)
	local swapped = {}
	for key, value in pairs(t) do
		swapped[value] = key
	end
	return swapped
end
-- Fold step: `data_to_code` maps each data table to the preferred code seen
-- so far. Every code that points at a data table is also appended to that
-- table's `codes` list (created on first sight).
local function record_code(code, data, data_to_code)
	local current = data_to_code[data]
	if not current then
		data_to_code[data] = code
		data.codes = { code }
	else
		data_to_code[data] = determine_preferred_etymology_language_code(current, code)
		data.codes[#data.codes + 1] = code
	end
	return data_to_code
end

-- Build data table -> preferred code, then flip it so the module returns
-- preferred code -> data table.
local preferred_code_by_data = fold(
	require "Module:etymology languages/data",
	{},
	record_code
)

return invert(preferred_code_by_data)