Module:User:Benwing2/category tree/poscatboiler/data/language varieties
Jump to navigation
Jump to search
- This module sandbox lacks a documentation subpage. You may create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
This is a private module sandbox of Benwing2, for their own experimentation. Items in this module may be added and removed at Benwing2's discretion; do not rely on this module's stability.
local raw_categories = {}
local raw_handlers = {}
local m_languages = require("Module:languages")
local m_table = require("Module:table")
local parse_utilities_module = "Module:parse utilities"
local pattern_utilities_module = "Module:pattern utilities"
local labels_module = "Module:labels"
local labels_utilities_module = "Module:labels/utilities"
local rsplit = mw.text.split
-- Record a tracking hit for `page`. The tracking key is "poscatboiler/language-varieties/PAGE", so the hits are
-- presumably listed at [[Special:WhatLinksHere/Wiktionary:Tracking/poscatboiler/language-varieties/PAGE]].
-- Returns whatever the tracking module returns.
local function track(page)
	local tracking_key = "poscatboiler/language-varieties/" .. page
	return require("Module:debug/track")(tracking_key)
end
-- Escape `pattern` via pattern_escape() in the pattern utilities module. The module is only loaded when this
-- function is actually called.
local function pattern_escape(pattern)
	local m_pattern_utilities = require(pattern_utilities_module)
	return m_pattern_utilities.pattern_escape(pattern)
end
-- This module handles lect/variety categories of all sorts, e.g. regional lect categories such as
-- [[:Category:American English]] and [[:Category:Provençal]]; temporal lect categories such as
-- [[:Category:Early Modern English]]; sociolect categories such as [[:Category:Polari]]; and umbrella categories of the
-- form e.g. [[:Category:Varieties of English]] and [[:Category:Regional French]].
-- FIXME: Eliminate the word "dialect" here and in the {{auto cat}} parameter in favor of "lect" or "variety".
-----------------------------------------------------------------------------
-- --
-- RAW CATEGORIES --
-- --
-----------------------------------------------------------------------------
-- Umbrella category grouping the lect/variety categories of all languages, whatever the kind of variety.
raw_categories["Language varieties"] = {
	description = "Categories that group terms in varieties of various languages (regional, temporal, sociolectal, etc.).",
	additional = "{{{umbrella_meta_msg}}}",
	parents = {"Fundamental"},
}
-- Umbrella category grouping the regional-variety categories of all languages; it is itself a child of
-- "Language varieties".
raw_categories["Regionalisms"] = {
	description = "Categories that group terms in regional varieties of various languages.",
	additional = "{{{umbrella_meta_msg}}}",
	parents = {"Fundamental", "Language varieties"},
}
-----------------------------------------------------------------------------
-- --
-- RAW HANDLERS --
-- --
-----------------------------------------------------------------------------
-- Split `term` on commas. If `term` contains a comma followed by whitespace anywhere, defer to split_on_comma() in
-- the parse utilities module; otherwise do a plain split on "," with mw.text.split.
local function split_on_comma(term)
	if not term:find(",%s") then
		return rsplit(term, ",")
	end
	return require(parse_utilities_module).split_on_comma(term)
end
-- Uppercase the first character of `text` using the wiki's content language object.
local function ucfirst(text)
	local content_lang = mw.getContentLanguage()
	return content_lang:ucfirst(text)
end
-- Lowercase the first character of `text` using the wiki's content language object.
local function lcfirst(text)
	local content_lang = mw.getContentLanguage()
	return content_lang:lcfirst(text)
end
-- Handle umbrella categories named "Varieties of LANG", such as [[:Category:Varieties of French]] and
-- [[:Category:Varieties of Ancient Greek]]. Nothing is returned unless the text after "Varieties of " is the
-- canonical name of a language known to [[Module:languages]].
table.insert(raw_handlers, function(data)
	local langname = data.category:match("^Varieties of (.*)$")
	if not langname then
		return
	end
	local lang = require("Module:languages").getByCanonicalName(langname)
	if not lang then
		return
	end
	local langcode = lang:getCode()
	local langlink = lang:makeCategoryLink()
	return {
		lang = langcode,
		description = ("Categories containing terms in varieties of %s (regional, temporal, sociolectal, etc.)."):format(
			langlink),
		parents = {
			"{{{langcat}}}",
			{name = "Language varieties", sort = langname},
		},
		breadcrumb = "Varieties",
	}
end)
-- Handle umbrella categories named "Regional LANG", such as [[:Category:Regional French]] and
-- [[:Category:Regional Ancient Greek]]. Nothing is returned unless the text after "Regional " is the canonical name
-- of a language known to [[Module:languages]].
table.insert(raw_handlers, function(data)
	local langname = data.category:match("^Regional (.*)$")
	if not langname then
		return
	end
	local lang = require("Module:languages").getByCanonicalName(langname)
	if not lang then
		return
	end
	local langcode = lang:getCode()
	local langlink = lang:makeCategoryLink()
	return {
		lang = langcode,
		description = ("Categories containing terms in regional varieties of %s."):format(langlink),
		additional = "This category sometimes also directly contains terms that are uncategorized regionalisms: such terms should be recategorized by the particular regional variety they belong to, or categorized as dialectal.",
		parents = {
			"Varieties of {{{langname}}}",
			{name = "Regionalisms", sort = langname},
		},
		breadcrumb = "Regional",
	}
end)
-- Trimming variant of ine() (if-not-empty). Returns nil when `arg` is nil/false or consists only of whitespace;
-- otherwise returns `arg` with leading and trailing whitespace stripped.
local function ine(arg)
	if arg then
		local trimmed = mw.text.trim(arg)
		if trimmed ~= "" then
			return trimmed
		end
	end
	return nil
end
-- Compute the language code to return in the category settings: "und" when `lang` is a family, otherwise the code
-- of the full language as given by lang:getFullCode().
local function get_returnable_lang(lang)
	if not lang:hasType("family") then
		return lang:getFullCode()
	end
	return "und"
end
-- Try to figure out the region (used as the default breadcrumb and region description) from the language.
--
-- If the language is an etymology-only language, try to derive a region based on a parent etymology-only or full
-- language. For example, if the pagename is '[[:Category:British English]]', the language is 'en-GB' (British
-- English) and the same as the pagename, but we'd like to return a region 'British'. This is also called in cases
-- where the language is explicitly given but we need to infer the region from the parent language; e.g.
-- [[:Category:Lucerne Alemannic German]] is a type of High Alemannic German but we want to infer 'Lucerne' based on
-- the parent 'Alemannic German'. If this doesn't work and the language name has a space in it, we try using
-- progressively smaller suffixes of the language. For example, for [[:Category:Walser German]], the language is
-- 'wae' (Walser German), but the parent is 'Highest Alemannic German', whose parent is 'Alemannic German' (a full
-- language), and just "German" is nowhere in the parent-child relationships but found as a suffix in the parent
-- language. Another such case is with [[:Category:Ionic Greek]], whose parent is 'Ancient Greek'.
--
-- Parameters:
--   pagename: the category name (without the "Category:" prefix).
--   lang: a language object; getCanonicalName() and getParent() are called on it and on its ancestors.
-- Returns the inferred region string (the part of `pagename` preceding the matched language-name suffix), or nil
-- if no language name or suffix of one could be matched at the end of `pagename`.
local function infer_region_from_lang(pagename, lang)
	-- If the language name is the same as the page name, we need to start with the parent; otherwise we would always
	-- match against a suffix of the page name itself, which is not what we want.
	local lang_to_check = lang
	if ucfirst(lang:getCanonicalName()) == pagename then
		lang_to_check = lang:getParent()
	end
	-- Check against the language name and progressively smaller suffixes of it; then repeat for each parent in turn.
	while lang_to_check do
		local suffix = lang_to_check:getCanonicalName()
		while suffix do
			-- `region` must be local; assigning it without `local` would create/overwrite a global on every call.
			local region = pagename:match("^(.*) " .. pattern_escape(suffix) .. "$")
			if region then
				return region
			end
			-- Drop the leftmost word; yields nil (ending the inner loop) once no space remains.
			suffix = suffix:match("^.- (.*)$")
		end
		lang_to_check = lang_to_check:getParent()
	end
	return nil
end
-- Split `pagename` into a language and the region text preceding it, in the manner of splitLabelLang() in
-- [[Module:auto cat]]. The longest possible language name (full, etymology-only or family) is taken from the right,
-- which matters for names like 'English' vs 'Middle English' vs 'Chinese Pidgin English':
-- [[:Category:Late Middle English]] must split as 'Late' + 'Middle English', not 'Late Middle' + 'English'.
--
-- Returns two values: the language object (falsy if no suffix of `pagename` names a language) and the region. When
-- the whole pagename is itself a language name, the region is inferred with infer_region_from_lang() and may be nil.
local function split_region_lang(pagename)
	local get_by_name = require("Module:languages").getByCanonicalName
	local words = mw.text.split(pagename, " ")
	local nwords = #words
	local lang, region
	-- Candidates run from the whole title down to its last word, dropping one word from the left each time.
	for first = 1, nwords do
		local candidate = table.concat(words, " ", first, nwords)
		-- Some languages have lowercase-initial names (e.g. 'the BMAC substrate') although the category begins with
		-- an uppercase letter, so retry with the first letter lowercased.
		lang = get_by_name(candidate, nil, "allow etym", "allow family")
			or get_by_name(lcfirst(candidate), nil, "allow etym", "allow family")
		if lang then
			if first > 1 then
				region = table.concat(words, " ", 1, first - 1)
			end
			break
		end
	end
	if lang and not region then
		-- The pagename is the same as a language name; try to infer the region from the parent language.
		region = infer_region_from_lang(pagename, lang)
	end
	return lang, region
end
-- Compute the default parent category for `category`, given its language object `lang`.
--
-- * Substrate languages (code beginning with "qsb-") always get "Substrate languages".
-- * If `category` has the same name as `lang`, this is the overall category of an etymology-only language, so the
--   parent is based on the language's parent; a language with no parent (i.e. a full language) is an error here.
-- * Otherwise this is a subcategory of a regular or etymology-only language (e.g. [[:Category:Issime Walser]] under
--   [[:Category:Walser German]]), and the parent is based on `lang` itself.
-- If the language the parent is based on is an etymology-only language or a family, its own category name is
-- returned (named after the etymology-only language itself, or "FAMILY languages" for families). Otherwise the
-- result is "Regional LANG", or "Varieties of LANG" when `noreg` is given.
local function get_default_parent_cat_from_category(category, lang, noreg)
	if lang:getCode():find("^qsb%-") then
		-- substrate
		return "Substrate languages"
	end
	local lang_for_cat = lang
	if ucfirst(lang:getCanonicalName()) == category then
		lang_for_cat = lang:getParent()
		if not lang_for_cat then
			error(("Category '%s' has a name the same as a full language; you probably need to explicitly specify a different language using |lang="):format(category))
		end
	end
	if lang_for_cat:hasType("etymology-only") or lang_for_cat:hasType("family") then
		return lang_for_cat:getCategoryName()
	end
	local prefix = noreg and "Varieties of " or "Regional "
	return prefix .. lang_for_cat:getCanonicalName()
end
-- Look up the page "Category:CAT" for the given category name `cat` (no "Category:" prefix), scan its wikitext for
-- the first {{auto cat}} invocation and return that invocation's argument table. Returns nil if the title cannot be
-- constructed, the page has no content, or no {{auto cat}} call is found.
local function scrape_category_for_auto_cat_args(cat)
	local cat_page = mw.title.new("Category:" .. cat)
	local contents = cat_page and cat_page:getContent()
	if not contents then
		return nil
	end
	-- The template parser automatically handles redirects and canonicalizes them, so uses of {{autocat}} are found
	-- under the name "auto cat" as well.
	for name, args in require("Module:template parser").findTemplates(contents) do
		if name == "auto cat" then
			return args
		end
	end
	return nil
end
-- Find the labels that categorize into `category`. Only categories specified using the `regional_categories` and
-- `plain_categories` label fields are considered.
--
-- `lang` is the language object used for the `regional_categories` lookup, where the language name is appended to
-- the label's category prefix; if `lang` is omitted or is not of type "language" (e.g. a family), that lookup is
-- skipped. The `plain_categories` lookup is made with the "check all langs" flag, so lang-specific modules of all
-- languages are consulted; this helps e.g. with varieties of Chinese, whose labels are found in
-- [[Module:labels/data/lang/zh]].
--
-- The return value has the same format as that of `find_labels_for_category` in [[Module:labels/utilities]]. When
-- both lookups return something, the plain results are merged into the regional table, overriding equal keys.
local function find_labels_for_category(category, lang)
	local m_labels_utilities = require(labels_utilities_module)
	local full_lang, regional_labels
	if lang and lang:hasType("language") then
		full_lang = lang:getFull()
		-- The regional lookup wants only the part of the category preceding the full language's name.
		local regional_component = category:match("^(.-) " .. pattern_escape(full_lang:getCanonicalName()) .. "$")
		if regional_component then
			regional_labels = m_labels_utilities.find_labels_for_category(regional_component, "regional", full_lang)
		end
	end
	local plain_labels = m_labels_utilities.find_labels_for_category(category, "plain", full_lang, "check all langs")
	if not regional_labels then
		return plain_labels
	end
	if not plain_labels then
		return regional_labels
	end
	for key, labelobj in pairs(plain_labels) do
		regional_labels[key] = labelobj
	end
	return regional_labels
end
-- Find the labels for category `category` and language object `lang`. Then filter them down to those that are specified
-- using a lang-specific module and sort them for use in checking properties such as parent and description. We filter
-- down to only lang-specific labels because those specified in a general module (especially
-- [[Module:labels/data/regional]]) won't be able to have proper descriptions and especially parents, which tend to be
-- language-specific. The sort order prioritizes labels that match the category exactly (either through the canonical
-- version or any alias); this is followed by labels that are a prefix of the category (again, either through the
-- canonical version or any alias), so that labels whose categories are specified using `regional_categories` are
-- prioritized. Any other labels are sorted last, so that e.g. if both the label "Alberta" and "Canada" (with alias
-- "Canadian") for lang=en categorize into [[:Category:Canadian English]], we prefer the label "Canada". Within each of
-- these three tiers, ties are broken by prioritizing the labels found in the lang-specific module whose language
-- matches `lang`; labels that are still tied are left in unspecified relative order.
--
-- Returns two items. The first is a table of all labels categorizing into `category` (subject to the provisos described
-- in `find_labels_for_category()`), in the same format as returned by `find_labels_for_category` in
-- [[Module:labels/utilities]]. (Specifically, the values are objects containing all relevant information on a given
-- label, and the keys are less important.) The second is a list of label objects after filtering and sorting, in the
-- same format as the values in the `all_labels` table. The first return value will be nil if no labels could be found
-- categorizing into `category`, and the second return value will be nil if no labels remain after filtering.
local function get_sorted_labels(category, lang)
	local all_labels = find_labels_for_category(category, lang)
	if not all_labels then
		return nil
	end
	local m_labels = require(labels_module)
	local lang_specific_pattern = "^" .. pattern_escape(m_labels.lang_specific_data_modules_prefix)
	local sorted_labels = {}
	for _, labelobj in pairs(all_labels) do
		if labelobj.module:find(lang_specific_pattern) then
			table.insert(sorted_labels, labelobj)
		end
	end

	-- The helpers below are defined once here rather than inside the comparator, which is called many times.

	-- True if the label's canonical form or any of its aliases is identical to `category`.
	local function matches_exactly(labelobj)
		if labelobj.canonical == category then
			return true
		end
		for _, alias in ipairs(labelobj.aliases) do
			if alias == category then
				return true
			end
		end
		return false
	end

	-- True if the label's canonical form or any of its aliases, followed by a space, begins `category`.
	local function matches_as_prefix(labelobj)
		if category:find("^" .. pattern_escape(labelobj.canonical) .. " ") then
			return true
		end
		for _, alias in ipairs(labelobj.aliases) do
			if category:find("^" .. pattern_escape(alias) .. " ") then
				return true
			end
		end
		return false
	end

	-- Sort tier of a label: 1 = exact match, 2 = prefix match, 3 = anything else. Lower sorts first.
	local function match_tier(labelobj)
		if matches_exactly(labelobj) then
			return 1
		elseif matches_as_prefix(labelobj) then
			return 2
		end
		return 3
	end

	-- True if `lang` was given and the label's language has the same full code as `lang`.
	local function matches_lang(labelobj)
		if not lang then
			return false
		end
		return labelobj.lang:getFullCode() == lang:getFullCode()
	end

	local function sort_labelobj(a, b)
		local a_tier, b_tier = match_tier(a), match_tier(b)
		if a_tier ~= b_tier then
			return a_tier < b_tier
		end
		-- table.sort() requires a strict ordering: the comparator must never return true for both (a, b) and
		-- (b, a). So `a` only sorts first on the language tie-break when it matches `lang` and `b` does not;
		-- returning true whenever `a` matches would make two matching labels each claim to precede the other
		-- and can raise "invalid order function for sorting".
		return matches_lang(a) and not matches_lang(b)
	end

	table.sort(sorted_labels, sort_labelobj)
	if #sorted_labels > 0 then
		return all_labels, sorted_labels
	else
		return all_labels, nil
	end
end
-- Find the categories that label `label` (for language object `lang`) categorizes into, considering only the
-- `plain_categories` and `regional_categories` label fields. Returns nil if the label couldn't be located at all;
-- otherwise a list (possibly empty) with the plain categories first, followed by the regional ones.
local function get_categories_for_label(label, lang)
	local m_labels = require(labels_module)
	local labret = m_labels.get_label_info { label = label, lang = lang }
	if not labret then
		return nil
	end
	-- Fetch the categories of a single kind, identified by the name of the label data field that specifies them.
	local function fetch(category_field)
		return m_labels.fetch_categories(labret.canonical or label, labret.data, lang, nil, nil,
			{[category_field] = true})
	end
	local categories = fetch("plain_categories")
	local reg_cats = fetch("regional_categories")
	for i = 1, #reg_cats do
		table.insert(categories, reg_cats[i])
	end
	return categories
end
-- Determine the default parent category of `category` from the first label in `sorted_labels` whose label data has a
-- `parent` field. Returns two values: the parent category and the label object that supplied the `parent` field.
--
-- * No label has a `parent` field: returns nil, nil.
-- * `parent` is `true` (meaning "use the default parent"): returns nil plus the label object.
-- * `parent` names a label: returns the first category that label categorizes into, or nil if it has none, plus the
--   label object. An error is thrown if the parent label cannot be located at all.
local function get_default_parent_cat_from_sorted_labels(sorted_labels, category)
	-- Only the first label that specifies a parent is ever consulted.
	local labobj, parent
	for _, candidate in ipairs(sorted_labels) do
		local candidate_parent = candidate.labdata.parent
		if candidate_parent then
			labobj, parent = candidate, candidate_parent
			break
		end
	end
	if not labobj then
		return nil, nil
	end
	if parent == true then
		-- use default parent
		return nil, labobj
	end
	local cats = get_categories_for_label(parent, labobj.lang)
	if not cats then
		error(("Label '%s' for category '%s' (defined in module [[%s]]) specified parent label '%s' but that parent label couldn't be located"):format(
			labobj.canonical, category, labobj.module, parent))
	end
	if #cats == 0 then
		-- FIXME: If the parent doesn't specify any categories, should we try the next parent or fall back
		-- to the parent determined through get_default_parent_cat_from_category() (which is what we currently
		-- do)?
		return nil, labobj
	end
	return cats[1], labobj
end
-- Set of category names whose {{auto cat}} call should be scraped even though the name does not end in a language.
-- Keeping this list avoids having to scrape every category. A category belongs here when all of these hold:
-- (a) it is a dialect category;
-- (b) it occurs as the parent category of some other dialect category;
-- (c) it is neither the name of a known language (including etymology-only languages) nor ends in one.
-- Condition (c) is needed because categories with a language suffix are scraped automatically, since they are
-- likely to be dialect categories.
local dialect_parent_cats_to_scrape = m_table.listToSet({
	"Assyrian",
	"Babylonian",
	"Limburgan-Ripuarian transitional dialects",
	"North Sea Germanic",
	"Ripuarian Franconian",
})
-- Handle dialect categories such as [[:Category:New Zealand English]], [[:Category:Late Middle English]],
-- [[:Category:Arbëresh Albanian]], [[:Category:Provençal]] or arbitrarily-named categories like
-- [[:Category:Issime Walser]]. We currently require that dialect=1 is specified to the call to {{auto cat}} to avoid
-- overfiring. However, if called from inside, we are processing the breadcrumb for the parent (or conceivably the
-- child) of a dialect category, and won't have any params set, so we can't rely on dialect=1. In that case, only fire
-- if the category is or ends in the name of a full or etymology-only language, and scrape the category's call to
-- {{auto cat}} to get the appropriate params. This means that nonstandardly-named categories like
-- [[:Category:Issime Walser]] can't be parents of other dialect categories. To work around this, either we have to
-- relax the code below to operate on all raw categories (not necessarily a good idea), or we rename the
-- nonstandardly-named categories (e.g. in the case above, to [[:Category:Issime Walser German]], since Walser German
-- is a recognized etymology-only language).
--
-- NOTE: We are able to handle categories for etymology-only families (currently only [[:Category:Middle Iranian]] and
-- [[:Category:Old Iranian]]) and for etymology-only substrate languages (e.g. [[:Category:The BMAC substrate]]).
-- There is some special "family" code for the former.
local function dialect_handler(category, raw_args, called_from_inside)
-- Try to figure out if this variety is extinct or reconstructed, if type= not given.
local function determine_lect_type(lang, default_parent_cat)
if category:find("^Proto%-") or lang:getCanonicalName():find("^Proto%-") or lang:hasType("reconstructed") then
-- Is it reconstructed?
return "reconstructed"
end
if lang:getCode():find("^qsb%-") then
-- Substrate.
return "unattested"
end
if lang:hasType("full") then
-- If a full language, scrape the {{auto cat}} call and check for extinct=1.
local parent_args = scrape_category_for_auto_cat_args(lang:getCategoryName())
if parent_args and ine(parent_args.extinct) and require("Module:yesno")(parent_args.extinct, false) then
return "extinct"
end
end
-- Otherwise, call the dialect handler recursively for the parent category. This is correct e.g. for
-- things like subvarieties of Classical Persian, where the lang itself (Persian) isn't extinct but the
-- parent category refers to an extinct variety. If the dialect handler fails to return a type, it's because
-- the parent category doesn't exist or isn't defined using {{auto cat}}, and doesn't have a language as a
-- suffix. In that case, if we're dealing with an etymology-only language, check the parent language. Finally,
-- fall back to returning "extant" if all else fails.
local parent_type
if default_parent_cat then
_, parent_type = dialect_handler(default_parent_cat, nil, true)
end
if parent_type then
return parent_type
end
local parent_lang = lang:getParent()
if parent_lang then
return determine_lect_type(parent_lang, nil)
end
return "extant"
end
if called_from_inside then
-- Avoid infinite loops from wrongly processing non-lect categories. We have a check around line 344 below
-- for categories whose {{auto cat}} doesn't say dialect=1, but we still need the following in case of
-- non-existent categories we're being asked to process (e.g. [[:Category:User bcc]] ->
-- [[:Category:Southern Balochi]] (nonexistent) -> [[:Category:Regional Baluchi]] (nonexistent), which
-- causes an infinite loop without the check below.
if category:find("^Regional ") or category:find("^Varieties of ") or category:find("^Rhymes:") then
return nil
end
-- If called from inside we won't have any params available. See comment above about this. We scrape the
-- category page's call to {{auto cat}} to get the appropriate params, and if that fails, we currently fall back
-- to defaults based on the name of the category. Since the call from inside is only to get the parent category
-- and breadcrumb, these defaults actually work in most cases but not all; e.g. in the chain
-- [[:Category:Regional Yoruba]] -> [[:Category:Central Yoruba]] -> [[:Category:Ekiti Yoruba]] ->
-- [[:Category:Akurẹ Yoruba]], if we are forced to use default values, we will produce the right parent for
-- [[:Category:Central Yoruba]] but not for [[:Category:Ekiti Yoruba]], where the default parent would be
-- [[:Category:Regional Yoruba]] instead of the correct [[:Category:Central Yoruba]].
local lang, breadcrumb = split_region_lang(category)
if lang or dialect_parent_cats_to_scrape[category] then
raw_args = scrape_category_for_auto_cat_args(category)
if raw_args and not ine(raw_args.dialect) then
-- We are scraping something like [[:Category:American Sign Language]] that ends in a valid language but is not
-- a dialect.
return nil
end
if not raw_args then
if not lang then
-- We were instructed to scrape by virtue of `dialect_parent_cats_to_scrape`, but couldn't scrape
-- anything.
return nil
end
-- If we can't parse the scraped {{auto cat}} spec, return default values. This helps e.g. in converting
-- from the old {{dialectboiler}} template and generally when adding new varieties.
track("dialect")
local default_parent_cat
local all_labels, sorted_labels = get_sorted_labels(category, lang)
if sorted_labels then
default_parent_cat = get_default_parent_cat_from_sorted_labels(sorted_labels, category)
end
if not default_parent_cat then
default_parent_cat = get_default_parent_cat_from_category(category, lang)
end
-- NOTE: When called from inside, the description doesn't matter; nor do any parents other than the
-- first. This is because called_from_inside is only set when computing the breadcrumb trail, which
-- only needs the language, first parent and breadcrumb.
return {
-- FIXME, allow etymological codes here
lang = get_returnable_lang(lang),
description = "Foo",
parents = {default_parent_cat},
breadcrumb = breadcrumb or lang:getCanonicalName(),
umbrella = false,
can_be_empty = true,
}, determine_lect_type(lang, default_parent_cat)
end
else
return nil
end
end
if not called_from_inside and not ine(raw_args.dialect) then
return nil
end
-------------------- 1. Process parameters. -------------------
local params = {
[1] = {},
dialect = {type = "boolean"},
lang = {},
verb = {},
prep = {},
def = {},
fulldef = {},
addl = {},
nolink = {type = "boolean"},
noreg = {type = "boolean"}, -- don't make the default parent be "Regional LANG"; instead, "Varieties of LANG"
type = {}, -- "extinct", "extant", "reconstructed", "unattested", "constructed"
cat = {},
othercat = {}, -- comma-separated
country = {}, -- comma-separated
wp = {},
wikidata = {},
breadcrumb = {},
pagename = {}, -- for testing or demonstration
}
local args = require("Module:parameters").process(raw_args, params)
local allowed_type_values = {"extinct", "extant", "reconstructed", "unattested", "constructed"}
if args.type and not m_table.contains(allowed_type_values, args.type) then
error(("Unrecognized value '%s' for type=; should be one of %s"):format(
args.type, table.concat(allowed_type_values, ", ")))
end
-------------------- 2. Determine the breadcrumb. -------------------
-- Also initialize regiondesc from the category name. It may be overridden later.
local lang, breadcrumb, regiondesc, langname
local region
category = args.pagename or category
if not args.lang then
lang, breadcrumb = split_region_lang(category)
if not lang then
error(("lang= not given and unable to parse language from category '%s'"):format(category))
end
langname = lang:getCanonicalName()
regiondesc = breadcrumb
else
lang = m_languages.getByCode(args.lang, "lang", "allow etym")
langname = lang:getCanonicalName()
if category == ucfirst(category) then
-- breadcrumb and regiondesc should stay nil; breadcrumb will get `category` as a default, and the lack of
-- regiondesc will cause an error to be thrown unless the user gave it explicitly or specified def=.
else
breadcrumb = category:match("^(.*) " .. pattern_escape(langname) .. "$")
if not breadcrumb then
-- Try to infer the region from the parent. See comment at function.
breadcrumb = infer_region_from_lang(category, lang)
end
regiondesc = breadcrumb
end
end
-- If no breadcrumb, this often happens when the langname and category are the same (happens only with etym-only
-- languages), and the parent category is set below to the full parent, so the breadcrumb should show the
-- language name (or equivalently, the category). If the langname and category are different, we should fall back to
-- the category. E.g. for Singlish, lang=en is specified and we can't infer a breadcrumb because the dialect name
-- doesn't end in "English"; in this case we want the breadcrumb to show "Singlish".
breadcrumb = args.breadcrumb or breadcrumb or category
-------------------- 3. Initialize `additional` with user-specified additional text. -------------------
local additional = args.addl
local function append_addl(addl_text)
if not addl_text then
return
end
if additional then
additional = additional .. "\n\n" .. addl_text
else
additional = addl_text
end
end
-------------------- 4. Augment `additional` with information about etymology-only codes. -------------------
local parents = {}
local langname_for_desc
local etymcodes = {}
local function make_code(code)
return ("<code>%s</code>"):format(code)
end
if lang:hasType("etymology-only") and ucfirst(langname) == category then
langname_for_desc = lang:getParentName()
local langcode = lang:getCode()
table.insert(etymcodes, make_code(langcode))
-- Find all alias codes for the etymology-only language.
-- FIXME: There should be a better/easier way of doing this.
local ety_code_to_name = mw.loadData("Module:etymology languages/code to canonical name")
for code, canon_name in pairs(ety_code_to_name) do
if canon_name == langname and code ~= langcode then
table.insert(etymcodes, make_code(code))
end
end
local addl_etym_codes = ("[[Module:etymology_languages/data|Etymology-only language]] code: %s."):format(
m_table.serialCommaJoin(etymcodes, {conj = "or"}))
append_addl(addl_etym_codes)
else
langname_for_desc = langname
end
-------------------- 5. Determine labels categorizing into this category. -------------------
-- In the process we also add text to `additional` about these labels.
local all_labels, sorted_labels = get_sorted_labels(category, lang)
if all_labels then
append_addl(m_labels_utilities.format_labels_categorizing(all_labels, nil, full_lang))
end
-------------------- 6. Determine parent categories. -------------------
local default_parent_cat = args.cat
local label_with_parent
local function getprop(prop)
return args[prop] or label_with_parent and label_with_parent.labdata[prop]
end
if not default_parent_cat and sorted_labels then
default_parent_cat, label_with_parent = get_default_parent_cat_from_sorted_labels(sorted_labels, category)
end
if not default_parent_cat then
default_parent_cat = get_default_parent_cat_from_category(category, lang, getprop("noreg"))
end
table.insert(parents, default_parent_cat)
local othercat = getprop("othercat")
if othercat and type(othercat) == "string" then
othercat = split_on_comma(othercat)
end
if othercat then
for _, cat in ipairs(othercat) do
if not cat:find("^Category:") then
cat = "Category:" .. cat
end
table.insert(parents, cat)
end
end
local countries = getprop("country")
if countries and type(countries) == "string" then
countries = split_on_comma(countries)
end
if args[1] then
regiondesc = args[1]
else
local regionprop = getprop("region")
if regionprop then
regiondesc = regionprop
end
end
countries = countries or {regiondesc}
for _, country in ipairs(countries) do
if not country:find("[<=]") then
country = require("Module:links").remove_links(country)
local cat = "Category:Languages of " .. country
local cat_page = mw.title.new(cat)
if cat_page and cat_page.exists then
table.insert(parents, cat)
end
end
end
-- Try to figure out if this variety is extinct or reconstructed, if type= not given.
local lect_type = getprop("type")
if not lect_type then
lect_type = determine_lect_type(lang, default_parent_cat)
end
local function prefix_addl(addl_text)
if additional then
additional = addl_text .. "\n\n" .. additional
else
additional = addl_text
end
end
if lect_type == "extinct" then
prefix_addl("This language variety is [[extinct language|extinct]].")
table.insert(parents, "Category:All extinct languages")
elseif lect_type == "reconstructed" then
prefix_addl("This language variety is [[reconstructed language|reconstructed]].")
table.insert(parents, "Category:Reconstructed languages")
elseif lect_type == "unattested" then
prefix_addl("This language variety is {{w|unattested language|unattested}}.")
table.insert(parents, "Category:Unattested languages")
elseif lect_type == "constructed" then
prefix_addl("This language variety is [[constructed language|constructed]].")
table.insert(parents, "Category:Constructed languages")
end
-------------------- 7. Compute `description`. -------------------
local description
local fulldef = getprop("fulldef")
if fulldef then
description = fulldef .. "."
end
if not description then
local def = getprop("def")
if def then
description = ("Terms or senses in %s."):format(def)
end
end
if not description then
if not regiondesc then
-- We need regiondesc for the description unless def= or fulldef= is given, which overrides the part that needs it.
error(("1= (region) not given and unable to infer region from category '%s' given language name '%s'"):
format(category, langname))
end
local lang_en = m_languages.getByCode("en", true)
local linked_regiondesc = regiondesc
if linked_regiondesc then
if linked_regiondesc:find("<country>") then
if not countries then
error(("Can't specify <country> in region description '%s' when country= not given"):format(linked_regiondesc))
end
-- Link the countries individually before calling serialCommaJoin(), which inserts HTML.
local linked_countries = {}
for _, country in ipairs(countries) do
-- don't try to link if HTML or = sign found in country
if not country:find("[<=]") then
country = require("Module:links").full_link { lang = lang_en, term = country }
end
table.insert(linked_countries, country)
end
linked_countries = m_table.serialCommaJoin(linked_countries)
linked_regiondesc = linked_regiondesc:gsub("<country>", require(pattern_utilities_module).replacement_escape(linked_countries))
elseif not getprop("nolink") and not linked_regiondesc:find("[<=]") then
-- Even if nolink not given, don't try to link if HTML or = sign found in linked_regiondesc, otherwise we're
-- likely to get an error.
linked_regiondesc = require("Module:links").full_link { lang = lang_en, term = linked_regiondesc }
end
end
local verb = getprop("verb") or "spoken"
local prep = getprop("prep")
description = ("Terms or senses in %s as %s%s %s."):format(
langname_for_desc, verb, prep == "-" and "" or " " .. (prep or "in"), linked_regiondesc)
end
-------------------- 8. Compute the Wikipedia articles that go into `topright`. -------------------
local topright_parts = {}
-- Add a {{wp}} call for Wikipedia article `article` on the Wikimedia language `wmcode` to `topright_parts`, unless an
-- identical call is already present. The `lang=` parameter is left out for English ("en"), and the article parameter
-- is left out when the article title is the same as `category`.
local function insert_wikipedia_article(wmcode, article)
	local lang_param = ""
	if wmcode ~= "en" then
		lang_param = "|lang=" .. wmcode
	end
	local article_param = ""
	if article ~= category then
		article_param = "|" .. article
	end
	m_table.insertIfNot(topright_parts, "{{wp" .. lang_param .. article_param .. "}}")
end
-- Insert the Wikipedia articles described by the list `specs` into `topright_parts`. Each spec is either `true`
-- (meaning use `default` as the article title on the English Wikipedia) or a string, optionally prefixed with
-- `WMCODE:` to select a non-English Wikipedia. After any prefix is removed, "+" means use `default`, "-" means insert
-- nothing, and anything else is passed through [[Module:yesno]] with itself as the fallback value: a result of `true`
-- means use `default`, a false result means insert nothing, and otherwise the result is the article title.
local function insert_wikipedia_articles_for_wikipedia_specs(specs, default)
	-- Resolve a single spec into (wiki code or nil, article title or a false value to skip).
	local function resolve(spec)
		if spec == true then
			return nil, default
		end
		local wiki
		if spec:find(":[^ ]") then
			-- Only treat the part before the colon as a wiki code if it looks like one; otherwise the colon is
			-- part of the article title and the spec is left as-is.
			local prefix, rest = spec:match("^([a-z][a-z][a-z-]*):([^ ].*)$")
			wiki = prefix
			if rest then
				spec = rest
			end
		end
		if spec == "+" then
			return wiki, default
		end
		if spec == "-" then
			return wiki, nil
		end
		local resolved = require("Module:yesno")(spec, spec)
		if resolved == true then
			resolved = default
		end
		return wiki, resolved
	end

	for _, spec in ipairs(specs) do
		local wiki, title = resolve(spec)
		if title then
			insert_wikipedia_article(wiki or "en", title)
		end
	end
end
-- Insert the Wikipedia articles corresponding to the Wikidata ID's in the list `specs` into `topright_parts`.
-- An ID without a colon is looked up on the wiki of every Wikimedia language code returned by
-- get_langs_to_extract_wikipedia_articles_from_wikidata() in the labels module for `lang`. An ID of the form
-- `WMCODE:ID` is looked up only on the wiki named `WMCODE .. "wiki"`. ID's for which no sitelink is returned are
-- skipped. Throws an error if `mw.wikibase` is not available.
local function insert_wikipedia_articles_for_wikidata_specs(specs, lang)
	if not mw.wikibase then
		-- Report the ID's actually passed in rather than `args.wikidata`. This function is also called with ID's
		-- taken from label data, in which case `args.wikidata` may be nil, and formatting nil with %s raises its
		-- own unrelated error on Lua 5.1, hiding this message.
		error(("Unable to retrieve data from Wikidata ID's '%s'; `mw.wikibase` not defined"):format(
			table.concat(specs, ",")))
	end
	local wikipedia_langs = require(labels_module).get_langs_to_extract_wikipedia_articles_from_wikidata(lang)
	-- Partition the ID's by whether they carry an explicit `WMCODE:` prefix.
	local ids_without_wmcodes = {}
	local ids_with_wmcodes = {}
	for _, id in ipairs(specs) do
		if id:find(":") then
			table.insert(ids_with_wmcodes, id)
		else
			table.insert(ids_without_wmcodes, id)
		end
	end
	-- Unprefixed ID's: iterate by language first so that the articles are grouped by wiki in the order given by
	-- `wikipedia_langs`.
	for _, wmcode in ipairs(wikipedia_langs) do
		for _, id in ipairs(ids_without_wmcodes) do
			local article = mw.wikibase.sitelink(id, wmcode .. "wiki")
			if article then
				insert_wikipedia_article(wmcode, article)
			end
		end
	end
	-- Prefixed ID's: split on the first colon and look up only on the requested wiki.
	for _, id in ipairs(ids_with_wmcodes) do
		local wmcode, wikidata_id = id:match("^(.-):(.*)$")
		local article = mw.wikibase.sitelink(wikidata_id, wmcode .. "wiki")
		if article then
			insert_wikipedia_article(wmcode, article)
		end
	end
end
-- Explicitly given wp= and/or wikidata= determine the Wikipedia articles. Otherwise, if the page name is the
-- language name with its first letter uppercased, fall back to the language's own Wikipedia article on each
-- relevant wiki.
if args.wp or args.wikidata then
	if args.wp then
		insert_wikipedia_articles_for_wikipedia_specs(split_on_comma(args.wp), category)
	end
	if args.wikidata then
		insert_wikipedia_articles_for_wikidata_specs(rsplit(args.wikidata, "%s*,%s*"), lang)
	end
elseif pagename == ucfirst(langname) then
	-- Do not declare a local `topright_parts` here: insert_wikipedia_article() appends to the outer
	-- `topright_parts`, so a branch-local table of the same name would never be read and only obscures that.
	local wikipedia_langs = require(labels_module).get_langs_to_extract_wikipedia_articles_from_wikidata(lang)
	for _, wmcode in ipairs(wikipedia_langs) do
		local article = lang:getWikipediaArticle("no category fallback", wmcode .. "wiki")
		if article then
			insert_wikipedia_article(wmcode, article)
		end
	end
end
if #topright_parts == 0 and sorted_labels then
for _, labobj in pairs(all_labels) do
local wp_specs = labobj.labdata.Wikipedia
if wp_specs then
if type(wp_specs) ~= "table" then
wp_specs = {wp_specs}
end
insert_wikipedia_articles_for_wikipedia_specs(wp_specs, labobj.canonical)
end
local wikidata_specs = labobj.labdata.Wikidata
if wikidata_specs then
if type(wikidata_specs) ~= "table" then
wikidata_specs = {wikidata_specs}
end
insert_wikipedia_articles_for_wikidata_specs(wikidata_specs, labobj.lang)
end
end
end
local topright
if #topright_parts > 0 then
topright = table.concat(topright_parts)
end
-------------------- 9. Return the combined structure of all information. -------------------
track("dialect")
return {
-- FIXME, allow etymological codes here
lang = get_returnable_lang(lang),
topright = topright,
description = description,
additional = additional,
parents = parents,
breadcrumb = {name = breadcrumb, nocap = true},
umbrella = false,
can_be_empty = true,
}, lect_type
end
-- Raw handler for dialect categories. Delegates to dialect_handler() and discards its second return value; the
-- second value returned here is a boolean that is true exactly when dialect_handler() produced settings.
table.insert(raw_handlers, function(data)
	local settings = dialect_handler(data.category, data.args, data.called_from_inside)
	if settings then
		return settings, true
	end
	return settings, false
end)
-- Module exports: the table of raw category definitions and the list of raw handler functions built up in this file.
return {RAW_CATEGORIES = raw_categories, RAW_HANDLERS = raw_handlers}