Jump to content

Module:category tree/poscatboiler/data/terms by lexical property

From Wiktionary, the free dictionary

This data submodule defines part of Wiktionary's category structure.

For an introduction to the poscatboiler system and a description of how to add or modify categories, see Module:category tree/poscatboiler/data/documentation.


-- The three containers this data module fills in and hands back in its final
-- `return`: label definitions, raw category definitions and handler functions.
local labels, raw_categories, handlers = {}, {}, {}

local insert = table.insert
local ucfirst = require("Module:string utilities").ucfirst

-----------------------------------------------------------------------------
--                                                                         --
--                                  LABELS                                 --
--                                                                         --
-----------------------------------------------------------------------------


-- Top-level label of this data module. Its parent is specified with
-- `raw = true` rather than as another label, and it carries its own
-- `umbrella_parents` value instead of the default assigned further down.
labels["terms by lexical property"] = {
	parents = {
		{name = "{{{langcat}}}", raw = true},
	},
	umbrella_parents = "Fundamental",
	description = "{{{langname}}} terms categorized by properties relating to spelling, pronunciation or meaning.",
}

-- Labels whose only parent is "terms by lexical property".

labels["autohyponyms"] = {
	parents = {
		"terms by lexical property",
	},
	description = "{{{langname}}} terms that have at least two meanings, one of which is a hyponym of the other.",
}

labels["contranyms"] = {
	parents = {
		"terms by lexical property",
	},
	description = "{{{langname}}} terms that have two opposite meanings.",
}

labels["double negatives"] = {
	parents = {
		"terms by lexical property",
	},
	description = "{{{langname}}} terms which include a [[double negative]], either etymologically or in a definition.",
}

labels["heteronyms"] = {
	parents = {
		"terms by lexical property",
	},
	description = "{{{langname}}} terms that have different meanings depending on their etymology and/or on how they are pronounced.",
}

-- Spelling variants that omit a diacritic or letter; both are filed under
-- "terms by orthographic property".

labels["nuqtaless forms"] = {
	parents = {
		"terms by orthographic property",
	},
	description = "{{{langname}}} terms that are spelled without a [[nuqta]].",
}

labels["unhamzated forms"] = {
	parents = {
		"terms by orthographic property",
	},
	description = "{{{langname}}} terms that are spelled without a [[hamza]].",
}

-- Palindromes are filed under "terms by their sequences of characters".
labels["palindromes"] = {
	parents = {
		"terms by their sequences of characters",
	},
	description = "{{{langname}}} terms whose characters are read equally both from left to right and vice versa, normally ignoring spaces, [[diacritic]]s and punctuation.",
}

-- Pleonastic compounds: a general label plus one sub-label each for
-- adjectives and nouns. The sub-labels list "pleonastic compounds" first and
-- the matching "compound ..." label second.

labels["pleonastic compounds"] = {
	parents = {
		"terms by lexical property",
		"compound terms",
	},
	description = "{{{langname}}} compound terms where the head is a hyponym of its other part and whose other part is its synonym.",
}

labels["pleonastic compound adjectives"] = {
	parents = {
		"pleonastic compounds",
		"compound adjectives",
	},
	description = "{{{langname}}} compound adjectives where the head is a hyponym of its other part and whose other part is its synonym.",
}

labels["pleonastic compound nouns"] = {
	parents = {
		"pleonastic compounds",
		"compound nouns",
	},
	description = "{{{langname}}} compound nouns where the head is a hyponym of its other part and where the head is the synonym for the whole.",
}

labels["pronunciation spellings"] = {
	parents = {
		"terms by orthographic property",
	},
	description = "{{{langname}}} terms spelled to represent a pronunciation, often a nonstandard one.",
}

labels["tautophrases"] = {
	parents = {
		"terms by lexical property",
	},
	description = "{{{langname}}} phrases that repeat the same idea or concept using the same words.",
}

-- Intermediate label: many labels in this module name it as their parent.
labels["terms by orthographic property"] = {
	parents = {
		"terms by lexical property",
	},
	description = "{{{langname}}} terms categorized by properties relating to [[orthography]] or [[spelling]].",
}

-- Three labels filed under "terms by orthographic property".

labels["calculator words"] = {
	parents = {
		"terms by orthographic property",
	},
	description = "{{{langname}}} terms that can be spelled on a [[seven-segment]] display, as found on pocket calculators, by turning numbers upside-down.",
}

labels["words by number of letters"] = {
	parents = {
		"terms by orthographic property",
	},
	description = "{{{langname}}} words categorized by number of letters.",
}

labels["abbreviations by number of letters"] = {
	parents = {
		"terms by orthographic property",
	},
	description = "{{{langname}}} abbreviations categorized by number of letters.",
}

-- The two "roots by ..." labels use a shortened breadcrumb and sit under
-- "roots" with an explicit sort key, in addition to a property parent.

labels["roots by number of letters"] = {
	breadcrumb = "by number of letters",
	parents = {
		{name = "roots", sort = "number of letters"},
		"terms by orthographic property",
	},
	description = "{{{langname}}} {{lg|roots}} categorized by number of letters.",
}

labels["roots by shape"] = {
	breadcrumb = "by shape",
	parents = {
		{name = "roots", sort = "shape"},
		"terms by lexical property",
	},
	description = "{{{langname}}} {{lg|roots}} categorized by their shape.",
}

labels["words by number of syllables"] = {
	parents = {
		"terms by phonemic property",
	},
	description = "{{{langname}}} words categorized by number of syllables.",
}

-- Labels about which characters a term contains. Each lists
-- "terms by orthographic property" as its first parent; the last two add a
-- second parent.

labels["terms by their individual characters"] = {
	parents = {
		"terms by orthographic property",
	},
	description = "{{{langname}}} terms categorized by whether they include certain individual characters.",
}

labels["terms by their sequences of characters"] = {
	parents = {
		"terms by orthographic property",
	},
	description = "{{{langname}}} terms categorized by whether they include certain sequences of characters.",
}

labels["terms with consecutive instances of the same letter"] = {
	parents = {
		"terms by orthographic property",
	},
	description = "{{{langname}}} words categorized by the number of consecutive instances of the same letter they contain.",
}

labels["terms containing italics"] = {
	parents = {
		"terms by orthographic property",
	},
	description = "{{{langname}}} terms containing [[italics]].",
}

labels["terms containing Roman numerals"] = {
	parents = {
		"terms by orthographic property",
		"terms spelled with numbers",
	},
	description = "{{{langname}}} terms containing [[Roman numeral]]s.",
}

labels["terms with mixed convergence"] = {
	parents = {
		"terms by orthographic property",
		"terms by phonemic property",
	},
	description = "{{{langname}}} terms where the spelling represents a variant pronunciation that differs from (one of) the current standard pronunciation(s).",
}

labels["terms with homophones"] = {
	parents = {
		"terms by lexical property",
	},
	description = "{{{langname}}} terms that have one or more [[homophones]]: other terms that are pronounced in the same way but spelled differently.",
}

labels["terms with honorific transposition"] = {
	parents = {
		"terms by orthographic property",
	},
	description = "{{{langname}}} compound terms where the written order of the elements is reversed due to [[honorific transposition]].",
}

labels["terms where the adjective follows the noun"] = {
	parents = {
		"terms by lexical property",
	},
	description = "{{{langname}}} terms where the adjective follows the noun. These adjectives within these terms are sometimes referred to as postpositive or postnominal adjectives.",
}

-- Script-related labels, both under "terms by orthographic property".

labels["terms written in foreign scripts"] = {
	parents = {
		"terms by orthographic property",
	},
	description = "{{{langname}}} terms that are written in a different, non-native script.",
}

labels["terms written in multiple scripts"] = {
	parents = {
		"terms by orthographic property",
	},
	description = "{{{langname}}} terms that are written using more than one script.",
}

-- Intermediate label named as a parent by the pronunciation-related labels
-- in this module.
labels["terms by phonemic property"] = {
	parents = {
		"terms by lexical property",
	},
	description = "{{{langname}}} terms categorized by properties relating to [[pronunciation]] and [[phonemics]].",
}


-- Give every label that has no 'umbrella_parents' of its own the shared
-- default; labels that already set the key keep their value.
for _, label_data in pairs(labels) do
	label_data.umbrella_parents = label_data.umbrella_parents
		or "Terms by lexical property subcategories by language"
end


-----------------------------------------------------------------------------
--                                                                         --
--                              RAW CATEGORIES                             --
--                                                                         --
-----------------------------------------------------------------------------


-- Umbrella metacategory whose name is used as the default 'umbrella_parents'
-- value for the labels in this module.
raw_categories["Terms by lexical property subcategories by language"] = {
	description = "Umbrella categories covering topics related to terms categorized by their lexical properties, such as palindromes and number of letters or syllables in a word.",
	additional = "{{{umbrella_meta_msg}}}",
	breadcrumb = "subcategories",
	parents = {
		{
			name = "terms by lexical property",
			is_label = true,
			sort = " ",
		},
		"Umbrella metacategories",
	},
}

-- Build one "<Type> by number of <units> subcategories by language" umbrella
-- metacategory per {term type, counted unit} pair. Each is parented to the
-- corresponding "<type> by number of <units>" label.
for _, spec in ipairs({
	{"words", "letters"},
	{"abbreviations", "letters"},
	{"roots", "letters"},
	{"words", "syllables"},
}) do
	local term_type, unit = spec[1], spec[2]
	local parent_label = ("%s by number of %s"):format(term_type, unit)
	local category = ("%s by number of %s subcategories by language"):format(ucfirst(term_type), unit)

	raw_categories[category] = {
		description = ("Umbrella categories covering topics related to %s categorized by their number of %s."):format(term_type, unit),
		additional = "{{{umbrella_meta_msg}}}",
		breadcrumb = "subcategories",
		parents = {
			{name = parent_label, is_label = true, sort = " "},
			"Umbrella metacategories",
		},
	}
end

-- Umbrella metacategory for the "terms with N consecutive instances of the
-- same letter" categories; parented to the label of the same name.
raw_categories["Terms with consecutive instances of the same letter subcategories by language"] = {
	description = "Umbrella categories covering topics related to terms categorized by the number of consecutive instances of the same letter they contain.",
	additional = "{{{umbrella_meta_msg}}}",
	breadcrumb = "subcategories",
	parents = {
		{
			name = "terms with consecutive instances of the same letter",
			is_label = true,
			sort = " ",
		},
		"Umbrella metacategories",
	},
}


-----------------------------------------------------------------------------
--                                                                         --
--                                 HANDLERS                                --
--                                                                         --
-----------------------------------------------------------------------------

-- Handlers for numbered labels of the form "<N>-<unit> <type>", e.g.
-- "3-letter words" or "2-syllable words". One handler is registered per spec;
-- each spec is:
--   [1] term type (plural)             e.g. "words"
--   [2] counted unit (singular)        e.g. "letter"
--   [3] verb phrase for the description, e.g. "composed of"
--   [4] optional function(n) returning the `additional` text for count `n`
for _, spec in ipairs{
	{"words", "letter", "composed of", function(n)
		-- `n` is the count as a string, so compare against "1", not 1.
		local pl = n == "1" and "" or "s"
		return ("They have meaning(s) other than their letter%s or the shape of their letter%s which are not abbreviations, numbers or symbols."):format(pl, pl)
	end},
	{"abbreviations", "letter", "composed of"},
	{"roots", "letter", "composed of"},
	{"words", "syllable", "pronounced with"}
} do
	local term_type, unit, verb, get_additional = spec[1], spec[2], spec[3], spec[4]
	insert(handlers, function(data)
		-- `n` is the count as a digit string without a leading zero. A nil
		-- result means the label does not have this handler's shape, in which
		-- case the handler returns nothing.
		local n = data.label:match(("^([1-9]%%d*)%%-%s %s$"):format(unit, term_type))
		if not n then
			return
		end
		-- NOTE(review): numeral_sortkey lives in Module:category tree; presumably
		-- it yields a key that sorts counts numerically -- confirm there.
		local sortkey = require("Module:category tree").numeral_sortkey(n, 2097152)
		return {
			breadcrumb = n,
			-- The count is interpolated with %s, not %d: `n` is already a
			-- validated digit string, and %d would coerce it to a number, which
			-- can alter very large counts and make the text disagree with the
			-- breadcrumb above.
			description = ("{{{langname}}} %s that are %s %s %s%s."):format(term_type, verb, n, unit, n == "1" and "" or "s"),
			additional = get_additional and get_additional(n) or nil,
			umbrella = {
				breadcrumb = ("%s %s"):format(n, unit),
				parents = {{name = ("%s by number of %ss subcategories by language"):format(ucfirst(term_type), unit), sort = sortkey}},
			},
			parents = {{name = ("%s by number of %ss"):format(term_type, unit), sort = sortkey}}
		}
	end)
end

-- Handler for labels of the form
-- "terms with <N> consecutive instances of the same letter".
-- Returns nothing unless the label matches and N is at least 3.
insert(handlers, function(data)
	-- `n` is the count as a digit string without a leading zero, or nil.
	local n = data.label:match("^terms with ([1-9]%d*) consecutive instances of the same letter$")
	if not (n and tonumber(n) > 2) then
		return
	end
	-- NOTE(review): numeral_sortkey lives in Module:category tree; presumably it
	-- yields a key that sorts counts numerically -- confirm there.
	local sortkey = require("Module:category tree").numeral_sortkey(n, 2097152)
	return {
		breadcrumb = n,
		-- The count is interpolated with %s, not %d: `n` is already a validated
		-- digit string, and %d would coerce it to a number, which can alter very
		-- large counts and make the text disagree with the breadcrumb above.
		description = ("{{{langname}}} terms containing %s consecutive instances of the same letter."):format(n),
		umbrella = {
			breadcrumb = ("%s consecutive instances"):format(n),
			parents = {{name = "Terms with consecutive instances of the same letter subcategories by language", sort = sortkey}},
		},
		parents = {{name = "terms with consecutive instances of the same letter", sort = sortkey}}
	}
end)


-- Hand the three populated containers back to the caller under the keys
-- LABELS, RAW_CATEGORIES and HANDLERS.
return {
	LABELS = labels,
	RAW_CATEGORIES = raw_categories,
	HANDLERS = handlers,
}