Module:category tree/poscatboiler/data/lang-specific/uk

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module handles generating the descriptions and categorization for Ukrainian category pages of the format "Ukrainian LABEL" where LABEL can be any text. Analogous examples from other languages that use the same format are Category:Bulgarian conjugation 2.1 verbs and Category:Russian velar-stem neuter-form nouns. This module is part of the poscatboiler system, which is a general framework for generating the descriptions and categorization of category pages.

For more information, see Module:category tree/poscatboiler/data/lang-specific/documentation.

NOTE: If you add a new language-specific module, you must add the language code to the list at the top of Module:category tree/poscatboiler/data/lang-specific in order for the module to be recognized.


-- Registries exported at the end of the module: `labels` maps fixed label
-- strings to category specs; `handlers` is a list of functions that match
-- labels by pattern.
local labels, handlers = {}, {}

-- Short aliases for the mw.ustring pattern functions used by the handlers.
local rfind, rmatch, rsubn = mw.ustring.find, mw.ustring.match, mw.ustring.gsub

-- Same as rsubn(), but yields only the substituted string; the parentheses
-- around the call truncate the result list to its first value.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end


--------------------------------- Verbs --------------------------------

-- Umbrella categories for verbs. Both entries share the same shape and differ
-- only in the criterion, which doubles as the sort key under the parent.
for _, criterion in ipairs({"class", "class and accent pattern"}) do
	labels["verbs by " .. criterion] = {
		description = "Ukrainian verbs categorized by " .. criterion .. ".",
		parents = {{name = "verbs by inflection type", sort = criterion}},
	}
end

-- Handler for verb class categories. Recognizes labels of the form
-- "class N verbs" (optionally with a variant code after N) and
-- "class N<variant><pattern> verbs", where <variant> consists of the
-- characters ( ) [ ] ° and <pattern> is one of the accent patterns a, b, c.
-- Returns a category spec table, or nothing if the label does not match.
table.insert(handlers, function(data)
	local cls, variant, pattern = rmatch(data.label, "^class ([0-9]*)([()%[%]°]*)([abc]?) verbs$")
	if not cls then
		return
	end

	if pattern == "" then
		-- No accent pattern given: this is the per-class parent category.
		return {
			description = "Ukrainian class " .. cls .. " verbs.",
			breadcrumb = cls,
			parents = {{name = "verbs by class", sort = cls .. variant}},
		}
	end

	-- Explanation of where the stress falls for the given accent pattern.
	local pattern_text
	if pattern == "a" then
		pattern_text = "With this pattern, all forms are stem-stressed."
	elseif pattern == "b" then
		pattern_text = "With this pattern, all forms are ending-stressed."
	else
		pattern_text = "With this pattern, the first singular present indicative and all forms " ..
			"outside of the present indicative are ending-stressed, while the remaining " ..
			"forms of the present indicative are stem-stressed."
	end

	-- Explanation of the variant code. Defaults to the empty string so that a
	-- class/variant combination without a dedicated explanation still yields a
	-- valid description (previously the and/or chain evaluated to `false` in
	-- that case and the concatenation raised an error).
	local variant_text = ""
	if cls == "3" and variant == "°" then
		variant_text = " The variant code indicates that the -н of the stem " ..
			"is missing in most non-present-tense forms."
	elseif cls == "3" and (variant == "(°)" or variant == "[°]") then
		variant_text = " The variant code indicates that the -н of the stem " ..
			"is optionally missing in most non-present-tense forms."
	elseif cls == "6" and variant == "°" then
		variant_text = " The variant code indicates that the present tense is not " ..
			"[[Appendix:Glossary#iotation|iotated]]. (In most verbs of this class, " ..
			"the present tense is iotated, e.g. писа́ти with present tense " ..
			"пишу́, пи́шеш, пи́ше, etc.)"
	end

	local variant_suffix = variant == "" and "" or " and variant " .. variant

	return {
		description = "Ukrainian class " .. cls .. " verbs of accent pattern " .. pattern ..
			variant_suffix .. ". " .. pattern_text .. variant_text,
		breadcrumb = cls .. variant .. pattern,
		parents = {
			{name = "class " .. cls .. " verbs", sort = pattern},
			{name = "verbs by class and accent pattern", sort = cls .. pattern},
		},
	}
end)


--------------------------------- Adjectives --------------------------------

-- Umbrella category for the per-stem-type adjective categories produced by the
-- adjective handler.
do
	local description = "Ukrainian adjectives categorized by stem type and stress. " ..
		"Unlike for nouns, adjectives are consistently either stem-stressed or ending-stressed."
	local parent = {name = "adjectives by inflection type", sort = "stem type and stress"}
	labels["adjectives by stem type and stress"] = {
		description = description,
		parents = {parent},
	}
end


-- Text describing what the stem ends in, keyed by the stem type as it appears
-- in adjective category labels.
local adj_stem_expl = {
	["hard-stem"] = "a hard consonant",
	["soft-stem"] = "a soft consonant",
	["ц-stem"] = "-ц",
	["vowel-stem"] = "a vowel, or -й or -ь",
	possessive = "-ов, -єв, -ин or -їн",
}

-- Nominative endings per declension type, in the order
-- {masculine singular, feminine singular, neuter singular, plural}.
local adj_decl_endings = {
	["hard stem-stressed"] = {"-ий", "-а", "-е", "-і"},
	["hard ending-stressed"] = {"-и́й", "-а́", "-е́", "-і́"},
	soft = {"-ій", "-я", "-є", "-і"},
	["c-stem"] = {"-ий", "-я", "-е", "-і"},
	["j-stem"] = {"-їй", "-я", "-є", "-ї"},
	possessive = {"-", "-а", "-е", "-і"},
	surname = {"-", "-а", "(nil)", "-и"},
}

-- Handler for adjective categories of the form "STEM adjectives" or
-- "STEM STRESS-stressed adjectives" (e.g. "soft-stem adjectives",
-- "hard-stem ending-stressed adjectives"). Returns a category spec table, or
-- nothing if the label does not match a known stem/stress combination.
table.insert(handlers, function(data)
	-- FIXME! Harmonize stem types here with stem types in 'adj_decl_endings' (used
	-- directly in adjectival nouns).
	-- Map the stem type and stress found in the label to a key of
	-- 'adj_decl_endings'; returns nil for unsupported combinations.
	local function stem_to_ending_type(stem, stress)
		if stem == "hard-stem" and stress == "stem" then
			return "hard stem-stressed"
		elseif stem == "hard-stem" and stress == "ending" then
			return "hard ending-stressed"
		elseif stress == "" then
			if stem == "soft-stem" then
				return "soft"
			elseif stem == "ц-stem" then
				return "c-stem"
			elseif stem == "vowel-stem" then
				return "j-stem"
			elseif stem == "possessive" then
				return "possessive"
			end
		end
		return nil
	end

	-- Try the form with an explicit stress first, then the form without one.
	local breadcrumb, stem, stress = rmatch(data.label, "^(([^ ]*) ([^ *]*)-stressed) adjectives")
	if not breadcrumb then
		breadcrumb, stem = rmatch(data.label, "^(([^ ]*)) adjectives")
		stress = ""
	end
	if not breadcrumb then
		return
	end

	local ending_type = stem_to_ending_type(stem, stress)
	if not (ending_type and adj_stem_expl[stem]) then
		return
	end

	-- Endings are stored as {masc sg, fem sg, neut sg, plural}.
	local endings = adj_decl_endings[ending_type]
	local m, f, n, p = endings[1], endings[2], endings[3], endings[4]
	local stresstext = stress == "stem" and
		"The adjectives in this category have stress on the stem." or
		stress == "ending" and
		"The adjectives in this category have stress on the endings." or
		"All adjectives of this type have stress on the stem."
	-- The neuter singular slot uses `n`; it previously repeated the plural
	-- ending `p` by mistake.
	local endingtext = "ending in the nominative in masculine singular " .. m ..
		", feminine singular " .. f .. ", neuter singular " .. n .. " and plural " ..
		p .. "."
	local stemtext = " The stem ends in " .. adj_stem_expl[stem] .. "."
	return {
		description = "Ukrainian " .. stem .. " adjectives, " .. endingtext .. stemtext .. " " .. stresstext,
		breadcrumb = breadcrumb,
		parents = {"adjectives by stem type and stress"},
	}
end)


--------------------------------- Nouns/Pronouns/Numerals --------------------------------

-- Register the fixed labels that nouns, pronouns and numerals have in common.
-- Every label is "<pos> <suffix>"; its breadcrumb is the suffix itself.
for _, pos in ipairs({"nouns", "pronouns", "numerals"}) do
	-- Singular form of the part of speech (trailing "s" removed).
	local singular = string.gsub(pos, "s$", "")
	local by_inflection = pos .. " by inflection type"
	local irregular = "irregular " .. pos

	-- Each entry is {label suffix, description suffix, parent name, sort key}.
	local specs = {
		{
			"by stem type and gender",
			"categorized by stem type and typical gender. " ..
				"Note that \"typical gender\" means the gender that is typical for the " .. singular .. "'s ending (e.g. most " .. pos .. " in ''-а'' are " ..
				"feminine, and hence all such " .. pos .. " are considered to be \"typically feminine\"; but some are in fact masculine).",
			by_inflection, "stem type and gender",
		},
		{
			"by stem type, gender and accent pattern",
			"categorized by stem type, typical gender and " ..
				"accent pattern. Note that \"typical gender\" means the gender that is typical for the " .. singular .. "'s ending (e.g. most " ..
				pos .. " in ''-а'' are feminine, and hence all such " .. pos .. " are considered to be \"typically feminine\"; but some are in " ..
				"fact masculine). See [[Template:uk-ndecl]] for further information on accent patterns.",
			by_inflection, "stem type, gender and accent pattern",
		},
		{
			"by vowel alternation",
			"categorized according to their vowel alternation pattern (e.g. ''і'' vs. ''о'').",
			pos, "vowel alternation",
		},
		{
			"by accent pattern",
			"categorized according to their accent pattern (see [[Template:uk-ndecl]]).",
			by_inflection, "accent pattern",
		},
		{
			"with reducible stem",
			"with a reducible stem, where an extra vowel is inserted " ..
				"before the last stem consonant in the nominative singular and/or genitive plural.",
			by_inflection, "reducible stem",
		},
		{
			"with multiple stems",
			"with multiple stems.",
			by_inflection, "multiple stems",
		},
		{
			"with multiple accent patterns",
			"with multiple accent patterns. See [[Template:uk-ndecl]].",
			by_inflection, "multiple accent patterns",
		},
		{
			"with irregular stem",
			"with an irregular stem, which occurs in all cases except the nominative singular and maybe the accusative singular.",
			irregular, "stem",
		},
		{
			"with irregular plural stem",
			"with an irregular plural stem, which occurs in all cases.",
			irregular, "plural stem",
		},
	}

	for _, spec in ipairs(specs) do
		local suffix, desc, parent, sortkey = spec[1], spec[2], spec[3], spec[4]
		labels[pos .. " " .. suffix] = {
			description = "Ukrainian " .. pos .. " " .. desc,
			breadcrumb = suffix,
			parents = {{name = parent, sort = sortkey}},
		}
	end

	-- The adjectival category has its own label shape and no explicit breadcrumb.
	labels["adjectival " .. pos] = {
		description = "Ukrainian " .. pos .. " with adjectival endings.",
		parents = {pos},
	}
end

-- Text describing what the stem ends in, keyed by the stem type as it appears
-- in noun category labels. Used in "The stem ends in ..." sentences.
local noun_stem_expl = {
	["hard"] = "a hard consonant",
	-- The last ending is written with a plain hyphen and Cyrillic х, matching
	-- the other two (it previously used an en dash and a Latin "x").
	["velar-stem"] = "a velar (-к, -г or -х)",
	["semisoft"] = "a hushing consonant (-ш, -ж, -ч or -щ)",
	["soft"] = "a soft consonant",
	["c-stem"] = "-ц",
	["j-stem"] = "conceptual -й",
	["n-stem"] = "-м' (with -ен- in some forms)",
	["t-stem"] = "-я or -а (with -т- in most forms)",
	["possessive"] = "-ов, -єв, -ин or -їн",
	["surname"] = "-ов, -ів, -їв, -єв, -ин, -ін or -їн",
}

-- Stem types whose traditional declension is fixed by the stem itself rather
-- than derived from the gender.
local noun_stem_to_declension = {
	["n-stem"] = "fourth",
	["t-stem"] = "fourth",
	["third-declension"] = "third",
}

-- Typical endings per gender and stem type, stored as
-- {nominative singular, nominative plural}. Keys with a trailing "-<vowel>"
-- are the variants for labels of the form "... in -<vowel>".
local noun_stem_gender_endings = {
	masculine = {
		["hard"] = {"a hard consonant", "-и"},
		["velar-stem"] = {"a velar", "-и"},
		["semisoft"] = {"a hushing consonant or -р", "-і"},
		["soft"] = {"-ь or -р", "-і"},
		["j-stem"] = {"-й", "-ї"},
		["hard-о"] = {"-о", "-и or occasionally -а"},
		["velar-stem-о"] = {"-о", "-и or occasionally -а"},
		["soft-о"] = {"-ьо", "-і"},
		["semisoft-о"] = {"-о", "-и"},
		["semisoft-е"] = {"-е", "-а"},
	},
	feminine = {
		["hard"] = {"-а", "-и"},
		["semisoft"] = {"-а", "-і"},
		["soft"] = {"-я", "-і"},
		["j-stem"] = {"-я", "-ї"},
		["third-declension"] = {"-ь, -р, a labial, or a hushing consonant", "-і"},
		["semisoft-е"] = {"-е", "-і"},
	},
	neuter = {
		["hard"] = {"-о", "-а"},
		["velar-stem"] = {"-о", "-а"},
		["semisoft"] = {"-е", "-а"},
		["soft"] = {"-е", "-я"},
		["j-stem"] = {"-є", "-я"},
		["soft-я"] = {"-я", "-я"},
		["n-stem"] = {"-я", "-я"},
		["t-stem"] = {"-я or -а", "-та"},
	},
}

-- Handler for noun/pronoun/numeral inflection categories. The label is tried
-- against the following shapes, in order:
--   1. "STEM GENDER adjectival [accent-X ]POSs"
--   2. "STEM GENDER-form accent-X POSs" or "STEM GENDER accent-X POSs in -V"
--   3. "STEM GENDER-form POSs" or "STEM GENDER POSs in -V"
--   4. "POSs with accent pattern X"
--   5. "POSs with FROM-TO alternation"
-- Returns a category spec table for the first shape that matches and whose
-- stem/gender is recognized, or nothing otherwise.
table.insert(handlers, function(data)
	-- Replace apostrophes in an accent pattern name with the HTML entity so
	-- they cannot combine with neighbouring apostrophes into wikitext
	-- italic/bold markup. (The replacement was previously a literal
	-- apostrophe, which made this function a no-op.)
	local function escape_accent(accent)
		return rsub(accent, "'", "&#39;")
	end

	-- Pattern fragment matching the "in -V" suffix and capturing the vowel.
	local in_ending = "in %-([оея])"

	-- Build the descriptive text for a stem/gender combination. `genderspec`
	-- is either a bare gender or "GENDER in -V"; `pos` is the singular part of
	-- speech. Returns nil if the combination is not in
	-- 'noun_stem_gender_endings'.
	local function get_stem_gender_text(stem, genderspec, pos)
		local gender = genderspec
		local ending = rmatch(gender, in_ending .. "$")
		local stemindex = stem
		if ending then
			-- Split "GENDER in -V" into the gender and a "STEM-V" lookup key.
			gender = rsub(gender, " " .. in_ending .. "$", "")
			stemindex = stemindex .. "-" .. ending
		end
		if not noun_stem_gender_endings[gender] then
			return nil
		end
		local endings = noun_stem_gender_endings[gender][stemindex]
		if not endings then
			return nil
		end
		local sgending, plending = endings[1], endings[2]
		local stemtext = noun_stem_expl[stem] and " The stem ends in " .. noun_stem_expl[stem] .. "." or ""
		-- Stem names that already mention the declension need no extra sentence.
		local decltext =
			rfind(stem, "declension") and "" or
			" This is traditionally considered to belong to the " .. (
				noun_stem_to_declension[stem] or gender == "feminine" and "first" or "second"
			) .. " declension."
		local genderdesc
		if ending then
			-- With an explicit ending the gender is exact, not merely typical.
			genderdesc = gender .. " " .. pos .. "s"
		else
			genderdesc = "usually " .. gender .. " " .. pos .. "s"
		end
		return stem .. ", " .. genderdesc .. ", normally ending in " .. sgending .. " in the nominative singular" ..
			" and " .. plending .. " in the nominative plural." .. stemtext .. decltext
	end

	-- 1. Adjectival nouns etc., with or without an accent pattern.
	local stem, gender, accent, pos = rmatch(data.label, "^(.*) (.-) adjectival accent%-(.-) (.*)s$")
	if not stem then
		stem, gender, pos = rmatch(data.label, "^(.*) (.-) adjectival (.*)s$")
	end
	if stem and noun_stem_expl[stem] then
		local stemspec
		if stem == "hard" then
			stemspec = accent == "a" and "hard stem-stressed" or "hard ending-stressed"
		else
			stemspec = stem
		end
		local endings = adj_decl_endings[stemspec]
		if endings then
			local stemtext = " The stem ends in " .. noun_stem_expl[stem] .. "."
			local accentdesc = accent == "a" and
				"This " .. pos .. " is stressed according to accent pattern a (stress on the stem)." or
				accent == "b" and
				"This " .. pos .. " is stressed according to accent pattern b (stress on the ending)." or
				"All " .. pos .. "s of this class are stressed according to accent pattern a (stress on the stem)."
			local accenttext = accent and " accent-" .. accent or ""
			local m, f, n, pl = unpack(endings)
			-- Singular ending for the gender in the label; nil for any other
			-- gender word, in which case only the plural is mentioned.
			local sg =
				gender == "masculine" and m or
				gender == "feminine" and f or
				gender == "neuter" and n or
				nil
			return {
				description = "Ukrainian " .. stem .. " " .. gender .. " " .. pos ..
				"s, with adjectival endings, ending in " .. (sg and sg .. " in the nominative singular and " or "") ..
				pl .. " in the nominative plural." .. stemtext .. " " .. accentdesc,
				breadcrumb = stem .. " " .. gender .. accenttext,
				parents = {
					{name = "adjectival " .. pos .. "s", sort = stem .. " " .. gender .. accenttext},
					pos .. "s by stem type, gender and accent pattern",
				}
			}
		end
	end

	-- 2. Stem/gender categories restricted to one accent pattern. part1 and
	-- part2 are the label with the "accent-X" portion cut out, i.e. the name
	-- of the parent stem/gender category.
	local part1, stem, gender, accent, part2, pos = rmatch(data.label, "^((.-) (.-)%-form) accent%-(.-)( (.*)s)$")
	local ending
	if not stem then
		-- check for e.g. 'Ukrainian hard masculine accent-a nouns in -о'
		part1, stem, gender, accent, part2, pos, ending = rmatch(data.label, "^((.-) ([a-z]+ine)) accent%-(.-)( (.*)s " .. in_ending .. ")$")
		if stem then
			gender = gender .. " in -" .. ending
		end
	end
	if not stem then
		-- check for e.g. 'Ukrainian soft neuter accent-a nouns in -я'
		part1, stem, gender, accent, part2, pos, ending = rmatch(data.label, "^((.-) (neuter)) accent%-(.-)( (.*)s " .. in_ending .. ")$")
		if stem then
			gender = gender .. " in -" .. ending
		end
	end
	if stem then
		local stem_gender_text = get_stem_gender_text(stem, gender, pos)
		if stem_gender_text then
			local accent_text = " This " .. pos .. " is stressed according to accent pattern " ..
				escape_accent(accent) .. " (see [[Template:uk-ndecl]])."
			return {
				description = "Ukrainian " .. stem_gender_text .. accent_text,
				breadcrumb = "Accent-" .. escape_accent(accent),
				parents = {
					-- The sort key uses the raw (unescaped) accent name.
					{name = part1 .. part2, sort = accent},
					pos .. "s by stem type, gender and accent pattern",
				}
			}
		end
	end

	-- 3. Stem/gender categories without an accent pattern.
	local stem, gender, pos = rmatch(data.label, "^(.-) (.-)%-form (.*)s$")
	if not stem then
		-- check for e.g. 'Ukrainian hard masculine nouns in -о'
		stem, gender, pos, ending = rmatch(data.label, "^(.-) ([a-z]+ine) (.*)s " .. in_ending .. "$")
		if stem then
			gender = gender .. " in -" .. ending
		end
	end
	if not stem then
		-- check for e.g. 'Ukrainian soft neuter nouns in -я'
		stem, gender, pos, ending = rmatch(data.label, "^(.-) (neuter) (.*)s " .. in_ending .. "$")
		if stem then
			gender = gender .. " in -" .. ending
		end
	end
	if stem then
		local stem_gender_text = get_stem_gender_text(stem, gender, pos)
		if stem_gender_text then
			return {
				description = "Ukrainian " .. stem_gender_text,
				breadcrumb = ending and stem .. " " .. gender or stem .. " " .. gender .. "-form",
				parents = {pos .. "s by stem type and gender"},
			}
		end
	end

	-- 4. All words of a part of speech sharing one accent pattern.
	local pos, accent = rmatch(data.label, "^(.*)s with accent pattern (.*)$")
	if accent then
		return {
			description = "Ukrainian " .. pos .. "s with accent pattern " .. escape_accent(accent) ..
				" (see [[Template:uk-ndecl]]).",
			breadcrumb = {name = escape_accent(accent), nocap = true},
			parents = {{name = pos .. "s by accent pattern", sort = accent}},
		}
	end

	-- 5. Vowel alternation categories, e.g. "nouns with і-о alternation".
	local pos, fromto, altfrom, altto = rmatch(data.label, "^(.*)s with ((.*)%-(.*)) alternation$")
	if altfrom then
		return {
			description = "Ukrainian " .. pos .. "s with vowel alternation between " .. altfrom ..
				" in the lemma and " .. altto .. " in the last syllable of some or all remaining forms.",
			breadcrumb = {name = fromto, nocap = true},
			parents = {{name = pos .. "s by vowel alternation", sort = fromto}},
		}
	end
end)


-- Module export: the fixed label specs and the list of label handlers.
return {
	LABELS = labels,
	HANDLERS = handlers,
}