Module:ru-headword
Jump to navigation
Jump to search
- The following documentation is located at Module:ru-headword/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module is used by many of the Russian headword-line templates: {{ru-noun}}
, {{ru-proper noun}}
, {{ru-proper noun+}}
, {{ru-adj}}
, {{ru-verb}}
, {{ru-verb-cform}}
, {{ru-adv}}
.
--[=[
This module implements the headword templates {{ru-noun}}, {{ru-adj}},
{{ru-adv}}, {{ru-noun+}}, etc. The main entry point is show(), which is
meant to be called from one of the above templates. However, {{ru-noun+}}
uses the entry point noun_plus(), and {{ru-noun-m}} (not currently used)
uses the entry point noun_multi(). When calling show(), the first parameter
of the #invoke call is the part of speech. Other parameters are taken from
the parent template call.
The implementations for different types of headwords (different parts of
speech) are set in pos_functions[POS] for a given POS (part of speech).
The value is a 2-argument function of (ARGS, DATA):
-- ARGS on entry is initialized to the parent template call's arguments,
with blank arguments converted to nil.
-- DATA on entry is initialized to a table, with entries like this:
local data = {lang = lang, pos_category = poscat, categories = {}, heads = {}, translits = {}, genders = {}, inflections = {}}
where:
-- LANG is an object describing the language.
-- POS_CATEGORY is the (plural) part of speech, e.g. "nouns" or "verbs".
-- CATEGORIES on entry is a list of categories. There will be one category
corresponding to the part of speech (e.g. [[Category:Russian adverbs]]),
and possibly additional categories such as [[Category:Requests for accents in Russian entries]]
and [[Category:Russian terms with irregular pronunciations]]. On exit
it may contain additional categories to place the page in.
-- HEADS on entry is a list of the headwords, taken directly from arguments
'1', 'head2', 'head3', ...
-- TRANSLITS on entry is a list of translits, matching one-to-one with
heads in HEADS; the entry for a given head is nil if no manual translit
was specified for that head.
-- GENDERS on entry is an empty list. On exit it should be the appropriate
gender settings, and will be passed directly to full_headword() in
[[Module:headword]]. See the documentation for that module for info on
the format of this setting.
-- INFLECTIONS on entry is an empty list. On exit it should be the
appropriate inflections to be displayed in the headword, and will be
passed directly to full_headword() in [[Module:headword]]. See the
documentation for that module for info on the format of this setting.
]=]--
local com = require("Module:ru-common")
local m_links = require("Module:links")
local m_headword = require("Module:headword")
local m_str_utils = require("Module:string utilities")
local m_utilities = require("Module:utilities")
local m_table = require("Module:table")
local m_table_tools = require("Module:table tools")
local m_debug = require("Module:debug")
-- Table of functions exposed to callers of this module (returned entry points are attached to it below).
local export = {}
-- Per-part-of-speech handlers, keyed by the (plural) part of speech, e.g. "nouns". Each value is a table with a
-- `params` field (extra parameter specs merged into the common ones by show()) and a `func` field (called as
-- func(args, data)).
local pos_functions = {}
local lang = require("Module:languages").getByCode("ru")
-- Marker removed from footnote text as a "note of irregularity".
local IRREGMARKER = "△"
-- Marker flagging a form as hypothetical; it is stripped from the form and turned into face = "hypothetical".
local HYPMARKER = "⟐"
-- Character class of Latin letters; not referenced in the part of the file visible here.
local latin_text_class = "[a-zščžěáéíóúýàèìòùỳâêîôûŷạẹịọụỵȧėȯẏ]"
-- Forward references
local do_noun
-- Short aliases for the functions of [[Module:string utilities]] used throughout this module.
local u = m_str_utils.char
local rfind = m_str_utils.find
local rsubn = m_str_utils.gsub
local rmatch = m_str_utils.match
local rsplit = m_str_utils.split
local ulower = m_str_utils.lower
local AC = u(0x0301) -- acute = ́
-- "If not empty": yields false for the empty string and passes every other value (including nil) through unchanged.
local function ine(x)
	if x == "" then
		return false
	end
	return x
end
-- Like rsubn(), but returns only the substituted string and drops the substitution count.
local function rsub(term, foo, bar)
	-- The parentheses truncate the multiple return values of rsubn() to the first one.
	return (rsubn(term, foo, bar))
end
-- Like rsubn(), but the second return value is a boolean that is true when at least one substitution was made.
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end
-- Build a wikilink to the section `entry` of [[Appendix:Glossary]]. The link is displayed as `text`, or as `entry`
-- itself when `text` is omitted.
local function glossary_link(entry, text)
	local display = text or entry
	return "[[Appendix:Glossary#" .. entry .. "|" .. display .. "]]"
end
-- Record a tracking hit for `page` under the "ru-headword/" prefix. Always returns true.
local function track(page)
	local tracking_key = "ru-headword/" .. page
	m_debug.track(tracking_key)
	return true
end
-- Thin wrapper around m_table.insertIfNot() that passes no explicit position and requests "deep compare".
local function insert_if_not(list, item)
	local position, options = nil, "deep compare"
	return m_table.insertIfNot(list, item, position, options)
end
-- Return a shallow copy of `in_args` in which every parameter whose value is the empty string is left out
-- (i.e. reads back as nil).
local function clone_args(in_args)
	local cloned = {}
	for key, value in pairs(in_args) do
		if value ~= "" then
			cloned[key] = value
		end
	end
	return cloned
end
-- Format `text` as a qualifier by delegating to format_qualifier() in [[Module:qualifier]], which is loaded on
-- demand.
local function make_qualifier_text(text)
	local m_qualifier = require("Module:qualifier")
	return m_qualifier.format_qualifier(text)
end
-- Convert a list of form strings (Russian with an optional manual translit, in whatever format
-- com.split_russian_tr() parses) into a new list of {RUSSIAN, TRANSLIT} objects, preserving order.
local function split_list_into_russian_tr(list)
	local split_forms = {}
	for _, entry in ipairs(list) do
		table.insert(split_forms, com.split_russian_tr(entry, "dopair"))
	end
	return split_forms
end
-- Turn `form` (either a {RUSSIAN, TR} pair or a string to be split by com.split_russian_tr()) into an "inflection
-- object" suitable as one part of an inflection passed to [[Module:headword]]:
--   {term = "TERM", face = "FACE", accel = ACCELERATOR_OBJECT}
-- where ACCELERATOR_OBJECT is
--   {form = "FORM USED IN {{inflection of}} OR SIMILAR", lemma = LIST, lemma_translit = LIST, pos = POS,
--    target = "|head= USED IN {{head}} OR SIMILAR", translit = "|tr= USED IN {{head}} OR SIMILAR"}
--
-- Normally the accelerator `target` would be taken automatically from the displayed link text, but that fails for
-- comparatives, whose form reads e.g. "([[покраснее|по]])[[краснее|красне́е]]" while the wanted target is just
-- красне́е. Hence target and translit are always set explicitly, defaulting to the form and its translit unless the
-- `target` parameter is given. The translit is deliberately NOT put in the outer object, since it would then be
-- displayed in the headword inflection.
--
-- Parameters:
--   data        supplies `heads`/`translits` (copied into the accelerator as lemma/lemma_translit) and `categories`
--               (receives a "Requests for accents" category when the checked form lacks accents; FIXME: consider
--               throwing an error instead).
--   form        the inflected form; HYPMARKER is stripped from it and, if seen, sets face = "hypothetical".
--   pos         part of speech of the lemma; used only in the name of the "Requests for accents" category.
--   accel_form  accelerator form; if nil, no accelerator object is attached.
--   accel_pos   part of speech of the inflection, stored in the accelerator object.
--   target      optional {RUSSIAN, TR} pair used for the accelerator target/translit and for the accent check;
--               defaults to the (marker-stripped) form.
local function russian_tr_to_inflection_obj(data, form, pos, accel_form, accel_pos, target)
	local ru, tr
	if type(form) ~= "string" then
		ru, tr = unpack(form)
	else
		ru, tr = com.split_russian_tr(form)
	end

	-- Strip the hypothetical marker from both the Russian and the translit, remembering whether it was seen.
	local ru_is_hypothetical, tr_is_hypothetical
	ru, ru_is_hypothetical = rsubb(ru, HYPMARKER, "")
	if tr then
		tr, tr_is_hypothetical = rsubb(tr, HYPMARKER, "")
	end

	local target_ru, target_tr = ru, tr
	if target then
		target_ru, target_tr = unpack(target)
	end

	local accel = nil
	if accel_form then
		-- FIXME, consider removing redundant translit
		-- Stuff in data.heads and data.translits gets destructively modified by [[Module:headword]] (YUCK), so
		-- clone it.
		accel = {
			form = accel_form,
			lemma = m_table.deepcopy(data.heads),
			lemma_translit = m_table.deepcopy(data.translits),
			pos = accel_pos,
			target = target_ru,
			translit = target_tr,
		}
	end

	local face = nil
	if ru_is_hypothetical or tr_is_hypothetical then
		face = "hypothetical"
	end

	if com.needs_accents(m_links.remove_links(target_ru)) then
		table.insert(data.categories, "Requests for accents in Russian " .. pos .. " entries")
	end

	-- To see the manual translit for each inflected part, add `translit = tr` to the returned object.
	return {term = ru, face = face, accel = accel}
end
-- Append one complete inflection (e.g. the genitive singular of a noun or the abstract noun of an adjective) to
-- `data.inflections`. Does nothing when `forms` is empty.
--   label       label of the inflection, e.g. "abstract noun".
--   forms       list of {RUSSIAN, TRANSLIT} objects, or a list of strings that is first split with
--               split_list_into_russian_tr().
--   pos         part of speech of the lemma, used for the "Requests for accents" category.
--   accel_form  accelerator form (e.g. "gen|s" for genitive singular), or nil for no accelerator.
--   accel_pos   part of speech of the inflection, if different from the lemma.
local function add_inflection(data, label, forms, pos, accel_form, accel_pos)
	if #forms == 0 then
		return
	end
	-- The list is known to be non-empty here, so its first element tells us which representation is in use.
	if type(forms[1]) == "string" then
		forms = split_list_into_russian_tr(forms)
	end
	forms = com.combine_translit_of_duplicate_forms(forms)

	local inflection = {label = label}
	for _, form in ipairs(forms) do
		local part = russian_tr_to_inflection_obj(data, form, pos, accel_form, accel_pos)
		insert_if_not(inflection, part)
	end
	table.insert(data.inflections, inflection)
end
-- Pair up the lemma heads in `data.heads` with their translits in `data.translits`, giving a list of
-- {RUSSIAN, TRANSLIT} objects. Combined translits (e.g. "azerbajdžánskij, azɛrbajdžánskij" for the head
-- "азербайджа́нский") are split into separate objects along the way.
local function zip_head_and_translit(data)
	local heads, translits = data.heads, data.translits
	return com.split_translit_of_duplicate_forms(com.zip_forms(heads, translits))
end
-- The main entry point. The first #invoke argument is the (plural) part of speech; everything else comes from the
-- parent template call. Builds the `data` table, fills in heads/translits and tracking categories, lets the
-- part-of-speech handler (if any) add genders and inflections, and returns the formatted headword followed by any
-- `data.extra_text` set by the handler.
function export.show(frame)
	local m_parameters = require("Module:parameters")

	local iargs = m_parameters.process(frame.args, {
		[1] = {required = true, desc = "part of speech"},
	})
	local poscat = iargs[1]
	local pos_handler = pos_functions[poscat]

	-- Parameters common to all parts of speech, extended with the handler-specific ones.
	local params = {
		[1] = {list = "head"}, -- heads
		["tr"] = {list = true}, -- translits
		["noposcat"] = {type = "boolean"}, -- don't add part of speech category
		["noacccat"] = {type = "boolean"}, -- don't add missing-accent tracking category
		["notrcat"] = {type = "boolean"}, -- don't add 'irregular pronunciations' tracking category
	}
	if pos_handler then
		for name, spec in pairs(pos_handler.params) do
			params[name] = spec
		end
	end

	local args = m_parameters.process(frame:getParent().args, params)

	local data = {
		lang = lang,
		pos_category = poscat,
		categories = {},
		heads = {},
		translits = {},
		redundant_translits = {},
		genders = {},
		inflections = {},
		noposcat = args.noposcat,
	}

	local PAGENAME = mw.loadData("Module:headword/data").pagename
	local NAMESPACE = mw.title.getCurrentTitle().nsText

	-- Get the head parameters, falling back to the page name when none were given.
	local heads = args[1]
	if #heads == 0 then
		heads = {PAGENAME}
	end
	data.heads = heads

	for index, head in ipairs(heads) do
		local head_no_links = m_links.remove_links(head)
		local head_noaccent = com.remove_accents(head_no_links)

		-- Catch errors in arguments where headword doesn't match page title,
		-- but only in the main namespace; for the moment, do only with tracking.
		if NAMESPACE == "" and head_noaccent ~= PAGENAME then
			track("bad-headword")
			--error("Headword " .. head .. " doesn't match pagename " .. PAGENAME)
		end

		if com.needs_accents(head_no_links) and not args.noacccat then
			table.insert(data.categories, "Requests for accents in Russian entries")
		end

		local manual_tr = args.tr[index]
		if manual_tr then
			manual_tr = com.decompose(manual_tr)
			local auto_tr = com.translit_no_links(head)
			if manual_tr == auto_tr then
				-- The manual translit merely repeats the automatic one.
				data.redundant_translits[index] = true
			elseif not args.notrcat then
				table.insert(data.categories, "Russian terms with irregular pronunciations")
			end
			data.translits[index] = manual_tr
		end
	end

	if pos_handler then
		pos_handler.func(args, data)
	end

	return m_headword.full_headword(data) .. (data.extra_text or "")
end
-- Add to `params` (in place) the parameter specs shared by {{ru-noun}} and {{ru-noun+}}, and return `params`.
local function add_common_noun_params(params)
	-- Boolean flags marking some property of the noun as unknown:
	-- declension, stress position, stress pattern (a, b, b', ...), gender, animacy.
	local boolean_params = {
		"unknown_decl", "unknown_stress", "unknown_pattern", "unknown_gender", "unknown_animacy",
	}
	-- List-valued parameters: feminine equivalent(s), masculine equivalent(s), relational adjective(s),
	-- diminutive(s), augmentative(s), pejorative(s), demonym(s), female demonym(s).
	local list_params = {"f", "m", "adj", "dim", "aug", "pej", "dem", "fdem"}

	for _, name in ipairs(boolean_params) do
		params[name] = {type = "boolean"}
	end
	for _, name in ipairs(list_params) do
		params[name] = {list = true}
	end
	return params
end
-- Implementation of {{ru-noun+}} and never-created {{ru-noun-m}}, an attempt to implement a slightly different
-- interface for nouns. If we plan to add a different noun interface, it should follow the form of {{uk-noun}}; e.g.
-- instead of existing {{ru-noun-table|[[дви́гатель]]|m|_|[[внутренний|вну́треннего]]|+$|_|[[сгорание|сгора́ния]]|$}}, it
-- should look more like {{ru-ndecl|дви́гатель<M> [[внутренний|вну́треннего]] [[сгорание|сгора́ния]]}}.
--
-- `frame` is the #invoke frame: frame.args supplies the part of speech (1=), old= and ndef=, and the parent frame
-- supplies the template arguments. `multi` selects do_generate_forms_multi() instead of do_generate_forms() in
-- [[Module:ru-noun]]. Returns the formatted headword, followed by any extra text set by do_noun() and by the notes=
-- footnotes (the latter only when some generated form actually carried a footnote symbol).
local function noun_plus_or_multi(frame, multi)
local iparams = {
[1] = {required = true, desc = "part of speech"},
["old"] = {type = "boolean"},
["ndef"] = {},
}
local iargs = require("Module:parameters").process(frame.args, iparams)
local poscat = iargs[1]
local params = add_common_noun_params({
["g"] = {list = true}, -- genders
["notes"] = {list = true}, -- "footnotes" displayed after headword
})
local parargs = frame:getParent().args
-- `headword_args` holds the parameters declared in `params`; `args` is the second value returned in
-- "return unknown" mode (presumably the remaining, undeclared arguments, which are handed on to
-- [[Module:ru-noun]] -- TODO confirm against [[Module:parameters]]).
local headword_args, args = require("Module:parameters").process(parargs, params, "return unknown")
args = clone_args(args)
-- default value of n=, used in ru-proper noun+ where ndef=sg is set
args.ndef = args.ndef or iargs.ndef
local m_noun = require("Module:ru-noun")
-- From here on `args` is the table returned by the form generator; fields of it read below are genders,
-- n, ndef, notrcat, nom_sg_linked, nom_pl_linked, gen_sg, nom_pl and gen_pl.
if multi then
args = m_noun.do_generate_forms_multi(args, iargs.old)
else
args = m_noun.do_generate_forms(args, iargs.old)
end
local data = {lang = lang, pos_category = poscat, categories = {}, inflections = {}}
-- do explicit genders using g=, g2=, etc.
data.genders = headword_args.g
-- if none, do inferred or explicit genders taken from declension;
-- clone because will get destructively modified by do_noun()
if #data.genders == 0 then
data.genders = mw.clone(args.genders)
end
-- Set to true by prepare_entry() as soon as any form has a non-empty footnote (after removing the
-- irregularity and hypothetical markers); controls whether notes= is displayed at the end.
local saw_note = false
-- Given a list of {RU, TR} pairs, where TR may be nil, separate off the
-- footnote symbols from RU and TR, link the remainder if it's not already
-- linked, and remove monosyllabic accents (but not from multiword
-- expressions).
-- A nil or empty `list` yields {{"-"}}. When `ishead` is set the entries are not wrapped in links; entries
-- that carried HYPMARKER are also left unlinked and get HYPMARKER re-appended after their notes.
local function prepare_entry(list, ishead)
if not list or #list == 0 then
return {{"-"}}
end
local newlist = {}
for _, x in ipairs(list) do
local ru, tr = x[1], x[2]
-- Track non-head forms that already contain link syntax in the Russian or the translit.
if not ishead and (rfind(ru, "[%[|%]]") or tr and rfind(tr, "[%[|%]]")) then
track("form-with-link")
end
-- separate_notes() just returns the note, but get_notes() adds
-- <sup>...</sup>. We want the former for checking whether the
-- note is nonempty after removing IRREGMARKER (if we use the
-- latter we'll get <sup></sup> in the case of just IRREGMARKER),
-- but the latter when generating the inflectional form.
local ruentry, runotes = m_table_tools.separate_notes(ru)
local sawhyp
runotes = rsub(runotes, IRREGMARKER, "") -- remove note of irregularity
runotes, sawhyp = rsubb(runotes, HYPMARKER, "")
if runotes ~= "" then
saw_note = true
end
runotes = m_table_tools.superscript_notes(runotes)
local trentry, trnotes
if tr then
trentry, trnotes = m_table_tools.separate_notes(tr)
trnotes = rsub(trnotes, IRREGMARKER, "") -- remove note of irregularity
trnotes = m_table_tools.superscript_notes(trnotes)
end
ruentry, trentry = com.remove_monosyllabic_accents(ruentry, trentry)
if sawhyp then
-- Hypothetical form: keep it unlinked and put the marker back at the end.
table.insert(newlist, {ruentry .. runotes .. HYPMARKER,
trentry and trentry .. trnotes .. HYPMARKER})
elseif ishead then
table.insert(newlist, {ruentry .. runotes, trentry and trentry .. trnotes})
else
local ruspan, trspan
if ruentry == "-" then
ruspan = "-"
elseif rfind(ruentry, "[%[|%]]") then
-- don't add links around a form that's already linked
ruspan = ruentry .. runotes
else
ruspan = "[[" .. ruentry .. "]]" .. runotes
end
if trentry then
trspan = trentry .. trnotes
end
table.insert(newlist, {ruspan, trspan})
end
end
return newlist
end
-- Number restriction: "p" (plural only) takes heads and genitives from the plural forms; "s" suppresses the
-- plural columns; anything else uses singular heads/genitives plus the plural forms.
local argsn = args.n or args.ndef
local heads, genitives, plurals, genpls
if argsn == "p" then
heads = prepare_entry(args.nom_pl_linked, "ishead")
genitives = prepare_entry(args.gen_pl)
plurals = {{"-"}}
genpls = {{"-"}}
else
heads = prepare_entry(args.nom_sg_linked, "ishead")
genitives = prepare_entry(args.gen_sg)
plurals = argsn == "s" and {{"-"}} or prepare_entry(args.nom_pl)
genpls = argsn == "s" and {{"-"}} or prepare_entry(args.gen_pl)
end
heads = com.combine_translit_of_duplicate_forms(heads)
data.heads, data.translits = com.unzip_forms(heads)
-- Any manual translit among the heads counts as an irregular pronunciation unless suppressed.
if next(data.translits) and not args.notrcat then
table.insert(data.categories, "Russian terms with irregular pronunciations")
end
do_noun(data, headword_args, argsn == "s", genitives, plurals, genpls, poscat)
local notes = headword_args.notes
local notes_segments = {}
-- The notes= footnotes are only shown when at least one form referenced a footnote.
if saw_note then
for _, note in ipairs(notes) do
table.insert(notes_segments, " " .. make_qualifier_text(note))
end
end
local notes_text = table.concat(notes_segments, "")
return m_headword.full_headword(data) .. (data.extra_text or "") .. notes_text
end
-- External entry point for {{ru-noun+}}: runs the shared noun implementation in single (non-multi) mode.
function export.noun_plus(frame)
	local multi = false
	return noun_plus_or_multi(frame, multi)
end
-- External entry point for the never-created {{ru-noun-m}}: runs the shared noun implementation in multi mode.
function export.noun_multi(frame)
	local multi = true
	return noun_plus_or_multi(frame, multi)
end
-- Implementation of {{ru-noun}} and {{ru-proper noun}}. Returns the pos_functions entry (a table with `params`
-- and `func`) for the noun-like part of speech `pos`.
local function get_noun_pos(pos)
	local params = add_common_noun_params({
		[2] = {list = "g", required = true, default = "?"}, -- genders
		[3] = {list = "gen"}, -- genitive singulars, or - for indeclinable
		[4] = {list = "pl"}, -- nominative plurals
		[5] = {list = "genpl"}, -- genitive plurals
		["altyo"] = {type = "boolean"}, -- called from {{ru-noun-alt-ё}} or variants
		["manual"] = {type = "boolean"}, -- allow manual specification of principal parts
	})

	local function func(args, data)
		data.genders = args[2]
		local genitives, plurals, genpls = args[3], args[4], args[5]

		-- The manual template is still permitted for alt-ё variants, explicit manual=, indeclinables, and
		-- nouns for which something is flagged as unknown; otherwise it is rejected in the main namespace.
		local manual_use_allowed = args.altyo or args.manual or genitives[1] == "-"
		local something_unknown = args.unknown_decl or args.unknown_stress or args.unknown_pattern
			or args.unknown_gender or args.unknown_animacy
		if not manual_use_allowed and not something_unknown
			and mw.title.getCurrentTitle().nsText == "" then
			error("[[Template:ru-noun]] can now only be used with indeclinable and manually-declined nouns; use [[Template:ru-noun+]] instead")
		end

		do_noun(
			data, args, pos == "proper nouns",
			split_list_into_russian_tr(genitives),
			split_list_into_russian_tr(plurals),
			split_list_into_russian_tr(genpls),
			pos
		)
	end

	return {params = params, func = func}
end
-- Register the noun-like parts of speech; each displays additional inflection information via get_noun_pos().
for _, noun_pos in ipairs({"proper nouns", "pronouns", "nouns"}) do
	pos_functions[noun_pos] = get_noun_pos(noun_pos)
end
-- Guts of {{ru-noun}} and {{ru-noun+}}. Normalizes and validates the genders in `data.genders`, then fills
-- `data.inflections`, `data.categories` and (when something is flagged as unknown) `data.extra_text`.
--   data       headword data table; `genders` must already be set, `inflections` and `categories` are appended to.
--   args       processed template arguments; the fields read are f, m, adj, dim, aug, pej, dem, fdem and the
--              unknown_* flags.
--   no_plural  true if the noun is to be treated as having no plural.
--   genitives, plurals, genpls
--              lists of {RUSSIAN, TRANSLIT} objects; a first entry whose Russian is "-" means the form is
--              intentionally missing.
--   pos        plural part of speech, e.g. "nouns" or "proper nouns".
-- Throws an error on an unrecognized gender spec.
do_noun = function(data, args, no_plural, genitives, plurals, genpls, pos)
	local recognized_genders = {
		"", -- not allowed when singular; this is needed because some invariant plural only words have no gender to speak of
		"m",
		"f",
		"n",
		"mf",
		"mfbysense",
	}
	local recognized_animacies = {
		"",
		"?",
		"an",
		"in",
	}
	local recognized_numbers = {
		"",
		"p",
	}

	local function insert_if_not_blank(seq, part)
		if part ~= "" then
			table.insert(seq, part)
		end
	end

	-- True if the list of {RUSSIAN, TRANSLIT} objects starts with the "-" placeholder.
	local function form_is_intentionally_missing(forms)
		return #forms > 0 and forms[1][1] == "-"
	end

	local singular_genders = {} -- a set
	local plural_genders = {} -- a set

	-- Generate the allowed gender/number/animacy specs.
	for _, number in ipairs(recognized_numbers) do
		for _, gender in ipairs(recognized_genders) do
			for _, animacy in ipairs(recognized_animacies) do
				local set = number == "" and singular_genders or plural_genders
				if gender ~= "" or number == "p" then -- disallow blank gender unless plural
					local gender_number = {}
					insert_if_not_blank(gender_number, gender)
					insert_if_not_blank(gender_number, animacy)
					insert_if_not_blank(gender_number, number)
					local spec = table.concat(gender_number, "-")
					set[spec] = true
				end
			end
		end
	end

	for i, g in ipairs(data.genders) do
		if g == "m" then
			g = "m-?"
		elseif g == "m-p" then
			g = "m-?-p"
		-- `plurals` holds {RUSSIAN, TRANSLIT} objects, so the "-" placeholder has to be looked for inside the
		-- first object; comparing the object itself with "-" is always unequal and would mark the animacy of
		-- every bare "f" as unknown even when the plural is explicitly missing.
		elseif g == "f" and not form_is_intentionally_missing(plurals) and not no_plural then
			g = "f-?"
		elseif g == "f-p" then
			g = "f-?-p"
		elseif g == "p" then
			g = "?-p"
		end

		if not singular_genders[g] and not plural_genders[g] and g ~= "?" and g ~= "?-in" and g ~= "?-an" then
			error("Unrecognized gender: " .. g)
		end

		data.genders[i] = g

		-- Categorize by number
		if plural_genders[g] then
			if g == "?-p" or g == "an-p" or g == "in-p" then
				table.insert(data.categories, "Russian pluralia tantum with incomplete gender")
			end
		end
	end

	local function add_noun_forms(label, forms, accel_form)
		add_inflection(data, label, forms, "noun", accel_form)
	end

	-- Add the genitive forms
	if form_is_intentionally_missing(genitives) then
		table.insert(data.inflections, {label = glossary_link("indeclinable")})
		table.insert(data.categories, "Russian indeclinable nouns")
	else
		add_noun_forms("genitive", genitives)
	end

	-- Add the plural forms
	-- If the noun is plural only, then ignore the 4th parameter altogether
	if form_is_intentionally_missing(genitives) then
		-- do nothing
	elseif plural_genders[data.genders[1]] then
		table.insert(data.inflections, {label = glossary_link("plural only")})
	elseif form_is_intentionally_missing(plurals) then
		if pos ~= "proper nouns" then
			table.insert(data.inflections, {label = glossary_link("uncountable")})
			table.insert(data.categories, "Russian uncountable nouns")
		end
	else
		add_noun_forms("nominative plural", plurals)
		--This can't work currently because the forms in plurals are already
		--linked with spans around them, superscripted notes, etc.
		--for _, form in ipairs(plurals) do
		--	local ru, tr = unpack(form)
		--	if not rfind(form, HYPMARKER) and not mw.title.new(form).exists then
		--		table.insert(categories, "Russian nouns with missing plurals")
		--	end
		--end
	end

	-- Add the genitive plural forms
	if form_is_intentionally_missing(genitives) or plural_genders[data.genders[1]]
		or form_is_intentionally_missing(plurals) then
		-- indeclinable, plural only or uncountable; do nothing
	elseif form_is_intentionally_missing(genpls) then
		table.insert(data.inflections, {label = "genitive plural missing"})
	else
		add_noun_forms("genitive plural", genpls)
	end

	-- Add the feminine forms
	add_noun_forms("feminine", args.f, "f")
	-- Add the masculine forms; intentionally no accelerator as the masculine forms are lemmas and need manual handling
	add_noun_forms("masculine", args.m)
	-- Add the relational adjective forms; intentionally no accelerator, need manual handling
	add_noun_forms(glossary_link("relational", "relational adjective"), args.adj)
	-- Add the diminutive forms
	add_noun_forms(glossary_link("diminutive"), args.dim, "diminutive")
	-- Add the augmentative forms
	add_noun_forms(glossary_link("augmentative"), args.aug, "augmentative")
	-- Add the pejorative forms
	add_noun_forms(glossary_link("pejorative"), args.pej, "pejorative")
	-- Add the demonyms
	add_noun_forms(glossary_link("demonym"), args.dem, "demonym")
	-- Add the female demonyms
	add_noun_forms(glossary_link("female demonym"), args.fdem, "female demonym")

	-- Collect a qualifier listing everything flagged as unknown, tracking each flag as we go.
	local extra_notes = {}
	if args.unknown_decl then
		track("unknown-decl")
		table.insert(extra_notes, "unknown declension")
	end
	if args.unknown_stress then
		track("unknown-stress")
		table.insert(extra_notes, "unknown stress")
	end
	if args.unknown_pattern then
		track("unknown-pattern")
		table.insert(extra_notes, "unknown accent pattern")
	end
	if args.unknown_gender then
		track("unknown-gender")
		table.insert(extra_notes, "unknown gender")
	end
	if args.unknown_animacy then
		track("unknown-animacy")
		table.insert(extra_notes, "unknown animacy")
	end
	if #extra_notes > 0 then
		data.extra_text = " " .. make_qualifier_text(table.concat(extra_notes, ", "))
	end
end
-- Given a comparative {RUSSIAN, TR} ending in -ее (with or without stress on the first е), return the informal
-- variant in -ей as a {RUSSIAN, TR} pair; return nil for any other comparative.
local function generate_informal_comp(comp)
	local ru, tr = unpack(comp)
	if not rfind(ru, "е́?е$") then
		return nil
	end
	ru, tr = com.strip_ending(ru, tr, "е") -- Cyrillic е
	return com.concat_russian_tr(ru, tr, "й", nil, "dopair")
end
-- "по-hack" a comparative: for a form ending in -е or -ей, return a new {RUSSIAN, TR} pair whose Russian is a
-- "(по)" link to the по-prefixed form followed by a link to the plain form, and whose translit (if any) is
-- prefixed with "(po)". Any other form is returned unchanged.
local function generate_po_variant(comp)
	local ru, tr = unpack(comp)
	local takes_po = rfind(ru, "е$") or rfind(ru, "е́?й$")
	if not takes_po then
		return comp
	end
	local hacked_ru = "[[по" .. ru .. "|(по)]][[" .. ru .. "]]"
	local hacked_tr = nil
	if tr then
		hacked_tr = "(po)" .. tr
	end
	return {hacked_ru, hacked_tr}
end
-- Build the periphrastic comparative of `positive` (a {RUSSIAN, TR} pair) by prefixing a link to бо́лее.
local function generate_periphrastic_comp(positive)
	local positive_ru, positive_tr = unpack(positive)
	local prefix = "[[бо́лее]] "
	return com.concat_russian_tr(prefix, nil, positive_ru, positive_tr, "dopair")
end
-- Endings that generate_comparative() strips from a positive form; they are tried in this order and the first
-- one that matches at the end of the word wins.
local allowed_endings = {
"ый",
"ий",
"о́й",
--old
"ій",
-- last two for adverbs
"о",
"о́",
}
-- Expected last translit character for a stem ending in each Cyrillic velar; used to sanity-check manual
-- translit in generate_comparative().
local velar_to_translit = {
["к"] = "k",
["г"] = "g",
["х"] = "x"
}
-- Replacement for a stem-final velar when forming the comparative in -е, for both Cyrillic and translit.
local velar_to_palatal = {
["к"] = "ч",
["г"] = "ж",
["х"] = "ш",
["k"] = "č",
["g"] = "ž",
["x"] = "š"
}
-- Generate the comparative(s) given the positive(s). `positives` is a list of {RUSSIAN, TR} forms. `compspec` is the
-- comparative spec (either + or a spec giving an adjectival accent pattern, such as +c'). If + is given, the default
-- is +a unless the positive is ending-stressed, in which case the default is +b; this default is decided separately
-- for each positive. Return value is a list of {RUSSIAN, TR} forms without duplicates. Upon input, transliterations
-- must be decomposed.
--
-- Throws an error if `compspec` is malformed, if pattern a is requested for an ending-stressed positive, if a
-- positive has none of the endings in `allowed_endings`, or if a manual translit doesn't end in the equivalent of
-- a stem-final velar.
local function generate_comparative(positives, compspec)
	local comps = {}
	if not rfind(compspec, "^%+") then
		error("Compspec '" .. compspec .. "' must begin with + in this function")
	end
	if compspec ~= "+" and not rfind(compspec, "^%+[abc]'*$") then
		error("Compsec '" .. compspec .. "' has illegal format, should be e.g. + or +c''")
	end
	compspec = rsub(compspec, "^%+", "")
	for _, positive in ipairs(positives) do
		local ru, tr = unpack(positive)
		ru = m_links.remove_links(ru)
		-- Work on a per-positive copy of the spec, so that defaulting to pattern b for one ending-stressed
		-- positive doesn't silently carry over to the positives that follow it.
		local spec = compspec
		local removed_ending = false
		for _, allowed_ending in ipairs(allowed_endings) do
			if rfind(ru, allowed_ending .. "$") then
				if allowed_ending == "о́й" or allowed_ending == "о́" then
					if spec == "a" then
						error("Short stress pattern a not allowed with ending-stressed adjectives/adverbs")
					elseif spec == "" then
						spec = "b"
					end
				end
				ru, tr = com.strip_ending(ru, tr, allowed_ending)
				removed_ending = true
				break
			end
		end
		if not removed_ending then
			error("Russian '" .. ru .. "' doesn't end with expected ending")
		end
		local comp, comptr
		if rfind(ru, "[кгх]$") then
			-- Velar stem: palatalize the final consonant, stress the stem and add unstressed -е.
			local stemru, lastruchar = rmatch(ru, "^(.*)(.)$")
			local stemtr, lasttrchar
			if tr then
				stemtr, lasttrchar = rmatch(tr, "^(.*)(.)$")
				if velar_to_translit[lastruchar] ~= lasttrchar then
					error("Translit '" .. tr .. "' doesn't end with transliterated equivalent of last char '" ..
						lastruchar .. "' of Russian '" .. ru .. "'")
				end
			end
			comp, comptr = com.make_ending_stressed(stemru, stemtr)
			comp = comp .. velar_to_palatal[lastruchar] .. "е" -- Cyrillic е
			if comptr then
				comptr = comptr .. velar_to_palatal[lasttrchar] .. "e" -- Latin e
			end
		elseif spec == "" or spec == "a" then
			comp = ru .. "ее" -- Cyrillic ее
			-- The translit has to be derived from `tr`; `comptr` is still unset at this point, so testing
			-- it would always discard the manual translit.
			if tr then
				comptr = tr .. "ee" -- Latin ee
			end
		else -- end-stressed comparative, including pattern a'
			comp, comptr = com.make_unstressed_once(ru, tr)
			comp = comp .. "е́е" -- Cyrillic е́е
			if comptr then
				comptr = comptr .. "e" .. AC .. "e" -- Latin decomposed ée
			end
		end
		insert_if_not(comps, {comp, comptr})
	end
	return comps
end
-- Meant to be called from a bot. 1= is a comma-separated list of positive forms (each optionally with manual
-- translit, in the format parsed by com.split_russian_tr()); 2= is the comparative spec, e.g. + or +b, and
-- defaults to +. Returns the generated comparatives joined by com.concat_forms() and recomposed.
function export.generate_comparative(frame)
	local iparams = {
		[1] = {required = true, desc = "comparative"},
		[2] = {},
	}
	local iargs = require("Module:parameters").process(frame.args, iparams)
	-- generate_comparative() rejects any spec that doesn't begin with "+", so the fallback must be "+" (the
	-- "pick the default pattern" spec); an empty-string fallback made every call without 2= fail.
	local compspec = iargs[2] or "+"
	local positives = rsplit(iargs[1], ",")
	for i, positive in ipairs(positives) do
		positives[i] = com.split_russian_tr(positive, "dopair")
	end
	local comps = generate_comparative(positives, compspec)
	return com.recompose(com.concat_forms(comps))
end
-- Handle comparative inflections. If an explicit form is given such as коро́че or красне́е, we add it in a "hacked"
-- format that notes that e.g. покоро́че or покрасне́е is a possible variant. We also generate an informal form in -ей
-- if possible, e.g. красне́й, with по-hacking applied (but no such variant is possible for коро́че). We also handle
-- autogenerating comparatives when specified as + or +b, +c'', etc. (All specifications with an accent pattern are
-- equivalent other than +a.) We also allow and handle certain qualifiers such as dated-+b or awkward-нехитре́е.
-- Finally, we allow and handle periphrastic comparatives noted using "peri".
--
--   data    headword data table; `heads`/`translits` are read, `inflections` (and possibly `categories`) appended to.
--   comps   list of comparative spec strings taken from the template arguments; a single "-" means "no comparative".
--   catpos  singular part of speech of the lemma (e.g. "adjective"), used to name the "Requests for accents"
--           category.
--   noinf   if true, don't generate the informal comparatives in -ей.
local function handle_comparatives(data, comps, catpos, noinf)
	comps = split_list_into_russian_tr(comps)
	if #comps == 1 and comps[1][1] == "-" then
		table.insert(data.inflections, {label = "no comparative"})
		track("nocomp")
	elseif #comps > 0 then
		-- One bucket of {RUSSIAN, TR} forms per comparative type.
		local normal_comp_parts = {}
		local rare_comp_parts = {}
		local dated_comp_parts = {}
		local awkward_comp_parts = {}

		local function get_comp_parts(comptype)
			return comptype == "rare" and rare_comp_parts or
				comptype == "dated" and dated_comp_parts or
				comptype == "awkward" and awkward_comp_parts or
				normal_comp_parts
		end

		local function insert_comp_inflection(comptype, comp)
			insert_if_not(get_comp_parts(comptype), comp)
		end

		-- Insert `comp` in по-hacked form, plus its informal variant unless suppressed or impossible.
		local function insert_comp_of_type(comp, comptype)
			insert_comp_inflection(comptype, generate_po_variant(comp))
			if not noinf then
				local informal = generate_informal_comp(comp)
				if informal then
					insert_comp_inflection(comptype, generate_po_variant(informal))
				end
			end
		end

		for _, comp in ipairs(comps) do
			local ru, tr = unpack(comp)
			local comptype = "normal"
			if rfind(ru, "^rare%-") then
				comptype = "rare"
				ru = rsub(ru, "^rare%-", "")
			elseif rfind(ru, "^dated%-") then
				comptype = "dated"
				ru = rsub(ru, "^dated%-", "")
			elseif rfind(ru, "^awkward%-") then
				comptype = "awkward"
				ru = rsub(ru, "^awkward%-", "")
			end
			if ru == "peri" then
				for _, positive in ipairs(zip_head_and_translit(data)) do
					insert_comp_inflection(comptype, generate_periphrastic_comp(positive))
				end
				track("pericomp")
			elseif rfind(ru, "^%+") then -- literal "+", escaped as in generate_comparative()
				local autocomps = generate_comparative(zip_head_and_translit(data), ru)
				for _, autocomp in ipairs(autocomps) do
					insert_comp_of_type(autocomp, comptype)
				end
			else
				insert_comp_of_type({ru, tr}, comptype)
			end
		end

		local function add_comp_inflection(label, comp_parts, accel_form)
			if #comp_parts == 0 then
				return
			end
			local parts = {label = label}
			comp_parts = com.combine_translit_of_duplicate_forms(comp_parts)
			for _, form in ipairs(comp_parts) do
				local ru, tr = unpack(form)
				-- WARNING: This has intimate knowledge of how generate_po_variant() works. To avoid this, we could
				-- maintain the un-po-hacked target in each form in comp_parts, but then we'd have to modify
				-- com.combine_translit_of_duplicate_forms() to preserve the extra target info when combining
				-- duplicate forms, or use a map from hacked Russian form to target.
				local un_po_hacked_ru = m_links.remove_links(rsub(ru, "^%[%[.-%]%]", ""))
				local un_po_hacked_tr = tr and rsub(tr, "^%(po%)", "") or nil
				local un_po_hacked_form = {un_po_hacked_ru, un_po_hacked_tr}
				-- Pass the `catpos` parameter as the part of speech. The previously used name `pos` isn't
				-- defined in this scope, so it evaluated to nil and made the "Requests for accents" category
				-- name fail to build whenever a comparative was missing accents.
				insert_if_not(parts,
					russian_tr_to_inflection_obj(data, form, catpos, accel_form, nil, un_po_hacked_form))
			end
			table.insert(data.inflections, parts)
		end

		add_comp_inflection("comparative", normal_comp_parts, "comparative")
		add_comp_inflection("rare comparative", rare_comp_parts)
		add_comp_inflection("dated comparative", dated_comp_parts)
		add_comp_inflection("rare/awkward comparative", awkward_comp_parts)
	end
end
-- Display additional inflection information for an adjective: indeclinability, comparatives, superlatives,
-- adverbs, abstract nouns, diminutives, augmentatives and pejoratives, in that order.
pos_functions["adjectives"] = {
	params = {
		["indecl"] = {type = "boolean"}, --indeclinable
		["noinf"] = {type = "boolean"}, --suppress informal comparatives
		[2] = {list = "comp"}, --comparative(s)
		[3] = {list = "sup"}, --superlative(s)
		["adv"] = {list = true}, --corresponding adverb(s)
		["absn"] = {list = true}, --corresponding abstract noun(s)
		["dim"] = {list = true}, --corresponding diminutive(s)
		["aug"] = {list = true}, --corresponding augmentative(s)
		["pej"] = {list = true}, --corresponding pejorative(s)
	},
	func = function(args, data)
		local function add_adj_forms(label, forms, accel_form, accel_pos)
			add_inflection(data, label, forms, "adjective", accel_form, accel_pos)
		end

		if args.indecl then
			table.insert(data.inflections, {label = "indeclinable"})
			table.insert(data.categories, "Russian indeclinable adjectives")
		end

		handle_comparatives(data, args[2], "adjective", args.noinf)

		-- Superlatives; "peri" stands for the periphrastic superlative built from each lemma.
		local sups = args[3]
		if #sups > 0 then
			local normalized_sups = {}
			for _, sup in ipairs(sups) do
				if sup ~= "peri" then
					insert_if_not(normalized_sups, com.split_russian_tr(sup, "dopair"))
				else
					for _, lemma in ipairs(zip_head_and_translit(data)) do
						local ru, tr = unpack(lemma)
						insert_if_not(normalized_sups,
							com.concat_russian_tr("[[са́мый]] ", nil, ru, tr, "dopair"))
					end
				end
			end
			add_adj_forms("superlative", normalized_sups, "superlative")
		end

		-- Adverbs
		add_adj_forms("adverb", args.adv)

		-- Abstract nouns; "+" stands for the default formation in -ость from each lemma.
		local absns = args.absn
		if #absns > 0 then
			local normalized_absn = {}
			for _, absn in ipairs(absns) do
				if absn ~= "+" then
					insert_if_not(normalized_absn, com.split_russian_tr(absn, "dopair"))
				else
					for _, lemma in ipairs(zip_head_and_translit(data)) do
						local ru, tr = unpack(lemma)
						if rfind(ru, "о́?й$") then
							error("Can't form default abstract noun of ending-stressed adjective " .. ru)
						end
						-- Strip whichever adjectival ending is present, falling back to -ый.
						local ending = rfind(ru, "ий$") and "ий" or rfind(ru, "ій$") and "ій" or "ый"
						ru, tr = com.strip_ending(ru, tr, ending)
						insert_if_not(normalized_absn, com.concat_russian_tr(ru, tr, "ость", nil, "dopair"))
					end
				end
			end
			add_adj_forms("abstract noun", normalized_absn, "abstract noun", "noun")
		end

		-- Diminutives, augmentatives and pejoratives
		add_adj_forms(glossary_link("diminutive"), args.dim, "diminutive")
		add_adj_forms(glossary_link("augmentative"), args.aug, "augmentative")
		add_adj_forms(glossary_link("pejorative"), args.pej, "pejorative")
	end
}
-- Headword handler for adverbs: declares the accepted template parameters and
-- adds comparatives, diminutives, augmentatives and pejoratives to DATA.
pos_functions["adverbs"] = {
	params = {
		["noinf"] = {type = "boolean"}, -- suppress informal comparatives
		[2] = {list = "comp"}, -- comparative(s)
		-- ["3"] = {list = "sup"}, --FIXME: why no superlatives?
		["dim"] = {list = true}, -- corresponding diminutive(s)
		["aug"] = {list = true}, -- corresponding augmentative(s)
		["pej"] = {list = true}, -- corresponding pejorative(s)
	},
	func = function(args, data)
		local comparatives = args[2]
		handle_comparatives(data, comparatives, "adverb", args.noinf)

		-- Diminutives, augmentatives and pejoratives, in that order. Each entry
		-- pairs the glossary term (also used as the accelerator form) with the
		-- name of the list parameter holding the forms.
		local related_kinds = {
			{"diminutive", "dim"},
			{"augmentative", "aug"},
			{"pejorative", "pej"},
		}
		for _, kind in ipairs(related_kinds) do
			local term, param = kind[1], kind[2]
			add_inflection(data, glossary_link(term), args[param], "adverb", term)
		end
	end
}
-- Build the headword handler (parameter spec plus function) for a verb-like
-- part of speech. POS is the plural part-of-speech name; when it is
-- "verbal combining forms" the page is additionally categorized under
-- "Russian verbs".
local function get_verb_pos(pos)
	local is_cform = pos == "verbal combining forms"

	-- Maps each accepted aspect argument to the gender code recorded in
	-- data.genders; "both" is recorded as "biasp".
	local aspect_to_gender = {
		["both"] = "biasp",
		["pf"] = "pf",
		["impf"] = "impf",
		["biasp"] = "biasp",
		["?"] = "?",
	}

	return {
		params = {
			[2] = {required = true, default = "?"}, -- aspect
			["impf"] = {list = true}, -- imperfective(s)
			["pf"] = {list = true}, -- perfective(s)
			["vn"] = {list = true}, -- verbal noun(s)
		},
		func = function(args, data)
			if is_cform then
				table.insert(data.categories, "Russian verbs")
			end

			-- Aspect: validate it and record it as a gender code.
			local aspect = args[2]
			local gender = aspect_to_gender[aspect]
			if not gender then
				error("Invalid Russian verb aspect '" .. aspect .. "', should be 'pf', 'impf', 'both', 'biasp' or '?'")
			end
			table.insert(data.genders, gender)

			-- Thin wrapper around add_inflection() with the part of speech fixed.
			local function add_verb_forms(label, forms, accel_form, accel_pos)
				add_inflection(data, label, forms, "verb", accel_form, accel_pos)
			end

			-- Imperfective counterparts; intentionally no accelerator, need manual handling.
			-- They are rejected on a verb that is itself imperfective.
			local imperfectives = args.impf
			if #imperfectives > 0 and aspect == "impf" then
				error("Can't specify imperfective counterparts for an imperfective verb")
			end
			add_verb_forms("imperfective", imperfectives)

			-- Perfective counterparts; intentionally no accelerator, need manual handling.
			-- They are rejected on a verb that is itself perfective.
			local perfectives = args.pf
			if #perfectives > 0 and aspect == "pf" then
				error("Can't specify perfective counterparts for a perfective verb")
			end
			add_verb_forms("perfective", perfectives)

			-- Verbal nouns
			add_verb_forms("verbal noun", args.vn, "verbal noun", "noun")
		end,
	}
end
-- Register a handler for each verb-like part of speech; each one gets its own
-- table from get_verb_pos(), keyed by the same name that is passed in.
for _, verb_pos in ipairs({"verbs", "verbal combining forms"}) do
	pos_functions[verb_pos] = get_verb_pos(verb_pos)
end
return export