Jump to content

Module:ru-nominal

From Wiktionary, the free dictionary

This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.

--[[
This module holds functions shared between ru-noun and ru-adjective.
]]

local export = {}

local lang = require("Module:languages").getByCode("ru")

local m_links = require("Module:links")
local m_table = require("Module:table")
local com = require("Module:ru-common")
local m_ru_translit = require("Module:ru-translit")
local m_table_tools = require("Module:table tools")

local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local usub = mw.ustring.sub

local HYPMARKER = "⟐"

-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar)
	local retval = rsubn(term, foo, bar)
	return retval
end

-- Insert an entry into an existing list if not already present, comparing the entry to items in the existing list
-- using a key function. If entry already found, combine it into the existing entry using combine_func, a function of
-- two arguments (the existing and new entries), which should return the combined entry. Return false if entry already
-- found, true if new entry inserted. If combine_func not specified, the existing entry is left alone. If combine_func
-- is specified, the return value will be written over the existing value (i.e. the existing list will be modified
-- in-place).
--
-- FIXME: General enough to consider moving to [[Module:table]].
local function insert_if_not_by_key(list, new_entry, keyfunc, combine_func)
	local new_entry_key = keyfunc(new_entry)
	for i, item in ipairs(list) do
		local item_key = keyfunc(item)
		if m_table.deepEquals(item_key, new_entry_key) then
			if combine_func then
				list[i] = combine_func(item, new_entry)
			end
			return false
		end
	end
	table.insert(list, new_entry)
	return true
end

--------------------------------------------------------------------------
--                        Used for manual translit                      --
--------------------------------------------------------------------------

function export.combine_stem_and_suffix(stem, tr, suf, rules, old)
	local first = usub(suf, 1, 1)
	if rules then
		local conv = rules[first]
		if conv then
			local ending = usub(suf, 2)
			-- The following regexp is not quite the same as com.vowels. For one thing
			-- it includes й, which is important. It leaves out ы, which may or may not
			-- be important.
			if old and conv == "и" and rfind(ending, "^́?[аеёиійоуэюяѣ]") then
				conv = "і"
			end
			suf = conv .. ending
		end
	end
	-- If <adj> is present in the suffix, it means we need to translate it
	-- specially; do that now.
	local is_adj = rfind(suf, "<adj>")
	suf = rsub(suf, "<adj>", "")
	local suftr = is_adj and m_ru_translit.tr_adj(suf, "mono")
	return com.concat_russian_tr(stem, tr, suf, suftr, "dopair"), suf
end

--------------------------------------------------------------------------
--                      Formatting forms for display                    --
--------------------------------------------------------------------------

-- Generate a string to substitute into a particular form in a Wiki-markup table. `forms` is the list of forms,
-- generated by concat_word_forms(). `is_lemma` is true if we're formatting the entry for use in displaying the lemma
-- in the declension table title. In this case, we don't include the translit, and remove monosyllabic accents from the
-- Cyrillic (but not in multiword expressions). `accel_form` is the form code to speicfy in the accelerator, e.g.
-- 'nom|m|s', or nil for no accelerator. `lemma_forms` is the list of {RU, TR} lemma forms for use in the accelerator,
-- or nil if `accel_form` is nil. `remove_monosyllabic_accents_lemma_only` indicates that monosyllabic accents should
-- be removed only in the lemma; otherwise we remove them from all forms. (FIXME: Rethink why we have this flag; we
-- should be consistent.)
function export.show_form(forms, is_lemma, accel_form, lemma_forms, remove_monosyllabic_accents_lemma_only)
	local russianvals = {}
	local latinvals = {}
	local lemmavals = {}

	-- First fetch the lemma forms and translit. If there are adjacent forms that have identical Russian including
	-- stress but different translit (e.g. азербайджа́нец with translits 'azerbajdžánec' and 'azɛrbajdžánec'), we
	-- combine the translits, comma-separating them. (This is necessary because there is currently only one tr=
	-- field per term.) We don't do this when processing the forms below; instead we handle this in a different
	-- and more general fashion (see below).
	local lemmaru, lemmatr
	if accel_form and lemma_forms and lemma_forms[1] ~= "-" then
		lemma_forms = com.combine_translit_of_duplicate_forms(com.strip_notes_from_forms(lemma_forms))
		for i, form in ipairs(lemma_forms) do
			local ru, tr = unpack(lemma_forms[i])
			ru, tr = com.remove_monosyllabic_accents(ru, tr)
			lemma_forms[i] = {ru, tr}
		end
		lemmaru, lemmatr = com.unzip_forms(lemma_forms)
	end

	-- Accumulate separately the Russian and transliteration into RUSSIANVALS and LATINVALS, then concatenate each down
	-- below. We need a fair amount of logic here:
	-- (1) to separate out footnote symbols;
	-- (2) to separate out the hypothetical marker (a footnote symbol but causes display of the Russian and translit
	--     in a special font);
	-- (3) to maybe remove monosyllabic accents;
	-- (4) to deduplicate repeated forms.
	-- We used to generate the display (HTML) as we went, but this prevented proper deduplication because the
	-- accelerator classes included in the HTML were different for otherwise identical forms. (Specifically, if two
	-- forms are the same in the Russian but different in the translit, the Russian will have different display forms
	-- because the translit is included in the accelerator classes.) So what we do is accumulate, separately for the
	-- Russian and translit, objects containing the entry (Russian or translit), the separated footnote symbols,
	-- whether the entry is hypothetical, and (for Russian only) the corresponding translit(s), for accelerator
	-- generation. As we accumulate, we duduplicate based only on comparing the entries of two objects. If we need to
	-- deduplicate two objects, we also need to combine their footnotes and transliteration (in the latter case, by
	-- comma-separating; we do this because there is only one tr= field for each term).
	for _, form in ipairs(forms) do
		local ru, tr = form[1], form[2]
		local ruentry, runotes = m_table_tools.separate_notes(ru)
		local trentry, trnotes
		if tr then
			trentry, trnotes = m_table_tools.separate_notes(tr)
			trnotes = rsub(trnotes, HYPMARKER, "")
		end
		if (is_lemma or not remove_monosyllabic_accents_lemma_only) then
			ruentry, trentry = com.remove_monosyllabic_accents(ruentry, trentry)
		end
		local ishyp = rfind(runotes, HYPMARKER)
		if ishyp then
			runotes = rsub(runotes, HYPMARKER, "")
		end
		local ruobj = {entry = ruentry, tr = {trentry or true}, ishyp = ishyp, notes = runotes}
		if not trentry then
			trentry = com.translit_no_links(ruentry)
		end
		if not trnotes then
			trnotes = com.translit_no_links(runotes)
		end
		local trobj = {entry = trentry, ishyp = ishyp, notes = trnotes}

		local function keyfunc(obj)
			return obj.entry
		end
		local function combine_func_ru(obj1, obj2)
			for _, tr in ipairs(obj2.tr) do
				m_table.insertIfNot(obj1.tr, tr)
			end
			obj1.notes = obj1.notes .. obj2.notes
			obj1.ishyp = obj1.ishyp or obj2.ishyp
			return obj1
		end
		local function combine_func_tr(obj1, obj2)
			obj1.notes = obj1.notes .. obj2.notes
			obj1.ishyp = obj1.ishyp or obj2.ishyp
			return obj1
		end
		if is_lemma then
			-- m_table.insertIfNot(lemmavals, ruspan .. " (" .. trspan .. ")")
			insert_if_not_by_key(lemmavals, ruobj, keyfunc, combine_func_ru)
		else
			insert_if_not_by_key(russianvals, ruobj, keyfunc, combine_func_ru)
			insert_if_not_by_key(latinvals, trobj, keyfunc, combine_func_tr)
		end
	end

	-- Now finally format each object and concatenate them together.
	local function concatenate_ru(objs)
		local is_missing = false
		for i, obj in ipairs(objs) do
			local accel = nil
			if lemmaru then
				local translit = nil
				if #obj.tr == 1 and obj.tr[1] == true then
					-- no translit
				else
					for j, tr in ipairs(obj.tr) do
						if tr == true then
							obj.tr[j] = com.translit_no_links(obj.entry)
						end
					end
					translit = table.concat(obj.tr, ", ")
				end
				accel = {form = accel_form, translit = translit, lemma = lemmaru, lemma_translit = lemmatr}
			end
			if obj.entry == "-" and #forms == 1 then
				objs[i] = "&mdash;"
				is_missing = true
			end
			if obj.ishyp then
				-- no accelerator for hypothetical forms
				objs[i] = m_links.full_link({lang = lang, term = nil, alt = obj.entry, tr = "-"}, "hypothetical")
			else
				objs[i] = m_links.full_link({lang = lang, term = obj.entry, tr = "-", accel = accel})
			end
			objs[i] = objs[i] .. m_table_tools.superscript_notes(obj.notes)
		end
		return table.concat(objs, ", "), is_missing
	end

	local function concatenate_tr(objs)
		local scriptutils = require("Module:script utilities")
		for i, obj in ipairs(objs) do
			local trspan = m_links.remove_links(obj.entry) .. m_table_tools.superscript_notes(obj.notes)
			if obj.ishyp then
				-- FIXME, in the old [[Module:ru-noun]] code, notes were omitted from hypothetical entries. Correct?
				objs[i] = scriptutils.tag_text(trspan, lang, require("Module:scripts").getByCode("Latn"),
					"hypothetical")
			else
				objs[i] = scriptutils.tag_translit(trspan, lang, "default", " style=\"color: #888;\"")
			end
		end
		return table.concat(objs, ", ")
	end

	if is_lemma then
		local russian_span, is_missing = concatenate_ru(lemmavals)
		return russian_span
	else
		local russian_span, is_missing = concatenate_ru(russianvals)
		if is_missing then
			return russian_span
		end
		local latin_span = concatenate_tr(latinvals)
		return russian_span .. "<br />" .. latin_span
	end
end

--------------------------------------------------------------------------
--                          Sibilant/Velar/ц rules                      --
--------------------------------------------------------------------------

local stressed_sibilant_rules = {
	["я"] = "а",
	["ы"] = "и",
	["ё"] = "о́",
	["ю"] = "у",
}

local stressed_c_rules = {
	["я"] = "а",
	["ё"] = "о́",
	["ю"] = "у",
}

local unstressed_sibilant_rules = {
	["я"] = "а",
	["ы"] = "и",
	["о"] = "е",
	["ю"] = "у",
}

local unstressed_c_rules = {
	["я"] = "а",
	["о"] = "е",
	["ю"] = "у",
}

local velar_rules = {
	["ы"] = "и",
}

export.stressed_rules = {
	["ш"] = stressed_sibilant_rules,
	["щ"] = stressed_sibilant_rules,
	["ч"] = stressed_sibilant_rules,
	["ж"] = stressed_sibilant_rules,
	["ц"] = stressed_c_rules,
	["к"] = velar_rules,
	["г"] = velar_rules,
	["х"] = velar_rules,
}

export.unstressed_rules = {
	["ш"] = unstressed_sibilant_rules,
	["щ"] = unstressed_sibilant_rules,
	["ч"] = unstressed_sibilant_rules,
	["ж"] = unstressed_sibilant_rules,
	["ц"] = unstressed_c_rules,
	["к"] = velar_rules,
	["г"] = velar_rules,
	["х"] = velar_rules,
}

export.nonsyllabic_suffixes = m_table.listToSet({"", "ъ", "ь", "й"})

export.sibilant_suffixes = m_table.listToSet({"ш", "щ", "ч", "ж"})

return export