-- Jump to content

-- Module:sw-utilities/sandbox

-- From Wiktionary, the free dictionary


local m_links = require("Module:links")
local m_head = require("Module:headword")

local lang = require("Module:languages").getByCode("sw")

--- Uppercase the first character of a string, leaving the rest untouched.
-- Splitting and case conversion both go through mw.ustring so that a
-- multi-byte UTF-8 first character is treated as a single unit; combining
-- mw.ustring.sub with the byte-based string.sub would cut such a character
-- in half and duplicate part of it in the result.
-- @param text the string to convert
-- @return `text` with its first character in upper case
local function ucfirst(text)
	return mw.ustring.upper(mw.ustring.sub(text, 1, 1)) .. mw.ustring.sub(text, 2)
end
--- Lowercase the first character of a string, leaving the rest untouched.
-- Splitting and case conversion both go through mw.ustring so that a
-- multi-byte UTF-8 first character is treated as a single unit; combining
-- mw.ustring.sub with the byte-based string.sub would cut such a character
-- in half and duplicate part of it in the result.
-- @param text the string to convert
-- @return `text` with its first character in lower case
local function lcfirst(text)
	return mw.ustring.lower(mw.ustring.sub(text, 1, 1)) .. mw.ustring.sub(text, 2)
end

-- Table of the functions this module exposes; it is returned at the end of the file.
local export = {}

-- Maps a noun class code, as given in the first template parameter of the
-- noun headword, to the number of the singular noun class.
-- A code is either a prefix (optionally followed by a hyphen and the plural
-- prefix, which noun_headword uses to guess the plural) or a class number.
local class_codes = {
	-- prefix-based codes
	["vi"] = 7,
	["vy"] = 7,
	["ch-vy"] = 7,
	["ki"] = 7,
	["ki-vi"] = 7,
	["mi"] = 3,
	["mu-mi"] = 3,
	["mw-mi"] = 3,
	["m-mi"] = 3,
	["wa"] = 1,
	["mw-wa"] = 1,
	["m-wa"] = 1,
	["n"] = 9,
	["ji-ma"] = 5,
	["ja-ma"] = 5,
	["j-m"] = 5,
	["ji-me"] = 5,
	["ma"] = 5, -- the original template took this to be ji-ma, but with plural formed with just prefix ma-
	["u-n"] = 11,
	["u-ma"] = 11,
	["w"] = 11,
	["w-ny"] = 11,
	["u-m"] = 11,
	["ul-nd"] = 11,
	["ur-nd"] = 11,
	["uw-mb"] = 11,
	["u"] = 11,
	["pa"] = 16,
	-- class numbers, written as Arabic or Roman numerals
	["1"] = 1, ["I"] = 1,
	["2"] = 2, ["II"] = 2,
	["3"] = 3, ["III"] = 3,
	["4"] = 4, ["IV"] = 4,
	["5"] = 5, ["V"] = 5,
	["6"] = 6, ["VI"] = 6,
	["7"] = 7, ["VII"] = 7,
	["8"] = 8, ["VIII"] = 8,
	["9"] = 9, ["IX"] = 9,
	["10"] = 10, ["X"] = 10,
	["11"] = 11, ["XI"] = 11,
	-- special code, handled separately at the end of noun_headword
	["59"] = 59 --for words that can optionally be ji-(V) or n-(IX) in singular
}

-- Form of the "-a" particle for each noun class, keyed by the class's Roman
-- numeral. pluralize_a uses it to swap the singular form of the particle for
-- the plural one inside a multi-word plural.
local a_forms = {
	I = "wa", II = "wa",
	III = "wa", IV = "ya",
	V = "la", VI = "ya",
	VII = "cha", VIII = "vya",
	IX = "ya", X = "za",
	XI = "wa"
}

--- Strip a singular class prefix from the start of a word.
-- Arguments are read from `frame.args`:
--   1: the word
--   2: the singular class prefix to remove
-- When the prefix is exactly "m", a "u" directly following it is removed as
-- well. Note that, despite its name, this function only removes the singular
-- prefix; it does not add a plural one.
-- The prefix is matched literally: punctuation in it is escaped so that it
-- cannot act as a Lua pattern operator. Missing arguments are treated as
-- empty strings instead of raising an error.
-- @param frame frame object whose `args` table holds the two arguments
-- @return the word without the prefix, or the word unchanged when it does not
--         start with the prefix
function export.plural(frame)
	local word = frame.args[1] or ""
	local sg_class = frame.args[2] or ""

	-- "%p" matches every punctuation character, which covers all pattern
	-- magic characters; prefixing each with "%" makes it literal.
	local prefix_pattern = "^" .. (sg_class:gsub("%p", "%%%0"))
	if sg_class == "m" then
		prefix_pattern = prefix_pattern .. "u?"
	end

	-- Parentheses discard gsub's second return value (the match count).
	return (word:gsub(prefix_pattern, ""))
end

--- Wrap every word of a multi-word string in wikilink brackets.
-- The string is scanned for runs of whitespace/punctuation. If at least one
-- such run contains a character that is not in the word-internal set
-- (hyphen, maqaf, geresh, gershayim, apostrophe, full stop, middle dot,
-- asterisk), the string is treated as consisting of several words: the
-- separating characters are left outside the links and everything between
-- them is enclosed in "[[" and "]]". Otherwise the string is returned as is.
-- Declared local so that it does not leak into the global environment.
-- @param input_string the text to process
-- @return the text with each word linked, or the unchanged text when it
--         contains no word separator
local function link_words(input_string)
	local spacing_punctuation = "([%s%p]+)"
	-- Matches the part of a punctuation run that really separates words,
	-- i.e. everything except the word-internal characters listed in the set.
	local not_word_punc = "([^-־׳״'.·*]+)"

	-- Close the link before a separator and reopen it afterwards.
	local function relink_separator(run)
		-- Parentheses discard gsub's second return value (the match count).
		return (mw.ustring.gsub(run, not_word_punc, "]]%1[["))
	end

	local contains_words = false
	for possible_word_break in mw.ustring.gmatch(input_string, spacing_punctuation) do
		if mw.ustring.find(possible_word_break, not_word_punc) then
			contains_words = true
			break
		end
	end

	if not contains_words then
		return input_string
	end

	return "[["
		.. mw.ustring.gsub(input_string, spacing_punctuation, relink_separator)
		.. "]]"
end

--- Replace the singular "-a" particle in a guessed plural by its plural form.
-- Only the first occurrence of the particle that stands between two spaces is
-- replaced. The particle forms are looked up in `a_forms`.
-- Declared local so that it does not leak into the global environment.
-- @param plural_guess the plural phrase, still containing the singular particle
-- @param sing_class Roman numeral of the singular noun class
-- @param plur_class Roman numeral of the plural noun class
-- @return the corrected phrase; `plural_guess` unchanged when either class
--         has no entry in `a_forms`
local function pluralize_a(plural_guess, sing_class, plur_class)
	local sing_form = a_forms[sing_class]
	local plur_form = a_forms[plur_class]
	if sing_form == nil or plur_form == nil then
		return plural_guess
	end
	-- Parentheses discard gsub's second return value (the match count).
	return (string.gsub(plural_guess, " " .. sing_form .. " ", " " .. plur_form .. " ", 1))
end

--- Build the headword line of a Swahili noun.
--
-- The arguments are those of the calling template (`frame:getParent().args`):
--   head        headword; the page title is used when it is absent
--   1           noun class code: a key of `class_codes` (prefix pair such as
--               "ki-vi", single prefix, or class number); defaults to "?"
--   2           the plural, for when the guess would be wrong;
--               "-" means no plural, "plural" means plural only
--   anim        animate noun: the gender is reported as class I/II
--               (I/IX and II/X for class IX/X nouns)
--   coll        add a second plural formed with the prefix ma-
--   pl2, pl2cl  a second plural and its class code
--   cl, cl2, pl, plcl
--               specify classes and plural by hand; cannot be combined with
--               1, 2, anim or coll
--
-- Unless a plural is given, it is guessed: from the prefix pair in the class
-- code, by prefixing ma-, by the u-/w- rules, or by the per-class fallbacks.
--
-- @param frame the frame object of the #invoke call
-- @return the expansion of the "head" template with the assembled arguments
-- Raises an error when pl2 is given without a first plural, when pl2cl is
-- given without pl2, or when cl is combined with the shortcut parameters.
function export.noun_headword(frame)
	local parent_args = frame:getParent().args

	local params = {
		["head"] = {true, template_default = "kitu"},
		[1] = {default = "?", template_default = "ki-vi"}, --class prefix(es) or number
		[2] = true, --plural if not predicted correctly
		["anim"] = true, --animate
		["coll"] = true, --second plural in ma-
		["pl2"] = true, ["pl2cl"] = true, --second plural
		--to specify everything:
		["cl"] = true, ["cl2"] = true, ["pl"] = true, ["plcl"] = true
	}

	local child_args = require("Module:parameters").process(parent_args, params)

	local lemma = child_args["head"] or mw.title.getCurrentTitle().text
	local class_code = child_args[1]
	local upper_case = string.find(string.sub(lemma,1,1),"%u") -- the first letter is upper case

	-- Convert a class number to the Roman numeral used in gender codes.
	-- The module is only loaded when a conversion is actually needed.
	local function to_roman(number)
		return require("Module:roman numerals").arabic_to_roman(number)
	end

	-- Escape every punctuation character so that `text` is matched literally
	-- when used inside a Lua pattern.
	local function escape_pattern(text)
		return (string.gsub(text, "%p", "%%%0"))
	end

	-- Plural formed by prefixing ma- to the lemma, keeping its capitalisation.
	local function ma_plural()
		if upper_case then
			return "Ma" .. lcfirst(lemma)
		end
		return "ma" .. lemma
	end

	local plural = child_args[2] or ""
	local got_plural = plural ~= "" -- tracks whether a plural is known (given or guessed)

	local class
	local plural_class
	local sing_class_number = class_codes[class_code]
	if sing_class_number ~= nil then
		local plural_class_number = sing_class_number + 1 -- default
		if sing_class_number % 2 == 0 then plural_class_number = sing_class_number end -- this happens
		if sing_class_number == 11 then plural_class_number = 10 end -- plural in ma-(VI) will be dealt with later

		class = to_roman(sing_class_number)
		plural_class = to_roman(plural_class_number)

		-- pluralia tantum are listed under the plural class
		if sing_class_number == 9 and plural == "plural" then class = "X" end
		if sing_class_number == 5 and plural == "plural" then class = "VI" end -- maji and the like
	else
		class = "?"
		plural_class = "?"
	end

	-- arguments concerning singular to be given to head, animate nouns will be dealt with later
	local final_args = { "sw", "noun", head = child_args["head"], g = "c" .. class }

	-- if the noun class includes a hyphen, we can guess the plural
	local hyphen_pos = string.find(class_code, "-", 1, true) -- plain search, no pattern
	if not got_plural and hyphen_pos then
		local orig_pref = string.sub(class_code, 1, hyphen_pos - 1)
		if upper_case then orig_pref = ucfirst(orig_pref) end
		local orig_pattern = "^" .. escape_pattern(orig_pref)
		if string.find(lemma, orig_pattern) then
			local plur_pref = string.sub(class_code, hyphen_pos + 1)
			if plur_pref == "ma" then plural_class = "VI" end -- ma- plurals always class ma-(VI)
			if upper_case then plur_pref = ucfirst(plur_pref) end
			-- a function replacement inserts the prefix verbatim, so "%" in it is not special
			plural = string.gsub(lemma, orig_pattern, function() return plur_pref end)
			got_plural = true
		end
	end
	-- if the noun class is just ma, the plural is formed by adding prefix ma-
	if not got_plural and class_code == "ma" then
		plural = ma_plural()
		got_plural = true
	end
	-- if the noun class is just u or w, more advanced guessing is done
	if not got_plural and (class_code == "u" or class_code == "w") then
		-- first try the two-letter rules; unknown letter pairs are left as they are
		plural = string.gsub(lcfirst(lemma),"^%a%a",{ul="nd",ur="nd",ud="nd",uj="nj",ug="ng",uy="ny",uz="nz",uw="mb",ub="mb"})
		got_plural = plural ~= lcfirst(lemma)
		-- otherwise drop initial u- or turn initial w- into ny-
		if not got_plural then plural = string.gsub(lcfirst(lemma),"^%a",{u="",w="ny"}) end
		if upper_case then plural = ucfirst(plural) end
		got_plural = true
	end
	-- other cases for backward compatibility
	-- (the checks run in order, so a later, more specific prefix overrides an earlier one)
	if not got_plural then
		if class == "VII" and string.find( lemma, "^ki") then
			plural = string.gsub( lemma, "^ki", "vi" )
			got_plural = true
		end
		if class == "VII" and string.find( lemma, "^ch") then
			plural = string.gsub( lemma, "^ch", "vy" )
			got_plural = true
		end
		if class == "III" and string.find( lemma, "^m") then -- if it starts with mu- or mw-, this is corrected next
			plural = string.gsub( lemma, "^m", "mi" )
			got_plural = true
		end
		if class == "III" and string.find( lemma, "^mu") then
			plural = string.gsub( lemma, "^mu", "mi" )
			got_plural = true
		end
		if class == "III" and string.find( lemma, "^mw") then
			plural = string.gsub( lemma, "^mw", "mi" )
			got_plural = true
		end
		if class == "I" and string.find( lemma, "^m") then -- if it starts with mw-, this is corrected next
			plural = string.gsub( lemma, "^m", "wa" )
			got_plural = true
		end
		if class == "I" and string.find( lemma, "^mw") then -- if it starts with mwa-, this is corrected next
			plural = string.gsub( lemma, "^mw", "wa" )
			got_plural = true
		end
		if class == "I" and string.find( lemma, "^mwa") then
			plural = string.gsub( lemma, "^mwa", "wa" )
			got_plural = true
		end
		if class == "IX" then
			plural = lemma
			got_plural = true
		end
		if class == "XI" and string.find( lemma, "^w") then
			plural = string.gsub( lemma, "^w", "ny" )
			got_plural = true
		end
		if class == "XVI" then
			plural = "-"
			got_plural = true
		end
	end

	-- if there's an -a particle, correct the plural (guessed plurals only)
	if child_args[2] == nil and got_plural and plural ~= "-" and plural ~= "plural" then
		plural = pluralize_a(plural,class,plural_class)
	end

	--- generate the plural part of final_args
	if got_plural then
		if plural == "-" then
			table.insert(final_args, "no plural")
			final_args["cat2"] = "uncountable nouns"
		elseif plural == "plural" then
			table.insert(final_args, "plural only")
			final_args["cat2"] = "pluralia tantum"
		else
			table.insert(final_args, "plural")
			table.insert(final_args, plural)
			final_args["f1g"] = "c" .. plural_class
			final_args["f1accel-form"] = "p"
			final_args["f1accel-gender"] = "c" .. plural_class
		end
	else
		-- no plural known: ask for one
		table.insert(final_args, "plural")
		final_args["f1request"] = "1"
	end

	local pl2 = child_args["pl2"]
	local pl2cl = child_args["pl2cl"]

	if not child_args["cl"] and pl2 and not got_plural then error("specify plural with second unnamed parameter instead of pl2") end
	if not child_args["cl"] and pl2cl and not pl2 then error("second plural class but no second plural specified") end
	if not child_args["cl"] and pl2 then
		table.insert(final_args, "or")
		table.insert(final_args, pl2)
		final_args["f2accel-form"] = "p"
		-- use the given class of the second plural when it is a known code,
		-- otherwise fall back to the class of the first plural
		local pl2_gender
		if pl2cl and class_codes[pl2cl] ~= nil then
			pl2_gender = "c" .. to_roman(class_codes[pl2cl])
		else
			pl2_gender = "c" .. plural_class
		end
		final_args["f2g"] = pl2_gender
		final_args["f2accel-gender"] = pl2_gender
	end

	local anim = child_args["anim"]

	if anim then
		if class == "IX" then
			final_args["g"] = "cI/IX"
		elseif class == "X" then
			final_args["g"] = "cII/X"
		else
			if plural == "plural" then final_args["g"] = "cII" else final_args["g"] = "cI" end
		end
	end
	if anim and got_plural and plural ~= "-" and plural ~= "plural" then
		-- The displayed gender of the first plural and its acceleration data
		-- must agree, so both f1 keys are set (the data of a second form is
		-- left to the code that adds that form).
		local anim_plural_gender = "cII"
		if class == "IX" then anim_plural_gender = "cII/X" end
		final_args["f1g"] = anim_plural_gender
		final_args["f1accel-gender"] = anim_plural_gender
	end

	local coll = child_args["coll"]

	if coll and got_plural and plural ~= "-" and plural ~= "plural" then
		table.insert(final_args, "or")
		table.insert(final_args, pluralize_a(ma_plural(), class, "VI"))
		final_args["f2accel-form"] = "p"
		if anim then
			final_args["f2g"] = "cII/X"
			final_args["f2accel-gender"] = "cII/X"
		else
			final_args["f2g"] = "cVI"
			final_args["f2accel-gender"] = "cVI"
		end
	end

	if class_code == "59" then -- this we do by hand
		-- replaces everything assembled so far
		final_args = { "sw", "noun", head = lemma, g = "cV or IX", nogendercat = "1",
			"plural", ma_plural(), "or", lemma,
			f1g = "cVI", f2g = "cX", cat2 = "class V nouns", cat3 = "class IX nouns",
			["f1accel-form"] = "p", ["f2accel-form"] = "p", ["f1accel-gender"] = "cVI", ["f2accel-gender"] = "cX" }
	end

	if child_args["cl"] then --if everything was specified by hand
		if child_args[1] ~= "?" or child_args[2] or child_args["anim"] or child_args["coll"] then error("either use shortcuts or specify cl") end
		-- distinct names: the outer class_code, plural and plural_class are not shadowed
		local manual_gender = child_args["cl"]
		local manual_plural = child_args["pl"]
		local manual_plural2 = child_args["pl2"]
		local manual_plural_class = child_args["plcl"] or "?"
		local manual_plural2_class = child_args["pl2cl"] or manual_plural_class
		local two_classes = false
		if child_args["cl2"] then
			manual_gender = manual_gender .. " or " .. child_args["cl2"]
			two_classes = true
		end
		-- replaces everything assembled so far
		final_args = { "sw", "noun", head = lemma, g = "c" .. manual_gender }
		if two_classes then
			final_args["nogendercat"] = "1"
			final_args["cat2"] = "class " .. child_args["cl"] .. " nouns"
			final_args["cat3"] = "class " .. child_args["cl2"] .. " nouns"
		end
		if manual_plural == "-" then
			table.insert(final_args, "no plural")
			final_args["cat2"] = "uncountable nouns"
		elseif manual_plural then
			table.insert(final_args, "plural")
			table.insert(final_args, manual_plural)
			final_args["f1g"] = "c" .. manual_plural_class
			final_args["f1accel-form"] = "p"
			final_args["f1accel-gender"] = "c" .. manual_plural_class
			if manual_plural2 then
				table.insert(final_args, "or")
				table.insert(final_args, manual_plural2)
				final_args["f2g"] = "c" .. manual_plural2_class
				final_args["f2accel-form"] = "p"
				final_args["f2accel-gender"] = "c" .. manual_plural2_class
			end
		else
			table.insert(final_args, "plural")
			final_args["f1request"] = "1"
		end
	end

	return frame:expandTemplate{
		title = "head",
		args = final_args
	}
end

--- Build the headword line of a Swahili verb.
-- The arguments are those of the calling template (`frame:getParent().args`):
--   head  the verb as shown in the headword; the page title when absent
--   1     the infinitive; "ku" followed by the head when absent
-- The head is displayed with a leading hyphen, and both forms are passed
-- through link_words before being handed to Module:headword.
-- @param frame the frame object of the #invoke call
-- @return whatever `full_headword` of Module:headword returns for the data
function export.verb_headword(frame)
	local parsed = require("Module:parameters").process(
		frame:getParent().args,
		{ [1] = {}, ["head"] = {} }
	)

	local stem = parsed["head"]
	if not stem then
		stem = mw.title.getCurrentTitle().text
	end

	local infinitive = parsed[1]
	if not infinitive then
		infinitive = "ku" .. stem
	end

	local linked_head = "-" .. link_words(stem)
	local linked_infinitive = link_words(infinitive)

	return m_head.full_headword({
		lang = lang,
		pos_category = "verbs",
		categories = {},
		heads = { linked_head },
		inflections = {
			{ label = "infinitive", accel = { form = "infinitive" }, linked_infinitive },
		},
	})
end

return export