Module:sla-headword

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.

-- `u` is called below with Unicode code points (e.g. u(0x300)) to obtain the
-- corresponding characters used as combining diacritics.
local u = require("Module:string/char")

-- Table of entry points returned at the end of the module.
local export = {}

-- Language object for code "sla-pro"; its canonical name prefixes every
-- category name built in this module.
local lang = require("Module:languages").getByCode("sla-pro")

-- Combining diacritics recognised by the accent validator.
local GRAVE = u(0x300)    -- U+0300 COMBINING GRAVE ACCENT
local TILDE = u(0x303)    -- U+0303 COMBINING TILDE
local MACRON = u(0x304)   -- U+0304 COMBINING MACRON
local CARON = u(0x30C)    -- U+030C COMBINING CARON (part of the letter ě)
local DGRAVE = u(0x30F)   -- U+030F COMBINING DOUBLE GRAVE ACCENT
local INVBREVE = u(0x311) -- U+0311 COMBINING INVERTED BREVE
local OGONEK = u(0x328)   -- U+0328 COMBINING OGONEK (part of the letters ę, ǫ)


-- This could be moved to the common module
--
-- Returns true when every accent mark in `word` sits on a syllable where it
-- is permitted, false otherwise.
--
-- The word (minus a leading "*") is cut into syllables of the form
-- consonants + vowel + combining marks. Parsing stops at the first position
-- where no vowel can be matched, so anything after that point is not checked.
-- An l or r that opens a consonant cluster is attached to the vowel of the
-- preceding syllable, which makes that syllable count as long below.
local function are_accents_valid(word)
	local ufind, umatch, usub = mw.ustring.find, mw.ustring.match, mw.ustring.sub
	local to_nfd, to_nfc = mw.ustring.toNFD, mw.ustring.toNFC

	local onset_pattern = "^([bcčdďfgjklľmnňprřsšśtťvxzž()]*)(.-)$"
	-- A base vowel followed by any run of combining marks (U+0300..U+036F).
	local nucleus_pattern = "^([aeiouyьъ][" .. u(0x300) .. "-" .. u(0x36F) .. "]*)(.-)$"

	-- Decomposed letters that must be put back together so that the caron or
	-- ogonek is not mistaken for an accent mark. First matching rule wins.
	local recombine_rules = {
		{base = "e", mark = CARON, letter = "ě"},
		{base = "e", mark = OGONEK, letter = "ę"},
		{base = "o", mark = OGONEK, letter = "ǫ"},
	}

	-- Phase 1: split into syllables.
	local remainder = word:gsub("^%*", "")
	local parsed = {}

	while true do
		local onset, nucleus
		onset, remainder = umatch(remainder, onset_pattern)

		-- A cluster-initial l/r belongs to the previous syllable.
		local previous = parsed[#parsed]
		if previous and ufind(onset, "^[lr].") then
			previous.vowel = previous.vowel .. onset:sub(1, 1)
			onset = onset:sub(2)
		end

		-- Decompose so that the marks can be separated from the base vowel.
		nucleus, remainder = umatch(to_nfd(remainder), nucleus_pattern)
		if not nucleus then
			break
		end

		-- Consonant matching on the next pass relies on composed letters.
		remainder = to_nfc(remainder)

		local first_byte = nucleus:sub(1, 1)
		for _, rule in ipairs(recombine_rules) do
			if first_byte == rule.base and nucleus:find(rule.mark, nil, true) then
				nucleus = nucleus:gsub("^" .. rule.base, rule.letter):gsub(rule.mark, "")
				break
			end
		end

		parsed[#parsed + 1] = {
			consonants = onset,
			vowel = usub(nucleus, 1, 1),
			dia = usub(nucleus, 2),
		}
	end

	-- Phase 2: check the marks of every accented syllable.
	local total = #parsed
	for index = 1, total do
		local syl = parsed[index]
		local dia = syl.dia

		if dia ~= "" then
			local allowed

			if ufind(syl.vowel, "^[eoьъ]$") then
				-- Historically short vowel.
				if index == 1 then
					-- First syllable: short rising or short falling only.
					allowed = dia == GRAVE or dia == DGRAVE
				elseif index == total and ufind(syl.vowel, "^[ьъ]$") then
					-- A final yer takes no diacritic at all.
					allowed = false
				else
					-- Everywhere else: short rising only.
					allowed = dia == GRAVE
				end
			elseif dia == GRAVE or dia == MACRON then
				-- Long syllable: GRAVE (called "acute" in the original comment,
				-- presumably the notation's mark for it; confirm against
				-- WT:ASLA) and MACRON are accepted in any position.
				allowed = true
			elseif index == 1 then
				-- First syllable additionally accepts neoacute and circumflex.
				allowed = dia == TILDE or dia == INVBREVE
			elseif index == total then
				-- Last syllable accepts none of the remaining marks.
				allowed = false
			else
				-- Medial syllable additionally accepts neoacute only.
				allowed = dia == TILDE
			end

			if not allowed then
				return false
			end
		end
	end

	return true
end

-- Checks if the accents in the headword conform to [[WT:ASLA]].
--
-- heads:      list of headword strings to validate.
-- categories: list of category names; modified in place.
--
-- If at least one head fails validation, the "<language> entries with invalid
-- diacritics" category is appended exactly once. Scanning stops at the first
-- invalid head so that several bad heads do not add the same category
-- repeatedly.
local function check_accents(heads, categories)
	for _, head in ipairs(heads) do
		if not are_accents_valid(head) then
			table.insert(categories, lang:getCanonicalName() .. " entries with invalid diacritics")
			return
		end
	end
end


-- Headword line for adjectives.
--
-- Template parameters (read from the parent frame):
--   1=    comparative form(s), list
--   cat=  when equal to "adjective-forming suffixes", the entry is filed under
--         "suffixes" and additionally under that category
--   head= headword override(s), list
function export.adjective(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = {list = true},

		["cat"] = {},
		["head"] = {list = true},
	})

	local heads = args["head"]
	local categories = {}
	local inflections = {}
	local pos_category = "adjectives"

	if args["cat"] == "adjective-forming suffixes" then
		pos_category = "suffixes"
		table.insert(categories, lang:getCanonicalName() .. " " .. args["cat"])
	end

	-- Comparative
	local comparatives = args[1]
	if comparatives[1] then
		comparatives.label = "comparative"
		table.insert(inflections, comparatives)
	end

	check_accents(heads, categories)

	return require("Module:headword").full_headword({
		lang = lang,
		pos_category = pos_category,
		categories = categories,
		heads = heads,
		inflections = inflections,
	})
end


-- Headword line for adverbs.
--
-- Template parameters (read from the parent frame):
--   cat=  when equal to "adverb-forming suffixes", the entry is filed under
--         "suffixes" and additionally under that category
--   cat2= extra category name (without the language prefix), added as given
--   head= headword override(s), list
function export.adverb(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		["cat"] = {},
		["cat2"] = {},
		["head"] = {list = true},
	})

	local heads = args["head"]
	local categories = {}
	local pos_category = "adverbs"

	if args["cat"] == "adverb-forming suffixes" then
		pos_category = "suffixes"
		table.insert(categories, lang:getCanonicalName() .. " " .. args["cat"])
	end

	local extra_category = args["cat2"]
	if extra_category then
		table.insert(categories, lang:getCanonicalName() .. " " .. extra_category)
	end

	check_accents(heads, categories)

	return require("Module:headword").full_headword({
		lang = lang,
		pos_category = pos_category,
		categories = categories,
		heads = heads,
	})
end


-- Headword line for nouns.
--
-- Template parameters (read from the parent frame):
--   1=    gender code(s), list; defaults to "?". Recognised codes are m, f, n
--         and their -d (dual only) and -p (plural only) variants; anything
--         else is shown as "?".
--   cat=  when equal to "noun-forming suffixes", the entry is filed under
--         "suffixes" and additionally under that category
--   head= headword override(s), list
--   f=, m=, dim=, aug=, adj=  related forms, each a list
function export.noun(frame)
	local params = {
		[1] = {list = true, default = "?"},

		["cat"] = {},
		["head"] = {list = true},
		["m"] = {list = true},
		["f"] = {list = true},
		["dim"] = {list = true},
		["aug"] = {list = true},
		["adj"] = {list = true},
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local data = {lang = lang, pos_category = "nouns", categories = {}, heads = args["head"], genders = {}, inflections = {}}

	if args["cat"] == "noun-forming suffixes" then
		data.pos_category = "suffixes"
		table.insert(data.categories, lang:getCanonicalName() .. " " .. args["cat"])
	end

	-- Genders
	-- Maps each recognised gender code to the extra category it implies, or
	-- to false when it implies none. Unknown codes are absent (nil).
	local gender_categories = {
		["m"] = false, ["f"] = false, ["n"] = false,
		["m-d"] = "dualia tantum", ["f-d"] = "dualia tantum", ["n-d"] = "dualia tantum",
		["m-p"] = "pluralia tantum", ["f-p"] = "pluralia tantum", ["n-p"] = "pluralia tantum",
	}

	for _, val in ipairs(args[1]) do
		local tantum = gender_categories[val]
		if tantum == nil then
			table.insert(data.genders, "?")
		else
			table.insert(data.genders, val)
			if tantum then
				table.insert(data.categories, lang:getCanonicalName() .. " " .. tantum)
			end
		end
	end

	-- Related forms, in display order. ipairs (not pairs) is required here:
	-- pairs gives no guarantee about traversal order, and the order of
	-- data.inflections determines the order shown on the headword line.
	local related_forms = {
		{"f", "feminine"},
		{"m", "masculine"},
		{"dim", "diminutive"},
		{"aug", "augmentative"},
		{"adj", "related adjective"},
	}

	for _, spec in ipairs(related_forms) do
		local forms = args[spec[1]]
		if forms[1] then
			forms.label = spec[2]
			table.insert(data.inflections, forms)
		end
	end

	check_accents(data.heads, data.categories)

	return require("Module:headword").full_headword(data)
end


-- Headword line for verbs.
--
-- Template parameters (read from the parent frame):
--   a=    aspect code(s), list: pf, impf, impf-det, impf-indet, impf-freq;
--         anything else is shown as "?"
--   cat=  when equal to "verb-forming suffixes", the entry is filed under
--         "suffixes" and additionally under that category
--   head= headword override(s), list
--   impf=, pf=, indet=, freq=, det=  counterpart verbs, each a list
function export.verb(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		["a"] = {list = true},
		["cat"] = {},
		["head"] = {list = true},
		["impf"] = {list = true},
		["pf"] = {list = true},
		["det"] = {list = true},
		["indet"] = {list = true},
		["freq"] = {list = true},
	})

	local data = {
		lang = lang,
		pos_category = "verbs",
		categories = {},
		heads = args["head"],
		genders = {},
		inflections = {},
	}

	if args["cat"] == "verb-forming suffixes" then
		data.pos_category = "suffixes"
		table.insert(data.categories, lang:getCanonicalName() .. " " .. args["cat"])
	end

	-- Aspects
	-- Every imperfective code maps to the bare labels it adds to the
	-- headword line (possibly none).
	local imperfective_labels = {
		["impf"] = {},
		["impf-det"] = {"determinate"},
		["impf-indet"] = {"indeterminate"},
		["impf-freq"] = {"indeterminate", "frequentative"},
	}

	for _, aspect in ipairs(args["a"]) do
		local labels = imperfective_labels[aspect]

		if aspect == "pf" then
			table.insert(data.genders, aspect)
			table.insert(data.categories, lang:getCanonicalName() .. " perfective verbs")
		elseif labels then
			-- All imperfective subtypes are displayed as plain "impf".
			table.insert(data.genders, "impf")
			table.insert(data.categories, lang:getCanonicalName() .. " imperfective verbs")

			for _, label in ipairs(labels) do
				table.insert(data.inflections, {label = label})
			end
		else
			table.insert(data.genders, "?")
		end
	end

	-- Counterpart verbs, in display order: parameter name, then label.
	local counterparts = {
		{"impf", "imperfective"},
		{"pf", "perfective"},
		{"indet", "indeterminate"},
		{"freq", "frequentative"},
		{"det", "imperfective determinate"},
	}

	for _, spec in ipairs(counterparts) do
		local forms = args[spec[1]]
		if forms[1] then
			forms.label = spec[2]
			table.insert(data.inflections, forms)
		end
	end

	check_accents(data.heads, data.categories)

	return require("Module:headword").full_headword(data)
end

-- Headword line for roots.
--
-- Template parameters (read from the parent frame):
--   1=    list, defaults to "?"; accepted but not used by this function
--   cat=  accepted but not used by this function
--   head= headword override(s), list
function export.root(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = {list = true, default = "?"},

		["cat"] = {},
		["head"] = {list = true},
	})

	local heads = args["head"]
	local categories = {}

	check_accents(heads, categories)

	return require("Module:headword").full_headword({
		lang = lang,
		pos_category = "roots",
		categories = categories,
		heads = heads,
		genders = {},
	})
end

return export