-- Module:sms-common
--
-- From Wiktionary, the free dictionary


-- Table returned by this module. The only member assigned in this file is
-- `Stem` (set near the end of the file).
local export = {}

-- Language-specific data and hooks passed to Module:smi-common's
-- make_constructor. Fields defined here:
--   consonant, vowel  - lists of patterns that split off a final consonant
--                       cluster / final vowel
--   scons             - pattern/replacement lists indexed 1..3
--   vowel_variants    - named sets of vowel replacements with effect codes
--   preprocess        - hook that normalises svowel/scons on a stem object
--   postprocess       - hook that finalises a generated form
-- NOTE(review): how each field is consumed is decided by Module:smi-common,
-- which is not visible from this file.
local langdata = {
	-- Patterns that split a string into two captures: everything before, and
	-- a trailing run of non-vowel characters (vowels, "{", "}" and "-" are
	-- excluded from the run).
	-- NOTE(review): presumably tried in order with the first match winning —
	-- confirm in Module:smi-common.
	consonant = {
		-- Trailing run is preceded by i/I that itself follows a vowel other
		-- than i; the i/I is captured together with the run.
		"^(.-[aâeẹoõuåäAÂEOÕUÅÄ])([iI][^aâeẹioõuåäAÂEIOÕUÅÄ{}-]*)$",
		-- Same, for u/U following a vowel other than u.
		"^(.-[aâeẹioõåäAÂEIOÕÅÄ])([uU][^aâeẹioõuåäAÂEIOÕUÅÄ{}-]*)$",
		-- Fallback: the trailing run on its own (may be empty).
		"^(.-)([^aâeẹioõuåäAÂEIOÕUÅÄ{}-]*)$",
	},
	-- Patterns that split a string into two captures: everything before, and
	-- the final vowel sequence. The two-letter entries list the doubled vowels
	-- and vowel pairs that are recognised as a unit.
	-- NOTE(review): presumably tried in order with the first match winning —
	-- confirm in Module:smi-common.
	vowel = {
		"^(.-)(aa)$",
		"^(.-)(ââ)$",
		"^(.-)(e[âeä])$",
		"^(.-)(i[âeẹiõ])$",
		"^(.-)(oo)$",
		"^(.-)(õõ)$",
		"^(.-)(u[âeẹõuåä])$",
		"^(.-)(åå)$",
		"^(.-)(ää)$",
		-- Fallback: a single vowel, or nothing at all.
		"^(.-)([aâeioõuåä]?)$",
	},
	-- Pattern/replacement lists indexed 1..3. Each entry is
	-- {pattern, replacement[, second replacement]}; patterns are anchored at
	-- both ends, so they match a whole consonant string, and most begin with
	-- an optional ʹ capture that the replacements carry over.
	-- NOTE(review): the index presumably corresponds to the `quantity` value
	-- used in postprocess, and the replacements to the same cluster in other
	-- quantities — confirm in Module:smi-common.
	scons = {
		-- No entries for index 1.
		[1] = {
		},
		-- Doubled consonants. The second element is a shorter or altered form,
		-- the third inserts ˈ between the two consonants.
		[2] = {
			{"^(ʹ?)([đjlmnrv])%2$", "%1%2", "%1%2ˈ%2"},
			{"^(ʹ?)nnj$", "%1nj", "%1nˈnj"},
			
			{"^(ʹ?)cc$", "%1ʒʒ", "%1cˈc"},
			{"^(ʹ?)čč$", "%1jj", "%1čˈč"},
			{"^(ʹ?)kk$", "%1ǥǥ", "%1kˈk"},
			{"^(ʹ?)ǩǩ$", "%1ɉɉ", "%1ǩˈǩ"},
			{"^(ʹ?)pp$", "%1v", "%1pˈp"},
			{"^(ʹ?)ss$", "%1zz", "%1sˈs"},
			{"^(ʹ?)šš$", "%1žž", "%1šˈš"},
			{"^(ʹ?)tt$", "%1đ", "%1tˈt"},
		},
		-- Consonants doubled around ˈ, and clusters whose last consonant is
		-- doubled. Only one replacement is given per entry.
		[3] = {
			{"^(ʹ?)([bʒǯdgǧ])ˈ%2$", "%1%2%2"},
			{"^(ʹ?)lˈlj$", "%1llj"},
			
			-- h + doubled consonant: replaced by u + (ʹ) + single consonant.
			{"^(ʹ?)h([čsšt])%2$", "u%1%2"},
			
			-- Two-consonant clusters with a doubled second member: the
			-- replacement has the second member single, in a few cases
			-- changed to another letter (g → ǥ, ǧ → ɉ, dǧǧ → đɉ).
			-- In the i-/u- initial entries the ʹ capture follows the i/u.
			{"^(ʹ?)(b)([ǯdlr])%3$", "%1%2%3"},
			{"^(ʹ?)(c)([ǩ])%3$", "%1%2%3"},
			{"^(ʹ?)(d)(ǧ)%3$", "%1đɉ"},
			{"^(i)(ʹ?)([bdkǩnrtv])%3$", "%1%2%3"},
			{"^(i)(ʹ?)(g)%3$", "%1%2ǥ"},
			{"^(i)(ʹ?)(ǧ)%3$", "%1%2ɉ"},
			{"^(ʹ?)(l)([bdgkǩnmtv])%3$", "%1%2%3"},
			{"^(ʹ?)(l)(ǧ)%3$", "%1%2ɉ"},
			{"^(ʹ?)(m)([n])%3$", "%1%2%3"},
			{"^(ʹ?)(ŋ)([g])%3$", "%1%2%3"},
			{"^(ʹ?)(p)([st])%3$", "%1%2%3"},
			{"^(ʹ?)(r)([bdjmnv])%3$", "%1%2%3"},
			{"^(ʹ?)(r)(g)%3$", "%1%2ǥ"},
			{"^(ʹ?)(r)(ǧ)%3$", "%1%2ɉ"},
			{"^(ʹ?)(s)([kǩnt])%3$", "%1%2%3"},
			{"^(ʹ?)(š)([kǩnt])%3$", "%1%2%3"},
			{"^(ʹ?)(t)([k])%3$", "%1%2%3"},
			{"^(u)(ʹ?)([ʒdjǩlnŋrt])%3$", "%1%2%3"},
			{"^(ʹ?)(v)([dj])%3$", "%1%2%3"},
		},
	},
	-- Named sets of vowel replacements. Each set maps a vowel ("a", "â" or
	-- "e") to {replacement[, effect code]}; a vowel missing from a set has no
	-- entry there. The effect codes (AU, AA, AE, AI, EI, EA) are the values
	-- that postprocess checks its vowel_effect argument against.
	-- NOTE(review): the keyed vowel is presumably the one stored in a form's
	-- `uvowel` field, and the set names presumably identify inflectional
	-- contexts — confirm in Module:smi-common and its callers.
	vowel_variants = {
		normal        = {                                                             },
		normal0       = {["a"] = {""}       , ["â"] = {""}       , ["e"] = {""}       },
		e             = {["a"] = {"u", "AU"}, ["â"] = {"e", "AI"}, ["e"] = {"a", "EA"}},
		past_ptcp     = {                                          ["e"] = {"a", "EA"}},
		j             = {                                          ["e"] = {"e", "EI"}},
		j0            = {                     ["â"] = {""}       , ["e"] = {"" , "EI"}},
		j_contr       = {["a"] = {"u", "AU"}, ["â"] = {"e", "AI"}, ["e"] = {"e", "EI"}},
		j_contr2      = {["a"] = {"u", "AU"}, ["â"] = {"i", "AI"}, ["e"] = {"i", "EI"}},
		pres_12sg     = {                     ["â"] = {"a"}      , ["e"] = {"a", "EA"}},
		pres_3sg_even = {                     ["â"] = {"" , "AA"}, ["e"] = {"" , "EA"}},
		pres_3sg_odd  = {                     ["â"] = {"a"}      , ["e"] = {"a"}      },
		pres_3pl      = {                     ["â"] = {"e", "AE"}                     },
		impr          = {                     ["â"] = {"a", "AA"}, ["e"] = {"a", "EA"}},
	},
	-- Normalise the svowel and scons fields of a stem object in place.
	--   self: table with string fields `svowel` and `scons`.
	-- Returns nothing.
	preprocess = function(self)
		local ulen, ugsub = mw.ustring.len, mw.ustring.gsub
		local vowel = self.svowel
		
		-- A one-character (short) vowel: put the overlong mark ˈ inside a
		-- doubled consonant. "llj" is handled first because it is not a plain
		-- two-character geminate.
		if ulen(vowel) == 1 then
			local cons = ugsub(self.scons, "^(ʹ?)llj$", "%1lˈlj")
			cons = ugsub(cons, "^(ʹ?)(.)%2$", "%1%2ˈ%2")
			self.scons = cons
		end
		
		-- Map the ẹ-diphthongs onto their canonical spelling.
		local canonical = {
			["iẹ"] = "eä",
			["uẹ"] = "uä",
		}
		vowel = canonical[vowel] or vowel
		
		-- A doubled vowel is stored as a single character from here on.
		vowel = ugsub(vowel, "^(.)%1$", "%1")
		self.svowel = vowel
	end,
	-- Finalise a generated form in place.
	--
	-- Parameters:
	--   form          table; the fields read and/or rewritten here are svowel,
	--                 scons, quantity, uvowel, ucons and ending.
	--   vowel_effect  one of "AU", "AA", "AE", "AI", "EI", "EA" (the codes that
	--                 appear in vowel_variants). Any other value, including
	--                 nil, skips the effect step.
	-- Returns nothing.
	--
	-- Steps, in order:
	--   1. apply the vowel effect to svowel (and, for AE/AI/EA, to scons);
	--   2. respell eä/uä as iẹ/uẹ when scons contains ʹ and quantity is 2;
	--   3. for a single-character svowel: strip ˈ from scons in quantity 3,
	--      otherwise double the vowel;
	--   4. insert ʼ where the stem consonant meets an identical consonant;
	--   5. spell a word-final ucons "j" as "i";
	--   6. replace the internal letter ɉ with j.
	-- NOTE(review): this hook is invoked by Module:smi-common; when and how
	-- often is not visible from this file.
	postprocess = function(form, vowel_effect)
		if vowel_effect == "AU" then
			-- Vowel change only.
			if form.svowel == "â" then
				form.svowel = "õ"
			elseif form.svowel == "e" then
				form.svowel = "i"
			elseif form.svowel == "o" then
				form.svowel = "u"
			elseif form.svowel == "å" then
				form.svowel = "o"
			elseif form.svowel == "ä" then
				form.svowel = "a"
			elseif form.svowel == "eâ" then
				form.svowel = "iõ"
			elseif form.svowel == "uâ" then
				form.svowel = "uõ"
			elseif form.svowel == "eä" then
				form.svowel = "iâ"
			elseif form.svowel == "uä" then
				form.svowel = "uå"
			end
		elseif vowel_effect == "AA" then
			-- Vowel change only; the mappings are the reverse of AU's.
			if form.svowel == "i" then
				form.svowel = "e"
			elseif form.svowel == "o" then
				form.svowel = "å"
			elseif form.svowel == "õ" then
				form.svowel = "â"
			elseif form.svowel == "u" then
				form.svowel = "o"
			elseif form.svowel == "a" then
				form.svowel = "ä"
			elseif form.svowel == "iâ" then
				form.svowel = "eä"
			elseif form.svowel == "iõ" then
				form.svowel = "eâ"
			elseif form.svowel == "uå" then
				form.svowel = "uä"
			elseif form.svowel == "uõ" then
				form.svowel = "uâ"
			end
		elseif vowel_effect == "AE" then
			-- Add the ʹ mark (after a leading i/u, if there is one) and switch
			-- g/k/ǥ to ǧ/ǩ/ɉ.
			form.scons = form.scons:gsub("^([iu]?)", "%1ʹ")
			form.scons = form.scons:gsub("g", "ǧ")
			form.scons = form.scons:gsub("k", "ǩ")
			form.scons = form.scons:gsub("ǥ", "ɉ")
			
			-- Same vowel mappings as AA, except that iõ/uõ become ie/ue.
			if form.svowel == "i" then
				form.svowel = "e"
			elseif form.svowel == "o" then
				form.svowel = "å"
			elseif form.svowel == "õ" then
				form.svowel = "â"
			elseif form.svowel == "u" then
				form.svowel = "o"
			elseif form.svowel == "a" then
				form.svowel = "ä"
			elseif form.svowel == "iâ" then
				form.svowel = "eä"
			elseif form.svowel == "iõ" then
				form.svowel = "ie"
			elseif form.svowel == "uå" then
				form.svowel = "uä"
			elseif form.svowel == "uõ" then
				form.svowel = "ue"
			end
		elseif vowel_effect == "AI" then
			-- Same consonant treatment as AE: add ʹ and switch g/k/ǥ to ǧ/ǩ/ɉ.
			form.scons = form.scons:gsub("^([iu]?)", "%1ʹ")
			form.scons = form.scons:gsub("g", "ǧ")
			form.scons = form.scons:gsub("k", "ǩ")
			-- Fix: this used to map ɉ → ǥ, which is the direction EA uses
			-- when it removes ʹ; here ǥ has to follow g and k, as in AE.
			form.scons = form.scons:gsub("ǥ", "ɉ")
			
			if form.svowel == "iâ" then
				form.svowel = "ie"
			elseif form.svowel == "uå" then
				form.svowel = "ue"
			end
		elseif vowel_effect == "EI" then
			-- Vowel change only.
			if form.svowel == "â" then
				form.svowel = "õ"
			elseif form.svowel == "e" then
				form.svowel = "i"
			elseif form.svowel == "å" then
				form.svowel = "o"
			elseif form.svowel == "ä" then
				form.svowel = "a"
			elseif form.svowel == "eä" then
				form.svowel = "ie"
			elseif form.svowel == "ie" then
				form.svowel = "iõ"
			elseif form.svowel == "ue" then
				form.svowel = "uõ"
			elseif form.svowel == "uä" then
				form.svowel = "ue"
			end
		elseif vowel_effect == "EA" then
			-- Reverse of the AE/AI consonant treatment: drop every ʹ and
			-- switch ǧ/ǩ/ɉ back to g/k/ǥ.
			form.scons = form.scons:gsub("ʹ", "")
			form.scons = form.scons:gsub("ǧ", "g")
			form.scons = form.scons:gsub("ǩ", "k")
			form.scons = form.scons:gsub("ɉ", "ǥ")
			
			if form.svowel == "i" then
				form.svowel = "e"
			elseif form.svowel == "u" then
				form.svowel = "o"
			elseif form.svowel == "ie" then
				form.svowel = "eâ"
			elseif form.svowel == "ue" then
				form.svowel = "uâ"
			end
		end
		
		-- Changes to diphthongs before quantity 2: when scons carries ʹ,
		-- eä/uä are spelled iẹ/uẹ (the spellings preprocess canonicalises away).
		if form.scons:find("ʹ") and form.quantity == 2 then
			if form.svowel == "eä" then
				form.svowel = "iẹ"
			elseif form.svowel == "uä" then
				form.svowel = "uẹ"
			end
		end
		
		-- Do some things for vowels depending on quantity
		if mw.ustring.len(form.svowel) == 1 then
			if form.quantity == 3 then
				-- Remove the overlong mark, because the single vowel already indicates quantity
				form.scons = mw.ustring.gsub(form.scons, "ˈ", "")
			else
				-- Double the vowel
				form.svowel = mw.ustring.gsub(form.svowel, "^(.)$", "%1%1")
			end
		end
		
		-- Add apostrophe as consonant separator,
		-- if ucons or ending has the same consonant as the stem.
		-- Only applies when no vowel (uvowel) stands between them.
		if form.uvowel == "" then
			if form.ucons == "" then
				-- Stem consonant is directly followed by the ending.
				if mw.ustring.sub(form.scons, -1) == mw.ustring.sub(form.ending, 1, 1) then
					form.ending = "ʼ" .. form.ending
				end
			else
				-- Stem consonant is directly followed by ucons. Besides an
				-- identical consonant, l/n followed by j is also separated.
				-- (`and` binds tighter than `or`, so this reads
				-- "same consonant, or ([ln] then j)".)
				if mw.ustring.sub(form.scons, -1) == mw.ustring.sub(form.ucons, 1, 1) or mw.ustring.find(form.scons, "[ln]$") and mw.ustring.sub(form.ucons, 1, 1) == "j" then
					form.ucons = "ʼ" .. form.ucons
				end
			end
		end
		
		-- A ucons "j" with nothing after it is written "i".
		if form.ucons == "j" and form.ending == "" then
			form.ucons = "i"
		end
		
		-- Change ɉ to j. Up to this point ɉ is kept distinct from j so that
		-- the EA step can turn ɉ (but not j) into ǥ.
		form.scons = form.scons:gsub("ɉ", "j")
	end,
}

-- Build the Stem constructor from the shared module, configured with the
-- language data defined in this file.
local smi_common = require("Module:smi-common")
export.Stem = smi_common.make_constructor(langdata)

return export