Module:fi-pronunciation

From Wiktionary, the free dictionary
Jump to navigation Jump to search

Implements Template:fi-pronunciation; relies on Module:fi-IPA for IPA generation and Module:fi-hyphenation for automatic hyphenation.


local export = {}

local m_IPA = require("Module:IPA")
local m_fi_IPA = require("Module:fi-IPA") -- <= the module you want to edit if the IPA transcription is wrong
local m_hyph = require("Module:fi-hyphenation") -- <= the module you want to edit if the automatic hyphenation is wrong

local bit32 = require("bit32")

local langcode = "fi"
local lang = require("Module:languages").getByCode(langcode)

-- Letter inventories; each is also wrapped into a single-character Lua
-- pattern class ("[...]") for use in the gsub/match calls below.
local vowels = "aeiouyåäö"
local vowel = "[" .. vowels .. "]"
-- Note that the consonant inventory includes the glottal stop symbol ʔ.
local consonants = "bcdfghjklmnpqrstvwxzšžʔ"
local consonant = "[" .. consonants .. "]"
-- Character that glottal stops are mapped to when a respelling is compared
-- against the page title (see pron_equal).
local apostrophe = "'"
-- Taken from Module:fi-IPA; used below alongside ˈ and ˌ inside pattern
-- classes (presumably the tertiary stress marker -- confirm in Module:fi-IPA).
local tertiary = m_fi_IPA.tertiary
-- Set of symbols that are stripped (via "[" .. ipa_symb .. "]") when deriving
-- hyphenation and rhyme forms.
local ipa_symb = "ˣˈˌ"..tertiary.."̯̝̞̠̪" -- include ˣ because final gemination does not affect rhymes

-- Normalizes a page title for comparison with a respelling: every en dash
-- is turned into a plain hyphen and the result is lowercased.
local function cleanup_title(x)
	local with_plain_hyphens = mw.ustring.gsub(x, "–", "-")
	return mw.ustring.lower(with_plain_hyphens)
end

-- Builds one concrete string out of `parts` (a list of
-- { text, joiner_if_bit_clear, joiner_if_bit_set } triples).
-- Bit k-1 of `n` (least significant first) selects which joiner is placed
-- between part k and part k+1.
local function cartesian_make(parts, n)
	local joined = parts[1][1]
	local mask = 1
	local idx = 2
	while idx <= #parts do
		local previous = parts[idx - 1]
		local joiner
		if bit32.band(n, mask) > 0 then
			joiner = previous[3]
		else
			joiner = previous[2]
		end
		joined = joined .. joiner .. parts[idx][1]
		-- move on to the bit that governs the next boundary
		mask = bit32.lshift(mask, 1)
		idx = idx + 1
	end
	return joined
end

-- Returns every string obtainable from `parts` by independently choosing,
-- at each of the #parts - 1 boundaries, one of the two joiners.
-- The result list therefore has 2^(#parts - 1) entries, ordered by the
-- bitmask passed to cartesian_make (0 first).
local function cartesian_combine(parts)
	local combination_count = bit32.lshift(1, #parts - 1)
	local combinations = {}
	for mask = 0, combination_count - 1 do
		combinations[#combinations + 1] = cartesian_make(parts, mask)
	end
	return combinations
end

-- Set of two-vowel sequences treated as potential diphthongs (long vowels
-- included). Built from a flat list; every listed pair maps to true.
local potential_diphthongs = {}
for _, pair in ipairs({
	-- long vowels
	"aa", "ee", "ii", "oo", "uu", "yy", "ää", "öö",
	-- ending in i
	"ai", "ei", "oi", "ui", "yi", "äi", "öi",
	-- ending in u
	"au", "eu", "iu", "ou",
	-- ending in y
	"äy", "ey", "iy", "öy",
	-- opening diphthongs
	"ie", "uo", "yö",
}) do
	potential_diphthongs[pair] = true
end

-- Returns true when the two-character string `d` is in the set above,
-- nil otherwise.
local function is_potential_diphthong(d)
	local listed = potential_diphthongs[d]
	return listed
end

-- Expands optional break markers "(.)" and "(-)" in `word` into every
-- combination of "break present" / "break absent".
--
-- word: respelling that may contain "(.)" or "(-)" markers.
-- only_breaking_diphthongs: when truthy, a "(.)" marker is only expanded if
--   it splits a potential diphthong and the text directly before it (since
--   the previous marker) does not already end in two vowels. "(-)" markers
--   are always expanded.
--
-- Returns a list of strings. If no marker is expanded, the list holds just
-- `word`, unchanged (any markers still in it). Markers that are not expanded
-- are left verbatim in the output.
local function split_by_optional_break(word, only_breaking_diphthongs)
	local parts = {}
	-- position from which the next marker is searched for
	local search_from = 1
	-- start of the text that has not yet been emitted as a part; this only
	-- advances when a marker is accepted, so that text around a rejected
	-- marker stays in the following part instead of being dropped
	local segment_start = 1
	local found = false

	while true do
		local j, je = mw.ustring.find(word, "%([.-]%)", search_from)
		if j == nil then break end
		local break_char = mw.ustring.sub(word, j + 1, j + 1)
		local allow_break = true
		if only_breaking_diphthongs then
			local prefix = mw.ustring.sub(word, search_from, j - 1)
			-- the characters immediately before "(" and immediately after ")"
			local diphthong = mw.ustring.sub(word, j - 1, j - 1) .. mw.ustring.sub(word, j + 3, j + 3)
			-- if a dot, only when breaks a diphthong
			allow_break = break_char ~= "." or (
				-- never a diphthong if two vowels precede
				not mw.ustring.match(prefix, "[aeiouyäö][aeiouyäö]$")
				and is_potential_diphthong(diphthong))
		end
		if allow_break then
			found = true
			table.insert(parts, { mw.ustring.sub(word, segment_start, j - 1), "", break_char })
			segment_start = je + 1
		end
		search_from = je + 1
	end

	if not found then return { word } end

	-- trailing text after the last accepted marker
	table.insert(parts, { mw.ustring.sub(word, segment_start), "", "" })

	return cartesian_combine(parts)
end
export.p=split_by_optional_break

-- Derives the spelling(s) to feed to the automatic hyphenator from a
-- respelling.
--
-- word: the (model) respelling.
-- title: the page title; its letter case is copied onto the result.
--
-- Optional gemination/length markers are removed, "ː" is expanded to a
-- doubled character, IPA symbols are stripped, "/" and "+" become hyphens,
-- a leading hyphen is dropped and "ŋn" is written "gn". Returns the list
-- produced by split_by_optional_break (one entry per combination of optional
-- breaks).
local function get_autohyphenate_forms(word, title)
	word = mw.ustring.gsub(word, "%([*ˣ:ː]%)", "")
	word = mw.ustring.gsub(word, "(.)ː", "%1%1")
	word = mw.ustring.gsub(word, "[" .. ipa_symb .. "ˣ*]", "")
	word = mw.ustring.gsub(word, "[/+]", "-")
	word = mw.ustring.gsub(word, "^-", "")
	word = mw.ustring.gsub(word, "ŋn", "gn")

	if mw.ustring.lower(title) == title then
		word = mw.ustring.lower(word)
	else
		-- find letters in title
		local letters = {}
		for letter in mw.ustring.gmatch(title, "%a") do
			table.insert(letters, letter)
		end

		-- walk the respelling and, wherever its next letter matches the next
		-- unused title letter (ignoring case), take the title's casing
		local respelled = {}
		local letter_index = 1

		for character in mw.ustring.gmatch(word, ".") do
			local next_letter = letters[letter_index]
			-- next_letter is nil once the title's letters are exhausted;
			-- in that case keep the respelling's character as it is
			if next_letter
				and mw.ustring.match(character, "%a")
				and mw.ustring.lower(next_letter) == mw.ustring.lower(character) then
				table.insert(respelled, next_letter)
				letter_index = letter_index + 1
			else
				table.insert(respelled, character)
			end
		end

		word = table.concat(respelled)
	end

	return split_by_optional_break(word)
end

-- Resolves gemination markers (* or ˣ) that are followed by another sound,
-- for rhyme generation: before a vowel the marker becomes ʔ, before a
-- consonant the marker is dropped and the consonant is marked long (ː).
local function apply_gemination(word)
	local marker_before_vowel = "[*ˣ](" .. vowel .. ")"
	local marker_before_consonant = "[*ˣ](" .. consonant .. ")"
	local geminated = mw.ustring.gsub(word, marker_before_vowel, "ʔ%1")
	geminated = mw.ustring.gsub(geminated, marker_before_consonant, "%1ː")
	return geminated
end

-- Derives the form(s) to feed to the rhyme generator from a respelling.
-- The respelling is lowercased, optional gemination/length markers are
-- removed, remaining mid-word gemination is applied, "ː" is expanded to a
-- doubled character, IPA symbols are stripped and "/" and "+" become
-- hyphens. Returns the list produced by split_by_optional_break.
local function get_autorhyme_forms(word)
	local form = mw.ustring.lower(word)
	form = mw.ustring.gsub(form, "%([*ˣ:ː]%)", "")
	form = apply_gemination(form)

	-- remaining substitutions, applied in order
	local rules = {
		{ "(.)ː", "%1%1" },
		{ "[" .. ipa_symb .. "]", "" },
		{ "[/+]", "-" },
	}
	for _, rule in ipairs(rules) do
		form = mw.ustring.gsub(form, rule[1], rule[2])
	end

	return split_by_optional_break(form)
end

-- Generates the rhyme for a (respelled) word.
--
-- word: the respelling; hyphens separate compound parts, "." marks a
--   syllable break.
--
-- Returns two values: the rhyme (IPA of the last compound part from its last
-- stressed syllable onwards, without initial consonants or IPA symbols) and
-- the syllable count of the whole word as reported by Module:fi-hyphenation.
function export.generate_rhyme(word)
	-- convert syllable weight to hyphen for next routine
	-- (just in case these are included manually... even if they shouldn't be)
	local fmtword = mw.ustring.gsub(word, "[ˈˌ"..tertiary.."]", "-")
	-- apostrophes act as syllable breaks; this must operate on fmtword so
	-- that the stress mark replacement above is not thrown away
	fmtword = mw.ustring.gsub(fmtword, "'", ".")
	
	local sylcount = #m_hyph.generate_hyphenation(fmtword, ".")
	
	-- get final part of a compound word
	local last_hyph = mw.ustring.find(fmtword, "%-[^%-]*$") or 0
	local last_part = mw.ustring.sub(fmtword, last_hyph + 1)
	
	-- split to syllables, keep . in case we have a syllable break
	local hyph = m_hyph.generate_hyphenation(last_part, ".")
	local last_index = #hyph
	local last_stressed = 1
	local prev_stress = false
	
	-- find last stressed syllable
	for index, syllable in ipairs(hyph) do
		local stressed = false
		
		if index == 1 then
			-- the first syllable is always stressed
			stressed = true
		elseif not prev_stress and index < last_index then
			-- never two stressed syllables in a row, never the final syllable;
			-- shift stress if current syllable light and a heavy syllable occurs later
			stressed = index == last_index - 1 or not m_fi_IPA.is_light_syllable(syllable) or not m_fi_IPA.has_later_heavy_syllable(hyph, index + 1)
		end
		
		if stressed then
			last_stressed = index
		end
		prev_stress = stressed
	end
	
	-- everything from the last stressed syllable to the end of the word
	local res = {}
	for i = last_stressed, #hyph, 1 do 
		table.insert(res, hyph[i])	
	end
	
	res = table.concat(res)
	
	-- remove initial consonants, convert to IPA, remove IPA symbols
	res = mw.ustring.gsub(res, "^%.", "")
	res = mw.ustring.gsub(res, "^" .. consonant .. "+", "")
	res = m_fi_IPA.IPA_wordparts(res, false)
	res = mw.ustring.gsub(res, "[" .. ipa_symb .. "]", "")
	res = mw.ustring.gsub(res, "^%.", "")
	
	return res, sylcount
end

-- Inserts explicit "." syllable breaks inside runs of three or more vowels,
-- at the syllable boundaries that Module:fi-hyphenation finds there.
-- Returns the hyphenation parts concatenated back together with the extra
-- dots added.
local function add_trivowel_dots(pron)
	local run_pattern = "[aeiouyäö][aeiouyäö][aeiouyäö]+"

	-- For every vowel run, record the index of its second vowel and of its
	-- last vowel. While the number of boundaries already passed is odd
	-- (cursor even), the current position lies inside a run.
	local boundaries = {}
	local search_pos = 1
	local run_first, run_last = mw.ustring.find(pron, run_pattern, search_pos)
	while run_first ~= nil do
		boundaries[#boundaries + 1] = run_first + 1
		boundaries[#boundaries + 1] = run_last
		search_pos = run_last + 1
		run_first, run_last = mw.ustring.find(pron, run_pattern, search_pos)
	end
	-- sentinel beyond the end of the string
	boundaries[#boundaries + 1] = mw.ustring.len(pron) + 1

	-- generate hyphenation, and add dots within multivowel sequences
	local syllables = m_hyph.generate_hyphenation(pron, true)
	local consumed = 0
	local cursor = 1
	local pieces = {}
	for _, syllable in ipairs(syllables) do
		consumed = consumed + mw.ustring.len(syllable)
		-- skip every boundary that the end of this part has reached
		while consumed >= boundaries[cursor] do
			cursor = cursor + 1
		end
		pieces[#pieces + 1] = syllable
		if cursor % 2 == 0 then
			pieces[#pieces + 1] = "."
		end
	end

	return table.concat(pieces)
end

-- Checks whether the respelling `pron` spells the same word as `title`
-- once pronunciation-only markup has been removed. An absent or empty
-- respelling counts as equal. `pron` is compared as given (the caller
-- lowercases it); the title is normalized with cleanup_title.
local function pron_equal(title, pron)
	if (pron or "") == "" then
		return true
	end

	-- substitutions applied to the respelling, in order
	local rules = {
		-- handle slashes and pluses as hyphens
		{ "[/+]", "-" },
		-- remove optional lengthening/shortening/syllable break/gemination, should not cause any issues
		{ "%([*ˣ.:ː-]%)", "" },
		-- remove gemination asterisks and syllable separating dots
		{ "*", "" },
		{ "%.", "" },
		-- map existing glottal stops to apostrophes
		{ "%(?ʔ%)?", apostrophe },
		-- /ŋn/ for /gn/ is fine
		{ "ŋn", "gn" },
		-- remove hyphens but also apostrophes right after hyphens
		-- (so that glottal stop is allowed after hyphen separating two same vowels)
		{ "-" .. apostrophe .. "?", "" },
	}
	local normalized = pron
	for _, rule in ipairs(rules) do
		normalized = mw.ustring.gsub(normalized, rule[1], rule[2])
	end

	-- the title is compared without its hyphens
	local bare_title = mw.ustring.gsub(cleanup_title(title), "-", "")

	return normalized == mw.ustring.lower(bare_title)
end

-- Returns `title` with known spelling/pronunciation mismatches rewritten so
-- that pron_equal can accept the usual respelling.
-- Only the resulting string is returned; the outer parentheses drop the
-- substitution count that gsub returns as a second value.
local function pron_equal_special_cases(title)
	-- very common exception - support it
	return (mw.ustring.gsub(title, "ruoan", "ruuan"))
end

-- Entry point for the pronunciation template.
--
-- frame: the Scribunto frame; template arguments are read from its parent
--   frame. If `frame` is not a table, the built-in defaults below are used
--   and the pronunciation is derived from the current page title.
--
-- Produces one bulleted line per pronunciation (IPA), followed by audio
-- files, rhymes, homophones and syllabification, plus tracking categories.
-- Hyphenation and rhymes are generated automatically only when none are
-- given and the respelling matches the title (no manual IPA, no spaces, not
-- an affix).
--
-- Returns the wikitext as a single string.
function export.show(frame)
	local title = mw.title.getCurrentTitle().text
	local pronunciation = { "" }
	local ipa = { nil }
	local rhymes = { nil }
	local hyphenation = { nil }
	local audio = { }
	local qualifiers = { }
	local hyphlabels = { }
	local rhymlabels = { }
	local homophones = { }
	local homophonelabels = { }
	local nohyphen = false
	local norhymes = false
	local csuffix = false
	local categories = { }
	
	if type(frame) == "table" then
		local params = {
			[1] = { list = true, default = "", allow_holes = true },
			
			["ipa"] = { list = true, default = nil, allow_holes = true },
			["h"] = { list = true, default = nil, allow_holes = true }, ["hyphen"] = {},
			["r"] = { list = true, default = nil, allow_holes = true }, ["rhymes"] = {},
			["a"] = { list = true, default = nil }, ["audio"] = {},
			["ac"] = { list = true, default = nil }, ["caption"] = {},
			["hh"] = { default = "" }, ["homophones"] = {},
			
			["q"] = { list = true, default = nil, allow_holes = true },
			["hp"] = { list = true, default = nil, allow_holes = true },
			["rp"] = { list = true, default = nil, allow_holes = true },
			["hhp"] = { list = true, default = nil, allow_holes = true },
			
			["nohyphen"] = { type = "boolean", default = false },
			["norhymes"] = { type = "boolean", default = false },
			["csuffix"] = { type = "boolean", default = false },
			
			["title"] = {}, -- for debugging or demonstration only
		}
		
		local args = require("Module:parameters").process(frame:getParent().args, params, true)
		
		title = args["title"] or title
		pronunciation = args[1]
		ipa = args["ipa"]
		hyphenation = args["h"]
		rhymes = args["r"]
		qualifiers = args["q"]
		hyphlabels = args["hp"]
		rhymlabels = args["rp"]
		nohyphen = args["nohyphen"]
		norhymes = args["norhymes"]
		csuffix = args["csuffix"]
		homophones = mw.text.split(args["hh"], ",")
		homophonelabels = args["hhp"]
		
		-- hacks: make sure the first slot of each list exists when a later
		-- one does, so that ipairs() below does not stop immediately
		if pronunciation[2] and pronunciation[1] == nil then pronunciation[1] = "" end
		if ipa[2] and ipa[1] == nil then ipa[1] = "" end
		
		-- an empty hh= splits into a single empty string; treat as no homophones
		if #homophones == 1 and homophones[1] == "" then homophones = {} end
		-- long-named aliases override the first list item
		if args["hyphen"] then hyphenation[1] = args["hyphen"] end
		if args["rhymes"] then rhymes[1] = args["rhymes"] end
		if args["homophones"] then homophones = mw.text.split(args["homophones"], ",") end
		
		local audios = args["a"]
		local captions = args["ac"]
		if args["audio"] then audios[1] = args["audio"] end
		-- the declared parameter is "caption" (singular)
		if args["caption"] then captions[1] = args["caption"] end
		
		for i, audiofile in ipairs(audios) do
			if audiofile then
				table.insert(audio, {lang = lang, file = audiofile, caption = captions[i]})
			end
		end
	end

	-- expand shorthand respellings that stand for the page title
	for i, p in ipairs(pronunciation) do
		if p == "" or p == "^" then
			pronunciation[i] = cleanup_title(title)
		elseif p == "*" or p == "(*)" then
			-- title plus (optional) final gemination
			pronunciation[i] = cleanup_title(title) .. p
		elseif mw.ustring.find(p, "[!#]") then
			p = mw.ustring.gsub(p, "t!s", "ts")
			p = mw.ustring.gsub(p, "t#s", "ts")
			pronunciation[i] = p
		end
	end
	
	-- make sure #pronunciation >= #IPA
	for i, p in ipairs(ipa) do
		if not pronunciation[i] then
			pronunciation[i] = ""
		end
	end
	
	local manual_hr = false
	local ripa = {}
	local model_pronunciation = pronunciation[1]
	local autohyph = false
	local autorhyme = false

	-- preprocessing
	local i = 1
	local ruis = false
	while i <= #pronunciation do
		-- "%" after a character marks optional length: emit both the short
		-- form and, right after it, the long (doubled) form
		if mw.ustring.find(pronunciation[i], "%", 1, true) then
			local original = pronunciation[i]
			local short = mw.ustring.gsub(original, "%%", "")
			local long = mw.ustring.gsub(original, "(.)%%", "%1%1")
			pronunciation[i] = short
			if model_pronunciation == original then
				model_pronunciation = long
			end
			i = i + 1
			table.insert(pronunciation, i, long)
		end
		-- a parenthesized single character between two vowels: the list
		-- needs to be expanded by optional breaks below
		if mw.ustring.find(pronunciation[i], "[aeiouyäö]%(.%)[aeiouyäö]", 1) then
			ruis = true
		end
		i = i + 1
	end

	if ruis then
		local new_pronunciation = {}
		for _, p in ipairs(pronunciation) do
			local split_i = split_by_optional_break(p, true)
			for _, np in ipairs(split_i) do
				table.insert(new_pronunciation, np)
			end
		end
		pronunciation = new_pronunciation
	end
	
	local has_spaces = mw.ustring.match(title, " ") or (pronunciation[1] and mw.ustring.match(pronunciation[1], " "))
	local is_suffix = mw.ustring.match(title, "^-")
	local is_prefix_or_suffix = not csuffix and (mw.ustring.match(title, "-$") or is_suffix)
	for i, p in ipairs(pronunciation) do
		local qual = qualifiers[i] or ""
		
		if #qual > 0 then
			qual = " " .. require("Module:qualifier").format_qualifier(qualifiers[i])
		end
		
		if ipa[i] and ipa[i] ~= "" then
			-- manually specified IPA is shown as given and disables the
			-- automatic hyphenation and rhymes
			table.insert(ripa, "* " .. m_IPA.format_IPA_full {
				lang = lang,
				items = {{pron = ipa[i]}},
				no_count = has_spaces,
			} .. qual)
			manual_hr = true
		else
			if mw.ustring.find(p, ":") then p = mw.ustring.gsub(p, ":", "ː") end
			if mw.ustring.find(p, "%+") then p = mw.ustring.gsub(p, "%+", "-") end
					
			-- some fixes
			if mw.ustring.find(p, "[aeouyäö]ii") then
				p = mw.ustring.gsub(p, "([aeouyäö])(ii)", "%1.%2")
			end
			-- add clarifying dots
			if mw.ustring.find(p, "[aeiouyäö][aeiouyäö][aeiouyäö]") then
				p = add_trivowel_dots(p)
			end
			
			local IPA_narrow = m_fi_IPA.IPA_wordparts(p, true)
			local IPA = m_fi_IPA.IPA_wordparts(p, false)
			
			-- multi-word stress
			if has_spaces then
				IPA_narrow = mw.ustring.gsub(IPA_narrow, " ([^ˈˌ"..tertiary.."])", " ˈ%1")
				IPA = mw.ustring.gsub(IPA, " ([^ˈˌ"..tertiary.."])", " ˈ%1")
			end
			
			-- remove initial stress if suffix
			if is_suffix then
				if csuffix then
					-- with csuffix set, the initial stress is demoted to ˌ instead
					IPA_narrow = mw.ustring.gsub(IPA_narrow, "^(%-?)ˈ", "%1ˌ")
					IPA = mw.ustring.gsub(IPA, "^(%-?)ˈ", "%1ˌ")
				else
					IPA_narrow = mw.ustring.gsub(IPA_narrow, "^(%-?)ˈ", "%1")
					IPA = mw.ustring.gsub(IPA, "^(%-?)ˈ", "%1")
				end
			end
			
			table.insert(ripa, "* " .. m_IPA.format_IPA_full {
				lang = lang,
				items = {{pron = "/" .. IPA .. "/"}, {pron = "[" .. IPA_narrow .. "]"}},
				no_count = has_spaces,
			} .. qual)
		end
	end
	
	local results = mw.clone(ripa)
	-- automatic hyphenation/rhymes are only trusted when the model respelling
	-- is just the title (possibly via a known special case)
	manual_hr = manual_hr or has_spaces or is_prefix_or_suffix or not (pron_equal(title, mw.ustring.lower(model_pronunciation)) or pron_equal(pron_equal_special_cases(title), mw.ustring.lower(model_pronunciation)))
	
	if not hyphenation[1] and not manual_hr then
		autohyph = true
		local forms = get_autohyphenate_forms(model_pronunciation, title)
		local seenhyphs = {}
		local i = 1
		for _, form in ipairs(forms) do
			if hyphenation[i] then break end
			local genhyph = m_hyph.generate_hyphenation(form, false)
			-- key used to skip duplicate hyphenations
			local genhyphj = table.concat(genhyph, "\n")
			if not seenhyphs[genhyphj] then
				hyphenation[i] = genhyph
				seenhyphs[genhyphj] = true
				i = i + 1
			end
		end
	elseif #hyphenation == 1 and hyphenation[1] == "-" then
		-- a lone "-" explicitly suppresses hyphenation
		hyphenation = {}
	end

	if not rhymes[1] and not manual_hr then
		autorhyme = true
		local forms = get_autorhyme_forms(model_pronunciation)
		for i, form in ipairs(forms) do
			if rhymes[i] then break end
			-- automatic rhymes are stored as { rhyme, syllable count }
			rhymes[i] = { export.generate_rhyme(form) }
		end
	elseif #rhymes == 1 and rhymes[1] == "-" then
		-- a lone "-" explicitly suppresses rhymes
		rhymes = {}
	end

	if not has_spaces and not is_prefix_or_suffix and not (hyphenation[1] and rhymes[1]) then
		table.insert(categories, "fi-pronunciation missing hyphenation or rhymes")
	end	
	
	-- manually given hyphenations are strings; split them into syllable lists
	for i, h in ipairs(hyphenation) do
		if type(h) == "string" then
			hyphenation[i] = mw.text.split(h, '[' .. m_hyph.sep_symbols .. ']')
		end
	end
	
	for i, a in ipairs(audio) do
		table.insert(results, "* " .. require("Module:audio").format_audio(a))
	end
	
	if not norhymes then
		if #rhymes > 0 then
			-- merge rhymes if they have identical labels
			local last_label = false
			local new_rhymes = {}
			local new_labels = {}
			local current_list = {}
			
			-- Closes the current group. The label is stored at the group's
			-- own index so that unlabelled (nil) groups do not shift the
			-- labels of the groups that follow them.
			local function flush_group()
				table.insert(new_rhymes, current_list)
				new_labels[#new_rhymes] = last_label
			end
			
			for i, r in ipairs(rhymes) do
				local label = rhymlabels[i]
				if last_label == label then
					table.insert(current_list, r)
				else
					if #current_list > 0 then
						flush_group()
					end
					current_list = { r }
					last_label = label
				end
			end
			
			flush_group()
			rhymes = new_rhymes
			rhymlabels = new_labels
		end
		
		for i, r in ipairs(rhymes) do
			local label = ""
			if rhymlabels[i] then
				label = " " .. require("Module:qualifier").format_qualifier(rhymlabels[i])
			end
			if #r >= 1 then
				local sylcounts = nil
				local rhymeobjs = {}
				local rhymesseen = {}
				local explicitsylcounts = true
				for _, rhyme in ipairs(r) do
					if type(rhyme) == "table" then
						-- automatically generated: { rhyme, syllable count }
						local rhymeis, sylcount = unpack(rhyme)
						local rhymeobj = rhymesseen[rhymeis]
						if not rhymeobj then
							local newrhyme = { rhyme = rhymeis, num_syl = {sylcount} }
							table.insert(rhymeobjs, newrhyme)
							rhymesseen[rhymeis] = { [sylcount] = true, object = newrhyme }
						elseif not rhymeobj[sylcount] then
							-- same rhyme seen again with a new syllable count
							table.insert(rhymeobj.object.num_syl, sylcount)
							rhymeobj[sylcount] = true
						end
					else
						-- manually given rhyme: no syllable count attached
						explicitsylcounts = false
						if not rhymesseen[rhyme] then
							local newrhyme = { rhyme = rhyme }
							table.insert(rhymeobjs, newrhyme)
							rhymesseen[rhyme] = { object = newrhyme }
						end
					end
				end
				if not explicitsylcounts then
					sylcounts = {}
					local sylkeys = {}
					-- get all possible syllable counts from syllabifications
					for i, h in ipairs(hyphenation) do
						local hl = #h
						if hl > 0 and not sylkeys[hl] then
							table.insert(sylcounts, hl)
							sylkeys[hl] = true
						end
					end
				end
				table.insert(results, "* " .. require("Module:rhymes").format_rhymes(
					{ lang = lang, rhymes = rhymeobjs, num_syl = sylcounts }) .. label)
			end
		end
	end
	if #homophones > 0 then
		local homophonedata = {}
		for i, h in ipairs(homophones) do
			table.insert(homophonedata, { ["term"] = h, ["qualifiers"] = homophonelabels[i] and { homophonelabels[i] } or nil })
		end
		table.insert(results, "* " .. require("Module:homophones").format_homophones(
			{ lang = lang, homophones = homophonedata }))
	end
	if not nohyphen and #hyphenation > 0 then
		local hyphs = {}
		for i, h in ipairs(hyphenation) do
			table.insert(hyphs, { ["hyph"] = h, ["qualifiers"] = hyphlabels[i] and { hyphlabels[i] } or nil })
		end
		table.insert(results, "* " .. require("Module:hyphenation").format_hyphenations(
			{ lang = lang, hyphs = hyphs, caption = "Syllabification<sup>([[Appendix:Finnish hyphenation|key]])</sup>" }))
	end
	
	return table.concat(results, "\n") .. require("Module:utilities").format_categories(categories, lang)
end

return export