-- Module:fi-pronunciation
-- Documentation for this module is located at Module:fi-pronunciation/documentation.
-- Implements Template:fi-pronunciation; relies on Module:fi-IPA for IPA generation
-- and on Module:fi-hyphenation for automatic hyphenation.
local export = {}
local m_IPA = require("Module:IPA")
local m_fi_IPA = require("Module:fi-IPA") -- <= the module you want to edit if the IPA transcription is wrong
local m_hyph = require("Module:fi-hyphenation") -- <= the module you want to edit if the automatic hyphenation is wrong
local bit32 = require("bit32")
-- Language code and the language object obtained from Module:languages; `lang` is
-- passed to the IPA, audio, rhymes, homophones and hyphenation formatters below.
local langcode = "fi"
local lang = require("Module:languages").getByCode(langcode)
-- Letters treated as vowels, and a character-class pattern matching exactly one of them.
local vowels = "aeiouyåäö"
local vowel = "[" .. vowels .. "]"
-- Letters treated as consonants (ʔ included), and the matching character-class pattern.
local consonants = "bcdfghjklmnpqrstvwxzšžʔ"
local consonant = "[" .. consonants .. "]"
-- Character that pron_equal substitutes for glottal stops before comparing with the title.
local apostrophe = "'"
-- Symbol exported by Module:fi-IPA; used here next to ˈ and ˌ in stress-mark character
-- classes (presumably the tertiary stress mark -- confirm against Module:fi-IPA).
local tertiary = m_fi_IPA.tertiary
-- Characters stripped from strings through "[" .. ipa_symb .. "]" patterns.
local ipa_symb = "ˣˈˌ"..tertiary.."̯̝̞̠̪" -- include ˣ because final gemination does not affect rhymes
-- Normalizes a page title for comparison with a respelling:
-- en dashes become ASCII hyphens and the result is lowercased.
-- @param x  title text
-- @return   the normalized title (a single string)
local function cleanup_title(x)
	-- gsub returns (string, count); bind only the string so that the
	-- replacement count is not forwarded to lower() as a stray argument.
	local dashed = mw.ustring.gsub(x, "–", "-")
	return mw.ustring.lower(dashed)
end
-- Builds one concatenation of `parts`, choosing a joiner between each pair of
-- neighbouring parts according to the bits of `n`.
-- Each entry of `parts` is { text, joiner_if_bit_clear, joiner_if_bit_set }.
-- Bit 0 of `n` controls the joiner after parts[1], bit 1 the one after parts[2], etc.
-- @param parts  array of triples as described above
-- @param n      bit mask selecting the joiners
-- @return       the assembled string
local function cartesian_make(parts, n)
	local assembled = parts[1][1]
	local mask = 1
	for idx = 2, #parts do
		-- slot 3 holds the joiner for a set bit, slot 2 the one for a clear bit
		local slot = 2
		if bit32.band(n, mask) > 0 then
			slot = 3
		end
		assembled = assembled .. parts[idx - 1][slot] .. parts[idx][1]
		mask = bit32.lshift(mask, 1)
	end
	return assembled
end
-- Returns every combination produced by cartesian_make for `parts`:
-- one string per bit mask from 0 up to 2^(#parts - 1) - 1, in increasing mask order.
-- @param parts  array of { text, joiner_if_bit_clear, joiner_if_bit_set } triples
-- @return       array of assembled strings
local function cartesian_combine(parts)
	local total = bit32.lshift(1, #parts - 1)
	local combos = {}
	local mask = 0
	while mask <= total - 1 do
		combos[#combos + 1] = cartesian_make(parts, mask)
		mask = mask + 1
	end
	return combos
end
-- Set of two-vowel sequences that is_potential_diphthong accepts
-- (doubled vowels and the listed vowel pairs); keys map to true.
local potential_diphthongs = {}
for _, pair in ipairs({
	"aa", "ee", "ii", "oo",
	"uu", "yy", "ää", "öö",
	"ai", "ei", "oi",
	"ui", "yi", "äi", "öi",
	"au", "eu", "iu", "ou",
	"äy", "ey", "iy", "öy",
	"ie", "uo", "yö",
}) do
	potential_diphthongs[pair] = true
end

-- Looks `d` up in the set above.
-- @param d  a two-character vowel sequence
-- @return   true when `d` is listed, nil otherwise
local function is_potential_diphthong(d)
	local listed = potential_diphthongs[d]
	return listed
end
-- Expands the optional break markers "(.)" and "(-)" in `word` into all variants:
-- each accepted marker is either dropped or replaced by its bare symbol.
--
-- When `only_breaking_diphthongs` is truthy, a "(.)" marker is only accepted if the
-- vowels around it form a potential diphthong and the text since the previous marker
-- does not already end in two vowels; "(-)" markers are always accepted.
--
-- A rejected marker is left in the text verbatim, which is also what happens when no
-- marker is accepted at all (the word is returned unchanged). Previously the text
-- preceding a rejected marker was silently dropped whenever another marker in the
-- same word was accepted.
--
-- @param word                      respelling possibly containing optional breaks
-- @param only_breaking_diphthongs  restrict "(.)" expansion as described above
-- @return array of variant strings ({ word } if no marker was accepted)
local function split_by_optional_break(word, only_breaking_diphthongs)
	local parts = {}
	local seg_start = 1 -- first character not yet emitted into `parts`
	local scan = 1      -- position from which the next marker is searched

	while true do
		local j, je = mw.ustring.find(word, "%([.-]%)", scan)
		if j == nil then break end

		local allow_break = true
		if only_breaking_diphthongs then
			-- text between the previous marker (accepted or not) and this one
			local prefix = mw.ustring.sub(word, scan, j - 1)
			-- the characters immediately before and after the marker
			local diphthong = mw.ustring.sub(word, j - 1, j - 1) .. mw.ustring.sub(word, j + 3, j + 3)
			-- if a dot, only when breaks a diphthong
			allow_break = mw.ustring.sub(word, j + 1, j + 1) ~= "." or (
				-- never a diphthong if two vowels precede
				not mw.ustring.match(prefix, "[aeiouyäö][aeiouyäö]$")
				and is_potential_diphthong(diphthong))
		end

		if allow_break then
			table.insert(parts, { mw.ustring.sub(word, seg_start, j - 1), "", mw.ustring.sub(word, j + 1, j + 1) })
			-- only an accepted marker ends the current segment
			seg_start = je + 1
		end
		scan = je + 1
	end

	if #parts == 0 then return { word } end
	table.insert(parts, { mw.ustring.sub(word, seg_start), "", "" })
	return cartesian_combine(parts)
end
-- Exposes split_by_optional_break on the module table under the key "p".
export.p=split_by_optional_break
-- Turns a respelling into the list of spellings that are fed to the hyphenator.
--
-- Optional length/gemination markers and IPA symbols are removed, "ː" is expanded
-- to a doubled character, "/" and "+" become hyphens, a leading hyphen is dropped
-- and "ŋn" is spelled "gn". If the title contains capital letters, the letters of
-- the respelling are replaced one by one with the title's letters (when they match
-- case-insensitively) so that the title's capitalization is kept.
--
-- @param word   respelling (model pronunciation)
-- @param title  page title
-- @return array of spellings, one per combination of optional breaks
local function get_autohyphenate_forms(word, title)
	word = mw.ustring.gsub(word, "%([*ˣ:ː]%)", "")
	word = mw.ustring.gsub(word, "(.)ː", "%1%1")
	word = mw.ustring.gsub(word, "[" .. ipa_symb .. "ˣ*]", "")
	word = mw.ustring.gsub(word, "[/+]", "-")
	word = mw.ustring.gsub(word, "^-", "")
	word = mw.ustring.gsub(word, "ŋn", "gn")

	if mw.ustring.lower(title) == title then
		word = mw.ustring.lower(word)
	else
		-- find letters in title
		local letters = {}
		for letter in mw.ustring.gmatch(title, "%a") do
			table.insert(letters, letter)
		end

		local respelled = {}
		local letter_index = 1
		for character in mw.ustring.gmatch(word, ".") do
			local next_letter = letters[letter_index]
			-- next_letter is nil once the title's letters are used up; in that
			-- case keep the character as is instead of calling lower(nil)
			if next_letter
				and mw.ustring.match(character, "%a")
				and mw.ustring.lower(next_letter) == mw.ustring.lower(character) then
				respelled[#respelled + 1] = next_letter
				letter_index = letter_index + 1
			else
				respelled[#respelled + 1] = character
			end
		end
		word = table.concat(respelled)
	end

	return split_by_optional_break(word)
end
-- Resolves gemination markers (* or ˣ) inside a word, for rhyme generation:
-- before a vowel the marker becomes ʔ, before a consonant the marker is
-- removed and the consonant gets a length mark (ː).
-- @param word  respelling
-- @return      the respelling with mid-word markers resolved
local function apply_gemination(word)
	local marker_before_vowel = "[*ˣ](" .. vowel .. ")"
	local marker_before_consonant = "[*ˣ](" .. consonant .. ")"
	local glottalized = mw.ustring.gsub(word, marker_before_vowel, "ʔ%1")
	local geminated = mw.ustring.gsub(glottalized, marker_before_consonant, "%1ː")
	return geminated
end
-- Turns a respelling into the list of forms from which rhymes are generated.
-- The word is lowercased, optional length/gemination markers are dropped,
-- mid-word gemination is applied, "ː" is expanded to a doubled character,
-- IPA symbols are removed and "/" and "+" become hyphens.
-- @param word  respelling (model pronunciation)
-- @return      array of forms, one per combination of optional breaks
local function get_autorhyme_forms(word)
	local lowered = mw.ustring.lower(word)
	local without_optional = mw.ustring.gsub(lowered, "%([*ˣ:ː]%)", "")
	local geminated = apply_gemination(without_optional)
	local doubled = mw.ustring.gsub(geminated, "(.)ː", "%1%1")
	local stripped = mw.ustring.gsub(doubled, "[" .. ipa_symb .. "]", "")
	local hyphenated = mw.ustring.gsub(stripped, "[/+]", "-")
	return split_by_optional_break(hyphenated)
end
-- Generates the rhyme for a (respelled) word.
--
-- The word is hyphenated with Module:fi-hyphenation, the last stressed syllable of
-- the final compound part is located, and everything from that syllable on (minus
-- its initial consonants) is converted to IPA with the IPA symbols removed.
--
-- @param word  respelling to generate the rhyme from
-- @return the rhyme string
-- @return the number of syllables in the whole word
function export.generate_rhyme(word)
	-- convert syllable weight to hyphen for next routine
	-- (just in case these are included manually... even if they shouldn't be)
	local fmtword = mw.ustring.gsub(word, "[ˈˌ"..tertiary.."]", "-")
	-- continue from fmtword (not word), so the stress mark conversion above is kept
	fmtword = mw.ustring.gsub(fmtword, "'", ".")
	local sylcount = #m_hyph.generate_hyphenation(fmtword, ".")

	-- get final part of a compound word
	local last_hyph = mw.ustring.find(fmtword, "%-[^%-]*$") or 0
	local last_part = mw.ustring.sub(fmtword, last_hyph + 1)

	-- split to syllables, keep . in case we have a syllable break
	local hyph = m_hyph.generate_hyphenation(last_part, ".")
	local last_index = #hyph
	local last_stressed = 1
	local prev_stress = false

	-- find last stressed syllable
	for index, syllable in ipairs(hyph) do
		local stressed = false
		if index == 1 then
			-- the first syllable is always stressed
			stressed = true
		elseif not prev_stress and index < last_index then
			-- shift stress if current syllable light and a heavy syllable occurs later
			stressed = index == last_index - 1 or not m_fi_IPA.is_light_syllable(syllable) or not m_fi_IPA.has_later_heavy_syllable(hyph, index + 1)
		end
		if stressed then
			last_stressed = index
		end
		prev_stress = stressed
	end

	-- collect the syllables from the last stressed one to the end
	local res = {}
	for i = last_stressed, #hyph, 1 do
		table.insert(res, hyph[i])
	end
	res = table.concat(res)

	-- remove initial consonants, convert to IPA, remove IPA symbols
	res = mw.ustring.gsub(res, "^%.", "")
	res = mw.ustring.gsub(res, "^" .. consonant .. "+", "")
	res = m_fi_IPA.IPA_wordparts(res, false)
	res = mw.ustring.gsub(res, "[" .. ipa_symb .. "]", "")
	res = mw.ustring.gsub(res, "^%.", "")
	return res, sylcount
end
-- Adds clarifying dots after the hyphenation parts that end inside a run of three
-- or more vowels. `pron` is hyphenated with Module:fi-hyphenation and rebuilt
-- from the resulting parts, with "." appended after each such part.
-- @param pron  respelling containing at least one run of three vowels
-- @return      the rebuilt respelling
local function add_trivowel_dots(pron)
	-- boundaries alternate between "second character of a vowel run" and
	-- "last character of that run"; a sentinel past the end closes the list
	local boundaries = {}
	local from = 1
	repeat
		local first, last = mw.ustring.find(pron, "[aeiouyäö][aeiouyäö][aeiouyäö]+", from)
		if first ~= nil then
			boundaries[#boundaries + 1] = first + 1
			boundaries[#boundaries + 1] = last
			from = last + 1
		end
	until first == nil
	boundaries[#boundaries + 1] = mw.ustring.len(pron) + 1

	-- generate hyphenation, and add dots within multivowel sequences
	local pieces = {}
	local consumed = 0 -- characters covered by the parts seen so far
	local cursor = 1   -- index of the next boundary not yet passed
	for _, syllable in ipairs(m_hyph.generate_hyphenation(pron, true)) do
		consumed = consumed + mw.ustring.len(syllable)
		while consumed >= boundaries[cursor] do
			cursor = cursor + 1
		end
		pieces[#pieces + 1] = syllable
		-- an even cursor means the part ended inside a vowel run
		if cursor % 2 == 0 then
			pieces[#pieces + 1] = "."
		end
	end
	return table.concat(pieces)
end
-- Checks whether a respelling matches the page title once respelling-only markup
-- has been removed. A missing or empty respelling always matches.
-- @param title  page title
-- @param pron   respelling (callers pass it lowercased)
-- @return       true if the normalized respelling equals the normalized title
local function pron_equal(title, pron)
	if not pron or pron == "" then
		return true
	end

	-- substitutions applied to the respelling, in this order
	local steps = {
		-- handle slashes and pluses as hyphens
		{ "[/+]", "-" },
		-- remove optional lengthening/shortening/syllable break/gemination, should not cause any issues
		{ "%([*ˣ.:ː-]%)", "" },
		-- remove gemination asterisks and syllable separating dots
		{ "*", "" },
		{ "%.", "" },
		-- map existing glottal stops to apostrophes
		{ "%(?ʔ%)?", apostrophe },
		-- /ŋn/ for /gn/ is fine
		{ "ŋn", "gn" },
		-- remove hyphens but also apostrophes right after hyphens
		-- (so that glottal stop is allowed after hyphen separating two same vowels)
		{ "-" .. apostrophe .. "?", "" },
	}

	local normalized = pron
	for _, step in ipairs(steps) do
		normalized = mw.ustring.gsub(normalized, step[1], step[2])
	end

	local bare_title = mw.ustring.gsub(cleanup_title(title), "-", "")
	return normalized == mw.ustring.lower(bare_title)
end
-- Rewrites the title for a known spelling/pronunciation mismatch so that it can
-- be compared with the respelling by pron_equal.
-- @param title  page title
-- @return       the title with "ruoan" replaced by "ruuan" (a single string)
local function pron_equal_special_cases(title)
	-- very common exception - support it
	-- the parentheses drop gsub's second result (the replacement count)
	return (mw.ustring.gsub(title, "ruoan", "ruuan"))
end
-- Entry point for Template:fi-pronunciation.
--
-- Reads the template parameters from the parent frame, generates IPA lines for
-- each respelling (or uses manually given IPA), and appends audio, rhymes,
-- homophones and syllabification lines. Hyphenation and rhymes are generated
-- automatically when they are not given and the respelling matches the title.
--
-- @param frame  frame object; when it is not a table the defaults below are used
-- @return       wikitext: one bulleted line per item, followed by categories
function export.show(frame)
	local title = mw.title.getCurrentTitle().text
	local pronunciation = { "" }
	local ipa = { nil }
	local rhymes = { nil }
	local hyphenation = { nil }
	local audio = { }
	local qualifiers = { }
	local hyphlabels = { }
	local rhymlabels = { }
	local homophones = { }
	local homophonelabels = { }
	local nohyphen = false
	local norhymes = false
	local csuffix = false
	local categories = { }

	if type(frame) == "table" then
		local params = {
			[1] = { list = true, default = "", allow_holes = true },
			["ipa"] = { list = true, default = nil, allow_holes = true },
			["h"] = { list = true, default = nil, allow_holes = true }, ["hyphen"] = {},
			["r"] = { list = true, default = nil, allow_holes = true }, ["rhymes"] = {},
			["a"] = { list = true, default = nil }, ["audio"] = {},
			["ac"] = { list = true, default = nil }, ["caption"] = {},
			["hh"] = { default = "" }, ["homophones"] = {},
			["q"] = { list = true, default = nil, allow_holes = true },
			["hp"] = { list = true, default = nil, allow_holes = true },
			["rp"] = { list = true, default = nil, allow_holes = true },
			["hhp"] = { list = true, default = nil, allow_holes = true },
			["nohyphen"] = { type = "boolean", default = false },
			["norhymes"] = { type = "boolean", default = false },
			["csuffix"] = { type = "boolean", default = false },
			["title"] = {}, -- for debugging or demonstration only
		}

		local args, further = require("Module:parameters").process(frame:getParent().args, params, true)

		title = args["title"] or title
		pronunciation = args[1]
		ipa = args["ipa"]
		hyphenation = args["h"]
		rhymes = args["r"]
		qualifiers = args["q"]
		hyphlabels = args["hp"]
		rhymlabels = args["rp"]
		nohyphen = args["nohyphen"]
		norhymes = args["norhymes"]
		csuffix = args["csuffix"]
		homophones = mw.text.split(args["hh"], ",")
		homophonelabels = args["hhp"]

		-- hacks
		-- fill a hole in the first position so that the ipairs loops below do not
		-- stop before reaching the second item. Parameter 1 is a list, so its
		-- items live in args[1] (= pronunciation); args[2] is never set.
		if pronunciation[2] and pronunciation[1] == nil then pronunciation[1] = "" end
		if ipa[2] and ipa[1] == nil then ipa[1] = "" end
		if #homophones == 1 and homophones[1] == "" then homophones = {} end

		-- the long parameter names override the first item of the lists
		if args["hyphen"] then hyphenation[1] = args["hyphen"] end
		if args["rhymes"] then rhymes[1] = args["rhymes"] end
		if args["homophones"] then homophones = mw.text.split(args["homophones"], ",") end

		local audios = args["a"]
		local captions = args["ac"]
		if args["audio"] then audios[1] = args["audio"] end
		-- the declared parameter is "caption" (there is no "captions" parameter)
		if args["caption"] then captions[1] = args["caption"] end

		for i, audiofile in ipairs(audios) do
			if audiofile then
				table.insert(audio, {lang = lang, file = audiofile, caption = captions[i]})
			end
		end
	end

	-- expand shorthand respellings: empty or "^" means the title itself,
	-- "*" or "(*)" means the title followed by that marker
	for i, p in ipairs(pronunciation) do
		if p == "" or p == "^" then
			pronunciation[i] = cleanup_title(title)
		elseif p == "*" or p == "(*)" then
			pronunciation[i] = cleanup_title(title) .. p
		elseif mw.ustring.find(p, "[!#]") then
			p = mw.ustring.gsub(p, "t!s", "ts")
			p = mw.ustring.gsub(p, "t#s", "ts")
			pronunciation[i] = p
		end
	end

	-- make sure #pronunciation >= #IPA
	for i, p in ipairs(ipa) do
		if not pronunciation[i] then
			pronunciation[i] = ""
		end
	end

	local manual_hr = false
	local ripa = {}
	local model_pronunciation = pronunciation[1]
	local autohyph = false
	local autorhyme = false

	-- preprocessing
	local i = 1
	local ruis = false
	while i <= #pronunciation do
		-- "%" after a character yields two respellings: one without the marker
		-- and one with the preceding character doubled
		if mw.ustring.find(pronunciation[i], "%", 1, true) then
			local original = pronunciation[i]
			local short = mw.ustring.gsub(original, "%%", "")
			local long = mw.ustring.gsub(original, "(.)%%", "%1%1")
			pronunciation[i] = short
			if model_pronunciation == original then
				model_pronunciation = long
			end
			i = i + 1
			table.insert(pronunciation, i, long)
		end
		-- a parenthesized single character between two vowels triggers the
		-- optional break expansion below
		if mw.ustring.find(pronunciation[i], "[aeiouyäö]%(.%)[aeiouyäö]", 1) then
			ruis = true
		end
		i = i + 1
	end

	if ruis then
		local new_pronunciation = {}
		for _, p in ipairs(pronunciation) do
			local split_i = split_by_optional_break(p, true)
			for _, np in ipairs(split_i) do
				table.insert(new_pronunciation, np)
			end
		end
		pronunciation = new_pronunciation
	end

	local has_spaces = mw.ustring.match(title, " ") or (pronunciation[1] and mw.ustring.match(pronunciation[1], " "))
	local is_suffix = mw.ustring.match(title, "^-")
	local is_prefix_or_suffix = not csuffix and (mw.ustring.match(title, "-$") or is_suffix)

	-- one IPA line per respelling
	for i, p in ipairs(pronunciation) do
		local qual = qualifiers[i] or ""
		if #qual > 0 then
			qual = " " .. require("Module:qualifier").format_qualifier(qualifiers[i])
		end

		if ipa[i] and ipa[i] ~= "" then
			-- manually given IPA disables automatic hyphenation and rhymes
			table.insert(ripa, "* " .. m_IPA.format_IPA_full {
				lang = lang,
				items = {{pron = ipa[i]}},
				no_count = has_spaces,
			} .. qual)
			manual_hr = true
		else
			if mw.ustring.find(p, ":") then p = mw.ustring.gsub(p, ":", "ː") end
			if mw.ustring.find(p, "%+") then p = mw.ustring.gsub(p, "%+", "-") end

			-- some fixes
			if mw.ustring.find(p, "[aeouyäö]ii") then
				p = mw.ustring.gsub(p, "([aeouyäö])(ii)", "%1.%2")
			end

			-- add clarifying dots
			if mw.ustring.find(p, "[aeiouyäö][aeiouyäö][aeiouyäö]") then
				p = add_trivowel_dots(p)
			end

			local IPA_narrow = m_fi_IPA.IPA_wordparts(p, true)
			local IPA = m_fi_IPA.IPA_wordparts(p, false)

			-- multi-word stress
			if has_spaces then
				IPA_narrow = mw.ustring.gsub(IPA_narrow, " ([^ˈˌ"..tertiary.."])", " ˈ%1")
				IPA = mw.ustring.gsub(IPA, " ([^ˈˌ"..tertiary.."])", " ˈ%1")
			end

			-- remove initial stress if suffix
			if is_suffix then
				if csuffix then
					IPA_narrow = mw.ustring.gsub(IPA_narrow, "^(%-?)ˈ", "%1ˌ")
					IPA = mw.ustring.gsub(IPA, "^(%-?)ˈ", "%1ˌ")
				else
					IPA_narrow = mw.ustring.gsub(IPA_narrow, "^(%-?)ˈ", "%1")
					IPA = mw.ustring.gsub(IPA, "^(%-?)ˈ", "%1")
				end
			end

			table.insert(ripa, "* " .. m_IPA.format_IPA_full {
				lang = lang,
				items = {{pron = "/" .. IPA .. "/"}, {pron = "[" .. IPA_narrow .. "]"}},
				no_count = has_spaces,
			} .. qual)
		end
	end

	local results = mw.clone(ripa)

	-- hyphenation and rhymes are only generated automatically for single words
	-- that are not affixes and whose respelling matches the title
	manual_hr = manual_hr or has_spaces or is_prefix_or_suffix or not (pron_equal(title, mw.ustring.lower(model_pronunciation)) or pron_equal(pron_equal_special_cases(title), mw.ustring.lower(model_pronunciation)))

	if not hyphenation[1] and not manual_hr then
		autohyph = true
		local forms = get_autohyphenate_forms(model_pronunciation, title)
		local seenhyphs = {}
		local i = 1
		for _, form in ipairs(forms) do
			if hyphenation[i] then break end
			local genhyph = m_hyph.generate_hyphenation(form, false)
			-- key used to skip hyphenations that were already added
			local genhyphj = table.concat(genhyph, "\n")
			if not seenhyphs[genhyphj] then
				hyphenation[i] = genhyph
				seenhyphs[genhyphj] = true
				i = i + 1
			end
		end
	elseif #hyphenation == 1 and hyphenation[1] == "-" then
		-- a single "-" means: no hyphenation
		hyphenation = {}
	end

	if not rhymes[1] and not manual_hr then
		autorhyme = true
		local forms = get_autorhyme_forms(model_pronunciation)
		for i, form in ipairs(forms) do
			if rhymes[i] then break end
			-- stored as { rhyme, syllable count }
			rhymes[i] = { export.generate_rhyme(form) }
		end
	elseif #rhymes == 1 and rhymes[1] == "-" then
		-- a single "-" means: no rhymes
		rhymes = {}
	end

	if not has_spaces and not is_prefix_or_suffix and not (hyphenation[1] and rhymes[1]) then
		table.insert(categories, "fi-pronunciation missing hyphenation or rhymes")
	end

	-- manually given hyphenations are strings; split them into syllable lists
	for i, h in ipairs(hyphenation) do
		if type(h) == "string" then
			hyphenation[i] = mw.text.split(h, '[' .. m_hyph.sep_symbols .. ']')
		end
	end

	for i, a in ipairs(audio) do
		table.insert(results, "* " .. require("Module:audio").format_audio(a))
	end

	if not norhymes then
		if #rhymes > 0 then
			-- merge rhymes if they have identical labels
			local last_label = false
			local new_rhymes = {}
			local new_labels = {}
			local current_list = {}

			for i, r in ipairs(rhymes) do
				local label = rhymlabels[i]
				if last_label == label then
					table.insert(current_list, r)
				else
					if #current_list > 0 then
						table.insert(new_rhymes, current_list)
					end
					if last_label ~= false then
						table.insert(new_labels, last_label)
					end
					current_list = { r }
					last_label = label
				end
			end

			table.insert(new_rhymes, current_list)
			table.insert(new_labels, last_label)
			rhymes = new_rhymes
			rhymlabels = new_labels
		end

		for i, r in ipairs(rhymes) do
			local label = ""
			if rhymlabels[i] then
				label = " " .. require("Module:qualifier").format_qualifier(rhymlabels[i])
			end
			if #r >= 1 then
				local sylcounts = nil
				local rhymeobjs = {}
				local rhymesseen = {}
				local explicitsylcounts = true

				for _, rhyme in ipairs(r) do
					if type(rhyme) == "table" then
						-- generated rhyme: { rhyme, syllable count }
						local rhymeis, sylcount = unpack(rhyme)
						local rhymeobj = rhymesseen[rhymeis]
						if not rhymeobj then
							local newrhyme = { rhyme = rhymeis, num_syl = {sylcount} }
							table.insert(rhymeobjs, newrhyme)
							rhymesseen[rhymeis] = { [sylcount] = true, object = newrhyme }
						elseif not rhymeobj[sylcount] then
							table.insert(rhymeobj.object.num_syl, sylcount)
							rhymeobj[sylcount] = true
						end
					else
						-- manually given rhyme: a plain string without a syllable count
						explicitsylcounts = false
						if not rhymesseen[rhyme] then
							local newrhyme = { rhyme = rhyme }
							table.insert(rhymeobjs, newrhyme)
							rhymesseen[rhyme] = { object = newrhyme }
						end
					end
				end

				if not explicitsylcounts then
					sylcounts = {}
					local sylkeys = {}
					-- get all possible syllable counts from syllabifications
					for i, h in ipairs(hyphenation) do
						local hl = #h
						if hl > 0 and not sylkeys[hl] then
							table.insert(sylcounts, hl)
							sylkeys[hl] = true
						end
					end
				end

				table.insert(results, "* " .. require("Module:rhymes").format_rhymes(
					{ lang = lang, rhymes = rhymeobjs, num_syl = sylcounts }) .. label)
			end
		end
	end

	if #homophones > 0 then
		local homophonedata = {}
		for i, h in ipairs(homophones) do
			table.insert(homophonedata, { ["term"] = h, ["qualifiers"] = homophonelabels[i] and { homophonelabels[i] } or nil })
		end
		table.insert(results, "* " .. require("Module:homophones").format_homophones(
			{ lang = lang, homophones = homophonedata }))
	end

	if not nohyphen and #hyphenation > 0 then
		local hyphs = {}
		for i, h in ipairs(hyphenation) do
			table.insert(hyphs, { ["hyph"] = h, ["qualifiers"] = hyphlabels[i] and { hyphlabels[i] } or nil })
		end
		table.insert(results, "* " .. require("Module:hyphenation").format_hyphenations(
			{ lang = lang, hyphs = hyphs, caption = "Syllabification<sup>([[Appendix:Finnish hyphenation|key]])</sup>" }))
	end

	return table.concat(results, "\n") .. require("Module:utilities").format_categories(categories, lang)
end
return export