Module:fi-pronunciation
Appearance
- The following documentation is located at Module:fi-pronunciation/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
Implements Template:fi-pronunciation and Template:fi-IPA.
local export = {}
-- used by cartesian_make and cartesian_combine. loaded as necessary
local m_bit32
local m_IPA = require("Module:IPA")
local m_str_utils = require("Module:string utilities")
local find = m_str_utils.find
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local lower = m_str_utils.lower
local match = m_str_utils.match
local sub = m_str_utils.sub
local U = m_str_utils.char
local gsub_lookahead = require("Module:gsub lookahead")
-- language code handed to Module:languages when formatting output
local langcode = "fi"
-- spelled vowels
local vowels = "aeiouyåäö"
-- any of 'vowels' as a pattern
local vowel = "[" .. vowels .. "]"
-- spelled consonants (ʔ is included so that an explicit glottal stop counts as a consonant)
local consonants = "bcdfghjklmnpqrstvwxzšžʔ"
-- any of 'consonants' as a pattern
local consonant = "[" .. consonants .. "]"
-- straight apostrophe
local apostrophe = "'"
-- "tertiary stress", a weaker secondary stress (either rhythmic or in some compound words). is there a better way to represent this?
-- NOTE(review): this appears to be the same character as the secondary stress mark used elsewhere in this file - confirm
local tertiary = "ˌ"
-- symbols stripped from respellings/IPA when deriving rhymes and hyphenations
-- include ˣ because final gemination does not affect rhymes
local ipa_symb = "ˣˈˌ"..tertiary.."̯̝̞̠̪"
-- orthographic symbols that signify separation of syllables
local sep_symbols = "'’./ +\"-"
-- these signify that the next syllable is an "initial" syllable in a word
-- all symbols from here should also be in sep_symbols
-- NOTE(review): the stress marks listed here are not present in sep_symbols - confirm whether that is intended
local stress_indicators = "ˈˌ" .. tertiary .. "/ +-"
-- any of 'stress_indicators' as a pattern
local stress_indicator = "[" .. stress_indicators .. "]"
-- <<SYLLABIFICATION START>>
-- syllabification must also treat * (the gemination marker) as a consonant
local syl_consonants = consonants .. "*"
-- any of 'syl_consonants' as a pattern
local syl_consonant = "[" .. syl_consonants .. "]"
-- diphthongs and long vowels, keyed by the spelled two-vowel sequence
-- 1 = long vowel, 2 = Vi, 3 = VU, 4 = ie/UO
-- 1 and 2 are long vowels/diphthongs everywhere
-- 3 are diphthongs in wordlet-initial syllables or open syllables
-- 4 are diphthongs in wordlet-initial syllables only
local vowel_sequences = {
["aa"] = 1, ["ee"] = 1, ["ii"] = 1, ["oo"] = 1,
["uu"] = 1, ["yy"] = 1, ["ää"] = 1, ["öö"] = 1,
["ai"] = 2, ["ei"] = 2, ["oi"] = 2,
["ui"] = 2, ["yi"] = 2, ["äi"] = 2, ["öi"] = 2,
["au"] = 3, ["eu"] = 3, ["iu"] = 3, ["ou"] = 3,
["äy"] = 3, ["ey"] = 3, ["iy"] = 3, ["öy"] = 3,
["ie"] = 4, ["uo"] = 4, ["yö"] = 4
}
-- sonority by consonant (higher number is more sonorous)
-- consonants missing from this table make split_by_sonority fall back to
-- splitting before the last consonant of the cluster
local sonorities = {
["j"] = -1, ["l"] = -1, ["r"] = -1, ["v"] = -1, ["w"] = -1,
["m"] = -2, ["n"] = -2, ["ŋ"] = -2,
["h"] = -3, ["z"] = -3, ["ž"] = -3,
["s"] = -4, ["š"] = -4, ["f"] = -4,
["b"] = -5, ["d"] = -5, ["g"] = -5,
["k"] = -6, ["p"] = -6, ["t"] = -6,
}
-- 'native' three-consonant clusters; split_by_sonority always splits these as AB.C
local native_consonants = {
["lkk"] = true, ["nkk"] = true, ["rkk"] = true,
["nks"] = true,
["lpp"] = true, ["mpp"] = true, ["rpp"] = true,
["lsk"] = true, ["nsk"] = true, ["rsk"] = true,
["lss"] = true, ["nss"] = true, ["rss"] = true,
["lst"] = true, ["nst"] = true, ["rst"] = true,
["lts"] = true, ["rts"] = true,
["ltt"] = true, ["ntt"] = true, ["rtt"] = true,
}
-- Decides where an intervocalic consonant cluster is divided between syllables.
-- cluster: the spelled consonants between two vowels.
-- Returns the number of leading consonants that stay with the preceding syllable.
local function split_by_sonority(cluster)
	local size = len(cluster)
	-- one or two consonants: everything but the last stays behind
	if size <= 2 then
		return size - 1
	end
	-- 'native' three-consonant clusters are always AB.C
	if size == 3 and native_consonants[cluster] then
		return 2
	end
	-- keep "ng" together and decide the split within whatever follows it
	local ng_at = find(cluster, "ng")
	if ng_at then
		local remainder = sub(cluster, ng_at + 2)
		return ng_at + 1 + split_by_sonority(remainder)
	end
	-- walk backwards from the end while sonority keeps falling
	local ceiling = 0
	for pos = size, 2, -1 do
		local rank = sonorities[sub(cluster, pos, pos)]
		if rank == nil then
			-- unknown consonant: split before the final consonant
			return size - 1
		end
		if rank >= ceiling then
			return pos
		end
		ceiling = rank
	end
	return 1
end
-- used for syllabification overrides (any text between these is not syllabified)
-- both are private-use code points, so they cannot clash with ordinary spelling
local syl_override_start = U(0xEFF0)
local syl_override_end = U(0xEFF1)
-- single-character membership sets used by syllabify, built from the strings above
local syl_is_consonant = {}
local syl_is_vowel = {}
local syl_is_sep = {}
local syl_is_stressed = {}
for c in gmatch(syl_consonants, ".") do syl_is_consonant[c] = true end
for c in gmatch(vowels, ".") do syl_is_vowel[c] = true end
for c in gmatch(sep_symbols, ".") do syl_is_sep[c] = true end
for c in gmatch(stress_indicators, ".") do syl_is_stressed[c] = true end
-- Splits a (re)spelled word into a list of syllable strings.
-- word: the text to syllabify; matching is done on its lowercased form, but the
--   returned syllables are cut from the original text
-- keep_sep_symbols: true to keep every separator symbol in the output, or a string
--   listing the separator characters to keep; anything else drops them
-- mode may be one of the following:
-- default: naive hyphenation using Finnish hyphenation rules
-- "dots": use default rules, but add dots between syllable boundaries if allowed by keep_sep_symbols
-- "sonority": split consonant clusters according to the sonority sequencing principle
-- add dots for new splits (i.e. not those incurred by symbols) followed by multiple consonants if keep_sep_symbols allows it.
-- Returns an array of syllables. Raises an error if an override start marker has no matching end marker.
local function syllabify(word, keep_sep_symbols, mode)
local result = {} -- output syllables
local syllable_prefix = nil -- separator symbol if kept
local syllable_start = 1 -- index where the syllable being built starts
local has_syllable = false -- whether the syllable being built has any content yet
local cursor = 1 -- index of the character being examined
local found_vowel = false -- whether the syllable being built already has its vowel nucleus
local initial_syllable = true -- note: initial in wordlet, not word
local lower_word = lower(word)
-- whether separator character k is to be retained in the output
local function keep_sep(k)
return (keep_sep_symbols == true or (type(keep_sep_symbols) == "string" and find(keep_sep_symbols, k, 1, true)))
end
-- "." when every new boundary gets an explicit dot, otherwise nil
local dots = ((mode == "dots") and keep_sep(".")) and "." or nil
local sonority = mode == "sonority"
-- closes the syllable spanning syllable_start .. cursor - 1 and resets the per-syllable state
local function add_syllable()
-- add a syllable if not empty
if has_syllable then
local syllable_text = sub(word, syllable_start, cursor - 1)
if syllable_prefix then
syllable_text = syllable_prefix .. syllable_text
end
table.insert(result, syllable_text)
-- reset state
syllable_prefix = dots
initial_syllable = false
syllable_start = cursor
has_syllable = false
found_vowel = false
end
end
local len_word = len(word)
-- prebuild patterns
-- (pattern_vnc, pattern_vcv and pattern_stressed are not referenced again inside this function)
local pattern_cc = "^" .. syl_consonant .. "+"
local pattern_vnc = "^" .. vowel .. "+[^" .. syl_consonants .. "]"
local pattern_vcv = "^" .. vowel .. "+" .. syl_consonant .. vowel
local pattern_ccv = pattern_cc .. vowel
local pattern_sep = "^%(?([" .. sep_symbols .. "])%)?"
local pattern_stressed = "^%(?[" .. stress_indicators .. "]%)?"
while cursor <= len_word do
local symbol = sub(lower_word, cursor, cursor)
if syl_is_consonant[symbol] then
-- C...
local symbol2 = sub(lower_word, cursor + 1, cursor + 1)
if syl_is_vowel[symbol2] then
-- CV: end current syllable if we have found a vowel
if found_vowel then
add_syllable()
found_vowel = false
end
cursor = cursor + 1
elseif (sonority and found_vowel and syllable_start < cursor
and syl_is_consonant[symbol2] and match(lower_word, pattern_ccv, cursor)) then
-- (V)C+V: split by sonority
local cluster = match(lower_word, pattern_cc, cursor)
local split_point = split_by_sonority(cluster)
if split_point ~= nil then
-- split at the specified position.
cursor = cursor + split_point
add_syllable()
local len_cluster = len(cluster)
-- add a dot if dots are allowed, and if the split point is not before the final consonant
local add_dot = keep_sep(".") and split_point ~= len_cluster - 1
if add_dot then syllable_prefix = "." end
-- skip the rest of the cluster; it belongs to the new syllable
cursor = cursor + len_cluster - split_point
found_vowel = false
end
else
-- C: continue
cursor = cursor + 1
end
has_syllable = true
elseif syl_is_vowel[symbol] then
if found_vowel then
-- already found a vowel, end current syllable
add_syllable()
end
local vowel_length = 1
found_vowel = true
local vv = sub(lower_word, cursor, cursor + 1)
local rank = vowel_sequences[vv]
if rank then
-- check for diphthongs or long vowels
-- max_rank is the highest vowel_sequences rank that still counts as one nucleus here
local max_rank
if initial_syllable then
max_rank = 4
else
-- check for an open syllable
-- the syllable is closed if the vowel pair is followed by a consonant that is not itself followed by a vowel
local f1 = sub(lower_word, cursor + 2, cursor + 2)
local f2 = sub(lower_word, cursor + 3, cursor + 3)
if syl_is_consonant[f1] and not syl_is_vowel[f2] then
-- closed syllable
max_rank = 2
else
-- open syllable
max_rank = 3
end
end
if rank <= max_rank then
vowel_length = 2
end
end
cursor = cursor + vowel_length
has_syllable = true
else
local sepchar
if symbol == "(" then
-- look for e.g. (.)
sepchar = match(lower_word, pattern_sep, cursor)
else
sepchar = syl_is_sep[symbol] and symbol
end
if sepchar then
-- separates syllables
add_syllable()
syllable_prefix = nil
initial_syllable = syl_is_stressed[sepchar]
-- a parenthesised separator occupies three characters
cursor = cursor + (symbol == "(" and 3 or 1)
if not keep_sep(sepchar) then
-- drop the separator by starting the next syllable after it
syllable_start = cursor
end
found_vowel = false
elseif symbol == syl_override_start then
-- override. separate syllables
if has_syllable then
add_syllable()
elseif syllable_start < cursor then
-- has retained separator character, add to prefix
syllable_prefix = (syllable_prefix or "") .. sub(lower_word, syllable_start, cursor - 1)
end
syllable_start = cursor + (keep_sep(syl_override_start) and 0 or 1)
local override_end = find(lower_word, syl_override_end, syllable_start)
if not override_end then error("syl_override_start must end in syl_override_end") end
has_syllable = true
-- emit the overridden text as one syllable, with or without the end marker
if keep_sep(syl_override_end) then
cursor = override_end + 1
add_syllable()
else
cursor = override_end
add_syllable()
cursor = cursor + 1
end
syllable_start = cursor
else
-- ?: continue
cursor = cursor + 1
has_syllable = true
end
end
end
-- flush whatever is left after the last boundary
add_syllable()
return result
end
export.syllabify = syllabify
-- <<SYLLABIFICATION END>>
-- <<IPA START>>
-- IPA data
-- Finnish vowels in IPA
local ipa_vowels = "ɑeiouyæø"
local ipa_vowel = "[" .. ipa_vowels .. "]"
-- Finnish consonants in IPA
local ipa_consonants = "kptɡgbdfʔsnmŋlrhʋʃʒrjçɦx"
local ipa_consonant = "[" .. ipa_consonants .. "]"
-- IPA diacritics used in transcriptions of Finnish
local ipa_diacritics = "̝̞̠̪"
local ipa_diacritic = "[" .. ipa_diacritics .. "]"
local nonsyllabic = U(0x32F) -- inverted breve below
local unreleased = U(0x31A) -- combining mark appended to plosives in the narrow transcription
local nasalized = U(0x303) -- only referenced by the disabled nasalization rule in apply_post_fixes_narrow
local long = "ː"
-- IPA: either a spelled consonant/vowel or an IPA consonant/vowel
local spelled_consonant = "[" .. ipa_consonants .. "cšvwxzž]"
local spelled_vowel = "[" .. ipa_vowels .. "aäö]"
local plosives = "kptbdɡ"
-- characters that split a term into wordlets
local wordlet_sep = "[/-]"
-- stress mark patterns: p = any stress, s = non-primary stress, the d variants also accept a syllable dot
local stress_p = "[ˈˌ" .. tertiary .. "]"
local stress_s = "[ˌ" .. tertiary .. "]"
local stress_pd = "[ˈˌ" .. tertiary .. "%.]"
local stress_sd = "[ˌ" .. tertiary .. "%.]"
-- multi-letter spellings, tried before single letters at each position
local letter_sequence_phonemes = {
-- ALL FIRST PARTS MUST START WITH ^. longest sequence must come first.
{ "^ng", "ŋː" },
{ "^qu", "kʋ" },
{ "^zz", "ts" },
}
-- single-letter spellings and their phonemes
local letters_phonemes = {
-- symbols outside this list are not changed
["a"] = "ɑ",
["ä"] = "æ",
["ö"] = "ø",
["å"] = "o",
["c"] = "k",
["g"] = "ɡ",
["q"] = "k",
["v"] = "ʋ",
["š"] = "ʃ",
["ž"] = "ʒ",
["x"] = "ks",
["*"] = "ˣ",
["'"] = ".",
}
-- extra substitutions applied only to the narrow transcription
local replacements_narrow = {
["ɑ"] = "ɑ̝",
["e"] = "e̞",
["ø"] = "ø̞",
["o"] = "o̞",
["t"] = "t̪",
["s"] = "s̠"
}
-- This adds letters_phonemes["e"] = "e", letters_phonemes["i"] = "i", etc.
for letter in gmatch("eiouydhfjklmnprstu", ".") do
letters_phonemes[letter] = letter
end
-- Clean-up rules shared by the broad and the narrow transcription.
-- p: an IPA string; returns the string with every rule applied in order.
local function apply_post_fixes(p)
	-- a captured IPA consonant together with any diacritics attached to it
	local cons = "(" .. ipa_consonant .. ipa_diacritic .. "*)"
	local rules = {
		-- initial <gn> is /gn/
		{ "ˈŋn", "ˈɡn" },
		-- ŋ is short before consonant (by default)
		{ "ŋ" .. long .. "(" .. ipa_consonant .. ")", "ŋ%1" },
		-- dissimilation of vowels by sandhi
		{ "(" .. ipa_vowel .. ipa_diacritic .. "*)([" .. long .. nonsyllabic .. "]?)(" .. stress_s .. ")%1", "%1%2%3(ʔ)%1" },
		-- C1(.):C2 -> C1(.)(:)C2
		{ cons .. long .. cons, "%1(" .. long .. ")%2" },
		{ cons .. "%.%1" .. cons, "%1.(%1)%2" },
		{ cons .. "%(%.?%)%1" .. cons, "%1(.%1)%2" },
	}
	for _, rule in ipairs(rules) do
		p = gsub(p, rule[1], rule[2])
	end
	return p
end
-- Clean-up and allophone rules applied only to the narrow transcription.
-- p: an IPA string; returns the string with every rule applied in order.
local function apply_post_fixes_narrow(p)
	-- a captured IPA consonant together with any diacritics attached to it
	local cons = "(" .. ipa_consonant .. ipa_diacritic .. "*)"
	-- a captured optional stress mark or syllable dot followed by optional whitespace
	local gap = "(" .. stress_pd .. "?%s*)"
	-- a captured plosive together with any diacritics attached to it
	local plosive = "([" .. plosives .. "]" .. ipa_diacritic .. "*)"
	local rules = {
		-- C1:+C2 -> C1+(C1)C2
		{ cons .. long .. tertiary .. cons, "%1" .. tertiary .. "(%1)%2" },
		-- C1+C1C2 -> C1+(C1)C2
		-- but C1C2+C2C3 --> C1(C2)+C2C3
		-- NOTE(review): the first group below requires exactly one diacritic (no "*") - confirm this is intended
		{ "(" .. ipa_consonant .. ipa_diacritic .. ")" .. cons .. tertiary .. "(%.?)%2" .. cons, "%1(%2)" .. tertiary .. "%2%3%4" },
		{ cons .. tertiary .. "(%.?)%1" .. cons, "%1" .. tertiary .. "(%1)%2%3" },
		{ cons .. tertiary .. "%(?%.?%)%1" .. cons, "%1" .. tertiary .. "(.%1)%2" },
		-- t is alveolar in /ts/ and /st/
		{ "t̪" .. gap .. "s̠", "t%1s̠" },
		{ "s̠" .. gap .. "t̪", "s̠%1t" },
		-- n and l become dental in /lt/ /nt/ /tl/ /tn/
		{ "([ln])" .. gap .. "t̪", "%1̪%2t̪" },
		{ "t̪" .. gap .. "([ln])", "t̪%1%2̪" },
		-- long j, v after i, u diphthong
		{ "(i" .. nonsyllabic .. ")j(" .. ipa_vowel .. ")", "%1j(" .. long .. ")%2" },
		-- /ʋ/ after /u/ usually realized as /w/ (see Suomi, Toivanen and Ylitalo 2008, p. )
		{ "(u" .. nonsyllabic .. ")ʋ(" .. ipa_vowel .. ")", "%1w(" .. long .. ")%2" },
		-- cleanup
		{ "(" .. stress_s .. ")%.", "%1" },
		{ "(" .. stress_pd .. ")" .. stress_s, "%1" },
		-- tautosyllabic nasals nasalize vowels between them (see Suomi, Toivanen and Ylitalo 2008, p. 22)
		-- (disabled rule, kept for reference)
		--p = gsub(p, "([mnŋ]"..long.."?)("..ipa_vowel..")("..ipa_diacritic.."*)([mnŋ])(.?)", function (n0, nv, nvd, n1, anchor)
		--	-- this cannot be simplified to "(.?)" => "([^" .. ipa_vowels .. "]?)", otherwise a vowel after would match
		--	if not find(anchor, ipa_vowel) then
		--		return n0 .. nv .. nasalized .. nvd .. n1 .. anchor
		--	end
		--end)
		-- sandhi: nm > mm, np > mp, nb > mb, nk > ŋk, ng > ŋg
		{ "nm", "m" .. long },
		{ "n" .. gap .. "([ɡk])", "ŋ%1%2" },
		{ "n" .. gap .. "([mpb])", "m%1%2" },
		{ "[nm]" .. gap .. "([f])", "ɱ%1%2" },
		-- handle potentially long consonants over secondary stresses
		{ cons .. "%(" .. long .. "%)(" .. stress_s .. ")", "%2%1(" .. long .. ")" },
		{ "(" .. stress_s .. ")" .. cons .. "%(" .. long .. "%)", "(%2)%1%2" },
		{ plosive .. "(" .. stress_s .. ")%1", "%1" .. unreleased .. "%2%1" },
		{ "%(" .. plosive .. "%)(" .. stress_s .. ")%1", "(%1" .. unreleased .. ")%2%1" },
		-- [k] allophone before front vowels (see Suomi, Toivanen and Ylitalo 2008, p. 27)
		{ "k([eiyæø])", "k̟%1" },
	}
	for _, rule in ipairs(rules) do
		p = gsub(p, rule[1], rule[2])
	end
	return p
end
-- Syllable weight classes:
-- -1 = degenerate (no vowel)
-- 0 = light
-- 1 = heavy with coda
-- 2 = heavy without coda (long vowel/diphthong)
-- 99 = stressed
local class_stressed_syllable = 99
-- Returns the weight class of a single syllable as produced by syllabify.
local function classify_syllable(syllable)
	-- a leading stress indicator overrides any weight
	if match(syllable, "^" .. stress_indicator) then
		return class_stressed_syllable
	end
	-- ignore a trailing (optional) gemination marker
	local body = gsub(syllable, "%(?%*?%)?$", "")
	if match(body, spelled_vowel .. spelled_vowel .. "$") then
		return 2
	end
	if not match(body, spelled_vowel) then
		-- no vowel at all, whatever the syllable ends in
		return -1
	end
	if match(body, spelled_consonant .. "$") then
		return 1
	end
	return 0
end
-- Truthy when `left` ends in a vowel and `right` begins with one
-- (an optional, possibly parenthesised, syllable dot may precede it).
local function has_hiatus(left, right)
	local left_ends_in_vowel = match(left, spelled_vowel .. "$")
	return left_ends_in_vowel and match(right, "^%(?%.?%)?" .. spelled_vowel)
end
local trad_lookahead = 2 -- how many unstressed syllables at most in a single unit, thus max consecutive unstressed syllables
-- Traditional left-to-right rhythmic stress assignment (e.g. in (V)ISK).
-- hyph: the syllables (not inspected here); classes: the weight class of each syllable.
-- Returns a table whose keys are the indices of syllables that receive a rhythmic stress.
local function add_rhythmic_stress_ltr(hyph, classes)
	local total = #classes
	local marks = {}
	local prev_stressed

	-- whether any syllable in classes[first..last] is heavy
	local function heavy_within(first, last)
		for k = first, last do
			if classes[k] > 0 then
				return true
			end
		end
		return false
	end

	for pos, weight in ipairs(classes) do
		local stressed = weight == class_stressed_syllable
		if not (stressed or prev_stressed) and pos < total then
			if pos == total - 1 or weight > 0 then
				stressed = true
			else
				-- a light syllable yields to a heavy one shortly after it
				-- (the final syllable is never considered)
				local window_end = math.min(pos + trad_lookahead, total - 1)
				stressed = not heavy_within(pos + 1, window_end)
			end
		end
		-- never stress a syllable directly before an already stressed one
		if stressed and pos < total and classes[pos + 1] == class_stressed_syllable then
			stressed = false
		end
		if stressed and pos > 1 then
			marks[pos] = true
		end
		prev_stressed = stressed
	end
	return marks
end
-- Right-to-left rhythmic stress assignment,
-- based on Karvonen (2005) "Word prosody in Finnish":
-- (1) each segment started by an already stressed syllable is analyzed
--     as a separate prosodic word
-- (2) stresses are assigned from *right to left*
-- (3) the final syllable is never stressed (not by this algorithm)
-- (4) stress may shift back by a single (unstressed) syllable
--     to a better candidate:
--       (C)VC is better than (C)V
--       (C)VV is better than (C)V and (C)VC
-- (5) if the final syllable in a prosodic word begins with a vowel
--     and the previous syllable ends in a vowel, skip it
-- hyph: the syllables; classes: the weight class of each syllable.
-- Returns a table whose keys are the indices of syllables that receive a rhythmic stress.
local function add_rhythmic_stress_rtl(hyph, classes)
	local marks = {}
	local foot = 0
	local word_final = true
	local pos = #classes
	while pos > 0 do
		local current = classes[pos]
		local left_is_stressed = pos > 1 and classes[pos - 1] == class_stressed_syllable
		if current == class_stressed_syllable then
			-- reset
			-- NOTE(review): word_final is set to false again at the bottom of the loop,
			-- so this reset never reaches the next iteration - confirm intent
			foot = 0
			word_final = true
		elseif word_final and pos > 1 and has_hiatus(hyph[pos - 1], hyph[pos]) then
			-- hiatus break at the end of a prosodic word: do nothing
		elseif foot == 0 then
			-- first unstressed syllable of a foot: just count it
			foot = 1
		elseif not left_is_stressed then
			-- a stress is assigned somewhere (no two stresses in a row otherwise);
			-- prefer the preceding unstressed syllable when it is heavier
			if pos > 2 and classes[pos - 2] ~= class_stressed_syllable
				and classes[pos - 1] > current then
				pos = pos - 1
			end
			marks[pos] = true
			foot = 0
		end
		pos = pos - 1
		word_final = false
	end
	return marks
end
-- Joins the syllables back into one string, prefixing the rhythmic stress mark
-- to every syllable flagged in add_stress that is not already stressed.
local function add_rhythmic_stress_collect(hyph, classes, add_stress)
	local out = {}
	for index = 1, #hyph do
		local syllable = hyph[index]
		if not (add_stress[index] and classes[index] < class_stressed_syllable) then
			out[index] = syllable
		else
			if index > 1 and match(syllable, "^g") and match(hyph[index - 1], "[nŋ]$") then
				-- make sure [nŋ].g > ŋ.ŋ
				out[index - 1] = gsub(out[index - 1], "n$", "ŋ")
				syllable = gsub(syllable, "^g", "ŋ")
			end
			-- the stress mark replaces a leading syllable dot
			local undotted = gsub(syllable, "^%.", "")
			out[index] = tertiary .. undotted
		end
	end
	return table.concat(out)
end
-- Adds rhythmic stress marks to a respelled word; applied *before* IPA conversion.
-- word: the respelling; assign_rtl: truthy to use the right-to-left algorithm.
-- Returns one result, plus a second variant (final syllable stressed) when the
-- word has at least three syllables and ends in a heavy syllable preceded by a
-- light one; otherwise the second value is nil.
local function add_rhythmic_stress(word, assign_rtl)
	-- keep_sep_symbols = true
	local hyph = syllabify(word, true, "sonority")
	local assign = assign_rtl and add_rhythmic_stress_rtl or add_rhythmic_stress_ltr

	-- classify all syllables; the first one always carries the main stress
	local classes = {}
	for index, syllable in ipairs(hyph) do
		classes[index] = index == 1 and class_stressed_syllable or classify_syllable(syllable)
	end

	local last = #classes
	local final_heavy = last >= 3 and classes[last] > 0 and classes[last - 1] == 0

	local primary = add_rhythmic_stress_collect(hyph, classes, assign(hyph, classes))
	if not final_heavy then
		return primary, nil
	end

	-- run the assignment again with the final syllable treated as stressed ...
	local actual_final = classes[last]
	classes[last] = class_stressed_syllable
	local marks = assign(hyph, classes)
	-- ... then restore its class so that the collector actually marks it
	classes[last] = actual_final
	marks[last] = true
	return primary, add_rhythmic_stress_collect(hyph, classes, marks)
end
local pattern_syllable_c = "^" .. consonant
local pattern_syllable_open = "^" .. consonant .. vowel
-- Replacement callback for a vowel pair found outside a wordlet-initial syllable.
-- diphthong: the matched vowel pair; after1, after2: the next two characters (possibly "").
-- Returns the pair (with the nonsyllabic mark appended when the syllable is open)
-- and the two following characters unchanged.
local function IPA_handle_diphthongs_replace_open_noninitial(diphthong, after1, after2)
	local tail = after1 .. after2
	local is_open
	if syl_is_consonant[after1] then
		-- a single consonant belongs to the next syllable only if a vowel follows it;
		-- otherwise it closes this syllable
		is_open = syl_is_vowel[after2]
	else
		-- no consonant after the pair: open, unless it is already marked as nonsyllabic
		is_open = after1 ~= nonsyllabic
	end
	if is_open then
		return diphthong .. nonsyllabic, tail
	end
	return diphthong, tail
end
local pattern_word_initial = "^[^" .. stress_indicators .. vowels .. "]*"
local pattern_after_sep = stress_indicator .. "[^" .. vowels .. "]*"
-- Marks `diphthong` (a pattern for a vowel pair) as a diphthong when it is the
-- first vowel sequence of the string or the first one after a stress indicator.
local function IPA_handle_diphthongs_word_initial(IPA, diphthong)
	for _, context in ipairs({ pattern_word_initial, pattern_after_sep }) do
		IPA = gsub(IPA, context .. diphthong, "%0" .. nonsyllabic)
	end
	return IPA
end
-- Marks `diphthong` (a pattern for a vowel pair) as a diphthong in initial
-- position and, elsewhere, wherever the replacement callback finds an open syllable.
local function IPA_handle_diphthongs_word_initial_or_open(IPA, diphthong)
	local marked = IPA_handle_diphthongs_word_initial(IPA, diphthong)
	marked = gsub_lookahead(marked, "(" .. diphthong .. ")(.?)(.?)", IPA_handle_diphthongs_replace_open_noninitial)
	return marked
end
-- Add nonsyllabic diacritic after last vowel of diphthong.
-- Works on the spelled term, before the letters are converted to IPA.
local function IPA_handle_diphthongs(IPA)
	-- /Vi/ everywhere
	IPA = gsub(IPA, "[aeouyäö]i", "%0" .. nonsyllabic)
	-- /ie/, /uo/, /yö/ root-initially
	for _, pair in ipairs({ "ie", "uo", "yö" }) do
		IPA = IPA_handle_diphthongs_word_initial(IPA, pair)
	end
	-- /VU/ root-initially or in open syllables
	for _, pair in ipairs({ "[aoei]u", "[äöei]y" }) do
		IPA = IPA_handle_diphthongs_word_initial_or_open(IPA, pair)
	end
	return IPA
end
-- Converts one wordlet (a stretch of a term without wordlet separators) to IPA.
-- term: the spelled wordlet, already lowercased and with diphthongs marked.
-- is_narrow: truthy to also apply the narrow-transcription substitutions.
-- Returns the IPA string, without a leading stress mark.
local function fi_IPA_wordlet(term, is_narrow)
	local cursor = 1
	local len_term = len(term)
	local phonemes = {}
	while cursor <= len_term do
		local target
		-- try letter_sequence_phonemes first
		for _, replacement in ipairs(letter_sequence_phonemes) do
			if match(term, replacement[1], cursor) then
				target = replacement[2]
				cursor = cursor + len(replacement[1]) - 1 -- account for ^
				break
			end
		end
		-- then letters_phonemes; unknown symbols pass through unchanged
		if not target then
			local letter = sub(term, cursor, cursor)
			target = letters_phonemes[letter] or letter
			cursor = cursor + 1
		end
		-- target is always set here; the former "or letter" fallback read an
		-- undeclared global because `letter` is local to the branch above
		phonemes[#phonemes + 1] = target
	end
	local result = table.concat(phonemes)
	result = gsub(result, "nk", "ŋk")
	-- a final ŋŋ/ŋː is short
	result = gsub(result, "ŋ[ŋ" .. long .. "]$", "ŋ")
	if is_narrow then
		-- articulation of h (Suomi, Toivanen & Ylitalo 2008, p. 28)
		-- NOTE(review): the :find calls below treat `before`/`after` as patterns,
		-- so a neighbouring "." matches anything and "(" or "%" would raise - confirm
		-- whether plain matching is wanted
		result = gsub_lookahead(result, "(.?)h(.?)",
			function (before, after)
				local h
				if after ~= "" and after ~= "h" then
					if before ~= "" and (ipa_vowels:find(before) or before == nonsyllabic) then
						if ipa_consonants:find(after) then
							-- vihma, yhtiö
							if before == "i" or before == "y" then
								h = "ç"
							-- mahti, kohme, tuhka
							elseif before == "ɑ" or before == "o" or before == "u" then
								h = "x"
							end
						-- maha
						elseif ipa_vowels:find(after) then
							h = "ɦ"
						end
					end
				end
				if h then
					return before .. h, after
				end
			end)
		-- double letter replacement must be handled earlier here
		result = gsub(result, "(%a)%1", "%1" .. long)
		for letter, phoneme in pairs(replacements_narrow) do
			result = gsub(result, letter, phoneme)
		end
	end
	return result
end
-- Splits a term at wordlet separators and converts every wordlet to IPA.
-- term: the spelled term; is_narrow: passed on to fi_IPA_wordlet;
-- hyphenstress: the stress mark for wordlets introduced by a hyphen.
-- Returns an array of IPA strings, each starting with its stress mark:
-- primary for the first wordlet, secondary after "/", hyphenstress otherwise.
local function fi_IPA_termwordlets(term, is_narrow, hyphenstress)
	local wordlet_IPAs = {}
	local split_pattern = "(" .. wordlet_sep .. ")"
	local wordlet_start = 1
	-- separator that introduced the current wordlet (nil for the first one).
	-- The stress must depend on this one, not on the separator that follows the
	-- wordlet, which is what the search below finds.
	local preceding_sep
	while true do
		local sep_at, _, sep_char = find(term, split_pattern, wordlet_start)
		local wordlet_end = sep_at and (sep_at - 1)
		local part = sub(term, wordlet_start, wordlet_end)
		local stress
		if wordlet_start == 1 then
			stress = "ˈ" -- primary
		elseif preceding_sep == "/" then
			stress = "ˌ" -- secondary
		else
			stress = hyphenstress
		end
		table.insert(wordlet_IPAs, stress .. fi_IPA_wordlet(part, is_narrow))
		if sep_at == nil then
			break
		end
		preceding_sep = sep_char
		wordlet_start = sep_at + 1
	end
	return wordlet_IPAs
end
-- Generates the broad and optionally also the narrow IPA transcription.
-- If make_narrow is true, the narrow transcription is provided;
-- otherwise it is invalid.
-- term: the (re)spelled term; leading/trailing hyphens mark it as a suffix/prefix
-- and are re-attached to the results.
-- Returns two values: the broad transcription and the narrow one (nil unless make_narrow).
local function fi_IPA_combined_term(term, make_narrow)
local hyphenstress = "ˌ" -- secondary by default
-- replacement counts from the gsubs below: > 0 when a trailing/leading hyphen was removed
local is_prefix, is_suffix
if find(term, "%/") then
hyphenstress = tertiary -- tertiary if we have slashes
end
term = lower(term)
term, is_suffix = gsub(term, "^%-+", "")
term, is_prefix = gsub(term, "%-+$", "")
term = IPA_handle_diphthongs(term)
-- Run IPA for wordlets.
local IPA = table.concat(fi_IPA_termwordlets(term, false, hyphenstress))
-- Replace double letters (vowels or consonants) with single letter plus length sign,
-- and apply post fixes.
IPA = apply_post_fixes(gsub(IPA, "(%a)%1", "%1" .. long))
local IPA_narrow
if make_narrow then
-- only the first result of add_rhythmic_stress is used here
local term_narrow = add_rhythmic_stress(term)
IPA_narrow = table.concat(fi_IPA_termwordlets(term_narrow, true, hyphenstress))
-- Handle * in narrow transcription.
if find(IPA_narrow, "ˣ", nil, true) then
-- post: an optional ")" plus whitespace and stress mark after ˣ
-- after: the next character with its diacritics; potential_consonant: that character alone
IPA_narrow = gsub(IPA_narrow, "ˣ(%)?%s*"..stress_p.."?)((.?)" .. ipa_diacritic .. "*)",
function (post, after, potential_consonant)
if potential_consonant == "" then
-- nothing follows: the gemination marker becomes a glottal stop
if find(post, "^%)") then
return "ʔ" .. post .. after
else
return post .. "(ʔ)" .. after
end
-- NOTE(review): potential_consonant is used as a pattern here, so "." matches and "(" would raise - confirm
elseif ipa_consonants:find(potential_consonant) then
if #post > 0 then
-- something stands between ˣ and the consonant: double the consonant across it
local amark = ""
if plosives:find(sub(after, 1, 1)) then
amark = unreleased
end
return after .. amark .. post .. after
else
-- directly before a consonant: lengthen it
return post .. after .. long
end
else
return post .. "(ʔ)" .. after
end
end)
-- drop parentheses left empty by the replacement
IPA_narrow = gsub(IPA_narrow, "%(%)", "")
end
-- Apply post fixes.
IPA_narrow = apply_post_fixes_narrow(apply_post_fixes(IPA_narrow))
if is_prefix > 0 then
IPA_narrow = IPA_narrow .. "-"
end
if is_suffix > 0 then
IPA_narrow = "-" .. IPA_narrow
end
end
if is_prefix > 0 then
IPA = IPA .. "-"
end
if is_suffix > 0 then
IPA = "-" .. IPA
end
return IPA, IPA_narrow
end
-- Returns a single transcription of `term`: the narrow one when is_narrow is
-- truthy (and one was produced), otherwise the broad one.
local function fi_IPA_term(term, is_narrow)
	local broad, narrow = fi_IPA_combined_term(term, is_narrow)
	if is_narrow and narrow then
		return narrow
	end
	return broad
end
-- exports for tests etc.
export.add_rhythmic_stress = add_rhythmic_stress
export.fi_IPA_term = fi_IPA_term
-- {{fi-IPA}}
-- term: either a frame object (the first argument of the parent frame is used)
-- or the respelling itself. A missing term means the page title; "*" means the
-- page title followed by the gemination marker.
-- Returns the formatted broad and narrow transcriptions.
function export.fi_IPA_entrypoint(term)
	if type(term) == "table" then
		term = term:getParent().args[1]
	end
	local title = mw.title.getCurrentTitle().text
	if not term then
		term = title
	elseif term == "*" then
		term = title .. "*"
	end
	-- a term containing a space sets no_count for the formatter
	local no_count = match(term, " ")
	-- these two were previously assigned without `local`, leaking into the global environment
	local IPA_narrow = fi_IPA_term(term, true)
	local IPA = fi_IPA_term(term, false)
	return m_IPA.format_IPA_full {
		lang = require("Module:languages").getByCode(langcode),
		items = {{pron = "/" .. IPA .. "/"}, {pron = "[" .. IPA_narrow .. "]"}},
		no_count = no_count,
	}
end
-- <<IPA END>>
-- Normalizes a page title for comparison: en dashes become hyphens and the
-- result is lowercased.
local function cleanup_title(x)
	-- keep only the string; gsub's replacement count must not be passed on to lower
	local normalized = gsub(x, "–", "-")
	return lower(normalized)
end
-- these two functions are used to build Cartesian products of triplets XAB,
-- such that e.g. (X1, A1, B1), (X2, A2, B2), (X3, _, _)
-- creates the combinations
-- X1A1X2A2X3, X1A1X2B2X3, X1B1X2A2X3, X1B1X2B2X3
--
-- Builds the single combination selected by the bits of n: bit k (counting from
-- the lowest) picks the B alternative of triplet k when set, else the A alternative.
-- Relies on m_bit32 having been loaded (cartesian_combine does that).
local function cartesian_make(parts, n)
	local pieces = { parts[1][1] }
	local bit = 1
	for i = 2, #parts do
		-- slot 3 holds the B alternative, slot 2 the A alternative
		local slot = (m_bit32.band(n, bit) > 0) and 3 or 2
		pieces[#pieces + 1] = parts[i - 1][slot]
		pieces[#pieces + 1] = parts[i][1]
		bit = m_bit32.lshift(bit, 1)
	end
	return table.concat(pieces)
end
-- Returns every combination of the A/B alternatives of `parts` as an array of
-- strings, in increasing order of the selecting bit mask.
local function cartesian_combine(parts)
	-- the bit library is loaded on first use
	m_bit32 = m_bit32 or require("Module:bit32")
	local combos = {}
	local last_mask = m_bit32.lshift(1, #parts - 1) - 1
	for mask = 0, last_mask do
		combos[#combos + 1] = cartesian_make(parts, mask)
	end
	return combos
end
-- Expands optional breaks "(.)" and "(-)" in `word` into every combination of
-- "break present" / "break absent".
-- only_breaking_diphthongs: when truthy, an optional dot is expanded only if it
--   splits a known vowel sequence that is not preceded by two vowels.
-- prefer_hyphen_first: whether the variant with the hyphen is listed first.
-- Returns an array of strings; { word } when nothing was expanded.
local function split_by_optional_break(word, only_breaking_diphthongs, prefer_hyphen_first)
	local optional_break = "%([.-]%)"
	local parts = {}
	local scan_from = 1
	local j, je = find(word, optional_break, scan_from)
	while j ~= nil do
		local before = sub(word, scan_from, j - 1)
		local symbol = sub(word, j + 1, j + 1)
		local allow_break = true
		if only_breaking_diphthongs then
			local pair = sub(word, j - 1, j - 1) .. sub(word, j + 3, j + 3)
			-- if a dot, only when breaks a diphthong
			-- (never a diphthong if two vowels precede)
			allow_break = symbol ~= "." or (
				not match(before, vowel .. vowel .. "$")
				and vowel_sequences[pair])
		end
		if allow_break then
			local prefer_first
			if symbol == "." then
				-- prefer dots last if vowels on both sides
				prefer_first = not (match(before, vowel .. "$") and match(word, "^" .. vowel, j + 3))
			elseif symbol == "-" then
				prefer_first = prefer_hyphen_first
			end
			if prefer_first then
				parts[#parts + 1] = { before, symbol, "" }
			else
				parts[#parts + 1] = { before, "", symbol }
			end
		end
		scan_from = je + 1
		j, je = find(word, optional_break, scan_from)
	end
	if #parts == 0 then
		return { word }
	end
	parts[#parts + 1] = { sub(word, scan_from), "", "" }
	return cartesian_combine(parts)
end
-- Derives the forms used for automatic hyphenation from a respelling.
-- word: the respelling; title: the page title, whose letter case is copied onto
-- the result when the title is not all lowercase.
-- Returns the array produced by split_by_optional_break.
local function get_autohyphenate_forms(word, title)
	-- drop optional gemination/length, spell out length marks, strip IPA symbols
	word = gsub(word, "%([*ˣ:ː]%)", "")
	word = gsub(word, "(.)ː", "%1%1")
	word = gsub(word, "[" .. ipa_symb .. "ˣ*]", "")
	word = gsub(word, "/", "-")
	word = gsub(word, "^-", "")
	word = gsub(word, "ŋn", "gn")
	if lower(title) == title then
		word = lower(word)
	else
		-- find letters in title and try to match them
		local letters = {}
		for letter in gmatch(title, "%a") do
			letters[#letters + 1] = letter
		end
		local respelled = {}
		local letter_index = 1
		for character in gmatch(word, ".") do
			local next_letter = letters[letter_index]
			-- next_letter is nil once the title's letters are used up; the
			-- character is then kept as it is instead of raising in lower(nil)
			if next_letter and match(character, "%a")
				and lower(next_letter) == lower(character) then
				respelled[#respelled + 1] = next_letter
				letter_index = letter_index + 1
			else
				respelled[#respelled + 1] = character
			end
		end
		word = table.concat(respelled)
	end
	return split_by_optional_break(word, nil, true)
end
-- Derives the forms used for automatic rhyme generation from a respelling.
-- Returns the array produced by split_by_optional_break.
local function get_autorhyme_forms(word)
	local substitutions = {
		-- drop optional gemination/length
		{ "%([*ˣ:ː]%)", "" },
		-- applies gemination mid-word for rhymes
		{ "[*ˣ](" .. consonant .. ")", "%1ː" },
		{ "[*ˣ]", "" },
		-- convert long vowel marks
		{ "(.)ː", "%1%1" },
		-- remove IPA symbols and normalize +/ to -
		{ "[" .. ipa_symb .. "]", "" },
		{ "[/+]", "-" },
	}
	local normalized = lower(word)
	for _, rule in ipairs(substitutions) do
		normalized = gsub(normalized, rule[1], rule[2])
	end
	return split_by_optional_break(normalized, nil, true)
end
-- Computes the rhyme of a respelled word.
-- Returns two values: the rhyme (IPA without stress marks and diacritics) and
-- the syllable count of the whole word.
local function generate_rhyme(word)
	-- convert syllable weight to hyphen for next routine
	-- (just in case these are included manually... even if they shouldn't be)
	local fmtword = gsub(word, "[ˈˌ"..tertiary.."]", "-")
	-- continue from fmtword: reading `word` again here used to throw away
	-- the stress mark conversion above
	fmtword = gsub(fmtword, "'", ".")
	local sylcount = #syllabify(fmtword, ".")
	-- get final wordlet of a compound word
	local last_hyph = find(fmtword, "%-[^%-]*$") or 0
	local last_wordlet = sub(fmtword, last_hyph + 1)
	-- use fi-IPA rhythmic stress generator
	local stressed = add_rhythmic_stress(last_wordlet)
	-- find last stress mark and extract everything after that
	local res = match(stressed, ".*" .. stress_indicator .. "(.*)")
	if not res or res == "" then res = last_wordlet end
	-- remove initial consonants, convert to IPA, remove IPA symbols
	res = gsub(res, "^%.", "")
	res = gsub(res, "^" .. consonant .. "+", "")
	res = fi_IPA_combined_term(res, false)
	res = gsub(res, "[" .. ipa_symb .. "]", "")
	res = gsub(res, "^%.", "")
	return res, sylcount
end
-- for testing
export.generate_rhyme = generate_rhyme
-- Re-joins the syllables of `pron`, inserting a dot after every syllable that
-- ends inside a run of three or more vowels.
local function add_trivowel_dots(pron)
	local run_pattern = "[aeiouyäö][aeiouyäö][aeiouyäö]+"
	-- boundaries come in pairs: second vowel of a run, last vowel of the run;
	-- an odd number of boundaries passed means "inside a run"
	local toggles = {}
	local from = 1
	local first, last = find(pron, run_pattern, from)
	while first ~= nil do
		toggles[#toggles + 1] = first + 1
		toggles[#toggles + 1] = last
		from = last + 1
		first, last = find(pron, run_pattern, from)
	end
	-- sentinel past the end of the string
	toggles[#toggles + 1] = len(pron) + 1
	-- generate hyphenation, and add dots within multivowel sequences
	local pieces = {}
	local consumed = 0
	local slot = 1
	for _, hpart in ipairs(syllabify(pron, true)) do
		consumed = consumed + len(hpart)
		while consumed >= toggles[slot] do
			slot = slot + 1
		end
		pieces[#pieces + 1] = hpart
		if slot % 2 == 0 then
			pieces[#pieces + 1] = "."
		end
	end
	return table.concat(pieces)
end
-- Whether the respelling `pron` spells the same word as the page title once all
-- pronunciation-only markup is removed. A missing or empty respelling counts as equal.
local function pron_equal(title, pron)
	if not pron or pron == "" then
		return true
	end
	local steps = {
		-- handle slashes, pluses and quotes as hyphens
		{ '[/+"]', "-" },
		-- remove optional lengthening/shortening/syllable break/gemination, should not cause any issues
		{ "%([*ˣ.:ː-]%)", "" },
		-- remove gemination asterisks and syllable separating dots
		{ "[*%." .. syl_override_start .. syl_override_end .. "]", "" },
		-- map existing glottal stops to apostrophes
		{ "%(?ʔ%)?", apostrophe },
		-- /ŋn/ for /gn/ is fine
		{ "ŋn", "gn" },
		-- remove hyphens but also apostrophes right after hyphens
		-- (so that glottal stop is allowed after hyphen separating two same vowels)
		{ "-" .. apostrophe .. "?", "" },
	}
	for _, step in ipairs(steps) do
		pron = gsub(pron, step[1], step[2])
	end
	local bare_title = gsub(cleanup_title(title), "-", "")
	return pron == lower(bare_title)
end
-- Accepts known spelling/pronunciation mismatches.
-- Returns the pronunciation adjusted to match the title, or nothing when no
-- special case applies.
local function pron_equal_special_cases(title, pronunciation)
	if not find(title, "ruoa") then
		return
	end
	-- very common exception - support it
	local candidate = gsub(pronunciation, "ruua", "ruoa")
	if pron_equal(title, lower(candidate)) then
		return candidate
	end
	-- fall through to return nothing
end
-- for each delimiter that may open a bracket, the pattern of characters at which
-- the text replaced by the bracket may begin
local bracket_delimiters = {
	["/"] = "[%/]",
	["-"] = "[%/%-]",
	['"'] = '[%/%-%"]',
}
-- Rewrites "[...]" replacements in a respelling: the text between the last
-- matching delimiter and the bracket is dropped and the bracket contents are
-- wrapped in the syllabification override markers instead.
-- Raises an error when a bracket after the first has no delimiter before it.
local function treat_brackets(pronunciation_with_brackets)
	local source = pronunciation_with_brackets
	local pieces = {}
	local cursor = 1
	local b_start, b_end, b_text = find(source, "(%b[])", cursor)
	while b_start ~= nil do
		-- an explicit delimiter may directly follow the opening bracket
		local delimiter = match(b_text, '^.([%/"])')
		-- find last slash/hyphen
		local head = sub(source, 1, b_start)
		local terminate = match(head, "^.*()" .. bracket_delimiters[delimiter or "-"])
		if cursor > 1 and (not terminate or terminate < cursor) then
			error("bracket {} replacement string had no matching delimiter " .. (delimiter or "-"))
		end
		if terminate then
			pieces[#pieces + 1] = sub(source, cursor, terminate)
		end
		-- tag replacement with overrides for syllabification
		local inner = sub(b_text, delimiter and 3 or 2, -2)
		pieces[#pieces + 1] = syl_override_start .. inner .. syl_override_end
		cursor = b_end + 1
		b_start, b_end, b_text = find(source, "(%b[])", cursor)
	end
	-- whatever follows the last bracket is copied verbatim
	pieces[#pieces + 1] = sub(source, cursor)
	return table.concat(pieces)
end
-- Replaces every double quote in `text` by a stress symbol.
-- syllabification_cache: table mapping a text to its syllables; filled on demand.
-- stress: replacement for a quote that may carry stress.
-- nostress: optional replacement for a quote next to an already stressed
--   syllable; when it is absent or equal to `stress`, every quote becomes `stress`.
-- Returns the converted text.
local function convert_quotes_to(syllabification_cache, text, stress, nostress)
	if not find(text, '"') then
		return text
	end
	if not nostress or stress == nostress then
		return (gsub(text, '"', stress))
	end
	-- syllabify, and determine which way to go
	-- output nostress if the preceding syllable was stressed,
	-- otherwise output stress
	local syllables = syllabification_cache[text]
	if not syllables then
		syllables = syllabify(text, true)
		syllabification_cache[text] = syllables
	end
	-- write into a fresh table: the cached syllables must stay untouched, or a
	-- later call for the same text with other symbols would find no quotes left
	local converted = {}
	local last_stressed = false
	for i, syl in ipairs(syllables) do
		converted[i] = syl
		-- first syllable always stressed
		if i == 1 or match(syl, "^" .. stress_indicator) then
			last_stressed = true
		else
			if match(syl, '^"') then
				local next_stressed = i == #syllables or match(syllables[i + 1], "^" .. stress_indicator)
				converted[i] = gsub(syl, '^"', (last_stressed or next_stressed) and nostress or stress)
			end
			last_stressed = false
		end
	end
	return table.concat(converted)
end
-- Turns a syllabification string into a list of hyphenation segments.
local function fi_hyphenation_from_syllabification(syl)
	local rules = {
		-- spaces, slashes and quotes are hard boundaries; pluses and apostrophes soft ones
		{ '[ /"]', "-" },
		{ "[+'’]", "." },
		-- all syllable boundaries are now - or .
		-- first, ban any dots adjacent to a hyphen
		{ "%.([^.]-)%-", "%1-" },
		{ "%-([^.]-)%.", "-%1" },
		-- then ban any dots that only have a single vowel
		{ "^(" .. vowel .. ")%.", "%1" },
		{ "%.(" .. vowel .. ")$", "%1" },
	}
	for _, rule in ipairs(rules) do
		syl = gsub(syl, rule[1], rule[2])
	end
	-- in the case x.V.y, technically both xV.y and x.Vy are valid
	-- but here, we only allow xV.y, because there is no way to mark
	-- "allow this or this" without listing all allowed options separately
	-- which can result in exponentially many hyphenations shown
	syl = mw.ustring.gsub(syl, "%.(" .. vowel .. ")%.", "%1.")
	return mw.text.split(syl, "[.-]")
end
-- Main entry point: builds the pronunciation section for a Finnish term.
--   frame: when a table, template arguments are read from frame:getParent().args
--       and validated with Module:parameters; otherwise built-in defaults are
--       used (a single pronunciation derived from the page title).
-- Returns wikitext: one "* ..." line per IPA, audio, rhyme, homophone and
-- hyphenation row, joined by newlines and followed by formatted categories.
function export.show(frame)
	local title = mw.title.getCurrentTitle().text
	local lang = require("Module:languages").getByCode(langcode)
	local pronunciation = { "" }
	local ipa = { nil }
	local audio = { }
	local qualifiers = { }
	local rhymes = { nil }
	local syllabifications = { nil }
	local homophones = { }
	local syllabification_labels = { }
	local rhyme_labels = { }
	local homophone_labels = { }
	local nohyphen = false
	local norhymes = false
	local compound_suffix = false
	local categories = { }

	if type(frame) == "table" then
		local params = {
			[1] = { list = true, default = "", allow_holes = true },
			["ipa"] = { list = true, default = nil, allow_holes = true },

			["h"] = { list = true, default = nil, allow_holes = true }, ["hyphen"] = {},
			["r"] = { list = true, default = nil, allow_holes = true }, ["rhymes"] = {},
			["a"] = { list = true, default = nil }, ["audio"] = {},
			["ac"] = { list = true, default = nil }, ["caption"] = {},
			["hh"] = { default = "" }, ["homophones"] = {},

			["q"] = { list = true, default = nil, allow_holes = true },
			["hp"] = { list = true, default = nil, allow_holes = true },
			["rp"] = { list = true, default = nil, allow_holes = true },
			["hhp"] = { list = true, default = nil, allow_holes = true },

			["nohyphen"] = { type = "boolean", default = false },
			["norhymes"] = { type = "boolean", default = false },
			["csuffix"] = { type = "boolean", default = false },

			["title"] = {}, -- for debugging or demonstration only
		}

		local args = require("Module:parameters").process(frame:getParent().args, params, true)

		title = args["title"] or title
		pronunciation = args[1]
		ipa = args["ipa"]
		syllabifications = args["h"]
		rhymes = args["r"]
		qualifiers = args["q"]
		syllabification_labels = args["hp"]
		rhyme_labels = args["rp"]
		nohyphen = args["nohyphen"]
		norhymes = args["norhymes"]
		compound_suffix = args["csuffix"]
		homophones = mw.text.split(args["hh"], ",")
		homophone_labels = args["hhp"]

		-- hacks; if the first parameter is empty then we want an empty string.
		-- The positional parameters live in the list args[1] (= pronunciation),
		-- so the check has to look inside that list, like the one for ipa.
		if pronunciation[2] and pronunciation[1] == nil then pronunciation[1] = "" end
		if ipa[2] and ipa[1] == nil then ipa[1] = "" end

		-- clear homophones if empty
		if #homophones == 1 and homophones[1] == "" then homophones = {} end

		-- the long-named parameters override the first item of each list
		if args["hyphen"] then syllabifications[1] = args["hyphen"] end
		if args["rhymes"] then rhymes[1] = args["rhymes"] end
		if args["homophones"] then homophones = mw.text.split(args["homophones"], ",") end

		local audios = args["a"]
		local captions = args["ac"]
		if args["audio"] then audios[1] = args["audio"] end
		-- the parameter is declared as "caption" (singular)
		if args["caption"] then captions[1] = args["caption"] end

		for i, audiofile in ipairs(audios) do
			if audiofile then
				table.insert(audio, {lang = lang, file = audiofile, caption = captions[i]})
			end
		end
	end

	-- expand shorthand pronunciations
	for i, p in ipairs(pronunciation) do
		if p == "" or p == "^" then
			-- empty or ^: use the (cleaned-up) title as is
			pronunciation[i] = cleanup_title(title)
		elseif p == "*" or p == "(*)" then
			-- bare * or (*): title followed by that symbol
			pronunciation[i] = cleanup_title(title) .. p
		elseif find(p, "[!#]") then
			p = gsub(p, "t!s", "ts")
			p = gsub(p, "t#s", "ts")
			-- TODO deprecate and remove the two codes below
			p = gsub(p, "%.%!", "(.)")
			p = gsub(p, "%-%!", "-")
			pronunciation[i] = p
		end
	end

	-- make sure #pronunciation >= #IPA, because we use #pronunciation
	-- as the source of how many pronunciations we have
	for i, p in ipairs(ipa) do
		if not pronunciation[i] then
			pronunciation[i] = ""
		end
	end

	-- whether hyphenation and rhyme data can be generated automatically
	local can_auto_hyphenate = true
	local can_auto_rhyme = true
	-- when generating rhyme or syllabification data automatically, use
	-- model_pronunciation as the source of pronunciation data
	local model_pronunciation

	-- preprocessing
	local split_optional_breaks = false
	local do_not_split_optional_breaks = false
	local model_pronunciation_index = 1
	local brackets = false

	-- a while loop, because entries may be inserted during the iteration
	local i = 1
	while i <= #pronunciation do
		-- split (.), but only if there is only one
		local dot_start, dot_end = find(pronunciation[i], "%(%.%)")
		if dot_start then
			split_optional_breaks = true
			if find(pronunciation[i], "%(%.%)", dot_end) then
				do_not_split_optional_breaks = true
			end
		end

		-- add syllable break Vii -> V.ii
		if find(pronunciation[i], "[aeouyäö]ii") then
			pronunciation[i] = gsub(pronunciation[i], "([aeouyäö])(ii)", "%1.%2")
		end

		-- handle % (long consonant usually pronounced as short)
		if find(pronunciation[i], "%", 1, true) then
			local original = pronunciation[i]
			local short_form = gsub(original, "%%", "")
			local long_form = gsub(original, "(.)%%", "%1%1")
			pronunciation[i] = short_form
			-- use long consonant in model_pronunciation
			if i == model_pronunciation_index then
				model_pronunciation_index = i + 1
			end
			-- the long variant goes right after the short one
			i = i + 1
			table.insert(pronunciation, i, long_form)
			do_not_split_optional_breaks = true
		end

		if find(pronunciation[i], "[", 1, true) then
			brackets = true
		end

		i = i + 1
	end

	model_pronunciation = pronunciation[model_pronunciation_index]
	if brackets then
		-- from here on, `brackets` holds the model pronunciation with its
		-- bracketed parts still present
		brackets = model_pronunciation
		-- remove brackets from all pronunciation, keeping only in brackets
		-- for later processing
		for i, p in ipairs(pronunciation) do
			pronunciation[i] = gsub(p, "%b[]", "")
		end
		model_pronunciation = pronunciation[model_pronunciation_index]
	end

	local model_pronunciation_max_index = model_pronunciation_index

	-- split a single optional break into two pronunciations
	-- model_pronunciation will still have the (.)
	-- syllabification and rhyme logic can handle it
	if #pronunciation == 1 and split_optional_breaks and not do_not_split_optional_breaks then
		local new_pronunciation = {}
		for _, p in ipairs(pronunciation) do
			-- local split_i = split_by_optional_break(p, true)
			local split_i = split_by_optional_break(p)
			for _, np in ipairs(split_i) do
				table.insert(new_pronunciation, np)
			end
		end
		pronunciation = new_pronunciation
		model_pronunciation_max_index = #pronunciation
	end

	local syllabification_cache = {}

	-- generate IPA rows
	local results = {}
	local has_spaces = match(title, " ") or (model_pronunciation and match(model_pronunciation, " "))
	local is_suffix = match(title, "^-")
	local is_affix = not compound_suffix and (match(title, "-$") or is_suffix)

	for i, p in ipairs(pronunciation) do
		local qual = qualifiers[i] or ""

		if #qual > 0 then
			qual = " " .. require("Module:qualifier").format_qualifier(qualifiers[i])
		end

		if ipa[i] and ipa[i] ~= "" then
			-- manually specified IPA is shown as given
			table.insert(results, "* " .. m_IPA.format_IPA_full {
				lang = lang,
				items = {{pron = ipa[i]}},
				no_count = has_spaces,
			} .. qual)
			-- cannot do automatic rhyme/hyph with IPA pronunciations
			if i <= model_pronunciation_max_index then
				can_auto_hyphenate = false
				can_auto_rhyme = false
			end
		else
			-- do basic cleanup to IPA parameters first:
			-- replace + with - for secondary stress, and colon with long
			-- (`long` here is the name visible from outside this function)
			if find(p, ":") then p = gsub(p, ":", long) end
			if find(p, "%+") then p = gsub(p, "%+", "-") end

			-- add clarifying dots to three-vowel sequences
			if find(p, "[aeiouyäö][aeiouyäö][aeiouyäö]") then
				p = add_trivowel_dots(p)
			end

			-- handle quotes
			if find(p, '"') then
				p = convert_quotes_to(syllabification_cache, p, "-", ".")
			end

			local IPA, IPA_narrow = fi_IPA_combined_term(p, true)

			-- multi-word stress
			if has_spaces then
				IPA_narrow = gsub(IPA_narrow, " ([^ˈˌ"..tertiary.."])", " ˈ%1")
				IPA = gsub(IPA, " ([^ˈˌ"..tertiary.."])", " ˈ%1")
			end

			-- remove initial stress if suffix
			-- (a compound suffix gets secondary stress in its place)
			if is_suffix then
				if compound_suffix then
					IPA_narrow = gsub(IPA_narrow, "^(%-?)ˈ", "%1ˌ")
					IPA = gsub(IPA, "^(%-?)ˈ", "%1ˌ")
				else
					IPA_narrow = gsub(IPA_narrow, "^(%-?)ˈ", "%1")
					IPA = gsub(IPA, "^(%-?)ˈ", "%1")
				end
			end

			table.insert(results, "* " .. m_IPA.format_IPA_full {
				lang = lang,
				items = {{pron = "/" .. IPA .. "/"}, {pron = "[" .. IPA_narrow .. "]"}},
				no_count = has_spaces,
			} .. qual)
		end
	end

	local should_have_automatic_hyphenation = not (has_spaces or is_affix)
	can_auto_rhyme = can_auto_rhyme and should_have_automatic_hyphenation

	-- terms with spaces, or affixes, never have automatic syllabification/rhymes
	local model_pronunciation_hyphenation
	if not should_have_automatic_hyphenation then
		can_auto_hyphenate = false
	elseif can_auto_hyphenate then
		-- automatic hyphenation requires the model pronunciation to
		-- pass the pron_equal comparison against the title
		can_auto_hyphenate = pron_equal(title, lower(model_pronunciation))
		if not can_auto_hyphenate then
			model_pronunciation_hyphenation = model_pronunciation
			if brackets then
				-- check for brackets
				model_pronunciation_hyphenation = treat_brackets(brackets)
				can_auto_hyphenate = pron_equal(title, lower(model_pronunciation_hyphenation))
			end
			if not can_auto_hyphenate then
				-- handle special cases
				local special_cases_checked = pron_equal_special_cases(title, model_pronunciation_hyphenation)
				if special_cases_checked then
					can_auto_hyphenate = true
					model_pronunciation_hyphenation = special_cases_checked
				end
			end
		end
	end

	local automatic_syllabifications = false
	if can_auto_hyphenate and not syllabifications[1] then
		-- generate automatic syllabifications
		automatic_syllabifications = true
		local quotes_converted = convert_quotes_to(syllabification_cache, model_pronunciation_hyphenation or model_pronunciation, "(-)")
		local forms = get_autohyphenate_forms(quotes_converted, title)
		local seen_syllabifications = {}
		local syllabification_count = 1
		for _, form in ipairs(forms) do
			-- stop at the first slot that already holds a syllabification
			if syllabifications[syllabification_count] then break end
			local syllabified = table.concat(syllabify(form, sep_symbols, "dots"))
			if not seen_syllabifications[syllabified] then
				syllabifications[syllabification_count] = syllabified
				seen_syllabifications[syllabified] = true
				syllabification_count = syllabification_count + 1
			end
		end
	elseif #syllabifications == 1 and syllabifications[1] == "-" then
		-- a lone "-" suppresses syllabifications altogether
		syllabifications = {}
		should_have_automatic_hyphenation = false
	end

	if can_auto_rhyme and not rhymes[1] then
		-- generate automatic rhymes
		local forms = get_autorhyme_forms(convert_quotes_to(syllabification_cache, model_pronunciation, "-", "."))
		for i, form in ipairs(forms) do
			if rhymes[i] then break end
			-- stored as a table so that all values returned by
			-- generate_rhyme are kept; unpacked again further below
			rhymes[i] = { generate_rhyme(form) }
		end
	elseif #rhymes == 1 and rhymes[1] == "-" then
		-- a lone "-" suppresses rhymes altogether
		rhymes = {}
	end

	for i, a in ipairs(audio) do
		table.insert(results, "* " .. require("Module:audio").format_audio(a))
	end

	-- generate rhyme rows from rhyme data
	if not norhymes then
		-- syllable counts derived from the syllabifications; computed at most
		-- once and shared by all rhyme rows of this call
		local manual_syllable_counts = nil

		-- generate rhyme rows
		if #rhymes > 0 then
			-- merge rhymes if they have identical labels
			local last_label = false
			local new_rhymes = {}
			local new_rhyme_labels = {}
			local current_list = {}

			for i, r in ipairs(rhymes) do
				local label = rhyme_labels[i] or ""
				if last_label == label then
					table.insert(current_list, r)
				else
					if #current_list > 0 then
						table.insert(new_rhymes, current_list)
					end
					if last_label ~= false then
						table.insert(new_rhyme_labels, last_label)
					end
					current_list = { r }
					last_label = label
				end
			end

			table.insert(new_rhymes, current_list)
			table.insert(new_rhyme_labels, last_label)
			rhymes = new_rhymes
			rhyme_labels = new_rhyme_labels
		end

		-- add rhymes
		for i, subrhymes in ipairs(rhymes) do
			local label = ""
			if rhyme_labels[i] and #rhyme_labels[i] > 0 then
				label = " " .. require("Module:qualifier").format_qualifier(rhyme_labels[i])
			end

			if #subrhymes >= 1 then
				-- list of rhyme objects in order of appearance
				local existing_rhymes = {}
				-- rhyme => { object = rhyme object, [syllable count] = true }
				local seen_rhymes = {}
				-- if at least one rhyme doesn't specify the syllable count,
				-- we will try to determine it automatically
				local must_syllabify = false
				local syllable_count
				-- final syllable counts not passed as part of any rhyme
				local fallback_syllable_counts = nil

				for _, rhyme in ipairs(subrhymes) do
					if type(rhyme) == "table" then
						rhyme, syllable_count = unpack(rhyme)
					elseif find(rhyme, "%/") then
						-- parse syllable count
						rhyme, syllable_count = match(rhyme, "(.+)/(.+)")
						syllable_count = tonumber(syllable_count)
					else
						syllable_count = nil
					end

					if syllable_count then
						local existing_rhyme = seen_rhymes[rhyme]
						if not existing_rhyme then
							-- add the rhyme if not present
							local new_rhyme = { rhyme = rhyme, num_syl = {syllable_count} }
							table.insert(existing_rhymes, new_rhyme)
							seen_rhymes[rhyme] = { [syllable_count] = true, object = new_rhyme }
						elseif not existing_rhyme[syllable_count] then
							-- add the syllable count to an existing rhyme
							table.insert(existing_rhyme.object.num_syl, syllable_count)
							existing_rhyme[syllable_count] = true
						end
					else
						must_syllabify = true
						local existing_rhyme = seen_rhymes[rhyme]
						if not existing_rhyme then
							local new_rhyme = { rhyme = rhyme }
							table.insert(existing_rhymes, new_rhyme)
							seen_rhymes[rhyme] = { object = new_rhyme }
						end
					end
				end

				if must_syllabify and (can_auto_hyphenate or not automatic_syllabifications) then
					fallback_syllable_counts = {}
					local seen_syllable_counts = {}
					if not manual_syllable_counts then
						-- generate syllable counts from provided syllabifications
						manual_syllable_counts = {}
						for i, syl in ipairs(syllabifications) do
							if type(syl) == "string" then
								manual_syllable_counts[i] = #mw.text.split(syl, '[' .. sep_symbols .. ']')
							else
								manual_syllable_counts[i] = #syllabifications[i]
							end
						end
					end

					-- get all possible syllable counts from syllabifications
					for i, n in ipairs(manual_syllable_counts) do
						if n > 0 and not seen_syllable_counts[n] then
							table.insert(fallback_syllable_counts, n)
							seen_syllable_counts[n] = true
						end
					end
				end

				table.insert(results, "* " .. require("Module:rhymes").format_rhymes(
					{ lang = lang, rhymes = existing_rhymes, num_syl = fallback_syllable_counts }) .. label)
			end
		end
	end

	-- generate homophone rows from homophone data
	if #homophones > 0 then
		local homophone_rows = {}
		for i, homophone in ipairs(homophones) do
			table.insert(homophone_rows, {
				["term"] = homophone,
				["qualifiers"] = homophone_labels[i] and { homophone_labels[i] } or nil
			})
		end
		table.insert(results, "* " .. require("Module:homophones").format_homophones(
			{ lang = lang, homophones = homophone_rows }))
	end

	-- generate hyphenation rows from syllabification data
	if not nohyphen and #syllabifications > 0 then
		local hyphenation_rows = {}
		local seen_hyphenations = {}
		for i, syllabification in ipairs(syllabifications) do
			local hyphenation = fi_hyphenation_from_syllabification(syllabification)
			local hyphenation_key = table.concat(hyphenation, ".")
			-- deduplicate hyphenations
			local hyphenation_row = seen_hyphenations[hyphenation_key]
			if not hyphenation_row then
				hyphenation_row = {
					["hyph"] = hyphenation,
					["qualifiers"] = nil
				}
				seen_hyphenations[hyphenation_key] = hyphenation_row
				table.insert(hyphenation_rows, hyphenation_row)
			end
			-- labels of duplicate hyphenations accumulate on the shared row
			if syllabification_labels[i] then
				hyphenation_row.qualifiers = hyphenation_row.qualifiers or { }
				table.insert(hyphenation_row.qualifiers, syllabification_labels[i])
			end
		end
		table.insert(results, "* " .. require("Module:hyphenation").format_hyphenations(
			{ lang = lang, hyphs = hyphenation_rows, caption = "Hyphenation<sup>([[Appendix:Finnish hyphenation|key]])</sup>" }))
	end

	-- track entries that should have a hyphenation but ended up without one
	if should_have_automatic_hyphenation and not syllabifications[1] then
		table.insert(categories, "fi-pronunciation missing automatic hyphenation")
	end

	return table.concat(results, "\n") .. require("Module:utilities").format_categories(categories, lang)
end
-- hand the table of exported functions (such as export.show) to the caller
return export