Module:tl-pron/sandbox
Jump to navigation
Jump to search
- This module sandbox lacks a documentation subpage. Please create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox of (diff)
-- Based on [[Module:es-pronunc]] by Benwing2.
-- Adaptation by TagaSanPedroAko.
local export = {}

-- Dependencies. [[Module:parse utilities]] is only named here; it is loaded on demand by the functions that need it.
local m_IPA = require("Module:IPA")
local m_table = require("Module:table")
local put_module = "Module:parse utilities"
local lang = require("Module:languages").getByCode("tl")

-- Short aliases for the Unicode-aware string functions provided by Scribunto.
local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rsplit = mw.text.split
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
local usub = mw.ustring.sub
local ulen = mw.ustring.len
local unfd = mw.ustring.toNFD
local unfc = mw.ustring.toNFC

-- Combining diacritics (as they appear after NFD decomposition).
local AC = u(0x0301) -- combining acute accent
local GR = u(0x0300) -- combining grave accent
local CFLEX = u(0x0302) -- combining circumflex
local TILDE = u(0x0303) -- combining tilde
local DIA = u(0x0308) -- combining diaeresis
local MACRON = u(0x0304) -- combining macron

-- Placeholder characters used as internal markers.
local SYLDIV = u(0xFFF0)
local SYLDIV2 = u(0xFFF1)

-- Pattern fragments. Names ending in `_c` are complete character classes; the others are the raw contents of a
-- class and are only meaningful when placed between square brackets.
local vowel = "aeiouAEIOU" -- vowel
local V = "[" .. vowel .. "]"
local accent = AC .. GR .. CFLEX .. MACRON
local accent_c = "[" .. accent .. "]"
local stress_c = "[" .. AC .. GR .. "]"
local ipa_stress = "ˈˌ"
local ipa_stress_c = "[" .. ipa_stress .. "]"
local sylsep = "%-." .. SYLDIV -- hyphen included for syllabifying from spelling
local sylsep_c = "[" .. sylsep .. "]"
local wordsep = "# "
local separator_not_wordsep = accent .. ipa_stress .. sylsep
local separator = separator_not_wordsep .. wordsep
local separator_c = "[" .. separator .. "]"
-- Anything that is neither a vowel nor a separator counts as a consonant.
local C = "[^" .. vowel .. separator .. "]"
local C_OR_WORDSEP = "[^" .. vowel .. separator_not_wordsep .. "]" -- consonants or word separator

-- Set of words (and affixes) looked up by export.IPA() to decide whether a word without an explicit accent mark
-- should be left unstressed. Built with the already-loaded `m_table` instead of requiring Module:table a second time.
local unstressed_words = m_table.listToSet({
	"ang", "sa", "nang", "si", "ni", "kay", -- case markers. "Nang" here is for written "ng", but can also work with nang as in the contraction na'ng and the conjunction "nang"
	"a", "ar", "ba", "bi", "da", "di", "e", "ef", "eks", "dyi", "jey", "key", "em", "ma", "en", "pi", "ra", "es", "ta", "ti", "u", "wa", "way", "ya", "yu", "zey", "zi", -- letter names (abakada and modern Filipino)
	"ko", "mo", "ka", -- single-syllable personal pronouns
	"na", -- linker, also temporal particle
	"daw", "ga", "ha", "pa", -- particles
	"di7", "de7", -- negation words
	"may", -- single-syllable existential
	"pag", "kung", -- subordinating conjunctions
	"at", "o", -- coordinating conjunctions
	"hay", -- interjections
	"de", "del", "el", "la", "las", "los", -- in some Spanish-derived terms and names
	"-an", "-en", "-han", "hi-", "-hin", "hin-", "hing-", "-in", "mag-", "mang-", "pa-", "pag-", "pang-" -- affixes
})
-- Single-result wrapper around rsubn(): performs the substitution and returns only the resulting string,
-- dropping the substitution count.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call to its first return value.
	return (rsubn(term, foo, bar))
end
-- Wrapper around rsubn() whose second return value is a boolean telling whether at least one substitution
-- was made (instead of the raw substitution count).
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end
-- Keep applying rsub() with the same pattern and replacement until a pass leaves the string unchanged, then
-- return that fixed point.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end
-- ĵ, ɟ and ĉ are used internally to represent [d͡ʒ], [j] and [t͡ʃ]
--
-- Convert `text` to NFD (so that accents become separate combining characters), then put back together the
-- letters ñ and ü (and their capitals), which are kept as single characters.
local function decompose(text)
	-- Lookup of "base letter + combining mark" sequences that must stay composed. Any other sequence matched
	-- by the pattern below is absent from this table and is therefore left as it is.
	local keep_composed = {
		["n" .. TILDE] = "ñ",
		["N" .. TILDE] = "Ñ",
		["u" .. DIA] = "ü",
		["U" .. DIA] = "Ü",
	}
	local decomposed = unfd(text)
	local result = rsub(decomposed, ".[" .. TILDE .. DIA .. "]", keep_composed)
	return result
end
-- Split `term` on commas. When no comma is followed by whitespace a plain split is used; otherwise the work is
-- handed to split_on_comma() in the parse-utilities module, which is loaded only in that case.
local function split_on_comma(term)
	if not term:find(",%s") then
		return rsplit(term, ",")
	end
	return require(put_module).split_on_comma(term)
end
-- Strip HTML tags from formatted text and, if any wikilinks are present, resolve them through
-- [[Module:links]]; the removed markup does not contribute to the displayed length.
local function convert_to_raw_text(text)
	local without_tags = rsub(text, "<.->", "")
	if not without_tags:find("%[%[") then
		return without_tags
	end
	local without_links = require("Module:links").remove_links(without_tags)
	return without_links
end
-- Approximate number of displayed characters in `text`, measured after convert_to_raw_text() has removed
-- HTML and links.
local function textual_len(text)
	local raw_text = convert_to_raw_text(text)
	return ulen(raw_text)
end
-- Main syllable-division algorithm. Can be called either directly on spelling (when hyphenating) or after
-- non-trivial processing of respelling in the direction of pronunciation (when generating pronunciation).
-- Syllable breaks are inserted as "."; the text with the breaks added is returned.
--
-- NOTE(review): `is_spelling` is accepted but never consulted, so both kinds of callers get exactly the same
-- treatment (including the "rr" rule below).
local function syllabify_from_spelling_or_pronun(text, is_spelling)
	-- A vowel followed by any number of accent marks.
	local nucleus = "[aeiouAEIOU]" .. accent_c .. "*"

	-- Part 1: Divide before the last consonant in a cluster of consonants between vowels.
	local cluster = "(" .. V .. accent_c .. "*" .. C .. "+)(" .. C .. V .. ")"
	local divided = rsub_repeatedly(text, cluster, "%1.%2")

	-- Move the break in front of sequences that must stay together.
	local keep_together = {
		-- ch and sh (this pattern does not cover "ph").
		{"([cs])%.h", ".%1h"},
		-- ll and rr.
		{"([lr])%.%1", ".%1%1"},
		-- ts where pronounced like "ch" ([[tsika]], [[tsaleko]], [[Tsina]]) or at end of word (e.g. [[gets]],
		-- [[tropatuts]]). This can be overridden when it's actually pronounced separately (e.g. [[tatsulok]]).
		{"t%.s", ".ts"},
	}
	for _, rule in ipairs(keep_together) do
		divided = rsub(divided, rule[1], rule[2])
	end

	-- Part 2: Divide double vowels ([[saan]], [[leeg]], [[giit]], [[poot]]): first before a plain lowercase
	-- vowel, then before a vowel carrying an acute or grave accent.
	divided = rsub_repeatedly(divided, "(" .. nucleus .. ")([aeiou])", "%1.%2")
	divided = rsub_repeatedly(divided, "(" .. nucleus .. ")(" .. V .. stress_c .. ")", "%1.%2")
	return divided
end
-- Syllabify a term given in ordinary spelling and return it with "." inserted at the syllable breaks.
-- User-written "." and "7" are protected with placeholder characters while the generic algorithm runs, and
-- "y"/"w" directly before a vowel are temporarily replaced so that they are treated as consonants.
local function syllabify_from_spelling(text)
	text = decompose(text)
	-- Temporary replacements for characters we want treated as default consonants. The C and related consonant
	-- regexes treat all unknown characters as consonants. The code points start above FFF0/FFF1 because those
	-- are used for SYLDIV and SYLDIV2. (The previously declared but never used TEMP_I/TEMP_U placeholders have
	-- been dropped.)
	local TEMP_Y_CONS = u(0xFFF4)
	local TEMP_W_CONS = u(0xFFF5)
	local TEMP_QU = u(0xFFF6)
	local TEMP_QU_CAPS = u(0xFFF7)
	local TEMP_GU = u(0xFFF8)
	local TEMP_GU_CAPS = u(0xFFF9)

	-- Change user-specified . into SYLDIV so we don't shuffle it around when dividing into syllables.
	text = text:gsub("%.", SYLDIV)
	text = text:gsub("7", SYLDIV2)
	text = rsub(text, "y(" .. V .. ")", TEMP_Y_CONS .. "%1")
	text = rsub(text, "w(" .. V .. ")", TEMP_W_CONS .. "%1")

	text = syllabify_from_spelling_or_pronun(text, "is spelling")

	-- Undo the temporary replacements.
	text = text:gsub(SYLDIV, ".")
	text = text:gsub(SYLDIV2, "7")
	text = text:gsub(TEMP_Y_CONS, "y")
	text = text:gsub(TEMP_W_CONS, "w")
	-- NOTE(review): nothing in this function inserts the QU/GU placeholders, so the four substitutions below only
	-- act on input that already contains those code points. They are kept to preserve existing behavior.
	text = text:gsub(TEMP_QU, "qu")
	text = text:gsub(TEMP_QU_CAPS, "Qu")
	text = text:gsub(TEMP_GU, "gu")
	text = text:gsub(TEMP_GU_CAPS, "Gu")
	return text
end
-- Generate the IPA of a given respelling, where a respelling is the representation of the pronunciation of a given
-- Tagalog term using Tagalog spelling conventions
-- ĵ, ɟ and ĉ are used internally to represent [d͡ʒ], [j] and [t͡ʃ]
--
-- Parameters:
--   text: the respelling; when nil (or false) the title of the current page is used instead.
--   phonetic: when truthy, the additional phonetic rules further below are applied.
-- Returns a table with a single field `text` holding the generated transcription (NFC-normalized).
--
-- The function is a long pipeline of ordered substitutions; later steps rely on the exact output of earlier ones.
function export.IPA(text, phonetic)
text = ulower(text or mw.title.getCurrentTitle().text)
-- decompose everything but ñ and ü
text = decompose(text)
-- convert commas and en/em dashes to IPA foot boundaries
text = rsub(text, "%s*[,–—]%s*", " | ")
-- question mark or exclamation point in the middle of a sentence -> IPA foot boundary
text = rsub(text, "([^%s])%s*[!?]%s*([^%s])", "%1 | %2")
-- canonicalize multiple spaces and remove leading and trailing spaces
local function canon_spaces(text)
text = rsub(text, "%s+", " ")
text = rsub(text, "^ ", "")
text = rsub(text, " $", "")
return text
end
text = canon_spaces(text)
-- Make prefixes unstressed unless they have an explicit stress marker; also make certain
-- monosyllabic words (e.g. [[ang]], [[ng]], [[si]], [[na]], etc.) without stress marks be
-- unstressed.
local words = rsplit(text, " ")
for i, word in ipairs(words) do
-- The condition groups as (ends in hyphen AND has no accent) OR is listed in `unstressed_words`.
if rfind(word, "%-$") and not rfind(word, accent_c) or unstressed_words[word] then
-- add macron to the last vowel not the first one
-- (the greedy ".*" makes the capture end at the last vowel of the word)
words[i] = rsub(word, "^(.*" .. V .. ")", "%1" .. MACRON)
end
end
text = table.concat(words, " ")
-- Convert hyphens to spaces
text = rsub(text, "%-", " ")
-- canonicalize multiple spaces again, which may have been introduced by hyphens
text = canon_spaces(text)
-- now eliminate punctuation
text = rsub(text, "[!?']", "")
-- put # at word beginning and end and double ## at text/foot boundary beginning/end
text = rsub(text, " | ", "# | #")
text = "##" .. rsub(text, " ", "# #") .. "##"
-- Add glottal stop for words starting with vowel and double vowel
text = rsub(text, "([#])([aeiou])", "%1ʔ%2")
text = rsub(text, "([aeiou])([aeiou])", "%1ʔ%2")
--determining whether "y" or "w" is a consonant or a vowel
text = rsub(text, "y(" .. V .. ")", "ɟ%1") -- not the real sound
text = rsub(text,"y([ˈˌ]?)([bdɡjklmnprstw])","i%1%2")
-- NOTE: this also consumes the "#" that follows the "y"
text = rsub(text, "y#", "i")
-- NOTE(review): the next substitution replaces "w" + vowel with itself, i.e. it changes nothing.
text = rsub(text, "w(" .. V .. ")","w%1")
text = rsub(text,"w([ˈˌ]?)([bdɡjklmnprstw])","u%1%2")
-- NOTE: this also consumes the "#" that follows the "w"
text = rsub(text, "w#","u")
-- handle certain combinations; ch ng and sh handling needs to go first
text = rsub(text, "ch", "ts") --not the real sound
text = rsub(text, "ng", "ŋ")
text = rsub(text, "sh", "ʃ")
--x
text = rsub(text, "x", "ks")
--c, gü/gu+e or i, q
text = rsub(text, "c([ie])", "s%1")
text = rsub(text, "gü([ie])", "ɡw%1")
text = rsub(text, "gu([ie])", "ɡ%1")
text = rsub(text, "qu([ie])", "k%1")
text = rsub(text, "ü", "u")
--alphabet-to-phoneme
text = rsub(text, "[cfgjñqrvz7]",
--["g"]="ɡ": U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
{ ["c"] = "k", ["f"] = "p", ["g"] = "ɡ", ["j"] = "ĵ", ["ñ"] = "ɲ", ["q"] = "k", ["r"] = "ɾ", ["v"] = "b", ["z"] = "s", ["7"] = "ʔ"})
-- trill in rr
text = rsub(text, "ɾɾ", "r")
-- ts
text = rsub(text, "ts", "ĉ") --not the real sound
--syllable division
text = syllabify_from_spelling_or_pronun(text, false)
-- Text substituted for each accent mark when it is moved to the front of its syllable.
-- NOTE(review): the grave accent maps to a single space here — confirm that this is intended.
local accent_to_stress_mark = { [AC] = "ˈ", [CFLEX] = "ˈ", [GR] = " ", [MACRON] = "" }
-- Place stress marks in the syllables of one word. `syllables` is modified in place; `last_word` is true for the
-- last word of the text and enables the glottal-stop insertion for circumflex and grave.
local function accent_word(word, syllables, last_word)
-- Now stress the word. If any accent exists in the word (including macron indicating an unaccented word),
-- put the stress mark(s) at the beginning of the indicated syllable(s). Otherwise, apply the default
-- stress rule.
local stress_syllable = 0
local last_accent = ""
if rfind(word, accent_c) then
for i = 1, #syllables do
-- Remove the (last) accent mark of the syllable and prefix the syllable with its replacement; remember the
-- accent seen and, for an acute, the index of the syllable.
syllables[i] = rsub(syllables[i], "^(.*)(" .. accent_c .. ")(.*)$",
function(pre, accent, post)
last_accent = accent
if last_accent == AC then
stress_syllable = i
end
return accent_to_stress_mark[accent] .. pre .. post
end
)
end
if last_accent == CFLEX then
if last_word then
-- append ʔ after a vowel that directly precedes the trailing run of "#", "|" or "$" characters
syllables[#syllables] = rsub(syllables[#syllables], "(.*)(" .. V .. ")([#|$]+)", "%1%2ʔ%3")
end
if stress_syllable == 0 then
syllables[#syllables] = "ˈ" .. syllables[#syllables]
end
elseif last_accent == GR then
if last_word then
syllables[#syllables] = rsub(syllables[#syllables], "(.*)(" .. V .. ")([#|$]+)", "%1%2ʔ%3")
end
if stress_syllable ~= #syllables-1 then
syllables[#syllables-1] = "ˈ" .. syllables[#syllables-1]
end
elseif last_accent == AC and stress_syllable == #syllables then
-- NOTE(review): `force_elongate_final` is not declared `local`, so this writes a global variable; nothing in
-- the visible code reads it.
force_elongate_final = true
end
else
-- Default stress rule. Words without vowels (e.g. IPA foot boundaries) don't get stress.
-- The final syllable is stressed when the word has several syllables and the character right before a "#"
-- is outside the listed set, or when the word is a single syllable containing a vowel; otherwise the
-- penultimate syllable is stressed.
if #syllables > 1 and rfind(word, "[^aeiouʔbcĉdfɡghjɟĵklmnñŋpqrɾstvwxz#]#") or #syllables == 1 and rfind(word, "[aeiou]") then
syllables[#syllables] = "ˈ" .. syllables[#syllables]
elseif #syllables >= 2 then
syllables[#syllables - 1] = "ˈ" .. syllables[#syllables - 1]
end
end
end
local words = rsplit(text, " ")
for j, word in ipairs(words) do
-- accentuation
local syllables = rsplit(word, "%.")
accent_word(word, syllables, j == #words)
-- Reconstruct the word.
words[j] = table.concat(syllables, ".")
-- suppress syllable mark before IPA stress indicator
words[j] = rsub(words[j], "%.(" .. ipa_stress_c .. ")", "%1")
--make all primary stresses but the last one be secondary
words[j] = rsub_repeatedly(words[j], "ˈ(.+)ˈ", "ˌ%1ˈ")
end
text = table.concat(words, " ")
--remove "ɟ" and "w" inserted on vowel pair starting with "i" and "u"
text = rsub(text,"([i])([ˈˌ]?)ɟ([aeou])","%1%2%3")
text = rsub(text,"([u])([ˈˌ]?)w([aei])","%1%2%3")
--add temporary macron for /a/, /i/ and /u/ in stressed syllables so they don't get replaced by unstressed form
text = rsub(text,"([ˈˌ])([#]*)([ʔbćĉdfɡhĵɟklmnŋpɾrstw]?)([ɟlnɾst]?)([a])([ʔbdfɡiklmnŋpɾstu]?)([bdɡklmnpɾst]?)","%1%2%3%4ā%6%7")
text = rsub(text,"([ˈˌ])([#]*)([ʔbćĉdfɡhĵɟklmnŋpɾrstw]?)([ɟlnɾst]?)([i])([ʔbdfɡklmnŋpɾstu]?)([bdɡklmnpɾst]?)","%1%2%3%4ī%6%7")
text = rsub(text,"([ˈˌ])([#]*)([ʔbćĉdfɡhĵɟklmnŋpɾrstw]?)([ɟlnɾst]?)([u])([ʔbdfɡiklmnŋpɾst]?)([bdɡklmnpɾst]?)","%1%2%3%4ū%6%7")
--Corrections for diphthongs
text = rsub(text,"([aā])i","%1j") --ay
text = rsub(text,"([aā])u","%1w") --aw
text = rsub(text,"([e])i","%1j") --ey
text = rsub(text,"([iī])u","%1w") --iw
text = rsub(text,"([o])i","%1j") --oy
text = rsub(text,"([o])u","%1w") --ow
text = rsub(text,"([uū])i","%1j") --uy (mostly in proper nouns)
--phonetic transcription
if phonetic then
--Turn phonemic diphthongs to phonetic diphthongs
text = rsub(text, "([aāeouū])j", "%1ɪ̯")
text = rsub(text, "([aāeiīo])w", "%1ʊ̯")
--change a, i, u to unstressed equivalents (certain forms to restore)
text = rsub(text,"a","ɐ")
text = rsub(text,"i","ɪ")
text = rsub(text,"u","ʊ")
--Combine consonants (except H) followed by I/U and certain stressed vowels
text = rsub(text,"([bkdɡlmnpɾst])ɪ([ˈˌ])([āeoū])","%2%1ɟ%3")
text = rsub(text,"([bkdɡlmnpɾst])ʊ([ˈˌ])([āeīo])","%2%1w%3")
-- nasal place assimilation: to m before b/f/p/m, to n before d/l/s/t (across stress marks, "#", space and ".")
text = rsub(text,"([nŋ])([ˈˌ# .]*[bfpm])","m%2")
text = rsub(text,"([mŋ])([ˈˌ# .]*[dlst])","n%2")
text = rsub(text,"([ɐāeɪɪ̯īoʊʊ̯ū])([#]?)([ ]?)([ˈˌ#.])([k])([ɐāeɪīoʊū])","%1%2%3%4x%6") -- /k/ between vowels
text = rsub(text,"([ɐāeɪɪ̯īoʊʊ̯ū])([ˈˌ.])ɡ([ɐāeɪīoʊū])","%1%2ɣ%3") -- /ɡ/ between vowels
text = rsub(text,"d([ˈˌ.])ɟ","%1ĵ") --/d/ before /j/
text = rsub(text,"n([ˈˌ.])k","ŋ%1k") -- /n/ before /k/ (some proper nouns)
text = rsub(text,"n([ˈˌ.])ɡ","ŋ%1ɡ") -- /n/ before /ɡ/ (some proper nouns and loanwords)
text = rsub(text,"n([ˈˌ.])h","ŋ%1h") -- /n/ before /h/ (some proper nouns)
text = rsub(text,"n([ˈˌ.])m","m%1m") -- /n/ before /m/
text = rsub(text,"n([ˈˌ.])ɟ","%1ɲ") -- /n/ before /j/
text = rsub(text,"s([ˈˌ.])ɟ","%1ʃ") -- /s/ before /j/
text = rsub(text,"t([ˈˌ.])ɟ","%1ĉ") -- /t/ before /j/
text = rsub(text,"t([ˈˌ.])s","%1ć") -- /t/ before /s/
text = rsub(text,"([ˈˌ.])d([ɟj])([ɐāeɪīoʊū])","%1ĵ%3") -- /dj/ before any vowel following stress
text = rsub(text,"([ˈˌ.])n([ɟj])([ɐāeɪīoʊū])","%1ɲ%3") -- /nj/ before any vowel following stress
text = rsub(text,"([ˈˌ.])s([ɟj])([ɐāeɪīoʊū])","%1ʃ%3") -- /sj/ before any vowel following stress
text = rsub(text,"([ˈˌ.])t([ɟj])([ɐāeɪīoʊū])","%1ĉ%3") -- /tj/ before any vowel following stress
text = rsub(text,"([oʊ])([m])([ˈ]?)([pb])","u%2%3%4") -- /o/ and /ʊ/ before /mb/ or /mp/
--final fix for phonetic diphthongs
text = rsub(text,"([ɐ])ɪ̯","aɪ̯") --ay
text = rsub(text,"([ɐ])ʊ̯","aʊ̯") --aw
text = rsub(text,"([ɪ])ʊ̯","iʊ̯") --iw
--delete temporary macron in /a/, /i/ and /u/
-- (the same three substitutions are repeated unconditionally right after this branch)
text = rsub(text,"ā","a")
text = rsub(text,"ī","i")
text = rsub(text,"ū","u")
end
--delete temporary macron in /a/, /i/ and /u/
text = rsub(text,"ā","a")
text = rsub(text,"ī","i")
text = rsub(text,"ū","u")
-- convert fake symbols to real ones
local final_conversions = {
["ĉ"] = "t͡ʃ", -- fake "ch" to real "ch"
["ɟ"] = "j", -- fake "y" to real "y"
["ĵ"] = "d͡ʒ" -- fake "j" to real "j"
}
local final_conversions_phonetic = {
["ĉ"] = "t͡ʃ", -- fake "ch" to real "ch"
["ć"] = "t͡s", -- fake "t.s" to real "t.s"
["ɟ"] = "j", -- fake "y" to real "y"
["ĵ"] = "d͡ʒ" -- fake "j" to real "j"
}
if phonetic then
text = rsub(text, "[ĉćɟĵ]", final_conversions_phonetic)
end
-- This pass always runs; after the phonetic pass above, the three symbols it targets have already been replaced.
text = rsub(text, "[ĉɟĵ]", final_conversions)
-- remove # symbols at word and text boundaries
-- (a "." directly after the "#" is removed together with it)
text = rsub(text, "#([.]?)", "")
-- resuppress syllable mark before IPA stress indicator
text = rsub(text, "%.(" .. ipa_stress_c .. ")", "%1")
text = unfc(text)
local ret = {
text = text,
}
return ret
end
-- For bot usage; {{#invoke:tl-pr|IPA_string|SPELLING|phonetic=PHONETIC}}
-- where
--
-- SPELLING is the word or respelling to generate pronunciation for;
-- PHONETIC is a boolean parameter that is passed on as the `phonetic` argument of export.IPA().
--
-- Returns the `text` field of the table produced by export.IPA().
function export.IPA_string(frame)
	local m_parameters = require("Module:parameters")
	local iargs = m_parameters.process(frame.args, {
		[1] = {},
		["phonetic"] = {type = "boolean"},
	})
	local generated = export.IPA(iargs[1], iargs.phonetic)
	return generated.text
end
-- Generate the pronunciations of the terms in `args.terms` and format them as one bulleted IPA line.
--
-- Fields of `args` read here:
--   terms: list of {term = RESPELLING} objects; optional `q`, `qq`, `a`, `aa` and `refs` fields on a term are
--          carried over to its pronunciation;
--   bullets: number of "*" characters put in front of the line (1 if absent);
--   pre, post: optional text placed before/after the formatted pronunciations.
--
-- Returns a table of the form
--
-- {
--   pronun = {PRONUN, PRONUN, ...},
--   text = "FORMATTED_LINE",
--   text_len = APPROXIMATE_DISPLAYED_LENGTH,
-- }
--
-- The PRONUN table has the following form for the full phonemic/phonetic pronunciation:
--
-- {
--   phonemic = "PHONEMIC",
--   phonetic = "PHONETIC",
-- }
--
-- Here, `phonemic` is the phonemic pronunciation (displayed as /.../) and `phonetic` is the phonetic pronunciation
-- (displayed as [...]).
--
-- For reference, the PRONUN table used for a rhyme pronunciation (not built by this function) has the form:
--
-- {
--   rhyme = "RHYME_PRONUN",
--   num_syl = {NUM, NUM, ...},
--   qualifiers = nil or {QUALIFIER, QUALIFIER, ...},
--   differences = {FLAG = BOOLEAN, FLAG = BOOLEAN, ...},
-- }
--
-- Here, `rhyme` is a phonemic pronunciation such as "an" for [[saan]], and `num_syl` is a list of the possible
-- numbers of syllables for the term(s) that have this rhyme (e.g. {2} for [[saan]], {3} for [[paraan]] and {4} for
-- [[makiraan]]). `num_syl` is used to generate syllable-count categories such as
-- [[Category:Rhymes:Tagalog/an/4 syllables]] in addition to [[Category:Rhymes:Tagalog/an]]. `num_syl` may be nil to
-- suppress the generation of syllable-count categories; this is typically the case with multiword terms.
-- `qualifiers`, if non-nil, comes from the user (NOTE(review): the original comment was cut off at this point).
--
-- NOTE(review): the previous version of this function never produced any output: its formatter iterated with a
-- bare `do ... end` over the undefined names `pronun`, `j` and `is_first`, was never called, and `text` was never
-- set although export.show() returns it. The loop, the call and the computation of the pronunciations below
-- reconstruct the apparent intent — confirm against the production module.
local function generate_pronun(args)
	local ret = {
		pronun = {},
	}

	-- Compute the phonemic and phonetic IPA of every requested term.
	for _, term in ipairs(args.terms or {}) do
		table.insert(ret.pronun, {
			phonemic = export.IPA(term.term, false).text,
			phonetic = export.IPA(term.term, true).text,
			q = term.q,
			qq = term.qq,
			a = term.a,
			aa = term.aa,
			refs = term.refs,
		})
	end

	-- Format the list `pronuns`. `tag`, if given, is added to the left qualifiers of the first pronunciation;
	-- `is_first` controls whether `args.pre`/`args.post` are shown. Returns the formatted line and its
	-- approximate displayed length.
	local function format_pron(tag, pronuns, is_first)
		local pronunciations = {}
		local formatted_pronuns = {}

		local function ins(formatted_part)
			table.insert(formatted_pronuns, formatted_part)
		end

		-- Loop through each pronunciation. For each one, add the phonemic and phonetic versions to `pronunciations`,
		-- for formatting by [[Module:IPA]], and also create an approximation of the formatted version so that we can
		-- estimate its displayed length.
		for j, pronun in ipairs(pronuns) do
			-- Add tag to left qualifiers if first one
			-- FIXME: Consider using accent qualifier for the tag instead.
			local qs = pronun.q
			if j == 1 and tag then
				if qs then
					qs = m_table.deepcopy(qs)
					table.insert(qs, tag)
				else
					qs = {tag}
				end
			end

			local first_pronun = #pronunciations + 1

			if not pronun.phonemic and not pronun.phonetic then
				error("Internal error: Saw neither phonemic nor phonetic pronunciation")
			end

			if pronun.phonemic then -- missing if 'raw:[...]' given
				-- don't display syllable division markers in phonemic
				local slash_pron = "/" .. pronun.phonemic:gsub("%.", "") .. "/"
				table.insert(pronunciations, {
					pron = slash_pron,
				})
				ins(slash_pron)
			end

			if pronun.phonetic then -- missing if 'raw:/.../' given
				local bracket_pron = "[" .. pronun.phonetic .. "]"
				table.insert(pronunciations, {
					pron = bracket_pron,
				})
				ins(bracket_pron)
			end

			local last_pronun = #pronunciations

			-- Left-hand annotations go on the first item of this pronunciation, right-hand ones on the last.
			if qs then
				pronunciations[first_pronun].q = qs
			end
			if pronun.a then
				pronunciations[first_pronun].a = pronun.a
			end
			if j > 1 then
				pronunciations[first_pronun].separator = ", "
				ins(", ")
			end
			if pronun.qq then
				pronunciations[last_pronun].qq = pronun.qq
			end
			if pronun.aa then
				pronunciations[last_pronun].aa = pronun.aa
			end
			if qs or pronun.a or pronun.qq or pronun.aa then
				local data = {
					q = qs,
					a = pronun.a,
					qq = pronun.qq,
					aa = pronun.aa
				}
				-- Note: This inserts the actual formatted qualifier text, including HTML and such, but the later call
				-- to textual_len() removes all HTML and reduces links.
				ins(require("Module:pron qualifier").format_qualifiers(data, ""))
			end
			if pronun.refs then
				pronunciations[last_pronun].refs = pronun.refs
				-- Approximate the reference using a footnote notation. This will be slightly inaccurate if there are
				-- more than nine references but that is rare.
				ins(string.rep("[1]", #pronun.refs))
			end
			if first_pronun ~= last_pronun then
				pronunciations[last_pronun].separator = " "
				ins(" ")
			end
		end

		local bullet = string.rep("*", args.bullets or 1) .. " "
		-- Here we construct the formatted line in `formatted`, and also try to construct the equivalent without HTML
		-- and wiki markup in `formatted_for_len`, so we can compute the approximate textual length for use in sizing
		-- the toggle box with the "more" button on the right.
		local pre = is_first and args.pre and args.pre .. " " or ""
		local post = is_first and args.post and " " .. args.post or ""
		local formatted = bullet .. pre .. m_IPA.format_IPA_full { lang = lang, items = pronunciations, separator = "" } .. post
		local formatted_for_len = bullet .. pre .. "IPA(key): " .. table.concat(formatted_pronuns) .. post
		return formatted, textual_len(formatted_for_len)
	end

	ret.text, ret.text_len = format_pron(nil, ret.pronun, true)
	return ret
end
-- Parse one respelling argument.
--
-- * "raw:/PHONEMIC/", "raw:[PHONETIC]" or "raw:/PHONEMIC/ [PHONETIC]" yields
--   {raw = true, raw_phonemic = ..., raw_phonetic = ...}; any other text after "raw:" is reported through
--   `parse_err`.
-- * "+" stands for `pagename`.
-- * Anything else is returned unchanged as {term = RESPELLING}.
local function parse_respelling(respelling, pagename, parse_err)
	local raw_respelling = respelling:match("^raw:(.*)$")

	if not raw_respelling then
		local term = respelling
		if term == "+" then
			term = pagename
		end
		return {term = term}
	end

	-- Try the combined form first, then phonemic only, then phonetic only.
	local phonemic, phonetic = raw_respelling:match("^/(.*)/ %[(.*)%]$")
	if not phonemic then
		phonemic = raw_respelling:match("^/(.*)/$")
	end
	if not phonemic then
		phonetic = raw_respelling:match("^%[(.*)%]$")
	end
	if not (phonemic or phonetic) then
		parse_err(("Unable to parse raw respelling '%s', should be one of /.../, [...] or /.../ [...]")
			:format(raw_respelling))
	end

	return {
		raw = true,
		raw_phonemic = phonemic,
		raw_phonetic = phonetic,
	}
end
-- External entry point for {{tl-IPA}}.
--
-- Reads the arguments of the parent frame, uses parameter 1 (or, if absent, the title of the current page) as the
-- single term and returns the `text` field of the table produced by generate_pronun().
-- NOTE(review): an identical definition of export.show appears again further down in this file; being assigned
-- later, that one is the definition in effect after the module has loaded.
function export.show(frame)
	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, {
		[1] = {},
		["pre"] = {},
		["post"] = {},
		["ref"] = {},
		["bullets"] = {type = "number", default = 1},
	})
	args.terms = {{term = args[1] or mw.title.getCurrentTitle().text}}
	local generated = generate_pronun(args)
	return generated.text
end
-- Return the number of syllables of a phonemic representation, which should have syllable dividers in it but no
-- hyphens. The count is the number of "." characters left after normalization, plus one.
local function get_num_syl_from_phonemic(phonemic)
	-- Maybe we should just count vowels instead of the below code.
	local without_feet = rsub(phonemic, "|", " ") -- remove IPA foot boundaries
	local words = rsplit(without_feet, " +")
	for idx = 1, #words do
		-- IPA stress marks are syllable divisions if between characters; otherwise just remove.
		local with_dots = rsub(words[idx], "(.)[ˌˈ](.)", "%1.%2")
		words[idx] = rsub(with_dots, "[ˌˈ]", "")
	end
	-- There should be a syllable boundary between words.
	local joined = table.concat(words, ".")
	local dots_only = rsub(joined, "[^.]", "")
	return ulen(dots_only) + 1
end
-- Get the rhyme by truncating everything up through the last stress mark + any following consonants, and remove
-- syllable boundary markers. The tie bar is also dropped from "t͡ʃ".
--
-- Returns exactly one value, the rhyme string. (The earlier one-expression form ended in a `:gsub()` call inside
-- the `return` statement and therefore also returned gsub's substitution count as a second value.)
local function convert_phonemic_to_rhyme(phonemic)
	-- NOTE: This works because the phonemic vowels are just [aeiou] possibly with diacritics that are separate
	-- Unicode chars. If we want to handle things like ɛ or ɔ we need to add them to `vowel`.
	local rhyme = rsub(phonemic, ".*[ˌˈ]", "")
	rhyme = rsub(rhyme, "^[^" .. vowel .. "]*", "")
	-- Assigning to a single variable discards the count returned by gsub.
	rhyme = rhyme:gsub("%.", "")
	rhyme = rhyme:gsub("t͡ʃ", "tʃ")
	return rhyme
end
-- Split a syllabified spelling at each "." and return the result of the split (the syllables).
local function split_syllabified_spelling(spelling)
return rsplit(spelling, "%.")
end
-- "Align" syllabification to original spelling by matching character-by-character, allowing for extra syllable and
-- accent markers in the syllabification. If we encounter an extra syllable marker (.), we allow and keep it. If we
-- encounter an extra accent marker (acute, grave or circumflex) in the syllabification, we drop it. In any other
-- case, we return nil indicating the alignment failed. On success the aligned text is returned in NFC form.
local function align_syllabification_to_spelling(syllab, spelling)
	local droppable_accent = {[AC] = true, [GR] = true, [CFLEX] = true}
	local syl_chars = rsplit(decompose(syllab), "")
	local spell_chars = rsplit(decompose(spelling), "")
	local aligned = {}
	local si, pi = 1, 1

	while si <= #syl_chars or pi <= #spell_chars do
		local sc = syl_chars[si]
		if sc == spell_chars[pi] then
			-- same character on both sides: keep it and advance both
			aligned[#aligned + 1] = sc
			si = si + 1
			pi = pi + 1
		elseif sc == "." then
			-- extra syllable marker: keep it
			aligned[#aligned + 1] = sc
			si = si + 1
		elseif sc ~= nil and droppable_accent[sc] then
			-- extra accent marker: drop it
			si = si + 1
		else
			-- non-matching character, or one side ran out before the other
			return nil
		end
	end

	-- The loop only ends normally once both character lists are used up, so no left-over check is needed.
	return unfc(table.concat(aligned))
end
-- Build the object describing one syllabification: the syllabified term itself under `syllabification` and its
-- split into syllables under `hyph`.
local function generate_hyph_obj(term)
	local obj = {}
	obj.syllabification = term
	obj.hyph = split_syllabified_spelling(term)
	return obj
end
-- Check whether `word` contains a vowel (a character of the class V). Word should already be decomposed.
-- The result of the find call is returned as is, i.e. a truthy position when a vowel is found and nil otherwise.
local function word_has_vowels(word)
return rfind(word, V)
end
-- Return true if every space- or hyphen-separated part of `term` contains a vowel, false as soon as one
-- non-empty part has none.
local function all_words_have_vowels(term)
	local parts = rsplit(decompose(term), "[ %-]")
	for idx = 1, #parts do
		local part = parts[idx]
		-- Allow empty word; this occurs with prefixes and suffixes.
		local is_empty = part == ""
		if not (is_empty or word_has_vowels(part)) then
			return false
		end
	end
	return true
end
-- Decide whether a rhyme should be generated from the respelling `term`. The result is false when one of the
-- exclusions below applies; otherwise it is the (truthy or nil) result of word_has_vowels().
local function should_generate_rhyme_from_respelling(term)
	local words = rsplit(decompose(term), " +")
	-- no if multiple words
	if #words ~= 1 then
		return false
	end
	local word = words[1]
	-- no if word is composed of hyphenated parts (e.g. [[Asya-Pasipiko]])
	if word:find(".%-.") then
		return false
	end
	-- no if word is a prefix
	if word:find("%-$") then
		return false
	end
	-- no if word is an unstressed suffix (leading hyphen plus circumflex)
	if word:find("^%-") and word:find(CFLEX) then
		return false
	end
	-- no if word has no vowels (e.g. a single letter); parentheses keep only the first result
	return (word_has_vowels(word))
end
-- Decide whether a rhyme should be generated from the given IPA: false when it contains whitespace, otherwise
-- the (truthy or nil) result of word_has_vowels() on the decomposed IPA.
local function should_generate_rhyme_from_ipa(ipa)
	if ipa:find("%s") then
		return false
	end
	-- parentheses keep only the first result
	return (word_has_vowels(decompose(ipa)))
end
-- Parse an argument that may carry inline modifiers of the form <PREFIX:VALUE> and return a list of objects.
--
-- Parameters:
--   arg: the raw argument text.
--   put: the parse-utilities module, or nil to have it loaded when needed.
--   parse_err: function called with a message when the argument is malformed.
--   generate_obj: function building the base object from the text in front of the modifiers.
--   param_mods: table of additional recognized prefixes; an entry may have `item_dest` (key to store under,
--     defaulting to the prefix) and `convert` (function applied to the value). The prefixes q, qq, a and aa are
--     always recognized and collect their values in lists.
--   no_split_on_comma: if truthy, the argument is treated as a single item instead of being split on commas.
local function parse_pron_modifier(arg, put, parse_err, generate_obj, param_mods, no_split_on_comma)
	local objs = {}

	-- Simple case: no inline modifiers at all.
	if not arg:find("<") then
		if no_split_on_comma then
			table.insert(objs, generate_obj(arg))
		else
			for _, term in ipairs(split_on_comma(arg)) do
				table.insert(objs, generate_obj(term))
			end
		end
		return objs
	end

	put = put or require(put_module)

	local qualifier_prefixes = {q = true, qq = true, a = true, aa = true}

	-- Sorted list of all accepted prefixes, each rendered as 'PREFIX' followed by `suffix` inside the quotes.
	local function quoted_valid_prefixes(suffix)
		local prefixes = {"q", "qq", "a", "aa"}
		for param_mod in pairs(param_mods) do
			table.insert(prefixes, param_mod)
		end
		table.sort(prefixes)
		for i, valid_prefix in ipairs(prefixes) do
			prefixes[i] = "'" .. valid_prefix .. suffix .. "'"
		end
		return prefixes
	end

	local segments = put.parse_balanced_segment_run(arg, "<", ">")
	local groups
	if no_split_on_comma then
		groups = {segments}
	else
		groups = put.split_alternating_runs_on_comma(segments)
	end

	for _, group in ipairs(groups) do
		local obj = generate_obj(group[1])
		-- Even positions hold the modifiers, the odd positions after them the text in between.
		for j = 2, #group - 1, 2 do
			local modifier, trailing = group[j], group[j + 1]
			if trailing ~= "" then
				parse_err("Extraneous text '" .. trailing .. "' after modifier")
			end
			local modtext = modifier:match("^<(.*)>$")
			if not modtext then
				parse_err("Internal error: Modifier '" .. modifier .. "' isn't surrounded by angle brackets")
			end
			local prefix, val = modtext:match("^([a-z]+):(.*)$")
			if not prefix then
				parse_err("Modifier " .. modifier .. " lacks a prefix, should begin with one of " ..
					m_table.serialCommaJoin(quoted_valid_prefixes(":")))
			end

			local spec = param_mods[prefix]
			if qualifier_prefixes[prefix] then
				local values = obj[prefix]
				if not values then
					values = {}
					obj[prefix] = values
				end
				table.insert(values, val)
			elseif spec then
				local key = spec.item_dest or prefix
				if obj[key] then
					parse_err("Modifier '" .. prefix .. "' specified more than once")
				end
				if spec.convert then
					obj[key] = spec.convert(val)
				else
					obj[key] = val
				end
			else
				parse_err("Unrecognized prefix '" .. prefix .. "' in modifier " .. modifier
					.. ", should be " .. m_table.serialCommaJoin(quoted_valid_prefixes("")))
			end
		end
		table.insert(objs, obj)
	end

	return objs
end
-- Parse a rhyme argument into a list of {rhyme = ...} objects. Besides the qualifier modifiers, the modifier
-- <s:N,N,...> is accepted and stored under `num_syl` as a list of numbers.
local function parse_rhyme(arg, put, parse_err)
	-- Convert the comma-separated value of <s:...> into a list of numbers, reporting non-numeric items.
	local function to_syllable_counts(spec)
		local counts = rsplit(spec, ",")
		for i, count in ipairs(counts) do
			if not count:find("^[0-9]+$") then
				parse_err("Number of syllables '" .. count .. "' should be numeric")
			end
			counts[i] = tonumber(count)
		end
		return counts
	end

	local function make_rhyme_obj(term)
		return {rhyme = term}
	end

	return parse_pron_modifier(arg, put, parse_err, make_rhyme_obj, {
		s = {
			item_dest = "num_syl",
			convert = to_syllable_counts,
		},
	})
end
-- Parse a hyphenation argument into a list of objects built by generate_hyph_obj(). No modifiers other than
-- the qualifiers are accepted, hence the empty table of extra modifiers.
local function parse_hyph(arg, put, parse_err)
	return parse_pron_modifier(arg, put, parse_err, generate_hyph_obj, {})
end
-- Parse a homophone argument into a list of {term = ...} objects, accepting the modifiers t, gloss, pos, alt,
-- lit, id and g in addition to the qualifiers.
local function parse_homophone(arg, put, parse_err)
	local function make_term_obj(term)
		return {term = term}
	end

	local function split_genders(spec)
		return rsplit(spec, ",")
	end

	-- Modifiers stored under their own name, without conversion.
	local param_mods = {}
	for _, name in ipairs({"gloss", "pos", "alt", "lit", "id"}) do
		param_mods[name] = {}
	end
	-- We need to store the <t:...> inline modifier into the "gloss" key of the parsed term,
	-- because that is what [[Module:links]] (called from [[Module:homophones]]) expects.
	param_mods.t = {item_dest = "gloss"}
	-- We need to store the <g:...> inline modifier into the "genders" key of the parsed term,
	-- because that is what [[Module:links]] (called from [[Module:homophones]]) expects.
	param_mods.g = {item_dest = "genders", convert = split_genders}

	return parse_pron_modifier(arg, put, parse_err, make_term_obj, param_mods)
end
-- Build an audio object {file = ..., gloss = ...} from an argument of the form "FILE#GLOSS" or "FILE;GLOSS"
-- (whitespace around the delimiter is ignored). "#" takes precedence when present; without any delimiter the
-- whole argument is the file name and the gloss is "Audio".
local function generate_audio_obj(arg)
	local delimiter = arg:find("#") and "#" or ";"
	local file, gloss = arg:match("^(.-)%s*" .. delimiter .. "%s*(.*)$")
	if file then
		return {file = file, gloss = gloss}
	end
	return {file = arg, gloss = "Audio"}
end
-- Parse an audio argument into a list of objects built by generate_audio_obj(). No modifiers other than the
-- qualifiers are accepted.
local function parse_audio(arg, put, parse_err)
	-- Don't split on comma because some filenames have embedded commas not followed by a space
	-- (typically followed by an underscore).
	local no_split_on_comma = "no split on comma"
	return parse_pron_modifier(arg, put, parse_err, generate_audio_obj, {}, no_split_on_comma)
end
-- External entry point for {{tl-IPA}}.
-- NOTE(review): the original header here also named {{tl-pr}} as a user of this entry point — confirm which
-- template(s) actually invoke it. An identical definition of export.show also appears earlier in this file; this
-- later assignment is the one that remains in effect.
--
-- Reads the arguments of the parent frame, uses parameter 1 (or, if absent, the title of the current page) as the
-- single term and returns the `text` field of the table produced by generate_pronun().
function export.show(frame)
	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, {
		[1] = {},
		["pre"] = {},
		["post"] = {},
		["ref"] = {},
		["bullets"] = {type = "number", default = 1},
	})
	args.terms = {{term = args[1] or mw.title.getCurrentTitle().text}}
	local generated = generate_pronun(args)
	return generated.text
end
-- Count the syllables in a phonemic representation, which should contain syllable dividers but no hyphens.
-- The count is the number of syllable boundaries plus one, where word breaks and word-internal stress marks
-- both count as boundaries.
local function get_num_syl_from_phonemic(phonemic)
	-- Maybe we should just count vowels instead of the below code.
	local dotted_words = {}
	-- IPA foot boundaries (|) are treated like spaces.
	for _, word in ipairs(rsplit(rsub(phonemic, "|", " "), " +")) do
		-- A stress mark between two characters becomes a syllable divider; any remaining one is dropped.
		word = rsub(rsub(word, "(.)[ˌˈ](.)", "%1.%2"), "[ˌˈ]", "")
		table.insert(dotted_words, word)
	end
	-- Joining with "." puts a syllable boundary between words.
	local joined = table.concat(dotted_words, ".")
	return ulen(rsub(joined, "[^.]", "")) + 1
end
-- Get the rhyme by truncating everything up through the last stress mark + any following consonants, and remove
-- syllable boundary markers. The affricate tie bar in t͡ʃ is also dropped.
-- Returns exactly one value, the rhyme string.
local function convert_phonemic_to_rhyme(phonemic)
	-- NOTE: This works because the phonemic vowels are just [aeiou] possibly with diacritics that are separate
	-- Unicode chars. If we want to handle things like ɛ or ɔ we need to add them to `vowel`.
	local rhyme = rsub(phonemic, ".*[ˌˈ]", "")
	rhyme = rsub(rhyme, "^[^" .. vowel .. "]*", "")
	-- Assign rather than `return x:gsub(...)`: string.gsub also returns the substitution count, which would
	-- otherwise leak out as a second return value and break callers that forward all results.
	rhyme = rhyme:gsub("%.", ""):gsub("t͡ʃ", "tʃ")
	return rhyme
end
-- Break a syllabified spelling into its pieces at each literal period.
local function split_syllabified_spelling(spelling)
	local syllables = rsplit(spelling, "%.")
	return syllables
end
-- "Align" syllabification to original spelling by matching character-by-character, allowing for extra syllable and
-- accent markers in the syllabification. If we encounter an extra syllable marker (.), we allow and keep it. If we
-- encounter an extra accent marker in the syllabification, we drop it. In any other case, we return nil indicating
-- the alignment failed.
local function align_syllabification_to_spelling(syllab, spelling)
	-- Compare the two strings one decomposed character at a time.
	local syl = rsplit(decompose(syllab), "")
	local spell = rsplit(decompose(spelling), "")
	local nsyl, nspell = #syl, #spell
	local out = {}
	local si, pi = 1, 1
	-- The loop only finishes once BOTH inputs are used up; any mismatch (including one side running out
	-- early) returns nil from inside the loop.
	while si <= nsyl or pi <= nspell do
		local sc = syl[si]
		if sc == spell[pi] then
			-- Same character on both sides: keep it and advance both.
			out[#out + 1] = sc
			si = si + 1
			pi = pi + 1
		elseif sc == "." then
			-- Extra syllable divider in the syllabification: keep it.
			out[#out + 1] = sc
			si = si + 1
		elseif sc == AC or sc == GR or sc == CFLEX then
			-- Extra accent mark in the syllabification: drop it.
			si = si + 1
		else
			-- Anything else cannot be aligned.
			return nil
		end
	end
	return unfc(table.concat(out))
end
-- Wrap a syllabified spelling in a table holding the original string (`syllabification`) and its list of
-- period-separated pieces (`hyph`).
local function generate_hyph_obj(term)
	local obj = {syllabification = term}
	obj.hyph = split_syllabified_spelling(term)
	return obj
end
-- Search `word` for any character in the vowel class `V` and return the result of the search unchanged, so the
-- result is truthy when a vowel is present and nil otherwise. Callers use it only for its truthiness.
-- Word should already be decomposed.
local function word_has_vowels(word)
return rfind(word, V)
end
-- Return true if every space- or hyphen-separated piece of `term` (after decomposition) contains a vowel.
local function all_words_have_vowels(term)
	for _, word in ipairs(rsplit(decompose(term), "[ %-]")) do
		-- Prefixes and suffixes leave an empty piece next to their hyphen; empty pieces are allowed.
		local vowelless = word ~= "" and not word_has_vowels(word)
		if vowelless then
			return false
		end
	end
	return true
end
-- Decide whether a rhyme can be derived automatically from a respelling. The result is only meaningful for its
-- truthiness.
local function should_generate_rhyme_from_respelling(term)
	local words = rsplit(decompose(term), " +")
	-- No if multiple words.
	if #words ~= 1 then
		return false
	end
	local word = words[1]
	-- No if the word is composed of hyphenated parts, or is a prefix (ends in a hyphen).
	if word:find(".%-.") or word:find("%-$") then
		return false
	end
	-- No if the word is a suffix (begins with a hyphen) carrying a circumflex.
	if word:find("^%-") and word:find(CFLEX) then
		return false
	end
	-- No if the word has no vowels (e.g. a single letter).
	return (word_has_vowels(word))
end
-- Decide whether a rhyme can be derived from a raw IPA string: it must contain no whitespace and must contain
-- at least one vowel after decomposition. The result is only meaningful for its truthiness.
local function should_generate_rhyme_from_ipa(ipa)
	if ipa:find("%s") then
		return false
	end
	return (word_has_vowels(decompose(ipa)))
end
-- Resolve user-specified rhymes into displayable rhyme objects, filling in the number of syllables where the
-- user did not give it.
--   rhymes: list of parsed rhyme objects with fields `rhyme`, optional `num_syl` and optional `qualifiers`.
--   hyphs: list of hyphenation objects; their `syllabification` field is used to count syllables.
--   parsed_respellings: list of parsed respelling sets whose `pronun` field has already been generated.
--   rhyme_ret: table that receives the result in `rhyme_ret.pronun` (a list of {rhyme, num_syl, qualifiers}).
local function do_rhymes(rhymes, hyphs, parsed_respellings, rhyme_ret)
	rhyme_ret.pronun = {}
	for _, rhyme in ipairs(rhymes) do
		local num_syl = rhyme.num_syl
		local no_num_syl = false

		-- If user explicitly gave the rhyme but didn't explicitly specify the number of syllables, try to take it
		-- from the hyphenation.
		if not num_syl then
			num_syl = {}
			for _, hyph in ipairs(hyphs) do
				if should_generate_rhyme_from_respelling(hyph.syllabification) then
					local this_num_syl = 1 + ulen(rsub(hyph.syllabification, "[^.]", ""))
					m_table.insertIfNot(num_syl, this_num_syl)
				else
					no_num_syl = true
					break
				end
			end
			if no_num_syl or #num_syl == 0 then
				num_syl = nil
			end
		end

		-- If that fails and term is single-word, try to take it from the phonemic.
		if not no_num_syl and not num_syl then
			-- Start from an empty list; `num_syl` is nil at this point and must not be passed to insertIfNot().
			num_syl = {}
			for _, parsed in ipairs(parsed_respellings) do
				-- The generated pronunciations are a plain list here (no per-dialect sub-tables), matching how
				-- export.show_pr iterates parsed.pronun.pronun.
				for _, pronun in ipairs(parsed.pronun.pronun) do
					-- Check that pronun.phonemic exists (it may not if raw phonetic-only pronun is given).
					if pronun.phonemic then
						if not should_generate_rhyme_from_ipa(pronun.phonemic) then
							no_num_syl = true
							break
						end
						-- Count number of syllables by looking at syllable boundaries (including stress marks).
						local this_num_syl = get_num_syl_from_phonemic(pronun.phonemic)
						m_table.insertIfNot(num_syl, this_num_syl)
					end
				end
				if no_num_syl then
					break
				end
			end
			if no_num_syl or #num_syl == 0 then
				num_syl = nil
			end
		end

		table.insert(rhyme_ret.pronun, {
			rhyme = rhyme.rhyme,
			num_syl = num_syl,
			qualifiers = rhyme.qualifiers,
		})
	end
end
-- Parse a pronunciation-related spec (rhyme, hyphenation, homophone, audio) that may carry inline modifiers in
-- angle brackets, e.g. "foo<q:bar>,baz".
--   arg: the raw argument text.
--   put: [[Module:parse utilities]] if already loaded, else nil (it is loaded on demand).
--   parse_err: function called with a message when the argument is malformed.
--   generate_obj: function turning the bare term text into the object to be returned.
--   param_mods: table of recognized modifier prefixes; each entry may have `item_dest` (key to store under)
--     and `convert` (function applied to the modifier value). The qualifier prefixes q, qq, a and aa are always
--     recognized and collected into lists.
--   no_split_on_comma: if truthy, treat the whole argument as a single term.
-- Returns a list of the generated objects.
local function parse_pron_modifier(arg, put, parse_err, generate_obj, param_mods, no_split_on_comma)
	local retval = {}

	-- Simple case: no inline modifiers at all.
	if not arg:find("<") then
		if no_split_on_comma then
			table.insert(retval, generate_obj(arg))
		else
			for _, term in ipairs(split_on_comma(arg)) do
				table.insert(retval, generate_obj(term))
			end
		end
		return retval
	end

	put = put or require(put_module)

	local qualifier_prefixes = {q = true, qq = true, a = true, aa = true}

	-- Sorted, quoted, comma-joined list of every recognized prefix, for error messages. `suffix` is appended to
	-- each prefix inside the quotes.
	local function describe_valid_prefixes(suffix)
		local names = {}
		for name in pairs(param_mods) do
			table.insert(names, name)
		end
		table.insert(names, "q")
		table.insert(names, "qq")
		table.insert(names, "a")
		table.insert(names, "aa")
		table.sort(names)
		for i, name in ipairs(names) do
			names[i] = "'" .. name .. suffix .. "'"
		end
		return m_table.serialCommaJoin(names)
	end

	local segments = put.parse_balanced_segment_run(arg, "<", ">")
	local groups
	if no_split_on_comma then
		groups = {segments}
	else
		groups = put.split_alternating_runs_on_comma(segments)
	end

	for _, group in ipairs(groups) do
		local obj = generate_obj(group[1])
		-- Even positions hold the <...> modifiers; the odd position after each must be empty.
		for j = 2, #group - 1, 2 do
			local modifier, trailing = group[j], group[j + 1]
			if trailing ~= "" then
				parse_err("Extraneous text '" .. trailing .. "' after modifier")
			end
			local modtext = modifier:match("^<(.*)>$")
			if not modtext then
				parse_err("Internal error: Modifier '" .. modifier .. "' isn't surrounded by angle brackets")
			end
			local prefix, val = modtext:match("^([a-z]+):(.*)$")
			if not prefix then
				parse_err("Modifier " .. modifier .. " lacks a prefix, should begin with one of " ..
					describe_valid_prefixes(":"))
			end
			if qualifier_prefixes[prefix] then
				local list = obj[prefix]
				if not list then
					list = {}
					obj[prefix] = list
				end
				table.insert(list, val)
			elseif param_mods[prefix] then
				local spec = param_mods[prefix]
				local key = spec.item_dest or prefix
				if obj[key] then
					parse_err("Modifier '" .. prefix .. "' specified more than once")
				end
				if spec.convert then
					obj[key] = spec.convert(val)
				else
					obj[key] = val
				end
			else
				parse_err("Unrecognized prefix '" .. prefix .. "' in modifier " .. modifier
					.. ", should be " .. describe_valid_prefixes(""))
			end
		end
		table.insert(retval, obj)
	end

	return retval
end
-- Parse a rhyme spec into a list of objects of the form {rhyme = ...}. The inline modifier <s:N,N,...> gives the
-- number(s) of syllables and is stored as a list of numbers under `num_syl`.
local function parse_rhyme(arg, put, parse_err)
	-- Convert "2,3" into {2, 3}; every comma-separated piece must consist only of digits.
	local function to_syllable_counts(spec)
		local counts = rsplit(spec, ",")
		for idx = 1, #counts do
			local piece = counts[idx]
			if not piece:find("^[0-9]+$") then
				parse_err("Number of syllables '" .. piece .. "' should be numeric")
			end
			counts[idx] = tonumber(piece)
		end
		return counts
	end

	local rhyme_mods = {
		s = {item_dest = "num_syl", convert = to_syllable_counts},
	}

	return parse_pron_modifier(arg, put, parse_err, function(term)
		return {rhyme = term}
	end, rhyme_mods)
end
-- Parse a syllabification spec. Apart from the generic qualifier modifiers handled by parse_pron_modifier itself,
-- no inline modifiers are recognized, hence the empty modifier table.
local function parse_hyph(arg, put, parse_err)
	return parse_pron_modifier(arg, put, parse_err, generate_hyph_obj, {})
end
-- Parse a homophone spec into a list of objects of the form {term = ...} plus any inline modifiers.
-- Recognized modifiers (besides the generic qualifiers): t, gloss, pos, alt, lit, id and g.
local function parse_homophone(arg, put, parse_err)
	local function generate_obj(term)
		return {term = term}
	end
	local param_mods = {
		t = {
			-- We need to store the <t:...> inline modifier into the "gloss" key of the parsed term,
			-- because that is what [[Module:links]] (called from [[Module:homophones]]) expects.
			item_dest = "gloss",
		},
		gloss = {},
		pos = {},
		alt = {},
		lit = {},
		id = {},
		-- Kept in sync with the earlier copy of this function in this file, which also accepts <g:...>.
		g = {
			-- We need to store the <g:...> inline modifier into the "genders" key of the parsed term,
			-- because that is what [[Module:links]] (called from [[Module:homophones]]) expects.
			item_dest = "genders",
			convert = function(arg)
				return rsplit(arg, ",")
			end,
		},
	}
	return parse_pron_modifier(arg, put, parse_err, generate_obj, param_mods)
end
-- Build an audio object {file = ..., gloss = ...} from "FILE", "FILE#GLOSS" or "FILE;GLOSS". A "#" anywhere in
-- the argument selects "#" as the delimiter; otherwise ";" is tried. Whitespace around the delimiter is dropped.
-- If no delimiter is found, the whole argument is the file name and the gloss is "Audio".
local function generate_audio_obj(arg)
	local pattern
	if arg:find("#") then
		pattern = "^(.-)%s*#%s*(.*)$"
	else
		pattern = "^(.-)%s*;%s*(.*)$"
	end
	local file, gloss = arg:match(pattern)
	if not file then
		return {file = arg, gloss = "Audio"}
	end
	return {file = file, gloss = gloss}
end
-- Parse an audio spec. Only the generic qualifier modifiers are accepted. The argument is deliberately not split
-- on commas, because some filenames contain commas not followed by a space (typically followed by an underscore).
local function parse_audio(arg, put, parse_err)
	local no_extra_mods = {}
	return parse_pron_modifier(arg, put, parse_err, generate_audio_obj, no_extra_mods, "no split on comma")
end
-- External entry point for {{tl-pr}}.
--
-- Arguments (read from the parent frame):
--   1=, 2=, ...: respellings; each may hold comma-separated terms and inline modifiers such as <rhyme:...>,
--     <hyph:...>, <hmp:...>, <audio:...>, <bullets:...>, <pre:...>, <post:...>, <ref:...>, <q:...>.
--     Defaults to "+" when no respelling is given.
--   rhyme=, hyph=, hmp=, audio=: overall rhymes, syllabifications, homophones and audio files.
--   pagename=: override for the page name.
--
-- Returns the wikitext of the pronunciation section: one pronunciation line per respelling followed by audio,
-- rhymes, syllabifications and homophones. Rhymes, syllabifications and homophones that are identical across all
-- respellings are shown once at the bottom; otherwise they are shown indented beneath each respelling.
function export.show_pr(frame)
	local params = {
		[1] = {list = true},
		["rhyme"] = {},
		["hyph"] = {},
		["hmp"] = {},
		["audio"] = {list = true},
		["pagename"] = {},
	}
	local parargs = frame:getParent().args
	local args = require("Module:parameters").process(parargs, params)
	local pagename = args.pagename or mw.title.getCurrentTitle().subpageText

	-- Parse the arguments.
	local respellings = #args[1] > 0 and args[1] or {"+"}
	local parsed_respellings = {}
	local function overall_parse_err(msg, arg, val)
		error(msg .. ": " .. arg .. "= " .. val)
	end
	local overall_rhyme = args.rhyme and
		parse_rhyme(args.rhyme, nil, function(msg) overall_parse_err(msg, "rhyme", args.rhyme) end) or nil
	local overall_hyph = args.hyph and
		parse_hyph(args.hyph, nil, function(msg) overall_parse_err(msg, "hyph", args.hyph) end) or nil
	local overall_hmp = args.hmp and
		parse_homophone(args.hmp, nil, function(msg) overall_parse_err(msg, "hmp", args.hmp) end) or nil
	local overall_audio
	if args.audio then
		overall_audio = {}
		for _, audio in ipairs(args.audio) do
			local parsed_audio = parse_audio(audio, nil, function(msg) overall_parse_err(msg, "audio", audio) end)
			if #parsed_audio > 1 then
				error("Internal error: Saw more than one object returned from parse_audio")
			end
			table.insert(overall_audio, parsed_audio[1])
		end
	end

	local put
	for i, respelling in ipairs(respellings) do
		local function parse_err(msg)
			error(msg .. ": " .. i .. "= " .. respelling)
		end
		if respelling:find("<") then
			if not put then
				put = require(put_module)
			end
			local param_mods = {
				pre = {},
				post = {},
				style = {},
				bullets = {
					convert = function(arg)
						if not arg:find("^[0-9]+$") then
							parse_err("Modifier 'bullets' should have a number as argument, but saw '" .. arg .. "'")
						end
						return tonumber(arg)
					end,
				},
				rhyme = {
					insert = true,
					flatten = true,
					convert = function(arg) return parse_rhyme(arg, put, parse_err) end,
				},
				hyph = {
					insert = true,
					flatten = true,
					convert = function(arg) return parse_hyph(arg, put, parse_err) end,
				},
				hmp = {
					insert = true,
					flatten = true,
					convert = function(arg) return parse_homophone(arg, put, parse_err) end,
				},
				audio = {
					insert = true,
					flatten = true,
					convert = function(arg) return parse_audio(arg, put, parse_err) end,
				},
			}
			local function get_valid_prefixes()
				local valid_prefixes = {}
				for param_mod, _ in pairs(param_mods) do
					table.insert(valid_prefixes, param_mod)
				end
				table.insert(valid_prefixes, "ref")
				table.insert(valid_prefixes, "q")
				table.insert(valid_prefixes, "qq")
				table.insert(valid_prefixes, "a")
				table.insert(valid_prefixes, "aa")
				table.sort(valid_prefixes)
				return valid_prefixes
			end
			local segments = put.parse_balanced_segment_run(respelling, "<", ">")
			local comma_separated_groups = put.split_alternating_runs_on_comma(segments, ",")
			local parsed = {terms = {}, audio = {}, rhyme = {}, hyph = {}, hmp = {}}
			for j, group in ipairs(comma_separated_groups) do
				local termobj = parse_respelling(group[1], pagename, parse_err)
				for k = 2, #group - 1, 2 do
					if group[k + 1] ~= "" then
						parse_err("Extraneous text '" .. group[k + 1] .. "' after modifier")
					end
					local modtext = group[k]:match("^<(.*)>$")
					if not modtext then
						parse_err("Internal error: Modifier '" .. group[k] .. "' isn't surrounded by angle brackets")
					end
					local prefix, arg = modtext:match("^([a-z]+):(.*)$")
					if not prefix then
						local valid_prefixes = get_valid_prefixes()
						for i, valid_prefix in ipairs(valid_prefixes) do
							valid_prefixes[i] = "'" .. valid_prefix .. ":'"
						end
						parse_err("Modifier " .. group[k] .. " lacks a prefix, should begin with one of " ..
							m_table.serialCommaJoin(valid_prefixes))
					end
					if prefix == "ref" or prefix == "q" or prefix == "qq" or prefix == "a" or prefix == "aa" then
						-- Per-term modifiers: collected on the term object itself.
						if not termobj[prefix] then
							termobj[prefix] = {}
						end
						table.insert(termobj[prefix], arg)
					elseif param_mods[prefix] then
						-- Per-respelling-set modifiers: only allowed after the last comma-separated term.
						if j < #comma_separated_groups then
							parse_err("Modifier '" .. prefix .. "' should occur after the last comma-separated term")
						end
						if not param_mods[prefix].insert and parsed[prefix] then
							parse_err("Modifier '" .. prefix .. "' occurs twice, second occurrence " .. group[k])
						end
						local converted
						if param_mods[prefix].convert then
							converted = param_mods[prefix].convert(arg)
						else
							converted = arg
						end
						if param_mods[prefix].insert then
							if param_mods[prefix].flatten then
								for _, obj in ipairs(converted) do
									table.insert(parsed[prefix], obj)
								end
							else
								table.insert(parsed[prefix], converted)
							end
						else
							parsed[prefix] = converted
						end
					else
						local valid_prefixes = get_valid_prefixes()
						for i, valid_prefix in ipairs(valid_prefixes) do
							valid_prefixes[i] = "'" .. valid_prefix .. "'"
						end
						parse_err("Unrecognized prefix '" .. prefix .. "' in modifier " .. group[k]
							.. ", should be " .. m_table.serialCommaJoin(valid_prefixes))
					end
				end
				table.insert(parsed.terms, termobj)
			end
			if not parsed.bullets then
				parsed.bullets = 1
			end
			table.insert(parsed_respellings, parsed)
		else
			local termobjs = {}
			for _, term in ipairs(split_on_comma(respelling)) do
				table.insert(termobjs, parse_respelling(term, pagename, parse_err))
			end
			table.insert(parsed_respellings, {
				terms = termobjs,
				audio = {},
				rhyme = {},
				hyph = {},
				hmp = {},
				bullets = 1,
			})
		end
	end

	-- In the overall syllabification, "+" means "derive from the page name" and "-" suppresses it entirely.
	if overall_hyph then
		for _, hyph in ipairs(overall_hyph) do
			if hyph.syllabification == "+" then
				hyph.syllabification = syllabify_from_spelling(pagename)
				hyph.hyph = split_syllabified_spelling(hyph.syllabification)
			elseif hyph.syllabification == "-" then
				overall_hyph = {}
				break
			end
		end
	end

	-- Loop over individual respellings, processing each.
	for _, parsed in ipairs(parsed_respellings) do
		parsed.pronun = generate_pronun(parsed)

		-- Rhymes are not auto-generated if any term is unsuitable for it.
		local no_auto_rhyme = false
		for _, term in ipairs(parsed.terms) do
			if term.raw then
				if not should_generate_rhyme_from_ipa(term.raw_phonemic or term.raw_phonetic) then
					no_auto_rhyme = true
					break
				end
			elseif not should_generate_rhyme_from_respelling(term.term) then
				no_auto_rhyme = true
				break
			end
		end

		if #parsed.hyph == 0 then
			-- No explicit syllabification for this set: derive it from each respelling, but only keep results
			-- that can be aligned back onto the page name.
			if not overall_hyph and all_words_have_vowels(pagename) then
				for _, term in ipairs(parsed.terms) do
					if not term.raw then
						local syllabification = syllabify_from_spelling(term.term)
						local aligned_syll = align_syllabification_to_spelling(syllabification, pagename)
						if aligned_syll then
							m_table.insertIfNot(parsed.hyph, generate_hyph_obj(aligned_syll))
						end
					end
				end
			end
		else
			for _, hyph in ipairs(parsed.hyph) do
				if hyph.syllabification == "+" then
					hyph.syllabification = syllabify_from_spelling(pagename)
					hyph.hyph = split_syllabified_spelling(hyph.syllabification)
				elseif hyph.syllabification == "-" then
					parsed.hyph = {}
					break
				end
			end
		end

		-- Generate the rhymes from the phonemic pronunciations of this respelling set, storing the list of
		-- {rhyme, num_syl} objects in rhyme_ret.pronun.
		local function do_rhyme(rhyme_ret)
			rhyme_ret.pronun = {}
			for _, pronun in ipairs(parsed.pronun.pronun) do
				-- pronun.phonemic may be missing (e.g. if only a raw phonetic pronunciation was given).
				if pronun.phonemic then
					-- Count number of syllables by looking at syllable boundaries (including stress marks).
					local num_syl = get_num_syl_from_phonemic(pronun.phonemic)
					-- Get the rhyme by truncating everything up through the last stress mark + any following
					-- consonants, and remove syllable boundary markers.
					local rhyme = convert_phonemic_to_rhyme(pronun.phonemic)
					local saw_already = false
					for _, existing in ipairs(rhyme_ret.pronun) do
						if existing.rhyme == rhyme then
							saw_already = true
							-- We already saw this rhyme but possibly with a different number of syllables.
							m_table.insertIfNot(existing.num_syl, num_syl)
							break
						end
					end
					if not saw_already then
						table.insert(rhyme_ret.pronun, {
							rhyme = rhyme,
							num_syl = {num_syl},
						})
					end
				end
			end
		end

		if #parsed.rhyme == 0 then
			if overall_rhyme or no_auto_rhyme then
				parsed.rhyme = nil
			else
				local rhyme_ret = {}
				do_rhyme(rhyme_ret)
				-- Nothing to show if no phonemic pronunciation was available.
				parsed.rhyme = #rhyme_ret.pronun > 0 and rhyme_ret or nil
			end
		else
			-- An explicit rhyme of "-" suppresses rhymes for this set.
			local no_rhyme = false
			for _, rhyme in ipairs(parsed.rhyme) do
				if rhyme.rhyme == "-" then
					no_rhyme = true
					break
				end
			end
			if no_rhyme then
				parsed.rhyme = nil
			else
				local rhyme_ret = {}
				do_rhymes(parsed.rhyme, parsed.hyph, {parsed}, rhyme_ret)
				parsed.rhyme = rhyme_ret
			end
		end
	end

	if overall_rhyme then
		-- An overall rhyme of "-" suppresses the overall rhymes.
		local no_overall_rhyme = false
		for _, orhyme in ipairs(overall_rhyme) do
			if orhyme.rhyme == "-" then
				no_overall_rhyme = true
				break
			end
		end
		if no_overall_rhyme then
			overall_rhyme = nil
		else
			-- Syllable counts for the overall rhymes come from the overall syllabifications if given, otherwise
			-- from all per-set syllabifications.
			local all_hyphs
			if overall_hyph then
				all_hyphs = overall_hyph
			else
				all_hyphs = {}
				for _, parsed in ipairs(parsed_respellings) do
					for _, hyph in ipairs(parsed.hyph) do
						m_table.insertIfNot(all_hyphs, hyph)
					end
				end
			end
			local rhyme_ret = {}
			do_rhymes(overall_rhyme, all_hyphs, parsed_respellings, rhyme_ret)
			overall_rhyme = rhyme_ret
		end
	end

	-- If all sets of pronunciations have the same rhymes, display them only once at the bottom.
	-- Otherwise, display rhymes beneath each set, indented.
	local first_rhyme_ret
	local all_rhyme_sets_eq = true
	for j, parsed in ipairs(parsed_respellings) do
		if j == 1 then
			first_rhyme_ret = parsed.rhyme
		elseif not m_table.deepEquals(first_rhyme_ret, parsed.rhyme) then
			all_rhyme_sets_eq = false
			break
		end
	end

	-- Format the rhymes in rhyme_ret.pronun as a single bulleted line. Returns exactly one string so that the
	-- result can be passed straight to table.insert().
	local function format_rhyme(rhyme_ret, num_bullets)
		local rhymes = {}
		for _, pronun in ipairs(rhyme_ret.pronun) do
			table.insert(rhymes, pronun)
		end
		local data = {
			lang = lang,
			rhymes = rhymes,
			-- NOTE(review): `force_cat` is not defined in the visible part of this file; it is nil unless
			-- declared elsewhere in the module.
			force_cat = force_cat,
		}
		local formatted = string.rep("*", num_bullets) .. " " .. require("Module:rhymes").format_rhymes(data)
		return formatted
	end

	-- If all sets of pronunciations have the same hyphenations, display them only once at the bottom.
	-- Otherwise, display hyphenations beneath each set, indented.
	local first_hyphs
	local all_hyph_sets_eq = true
	for j, parsed in ipairs(parsed_respellings) do
		if j == 1 then
			first_hyphs = parsed.hyph
		elseif not m_table.deepEquals(first_hyphs, parsed.hyph) then
			all_hyph_sets_eq = false
			break
		end
	end

	local function format_hyphenations(hyphs, num_bullets)
		local hyphtext = require("Module:hyphenation").format_hyphenations { lang = lang, hyphs = hyphs, caption = "Syllabification" }
		return string.rep("*", num_bullets) .. " " .. hyphtext
	end

	-- If all sets of pronunciations have the same homophones, display them only once at the bottom.
	-- Otherwise, display homophones beneath each set, indented.
	local first_hmps
	local all_hmp_sets_eq = true
	for j, parsed in ipairs(parsed_respellings) do
		if j == 1 then
			first_hmps = parsed.hmp
		elseif not m_table.deepEquals(first_hmps, parsed.hmp) then
			all_hmp_sets_eq = false
			break
		end
	end

	local function format_homophones(hmps, num_bullets)
		local hmptext = require("Module:homophones").format_homophones { lang = lang, homophones = hmps }
		return string.rep("*", num_bullets) .. " " .. hmptext
	end

	local function format_audio(audios, num_bullets)
		local ret = {}
		for i, audio in ipairs(audios) do
			-- FIXME! There should be a module for this.
			local text = frame:expandTemplate {
				title = "audio", args = {"tl", audio.file, audio.gloss }
			}
			if audio.q and audio.q[1] or audio.qq and audio.qq[1]
				or audio.a and audio.a[1] or audio.aa and audio.aa[1] then
				text = require("Module:pron qualifier").format_qualifiers(audio, text)
			end
			table.insert(ret, string.rep("*", num_bullets) .. " " .. text)
		end
		return table.concat(ret, "\n")
	end

	-- Assemble the output.
	local textparts = {}
	local min_num_bullets = 9999
	for j, parsed in ipairs(parsed_respellings) do
		if parsed.bullets < min_num_bullets then
			min_num_bullets = parsed.bullets
		end
		if j > 1 then
			table.insert(textparts, "\n")
		end
		table.insert(textparts, parsed.pronun.text)
		if #parsed.audio > 0 then
			table.insert(textparts, "\n")
			-- If only one pronunciation set, add the audio with the same number of bullets, otherwise
			-- indent audio by one more bullet.
			table.insert(textparts, format_audio(parsed.audio,
				#parsed_respellings == 1 and parsed.bullets or parsed.bullets + 1))
		end
		-- Items that differ between sets are shown beneath their own set, indented by one more bullet.
		if not all_rhyme_sets_eq and parsed.rhyme then
			table.insert(textparts, "\n")
			table.insert(textparts, format_rhyme(parsed.rhyme, parsed.bullets + 1))
		end
		if not all_hyph_sets_eq and #parsed.hyph > 0 then
			table.insert(textparts, "\n")
			table.insert(textparts, format_hyphenations(parsed.hyph, parsed.bullets + 1))
		end
		if not all_hmp_sets_eq and #parsed.hmp > 0 then
			table.insert(textparts, "\n")
			table.insert(textparts, format_homophones(parsed.hmp, parsed.bullets + 1))
		end
	end
	if overall_audio and #overall_audio > 0 then
		table.insert(textparts, "\n")
		table.insert(textparts, format_audio(overall_audio, min_num_bullets))
	end
	-- Items shared by every set are shown once at the bottom, followed by the overall ones.
	if all_rhyme_sets_eq and first_rhyme_ret then
		table.insert(textparts, "\n")
		table.insert(textparts, format_rhyme(first_rhyme_ret, min_num_bullets))
	end
	if overall_rhyme then
		table.insert(textparts, "\n")
		table.insert(textparts, format_rhyme(overall_rhyme, min_num_bullets))
	end
	if all_hyph_sets_eq and first_hyphs and #first_hyphs > 0 then
		table.insert(textparts, "\n")
		table.insert(textparts, format_hyphenations(first_hyphs, min_num_bullets))
	end
	if overall_hyph and #overall_hyph > 0 then
		table.insert(textparts, "\n")
		table.insert(textparts, format_hyphenations(overall_hyph, min_num_bullets))
	end
	if all_hmp_sets_eq and first_hmps and #first_hmps > 0 then
		table.insert(textparts, "\n")
		table.insert(textparts, format_homophones(first_hmps, min_num_bullets))
	end
	if overall_hmp and #overall_hmp > 0 then
		table.insert(textparts, "\n")
		table.insert(textparts, format_homophones(overall_hmp, min_num_bullets))
	end
	return table.concat(textparts)
end
return export