-- Module:tl-pron/sandbox
--
-- This module sandbox lacks a documentation subpage. Please create it.
-- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox of (diff)
-- Based on [[Module:es-pronunc]] by Benwing2. 
-- Adaptation by TagaSanPedroAko.

local export = {}

local m_IPA = require("Module:IPA")
local m_table = require("Module:table")
local put_module = "Module:parse utilities"

local lang = require("Module:languages").getByCode("tl")

local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rsplit = mw.text.split
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
local usub = mw.ustring.sub
local ulen = mw.ustring.len
local unfd = mw.ustring.toNFD
local unfc = mw.ustring.toNFC

-- Combining diacritics as they appear in decomposed (NFD) text.
local AC = u(0x0301) -- combining acute accent
local GR = u(0x0300) -- combining grave accent
local CFLEX = u(0x0302) -- combining circumflex
local TILDE = u(0x0303) -- combining tilde (recombined onto n by decompose())
local DIA = u(0x0308) -- combining diaeresis (recombined onto u by decompose())
local MACRON = u(0x0304) -- combining macron; export.IPA() adds it to words that are to stay unstressed

-- Placeholder characters: SYLDIV stands in for a user-specified "." and SYLDIV2 for "7" while
-- syllabify_from_spelling() runs, so that the syllabifier does not move them around.
local SYLDIV = u(0xFFF0)
local SYLDIV2 = u(0xFFF1)
local vowel = "aeiouAEIOU" -- vowel letters, for use inside a character class
local V = "[" .. vowel .. "]" -- pattern matching one vowel letter
local accent = AC .. GR .. CFLEX .. MACRON -- all accent marks, for use inside a character class
local accent_c = "[" .. accent .. "]" -- pattern matching one accent mark
local stress_c = "[" .. AC .. GR .. "]" -- pattern matching an acute or grave accent
local ipa_stress = "ˈˌ" -- IPA primary and secondary stress marks
local ipa_stress_c = "[" .. ipa_stress .. "]"
local sylsep = "%-." .. SYLDIV -- hyphen included for syllabifying from spelling
local sylsep_c = "[" .. sylsep .. "]"
local wordsep = "# " -- "#" is the word-boundary marker inserted by export.IPA()
local separator_not_wordsep = accent .. ipa_stress .. sylsep
local separator = separator_not_wordsep .. wordsep
local separator_c = "[" .. separator .. "]"
-- A consonant is defined negatively: anything that is neither a vowel nor a separator. Unknown characters are
-- therefore treated as consonants.
local C = "[^" .. vowel .. separator .. "]"
local C_OR_WORDSEP = "[^" .. vowel .. separator_not_wordsep .. "]" -- consonants or word separator

-- Set of words (keyed by the lowercased word) that export.IPA() leaves unstressed when they carry no explicit
-- accent mark. A "7" in an entry is the respelling notation for a glottal stop.
local unstressed_words = m_table.listToSet({
	-- Case markers. "nang" here is for written "ng", but can also work with nang as in the contraction na'ng and
	-- the conjunction "nang".
	"ang", "sa", "nang", "si", "ni", "kay",
	-- Letter names (abakada and modern Filipino).
	"a", "ar", "ba", "bi", "da", "di", "e", "ef", "eks", "dyi", "jey", "key", "em", "ma", "en", "pi", "ra", "es",
	"ta", "ti", "u", "wa", "way", "ya", "yu", "zey", "zi",
	-- Single-syllable personal pronouns.
	"ko", "mo", "ka",
	-- Linker, also temporal particle.
	"na",
	-- Particles.
	"daw", "ga", "ha", "pa",
	-- Negation words.
	"di7", "de7",
	-- Single-syllable existential.
	"may",
	-- Subordinating conjunctions.
	"pag", "kung",
	-- Coordinating conjunctions.
	"at", "o",
	-- Interjections.
	"hay",
	-- In some Spanish-derived terms and names.
	"de", "del", "el", "la", "las", "los",
	-- Affixes.
	"-an", "-en", "-han", "hi-", "-hin", "hin-", "hing-", "-in", "mag-", "mang-", "pa-", "pag-", "pang-",
})

-- Like rsubn(), but returns only the resulting string (the substitution count is dropped).
local function rsub(term, foo, bar)
	-- The parentheses truncate rsubn()'s return values to the first one.
	return (rsubn(term, foo, bar))
end

-- Like rsubn(), but the second return value is a boolean saying whether at least one substitution was made.
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end

-- Apply rsub() over and over until the string stops changing, and return that fixed point.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end

-- ĵ, ɟ and ĉ are used internally to represent [d͡ʒ], [j] and [t͡ʃ]
--

-- Convert `text` to NFD, then put ñ/Ñ and ü/Ü back together so that those four stay single characters.
-- Any other base character followed by a tilde or diaeresis is left decomposed (a lookup miss in the
-- replacement table keeps the matched text).
local function decompose(text)
	local recomposed = {
		["n" .. TILDE] = "ñ",
		["N" .. TILDE] = "Ñ",
		["u" .. DIA] = "ü",
		["U" .. DIA] = "Ü",
	}
	local base_plus_mark = ".[" .. TILDE .. DIA .. "]"
	return rsub(unfd(text), base_plus_mark, recomposed)
end

-- Split `term` on commas. When some comma is followed by whitespace, the splitting is delegated to
-- split_on_comma() in the parse utilities module (loaded on demand); otherwise a plain split is used.
local function split_on_comma(term)
	if not term:find(",%s") then
		return rsplit(term, ",")
	end
	return require(put_module).split_on_comma(term)
end

-- Strip HTML tags from formatted text and, if any wikilinks are present, resolve them with
-- [[Module:links]], because that markup does not contribute to the displayed length.
local function convert_to_raw_text(text)
	local stripped = rsub(text, "<.->", "")
	if not stripped:find("%[%[") then
		return stripped
	end
	-- Parenthesized so that exactly one value is returned.
	return (require("Module:links").remove_links(stripped))
end

-- Approximate number of displayed characters in `text`, measured after markup has been removed.
local function textual_len(text)
	local raw = convert_to_raw_text(text)
	return ulen(raw)
end

-- Core syllable divider: inserts "." between syllables. It is called both on spelling (for hyphenation) and on a
-- respelling that has already been partly converted towards pronunciation (inside export.IPA()).
-- `is_spelling` is accepted for both call sites but is not consulted by the current rules.
local function syllabify_from_spelling_or_pronun(text, is_spelling)
	-- Part 1: in a consonant cluster between vowels, divide before the last consonant.
	local cluster_then_onset = "(" .. V .. accent_c .. "*" .. C .. "+)(" .. C .. V .. ")"
	text = rsub_repeatedly(text, cluster_then_onset, "%1.%2")
	-- Keep "ch" and "sh" together by moving the division in front of them.
	text = rsub(text, "([cs])%.h", ".%1h")
	-- Likewise keep "ll" and "rr" together.
	text = rsub(text, "([lr])%.%1", ".%1%1")
	-- Likewise keep "ts" together ([[tsika]], [[tsaleko]], [[Tsina]]); a respelling can override this where the two
	-- sounds really are separate (e.g. [[tatsulok]]).
	text = rsub(text, "t%.s", ".ts")

	-- Part 2: divide between adjacent vowels ([[saan]], [[leeg]], [[giit]], [[poot]]). The second vowel must be
	-- either a lowercase vowel or any vowel carrying an acute/grave accent.
	local first_vowel = "([aeiouAEIOU]" .. accent_c .. "*)"
	text = rsub_repeatedly(text, first_vowel .. "([aeiou])", "%1.%2")
	return rsub_repeatedly(text, first_vowel .. "(" .. V .. stress_c .. ")", "%1.%2")
end

-- Syllabify a spelling: protect characters that must not take part in the division, run the shared
-- syllabifier, then restore the protected characters.
local function syllabify_from_spelling(text)
	-- Placeholders start at FFF4 here; FFF0 and FFF1 are taken by SYLDIV and SYLDIV2. Characters unknown to the
	-- consonant patterns are treated as consonants, which is what makes these placeholders work.
	local TEMP_Y_CONS = u(0xFFF4)
	local TEMP_W_CONS = u(0xFFF5)
	local TEMP_QU = u(0xFFF6)
	local TEMP_QU_CAPS = u(0xFFF7)
	local TEMP_GU = u(0xFFF8)
	local TEMP_GU_CAPS = u(0xFFF9)

	text = decompose(text)
	-- A user-written "." becomes SYLDIV and "7" becomes SYLDIV2 so the syllabifier leaves them where they are.
	text = text:gsub("%.", SYLDIV)
	text = text:gsub("7", SYLDIV2)
	-- "y" and "w" directly before a vowel act as consonants.
	text = rsub(text, "y(" .. V .. ")", TEMP_Y_CONS .. "%1")
	text = rsub(text, "w(" .. V .. ")", TEMP_W_CONS .. "%1")

	text = syllabify_from_spelling_or_pronun(text, "is spelling")

	-- Undo the placeholders, in this order.
	local restorations = {
		{SYLDIV, "."},
		{SYLDIV2, "7"},
		{TEMP_Y_CONS, "y"},
		{TEMP_W_CONS, "w"},
		{TEMP_QU, "qu"},
		{TEMP_QU_CAPS, "Qu"},
		{TEMP_GU, "gu"},
		{TEMP_GU_CAPS, "Gu"},
	}
	for _, pair in ipairs(restorations) do
		text = text:gsub(pair[1], pair[2])
	end
	return text
end

-- Generate the IPA of a given respelling, where a respelling is the representation of the pronunciation of a given
-- Tagalog term using Tagalog spelling conventions.
--
-- Parameters:
--   text: the respelling; when nil, the title of the current page is used.
--   phonetic: if truthy, the additional phonetic rules are applied; otherwise the result is phonemic.
-- Returns a table of the form {text = IPA_STRING}.
--
-- ĵ, ɟ and ĉ are used internally to represent [d͡ʒ], [j] and [t͡ʃ]; ć is used internally (phonetic only) for [t͡s].
-- ā, ī and ū temporarily mark /a/, /i/ and /u/ in stressed syllables.
function export.IPA(text, phonetic)

	text = ulower(text or mw.title.getCurrentTitle().text)
	-- decompose everything but ñ and ü
	text = decompose(text)

	-- convert commas and en/em dashes to IPA foot boundaries
	text = rsub(text, "%s*[,–—]%s*", " | ")
	-- question mark or exclamation point in the middle of a sentence -> IPA foot boundary
	text = rsub(text, "([^%s])%s*[!?]%s*([^%s])", "%1 | %2")

	-- canonicalize multiple spaces and remove leading and trailing spaces
	local function canon_spaces(text)
		text = rsub(text, "%s+", " ")
		text = rsub(text, "^ ", "")
		text = rsub(text, " $", "")
		return text
	end

	text = canon_spaces(text)

	-- Make prefixes unstressed unless they have an explicit stress marker; also make certain
	-- monosyllabic words (e.g. [[ang]], [[ng]], [[si]], [[na]], etc.) without stress marks be
	-- unstressed.
	local words = rsplit(text, " ")
	for i, word in ipairs(words) do
		if rfind(word, "%-$") and not rfind(word, accent_c) or unstressed_words[word] then
			-- The greedy ".*" makes the macron land after the last vowel of the word, not the first.
			words[i] = rsub(word, "^(.*" .. V .. ")", "%1" .. MACRON)
		end
	end
	text = table.concat(words, " ")
	-- Convert hyphens to spaces
	text = rsub(text, "%-", " ")
	-- canonicalize multiple spaces again, which may have been introduced by hyphens
	text = canon_spaces(text)
	-- now eliminate punctuation
	text = rsub(text, "[!?']", "")
	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"

	-- Add glottal stop for words starting with vowel and double vowel
	text = rsub(text, "([#])([aeiou])", "%1ʔ%2")
	text = rsub(text, "([aeiou])([aeiou])", "%1ʔ%2")

	--determining whether "y" or "w" is a consonant or a vowel
	text = rsub(text, "y(" .. V .. ")", "ɟ%1") -- not the real sound
	text = rsub(text,"y([ˈˌ]?)([bdɡjklmnprstw])","i%1%2")
	text = rsub(text, "y#", "i")
	text = rsub(text, "w(" .. V .. ")","w%1")
	text = rsub(text,"w([ˈˌ]?)([bdɡjklmnprstw])","u%1%2")
	text = rsub(text, "w#","u")

	-- handle certain combinations; ch ng and sh handling needs to go first
	text = rsub(text, "ch", "ts") --not the real sound
	text = rsub(text, "ng", "ŋ")
	text = rsub(text, "sh", "ʃ")

	--x
	text = rsub(text, "x", "ks")

	--c, gü/gu+e or i, q
	text = rsub(text, "c([ie])", "s%1")
	text = rsub(text, "gü([ie])", "ɡw%1")
	text = rsub(text, "gu([ie])", "ɡ%1")
	text = rsub(text, "qu([ie])", "k%1")
	text = rsub(text, "ü", "u")

	--alphabet-to-phoneme
	text = rsub(text, "[cfgjñqrvz7]",
	--["g"]="ɡ":  U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
		{ ["c"] = "k", ["f"] = "p", ["g"] = "ɡ", ["j"] = "ĵ", ["ñ"] = "ɲ", ["q"] = "k", ["r"] = "ɾ", ["v"] = "b", ["z"] = "s", ["7"] = "ʔ"})

	-- trill in rr
	text = rsub(text, "ɾɾ", "r")

	-- ts
	text = rsub(text, "ts", "ĉ") --not the real sound

	--syllable division
	text = syllabify_from_spelling_or_pronun(text, false)

	-- What each accent mark turns into at the start of its syllable.
	-- NOTE(review): the grave accent maps to a single space rather than to an empty string or a stress mark;
	-- confirm that this is intended.
	local accent_to_stress_mark = { [AC] = "ˈ", [CFLEX] = "ˈ", [GR] = " ", [MACRON] = "" }

	-- Set by accent_word() when a word has its acute accent on the final syllable. It is declared local here so
	-- that the assignment does not create a global variable; nothing in this function reads it yet.
	local force_elongate_final = false

	-- Put stress marks into `syllables` (modified in place) for one word. `word` is the unsplit word and
	-- `last_word` says whether it is the last word of the text.
	local function accent_word(word, syllables, last_word)
		-- Now stress the word. If any accent exists in the word (including macron indicating an unaccented word),
		-- put the stress mark(s) at the beginning of the indicated syllable(s). Otherwise, apply the default
		-- stress rule.

		local stress_syllable = 0
		local last_accent = ""

		if rfind(word, accent_c) then
			for i = 1, #syllables do
				syllables[i] = rsub(syllables[i], "^(.*)(" .. accent_c .. ")(.*)$",
						function(pre, accent, post)
							last_accent = accent
							if last_accent == AC then
								stress_syllable = i
							end
							return accent_to_stress_mark[accent] .. pre .. post
						end
				)
			end
			if last_accent == CFLEX then
				if last_word then
					-- add a final glottal stop after the last vowel
					syllables[#syllables] = rsub(syllables[#syllables], "(.*)(" .. V .. ")([#|$]+)", "%1%2ʔ%3")
				end

				if stress_syllable == 0 then
					syllables[#syllables] = "ˈ" .. syllables[#syllables]
				end
			elseif last_accent == GR then
				if last_word then
					-- add a final glottal stop after the last vowel
					syllables[#syllables] = rsub(syllables[#syllables], "(.*)(" .. V .. ")([#|$]+)", "%1%2ʔ%3")
				end

				if stress_syllable ~= #syllables-1 then
					syllables[#syllables-1] = "ˈ" .. syllables[#syllables-1]
				end
			elseif last_accent == AC and stress_syllable == #syllables then
				force_elongate_final = true
			end
		else
			-- Default stress rule. Words without vowels (e.g. IPA foot boundaries) don't get stress.
			if #syllables > 1 and rfind(word, "[^aeiouʔbcĉdfɡghjɟĵklmnñŋpqrɾstvwxz#]#") or #syllables == 1 and rfind(word, "[aeiou]") then
				syllables[#syllables] = "ˈ" .. syllables[#syllables]
			elseif #syllables >= 2 then
				syllables[#syllables - 1] = "ˈ" .. syllables[#syllables - 1]
			end
		end
	end

	local words = rsplit(text, " ")
	for j, word in ipairs(words) do
		-- accentuation
		local syllables = rsplit(word, "%.")

		accent_word(word, syllables, j == #words)

		-- Reconstruct the word.
		words[j] = table.concat(syllables, ".")

		-- suppress syllable mark before IPA stress indicator
		words[j] = rsub(words[j], "%.(" .. ipa_stress_c .. ")", "%1")
		--make all primary stresses but the last one be secondary
		words[j] = rsub_repeatedly(words[j], "ˈ(.+)ˈ", "ˌ%1ˈ")
	end

	text = table.concat(words, " ")

	--remove "ɟ" and "w" inserted on vowel pair starting with "i" and "u"
	text = rsub(text,"([i])([ˈˌ]?)ɟ([aeou])","%1%2%3")
	text = rsub(text,"([u])([ˈˌ]?)w([aei])","%1%2%3")

	--add temporary macron for /a/, /i/ and /u/ in stressed syllables so they don't get replaced by unstressed form

	text = rsub(text,"([ˈˌ])([#]*)([ʔbćĉdfɡhĵɟklmnŋpɾrstw]?)([ɟlnɾst]?)([a])([ʔbdfɡiklmnŋpɾstu]?)([bdɡklmnpɾst]?)","%1%2%3%4ā%6%7")
	text = rsub(text,"([ˈˌ])([#]*)([ʔbćĉdfɡhĵɟklmnŋpɾrstw]?)([ɟlnɾst]?)([i])([ʔbdfɡklmnŋpɾstu]?)([bdɡklmnpɾst]?)","%1%2%3%4ī%6%7")
	text = rsub(text,"([ˈˌ])([#]*)([ʔbćĉdfɡhĵɟklmnŋpɾrstw]?)([ɟlnɾst]?)([u])([ʔbdfɡiklmnŋpɾst]?)([bdɡklmnpɾst]?)","%1%2%3%4ū%6%7")

	--Corrections for diphthongs
	text = rsub(text,"([aā])i","%1j") --ay
	text = rsub(text,"([aā])u","%1w") --aw
	text = rsub(text,"([e])i","%1j") --ey
	text = rsub(text,"([iī])u","%1w") --iw
	text = rsub(text,"([o])i","%1j") --oy
	text = rsub(text,"([o])u","%1w") --ow
	text = rsub(text,"([uū])i","%1j") --uy (mostly in proper nouns)

	--phonetic transcription
	if phonetic then

		--Turn phonemic diphthongs to phonetic diphthongs

		text = rsub(text, "([aāeouū])j", "%1ɪ̯")
		text = rsub(text, "([aāeiīo])w", "%1ʊ̯")

		--change a, i, u to unstressed equivalents (certain forms to restore)
		text = rsub(text,"a","ɐ")
		text = rsub(text,"i","ɪ")
		text = rsub(text,"u","ʊ")

		--Combine consonants (except H) followed by I/U and certain stressed vowels
		text = rsub(text,"([bkdɡlmnpɾst])ɪ([ˈˌ])([āeoū])","%2%1ɟ%3")
		text = rsub(text,"([bkdɡlmnpɾst])ʊ([ˈˌ])([āeīo])","%2%1w%3")

		text = rsub(text,"([nŋ])([ˈˌ# .]*[bfpm])","m%2")
		text = rsub(text,"([mŋ])([ˈˌ# .]*[dlst])","n%2")
		text = rsub(text,"([ɐāeɪɪ̯īoʊʊ̯ū])([#]?)([ ]?)([ˈˌ#.])([k])([ɐāeɪīoʊū])","%1%2%3%4x%6") -- /k/ between vowels
		text = rsub(text,"([ɐāeɪɪ̯īoʊʊ̯ū])([ˈˌ.])ɡ([ɐāeɪīoʊū])","%1%2ɣ%3") -- /ɡ/ between vowels
		text = rsub(text,"d([ˈˌ.])ɟ","%1ĵ") --/d/ before /j/
		text = rsub(text,"n([ˈˌ.])k","ŋ%1k") -- /n/ before /k/ (some proper nouns)
		text = rsub(text,"n([ˈˌ.])ɡ","ŋ%1ɡ") -- /n/ before /ɡ/ (some proper nouns and loanwords)
		text = rsub(text,"n([ˈˌ.])h","ŋ%1h") -- /n/ before /h/ (some proper nouns)
		text = rsub(text,"n([ˈˌ.])m","m%1m") -- /n/ before /m/
		text = rsub(text,"n([ˈˌ.])ɟ","%1ɲ") -- /n/ before /j/
		text = rsub(text,"s([ˈˌ.])ɟ","%1ʃ") -- /s/ before /j/
		text = rsub(text,"t([ˈˌ.])ɟ","%1ĉ") -- /t/ before /j/
		text = rsub(text,"t([ˈˌ.])s","%1ć") -- /t/ before /s/
		text = rsub(text,"([ˈˌ.])d([ɟj])([ɐāeɪīoʊū])","%1ĵ%3") -- /dj/ before any vowel following stress
		text = rsub(text,"([ˈˌ.])n([ɟj])([ɐāeɪīoʊū])","%1ɲ%3") -- /nj/ before any vowel following stress
		text = rsub(text,"([ˈˌ.])s([ɟj])([ɐāeɪīoʊū])","%1ʃ%3") -- /sj/ before any vowel following stress
		text = rsub(text,"([ˈˌ.])t([ɟj])([ɐāeɪīoʊū])","%1ĉ%3") -- /tj/ before any vowel following stress
		text = rsub(text,"([oʊ])([m])([ˈ]?)([pb])","u%2%3%4") -- /o/ and /ʊ/ before /mb/ or /mp/

		--final fix for phonetic diphthongs

		text = rsub(text,"([ɐ])ɪ̯","aɪ̯") --ay
		text = rsub(text,"([ɐ])ʊ̯","aʊ̯") --aw
		text = rsub(text,"([ɪ])ʊ̯","iʊ̯") --iw

		--delete temporary macron in /a/, /i/ and /u/

		text = rsub(text,"ā","a")
		text = rsub(text,"ī","i")
		text = rsub(text,"ū","u")

	end

	--delete temporary macron in /a/, /i/ and /u/

	text = rsub(text,"ā","a")
	text = rsub(text,"ī","i")
	text = rsub(text,"ū","u")

	-- convert fake symbols to real ones
	local final_conversions = {
		["ĉ"] = "t͡ʃ", -- fake "ch" to real "ch"
		["ɟ"] =  "j", -- fake "y" to real "y"
		["ĵ"] = "d͡ʒ" -- fake "j" to real "j"
	}

	local final_conversions_phonetic = {
		["ĉ"] = "t͡ʃ", -- fake "ch" to real "ch"
		["ć"] = "t͡s", -- fake "t.s" to real "t.s"
		["ɟ"] =  "j", -- fake "y" to real "y"
		["ĵ"] = "d͡ʒ" -- fake "j" to real "j"
	}

	if phonetic then
		text = rsub(text, "[ĉćɟĵ]", final_conversions_phonetic)
	end
	text = rsub(text, "[ĉɟĵ]", final_conversions)

	-- remove # symbols at word and text boundaries
	text = rsub(text, "#([.]?)", "")

	-- resuppress syllable mark before IPA stress indicator
	text = rsub(text, "%.(" .. ipa_stress_c .. ")", "%1")

	text = unfc(text)

	local ret = {
		text = text,
	}
	return ret
end

-- For bot usage; {{#invoke:tl-pr|IPA_string|SPELLING|phonetic=PHONETIC}}
-- where
--
--   SPELLING is the word or respelling to generate pronunciation for;
--   PHONETIC is a boolean parameter that is passed on as the `phonetic` argument of export.IPA().
--
-- Returns the IPA as a plain string.
function export.IPA_string(frame)
	local iargs = require("Module:parameters").process(frame.args, {
		[1] = {},
		["phonetic"] = {type = "boolean"},
	})
	return export.IPA(iargs[1], iargs.phonetic).text
end

-- The PRONUN table has the following form for the full phonemic/phonetic pronunciation:
--
-- {
--   phonemic = "PHONEMIC",
--   phonetic = "PHONETIC",
--   differences = {FLAG = BOOLEAN, FLAG = BOOLEAN, ...},
-- }
--
-- Here, `phonemic` is the phonemic pronunciation (displayed as /.../) and `phonetic` is the phonetic pronunciation
-- (displayed as [...]).
--
-- The PRONUN table has the following form for the rhyme pronunciation:
--
-- {
--   rhyme = "RHYME_PRONUN",
--   num_syl = {NUM, NUM, ...},
--   qualifiers = nil or {QUALIFIER, QUALIFIER, ...},
--   differences = {FLAG = BOOLEAN, FLAG = BOOLEAN, ...},
-- }
--
-- Here, `rhyme` is a phonemic pronunciation such as "an" for [[saan]], and `num_syl` is a list of the possible numbers
-- of syllables for the term(s) that have this rhyme (e.g. {2} for [[saan]], {3} for [[paraan]] and {4} for
-- [[makiraan]]). `num_syl` is used to generate syllable-count categories such as
-- [[Category:Rhymes:Tagalog/an/4 syllables]] in addition to [[Category:Rhymes:Tagalog/an]]. `num_syl` may be nil to
-- suppress the generation of syllable-count categories; this is typically the case with multiword terms.
-- `qualifiers`, if non-nil, comes from the user, who specifies it with inline modifier syntax (e.g. <q:...>).
--

-- Generate and format the pronunciation(s) for the terms in `args.terms`.
--
-- `args` fields read here:
--   terms: list of term objects, each either {term = RESPELLING} or, for raw input,
--          {raw = true, raw_phonemic = ..., raw_phonetic = ...}; optional q/qq/a/aa/refs fields are carried over.
--   bullets: number of "*" to put at the start of the line (1 if missing).
--   pre, post: optional text placed before/after the formatted pronunciations.
--
-- Returns a table with:
--   pronun: list of {phonemic = ..., phonetic = ...} tables, one per term;
--   text: the formatted line (only set when there is at least one term);
--   text_len: the approximate displayed length of `text` in characters.
local function generate_pronun(args)

	local ret = {
		pronun = {},
	}

	-- Compute the phonemic and phonetic form of every term.
	for _, term in ipairs(args.terms or {}) do
		local pronun
		if term.raw then
			pronun = {
				phonemic = term.raw_phonemic,
				phonetic = term.raw_phonetic,
			}
		else
			pronun = {
				phonemic = export.IPA(term.term, false).text,
				phonetic = export.IPA(term.term, true).text,
			}
		end
		pronun.q = term.q
		pronun.qq = term.qq
		pronun.a = term.a
		pronun.aa = term.aa
		pronun.refs = term.refs
		table.insert(ret.pronun, pronun)
	end

	-- Format all pronunciations in `ret.pronun` as one line. `tag`, if given, is added as a left qualifier of
	-- the first pronunciation. Returns the formatted line and its approximate displayed length.
	local function format_pron(tag)
		local pronunciations = {}
		local formatted_pronuns = {}

		local function ins(formatted_part)
			table.insert(formatted_pronuns, formatted_part)
		end

		-- Loop through each pronunciation. For each one, add the phonemic and phonetic versions to `pronunciations`,
		-- for formatting by [[Module:IPA]], and also create an approximation of the formatted version so that we can
		-- compute its displayed length.
		for j, pronun in ipairs(ret.pronun) do
			-- Add tag to left qualifiers if first one
			-- FIXME: Consider using accent qualifier for the tag instead.
			local qs = pronun.q
			if j == 1 and tag then
				if qs then
					qs = m_table.deepcopy(qs)
					table.insert(qs, tag)
				else
					qs = {tag}
				end
			end

			local first_pronun = #pronunciations + 1

			if not pronun.phonemic and not pronun.phonetic then
				error("Internal error: Saw neither phonemic nor phonetic pronunciation")
			end

			if pronun.phonemic then -- missing if 'raw:[...]' given
				-- don't display syllable division markers in phonemic
				local slash_pron = "/" .. pronun.phonemic:gsub("%.", "") .. "/"
				table.insert(pronunciations, {
					pron = slash_pron,
				})
				ins(slash_pron)
			end

			if pronun.phonetic then -- missing if 'raw:/.../' given
				local bracket_pron = "[" .. pronun.phonetic .. "]"
				table.insert(pronunciations, {
					pron = bracket_pron,
				})
				ins(bracket_pron)
			end

			local last_pronun = #pronunciations

			if qs then
				pronunciations[first_pronun].q = qs
			end
			if pronun.a then
				pronunciations[first_pronun].a = pronun.a
			end
			if j > 1 then
				pronunciations[first_pronun].separator = ", "
				ins(", ")
			end
			if pronun.qq then
				pronunciations[last_pronun].qq = pronun.qq
			end
			if pronun.aa then
				pronunciations[last_pronun].aa = pronun.aa
			end
			if qs or pronun.a or pronun.qq or pronun.aa then
				local data = {
					q = qs,
					a = pronun.a,
					qq = pronun.qq,
					aa = pronun.aa
				}
				-- Note: This inserts the actual formatted qualifier text, including HTML and such, but the later call
				-- to textual_len() removes all HTML and reduces links.
				ins(require("Module:pron qualifier").format_qualifiers(data, ""))
			end

			if pronun.refs then
				pronunciations[last_pronun].refs = pronun.refs
				-- Approximate the reference using a footnote notation. This will be slightly inaccurate if there are
				-- more than nine references but that is rare.
				ins(string.rep("[1]", #pronun.refs))
			end
			if first_pronun ~= last_pronun then
				pronunciations[last_pronun].separator = " "
				ins(" ")
			end
		end

		local bullet = string.rep("*", args.bullets or 1) .. " "
		-- Here we construct the formatted line in `formatted`, and also try to construct the equivalent without HTML
		-- and wiki markup in `formatted_for_len`, so we can compute the approximate textual length for use in sizing
		-- the toggle box with the "more" button on the right.
		-- Only one line is produced, so `pre` and `post` always apply to it.
		local pre = args.pre and args.pre .. " " or ""
		local post = args.post and " " .. args.post or ""
		local formatted = bullet .. pre .. m_IPA.format_IPA_full { lang = lang, items = pronunciations, separator = "" } .. post
		local formatted_for_len = bullet .. pre .. "IPA(key): " .. table.concat(formatted_pronuns) .. post
		return formatted, textual_len(formatted_for_len)
	end

	-- With no terms there is nothing to format; `text` is then left unset.
	if #ret.pronun > 0 then
		ret.text, ret.text_len = format_pron()
	end

	return ret

end


-- Turn one respelling argument into a term object.
--
--   "raw:/.../", "raw:[...]" or "raw:/.../ [...]" -> {raw = true, raw_phonemic = ..., raw_phonetic = ...}
--   "+"                                          -> {term = pagename}
--   anything else                                -> {term = respelling}
--
-- `parse_err` is called with a message when a "raw:" value has none of the three accepted shapes.
local function parse_respelling(respelling, pagename, parse_err)
	local raw = respelling:match("^raw:(.*)$")
	if not raw then
		if respelling == "+" then
			return {term = pagename}
		end
		return {term = respelling}
	end

	-- Try the combined form first, then phonemic only, then phonetic only.
	local phonemic, phonetic = raw:match("^/(.*)/ %[(.*)%]$")
	phonemic = phonemic or raw:match("^/(.*)/$")
	if not phonemic then
		phonetic = raw:match("^%[(.*)%]$")
	end
	if not (phonemic or phonetic) then
		parse_err(("Unable to parse raw respelling '%s', should be one of /.../, [...] or /.../ [...]")
			:format(raw))
	end
	return {
		raw = true,
		raw_phonemic = phonemic,
		raw_phonetic = phonetic,
	}
end

-- External entry point for {{tl-IPA}}. Reads the template arguments from the parent frame, uses argument 1 (or
-- the current page title when it is absent) as the single term, and returns the `text` field of the table that
-- generate_pronun() produces.
-- NOTE: export.show is assigned again further down in this file; the later assignment is the one in effect.
function export.show(frame)
	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, {
		[1] = {},
		["pre"] = {},
		["post"] = {},
		["ref"] = {},
		["bullets"] = {type = "number", default = 1},
	})
	local term = args[1] or mw.title.getCurrentTitle().text
	args.terms = {{term = term}}
	return generate_pronun(args).text
end

-- Count the syllables of a phonemic representation. The representation is expected to contain syllable dividers
-- (".") but no hyphens. The count is the number of dividers plus one, after stress marks between characters and
-- word boundaries have been turned into dividers.
local function get_num_syl_from_phonemic(phonemic)
	-- IPA foot boundaries count as plain word breaks.
	local words = rsplit(rsub(phonemic, "|", " "), " +")
	for idx = 1, #words do
		-- A stress mark between two characters is a syllable division; any other stress mark is dropped.
		local divided = rsub(words[idx], "(.)[ˌˈ](.)", "%1.%2")
		words[idx] = rsub(divided, "[ˌˈ]", "")
	end
	-- Words are joined with a divider because a word boundary is also a syllable boundary.
	local joined = table.concat(words, ".")
	local dividers_only = rsub(joined, "[^.]", "")
	return ulen(dividers_only) + 1
end

-- Get the rhyme by truncating everything up through the last stress mark + any following consonants, and remove
-- syllable boundary markers. The tie bar in "t͡ʃ" is removed as well. Returns exactly one value, the rhyme string.
local function convert_phonemic_to_rhyme(phonemic)
	-- NOTE: This works because the phonemic vowels are just [aeiou] possibly with diacritics that are separate
	-- Unicode chars. If we want to handle things like ɛ or ɔ we need to add them to `vowel`.
	local rhyme = rsub(phonemic, ".*[ˌˈ]", "")
	rhyme = rsub(rhyme, "^[^" .. vowel .. "]*", "")
	-- Assigning to a local drops gsub()'s second result (the substitution count), which a bare
	-- `return ...:gsub(...)` would hand to the caller as an extra return value.
	rhyme = rhyme:gsub("%.", ""):gsub("t͡ʃ", "tʃ")
	return rhyme
end

-- Split a syllabified spelling on "." and return the list of syllables.
local function split_syllabified_spelling(spelling)
	local syllables = rsplit(spelling, "%.")
	return syllables
end

-- Line up a syllabification with the original spelling, one character at a time. Relative to the spelling, the
-- syllabification may contain extra "." (kept in the result) and extra acute, grave or circumflex marks (dropped).
-- Any other difference makes the alignment fail, and nil is returned. On success the aligned string is returned
-- in NFC.
local function align_syllabification_to_spelling(syllab, spelling)
	local droppable = {[AC] = true, [GR] = true, [CFLEX] = true}
	local syl = rsplit(decompose(syllab), "")
	local spell = rsplit(decompose(spelling), "")
	local out = {}
	local si, pi = 1, 1
	-- The loop ends only once both lists are used up; leftovers on either side run into the failure branch.
	while si <= #syl or pi <= #spell do
		local sc = syl[si]
		if sc == spell[pi] then
			out[#out + 1] = sc
			si = si + 1
			pi = pi + 1
		elseif sc == "." then
			out[#out + 1] = sc
			si = si + 1
		elseif sc ~= nil and droppable[sc] then
			-- extra accent mark: skip it
			si = si + 1
		else
			-- non-matching character
			return nil
		end
	end
	return unfc(table.concat(out))
end

-- Build a hyphenation object: the syllabified term itself plus its list of syllables.
local function generate_hyph_obj(term)
	local obj = {syllabification = term}
	obj.hyph = split_syllabified_spelling(term)
	return obj
end

-- Search `word` (which should already be decomposed) for a vowel letter. The result of the search is returned
-- as is, so it is truthy when a vowel is found and nil otherwise.
local function word_has_vowels(word)
	local vowel_pattern = V
	return rfind(word, vowel_pattern)
end

-- Return true if every space- or hyphen-separated part of `term` contains a vowel, false otherwise.
local function all_words_have_vowels(term)
	for _, part in ipairs(rsplit(decompose(term), "[ %-]")) do
		-- Empty parts are allowed; they occur with prefixes and suffixes.
		local is_empty = part == ""
		if not (is_empty or word_has_vowels(part)) then
			return false
		end
	end
	return true
end

-- Decide whether a rhyme should be generated from the respelling `term`. The result is truthy only for a single
-- word that has vowels and passes the checks below.
local function should_generate_rhyme_from_respelling(term)
	local words = rsplit(decompose(term), " +")
	-- no if multiple words
	if #words ~= 1 then
		return false
	end
	local word = words[1]
	-- no if word is composed of hyphenated parts (e.g. [[Asya-Pasipiko]])
	if word:find(".%-.") then
		return false
	end
	-- no if word is a prefix
	if word:find("%-$") then
		return false
	end
	-- no if word starts with a hyphen and contains a circumflex
	if word:find("^%-") and word:find(CFLEX) then
		return false
	end
	-- no (nil) if word has no vowels (e.g. a single letter)
	return (word_has_vowels(word))
end

-- Decide whether a rhyme should be generated from raw IPA: only when it contains no whitespace and has a vowel.
local function should_generate_rhyme_from_ipa(ipa)
	if ipa:find("%s") then
		return false
	end
	return (word_has_vowels(decompose(ipa)))
end

-- Parse one argument that may carry inline modifiers of the form <prefix:value>.
--
--   arg: the argument text.
--   put: the parse utilities module, or nil to have it loaded on demand.
--   parse_err: function called with a message on any parse problem.
--   generate_obj: function turning the text of one item into its object.
--   param_mods: table of the modifiers allowed besides q, qq, a and aa. A spec may have `item_dest` (key to
--     store under, instead of the prefix) and `convert` (function applied to the value).
--   no_split_on_comma: if truthy, the argument is a single item and is not split on commas.
--
-- Returns the list of generated objects. q/qq/a/aa values accumulate in lists; other modifiers may be given
-- only once per item.
local function parse_pron_modifier(arg, put, parse_err, generate_obj, param_mods, no_split_on_comma)
	local parsed = {}

	-- Without any "<" there are no modifiers to parse.
	if not arg:find("<") then
		if no_split_on_comma then
			table.insert(parsed, generate_obj(arg))
		else
			for _, term in ipairs(split_on_comma(arg)) do
				table.insert(parsed, generate_obj(term))
			end
		end
		return parsed
	end

	put = put or require(put_module)

	-- Sorted list of all valid prefixes, each wrapped in single quotes with `suffix` placed before the closing
	-- quote; used in error messages.
	local function quoted_valid_prefixes(suffix)
		local prefixes = {}
		for param_mod in pairs(param_mods) do
			table.insert(prefixes, param_mod)
		end
		table.insert(prefixes, "q")
		table.insert(prefixes, "qq")
		table.insert(prefixes, "a")
		table.insert(prefixes, "aa")
		table.sort(prefixes)
		for idx, valid_prefix in ipairs(prefixes) do
			prefixes[idx] = "'" .. valid_prefix .. suffix .. "'"
		end
		return prefixes
	end

	local list_valued = {q = true, qq = true, a = true, aa = true}

	local segments = put.parse_balanced_segment_run(arg, "<", ">")
	local groups
	if no_split_on_comma then
		groups = {segments}
	else
		groups = put.split_alternating_runs_on_comma(segments)
	end

	for _, group in ipairs(groups) do
		local obj = generate_obj(group[1])
		-- Even positions hold the <...> modifiers; the odd position after each must be empty.
		for j = 2, #group - 1, 2 do
			local modifier, trailing = group[j], group[j + 1]
			if trailing ~= "" then
				parse_err("Extraneous text '" .. trailing .. "' after modifier")
			end
			local modtext = modifier:match("^<(.*)>$")
			if not modtext then
				parse_err("Internal error: Modifier '" .. modifier .. "' isn't surrounded by angle brackets")
			end
			local prefix, val = modtext:match("^([a-z]+):(.*)$")
			if not prefix then
				local valid = quoted_valid_prefixes(":")
				parse_err("Modifier " .. modifier .. " lacks a prefix, should begin with one of " ..
					m_table.serialCommaJoin(valid))
			end
			if list_valued[prefix] then
				local values = obj[prefix]
				if not values then
					values = {}
					obj[prefix] = values
				end
				table.insert(values, val)
			elseif param_mods[prefix] then
				local spec = param_mods[prefix]
				local key = spec.item_dest or prefix
				if obj[key] then
					parse_err("Modifier '" .. prefix .. "' specified more than once")
				end
				local convert = spec.convert
				if convert then
					obj[key] = convert(val)
				else
					obj[key] = val
				end
			else
				local valid = quoted_valid_prefixes("")
				parse_err("Unrecognized prefix '" .. prefix .. "' in modifier " .. modifier
					.. ", should be " .. m_table.serialCommaJoin(valid))
			end
		end
		table.insert(parsed, obj)
	end

	return parsed
end


-- Parse a rhyme argument into a list of {rhyme = ...} objects. Besides the qualifier modifiers, <s:N,N,...> is
-- accepted; it is stored under `num_syl` as a list of numbers.
local function parse_rhyme(arg, put, parse_err)
	-- Convert "2,3" into {2, 3}, reporting any entry that is not all digits.
	local function to_syllable_counts(spec)
		local counts = rsplit(spec, ",")
		for idx, count in ipairs(counts) do
			if not count:find("^[0-9]+$") then
				parse_err("Number of syllables '" .. count .. "' should be numeric")
			end
			counts[idx] = tonumber(count)
		end
		return counts
	end

	local function make_rhyme_obj(term)
		return {rhyme = term}
	end

	return parse_pron_modifier(arg, put, parse_err, make_rhyme_obj, {
		s = {
			item_dest = "num_syl",
			convert = to_syllable_counts,
		},
	})
end


-- Parse a hyphenation argument into a list of hyphenation objects. Only the qualifier modifiers are accepted.
local function parse_hyph(arg, put, parse_err)
	local no_extra_mods = {}
	return parse_pron_modifier(arg, put, parse_err, generate_hyph_obj, no_extra_mods)
end


-- Parse a homophone argument into a list of {term = ...} objects, accepting the modifiers t, gloss, pos, alt,
-- lit, id and g in addition to the qualifier modifiers.
local function parse_homophone(arg, put, parse_err)
	local function split_genders(spec)
		return rsplit(spec, ",")
	end

	local function make_term_obj(term)
		return {term = term}
	end

	return parse_pron_modifier(arg, put, parse_err, make_term_obj, {
		-- <t:...> is stored under "gloss" because that is what [[Module:links]] (called from
		-- [[Module:homophones]]) expects.
		t = {item_dest = "gloss"},
		gloss = {},
		pos = {},
		alt = {},
		lit = {},
		id = {},
		-- <g:...> is stored under "genders", as a list, for the same reason.
		g = {item_dest = "genders", convert = split_genders},
	})
end


-- Split an audio argument into file name and caption. The delimiter is "#" if the argument contains one,
-- otherwise ";"; whitespace around the delimiter is dropped. Without a delimiter the whole argument is the file
-- name and the caption is "Audio".
local function generate_audio_obj(arg)
	local delimiter = arg:find("#") and "#" or ";"
	local file, gloss = arg:match("^(.-)%s*" .. delimiter .. "%s*(.*)$")
	if file then
		return {file = file, gloss = gloss}
	end
	return {file = arg, gloss = "Audio"}
end


-- Parse an audio argument into a list of audio objects. Only the qualifier modifiers are accepted.
local function parse_audio(arg, put, parse_err)
	-- The argument is never split on commas, because some filenames have embedded commas not followed by a space
	-- (typically followed by an underscore). Any truthy value turns the splitting off.
	local no_split_on_comma = "no split on comma"
	return parse_pron_modifier(arg, put, parse_err, generate_audio_obj, {}, no_split_on_comma)
end


-- External entry point for {{tl-pr}} / {{tl-IPA}}. Reads the template arguments from the parent frame, uses
-- argument 1 (or the current page title when it is absent) as the single term, and returns the `text` field of
-- the table that generate_pronun() produces.
function export.show(frame)
	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, {
		[1] = {},
		["pre"] = {},
		["post"] = {},
		["ref"] = {},
		["bullets"] = {type = "number", default = 1},
	})
	local term = args[1] or mw.title.getCurrentTitle().text
	args.terms = {{term = term}}
	return generate_pronun(args).text
end


-- Count the syllables in a phonemic representation. The input is expected to contain syllable dividers (".")
-- and no hyphens. The count is the number of syllable boundaries plus one, where an IPA stress mark between two
-- characters and each word break both count as a boundary.
local function get_num_syl_from_phonemic(phonemic)
	-- Counting vowels might be a simpler alternative to the approach below.
	-- IPA foot boundaries are treated like word breaks.
	local spaced = rsub(phonemic, "|", " ")
	local normalized_words = {}
	for _, word in ipairs(rsplit(spaced, " +")) do
		-- A stress mark with a character on both sides marks a syllable division; any other stress mark
		-- (e.g. word-initial) is simply dropped.
		local dotted = rsub(word, "(.)[ˌˈ](.)", "%1.%2")
		local stripped = rsub(dotted, "[ˌˈ]", "")
		normalized_words[#normalized_words + 1] = stripped
	end
	-- Joining with "." puts a syllable boundary between adjacent words.
	local joined = table.concat(normalized_words, ".")
	local boundaries = rsub(joined, "[^.]", "")
	return ulen(boundaries) + 1
end


-- Get the rhyme by truncating everything up through the last stress mark + any following consonants, and remove
-- syllable boundary markers. The affricate tie bar in "t͡ʃ" is also dropped.
--
-- Returns exactly one value, the rhyme string. (Returning the chained `gsub` call directly would also leak
-- gsub's replacement count as a second return value, which breaks callers that forward all results, e.g.
-- `table.insert(t, convert_phonemic_to_rhyme(x))`.)
local function convert_phonemic_to_rhyme(phonemic)
	-- NOTE: This works because the phonemic vowels are just [aeiou] possibly with diacritics that are separate
	-- Unicode chars. If we want to handle things like ɛ or ɔ we need to add them to `vowel`.
	local rhyme = rsub(phonemic, ".*[ˌˈ]", "")
	rhyme = rsub(rhyme, "^[^" .. vowel .. "]*", "")
	-- Assigning to a single local truncates gsub's (string, count) result to just the string.
	rhyme = rhyme:gsub("%.", ""):gsub("t͡ʃ", "tʃ")
	return rhyme
end


-- Split a syllabified spelling into its syllables at each "." divider.
local function split_syllabified_spelling(spelling)
	local syllable_divider = "%."
	return rsplit(spelling, syllable_divider)
end


-- "Align" syllabification to original spelling by matching character-by-character, allowing for extra syllable and
-- accent markers in the syllabification. If we encounter an extra syllable marker (.), we allow and keep it. If we
-- encounter an extra accent marker in the syllabification, we drop it. In any other case, we return nil indicating
-- the alignment failed.
local function align_syllabification_to_spelling(syllab, spelling)
	-- Both strings are decomposed and split into individual characters so that accent marks can be compared
	-- (and skipped) on their own.
	local syll_chars = rsplit(decompose(syllab), "")
	local spelling_chars = rsplit(decompose(spelling), "")
	-- Accent marks that may appear in the syllabification without a counterpart in the spelling.
	local droppable = {[AC] = true, [GR] = true, [CFLEX] = true}
	local aligned = {}
	local si, pi = 1, 1
	-- The loop ends normally only once both sides are fully consumed; any mismatch, including one side
	-- running out early, bails out with nil from inside the loop.
	while si <= #syll_chars or pi <= #spelling_chars do
		local sc = syll_chars[si]
		if sc == spelling_chars[pi] then
			-- Same character on both sides: keep it and advance both.
			aligned[#aligned + 1] = sc
			si = si + 1
			pi = pi + 1
		elseif sc == "." then
			-- Extra syllable divider in the syllabification: keep it.
			aligned[#aligned + 1] = sc
			si = si + 1
		elseif sc ~= nil and droppable[sc] then
			-- Extra accent in the syllabification: drop it.
			si = si + 1
		else
			-- Non-matching character.
			return nil
		end
	end
	return unfc(table.concat(aligned))
end


-- Build a hyphenation object from a syllabified spelling: the spelling itself goes in `syllabification` and its
-- list of syllables (as split by `split_syllabified_spelling`) goes in `hyph`.
local function generate_hyph_obj(term)
	local syllables = split_syllabified_spelling(term)
	return {syllabification = term, hyph = syllables}
end


-- Search `word` for a character in the vowel class `V` and return whatever `rfind` returns: a truthy match
-- position when a vowel is present, nil otherwise.
-- Word should already be decomposed; `V` lists only unaccented vowel letters.
local function word_has_vowels(word)
	return rfind(word, V)
end


-- Return true if every space- or hyphen-separated word of `term` contains a vowel, false as soon as one does not.
local function all_words_have_vowels(term)
	for _, word in ipairs(rsplit(decompose(term), "[ %-]")) do
		-- An empty word is allowed; it arises from the leading/trailing hyphen of prefixes and suffixes.
		local is_empty = word == ""
		if not is_empty and not word_has_vowels(word) then
			return false
		end
	end
	return true
end


-- Decide whether a rhyme can be derived from a respelling. Returns false for anything that is not a single,
-- unhyphenated, non-affix word; otherwise returns the (single, truthy-or-nil) result of the vowel check.
local function should_generate_rhyme_from_respelling(term)
	local words = rsplit(decompose(term), " +")
	if #words ~= 1 then
		-- Multiple words.
		return false
	end
	local word = words[1]
	if word:find(".%-.") then
		-- Composed of hyphenated parts.
		return false
	end
	if word:find("%-$") then
		-- A prefix.
		return false
	end
	if word:find("^%-") and word:find(CFLEX) then
		-- An unstressed suffix.
		return false
	end
	-- No rhyme if the word has no vowels (e.g. a single letter). The parentheses keep the result to one value.
	return (word_has_vowels(word))
end


-- Decide whether a rhyme can be derived from raw IPA: never when it contains whitespace (multiple words),
-- otherwise only when the decomposed IPA contains a vowel.
local function should_generate_rhyme_from_ipa(ipa)
	if ipa:find("%s") then
		return false
	end
	-- The parentheses keep the result to one value.
	return (word_has_vowels(decompose(ipa)))
end


-- Convert user-specified rhymes into the rhyme structure stored in `rhyme_ret.pronun`.
--
-- `rhymes` is a list of rhyme objects (`rhyme`, optional `num_syl`, optional `qualifiers`). `hyphs` is a list of
-- hyphenation objects (with `syllabification`). `parsed_respellings` is a list of parsed respelling sets whose
-- `pronun` field has already been filled in. `rhyme_ret` is the table to populate; its `pronun` field is
-- replaced by a list of `{rhyme = ..., num_syl = ..., qualifiers = ...}`.
--
-- When a rhyme has no explicit syllable count, the count is taken from the hyphenations; failing that, from the
-- phonemic pronunciations. `num_syl` is left nil when neither source yields a usable count.
-- NOTE(review): assumes `parsed.pronun.pronun` is a list of pronunciation objects, matching how show_pr
-- iterates it -- confirm against generate_pronun.
local function do_rhymes(rhymes, hyphs, parsed_respellings, rhyme_ret)
	rhyme_ret.pronun = {}
	for _, rhyme in ipairs(rhymes) do
		local num_syl = rhyme.num_syl
		local no_num_syl = false

		-- If user explicitly gave the rhyme but didn't explicitly specify the number of syllables, try to take it from
		-- the hyphenation.
		if not num_syl then
			num_syl = {}
			for _, hyph in ipairs(hyphs) do
				if should_generate_rhyme_from_respelling(hyph.syllabification) then
					local this_num_syl = 1 + ulen(rsub(hyph.syllabification, "[^.]", ""))
					m_table.insertIfNot(num_syl, this_num_syl)
				else
					no_num_syl = true
					break
				end
			end
			if no_num_syl or #num_syl == 0 then
				num_syl = nil
			end
		end

		-- If that fails and term is single-word, try to take it from the phonemic.
		if not no_num_syl and not num_syl then
			-- Start from a fresh list; `num_syl` is nil at this point and cannot be inserted into.
			num_syl = {}
			for _, parsed in ipairs(parsed_respellings) do
				for _, pronun in ipairs(parsed.pronun.pronun) do
					-- Check that pronun.phonemic exists (it may not if raw phonetic-only pronun is given).
					if pronun.phonemic then
						if not should_generate_rhyme_from_ipa(pronun.phonemic) then
							no_num_syl = true
							break
						end
						-- Count number of syllables by looking at syllable boundaries (including stress marks).
						local this_num_syl = get_num_syl_from_phonemic(pronun.phonemic)
						m_table.insertIfNot(num_syl, this_num_syl)
					end
				end
				if no_num_syl then
					break
				end
			end
			if no_num_syl or #num_syl == 0 then
				num_syl = nil
			end
		end

		table.insert(rhyme_ret.pronun, {
			rhyme = rhyme.rhyme,
			num_syl = num_syl,
			qualifiers = rhyme.qualifiers,
		})
	end
end


-- Parse a pronunciation-property spec (rhyme, hyphenation, homophone, audio) into a list of objects.
--
-- `arg` is the raw spec. `put` is the already-loaded parse-utilities module, or nil to load it on demand.
-- `parse_err` is called with a message on malformed input. `generate_obj` turns the bare term text into the
-- initial object. `param_mods` maps each extra modifier prefix to a spec with optional `item_dest` (key to
-- store under, default the prefix) and `convert` (function applied to the value). The prefixes q, qq, a and aa
-- are always accepted and accumulate their values into lists on the object. If `no_split_on_comma` is truthy,
-- the whole spec is one term; otherwise it is split on commas.
local function parse_pron_modifier(arg, put, parse_err, generate_obj, param_mods, no_split_on_comma)
	local objs = {}

	-- No inline modifiers at all: just build one object per term.
	if not arg:find("<") then
		if no_split_on_comma then
			table.insert(objs, generate_obj(arg))
		else
			for _, term in ipairs(split_on_comma(arg)) do
				table.insert(objs, generate_obj(term))
			end
		end
		return objs
	end

	put = put or require(put_module)

	local qualifier_prefixes = {q = true, qq = true, a = true, aa = true}

	-- Sorted list of every accepted prefix, each followed by `suffix` and wrapped in single quotes, for use
	-- in error messages.
	local function quoted_valid_prefixes(suffix)
		local names = {}
		for name in pairs(param_mods) do
			names[#names + 1] = name
		end
		for _, name in ipairs({"q", "qq", "a", "aa"}) do
			names[#names + 1] = name
		end
		table.sort(names)
		for idx, name in ipairs(names) do
			names[idx] = "'" .. name .. suffix .. "'"
		end
		return names
	end

	local runs = put.parse_balanced_segment_run(arg, "<", ">")
	local groups
	if no_split_on_comma then
		groups = {runs}
	else
		groups = put.split_alternating_runs_on_comma(runs)
	end

	for _, group in ipairs(groups) do
		-- group[1] is the term text; after it come alternating <modifier> segments and the text following them.
		local obj = generate_obj(group[1])
		for j = 2, #group - 1, 2 do
			local modifier, trailing = group[j], group[j + 1]
			if trailing ~= "" then
				parse_err("Extraneous text '" .. trailing .. "' after modifier")
			end
			local modtext = modifier:match("^<(.*)>$")
			if not modtext then
				parse_err("Internal error: Modifier '" .. modifier .. "' isn't surrounded by angle brackets")
			end
			local prefix, val = modtext:match("^([a-z]+):(.*)$")
			if not prefix then
				parse_err("Modifier " .. modifier .. " lacks a prefix, should begin with one of " ..
					m_table.serialCommaJoin(quoted_valid_prefixes(":")))
			end
			local spec = param_mods[prefix]
			if qualifier_prefixes[prefix] then
				-- Qualifier-style modifiers may repeat; collect every value.
				obj[prefix] = obj[prefix] or {}
				table.insert(obj[prefix], val)
			elseif spec then
				local key = spec.item_dest or prefix
				if obj[key] then
					parse_err("Modifier '" .. prefix .. "' specified more than once")
				end
				if spec.convert then
					obj[key] = spec.convert(val)
				else
					obj[key] = val
				end
			else
				parse_err("Unrecognized prefix '" .. prefix .. "' in modifier " .. modifier
					.. ", should be " .. m_table.serialCommaJoin(quoted_valid_prefixes("")))
			end
		end
		table.insert(objs, obj)
	end

	return objs
end


-- Parse a rhyme spec into a list of `{rhyme = ...}` objects. Besides the generic qualifiers, the inline modifier
-- <s:N,N,...> gives the possible syllable counts, stored as a list of numbers under `num_syl`.
local function parse_rhyme(arg, put, parse_err)
	-- Turn "2,3" into {2, 3}, reporting any item that is not a plain non-negative integer.
	local function parse_syllable_counts(counts)
		local parsed = rsplit(counts, ",")
		for idx = 1, #parsed do
			local count = parsed[idx]
			if not count:find("^[0-9]+$") then
				parse_err("Number of syllables '" .. count .. "' should be numeric")
			end
			parsed[idx] = tonumber(count)
		end
		return parsed
	end

	local function make_rhyme_obj(term)
		return {rhyme = term}
	end

	local modifier_specs = {
		s = {item_dest = "num_syl", convert = parse_syllable_counts},
	}

	return parse_pron_modifier(arg, put, parse_err, make_rhyme_obj, modifier_specs)
end


-- Parse a hyphenation/syllabification spec into a list of objects built by `generate_hyph_obj`.
local function parse_hyph(arg, put, parse_err)
	-- Hyphenations take no modifiers beyond the generic qualifiers, so the modifier table is empty.
	return parse_pron_modifier(arg, put, parse_err, generate_hyph_obj, {})
end


-- Parse a homophone spec into a list of `{term = ...}` objects, honoring the inline modifiers <t:...>,
-- <gloss:...>, <pos:...>, <alt:...>, <lit:...> and <id:...> in addition to the generic qualifiers.
local function parse_homophone(arg, put, parse_err)
	local modifier_specs = {
		-- <t:...> is stored under the "gloss" key because that is what [[Module:links]] (called from
		-- [[Module:homophones]]) expects.
		t = {item_dest = "gloss"},
		gloss = {},
		pos = {},
		alt = {},
		lit = {},
		id = {},
	}

	local function make_term_obj(term)
		return {term = term}
	end

	return parse_pron_modifier(arg, put, parse_err, make_term_obj, modifier_specs)
end


-- Build an audio object from "FILE#GLOSS" or "FILE;GLOSS" ("#" wins when both delimiters occur). Whitespace
-- around the delimiter is trimmed. Without a delimiter the whole spec is the file name and the gloss is "Audio".
local function generate_audio_obj(arg)
	local audio = {file = arg, gloss = "Audio"}
	local pattern = arg:find("#") and "^(.-)%s*#%s*(.*)$" or "^(.-)%s*;%s*(.*)$"
	local file, gloss = arg:match(pattern)
	if file then
		audio.file = file
		audio.gloss = gloss
	end
	return audio
end


-- Parse an audio spec into a list of audio objects built by `generate_audio_obj`.
local function parse_audio(arg, put, parse_err)
	-- Audio takes no modifiers beyond the generic qualifiers.
	local modifier_specs = {}
	-- Comma splitting is switched off because some file names have embedded commas not followed by a space
	-- (typically followed by an underscore).
	local no_split_on_comma = "no split on comma"
	return parse_pron_modifier(arg, put, parse_err, generate_audio_obj, modifier_specs, no_split_on_comma)
end


-- External entry point for {{tl-pr}}.
--
-- Template arguments (read from the parent frame):
--   1=, 2=, ...  Respellings; defaults to a single "+" when none is given. Each may carry the inline modifiers
--                <pre:>, <post:>, <style:>, <bullets:>, <rhyme:>, <hyph:>, <hmp:>, <audio:>, <ref:>, <q:>, <qq:>,
--                <a:>, <aa:>.
--   rhyme=       Overall rhymes; a rhyme of "-" suppresses them.
--   hyph=        Overall syllabifications; "+" syllabifies the page name, "-" suppresses syllabification.
--   hmp=         Overall homophones.
--   audio=       Overall audio files (list parameter).
--   pagename=    Overrides the page name (otherwise the current title's subpage text).
--
-- Returns the wikitext of the pronunciation section: each pronunciation set followed by its audio, then audio,
-- rhymes, syllabifications and homophones. Rhymes, syllabifications and homophones that are identical across all
-- sets are shown once at the bottom; otherwise they are shown indented beneath each set.
function export.show_pr(frame)
	local params = {
		[1] = {list = true},
		["rhyme"] = {},
		["hyph"] = {},
		["hmp"] = {},
		["audio"] = {list = true},
		["pagename"] = {},
	}
	local parargs = frame:getParent().args
	local args = require("Module:parameters").process(parargs, params)
	local pagename = args.pagename or mw.title.getCurrentTitle().subpageText

	-- Parse the arguments.
	local respellings = #args[1] > 0 and args[1] or {"+"}
	local parsed_respellings = {}
	local function overall_parse_err(msg, arg, val)
		error(msg .. ": " .. arg .. "= " .. val)
	end
	local overall_rhyme = args.rhyme and
		parse_rhyme(args.rhyme, nil, function(msg) overall_parse_err(msg, "rhyme", args.rhyme) end) or nil
	local overall_hyph = args.hyph and
		parse_hyph(args.hyph, nil, function(msg) overall_parse_err(msg, "hyph", args.hyph) end) or nil
	local overall_hmp = args.hmp and
		parse_homophone(args.hmp, nil, function(msg) overall_parse_err(msg, "hmp", args.hmp) end) or nil
	local overall_audio
	if args.audio then
		overall_audio = {}
		for _, audio in ipairs(args.audio) do
			local parsed_audio = parse_audio(audio, nil, function(msg) overall_parse_err(msg, "audio", audio) end)
			if #parsed_audio > 1 then
				error("Internal error: Saw more than one object returned from parse_audio")
			end
			table.insert(overall_audio, parsed_audio[1])
		end
	end
	-- Parse-utilities module, loaded lazily the first time a respelling with inline modifiers is seen.
	local put

	for i, respelling in ipairs(respellings) do
		local function parse_err(msg)
			error(msg .. ": " .. i .. "= " .. respelling)
		end
		if respelling:find("<") then
			if not put then
				put = require(put_module)
			end

			-- Set-level modifiers. Those with `insert` accumulate into a list in `parsed`; with `flatten` the
			-- converted value is itself a list whose items are appended individually.
			local param_mods = {
				pre = {},
				post = {},
				style = {},
				bullets = {
					convert = function(arg)
						if not arg:find("^[0-9]+$") then
							parse_err("Modifier 'bullets' should have a number as argument, but saw '" .. arg .. "'")
						end
						return tonumber(arg)
					end,
				},
				rhyme = {
					insert = true,
					flatten = true,
					convert = function(arg) return parse_rhyme(arg, put, parse_err) end,
				},
				hyph = {
					insert = true,
					flatten = true,
					convert = function(arg) return parse_hyph(arg, put, parse_err) end,
				},
				hmp = {
					insert = true,
					flatten = true,
					convert = function(arg) return parse_homophone(arg, put, parse_err) end,
				},
				audio = {
					insert = true,
					flatten = true,
					convert = function(arg) return parse_audio(arg, put, parse_err) end,
				},
			}

			local function get_valid_prefixes()
				local valid_prefixes = {}
				for param_mod, _ in pairs(param_mods) do
					table.insert(valid_prefixes, param_mod)
				end
				table.insert(valid_prefixes, "ref")
				table.insert(valid_prefixes, "q")
				table.insert(valid_prefixes, "qq")
				table.insert(valid_prefixes, "a")
				table.insert(valid_prefixes, "aa")
				table.sort(valid_prefixes)
				return valid_prefixes
			end

			local segments = put.parse_balanced_segment_run(respelling, "<", ">")
			local comma_separated_groups = put.split_alternating_runs_on_comma(segments, ",")
			local parsed = {terms = {}, audio = {}, rhyme = {}, hyph = {}, hmp = {}}
			for j, group in ipairs(comma_separated_groups) do
				local termobj = parse_respelling(group[1], pagename, parse_err)
				for k = 2, #group - 1, 2 do
					if group[k + 1] ~= "" then
						parse_err("Extraneous text '" .. group[k + 1] .. "' after modifier")
					end
					local modtext = group[k]:match("^<(.*)>$")
					if not modtext then
						parse_err("Internal error: Modifier '" .. group[k] .. "' isn't surrounded by angle brackets")
					end
					local prefix, arg = modtext:match("^([a-z]+):(.*)$")
					if not prefix then
						local valid_prefixes = get_valid_prefixes()
						for idx, valid_prefix in ipairs(valid_prefixes) do
							valid_prefixes[idx] = "'" .. valid_prefix .. ":'"
						end
						parse_err("Modifier " .. group[k] .. " lacks a prefix, should begin with one of " ..
							m_table.serialCommaJoin(valid_prefixes))
					end
					if prefix == "ref" or prefix == "q" or prefix == "qq" or prefix == "a" or prefix == "aa" then
						-- Term-level modifiers attach to the individual term and may repeat.
						if not termobj[prefix] then
							termobj[prefix] = {}
						end
						table.insert(termobj[prefix], arg)
					elseif param_mods[prefix] then
						if j < #comma_separated_groups then
							parse_err("Modifier '" .. prefix .. "' should occur after the last comma-separated term")
						end
						if not param_mods[prefix].insert and parsed[prefix] then
							parse_err("Modifier '" .. prefix .. "' occurs twice, second occurrence " .. group[k])
						end
						local converted
						if param_mods[prefix].convert then
							converted = param_mods[prefix].convert(arg)
						else
							converted = arg
						end
						if param_mods[prefix].insert then
							if param_mods[prefix].flatten then
								for _, obj in ipairs(converted) do
									table.insert(parsed[prefix], obj)
								end
							else
								table.insert(parsed[prefix], converted)
							end
						else
							parsed[prefix] = converted
						end
					else
						local valid_prefixes = get_valid_prefixes()
						for idx, valid_prefix in ipairs(valid_prefixes) do
							valid_prefixes[idx] = "'" .. valid_prefix .. "'"
						end
						parse_err("Unrecognized prefix '" .. prefix .. "' in modifier " .. group[k]
							.. ", should be " .. m_table.serialCommaJoin(valid_prefixes))
					end
				end
				table.insert(parsed.terms, termobj)
			end
			if not parsed.bullets then
				parsed.bullets = 1
			end
			table.insert(parsed_respellings, parsed)
		else
			local termobjs = {}
			for _, term in ipairs(split_on_comma(respelling)) do
				table.insert(termobjs, parse_respelling(term, pagename, parse_err))
			end
			table.insert(parsed_respellings, {
				terms = termobjs,
				audio = {},
				rhyme = {},
				hyph = {},
				hmp = {},
				bullets = 1,
			})
		end
	end

	-- Resolve the special overall hyphenation values: "+" means syllabify the page name, "-" means show none.
	if overall_hyph then
		for _, hyph in ipairs(overall_hyph) do
			if hyph.syllabification == "+" then
				hyph.syllabification = syllabify_from_spelling(pagename)
				hyph.hyph = split_syllabified_spelling(hyph.syllabification)
			elseif hyph.syllabification == "-" then
				overall_hyph = {}
				break
			end
		end
	end

	-- Loop over individual respellings, processing each.
	for _, parsed in ipairs(parsed_respellings) do
		parsed.pronun = generate_pronun(parsed)
		local no_auto_rhyme = false
		for _, term in ipairs(parsed.terms) do
			if term.raw then
				if not should_generate_rhyme_from_ipa(term.raw_phonemic or term.raw_phonetic) then
					no_auto_rhyme = true
					break
				end
			elseif not should_generate_rhyme_from_respelling(term.term) then
				no_auto_rhyme = true
				break
			end
		end

		if #parsed.hyph == 0 then
			-- No explicit per-set hyphenation: derive it from the respellings, keeping only syllabifications
			-- that can be aligned with the page name.
			if not overall_hyph and all_words_have_vowels(pagename) then
				for _, term in ipairs(parsed.terms) do
					if not term.raw then
						local syllabification = syllabify_from_spelling(term.term)
						local aligned_syll = align_syllabification_to_spelling(syllabification, pagename)
						if aligned_syll then
							m_table.insertIfNot(parsed.hyph, generate_hyph_obj(aligned_syll))
						end
					end
				end
			end
		else
			for _, hyph in ipairs(parsed.hyph) do
				if hyph.syllabification == "+" then
					hyph.syllabification = syllabify_from_spelling(pagename)
					hyph.hyph = split_syllabified_spelling(hyph.syllabification)
				elseif hyph.syllabification == "-" then
					parsed.hyph = {}
					break
				end
			end
		end

		-- Generate the rhymes of this set from its phonemic pronunciations, filling in `rhyme_ret.pronun`.
		-- NOTE(review): assumes generate_pronun() returns its pronunciations as a list in `.pronun`, each item
		-- optionally carrying `.phonemic` -- confirm against generate_pronun.
		local function do_rhyme(rhyme_ret)
			rhyme_ret.pronun = {}
			for _, pronun in ipairs(parsed.pronun.pronun) do
				if pronun.phonemic then
					-- Count number of syllables by looking at syllable boundaries (including stress marks).
					local num_syl = get_num_syl_from_phonemic(pronun.phonemic)
					-- Get the rhyme by truncating everything up through the last stress mark + any following
					-- consonants, and remove syllable boundary markers.
					local rhyme = convert_phonemic_to_rhyme(pronun.phonemic)
					local saw_already = false
					for _, existing in ipairs(rhyme_ret.pronun) do
						if existing.rhyme == rhyme then
							saw_already = true
							-- We already saw this rhyme but possibly with a different number of syllables.
							m_table.insertIfNot(existing.num_syl, num_syl)
							break
						end
					end
					if not saw_already then
						table.insert(rhyme_ret.pronun, {rhyme = rhyme, num_syl = {num_syl}})
					end
				end
			end
		end

		if #parsed.rhyme == 0 then
			if overall_rhyme or no_auto_rhyme then
				parsed.rhyme = nil
			else
				local rhyme_ret = {}
				do_rhyme(rhyme_ret)
				-- Nothing to display if no pronunciation had a phonemic form.
				parsed.rhyme = #rhyme_ret.pronun > 0 and rhyme_ret or nil
			end
		else
			local no_rhyme = false
			for _, rhyme in ipairs(parsed.rhyme) do
				if rhyme.rhyme == "-" then
					no_rhyme = true
					break
				end
			end
			if no_rhyme then
				parsed.rhyme = nil
			else
				-- Explicit per-set rhymes: convert them to the same structure as auto-generated ones.
				local rhyme_ret = {}
				do_rhymes(parsed.rhyme, parsed.hyph, {parsed}, rhyme_ret)
				parsed.rhyme = rhyme_ret
			end
		end
	end

	if overall_rhyme then
		local no_overall_rhyme = false
		for _, orhyme in ipairs(overall_rhyme) do
			if orhyme.rhyme == "-" then
				no_overall_rhyme = true
				break
			end
		end
		if no_overall_rhyme then
			overall_rhyme = nil
		else
			-- Syllable counts for the overall rhymes come from the overall hyphenations if given, otherwise
			-- from the hyphenations of all sets.
			local all_hyphs
			if overall_hyph then
				all_hyphs = overall_hyph
			else
				all_hyphs = {}
				for _, parsed in ipairs(parsed_respellings) do
					for _, hyph in ipairs(parsed.hyph) do
						m_table.insertIfNot(all_hyphs, hyph)
					end
				end
			end
			local rhyme_ret = {}
			do_rhymes(overall_rhyme, all_hyphs, parsed_respellings, rhyme_ret)
			overall_rhyme = rhyme_ret
		end
	end

	-- If all sets of pronunciations have the same rhymes, display them only once at the bottom.
	-- Otherwise, display rhymes beneath each set, indented.
	local first_rhyme_ret
	local all_rhyme_sets_eq = true
	for j, parsed in ipairs(parsed_respellings) do
		if j == 1 then
			first_rhyme_ret = parsed.rhyme
		elseif not m_table.deepEquals(first_rhyme_ret, parsed.rhyme) then
			all_rhyme_sets_eq = false
			break
		end
	end

	-- Format the rhymes in `rhyme_ret.pronun` as one bulleted line. Returns a single string so that the result
	-- can be passed straight to table.insert().
	local function format_rhyme(rhyme_ret, num_bullets)
		local rhymes = {}
		for _, pronun in ipairs(rhyme_ret.pronun) do
			table.insert(rhymes, pronun)
		end
		local data = {
			lang = lang,
			rhymes = rhymes,
			force_cat = force_cat,
		}
		return string.rep("*", num_bullets) .. " " .. require("Module:rhymes").format_rhyme(data)
	end

	-- If all sets of pronunciations have the same hyphenations, display them only once at the bottom.
	-- Otherwise, display hyphenations beneath each set, indented.
	local first_hyphs
	local all_hyph_sets_eq = true
	for j, parsed in ipairs(parsed_respellings) do
		if j == 1 then
			first_hyphs = parsed.hyph
		elseif not m_table.deepEquals(first_hyphs, parsed.hyph) then
			all_hyph_sets_eq = false
			break
		end
	end

	local function format_hyphenations(hyphs, num_bullets)
		local hyphtext = require("Module:hyphenation").format_hyphenations { lang = lang, hyphs = hyphs, caption = "Syllabification" }
		return string.rep("*", num_bullets) .. " " .. hyphtext
	end

	-- If all sets of pronunciations have the same homophones, display them only once at the bottom.
	-- Otherwise, display homophones beneath each set, indented.
	local first_hmps
	local all_hmp_sets_eq = true
	for j, parsed in ipairs(parsed_respellings) do
		if j == 1 then
			first_hmps = parsed.hmp
		elseif not m_table.deepEquals(first_hmps, parsed.hmp) then
			all_hmp_sets_eq = false
			break
		end
	end

	local function format_homophones(hmps, num_bullets)
		local hmptext = require("Module:homophones").format_homophones { lang = lang, homophones = hmps }
		return string.rep("*", num_bullets) .. " " .. hmptext
	end

	local function format_audio(audios, num_bullets)
		local ret = {}
		for i, audio in ipairs(audios) do
			-- FIXME! There should be a module for this.
			local text = frame:expandTemplate {
				title = "audio", args = {"tl", audio.file, audio.gloss }
			}
			if audio.q and audio.q[1] or audio.qq and audio.qq[1]
				or audio.a and audio.a[1] or audio.aa and audio.aa[1] then
				text = require("Module:pron qualifier").format_qualifiers(audio, text)
			end
			table.insert(ret, string.rep("*", num_bullets) .. " " .. text)
		end
		return table.concat(ret, "\n")
	end

	-- Assemble the output: each pronunciation set with its own audio (and, when they differ between sets, its
	-- own rhymes, syllabifications and homophones), followed by the shared and overall items.
	local textparts = {}
	local min_num_bullets = 9999
	for j, parsed in ipairs(parsed_respellings) do
		if parsed.bullets < min_num_bullets then
			min_num_bullets = parsed.bullets
		end
		if j > 1 then
			table.insert(textparts, "\n")
		end
		table.insert(textparts, parsed.pronun.text)
		if #parsed.audio > 0 then
			table.insert(textparts, "\n")
			-- If only one pronunciation set, add the audio with the same number of bullets, otherwise
			-- indent audio by one more bullet.
			table.insert(textparts, format_audio(parsed.audio,
				#parsed_respellings == 1 and parsed.bullets or parsed.bullets + 1))
		end
		if not all_rhyme_sets_eq and parsed.rhyme then
			table.insert(textparts, "\n")
			table.insert(textparts, format_rhyme(parsed.rhyme, parsed.bullets + 1))
		end
		if not all_hyph_sets_eq and #parsed.hyph > 0 then
			table.insert(textparts, "\n")
			table.insert(textparts, format_hyphenations(parsed.hyph, parsed.bullets + 1))
		end
		if not all_hmp_sets_eq and #parsed.hmp > 0 then
			table.insert(textparts, "\n")
			table.insert(textparts, format_homophones(parsed.hmp, parsed.bullets + 1))
		end
	end
	if overall_audio and #overall_audio > 0 then
		table.insert(textparts, "\n")
		table.insert(textparts, format_audio(overall_audio, min_num_bullets))
	end
	if all_rhyme_sets_eq and first_rhyme_ret then
		table.insert(textparts, "\n")
		table.insert(textparts, format_rhyme(first_rhyme_ret, min_num_bullets))
	end
	if overall_rhyme then
		table.insert(textparts, "\n")
		table.insert(textparts, format_rhyme(overall_rhyme, min_num_bullets))
	end
	if all_hyph_sets_eq and first_hyphs and #first_hyphs > 0 then
		table.insert(textparts, "\n")
		table.insert(textparts, format_hyphenations(first_hyphs, min_num_bullets))
	end
	if overall_hyph and #overall_hyph > 0 then
		table.insert(textparts, "\n")
		table.insert(textparts, format_hyphenations(overall_hyph, min_num_bullets))
	end
	if all_hmp_sets_eq and first_hmps and #first_hmps > 0 then
		table.insert(textparts, "\n")
		table.insert(textparts, format_homophones(first_hmps, min_num_bullets))
	end
	if overall_hmp and #overall_hmp > 0 then
		table.insert(textparts, "\n")
		table.insert(textparts, format_homophones(overall_hmp, min_num_bullets))
	end

	return table.concat(textparts)
end

-- Hand back the table of public entry points (`show`, `show_pr`) as the module's value.
return export
Module:tl-pron/sandbox

Navigation menu

Search