-- Jump to content
--
-- Module:la-pronunc
--
-- From Wiktionary, the free dictionary
--
-- This module is not meant to be used directly. It is used by {{la-IPA}}; see there for usage.


local export = {}

local m_a = require("Module:accent qualifier")
local m_IPA = require("Module:IPA")
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local lang = require("Module:languages").getByCode("la")

local concat = table.concat
local deep_equals = m_table.deepEquals
local gsplit = m_str_utils.gsplit
local insert = table.insert
local invert = m_table.invert
local list_to_set = m_table.listToSet
local remove = table.remove
local rfind = m_str_utils.find
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local trim = m_str_utils.trim
local u = m_str_utils.char
local ugsub = m_str_utils.gsub
local ulower = m_str_utils.lower
local usub = m_str_utils.sub
local ulen = m_str_utils.len
local umatch = m_str_utils.match

-- Combining diacritics and IPA length marks used throughout the module.
local MACRON = u(0x304) -- U+0304 COMBINING MACRON
local BREVE = u(0x306) -- U+0306 COMBINING BREVE
local TREMA = u(0x308) -- U+0308 COMBINING DIAERESIS
-- Pattern fragment: an optional macron, then an optional breve, then an
-- optional trema (in that order).
local LENGTH = MACRON .. "?" .. BREVE .. "?" .. TREMA .. "?"
local TIE = u(0x361) -- U+0361 COMBINING DOUBLE INVERTED BREVE (tie bar)
-- IPA vowel qualities (tense and lax) that may appear in the output; VOWEL is
-- the same set wrapped as a pattern character class.
local VOWELS = "aeɛiɪoɔuʊyʏ"
local VOWEL = "[" .. VOWELS .. "]"

local TILDE = u(0x303) -- U+0303 COMBINING TILDE (marks nasal vowels)
local HALF_LONG = "ˑ"
local LONG = "ː"

-- Classical pronunciation: maps a spelling unit (one or two letters, after the
-- normalizations done in convert_word) to its phonemic IPA value. Letters that
-- have no entry are passed through unchanged by letters_to_ipa. The "ks" value
-- of "x" is split into two separate phonemes there.
local letters_ipa = {
	["a"] = "a", ["e"] = "e", ["i"] = "i", ["o"] = "o", ["u"] = "u", ["y"] = "y",
	["ā"] = "aː", ["ē"] = "eː", ["ī"] = "iː", ["ō"] = "oː", ["ū"] = "uː", ["ȳ"] = "yː",
	["ae"] = "ae̯", ["au"] = "au̯", ["ei"] = "ei̯", ["eu"] = "eu̯", ["oe"] = "oe̯", ["ou"] = "uː",
	["b"] = "b", ["d"] = "d", ["f"] = "f",
	["c"] = "k", ["g"] = "ɡ", ["q"] = "k", ["v"] = "w", ["x"] = "ks",
	["ph"] = "pʰ", ["th"] = "tʰ", ["ch"] = "kʰ", ["rh"] = "r",
	-- "qw", "gw" and "sw" are internal spellings; convert_word produces
	-- "qw" and "gw" from "qu"/"gu" before a vowel.
	["qw"] = "kʷ", ["gw"] = "ɡʷ", ["sw"] = "sʷ",
	-- Both the ASCII apostrophe and the IPA stress mark denote manual stress.
	["'"] = "ˈ", ["ˈ"] = "ˈ",
}

-- Ecclesiastical pronunciation: same role as the Classical table, but with
-- y merged into i, ae/oe monophthongized, v as a fricative, ph as f and a
-- silent h. Further context-sensitive changes (palatalization, affrication)
-- are applied afterwards in letters_to_ipa.
local letters_ipa_eccl = {
	["a"] = "a", ["e"] = "e", ["i"] = "i", ["o"] = "o", ["u"] = "u", ["y"] = "i",
	["ā"] = "aː", ["ē"] = "eː", ["ī"] = "iː", ["ō"] = "oː", ["ū"] = "uː", ["ȳ"] = "iː",
	["ae"] = "eː", ["au"] = "au̯", ["ei"] = "ei̯", ["eu"] = "eu̯", ["oe"] = "eː", ["ou"] = "uː",
	["b"] = "b", ["d"] = "d", ["f"] = "f",
	-- "k" is temporarily mapped to the placeholder "q" so that the
	-- palatalization step in letters_to_ipa (which looks for "k") skips it;
	-- the placeholder is turned back into "k" right after that step.
	["k"] = "q", -- dirty hack to make sure k isn't palatalized
	["c"] = "k", ["g"] = "ɡ", ["v"] = "v", ["x"] = "ks",
	["ph"] = "f", ["th"] = "tʰ", ["ch"] = "kʰ", ["rh"] = "r",
	["qw"] = "kʷ", ["gw"] = "ɡʷ", ["sw"] = "sʷ",
	["h"] = "",
	["'"] = "ˈ", ["ˈ"] = "ˈ",
}

-- Tense short vowel -> lax counterpart, used for the Classical phonetic
-- transcription ("a" has no lax counterpart here and is left alone).
local lax_vowel = {
	["e"] = "ɛ",
	["i"] = "ɪ",
	["o"] = "ɔ",
	["u"] = "ʊ",
	["y"] = "ʏ",
}

-- Reverse lookup of lax_vowel (lax -> tense).
local tense_vowel = m_table.invert(lax_vowel)

-- Voiceless stop -> voiced counterpart, for regressive voicing assimilation.
local voicing = {
	["p"] = "b",
	["t"] = "d",
	["k"] = "ɡ",
}

-- Reverse lookup of voicing (voiced -> voiceless).
local devoicing = m_table.invert(voicing)

-- Ordered list of {pattern, replacement} pairs that turn the Classical
-- phonemic transcription into the phonetic one. convert_word applies them
-- one after another with ugsub, so the order of the entries matters: later
-- rules see the output of earlier ones.
local phonetic_rules = {

	-- Bibliography included at the end

	-- Assimilation of [g] to [ŋ] before a following /n/
	{"ɡ([.ˈ]*)n", "ŋ%1n"},
	-- Per Allen (1978: 23), although note the reservations expressed on the next page.

	-- Assimilation of word-internal /n/ and /m/ to following consonants. Exception: /m/ does not assimilate to a following /n/.
	{"[mn]([.ˈ]*)([kɡ])", "ŋ%1%2"},
	{"m([.ˈ]*)([td])", "n%1%2"},
	{"n([.ˈ]*)([mpb])", "m%1%2"},
		-- Per George M. Lane: “Nasals changed their place of articulation to that of the following consonant. Thus, dental n before the labials p and b became the labial m... labial m before the gutturals c and g became guttural n...labial m before the dentals t, d, s became dental n…” (§164.3); “One nasal, n, is assimilated to another, m...but an m before n is never assimilated..." (§166.5).
		-- Per Lloyd (1987: 84): “The opposition between nasals was neutralized in syllable-final position, with the realization of the nasality being assimilated to the point of articulation of the following consonant, e.g., [m] is found only before labials, [n] only before dentals or alveolars, and [ŋ] only before velars and /n/."
		-- Potential addition: assimilation of final /m/ and /n/ across word boundaries, per e.g. Allen (1978: 28, 31).
	
	-- No additional labialization before high back vowels
	{"ʷ%f[uʊ]", ""},
	
	-- Tensing of short vowels before another vowel
	{
		"(" .. VOWEL .. ")([.ˈ]+[h]?)%f" .. VOWEL,
		function (v, following)
			return (tense_vowel[v] or v) .. following
		end,
	},

	-- But not before consonantal glides
	{"e([iu]̯)", "ɛ%1"},

	-- Nasal vowels
	-- Word-final vowel + m: lax, nasalized, half-long vowel; the m is dropped.
	{
		"(" .. VOWEL .. ")m$",
		function (v)
			return (lax_vowel[v] or v) .. TILDE .. HALF_LONG
		end,
	},
	-- Vowel + n/m before f or s: tense, nasalized, long vowel; the nasal is dropped.
	{
		"(" .. VOWEL .. ")[nm]([.ˈ]*[fs])",
		function (v, following)
			return (tense_vowel[v] or v) .. TILDE .. LONG .. following
		end,
	},
	
	-- Realization of /r/ as a tap
		-- Pultrová (2013) argues for Latin /r/ being an alveolar tap.
		-- Lloyd (1987: 81) agrees: “The /r/ was doubtlessly an alveolar flap."
		-- Allen (1978: 33) expresses doubt: “By the classical period there is no reason to think that the sound had not strengthened to the trill described by later writers.”
		-- Unconditional [r] transcription is preferable to unconditional [ɾ] per 18 September 2021 discussion at Module_talk:la-pronunc#Transcription_of_Latin's_rhotic_consonant
		-- No consensus yet on how to implement conditional allophony of [r] vs. [ɾ]
		
	-- Voicing and loss of intervocalic /h/.
	{"([^ˈ].)h", "%1(ɦ)"},
	-- Per Allen (1978: 43–45).

	-- Phonetic (as opposed to lexical/phonemic) assimilations
		-- Place
			-- First because this accounts for 'atque' seemingly escaping total assimilation (and 'adque' presumably not)
	{"d([.ˈ]*s%f[" .. VOWELS .. "ptk])", "s%1"}, -- leave [t] out since etsi has [ts], not [sː]
	{"s[^ː]([.ˈ]*)s%f[ptk]", "s(ː)%1"},
	{"st([.ˈ]+)([^" .. VOWELS .. "])", "s(t)%1%2"},

	{"d([.ˈ]+)([pkɡln])", "%2%1%2"}, --leave [r] out since dr does not assimilate, even when heterosyllabic (e.g. quadrans), except in prefixed words
	{"b([.ˈ]+)([mf])", "%2%1%2"},
	{"s([.ˈ]+)(f)", "%2%1%2"},

	-- Regressive voicing assimilation in consonant clusters
	{
		"([bdɡ])([.ˈ]*)%f[ptksf]",
		function (consonant, following)
			return (devoicing[consonant] or consonant) .. following
		end,
	},
	{
		"([ptk])([.ˈ]*)%f[bdɡ]",
		function (consonant, following)
			return (voicing[consonant] or consonant) .. following
		end,
	},

	-- Allophones of /l/
	-- Every /l/ is first made dark; the two rules further down then undo
	-- that for geminates and before front vowels.
	{"l", "ɫ̪"},
		-- “Pinguis”. Dark/velarized.
		-- Per Weiss (2009: 117): “…pinguis (velar). l is exīlis before i and when geminate, otherwise l is pinguis.”
		-- Page 82: “…l is pinguis even before e, e.g. Herculēs < Hercolēs … < Hercelēs …”
		-- Per Sihler (1995: 174): “l exilis was found before the vowels -i- and -ī-, and before another -l-; l pinguis occurred before any other vowel; before any consonant except l; and in word-final position […] l pinguis actually had two degrees of avoirdupois, being fatter before a consonant than before a vowel…” 
		-- Page 41: “…velarized l (that is, ‘l pinguis’)…”
		-- Sen (2015: §2) states that /l/ was velarized in word-final position or before consonants–other than another /l/–and that it had varying degrees of “dark resonance (velarization in articulatory terms)” (p. 23) before e, a, o, and u (p. 33).
		-- Both Sen and Sihler indicate different degrees of velarization, depending on the environment. IPA lacks a way to represent these gradations, unfortunately.
	{"ɫ̪([.ˈ]*)ɫ̪", "l%1lʲ"},
	{"ɫ̪([.ˈ]*[iɪyʏ])", "lʲ%1"},
		-- “Exīlis”. Not dark/velarized. Possibly palatalized.
		-- Per Sen (2015: 29): “It is plausible […] that simple onset /l/ was palatalized before /i/, thus [lʲ] […] it seems likely that geminate /ll/ was also palatalized, given the similar behaviour of the two…”
		-- Per Weiss (2009: 82): “In Latin, l developed…a non-velar (possibly palatal) allophone called exīlis before i and when geminate…”
		-- Per Sihler (1995: 174): “l exilis was found before the vowels -i- and -ī-, and before another -l-.”
		-- Per Sihler (2000: §133.1): "It is less clear whether the 'thin' lateral [i.e. L exilis] was specifically palatal, or palatalized, or only neutral."
		-- Giannini and Marotta apparently argue that it was not palatalized (https://i.imgur.com/ytM1QDn.png). I do not have access to the book in question.

	-- Retracted /s/
	{"s", "s̠"},
		-- Lloyd (1987: 80–81) expresses some uncertainty about this, but appears to overall be in favour of it: “…the evidence that the apico-alveolar pronunciation was ancient in Latin and inherited from Indo-European is quite strong.”
		-- Per Zampaulo (2019: 93), “…in many instances, Latin s was likely pronounced as an apical segment [s̺] (rather than laminal [s])."
		-- Per Widdison (1987: 64), "In all, it would be fair to state that the apico-alveolar [ś] articulation represented the main allophonic variant of Latin and possibly IE /s/..."

	-- dental Z
	{"z", "z̪"},

	-- Dental articulations
	{"[td]", "%0̪"},
	{"n([.ˈ]*[td])", "n̪%1"}, --it's not as clear as for the stops

	--Allophones of A
	{"a", "ä"},

	-- Works cited
		-- Allen, William Sidney. 1978. Vox Latina: A Guide to the pronunciation of Classical Latin.
		-- Lane, George M. A Latin grammar for schools and colleges.
		-- Lloyd, Paul M. 1987. From Latin to Spanish.
		-- Pultrová, Lucie. 2013. On the phonetic nature of the Latin R.
		-- Sen, Ranjan. 2015. Syllable and segment in Latin.
		-- Sihler, Andrew L. 1995. New comparative grammar of Greek and Latin.
		-- Sihler, Andrew L. 2000. Language history: An introduction.
		-- Weiss, Michael. 2009. Outline of the historical and comparative grammar of Latin.
		-- Widdison, Kirk A. 1987. 16th century Spanish sibilant reordering: Reasons for divergence.
		-- Zampaulo, André. 2019. Palatal Sound Change in the Romance languages: Diachronic and Synchronic Perspectives.
}

-- Ordered list of {pattern, replacement} pairs that turn the Ecclesiastical
-- phonemic transcription into the phonetic one. convert_word applies them
-- sequentially with ugsub, so entry order matters.
local phonetic_rules_eccl = {
	-- Specifically the Roman Ecclesiastical for singing from the Liber Usualis

	{"([aɛeiɔou][ːˑ.ˈ]*)s([.ˈ]*)%f[aɛeiɔou]", "%1s̬%2"}, --partial voicing of s between vowels
	{"s([.ˈ]*)%f[bdɡmnlv]", "z%1"}, --full voicing of s before voiced consonants
	{"ek([.ˈ]*)s([aɛeiɔoubdɡmnlv])", "eɡ%1z%2"}, --voicing of the prefix ex-
	{"kz", "ɡz"}, --i give up, without this /ksˈl/ gives [kzˈl]

	-- Tapped R intervocalically and in complex onset
	-- ^ Citation needed for this being the case in Ecclesiastical pronunciation
	-- {"([aɛeiɔou][ːˑ.ˈ]+)r([aɛeiɔou]?)", "%1ɾ%2"},
	-- {"([fbdɡptk])r", "%1ɾ"},
	
	{"a", "ä"}, --a is open and central per 17 September 2021 discussion at Template_talk:la-IPA#Ecclesiastical_a
	-- /e/ and /o/ realization is phonetic but handled in convert_word below as it is sensitive to stress

	-- Dental articulations
	{"([ln])([.ˈ]*[td][^͡])", "%1̪%2"}, --assimilation of n to dentality. 
	--Note that the quality of n might not be dental otherwise--it may be alveolar in most contexts in Italian, according to Wikipedia.
	{"([td])([^͡])", "%1̪%2"}, --t and d are dental, except as the first element of a palatal affricate
	{"t͡s", "t̪͡s̪"}, -- dental affricates
	{"d͡z", "d̪͡z̪"}, --dental affricates
	-- Undo the dental mark on a stop that is the first half of a geminate
	-- palatal affricate.
	{"t̪([.ˈ]*t͡ʃ)", "t%1"},
	{"d̪([.ˈ]*d͡ʒ)", "d%1"},

	--end of words
	-- (the rules above need a character after t/d, so word-final stops are
	-- handled separately here)
	{"([ln])t$", "%1̪t̪"},
	{"([td])$", "%1̪"},

	--Partial assimilation of l and n before palatal affricates, as in Italian
	{"([ln])([.ˈ]*t͡ʃ)", "%1̠ʲ%2"},
	{"([ln])([.ˈ]*d͡ʒ)", "%1̠ʲ%2"},
	{"([ln])([.ˈ]*ʃ)", "%1̠ʲ%2"},

	-- other coda nasal assimilation, full and partial. Per Canepari, only applies to /n/ and not to /m/
	{"n([.ˈ]*[kɡ])", "ŋ%1"},
	{"n([.ˈ]*[fv])", "ɱ%1"},
}

-- Vowel or diphthong -> lengthened form. Already-long vowels map to
-- themselves. Used by convert_word to lengthen the final vowel of the
-- stressed syllable in the Ecclesiastical phonetic transcription.
local lengthen_vowel = {
	["a"] = "aː", ["aː"] = "aː",
	["ɛ"] = "ɛː", ["ɛː"] = "ɛː",
	["e"] = "eː", ["eː"] = "eː",
	["i"] = "iː", ["iː"] = "iː",
	["ɔ"] = "ɔː", ["ɔː"] = "ɔː",
	["o"] = "oː", ["oː"] = "oː",
	["u"] = "uː", ["uː"] = "uː",
	["au̯"] = "aːu̯",
	["ɛu̯"] = "ɛːu̯",
	["eu̯"] = "eːu̯",
}

-- Set of phonemes that count as a syllable nucleus (short vowels, long
-- vowels and diphthongs). Everything else is treated as a consonant by the
-- syllabification helpers.
local vowels = list_to_set{
	"a", "ɛ", "e", "ɪ", "i", "ɔ", "o", "ʊ", "u", "y",
	"aː", "ɛː", "eː", "iː", "ɔː", "oː", "uː", "yː",
	"ae̯", "au̯", "ei̯", "eu̯", "oe̯", "ou̯",
}


-- Permitted syllable onsets, written as concatenated phonemes. Built first
-- as a list so that the s-initial clusters can be derived from it, then
-- converted into a set for lookup.
local onsets = {
	"p", "pʰ", "b",
	"t", "tʰ", "d",
	"k", "kʰ", "kʷ", "ɡ", "ɡʷ",
	"s", "sʷ", "z", "f", "v", "h",
	"t͡s", "d͡z", "t͡ʃ", "d͡ʒ", "ʃ",
	"l", "r",
	"m", "n", "ɲ",
	"j", "w",
	
	"pl", "pʰl", "bl",
	"kl", "kʰl", "ɡl",
	"fl",
	
	"pr", "pʰr", "br",
	"tr", "tʰr", "dr",
	"kr", "kʰr", "ɡr",
	"fr",
}
-- For every onset that is a voiceless stop (optionally aspirated or
-- labialized, optionally followed by l or r), also allow it with a leading
-- "s" (e.g. "sp", "str"). The loop bound is evaluated once, so the entries
-- appended here are not themselves revisited.
for i = 1, #onsets do
	local v = onsets[i]
	if umatch(v, "^[ptk][ʰʷ]*[lr]?$") then
		insert(onsets, "s" .. v)
	end
end
onsets = list_to_set(onsets)

-- Set of recognised syllable codas, written as concatenated phonemes.
-- split_syllables only uses it for diagnostics: a coda that is not listed
-- here triggers the "bad coda" tracking call but is otherwise accepted.
local codas = list_to_set{
	"p", "pʰ", "b",
	"t", "tʰ", "d",
	"k", "kʰ", "ɡ",
	"s", "z", "f",
	"ʃ",
	"l", "r",
	"m", "n", "ɲ",
	"j",

	"ps", "ts", "ks",

	"sp", "st", "sk",
	"spʰ", "stʰ", "skʰ",

	"lp", "lpʰ", "lb", "lps",
	"lt", "ltʰ", "ld",
	"lk", "lkʰ", "lɡ", "lks",
	"ls",
	"lm", "ln", "lms", "lns",

	"rp", "rpʰ", "rb", "rps",
	"rt", "rtʰ", "rd",
	"rk", "rkʰ", "rɡ", "rks",
	"rs",
	"rl", "rls",
	"rm", "rn", "rms", "rns",

	"mp", "mpʰ", "mb", "mps",
	"nt", "ntʰ", "nd",
	"nk", "nkʰ", "nɡ", "nks",
	"ns",
}

-- Prefixes that end in a consonant; can be patterns. Occurrences of such
-- prefixes + i + vowel cause the i to convert to j (to suppress this, add a
-- dot, i.e. syllable boundary, after the i).
-- Each entry is anchored at the start of the word when it is used in
-- convert_word.
local cons_ending_prefixes = {
	"a[bd]", "circum", "con", "dis", "ex", "inter", "in", "ob", "per",
	"subter", "sub", "super", "tr[aā]ns"
}

-- Long (macron) vowel -> short (breve) vowel. Used by export.phoneticize as
-- the replacement table for a vowel marked with both macron and breve, to
-- build the short-vowel variant.
local macrons_to_breves = {
	["ā"] = "ă",
	["ē"] = "ĕ",
	["ī"] = "ĭ",
	["ō"] = "ŏ",
	["ū"] = "ŭ",
	-- Unicode doesn't have breve-y
	["ȳ"] = "y" .. BREVE,
}

-- Expand a ligature into its two letters, keeping any diacritics on the
-- first one: "æ" becomes "a" .. diacritic .. "e", anything else is treated
-- as "œ" and becomes "o" .. diacritic .. "e".
local function normalize_ligatures(ligature, diacritic)
	local first_letter
	if ligature == "æ" then
		first_letter = "a"
	else
		first_letter = "o"
	end
	return first_letter .. diacritic .. "e"
end

-- NOTE: Everything is lowercased very early on, so we don't have to worry
-- about capitalized letters.
-- FIXME: handle ǟë̄ï̄ȫǖÿ̄ etc.
-- Orthographic (input-side) vowel letters, as opposed to the IPA vowel sets
-- defined near the top of the module.
local short_vowels_string = "aeiouyăĕĭŏŭäëïöüÿ" -- no breve-y in Unicode
local long_vowels_string = "āēīōūȳ"
local vowels_string = short_vowels_string .. long_vowels_string
-- Pattern character classes: any vowel letter / any character that is not
-- a vowel letter.
local vowels_c = "[" .. vowels_string .. "]"
local non_vowels_c = "[^" .. vowels_string .. "]"

-- Record a tracking hit under "la-pronunc/<page>" via Module:debug/track.
-- Always returns true.
local function track(page)
	local tracker = require("Module:debug/track")
	tracker("la-pronunc/" .. page)
	return true
end

-- Strip every occurrence of the pattern `ch` (normally a single combining
-- diacritic) from `word`. The word is decomposed (NFD) first so that the
-- diacritic is a separate character, and recomposed (NFC) afterwards.
local function remove_diacritic(word, ch)
	local decomposed = toNFD(word)
	local stripped = ugsub(decomposed, ch, "")
	return toNFC(stripped)
end

-- Convert a normalized, lowercased word into a list of phoneme strings.
--
-- word:     the spelling to convert.
-- phonetic: accepted for symmetry with the callers; not used in this function.
-- eccl:     truthy to use the Ecclesiastical letter table and apply the
--           Ecclesiastical context rules (palatalization etc.).
--
-- Returns a list (sequence) of phoneme strings. Characters without an entry
-- in the letter table (e.g. "." or consonants such as "p") are copied as-is.
local function letters_to_ipa(word, phonetic, eccl)
	local phonemes = {}
	
	local dictionary = eccl and letters_ipa_eccl or letters_ipa
	
	-- Greedy tokenization: at each position take the longest dictionary key
	-- that is a prefix of the remaining word.
	while ulen(word) > 0 do
		local longestmatch = ""
		
		for letter, ipa in pairs(dictionary) do
			if ulen(letter) > ulen(longestmatch) and usub(word, 1, ulen(letter)) == letter then
				longestmatch = letter
			end
		end
		
		if ulen(longestmatch) > 0 then
			-- "x" is stored as two phonemes so that syllabification can
			-- treat /k/ and /s/ separately.
			if dictionary[longestmatch] == "ks" then
				insert(phonemes, "k")
				insert(phonemes, "s")
			else
				insert(phonemes, dictionary[longestmatch])
			end
			word = usub(word, ulen(longestmatch) + 1)
		else
			-- No dictionary entry: pass the character through unchanged.
			insert(phonemes, usub(word, 1, 1))
			word = usub(word, 2)
		end
	end
	
	-- Ecclesiastical context rules. Each iteration looks at a window of
	-- (previous, current, next) phonemes, may rewrite any of the three, and
	-- writes the window back at the end of the iteration. The statement order
	-- inside the loop matters.
	if eccl then for i = 1, #phonemes do
		local prev, cur, nxt = phonemes[i - 1], phonemes[i], phonemes[i + 1]
		-- Palatalization of k/ɡ before a front vowel (e, ɛ, i, optionally long).
		if nxt and (cur == "k" or cur == "ɡ") and rfind(nxt, "^[eɛi][ːˑ]*$") then
			if cur == "ɡ" then
				cur = "d͡ʒ"
				-- geminate: ɡɡ -> d + d͡ʒ
				if prev == "ɡ" then
					prev = "d"
				end
			elseif prev == "s" then --and ((not phonemes[i - 2]) or phonemes[i - 2] ~= "k")
				-- sk before a front vowel -> ʃʃ
				prev, cur = "ʃ", "ʃ"
			else
				cur = "t͡ʃ"
				-- geminate: kk -> t + t͡ʃ
				if prev == "k" then
					prev = "t"
				end
			end
		end
		-- dirty hack to make sure k isn't palatalized
		-- (the letter "k" was mapped to the placeholder "q" in letters_ipa_eccl)
		if cur == "q" then
			cur = "k"
		end
		-- t + i + vowel -> t͡s, unless preceded by s or t.
		if cur == "t" and nxt == "i" and not (prev == "s" or prev == "t") and vowels[phonemes[i + 2]] then
			cur = "t͡s"
		end
		-- z -> d͡z; a double z becomes d + d͡z.
		if cur == "z" then
			if nxt == "z" then
				cur, nxt = "d", "d͡z"
			else
				cur = "d͡z"
			end
		end
		-- Aspiration of kʰ/tʰ is dropped.
		if cur == "kʰ" or cur == "tʰ" then
			cur = cur:gsub("ʰ$", "")
		end
		-- ɡn -> ɲɲ
		if cur == "ɡ" and nxt == "n" then
			cur, nxt = "ɲ", "ɲ"
		end
		-- Write the (possibly modified) window back. For i == 1 this stores
		-- prev (nil) at index 0, and for the last i it stores nxt (nil) past
		-- the end; both leave the sequence unchanged.
		phonemes[i - 1], phonemes[i], phonemes[i + 1] = prev, cur, nxt
	end end
	
	return phonemes
end


-- Return the onset of a syllable: the phonemes that precede its first vowel,
-- concatenated into one string. A stress mark ("ˈ") is skipped. If the
-- syllable has no vowel, all of its non-stress phonemes are returned.
local function get_onset(syll)
	local collected, count = {}, 0
	local pos, last = 1, #syll

	while pos <= last and not vowels[syll[pos]] do
		local phoneme = syll[pos]
		if phoneme ~= "ˈ" then
			count = count + 1
			collected[count] = phoneme
		end
		pos = pos + 1
	end

	return concat(collected)
end


-- Return the coda of a syllable: the phonemes that follow its last vowel,
-- concatenated into one string. If the syllable has no vowel, the whole
-- syllable (including any stress mark) is returned.
local function get_coda(syll)
	local size = #syll
	local last_vowel = size

	-- Walk backwards until a vowel is found (or the start is passed).
	while last_vowel >= 1 and not vowels[syll[last_vowel]] do
		last_vowel = last_vowel - 1
	end

	-- Yields "" when the syllable ends in a vowel (empty range).
	return concat(syll, "", last_vowel + 1, size)
end


-- Return the first vowel phoneme of a syllable, or nothing if the syllable
-- contains no vowel.
local function get_vowel(syll)
	local pos, last = 1, #syll
	while pos <= last do
		local phoneme = syll[pos]
		if vowels[phoneme] then
			return phoneme
		end
		pos = pos + 1
	end
end


-- Split a list of phonemes into syllables.
--
-- remainder: sequence of phoneme strings as produced by letters_to_ipa; may
--            contain "." (explicit syllable break) and "ˈ" (manual stress).
--
-- Returns a sequence of syllables, each itself a sequence of phoneme strings.
-- A manual stress mark, if present, stays inside its syllable.
--
-- The work is done in three passes:
--   1. cut after every vowel, giving open (C)V syllables plus marker and
--      leftover-consonant pseudo-syllables;
--   2. move consonants back to the preceding syllable until every onset is a
--      permitted one, and fold vowel-less pseudo-syllables into their
--      predecessor;
--   3. report (via tracking only) any onset or coda that is not in the
--      `onsets` / `codas` sets.
local function split_syllables(remainder)
	local syllables, syll = {}, {}

	for _, phoneme in ipairs(remainder) do
		if phoneme == "." then
			if #syll > 0 then
				insert(syllables, syll)
				syll = {}
			end
			-- Insert a special syllable consisting only of a period.
			-- We remove it later but it forces no movement of consonants across
			-- the period.
			insert(syllables, {"."})
		elseif phoneme == "ˈ" then
			-- A stress mark starts a new syllable; any pending consonants
			-- are flushed as their own (vowel-less) pseudo-syllable.
			if #syll > 0 then
				insert(syllables,syll)
			end
			syll = {"ˈ"}
		elseif vowels[phoneme] then
			-- A vowel closes the current syllable.
			insert(syll, phoneme)
			insert(syllables, syll)
			syll = {}
		else
			insert(syll, phoneme)
		end
	end
	
	-- If there are phonemes left, then the word ends in a consonant.
	-- Add another syllable for them, which will get joined the preceding
	-- syllable down below.
	if #syll > 0 then
		insert(syllables, syll)
	end
	
	-- Split consonant clusters between syllables
	-- NOTE: this loop removes entries from `syllables` while iterating over
	-- it with ipairs. That is intentional: after a removal the element that
	-- slides into position i is skipped, as explained in the comments below.
	for i, current in ipairs(syllables) do
		if #current == 1 and current[1] == "." then
			-- If the current syllable is just a period (explicit syllable
			-- break), remove it. The loop will then skip the next syllable,
			-- which will prevent movement of consonants across the syllable
			-- break (since movement of consonants happens from the current
			-- syllable to the previous one).
			remove(syllables, i)
		elseif i > 1 then
			local previous = syllables[i - 1]
			local onset = get_onset(current)
			-- Shift over consonants until the syllable onset is valid
			while not (onset == "" or onsets[onset]) do
				insert(previous, remove(current, 1))
				onset = get_onset(current)
			end
			
			-- If the preceding syllable still ends with a vowel,
			-- and the current one begins with s + another consonant, then shift it over.
			if get_coda(previous) == "" and (current[1] == "s" and not vowels[current[2]]) then
				insert(previous, remove(current, 1))
			end
			
			-- Check if there is no vowel at all in this syllable. That
			-- generally happens either (1) with an explicit syllable division
			-- specified, like 'cap.ra', which will get divided into the syllables
			-- [ca], [p], [.], [ra]; or (2) at the end of a word that ends with
			-- one or more consonants. We move the consonants onto the preceding
			-- syllable, then remove the resulting empty syllable. If the
			-- new current syllable is [.], remove it, too. The loop will then
			-- skip the next syllable, which will prevent movement of consonants
			-- across the syllable break (since movement of consonants happens
			-- from the current syllable to the previous one).
			if not get_vowel(current) then
				for j=1,#current do
					insert(previous, remove(current, 1))
				end
				remove(syllables, i)
				if syllables[i] and #syllables[i] == 1 and syllables[i][1] == "." then
					remove(syllables, i)
				end
			end
		end
	end
	
	-- Diagnostics only: unknown onsets/codas are tracked, not rejected.
	for i, syll in ipairs(syllables) do
		local onset = get_onset(syll)
		local coda = get_coda(syll)
		
		if not (onset == "" or onsets[onset]) then
			track("bad onset")
			--error("onset error:[" .. onset .. "]")
		end
		
		if not (coda == "" or codas[coda]) then
			track("bad coda")
			--error("coda error:[" .. coda .. "]")
		end
	end
	
	return syllables
end

-- Truthy when `phoneme` is exactly one short vowel symbol (no length mark,
-- not a diphthong). Returns whatever rfind returns for the match.
local function phoneme_is_short_vowel(phoneme)
	local short_vowel_pattern = "^[aɛeiɔouy]$"
	return rfind(phoneme, short_vowel_pattern)
end

-- Work out which syllable carries the stress.
--
-- syllables: sequence of syllables (each a sequence of phonemes), as returned
--            by split_syllables. If a manual stress mark "ˈ" is found it is
--            removed from its syllable in place.
-- is_prefix: truthy if the word was written as a prefix.
-- is_suffix: truthy if the word was written as a suffix.
--
-- Returns the 1-based index of the stressed syllable, -1 for "no stress",
-- or nothing when there are no syllables at all.
local function detect_accent(syllables, is_prefix, is_suffix)
	local total = #syllables

	-- An explicit stress mark always wins; strip it and report its syllable.
	for idx = 1, total do
		local syll = syllables[idx]
		for pos = 1, #syll do
			if syll[pos] == "ˈ" then
				remove(syll, pos)
				return idx
			end
		end
	end

	-- Prefixes are never stressed.
	if is_prefix then
		return -1
	end

	-- A suffix is stressed only if the stress would fall on it inside a full
	-- word. The first "syllable" may lack a vowel (e.g. in -rnus), in which
	-- case it does not count.
	if is_suffix then
		local vocalic = total
		if not get_vowel(syllables[1]) then
			vocalic = total - 1
		end
		if vocalic < 2 then
			return -1
		end
		if vocalic == 2 then
			local penult = syllables[total - 1]
			if phoneme_is_short_vowel(penult[#penult]) then
				return -1
			end
		end
	end

	-- Regular penultimate rule.
	if total == 1 then
		-- Monosyllables are marked as stressed so that stress-conditioned
		-- sound rules work; the mark is deleted again before display.
		return 1
	elseif total == 2 then
		return 1
	elseif total > 2 then
		local penult = syllables[total - 1]
		-- A penult ending in a short vowel is light: stress the antepenult.
		if phoneme_is_short_vowel(penult[#penult]) then
			return total - 2
		end
		return total - 1
	end
end

-- Collapse every run of two or more adjacent boundary marks ("." and/or "ˈ")
-- into a single mark: "ˈ" if the run contains a stress mark, "." otherwise.
local function clean_syllable_breaks(word)
	local function collapse(run)
		if run:find("ˈ") then
			return "ˈ"
		end
		return "."
	end

	local cleaned = ugsub(word, "[%.ˈ][%.ˈ]+", collapse)
	return cleaned
end

-- Convert a single (lowercased, NFC-normalized) Latin word to IPA.
--
-- word:     the word; may start and/or end with "-" to mark a suffix/prefix,
--           and may contain "." (syllable break), "'" (stress) and "_".
-- phonetic: truthy for the phonetic transcription, falsy for the phonemic one.
-- eccl:     truthy for Ecclesiastical pronunciation, falsy for Classical.
--
-- Returns the IPA string, without the surrounding slashes or brackets.
local function convert_word(word, phonetic, eccl)
	-- Normalize i/j/u/v; do this before removing breves, so we keep the
	-- ŭ in langŭī (perfect of languēscō) as a vowel.
	word = ugsub(word, "(" .. vowels_c .. ")[vw](" .. non_vowels_c .. ")", "%1u%2")
	word = ugsub(word, "qu(" .. vowels_c .. ")", "qw%1")
	word = word:gsub("%f[^%z.'][ck]w", "qw")
	word = ugsub(word, "%f[^%z.'n]gu(" .. vowels_c .. ")", "gw%1") -- nguV or initial guV

	word = ugsub(word, "^i(" .. vowels_c .. ")", "j%1")
	word = ugsub(word, "^u(" .. vowels_c .. ")", "v%1")
	-- Per the August 31 2019 recommendation by [[User:Brutal Russian]] in
	-- [[Module talk:la-pronunc]], we convert i/j between vowels to jj if the
	-- preceding vowel is short but to single j if the preceding vowel is long.
	-- A u between vowels becomes the consonant v.
	word = ugsub(word, "(" .. vowels_c .. ")('?)([iju])()", function (vowel, stress, cons, pos)
		-- `pos` is the position just after the candidate consonant; only act
		-- when the character found there is accepted by this check.
		-- NOTE(review): the check is a pattern find, so it also succeeds when
		-- that character is "." or when `pos` is past the end of the word --
		-- confirm whether that is intended.
		if vowels_string:find(usub(word, pos, pos)) then
			-- Fixed: this used to test an undefined global
			-- (`potential_consonant`), so the branch could never be taken and
			-- intervocalic u was treated like i/j.
			if cons == "u" then
				cons = stress .. "v"
			elseif eccl or long_vowels_string:find(vowel) then
				cons = stress .. "j"
			else
				cons = "j" .. stress .. "j"
			end
			return vowel .. cons
		end
		-- Returning nothing leaves the match unchanged.
	end)

	--Convert v/w to u syllable-finally
	word = word:gsub("[vw]%f[%z.']", "u")

	-- Convert i to j before vowel and after any prefix that ends in a consonant,
	-- per the August 23 2019 discussion in [[Module talk:la-pronunc]].
	for _, pref in ipairs(cons_ending_prefixes) do
		word = ugsub(word, "^(" .. pref .. ")i(" .. vowels_c .. ")", "%1j%2")
	end

	-- Convert z to zz between vowels so that the syllable weight and stress assignment will be correct.
	word = ugsub(word, "(" .. vowels_c .. ")z(" .. vowels_c .. ")", "%1zz%2")

	if eccl then
		-- Intervocalic ti + vowel: geminate affricate.
		word = ugsub(word, "(" .. vowels_c .. ")ti(" .. vowels_c .. ")", "%1tt͡si%2")
	end

	-- Now remove breves.
	word = remove_diacritic(word, BREVE)

	-- Normalize syllabic vowels like aë, oë; do this after removing breves but
	-- before any other normalizations.
	word = ugsub(word, "(" .. vowels_c .. ")([äëïöüÿ])", "%1.%2")
	word = remove_diacritic(word, TREMA)

	-- Assume the u in a final -us or -um is not part of a diphthong
	word = word:gsub("([aeo])(u[ms])$", "%1.%2")
	-- Any remaining a/e/o/u + i sequence is split into two syllables.
	word = word:gsub("[aeou]%f[i]", "%0.")
	-- "_" has served its purpose of blocking the normalizations above.
	word = word:gsub("_", "")
	
	-- Vowel length before nasal + fricative is allophonic
	word = toNFC(toNFD(word):gsub("([aeiouy])" .. MACRON .. "([mn][.']*[fs])", "%1%2"))

	if eccl then
		-- Mark a plain vowel before j as long.
		word = toNFC(word:gsub("[aeiouy]%f[j]", "%0" .. MACRON))
	end
	
	-- Per May 10 2019 discussion in [[Module talk:la-pronunc]], we syllabify
	-- prefixes ab-, ad-, ob-, sub- separately from following l or r.
	word = word:gsub("^a([bd])([lr])", "a%1.%2")
	word = word:gsub("^ob([lr])", "ob.%1")
	word = word:gsub("^sub([lr])", "sub.%1")

	-- Remove hyphens indicating prefixes or suffixes; do this after the above,
	-- some of which are sensitive to beginning or end of word and shouldn't
	-- apply to end of prefix or beginning of suffix.
	-- A leading hyphen sets is_prefix and a trailing hyphen sets is_suffix
	-- (naming kept as in the rest of the module).
	local is_prefix, is_suffix
	word = word:gsub("^(%-?)(.-)(%-?)$", function(m1, m2, m3)
		is_prefix, is_suffix = m1 == "-", m3 == "-"
		return m2
	end)

	-- Convert word to IPA
	local phonemes = letters_to_ipa(word, phonetic, eccl)
	
	-- Split into syllables
	local syllables = split_syllables(phonemes)
	
	-- Add accent
	local accent = detect_accent(syllables, is_prefix, is_suffix)

	-- Ecclesiastical: drop phonemic length. Classical phonetic: short vowels
	-- become lax.
	for _, syll in ipairs(syllables) do
		for j = 1, #syll do
			if eccl then
				syll[j] = syll[j]:gsub("ː", "")
			elseif phonetic then
				syll[j] = lax_vowel[syll[j]] or syll[j]
			end
		end
	end

	for i, syll in ipairs(syllables) do
		-- Ecclesiastical phonetic: a stressed syllable ending in a vowel
		-- (i.e. an open one) has that vowel lengthened.
		if eccl and i == accent and phonetic and vowels[syll[#syll]] then
			syll[#syll] = lengthen_vowel[syll[#syll]] or syll[#syll]
		end
	
		-- Blank out the second of two identical adjacent phonemes within a
		-- syllable.
		for j = 1, #syll - 1 do
			if syll[j] == syll[j + 1] then
				syll[j + 1] = ""
			end
		end
	end

	for i, syll in ipairs(syllables) do
		syll = concat(syll)
		-- Ecclesiastical phonetic: /o/ and /e/ are open-mid [ɔ], [ɛ] in the
		-- stressed syllable and stay close-mid elsewhere (atonic /ɔ/ and /ɛ/
		-- merge with /o/ and /e/ respectively).
		if eccl and phonetic and i == accent then
			syll = syll:gsub("o", "ɔ")
			syll = syll:gsub("e", "ɛ")
		-- Syllable-initial /ɡn/ becomes /n/ (e.g. "gnōscō")
		elseif not eccl then
			syll = syll:gsub("^ɡn", "n")
		end
		syllables[i] = (i == accent and "ˈ" or "") .. syll
	end

	word = clean_syllable_breaks(concat(syllables, "."))

	-- poetic meter shows that a consonant before "h" was syllabified as an onset, not as a coda. 
	-- Based on outcome of talk page discussion, this will be indicated by the omission of /h/ [h] in this context.
	word = ugsub(word, "([^" .. VOWELS .. "ptk.ˈːˑ])([.ˈːˑ]*)h", "%2%1")
	
	if eccl then
		word = ugsub(word, "([^" .. VOWELS .. "])ʃ([.ˈ]*)ʃ", "%1%2ʃ") -- replace ʃ.ʃ or ʃˈʃ with .ʃ or ˈʃ after any consonant
	end

	if eccl then
		word = word:gsub("ʷ", "w")
	else
		-- Normalize glide spelling. This has to happen before the phonetic
		-- rules, which refer to the glides as i̯/u̯.
		word = word:gsub("j", "i̯")
		word = word:gsub("w", "u̯")
	end

	if phonetic then
		local rules = eccl and phonetic_rules_eccl or phonetic_rules
		for _, rule in ipairs(rules) do
			word = ugsub(word, rule[1], rule[2])
		end
		word = word:gsub("%.+", "") -- remove the dots
	end

	-- The glide normalization used to be repeated here for the Classical
	-- case. The first pass above already replaced every "j" and "w", and none
	-- of the Classical phonetic rules introduces either letter, so the second
	-- pass could never change anything and has been dropped.

	if phonetic then
		word = ugsub(word, "(%a([̪̠̯]?))%1", "%1" .. LONG) -- convert double consonants into long ones
		word = ugsub(word, "[ːˑ][ːˑ]+", "ː") -- maximum of one full length mark
	end

	return clean_syllable_breaks(word)
end

-- Convert a string of one or more Latin words to IPA.
--
-- text:     the input; words are separated by spaces or word-internal hyphens.
-- phonetic: truthy for the phonetic transcription, falsy for the phonemic one.
-- eccl:     truthy for Ecclesiastical pronunciation, falsy for Classical.
--
-- Returns the converted words joined by single spaces. Raises an error that
-- lists the offending characters if `text` contains anything outside the
-- permitted set (Latin letters, the listed diacritics and punctuation).
function export.convert_words(text, phonetic, eccl)
	-- Whatever survives the removal of all permitted characters is invalid.
	local disallowed = ugsub(text, "[a-zA-ZæœāǣēīōūȳăĕĭŏŭäëïöüÿĀǢĒĪŌŪȲĂĔĬŎŬÄËÏÖÜŸ%-,.?!:;()'\"_ " .. MACRON .. BREVE .. TREMA .. TIE .. "]", "")
	local n = ulen(disallowed)
	if n > 0 then
		-- Build the message with a single format call. Formatting in two
		-- steps would let a "%" in the offending text be interpreted as a
		-- format directive by the second call.
		local plural = n ~= 1
		error(("The character%s %s %s not allowed."):format(
			plural and "s" or "",
			mw.dumpObject(disallowed),
			plural and "are" or "is"
		))
	end
	
	text = toNFD(text)
	-- Call ulower() even though it's also called in phoneticize,
	-- in case convert_words() is called externally.
	text = ulower(text)
	-- Punctuation carries no pronunciation.
	text = text:gsub("[,?!:;()\"]", "")
	-- Expand the ligatures æ/œ, keeping their diacritics on the first letter.
	text = ugsub(text, "([æœ])(" .. LENGTH .. ")", normalize_ligatures)
	-- Treat a tie as "_".
	text = text:gsub(TIE, "_")
	
	local result = {}
	-- Split on spaces and hyphens, but hyphens preceded/followed by a space
	-- are included in the word (e.g. prefixes and suffixes).
	for chunk in gsplit(text, " +") do
		for word in gsplit(chunk, "%f[%z-]%-+%f[^%z-]") do
			-- Skip pieces that are empty or consist of hyphens only.
			if word:match("[^-]") then
				insert(result, convert_word(toNFC(word), phonetic, eccl))
			end
		end
	end
	
	return trim(concat(result, " "))
end

-- Phoneticize Latin TEXT. The result is a list holding one or two
-- phoneticizations; each phoneticization is a two-element list
-- {PHONEMIC, PHONETIC}. ECCL selects Ecclesiastical pronunciation; when it
-- is falsy, Classical pronunciation is used.
--
-- Two phoneticizations are produced (if they actually differ) when TEXT
-- contains a vowel marked with both macron and breve (long vs. short
-- reading) or, failing that, a ";" (no syllable break vs. syllable break).
function export.phoneticize(text, eccl)
	-- Phonemic and phonetic transcription of one input variant.
	local function transcribe(variant)
		local phonemic = export.convert_words(variant, false, eccl)
		local phonetic = export.convert_words(variant, true, eccl)
		return {phonemic, phonetic}
	end

	-- Transcribe two variants and drop the second one if it is identical.
	local function transcribe_both(first_variant, second_variant)
		local first = transcribe(first_variant)
		local second = transcribe(second_variant)
		if deep_equals(first, second) then
			return {first}
		end
		return {first, second}
	end

	text = ulower(text)

	-- Macron-breve sequence: one reading with the long vowel, one with the
	-- short vowel. (The two never differ in Ecclesiastical pronunciation.)
	if rfind(text, "[āēīōūȳ]" .. BREVE) then
		local longvar = ugsub(text, "([āēīōūȳ])" .. BREVE, "%1")
		local shortvar = ugsub(text, "([āēīōūȳ])" .. BREVE, macrons_to_breves)
		return transcribe_both(longvar, shortvar)
	end

	-- ";" marks an optional syllable break.
	if rfind(text, ";") then
		local tautosyllabicvar = text:gsub(";", "")
		local heterosyllabicvar = text:gsub(";", ".")
		return transcribe_both(tautosyllabicvar, heterosyllabicvar)
	end

	return {transcribe(text)}
end

-- Format one output row: the accent qualifiers followed by every
-- phoneticization (each shown as /phonemic/ plus [phonetic]), with
-- alternatives separated by " or ".
--
-- phoneticizations: list of {PHONEMIC, PHONETIC} pairs.
-- dials:            list of accent/dialect labels passed to the qualifier
--                   formatter.
local function make_row(phoneticizations, dials)
	local rendered = {}

	for _, pair in ipairs(phoneticizations) do
		local items = {
			{pron = '/' .. pair[1] .. '/'},
			{pron = '[' .. pair[2] .. ']'},
		}
		insert(rendered, m_IPA.format_IPA_full { lang = lang, items = items })
	end

	local qualifiers = m_a.format_qualifiers(lang, dials)
	local alternatives = concat(rendered, ' or ')
	return qualifiers .. ' ' .. alternatives
end

-- Template entry point: renders the Classical and/or Ecclesiastical
-- pronunciation rows for the word given in the parent frame's arguments and
-- returns the wikitext, followed by any formatted categories.
function export.show_full(frame)
	-- Default word: a sample on template pages, otherwise the page name.
	local default_text
	if mw.title.getCurrentTitle().nsText == "Template" then
		default_text = "īnspīrāre"
	else
		default_text = mw.loadData("Module:headword/data").pagename
	end

	local on_by_default = {type = "boolean", default = true}
	local params = {
		[1] = {default = default_text},
		classical = on_by_default,
		cl = {alias_of = "classical"},
		ecclesiastical = on_by_default,
		eccl = {alias_of = "ecclesiastical"},
		vul = {type = "boolean"}, -- To be removed.
		ann = true,
		accent = {list = true},
		indent = true
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)

	-- Track down any remaining uses of |vul=
	if args.vul ~= nil then
		track("vul")
	end

	local text, accent = args[1], args.accent
	local custom_accent = #accent > 0
	local categories = {}

	-- The indent is always used before the second row, but is put before the
	-- first row only when it was requested explicitly.
	local indent = (args.indent or "*") .. " "
	local out = args.indent and indent or ""

	if args.classical then
		out = out .. make_row(
			export.phoneticize(text, false),
			custom_accent and accent or {"Classical"}
		)
	else
		insert(categories, lang:getCanonicalName() .. " terms with Ecclesiastical IPA pronunciation only")
	end

	-- Optional bold annotation: the word itself (minus ".", "'" and "_") for
	-- ann=1, otherwise the given annotation text.
	local anntext = ""
	if args.ann == "1" then
		anntext = "'''" .. text:gsub("[.'_]", "") .. "''':&#32;"
	elseif args.ann then
		anntext = "'''" .. args.ann .. "''':&#32;"
	end

	out = anntext .. out

	if args.ecclesiastical then
		-- Start a new list item when a Classical row precedes this one.
		if args.classical then
			out = out .. "\n" .. indent .. anntext
		end
		out = out .. make_row(
			export.phoneticize(text, true),
			custom_accent and accent or {'Ecclesiastical'}
		)
	end

	return out .. require("Module:utilities").format_categories(categories)
end

return export