Module:mh-pronunc/sandbox
Appearance
- This module sandbox lacks a documentation subpage. Please create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox of (diff)
-- This module is primarily maintained at:
-- https://en.wiktionary.org/wiki/Module:mh-pronunc
-- Please direct all technical queries and contributions there.
-- The version of this script on Wikipedia is only a mirror.
-- Table of functions exposed by this module; it is returned at the end of
-- the file.
local export = {}
-- Diacritic string constants. In the code visible in this file only
-- ASYLLABIC, CEDILLA, CENTRAL, LESSROUND2 and MACRON are referenced.
-- NOTE(review): each literal appears to hold a single combining mark, which
-- renders poorly in many editors; confirm the code points before editing.
local ASYLLABIC = "̯"
local BREVE = "̆"
local BREVE2 = "͝"
local CEDILLA = "̧"
local CENTRAL = "̈"
local DEVOICE = "̥"
local DEVOICE2 = "̊"
local LESSROUND = "̜"
local LESSROUND2 = "͑"
local MACRON = "̄"
local MOREROUND = "̹"
local MOREROUND2 = "͗"
local SYLLABIC = "̩"
local TIE = "͡"
local TIE2 = "͜"
-- Cluster-type codes stored in the clusterTypes table by needClusterTypes()
-- and compared against in toPhoneticRemainder().
local EPENTH_CLUSTER = 0
local ASSIM_CLUSTER = 1
local STABLE_CLUSTER = 2
-- Forward-declare functions.
-- Each is assigned further down with "name = function(...)", so the
-- functions can reference one another regardless of definition order.
local addUnique
local assign
local fastTrim
local lerpF2
local mergedMidVowelsMap
local needClusterTypes
local needPhoneticMap
local needVowelCharts
local parse
local parseBoolean
local reverseString
local splitTokens
local string_gsub2
local string_gsubx
local toBender
local toMOD
local toPhonemic
local toPhonetic
local toPhoneticDialect
local toPhoneticRemainder
-- Forward-declare lookup tables.
-- These start out nil and are filled in lazily by the function that first
-- needs them.
local benderMaps
local clusterTypes
local fromF1
local fromF2
local fromF2Conson
local parseC_CH_CWmap
local parsePseudoConsonMap
local parseRemainingMap
local phonemicMap
local phoneticMap
local toF1
local toF2
local toMODmap
local voicedPrimaries
-- Appends `value` to the sequence `seq` only when no equal element is
-- already stored, so that `seq` behaves like an insertion-ordered set.
-- Returns nothing.
addUnique = function(seq, value)
    local alreadyPresent = false
    for _, existing in pairs(seq) do
        if existing == value then
            alreadyPresent = true
            break
        end
    end
    if not alreadyPresent then
        seq[#seq + 1] = value
    end
end
-- Intended to work the same as JavaScript's Object.assign() function.
-- Copies every key/value pair of each table passed after `target` into
-- `target`, and returns `target`. Sources are processed strictly left to
-- right, so a later source overrides an earlier one for the same key.
-- Arguments that are not tables (including nil) are skipped.
assign = function(target, ...)
    -- select("#", ...) counts embedded and trailing nils, and the numeric
    -- loop guarantees left-to-right order; pairs() over a packed vararg
    -- table guarantees neither.
    for index = 1, select("#", ...) do
        local source = (select(index, ...))
        if type(source) == "table" then
            for key, value in pairs(source) do
                target[key] = value
            end
        end
    end
    return target
end
-- Returns `text` with leading and trailing whitespace (the %s class)
-- removed, using a single string.match call.
fastTrim = function(text)
    local trimmed = string.match(text, "^%s*(.-)%s*$")
    return trimmed
end
-- Returns the F2 chart key lying halfway between the F2 values of
-- `secondaryL` and `secondaryR`, looked up through the toF2 / fromF2 tables
-- (which are built on demand by needVowelCharts).
lerpF2 = function(secondaryL, secondaryR)
    needVowelCharts()
    local left = toF2[secondaryL]
    local right = toF2[secondaryR]
    local midpoint = 0.5 * (left + right)
    return fromF2[midpoint]
end
-- Lazily builds `clusterTypes`, a three-level lookup
-- clusterTypes[primaryL][primaryR][secondaryR] giving one of the
-- EPENTH_CLUSTER / ASSIM_CLUSTER / STABLE_CLUSTER codes for a pair of
-- neighboring consonants. Does nothing when the table already exists.
needClusterTypes = function()
    if not clusterTypes then
        -- A leaf table assigning the same code to every secondary
        -- articulation.
        local function uniform(kind)
            return { ["j"] = kind, ["G"] = kind, ["w"] = kind }
        end
        local EPENTH = uniform(EPENTH_CLUSTER)
        local ASSIM = uniform(ASSIM_CLUSTER)
        local STABLE = uniform(STABLE_CLUSTER)

        -- Default row: every right-hand primary is epenthetic.
        local EMPTY = {}
        for _, primary in pairs({
            "p", "t", "k", "m", "n", "N", "r", "l", "h", "y"
        }) do
            EMPTY[primary] = EPENTH
        end

        -- A fresh copy of the default row with selected entries replaced.
        local function row(overrides)
            return assign({}, EMPTY, overrides)
        end

        clusterTypes = {
            ["p"] = row({
                ["p"] = STABLE, -- /pp/
                ["m"] = ASSIM -- /pm/ becomes [mm]
            }),
            ["t"] = row({
                ["t"] = STABLE -- /tt/
            }),
            ["k"] = row({
                ["k"] = STABLE, -- /kk/
                ["N"] = ASSIM -- /kŋ/ becomes [ŋŋ]
            }),
            ["m"] = row({
                ["p"] = STABLE, -- /mp/
                ["m"] = STABLE -- /mm/
            }),
            ["n"] = row({
                ["t"] = STABLE, -- /nt/
                ["n"] = STABLE, -- /nn/
                ["r"] = STABLE, -- /nr/
                ["l"] = STABLE -- /nl/
            }),
            ["N"] = row({
                ["k"] = STABLE, -- /ŋk/
                ["N"] = STABLE -- /ŋŋ/
            }),
            ["r"] = row({
                ["n"] = ASSIM, -- /rn/ becomes [nn]
                ["r"] = STABLE, -- /rr/
                ["l"] = STABLE -- /rl/
            }),
            ["l"] = row({
                -- /lt/ is stable except before palatalized /tʲ/.
                ["t"] = assign({}, STABLE, {
                    ["j"] = EPENTH_CLUSTER, -- /ltʲ/
                }),
                ["n"] = ASSIM, -- /ln/ becomes [nn]
                ["r"] = STABLE, -- /lr/
                ["l"] = STABLE -- /ll/
            }),
            -- The default row itself (shared, not copied).
            ["h"] = EMPTY
        }
    end
end
-- Lazily builds `phoneticMap`, the lookup table that toPhoneticRemainder()
-- passes to string.gsub to turn pseudo-X-SAMPA tokens into phonetic IPA.
-- Does nothing when the table already exists. Entries are filled in with
-- "map[key] = map[key] or ...", so an explicit entry set earlier is never
-- replaced by a later derived one; the order of the steps below matters.
needPhoneticMap = function()
if phoneticMap then
return
end
-- fromF1 and fromF2 are used by the vowel loop at the end.
needVowelCharts()
-- Explicitly listed tokens: consonant primaries, surfaced glides ("H.."),
-- secondary articulations, vowels keyed by F1 letter plus F2 digit, and
-- marker characters.
local map = {
["p"] = "p",
["b"] = "b",
["t"] = "t",
["d"] = "d",
["k"] = "k",
["g"] = "ɡ",
["m"] = "m",
["n"] = "n",
["N"] = "ŋ",
["r"] = "r",
["l"] = "l",
["Hj"] = "j",
["HG"] = "ɰ",
["Hw"] = "w",
["_"] = "‿",
["j"] = "ʲ",
["G"] = "ˠ",
["w"] = "ʷ",
["a1"] = "æ",
["E1"] = "ɛ",
["e1"] = "e",
["i1"] = "i",
["a2"] = "a",
["E2"] = "ɜ",
["e2"] = "ɘ",
["i2"] = "ɨ",
["a3"] = "ɑ",
["E3"] = "ʌ",
["e3"] = "ɤ",
["i3"] = "ɯ",
["a5"] = "ɒ",
["E5"] = "ɔ",
["e5"] = "o",
["i5"] = "u",
["^"] = ASYLLABIC,
["@"] = "",
["("] = "(",
[")"] = ")",
[":"] = "ː",
["="] = "",
["\""] = "ˈ",
["%"] = "ˌ",
[","] = "",
["\\"] = ""
}
-- Hard-coded toggle: "false and {...}" is always false, so the second
-- table is the one merged in, making "P", "T", "K" render the same as
-- "p", "t", "k".
assign(map, false and {
["P"] = "b̥",
["T"] = "d̥",
["K"] = "ɡ̊"
} or {
["P"] = map["p"],
["T"] = map["t"],
["K"] = map["k"]
})
-- Disabled block (the condition is the literal false).
if false then
for primary in mw.text.gsplit("kKgN", "") do
map[primary.."G"] = map[primary]
end
end
-- Fallbacks for the asyllabic glide tokens; each keeps an entry that is
-- already present.
map["Hj"] = map["Hj"] or map["i1^"] or (map["i1"]..map["^"])
map["i1^"] = map["i1^"] or map["Hj"]
map["yj"] = map["yj"] or map["i1^"]
map["i3^"] = map["i3^"] or map["HG"]
-- Always-on toggle: overrides the back unrounded asyllabic tokens.
if true then
assign(map, {
["i3^"] = "ɰ",
["e3^"] = "ʁ",
["E3^"] = "ʁ",
["a3^"] = "ʕ"
})
end
-- Always-on toggle: every "<F1>5^" token without an entry gets the value
-- of "Hw".
if true then
for f1 in mw.text.gsplit("aEei", "") do
local key = f1.."5^"
map[key] = map[key] or map["Hw"]
end
end
-- Combine each primary (or bookend/space character) with each secondary
-- articulation; characters without a map entry are used as themselves.
for primary in mw.text.gsplit("pPbtTdkKgmnNrl_ \t\n", "") do
for secondary in mw.text.gsplit("jGw", "") do
local key = primary..secondary
map[key] = map[key] or ((map[primary] or primary)..map[secondary])
end
end
-- For every F1 height: derive the F2 "2" and "4" vowels from their
-- neighbors plus a diacritic when missing, then add the "=", "@" and "^"
-- suffixed variants of all five F2 positions.
for f1 = 1, 4 do
local vowelF1 = fromF1[f1]
local vowel = vowelF1.."2"
map[vowel] = map[vowel] or (map[vowelF1.."1"]..CENTRAL)
vowel = vowelF1.."4"
map[vowel] = map[vowel] or (map[vowelF1.."5"]..LESSROUND2)
for f2 = 1, 5 do
vowel = vowelF1..fromF2[f2]
local semi = vowel.."="
map[semi] = map[semi] or (map[vowel]..map["="])
semi = vowel.."@"
map[semi] = map[semi] or (map[vowel]..map["@"])
semi = vowel.."^"
map[semi] = map[semi] or (map[vowel]..map["^"])
end
end
-- Publish the finished table.
phoneticMap = map
end
-- Lazily builds the vowel-chart conversion tables toF1, fromF1, toF2,
-- fromF2 and fromF2Conson. Does nothing when toF1 already exists.
-- F1 heights are the letters a/E/e/i (1-4); F2 positions are the digits
-- "1"-"5", with the secondary articulations j/G/w at positions 1/3/5.
needVowelCharts = function()
    if not toF1 then
        local heights = { "a", "E", "e", "i" }
        local digits = { "1", "2", "3", "4", "5" }
        local glides = { ["j"] = 1, ["G"] = 3, ["w"] = 5 }

        local f1To, f1From = {}, {}
        for level, letter in ipairs(heights) do
            f1To[letter] = level
            f1To[level] = level
            f1From[level] = letter
            f1From[letter] = letter
        end

        local f2To, f2From, f2Conson = {}, {}, {}
        for position, digit in ipairs(digits) do
            f2To[digit] = position
            f2To[position] = position
            f2From[position] = digit
            f2From[digit] = digit
        end
        for glide, position in pairs(glides) do
            f2To[glide] = position
            f2From[glide] = digits[position]
            f2Conson[position] = glide
            f2Conson[digits[position]] = glide
            f2Conson[glide] = glide
        end

        toF1 = f1To
        fromF1 = f1From
        toF2 = f2To
        fromF2 = f2From
        fromF2Conson = f2Conson
    end
end
-- Parses `code`, a comma-separated list of Bender-style spellings, and
-- returns a sequence of the unique pseudo-X-SAMPA strings they convert to.
-- Whitespace runs are collapsed and the input is lowercased first.
-- Raises an error (always naming the offending input) when an entry contains
-- unsupported characters, misplaced underscores, a "g" outside "ng", or
-- violates the phonotactic checks below.
parse = function(code)
    local outSeq = {}
    code = mw.ustring.gsub(code, "%s+", " ")
    code = string.lower(code)
    for text in mw.text.gsplit(code, " *,[ ,]*") do
        text = fastTrim(text)
        if text ~= "" then
            -- Whatever is left after deleting every supported character
            -- is unsupported.
            local temp = string.gsub(text, "[abdeghijklmnprtwy_&'%- ]", "")
            if temp ~= "" then
                error("'"..code.."' contains unsupported characters: "..temp)
            end
            -- Recognize "y_", "h_", "w_", "_y", "_h", "_w" as pseudoconsonants.
            parsePseudoConsonMap = parsePseudoConsonMap or {
                ["y"] = "0",
                ["h"] = "0h",
                ["w"] = "0w"
            }
            text = string.gsub(text, "_*([hwy])_+", parsePseudoConsonMap)
            text = string.gsub(text, "_+([hwy])", parsePseudoConsonMap)
            if string.find(text, "_") then
                error("contains misplaced underscores: "..code)
            end
            -- a plain {i} protected from dialect-specific reflexes
            text = string.gsub(text, "'i", "I")
            -- "yi'y" and "'yiy" sequences
            -- (returning nothing from the callback keeps a plain "yiy").
            text = string.gsub(text, "('?)yi('*)y", function(aposA, aposB)
                if aposA ~= "" then
                    -- "dwelling upon" i
                    return "Z"
                elseif aposB ~= "" then
                    -- "passing over lightly" i
                    return "z"
                end
            end)
            -- Convert multigraphs to pseudo-X-SAMPA format.
            -- The uppercase J and W placeholders are lowered again by
            -- parseRemainingMap, which would otherwise expand j and w.
            parseC_CH_CWmap = parseC_CH_CWmap or {
                ["k"] = "kG",
                ["kh"] = "kGh", -- N\A
                ["kw"] = "kW",
                ["l"] = "lJ",
                ["lh"] = "lG",
                ["lw"] = "lW",
                ["m"] = "mJ",
                ["mh"] = "mG",
                ["mw"] = "mJw", -- N\A
                ["n"] = "nJ",
                ["nh"] = "nG",
                ["nw"] = "nW",
                ["ng"] = "NG",
                ["ngh"] = "NGh", -- N\A
                ["ngw"] = "NW",
                ["r"] = "rG",
                ["rh"] = "rGh", -- N\A
                ["rw"] = "rW",
                ["0"] = "_J",
                ["0h"] = "_G",
                ["0w"] = "_W"
            }
            text = string.gsub(text, "[klmnr0]g?[hw]?", parseC_CH_CWmap)
            if string.find(text, "g") then
                error("contains g that is not part of ng: "..code)
            end
            -- Convert remaining sequences to pseudo-X-SAMPA format.
            parseRemainingMap = parseRemainingMap or {
                ["b"] = "pG",
                ["d"] = "rj",
                ["e"] = "E",
                ["&"] = "e",
                ["h"] = "hG",
                ["j"] = "tj",
                ["J"] = "j",
                ["p"] = "pj",
                ["t"] = "tG",
                ["w"] = "hw",
                ["W"] = "w",
                ["y"] = "hj",
                ["z"] = "yj",
                ["Z"] = "Yj",
                ["'"] = ""
            }
            text = string.gsub(text, ".", parseRemainingMap)
            -- Enforce CVC, CVCVC, CVCCVC, etc. phonotactics,
            -- but allow VC, CV at affix boundaries
            -- where a vowel may link to another morpheme's consonant.
            temp = string.gsub(text, "[%s%-]+", "")
            if string.find(temp, "_..[jGw]") or
                string.find(temp, ".[jGw]_.")
            then
                error("pseudoconsonants may not neighbor a consonant: "..code)
            end
            if string.find(temp, "[aEeIi]_.[aEeIi]") then
                error(
                    "pseudoconsonants may only be at the beginning or end: "..
                    code
                )
            end
            if string.find(temp, "[aEeIi][aEeIi]") then
                error("vowels must be separated by a consonant: "..code)
            end
            if string.find(temp, ".[jGw].[jGw]$") then
                error("may not end with a consonant cluster: "..code)
            end
            -- This gsub is run only for its callback, which raises the
            -- error; its result is discarded.
            string.gsub(" "..temp, "[ jGw](.[jGw])(.[jGw][ptkmnNrlhyYjGw]*)",
                function(consonX, consonY)
                    if consonX ~= consonY then
                        error(
                            "may not begin with a consonant cluster "..
                            "unless it is a geminate: "..code
                        )
                    end
                end
            )
            if text ~= "" then
                addUnique(outSeq, text)
            end
        end
    end
    return outSeq
end
-- Interprets a template argument as a boolean. Returns true only for a
-- string that, once every non-alphanumeric ASCII character is removed, is
-- neither empty, nor "0", nor "false" in any letter case. Every other value
-- (including non-strings) gives false.
parseBoolean = function(text)
    if type(text) ~= "string" then
        return false
    end
    local stripped = string.gsub(text, "[^0-9A-Za-z]", "")
    if stripped == "" or stripped == "0" then
        return false
    end
    return string.lower(stripped) ~= "false"
end
-- Returns `text` with its tokens (as produced by splitTokens with its
-- default pattern) in reverse order.
reverseString = function(text)
    local chars = splitTokens(text)
    local count = #chars
    local reversed = {}
    for position = 1, count do
        reversed[position] = chars[count - position + 1]
    end
    return table.concat(reversed, "")
end
-- Splits `text` into tokens and stores them in a sequence.
--   text:    the string to split.
--   pattern: optional Lua pattern matching one token; the default matches a
--            lead byte followed by any number of bytes in \128-\191.
--   chars:   optional table to reuse; tokens overwrite it from index 1.
--   shorten: when `chars` held more entries than there are new tokens, a
--            true value removes one leftover entry with table.remove
--            instead of clearing all of them.
-- Returns the table holding the tokens (`chars` itself when it was given).
splitTokens = function(text, pattern, chars, shorten)
    chars = chars or {}
    -- Measure the reused table once, while it is still a proper sequence.
    local oldLength = #chars
    local index = 1
    for ch in string.gmatch(
        text, pattern or "[%z\1-\127\194-\244][\128-\191]*"
    ) do
        chars[index] = ch
        index = index + 1
    end
    if index <= oldLength then
        if shorten then
            -- NOTE(review): this removes only the single entry at `index`;
            -- kept as in the original because no caller in this file
            -- passes `shorten`, so the intended semantics cannot be
            -- confirmed here.
            table.remove(chars, index)
        else
            -- Clear leftovers from the end downward. Re-reading #chars
            -- while assigning nil upward would depend on the length of a
            -- table with holes, which Lua leaves unspecified.
            for stale = oldLength, index, -1 do
                chars[stale] = nil
            end
        end
    end
    return chars
end
-- Applies string.gsub(text, pattern, subst) at most twice and returns the
-- resulting string. The second pass is skipped when the first pass left the
-- text unchanged, since repeating it could not change anything either.
string_gsub2 = function(text, pattern, subst)
    local firstPass = string.gsub(text, pattern, subst)
    if firstPass == text then
        return firstPass
    end
    local secondPass = string.gsub(firstPass, pattern, subst)
    return secondPass
end
-- Applies string.gsub(text, pattern, subst) repeatedly until a pass leaves
-- the text unchanged, and returns that final string.
string_gsubx = function(text, pattern, subst)
    while true do
        local replaced = string.gsub(text, pattern, subst)
        if replaced == text then
            return text
        end
        text = replaced
    end
end
-- Converts each pseudo-X-SAMPA string in `inSeq` to a Bender-style
-- orthographic transcription and returns the unique results as a sequence.
-- `args.version` (case-insensitive) selects the notation:
--   "1968"    from "Marshallese Phonology" (1968 by Byron W. Bender).
--   "med"     from the Marshallese-English Dictionary (1976).
--   "mod"     from the Marshallese-English Online Dictionary.
--   "default" the same as "mod" but with cedillas.
-- Any other or missing value selects "default".
toBender = function(inSeq, args)
    local version = args and args.version
    if not benderMaps then
        -- Each later notation is a copy of the previous one with some
        -- entries replaced.
        local map1968 = {
            ["pj"] = "p",
            ["pG"] = "b",
            ["tj"] = "j",
            ["tG"] = "t",
            ["kG"] = "k",
            ["kw"] = "q",
            ["mj"] = "m",
            ["mG"] = "ṁ",
            ["nj"] = "n",
            ["nG"] = "ṅ",
            ["nw"] = "n̈",
            ["NG"] = "g",
            ["Nw"] = "g̈",
            ["rj"] = "d",
            ["rG"] = "r",
            ["rw"] = "r̈",
            ["lj"] = "l",
            ["lG"] = "ł",
            ["lw"] = "l̈",
            ["yj"] = "yi'y",
            ["Yj"] = "'yiy",
            ["hj"] = "y",
            ["hG"] = "h",
            ["hw"] = "w",
            ["_j"] = "",
            ["_G"] = "",
            ["_w"] = "",
            ["a"] = "a",
            ["E"] = "e",
            ["e"] = "&",
            ["i"] = "i",
            ["I"] = "i"
        }
        local overridesMED = {
            ["mG"] = "m̧",
            ["nG"] = "ņ",
            ["nw"] = "ņ°",
            ["Nw"] = "g°",
            ["rw"] = "r°",
            ["lG"] = "ļ",
            ["lw"] = "ļ°",
            ["e"] = "ȩ"
        }
        local overridesMOD = {
            ["kw"] = "kʷ",
            ["mG"] = "ṃ",
            ["nG"] = "ṇ",
            ["nw"] = "ṇʷ",
            ["Nw"] = "gʷ",
            ["rw"] = "rʷ",
            ["lG"] = "ḷ",
            ["lw"] = "ḷʷ",
            ["e"] = "ẹ"
        }
        local overridesDefault = {
            ["mG"] = "m̧",
            ["nG"] = "ņ",
            ["nw"] = "ņʷ",
            ["lG"] = "ļ",
            ["lw"] = "ļʷ",
            ["e"] = "ȩ"
        }
        local mapMED = assign({}, map1968, overridesMED)
        local mapMOD = assign({}, mapMED, overridesMOD)
        local mapDefault = assign({}, mapMOD, overridesDefault)
        benderMaps = {
            ["1968"] = map1968,
            ["med"] = mapMED,
            ["mod"] = mapMOD,
            ["default"] = mapDefault
        }
    end

    -- A non-string version looks up "", which has no entry.
    local versionKey = ""
    if type(version) == "string" then
        versionKey = string.lower(version)
    end
    local map = benderMaps[versionKey] or benderMaps["default"]

    local outSeq = {}
    for _, code in pairs(inSeq) do
        -- Tokens are one character plus an optional secondary articulation.
        local spelled = string.gsub(code, ".[jGw]?", map)
        addUnique(outSeq, spelled)
    end
    return outSeq
end
-- Rewrites letters carrying a cedilla or macron in `text` to the forms
-- listed in toMODmap and returns the converted string. Characters without a
-- map entry are left as they are.
toMOD = function(text)
    if not toMODmap then
        toMODmap = {
            ["Ȩ"] = "Ẹ",
            ["ȩ"] = "ẹ",
            ["Ļ"] = "Ḷ",
            ["ļ"] = "ḷ",
            ["M̧"] = "Ṃ",
            ["m̧"] = "ṃ",
            ["Ņ"] = "Ṇ",
            ["ņ"] = "ṇ",
            ["N̄"] = "Ñ",
            ["n̄"] = "ñ",
            ["O̧"] = "Ọ",
            ["o̧"] = "ọ"
        }
    end
    -- One character optionally followed by a combining cedilla or macron.
    local letterPattern = ".["..CEDILLA..MACRON.."]?"
    local converted = mw.ustring.gsub(text, letterPattern, toMODmap)
    return converted
end
-- Converts each pseudo-X-SAMPA string in `inSeq` to a phonemic IPA
-- transcription and returns the unique results as a sequence.
toPhonemic = function(inSeq)
    local outSeq = {}
    if not phonemicMap then
        -- Hard-coded toggle for an alternative set of vowel symbols.
        local useAlternativeVowels = false
        local map = {
            ["pj"] = "pʲ",
            ["pG"] = "pˠ",
            ["tj"] = "tʲ",
            ["tG"] = "tˠ",
            ["kG"] = "kˠ",
            ["kw"] = "kʷ",
            ["mj"] = "mʲ",
            ["mG"] = "mˠ",
            ["nj"] = "nʲ",
            ["nG"] = "nˠ",
            ["nw"] = "nʷ",
            ["NG"] = "ŋˠ",
            ["Nw"] = "ŋʷ",
            ["rj"] = "rʲ",
            ["rG"] = "rˠ",
            ["rw"] = "rʷ",
            ["lj"] = "lʲ",
            ["lG"] = "lˠ",
            ["lw"] = "lʷ",
            ["hj"] = "j",
            ["hG"] = "ɰ",
            ["hw"] = "w",
            ["_j"] = "",
            ["_G"] = "",
            ["_w"] = "",
            ["a"] = "æ",
            ["E"] = "ɛ",
            ["e"] = "e",
            ["i"] = "i",
            ["I"] = "i"
        }
        if useAlternativeVowels then
            assign(map, {
                ["a"] = "ɐ",
                ["E"] = "ə",
                ["e"] = "ɘ",
                ["i"] = "ɨ",
                ["I"] = "ɨ"
            })
        end
        -- The two {yiy} variants are spelled out from the glide and vowel
        -- entries above.
        local glide = map.hj
        local high = map.i
        map["yj"] = glide..high..ASYLLABIC..glide
        map["Yj"] = glide..high.."ː"..glide
        phonemicMap = map
    end
    for _, code in pairs(inSeq) do
        -- Tokens are one character plus an optional secondary articulation.
        local ipa = string.gsub(code, ".[jGw]?", phonemicMap)
        addUnique(outSeq, ipa)
    end
    return outSeq
end
-- Converts each pseudo-X-SAMPA string in `inSeq` to phonetic IPA and returns
-- the unique pronunciations as a sequence. Options read from `args`:
--   dialect:   "ralik" (also "rālik") for the Rālik Chain (western dialect),
--              "ratak" for the Ratak Chain (eastern dialect); for any other
--              value both dialect reflexes are listed where they differ.
--   enunciate: if enabled, break words at consonant cluster boundaries and
--              enunciate the fragments individually; this mode does not
--              assimilate clusters or produce epenthetic vowels.
--   liaison:   if enabled, display liaison joiners to mark spaces or hyphens
--              in the input code that are not consonant clusters.
--   nohints:   if enabled, do not display pseudoconsonant hints at all.
--   voice:     "false" displays all obstruent allophones as voiceless,
--              "true" as voiced; empty or absent displays only medial
--              obstruent allophones as semi-voiced.
toPhonetic = function(inSeq, args)
    local dialect = ""
    if args and args.dialect then
        dialect = mw.ustring.lower(mw.text.trim(args.dialect))
    end
    if dialect == "rālik" then
        dialect = "ralik"
    end

    -- Reads one boolean option; a missing `args` table counts as false.
    local function flag(name)
        return not not (args and parseBoolean(args[name]))
    end
    local enunciate = flag("enunciate")
    local liaison = flag("liaison")
    local noHints = flag("nohints")

    -- Stays "" when not given, otherwise becomes a real boolean.
    local voice = args and args.voice or ""
    if voice ~= "" then
        voice = parseBoolean(voice)
    end

    local outSeq = {}
    -- toPhoneticRemainder appends its results to config.outSeq.
    local config = {
        ["outSeq"] = outSeq,
        ["enunciate"] = enunciate,
        ["liaison"] = liaison,
        ["noHints"] = noHints,
        ["voice"] = voice
    }

    local isRalik = dialect == "ralik"
    local singleDialect = isRalik or dialect == "ratak"
    for _, code in pairs(inSeq) do
        -- Collapse whitespace and hyphen runs to single spaces.
        local text = fastTrim((string.gsub(code, "[%s%-]+", " ")))
        if singleDialect then
            local reflex = toPhoneticDialect(text, config, isRalik)
            toPhoneticRemainder(reflex, config)
        else
            local ralik = toPhoneticDialect(text, config, true)
            local ratak = toPhoneticDialect(text, config, false)
            -- If both dialect reflexes are the same, display only one.
            toPhoneticRemainder(ralik, config)
            if ralik ~= ratak then
                toPhoneticRemainder(ratak, config)
            end
        end
    end
    return outSeq
end
-- Applies the dialect-specific rewrites to the pseudo-X-SAMPA string `text`
-- and returns the result. `isRalik` selects the Rālik reflexes when true
-- and the Ratak reflexes otherwise. `config` is accepted but not read here.
toPhoneticDialect = function(text, config, isRalik)
    -- Morphemes can begin with geminated consonants, but spoken words
    -- cannot: expand an initial geminate with a copy vowel.
    local function expandGeminate(prefix, conson, gapA, gapB, vowel)
        local copyVowel
        if vowel == "I" then
            copyVowel = "i"
        elseif vowel == "a" and conson ~= "hG" then
            copyVowel = "E"
        else
            copyVowel = vowel
        end
        -- What every variant ends with: the second consonant and vowel.
        local tail = gapA..conson..gapB..vowel
        if isRalik then
            return prefix.."hj"..copyVowel..conson..tail
        end
        if conson == "hw" then
            return prefix..conson..copyVowel..conson..tail
        end
        return prefix..conson..copyVowel..tail
    end

    -- A leading "\t" streamlines the morpheme-initial patterns.
    local work = string.gsub(
        "\t"..text,
        "([\tjGw] *)(.[jGw])( *)%2( *)([aEeIi])",
        expandGeminate
    )

    -- Initial {yiyV-, yiwV-, wiwV-} sequences have special behavior.
    -- To block this in the template argument, use "'i" instead of "i".
    if isRalik then
        -- Rālik {wiwV-} becomes {yiwV-}.
        work = string.gsub(
            work, "([\tjGw] *h)w( *i *hw *[aEeIi])", "%1j%2"
        )
    end
    -- {[yw]iwV-} becomes {[yw]iwwV-} in both dialects.
    work = string.gsub(work, "([\tjGw] *h[jw] *i *hw)( *[aEeIi])", "%1hw%2")

    -- {yiyV-} sequences
    local yiyReplacement = "%1yj%2%3%4"
    if isRalik then
        yiyReplacement = "%1Yj%2%3%4"
    end
    work = string.gsub(
        work, "([\tjGw] *)hj( *)i( *)hj( *[aEeIi])", yiyReplacement
    )

    -- Drop the leading "\t" again.
    work = string.sub(work, 2)
    -- The protected {i} no longer needs to be distinguished.
    work = string.gsub(work, "I", "i")
    return work
end
-- Converts one dialect-resolved pseudo-X-SAMPA string `code` to phonetic IPA
-- and appends every unique resulting pronunciation to config.outSeq.
-- Reads config.enunciate, config.liaison, config.noHints and config.outSeq.
-- Returns nothing.
toPhoneticRemainder = function(code, config)
    -- "\n" bookends pronunciations of full terms.
    -- "\t" bookends prosodic breaks within pronunciations.
    local text = "\n\t"..code.."\t\n"
    local oldText
    -- Handle pseudoconsonants and phrases that begin or end with bare vowels.
    -- A bare edge vowel is expanded into three variants, one per
    -- pseudoconsonant (_j, _G, _w).
    local hasLeftVowel = string.find(code, "^_")
    if not hasLeftVowel then
        hasLeftVowel = string.find(code, "^[aEei]")
        if hasLeftVowel then
            text = string.gsub(
                text,
                "\n\t".."([aEei][^\t]*)".."\t\n",
                "\n\t".."_j%1".."\t\n"..
                "\n\t".."_G%1".."\t\n"..
                "\n\t".."_w%1".."\t\n"
            )
        end
    end
    local hasRightVowel = string.find(code, "_.$")
    if not hasRightVowel then
        hasRightVowel = string.find(code, "[aEei]$")
        if hasRightVowel then
            text = string.gsub(
                text,
                "\n\t".."([^\t]-[aEei])".."\t\n",
                "\n\t".."%1_j".."\t\n"..
                "\n\t".."%1_G".."\t\n"..
                "\n\t".."%1_w".."\t\n"
            )
        end
    end
    local hasEdgeVowel = hasLeftVowel or hasRightVowel
    if hasEdgeVowel then
        text = string.gsub(text, "/", "\t\t")
    end
    local enunciate = config.enunciate
    local liaison = config.liaison
    local noHints = config.noHints
    local outSeq = config.outSeq
    -- Use liaison if we're enunciating.
    liaison = liaison or enunciate
    if enunciate then
        -- Create a prosodic break at consonant clusters.
        text = string.gsub(text, "([jGw]) *(.[jGw])", "%1".."\t\t".."%2")
    end
    -- Per the Marshallese Reference Grammar.
    if false then
        -- Non-phrase-initial {yi'y-} vocalizes to true {yiy}.
        text = string.gsub(text, "([^\t] *)yj", "%1hjihj")
    else
        -- Experimental, to fix the iọkiọkwe problem.
        -- Non-phrase-initial {yi'y-}
        -- vocalizes to true {yiy} at the beginning of a word,
        -- but not in a non-initial position within a word.
        text = string.gsub(text, " yj", " hjihj")
    end
    -- {'yiy} vocalizes contextually.
    do
        -- To {iyy} after a consonant.
        if not enunciate then
            text = string.gsub(text, "([jGw] *)Yj", "%1ihjhj")
        end
        -- To {yiyy} everywhere else.
        text = string.gsub(
            text, "Yj", enunciate and ("hjihj".."\t\t".."hj") or "hjihjhj"
        )
    end
    -- Mid-vowel harmony assimilation across semiconsonants.
    do
        -- Always {e-a}, never {ẹ-a}.
        text = string.gsub(text, "e([ hjGw]*a)", "E%1")
        -- Always {ẹ-i}, never {e-i}.
        text = string.gsub(text, "E([ hjGw]*i)", "e%1")
        -- Always {e-e} and {ẹ-ẹ}, never {e-ẹ} or {ẹ-e}.
        text = string.gsub(text, "[Ee][ hjGw]*[Ee][ hjGwEe]*",
            function(match)
                -- Harmonize the run to ITS OWN last mid vowel. Searching
                -- the whole outer `text` here would pick the last mid
                -- vowel of the entire string, even one in an unrelated
                -- later syllable or word.
                local index = string.find(match, "[Ee][^Ee]*$")
                local vowel = string.sub(match, index, index)
                match = string.gsub(match, "[Ee]", vowel)
                return match
            end
        )
    end
    -- Detect and mark stressed syllables, but not if this term is an affix.
    if not hasEdgeVowel then
        -- Temporarily mark the end of the term's bookend as stressed.
        text = string.gsub(text, "(\t[\t\n])", "\"%1")
        -- Temporarily mark all natural syllables as unstressed.
        text = string.gsub(text, "(.[jGw] *[aEei])", ",%1")
        -- Recursively place stress before each CVC, CVCV and CVCCV sequence.
        text = string_gsubx(
            text,
            ",("..
            ".[jGw] *[aEei] *[ptkmnNrlh]?[jGw]? *"..
            ",?"..
            ".[jGw] *[aEei]? *"..
            "\"[^\t]*\t"..
            ")",
            "\"%1"
        )
        -- Remove dangling syllable markers from the term's bookends.
        text = string.gsub(text, " *\"? *\t *,? *", "\t")
        -- Remove all unstressed syllable markers.
        text = string.gsub(text, ",", "")
        if not enunciate then
            -- Restore unstressed syllable markers
            -- only within consonant clusters that are not already stressed.
            -- These will be removed again later anyway.
            text = string.gsub(text, "([jGw] *)(.[jGw])", "%1,%2")
        end
        -- If there is more than one stressed syllable,
        -- then mark the penultimate stressed syllable as primarily stressed,
        -- and the others as secondarily stressed.
        if string.find(text, "\"[^\"\t]*\"[^\t]*\t") then
            text = string.gsub(text, "\"", "%%")
            text = string.gsub(text, "%%([^%%\t]*%%[^%%\t]*\t)", "\"%1")
        end
    end
    -- Mark full vowels as syllabic.
    text = string.gsub(text, "([aEei])", "%1=")
    if not enunciate then
        -- Tag consonant clusters for the next operation.
        oldText = text
        text = string.gsub(text, "(.[jGw])( *[\"%%,]?.[jGw])", "%1/%2")
        needClusterTypes()
        -- Process unstable and assimilating consonant clusters.
        if oldText ~= text then
            text = string_gsub2(
                text,
                "([aEei])(= *[\"%%,]?)(.)([jGw])/"..
                "( *[\"%%,]?)(.)([jGw])( *)([aEei])",
                function(
                    vowelL, _, primaryL, secondaryL,
                    __, primaryR, secondaryR, ___, vowelR
                )
                    local vowelE = ""
                    local markE = ""
                    local cluster = clusterTypes[primaryL][primaryR][secondaryR]
                    if cluster == EPENTH_CLUSTER then
                        -- An epenthetic vowel will be inserted.
                        if primaryL == "h" then
                            -- If the first consonant is a semiconsonant,
                            -- then copy the vowel on the left.
                            vowelE = vowelL
                        elseif primaryR == "h" then
                            -- If the first consonant is a full consonant
                            -- but the second consonant is a semiconsonant,
                            -- then copy the vowel on the right.
                            vowelE = vowelR
                        elseif primaryR == "y" then
                            -- If the first consonant is a full consonant
                            -- but the second consonant is {yi'y},
                            -- then the epenthetic vowel is {i},
                            -- and the second consonant becomes plain {y}.
                            vowelE = "i"
                            primaryR = "h"
                        else
                            -- If neither consonant is a semiconsonant,
                            -- then the epenthetic vowel has an F1
                            -- that is the maximum of
                            -- the two neighboring vowels and {e}.
                            vowelE = fromF1[math.max(
                                toF1[vowelL],
                                toF1[vowelR],
                                toF1["E"]
                            )]
                        end
                        markE = "@"
                    else
                        -- No epenthetic vowel.
                        if cluster == ASSIM_CLUSTER then
                            -- Regressive primary assimilation.
                            primaryL = primaryR
                        end
                        if secondaryL == "w" and
                            primaryR ~= "t"
                        then
                            -- Progressive secondary assimilation.
                            -- But there is no {tʷ} in Marshallese.
                            secondaryR = secondaryL
                        else
                            -- Regressive secondary assimilation.
                            secondaryL = secondaryR
                        end
                    end
                    return (
                        vowelL.._..primaryL..secondaryL..vowelE..markE..
                        __..primaryR..secondaryR..___..vowelR
                    )
                end
            )
        end
    end
    needVowelCharts()
    -- Give a default F2 to vowels,
    -- averaging the F2 of their two neighboring consonants.
    -- This can also create transitional vowels whose F2
    -- have no direct counterparts with consonant secondary articulation.
    text = string_gsub2(text, "([jGw])( *.)([=@] *[\"%%,]?.)([jGw])",
        function(secondaryL, _, __, secondaryR)
            return secondaryL.._..lerpF2(secondaryL, secondaryR)..__..secondaryR
        end
    )
    -- Unconditionally surface semiconsonants in complete isolation.
    oldText = text
    text = string.gsub(text, "\t *h(.) *\t", "\tH%1\t")
    -- If the term contains any other semiconsonants...
    if oldText == text and
        string.find(text, "h")
    then
        local hasVG = false
        local hasGV = false
        local hasVGV = false
        -- Give unsurfaced semiconsonants a surface F1
        -- matching the vowels on their left.
        text = string.gsub(text, "([aEei])(.[=@] *[\"%%,]?)h(.)",
            function(vowelF1, _, secondary)
                hasVG = true
                return vowelF1.._..vowelF1..fromF2[toF2[secondary]].."^"
            end
        )
        -- Adjust the F1 of surfaced semiconsonants
        -- according to the vowels on their right.
        -- To the maximum of the vowel if {y} or {w}.
        -- To the minimum of the vowel if {h}.
        if hasVG then
            text = string.gsub(text, "(.)(.)(%^ *)([aEei])",
                function(semiF1, semiF2, _, vowelF1)
                    hasGV = true
                    hasVGV = true
                    local fn = semiF2 == "3" and math.min or math.max
                    return fromF1[fn(
                        toF1[semiF1], toF1[vowelF1]
                    )]..semiF2.._..vowelF1
                end
            )
        end
        -- Give remaining unsurfaced semiconsonants a surface F1
        -- matching the vowels on their right.
        text = string.gsub(text, "h(.)( *)([aEei])",
            function(secondary, _, vowelF1)
                hasGV = true
                return vowelF1..fromF2[toF2[secondary]].."^".._..vowelF1
            end
        )
        local startsGV = hasGV and not not string.find(text, "\t *[\"%%,]?..%^")
        local endsVG = hasVG and not not string.find(text, "%^ *\t")
        if not enunciate then
            -- If a vowel comes before a semiconsonant of the same F1,
            -- then change the vowel's F2 to match the semiconsonant.
            if hasVG then
                text = string.gsub(
                    text, "(.).([=@] *[\"%%,]?)%1(.)", "%1%3%2%1%3"
                )
            end
            -- If a non-open vowel comes after {y} of the same F1
            -- and before a velarized full consonant,
            -- then change the vowel's F2 to match the {y}.
            if hasGV then
                text = string.gsub(
                    text,
                    "([Eei])(1)(%^ *)%1.([=@] *[\"%%,]?[ptkmnNrl]G)",
                    "%1%2%3%1%2%4"
                )
            end
            -- If a non-open vowel comes after {y} of the same F1
            -- and before a syllable stress boundary,
            -- then change the vowel's F2 to match the {y}.
            if hasGV then
                text = string.gsub(
                    text, "([Eei])(1)(%^ *)%1.([=@] *[\"%%,])", "%1%2%3%1%2%4"
                )
            end
            -- If {a} comes after {y} of the same F1 after a stressed vowel,
            -- then change the vowel's F2 to match the {y}.
            if hasVGV then
                text = string.gsub(text, "(= *a)(1)(%^ *a).", "%1%2%3%2")
            end
            -- If a vowel comes after {w} of the same F1 after a stressed vowel,
            -- then change the vowel's F2 to match the {w}.
            if hasVGV then
                text = string_gsub2(
                    text, "(= *)(.)(5)(%^ *)%2.", "%1%2%3%4%2%3"
                )
            end
            -- If a vowel comes after {h}...
            if hasGV then
                text = string.gsub(
                    text, "(.)(3)(%^ *)(.).([=@] *[\"%%,]?.)([jw15])",
                    function(semiF1, semiF2, _, vowelF1, __, secondary)
                        local vowelF2
                        if semiF1 == vowelF1 then
                            -- If they have the same F1,
                            -- then change the vowel's F2 to match the {h}.
                            vowelF2 = semiF2
                        else
                            -- If they do not have the same F1,
                            -- then reset the vowel's F2.
                            vowelF2 = lerpF2(semiF2, secondary)
                        end
                        return (
                            semiF1..semiF2.._..vowelF1..vowelF2..__..secondary
                        )
                    end
                )
            end
            -- If a vowel comes after {y} or {w}
            -- at the beginning of a prosodic unit
            -- and before a stress boundary
            -- before a semiconsonant and another vowel
            -- that have the same F2 as each other
            -- and both have the same F1 as the first vowel,
            -- then change the first vowel's F2 to match.
            if hasVGV then
                text = string.gsub(
                    text,
                    "\t *(.[15]%^ *)(.).([=@] *[\"%%,])%2(.)(%^ *)%2%4",
                    "\t%1%2%4%3%2%4%5%2%4"
                )
            end
        end
        -- Unsurface {h} everywhere.
        text = string.gsub(text, ".3%^", "hG")
        -- Unsurface semiconsonants that can coalesce
        -- with either of their neighboring vowels,
        -- but not crossing syllable stress boundaries.
        if hasGV then
            text = string.gsub(text, "(.)(.)%^( *)%1%2",
                function(vowelF1, vowelF2, _)
                    return "h"..fromF2Conson[toF2[vowelF2]].._..vowelF1..vowelF2
                end
            )
        end
        if hasVG then
            text = string.gsub(text, "(.)(.)(= *)%1%2%^",
                function(vowelF1, vowelF2, _)
                    return vowelF1..vowelF2.._.."h"..fromF2Conson[toF2[vowelF2]]
                end
            )
        end
        -- Adjust the F1 of remaining surfaced {y} and {w}.
        text = string.gsub(text, "(.)([15])%^", function(semiF1, semiF2)
            if semiF2 == "1" then
                if semiF1 == "a" then
                    semiF1 = "E"
                end
            else -- semiF2 == "5"
                semiF1 = "i"
            end
            return semiF1..semiF2.."^"
        end)
        -- Delete remaining unsurfaced semiconsonants altogether.
        text = string.gsub(text, "h.", "")
        if hasVGV and not enunciate then
            -- Indicate certain long monophthongs as geminated.
            text = string.gsub(text, "([aEei].)[=@]( *)%1[=@]", "%1=%2:")
            text = string.gsub(
                text, "([aEei].)[=@]( *[\"%%,])%1[=@]([^:])", "%1=%2:%3"
            )
        end
        -- If a weakened semiconsonant falls on a stressed syllable
        -- before a vowel with the same F2,
        -- then shift forward the stress marker.
        -- (Returning nothing from the callback keeps the match unchanged.)
        text = string.gsub(text, "([\"%%,])0(.)( *[aEei])(.)",
            function(stress, semiF2, _, vowelF2)
                if toF2[semiF2] == toF2[vowelF2] then
                    return "0"..semiF2..stress.._..vowelF2
                end
            end
        )
    end
    -- Neutralize the difference between full and epenthetic vowels.
    text = string.gsub(text, "[=@]", "")
    -- Simplify secondary articulation of consonant clusters.
    text = string.gsub(text, "([jGw])( *[\"%%,]?.)%1", "%2%1")
    -- Partially voice obstruents before vowels at the beginning of a phrase or
    -- in consonant clusters after other obstruents or laterals.
    text = string.gsub(text, "([ptkl\t] *[\"%%,]?)([ptk])(. *[aEei])",
        function(_, primary, __)
            return _..string.upper(primary)..__
        end
    )
    voicedPrimaries = voicedPrimaries or {
        ["p"] = "b", ["t"] = "d", ["k"] = "g"
    }
    -- Voice remaining obstruents before vowels.
    text = string.gsub(text, "([ptk])(. *%(?[aEei])", function(primary, _)
        return voicedPrimaries[primary].._
    end)
    if hasEdgeVowel then
        if noHints then
            -- Strip pseudoglides.
            text = string.gsub(text, "_.", "")
        elseif hasLeftVowel then
            -- Reverse text of left pseudoglide.
            text = string.gsub(text, "\t *_(.)", "\t%1_")
        end
    end
    if liaison then
        -- Remove whitespace from bookends.
        text = string.gsub(text, " *\t *", "\t")
        -- Prepare liaisons.
        text = string.gsub(text, "[ _]+", "_")
    else
        -- Strip liaisons.
        text = string.gsub(text, " ", "")
    end
    if enunciate then
        -- Convert bookends to spaces.
        text = string.gsub(text, "\t+", " ")
    end
    needPhoneticMap()
    -- Convert pseudo-X-SAMPA to phonetic IPA.
    text = string.gsub(text, ".[jGw1-5]?%^?", phoneticMap)
    -- Output unique pronunciations.
    -- Each "\n"-bookended span is one pronunciation; this gsub is run only
    -- for its callback and its result is discarded.
    string.gsub(text, "\n[^\n]*\n", function(result)
        addUnique(outSeq, fastTrim(result))
        return ""
    end)
end
-- Expose the internal functions under underscore-prefixed names. These are
-- the plain Lua functions themselves (taking strings and tables), unlike the
-- frame-based wrappers defined below.
export._parse = parse
export._toBender = toBender
export._toMOD = toMOD
export._toPhonemic = toPhonemic
export._toPhonetic = toPhonetic
-- Frame entry point: parses the first argument and returns its Bender-style
-- transcriptions joined with ", ". The `version` argument selects the
-- notation (see toBender).
function export.bender(frame)
    -- frame.args must go to toBender, which reads args.version; parse()
    -- takes only the code and would silently ignore a second argument.
    return table.concat(toBender(parse(frame.args[1]), frame.args), ", ")
end
-- Frame entry point: returns the first argument converted by toMOD.
function export.MOD(frame)
    local text = frame.args[1]
    return toMOD(text)
end
-- Frame entry point: parses the first argument and returns the resulting
-- pseudo-X-SAMPA strings joined with ", ".
function export.parse(frame)
    local codes = parse(frame.args[1])
    return table.concat(codes, ", ")
end
-- Frame entry point: parses the first argument and returns its phonemic
-- transcriptions joined with ", ".
function export.phonemic(frame)
    local codes = parse(frame.args[1])
    local transcriptions = toPhonemic(codes)
    return table.concat(transcriptions, ", ")
end
-- Frame entry point: parses the first argument and returns its phonetic
-- transcriptions joined with ", ". The remaining frame arguments are passed
-- to toPhonetic as its options.
function export.phonetic(frame)
    local codes = parse(frame.args[1])
    local transcriptions = toPhonetic(codes, frame.args)
    return table.concat(transcriptions, ", ")
end
-- Hand the table of exported functions to whoever loads this module.
return export