Module:User:Theknightwho/en-pron
Appearance
- This module sandbox lacks a documentation subpage. You may create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
This is a private module sandbox of Theknightwho, for their own experimentation. Items in this module may be added and removed at Theknightwho's discretion; do not rely on this module's stability.
local concat = table.concat
local gsub = string.gsub
local insert = table.insert
local match = string.match
local max = math.max
local remove = table.remove
local split = mw.text.split
local sub = string.sub
local umatch = mw.ustring.match
local m_data = mw.loadData("Module:User:Theknightwho/en-pron/data")
local rules = m_data.rules
local list = m_data.list
local phonemes
local Word
--- Base prototype for every phoneme.
-- Entries of the `phonemes` table are created with `Phoneme:new{...}` and therefore inherit
-- these methods; individual phoneme instances are in turn created from those entries.
-- Fields read by the methods below:
--   type   : the phoneme's symbol (returned by tostring).
--   parent : the sequence (word) that contains the phoneme.
--   level  : numeric stress level, or nil when none has been assigned.
local Phoneme = {}
Phoneme.__index = Phoneme

--- Returns the phoneme's symbol.
function Phoneme:__tostring()
	return self.type
end

--- Creates a new object that inherits from `self`.
-- @param data optional table to use as the new object (a fresh table if omitted).
-- @return `data` with `self` installed as its metatable.
function Phoneme:new(data)
	return setmetatable(data or {}, self)
end

--- Turns this phoneme into another phoneme in place by swapping its prototype.
-- Fields stored directly on the instance (e.g. `level`, `parent`) are kept.
-- @param phoneme symbol of the target phoneme; must be a key of `phonemes`.
-- Raises an error for an unknown symbol: silently calling setmetatable with nil would strip
-- the instance of all its methods and only fail later, far from the cause.
function Phoneme:change(phoneme)
	local target = phonemes[phoneme]
	if not target then
		error("unknown phoneme \"" .. tostring(phoneme) .. "\"", 2)
	end
	setmetatable(self, target)
end

--- Placeholder: not implemented yet, so it returns nothing.
function Phoneme:is_pause()
end

--- Placeholder: not implemented yet, so it returns nothing.
function Phoneme:is_pause2()
end

--- True if this phoneme is the first element of its parent.
function Phoneme:is_word_start()
	return self == self.parent[1]
end

--- True if this phoneme is the last element of its parent.
function Phoneme:is_word_end()
	return self == self.parent[#self.parent]
end

--- Checks whether any element of the parent before index `i` has stress level 4.
-- @param i index in the parent to look back from (exclusive).
-- @return true if such an element exists, otherwise nothing.
function Phoneme:is_after_stress(i)
	for j = i - 1, 1, -1 do
		if self.parent[j].level == 4 then
			return true
		end
	end
end

--- Truthy if a level is set and it is 3 or higher.
function Phoneme:is_stressed()
	return self.level and self.level >= 3
end

--- Truthy if a level is set and it is 1 or lower.
function Phoneme:is_unstressed()
	return self.level and self.level <= 1
end

--- True if the level is exactly 0.
function Phoneme:is_diminished()
	return self.level == 0
end

--- True if the level is exactly 4.
function Phoneme:is_max_stress()
	return self.level == 4
end
--- Nil-safe attribute lookup.
-- @param phoneme a phoneme, or nil/false when there is none.
-- @param attr name of the attribute to read.
-- @return `phoneme` itself when it is nil/false, otherwise the value of `phoneme[attr]`.
local function is(phoneme, attr)
	if not phoneme then
		return phoneme
	end
	return phoneme[attr]
end
phonemes = {
["%%"] = Phoneme:new{ -- Elided.
stress = true,
level = -1,
},
-- Stress level 0 is used internally to mean diminished stress.
["%"] = Phoneme:new{ -- Unstressed.
stress = true,
level = 1,
},
-- Stress level 2 is used internally to mean no explicit stress.
[","] = Phoneme:new{ -- Secondary.
stress = true,
level = 3,
ipa = "ˌ",
enPR = "′",
},
["'"] = Phoneme:new{ -- Primary.
stress = true,
level = 4,
ipa = "ˈ",
enPR = "<b>′</b>",
},
["''"] = Phoneme:new{ -- Primary (with priority).
stress = true,
level = 5,
},
["="] = Phoneme:new{ -- Stress on the previous syllable.
stress = true,
},
["|"] = Phoneme:new{ -- Syllable boundary.
enPR = "-",
ipa = ".",
},
["_"] = Phoneme:new{
pause = true,
},
-- Vowels.
["a"] = Phoneme:new{
vowel = true,
short = true,
enPR = "ă",
ipa = "æ",
process = function(self)
if self:is_diminished() then
self:change("a#")
end
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["a2"] = Phoneme:new{
vowel = true,
process = function(self)
self:change("a")
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["a#"] = Phoneme:new{
vowel = true,
unstressed = true,
enPR = "ə",
ipa = "ə",
process = function(self)
if not self:is_unstressed() then
self:change("a")
end
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["a#2"] = Phoneme:new{
vowel = true,
process = function(self)
if is(self.parent:next(2), "vowel") then
self:change("a#")
else
self:change("a")
end
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["aa"] = Phoneme:new{
vowel = true,
enPR = "ä",
ipa = "ɑː",
process = function(self)
self:change("A:")
if is(self.parent:next(1), "vowel") then
self.parent:append("r")
end
end,
},
["aI"] = Phoneme:new{
vowel = true,
enPR = "ī",
ipa = "aɪ",
process = function(self)
if is(self.parent:next(1), "class") == "a" then
self.parent:append(";")
elseif is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["aI@"] = Phoneme:new{
vowel = true,
enPR = "ī-ə",
ipa = "aɪ.ə",
process = function(self)
if is(self.parent:next(1), "vowel") then
self.parent:append("r")
end
end,
},
["aI3"] = Phoneme:new{
vowel = true,
process = function(self)
self:change("aI@")
if is(self.parent:next(1), "vowel") then
self.parent:append("r")
end
end,
},
["aU"] = Phoneme:new{
vowel = true,
enPR = "ou",
ipa = "aʊ",
process = function(self)
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["A:"] = Phoneme:new{
vowel = true,
enPR = "ä",
ipa = "ɑː",
process = function(self)
if is(self.parent:next(1), "vowel") then
self.parent:append("r")
end
end,
},
["A@"] = Phoneme:new{
vowel = true,
process = function(self)
self:change("A:")
if is(self.parent:next(1), "vowel") then
self.parent:append("r")
end
end,
},
["A#"] = Phoneme:new{
vowel = true,
process = function(self)
self:change("a")
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["A~"] = Phoneme:new{
vowel = true,
enPR = "äɴ",
ipa = "ɑ̃",
process = function(self)
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["@"] = Phoneme:new{
vowel = true,
unstressed = true,
enPR = "ə",
ipa = "ə",
process = function(self)
if is(self.parent:next(1), "vowel") then
self.parent:append("r")
end
end,
},
["@-"] = Phoneme:new{
vowel = true,
unstressed = true,
nonsyllabic = true,
enPR = "ə",
ipa = "(ə)",
process = function(self)
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["@2"] = Phoneme:new{
vowel = true,
unstressed = true,
process = function(self)
if is(self.parent:next(1), "vowel") then
self:change("I")
self.parent:append(";")
else
self:change("@")
end
end,
},
["@5"] = Phoneme:new{
vowel = true,
unstressed = true,
process = function(self)
if (
is(self.parent:next(1), "vowel") or
is(self.parent:next(1), "pause")
) then
self:change("U")
else
self:change("@")
end
end,
},
["@L"] = Phoneme:new{
vowel = true,
unstressed = true,
enPR = "əl",
ipa = "əl",
process = function(self)
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["3"] = Phoneme:new{
vowel = true,
enPR = "ə",
ipa = "ə",
process = function(self)
if is(self.parent:next(1), "vowel") then
self.parent:append("r")
end
end,
},
["3:"] = Phoneme:new{
vowel = true,
enPR = "ûr",
ipa = "ɜː",
process = function(self)
if self:is_diminished() then
self:change("@")
end
if is(self.parent:next(1), "vowel") then
self.parent:append("r")
end
end,
},
["e#"] = Phoneme:new{
vowel = true,
process = function(self)
self:change("E")
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["e@"] = Phoneme:new{
vowel = true,
enPR = "âr",
ipa = "ɛə",
process = function(self)
if is(self.parent:next(1), "vowel") then
self.parent:append("r")
end
end,
},
["eI"] = Phoneme:new{
vowel = true,
enPR = "ā",
ipa = "eɪ",
process = function(self)
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["E"] = Phoneme:new{
vowel = true,
short = true,
enPR = "ĕ",
ipa = "ɛ",
process = function(self)
if self:is_diminished() then
if is(self.parent:next(1, true), "type") == "n" then
self:change("@")
else
self:change("I") -- I2
end
end
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["E#"] = Phoneme:new{
vowel = true,
process = function(self)
if not self:is_unstressed() then
self:change("E")
else
self:change("@")
end
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["E/"] = Phoneme:new{
vowel = true,
process = function(self)
local nxt = self.parent:next(1, nil, "vowel")
if nxt and nxt:is_stressed() and self:is_unstressed() then
self:change("@")
else
self:change("E")
end
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["E2"] = Phoneme:new{
vowel = true,
process = function(self)
self:change("E")
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["i"] = Phoneme:new{
vowel = true,
unstressed = true,
enPR = "(ē)",
ipa = "i",
process = function(self)
if is(self.parent:next(1), "vowel") then
self.parent:append(";")
elseif self.parent:nth(self, "vowel", true) ~= -1 then
self:change("I")
elseif self:is_stressed() then
self:change("i:")
end
end,
},
["i:"] = Phoneme:new{
vowel = true,
enPR = "ē",
ipa = "iː",
process = function(self)
if is(self.parent:next(1), "vowel") then
self.parent:append(";")
end
end,
},
["i@"] = Phoneme:new{
vowel = true,
enPR = "ē-ə",
ipa = "i.ə",
process = function(self)
if is(self.parent:next(1), "vowel") then
self.parent:append("r")
end
end,
},
["i@3"] = Phoneme:new{
vowel = true,
enPR = "îr",
ipa = "ɪə",
process = function(self)
if is(self.parent:next(1), "vowel") then
self.parent:append("r")
end
end,
},
["I"] = Phoneme:new{
vowel = true,
short = true,
enPR = "ĭ",
ipa = "ɪ",
process = function(self)
if is(self.parent:next(1), "vowel") then
self.parent:append(";")
end
end,
},
["I2"] = Phoneme:new{
vowel = true,
unstressed = true,
process = function(self)
self:change("I")
if is(self.parent:next(1), "vowel") then
self.parent:append(";")
end
end,
},
["I#"] = Phoneme:new{
vowel = true,
unstressed = true,
enPR = "(ĭ)",
process = function(self)
self:change("I")
if is(self.parent:next(1), "vowel") then
self.parent:append(";")
end
end,
},
["I2#"] = Phoneme:new{
vowel = true,
unstressed = true,
process = function(self)
self:change("I")
if is(self.parent:next(1), "vowel") then
self.parent:append(";")
end
end,
},
["IR"] = Phoneme:new{
vowel = true,
-- RP = phonemes["3:"].RP,
enPR = "ûr",
},
["o@"] = Phoneme:new{
vowel = true,
enPR = "ôr",
ipa = "ɔː",
process = function(self)
if is(self.parent:next(1), "vowel") then
self.parent:append("r")
end
end,
},
["oU"] = Phoneme:new{
vowel = true,
enPR = "ō",
ipa = "əʊ",
process = function(self)
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["oU#"] = Phoneme:new{
vowel = true,
process = function(self)
if self:is_stressed() then
self:change("0")
elseif self:is_diminished() then
self:change("@")
else
self:change("oU")
end
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["0"] = Phoneme:new{
vowel = true,
short = true,
enPR = "ŏ",
ipa = "ɒ",
process = function(self)
if self:is_diminished() then
self:change("@")
end
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["0#"] = Phoneme:new{
vowel = true,
process = function(self)
if not self:is_unstressed() then
self:change("0")
else
self:change("@")
end
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["02"] = Phoneme:new{
vowel = true,
process = function(self)
self:change("0")
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["O"] = Phoneme:new{
vowel = true,
enPR = "ô",
ipa = "ɔː",
process = function(self)
if self:is_diminished() then
self:change("@")
end
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["O:"] = Phoneme:new{
vowel = true,
enPR = "ô",
ipa = "ɔː",
process = function(self)
if self:is_diminished() then
self:change("@")
end
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["O2"] = Phoneme:new{
vowel = true,
process = function(self)
self:change("0")
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["O@"] = Phoneme:new{
vowel = true,
enPR = "ôr",
ipa = "ɔː",
process = function(self)
if is(self.parent:next(1), "vowel") then
self.parent:append("r")
end
end,
},
["OI"] = Phoneme:new{
vowel = true,
enPR = "oi",
ipa = "ɔɪ",
process = function(self)
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["O~"] = Phoneme:new{
vowel = true,
enPR = "ôɴ",
ipa = "ɔ̃",
process = function(self)
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["8"] = Phoneme:new{
vowel = true,
short = true,
enPR = "o͝o",
ipa = "ʊ",
process = function(self)
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["8@"] = Phoneme:new{
vowel = true,
enPR = "o͝or",
ipa = "ʊə",
process = function(self)
if is(self.parent:next(1), "vowel") then
self.parent:append("r")
end
end,
},
["u:"] = Phoneme:new{
vowel = true,
enPR = "o͞o",
ipa = "uː",
process = function(self)
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["U"] = Phoneme:new{
vowel = true,
short = true,
enPR = "o͝o",
ipa = "ʊ",
process = function(self)
if is(self.parent:next(-1, true), "type") == "j" then
self:change("8")
end
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["U@"] = Phoneme:new{
vowel = true,
enPR = "o͝or",
ipa = "ʊə",
process = function(self)
if self:is_unstressed() then
self:change("8@")
end
if is(self.parent:next(1), "vowel") then
self.parent:append("r")
end
end,
},
["V"] = Phoneme:new{
vowel = true,
short = true,
enPR = "ŭ",
ipa = "ʌ",
process = function(self)
if self:is_diminished() then
self:change("@")
end
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["V#"] = Phoneme:new{
vowel = true,
process = function(self)
if not self:is_unstressed() then
self:change("V")
else
self:change("@")
end
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["VR"] = Phoneme:new{
vowel = true,
-- RP = phonemes["3:"].RP,
enPR = "ûr",
},
-- Syllabic consonants.
["l-"] = Phoneme:new{
vowel = true,
enPR = "əl",
ipa = "əl",
process = function(self)
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["m-"] = Phoneme:new{
vowel = true,
process = function(self)
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["n-"] = Phoneme:new{
vowel = true,
process = function(self)
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
["r-"] = Phoneme:new{
vowel = true,
process = function(self)
if is(self.parent:next(1, true), "vowel") then
self.parent:append("|")
end
end,
},
-- Consonants.
["b"] = Phoneme:new{
voiced = true,
bilabial = true,
plosive = true,
},
["d"] = Phoneme:new{
voiced = true,
alveolar = true,
plosive = true,
},
["d#"] = Phoneme:new{
voiced = true,
alveolar = true,
plosive = true,
process = function(self)
if (
is(self.parent:next(-1), "vowel") or
is(self.parent:next(-1), "voiced")
) then
self:change("d")
else
self:change("t")
end
end,
},
["d/dZ"] = Phoneme:new{ -- Becomes "d" when the following phoneme is stressed, otherwise "dZ".
	voiced = true,
	alveolar = true,
	plosive = true,
	process = function(self)
		-- There may be no following phoneme in the word; treat that as "not stressed"
		-- instead of indexing nil.
		local nxt = self.parent:next(1, true)
		if nxt and nxt:is_stressed() then
			self:change("d")
		else
			self:change("dZ")
		end
	end,
},
["d/dZ2"] = Phoneme:new{ -- As "d/dZ", but also checks the phoneme two positions ahead.
	voiced = true,
	alveolar = true,
	plosive = true,
	process = function(self)
		-- Either neighbour may be missing; a missing neighbour counts as unstressed.
		local first, second = self.parent:next(1, true), self.parent:next(2, true)
		if (
			first and first:is_stressed() or
			second and second:is_stressed()
		) then
			self:change("d")
		else
			self:change("dZ")
		end
	end,
},
["dZ"] = Phoneme:new{
voiced = true,
palatoalveolar = true,
sibilant = true,
affricate = true,
enPR = "j",
ipa = "d͡ʒ",
},
["D"] = Phoneme:new{
voiced = true,
dental = true,
fricative = true,
enPR = "<i>th</i>",
ipa = "ð",
},
["f"] = Phoneme:new{
voiceless = true,
labiodental = true,
fricative = true,
},
["g"] = Phoneme:new{
voiced = true,
velar = true,
plosive = true,
ipa = "ɡ",
},
["h"] = Phoneme:new{
voiceless = true,
glottal = true,
fricative = true,
},
["j"] = Phoneme:new{
voiced = true,
palatal = true,
approximant = true,
liquid = true,
enPR = "y",
},
[";"] = Phoneme:new{
liquid = true,
palatal = true,
enPR = "-",
ipa = ".",
},
["k"] = Phoneme:new{
voiceless = true,
velar = true,
plosive = true,
},
["l"] = Phoneme:new{
voiced = true,
alveolar = true,
lateral = true,
approximant = true,
liquid = true,
},
["l#"] = Phoneme:new{
voiceless = true,
alveolar = true,
lateral = true,
fricative = true,
enPR = "l",
ipa = "ɬ",
process = function(self)
self:change("l")
end,
},
["m"] = Phoneme:new{
voiced = true,
bilabial = true,
nasal = true,
},
["n"] = Phoneme:new{
voiced = true,
alveolar = true,
nasal = true,
process = function(self)
if is(self.parent:next(1, true), "velar") then
self:change("N")
end
end,
},
["N"] = Phoneme:new{
voiced = true,
velar = true,
nasal = true,
enPR = "ng",
ipa = "ŋ",
},
["p"] = Phoneme:new{
voiceless = true,
bilabial = true,
plosive = true,
},
["r"] = Phoneme:new{
voiced = true,
alveolar = true,
approximant = true,
rhotic = true,
ipa = "ɹ",
},
["s"] = Phoneme:new{
voiceless = true,
alveolar = true,
sibilant = true,
fricative = true,
},
["s/S"] = Phoneme:new{ -- Becomes "s" when the following phoneme is stressed, otherwise "S".
	voiceless = true,
	alveolar = true,
	sibilant = true,
	fricative = true,
	process = function(self)
		-- There may be no following phoneme in the word; treat that as "not stressed"
		-- instead of indexing nil.
		local nxt = self.parent:next(1, true)
		if nxt and nxt:is_stressed() then
			self:change("s")
		else
			self:change("S")
		end
	end,
},
["s/S2"] = Phoneme:new{ -- As "s/S", but also checks the phoneme two positions ahead.
	voiceless = true,
	alveolar = true,
	sibilant = true,
	fricative = true,
	process = function(self)
		-- Either neighbour may be missing; a missing neighbour counts as unstressed.
		local first, second = self.parent:next(1, true), self.parent:next(2, true)
		if (
			first and first:is_stressed() or
			second and second:is_stressed()
		) then
			self:change("s")
		else
			self:change("S")
		end
	end,
},
["S"] = Phoneme:new{
voiceless = true,
palatoalveolar = true,
sibilant = true,
fricative = true,
enPR = "sh",
ipa = "ʃ",
},
["t"] = Phoneme:new{
voiceless = true,
alveolar = true,
plosive = true,
},
["t2"] = Phoneme:new{
voiceless = true,
alveolar = true,
plosive = true,
process = function(self)
self:change("t")
end,
},
["t/S"] = Phoneme:new{ -- Becomes "t" when the following phoneme is stressed, otherwise "S".
	voiceless = true,
	alveolar = true,
	plosive = true,
	process = function(self)
		-- There may be no following phoneme in the word; treat that as "not stressed"
		-- instead of indexing nil.
		local nxt = self.parent:next(1, true)
		if nxt and nxt:is_stressed() then
			self:change("t")
		else
			self:change("S")
		end
	end,
},
["t/tS"] = Phoneme:new{ -- Becomes "t" when the following phoneme is stressed, otherwise "tS".
	voiceless = true,
	alveolar = true,
	plosive = true,
	process = function(self)
		-- A missing following phoneme counts as unstressed.
		local nxt = self.parent:next(1, true)
		if nxt and nxt:is_stressed() then
			self:change("t")
		else
			self:change("tS")
		end
	end,
},
["t/tS2"] = Phoneme:new{ -- As "t/tS", but also checks the phoneme two positions ahead.
	voiceless = true,
	alveolar = true,
	plosive = true,
	process = function(self)
		-- Either neighbour may be missing; a missing neighbour counts as unstressed.
		local first, second = self.parent:next(1, true), self.parent:next(2, true)
		if (
			first and first:is_stressed() or
			second and second:is_stressed()
		) then
			self:change("t")
		else
			self:change("tS")
		end
	end,
},
["tS"] = Phoneme:new{
voiceless = true,
palatoalveolar = true,
sibilant = true,
affricate = true,
enPR = "ch",
ipa = "t͡ʃ",
},
["T"] = Phoneme:new{
voiceless = true,
dental = true,
fricative = true,
enPR = "th",
ipa = "θ",
},
["v"] = Phoneme:new{
voiced = true,
labiodental = true,
fricative = true,
},
["w"] = Phoneme:new{
voiced = true,
velar = true,
protruded = true,
approximant = true,
liquid = true,
},
["w#"] = Phoneme:new{
voiceless = true,
velar = true,
protruded = true,
approximant = true,
enPR = "hw",
ipa = "ʍ",
process = function(self)
self:change("w")
end,
},
["x"] = Phoneme:new{
voiceless = true,
velar = true,
fricative = true,
enPR = "ᴋʜ",
},
["z"] = Phoneme:new{
voiced = true,
alveolar = true,
sibilant = true,
fricative = true,
},
["z/Z"] = Phoneme:new{ -- Becomes "z" when the following phoneme is stressed, otherwise "Z".
	voiced = true,
	alveolar = true,
	sibilant = true,
	fricative = true,
	process = function(self)
		-- There may be no following phoneme in the word; treat that as "not stressed"
		-- instead of indexing nil.
		local nxt = self.parent:next(1, true)
		if nxt and nxt:is_stressed() then
			self:change("z")
		else
			self:change("Z")
		end
	end,
},
["z#"] = Phoneme:new{
voiced = true,
alveolar = true,
sibilant = true,
fricative = true,
process = function(self)
if (
is(self.parent:next(-1), "vowel") or
is(self.parent:next(-1), "voiced")
) then
self:change("z")
else
self:change("s")
end
end,
},
["z/2"] = Phoneme:new{ -- Resolves to "I" + "z", "z" or "s" depending on the preceding phoneme.
	voiced = true,
	alveolar = true,
	sibilant = true,
	fricative = true,
	process = function(self)
		if is(self.parent:next(-1), "sibilant") then
			-- After a sibilant, insert a vowel before the "z".
			self.parent:prepend("I") -- I2
			self:change("z")
		elseif (
			is(self.parent:next(-1), "vowel") or
			is(self.parent:next(-1), "voiced")
		) then
			-- Voiced context keeps the voiced "z" (same rule as "z#"); the two outcomes
			-- were previously swapped.
			self:change("z")
		else
			self:change("s")
		end
	end,
},
["Z"] = Phoneme:new{
voiced = true,
palatoalveolar = true,
sibilant = true,
fricative = true,
enPR = "zh",
ipa = "ʒ",
},
["?"] = Phoneme:new{
voiceless = true,
glottal = true,
plosive = true,
ipa = "(ʔ)",
},
[":"] = Phoneme:new{
ipa = "ː",
},
}
-- Finalise every phoneme definition so that it can act as the metatable (prototype) of its
-- instances: record its own symbol, share the common tostring handler, and resolve missing
-- fields on instances through the definition itself.
for symbol, prototype in pairs(phonemes) do
	prototype.type = symbol
	prototype.__tostring = Phoneme.__tostring
	prototype.__index = prototype
end
--- Rule-driven converter from a spelling (`str`) to a sequence of phonemes.
local Phonemizer = {}
Phonemizer.__index = Phonemizer

--- Turns `data` into a Phonemizer object.
-- @param data table holding the initial state (at least `str`).
-- @return `data`, with Phonemizer installed as its metatable.
function Phonemizer:new(data)
	return setmetatable(data, Phonemizer)
end

--- Placeholder: not implemented yet.
function Phonemizer:check_list()
	-- TODO
end

--- Tests the main (literal) part of a rule against the input at the current head position.
-- @param rule literal string that must appear in `self.str` starting at `self.head`.
-- @return false when the text differs, otherwise the score `#rule * 21 - 20`.
function Phonemizer:main_rule(rule)
	local last = self.head + #rule - 1
	if sub(self.str, self.head, last) == rule then
		return #rule * 21 - 20
	end
	return false
end

--- Loads the rule character at `self.rule_ptr` into `self.rule_char`.
-- @return the character ("" when the pointer is outside the rule).
function Phonemizer:get_rule_char()
	local ptr = self.rule_ptr
	local char = sub(self.rule, ptr, ptr)
	self.rule_char = char
	return char
end

--- Moves the rule pointer one step in the current scanning direction (`self.dir`).
function Phonemizer:advance_rule_ptr()
	local ptr = self.rule_ptr
	self.rule_ptr = ptr + self.dir
end
-- Dispatch table of context-rule matchers, keyed by the rule character being tested.
-- Each matcher is called as fn(self, this): `self` is the Phonemizer and `this` is the
-- character of self.str at self.look_ptr. A numeric return value is the score contributed by
-- the match; a falsy return value (false or nil) means the rule character did not match.
-- Most scores shrink as self.distance grows. Matchers that step self.look_ptr back by
-- self.dir do not consume an input character (the caller advances look_ptr after every rule
-- character, so the two cancel out).
local group_rules = {}
-- A: character is in m_data.A.
group_rules["A"] = function(self, this)
return m_data.A[this] and 20 - self.distance
end
-- B: character is in m_data.B.
group_rules["B"] = function(self, this)
return m_data.B[this] and 20 - self.distance
end
-- C: character is in m_data.C (scores one point less than A/B).
group_rules["C"] = function(self, this)
return m_data.C[this] and 19 - self.distance
end
-- D: character is in m_data.D; worth one extra point when scanning backwards.
group_rules["D"] = function(self, this)
return m_data.D[this] and (self.dir == -1 and 21 or 20) - self.distance
end
-- F: character is in m_data.F.
group_rules["F"] = function(self, this)
return m_data.F[this] and 20 - self.distance
end
-- K: character is not in m_data.A. The end of the string ("") also fails when scanning
-- forwards with self.suffix_vowel set.
group_rules["K"] = function(self, this)
return not (
m_data.A[this] or (
this == "" and
self.dir == 1 and
self.suffix_vowel
)
) and 20 - self.distance
end
-- TODO: self.suffix_removed
-- N: forward-only condition that holds while no suffix has been removed; consumes no input.
group_rules["N"] = function(self)
if self.dir == 1 and not self.suffix_removed then
self.look_ptr = self.look_ptr - self.dir
return 1
end
end
-- P: forward-only; stores the rest of the rule (after this character) in self.rule_prefix
-- and jumps the rule pointer to the end of the rule. Skipped once a suffix has been removed.
group_rules["P"] = function(self)
if (
self.dir == 1 and
not self.suffix_removed
) then
self.rule_prefix = sub(self.rule, self.rule_ptr + 1)
self.rule_ptr = #self.rule
return 0
end
end
-- S: forward-only; stores the rest of the rule (after this character) in self.rule_suffix
-- and jumps the rule pointer to the end of the rule.
-- Note: don't match if there are no previous vowels and no prefix has been removed.
group_rules["S"] = function(self)
if (
self.dir == 1 and
(self.vowels > 0 or self.prefix_removed) and
not self.suffix_removed
) then
self.rule_suffix = sub(self.rule, self.rule_ptr + 1)
-- If the suffix starts with a vowel, add the "a" modifier to it.
if m_data.A[sub(self.str, self.head, self.head)] then
self.rule_suffix = self.rule_suffix .. "a"
end
self.rule_ptr = #self.rule
return 0
end
end
-- V: backward-only condition that holds when self.pos is "verb"; consumes no input.
group_rules["V"] = function(self)
if self.dir == -1 then
self.look_ptr = self.look_ptr - self.dir
return self.pos == "verb" and 1
end
end
-- X: holds when no character from the current position to the end of the string (in the
-- scanning direction) is in m_data.Y. Uses a private copy of the look pointer.
group_rules["X"] = function(self, this)
local look_ptr = self.look_ptr
while this ~= "" do
if m_data.Y[this] then
return false
end
look_ptr = look_ptr + self.dir
this = sub(self.str, look_ptr, look_ptr)
end
return self.dir == -1 and 3 or (19 - self.distance)
end
-- Y: character is in m_data.Y.
group_rules["Y"] = function(self, this)
return m_data.Y[this] and 20 - self.distance
end
-- Z: character is not a single alphanumeric character (this includes the string boundary).
group_rules["Z"] = function(self, this)
return not umatch(this, "^%w$") and 21 - self.distance
end
-- !: backward-only condition that holds when self.first_upper is set; consumes no input.
group_rules["!"] = function(self)
if self.dir == -1 and self.first_upper then
self.look_ptr = self.look_ptr - self.dir
return 1
end
end
-- #: matches only when scanning forwards, with a score of 0.
group_rules["#"] = function(self)
return self.dir == 1 and 0
end
-- $: not implemented yet; currently never matches.
group_rules["$"] = function(self, this)
if self.dir == 1 then
-- TODO
end
end
-- %: character equals the character visited immediately before it in the scan.
group_rules["%"] = function(self, this)
local prev = self.look_ptr - self.dir
return sub(self.str, prev, prev) == this and 21 - self.distance
end
-- &: backward-only condition that holds when self.stresses is positive; consumes no input.
group_rules["&"] = function(self)
if self.dir == -1 and self.stresses > 0 then
self.look_ptr = self.look_ptr - self.dir
return 19
end
end
-- +: always matches and adds 20 points; consumes no input.
group_rules["+"] = function(self)
self.look_ptr = self.look_ptr - self.dir
return 20
end
-- TODO: self.hyphen & self.hyphen_after
-- -: a literal hyphen, or the string boundary when the hyphen flag for the scanning
-- direction is set.
group_rules["-"] = function(self, this)
return (
this == "-" or
this == "" and (
self.dir == -1 and self.hyphen or
self.dir == 1 and self.hyphen_after
)
) and 22 - self.distance
end
-- .: any character except the string boundary.
group_rules["."] = function(self, this)
return this ~= "" and 20 - self.distance
end
-- <: always matches and subtracts 20 points; consumes no input.
group_rules["<"] = function(self)
self.look_ptr = self.look_ptr - self.dir
return -20
end
-- @ (may be repeated): requires at least as many runs of consecutive m_data.Y characters,
-- from the current position to the end of the string, as there are consecutive "@" rule
-- characters. The extra "@" characters are consumed from the rule here.
group_rules["@"] = function(self)
local syllables = 1
while sub(self.rule, self.rule_ptr + self.dir, self.rule_ptr + self.dir) == "@" do
syllables = syllables + 1
self:advance_rule_ptr()
end
local look_ptr, vowel_count, can_increment = self.look_ptr, 0, true
local this = sub(self.str, look_ptr, look_ptr)
while this ~= "" do
if m_data.Y[this] then
-- Only the first character of each run is counted.
vowel_count = vowel_count + (can_increment and 1 or 0)
can_increment = false
else
can_increment = true
end
look_ptr = look_ptr + self.dir
this = sub(self.str, look_ptr, look_ptr)
end
return vowel_count >= syllables and 18 + syllables - self.distance
end
-- _: the string boundary (no character at the current position).
group_rules["_"] = function(self, this)
return this == "" and (self.dir == -1 and 4 or (21 - self.distance))
end
--- Tests the current rule character (`self.rule_char`) against the input at `self.look_ptr`.
-- Special rule characters are delegated to the `group_rules` dispatch table; any other
-- character must equal the input character literally.
-- @return the score for the match (a number), or a falsy value when there is no match.
function Phonemizer:group_rules()
	local pos = self.look_ptr
	-- Positions before the start of the string must read as "" (the string boundary).
	-- string.sub interprets negative indices as counting from the end, so without this guard
	-- a backward scan that runs past the first character would wrap round and read the end of
	-- the string instead.
	local this = ""
	if pos >= 1 then
		this = sub(self.str, pos, pos)
	end
	local handler = group_rules[self.rule_char]
	if handler then
		return handler(self, this)
	elseif self.rule_char == this then
		return 21 - self.distance
	end
	return false
end
-- Matches one context string of a rule against the input and returns its total score.
-- rule: the context string to match.
-- look_ptr: index in self.str of the first input character to examine.
-- dir: scanning direction, 1 (forwards) or -1 (backwards). It is also used as the initial
-- rule pointer, so a backward scan starts at the last character of `rule`.
-- distance_iter: amount added to self.distance for each rule element processed.
-- open_bracket, close_bracket: the characters that open and close a group of alternatives,
-- as seen in the scanning direction.
-- Returns false as soon as an element fails to match, otherwise the sum of the scores.
-- Side effects: resets self.rule_prefix and self.rule_suffix, and overwrites self.rule,
-- self.look_ptr, self.dir, self.rule_ptr, self.rule_char and self.distance.
function Phonemizer:check_rule(rule, look_ptr, dir, distance_iter, open_bracket, close_bracket)
self.rule = rule
self.rule_prefix = nil
self.rule_suffix = nil
self.look_ptr = look_ptr
self.dir = dir
self.rule_ptr = dir
self:get_rule_char()
-- Start one step "before" zero so that the first element is scored at distance 0.
self.distance = -distance_iter
local points = 0
while self.rule_char ~= "" do
self.distance = self.distance + distance_iter
-- Cap the distance at 19.
self.distance = self.distance > 18 and 19 or self.distance
if self.rule_char == open_bracket then
-- Group of alternatives: every rule character up to the closing bracket is tried, and
-- the highest score is kept.
self:advance_rule_ptr()
self:get_rule_char()
local best_score = -1
while not (
self.rule_char == "" or
self.rule_char == close_bracket
) do
local add = self:group_rules()
if add and add > best_score then
best_score = add
end
self:advance_rule_ptr()
self:get_rule_char()
end
-- No alternative matched.
if best_score == -1 then
return false
end
points = points + best_score
else
local add = self:group_rules()
if not add then
return false
end
points = points + add
end
-- Move on to the next rule element and the next input character.
self:advance_rule_ptr()
self:get_rule_char()
self.look_ptr = self.look_ptr + dir
end
return points
end
--- Scores the rule stored at `rules[i] .. rules[i + 4]` against the input at `self.head` and
-- records it as the best candidate if it beats the current best score.
-- Slots used: `rules[i]` is the context before the match (scanned backwards), `rules[i + 1]`
-- the main literal match, `rules[i + 2]` the context after the match (scanned forwards),
-- `rules[i + 3]` the slot remembered in `self.best_rule`, and `rules[i + 4]` the conditional
-- modifiers.
-- @param i index of the first slot of the rule.
-- @return false when the rule does not apply; nothing otherwise.
function Phonemizer:check_rules(i)
	-- Temporary: fail if rule has conditional modifiers.
	if rules[i + 4] then
		return false
	end
	local points = self:main_rule(rules[i + 1])
	if not points then
		return false
	end
	-- Forget any prefix/suffix data captured while checking an earlier rule. check_rule only
	-- resets these when it is called, so a rule with neither context string would otherwise
	-- inherit them from whichever rule was checked last.
	self.rule_prefix = nil
	self.rule_suffix = nil
	local pre, post = rules[i], rules[i + 2]
	if pre then
		local add = self:check_rule(pre, self.head - 1, -1, 2, "]", "[")
		if not add then
			return false
		end
		points = points + add
	end
	if post then
		local add = self:check_rule(post, self.head + #rules[i + 1], 1, 6, "[", "]")
		if not add then
			return false
		end
		points = points + add
	end
	if points > self.best_score then
		self.best_score = points
		self.best_rule = i + 3
		self.prefix = self.rule_prefix
		self.suffix = self.rule_suffix
	end
end
-- Handlers for the single-letter modifiers that can follow a prefix length in a rule.
-- Each handler receives the Phonemizer, whose `prefix` and `stem` fields have already been
-- replaced by data tables.
local prefix_modifiers = {
	-- "t": force stress handling to run on the stem.
	t = function(self)
		self.stem.stress_override = true
	end,
	-- "i": a prefix ending in "i" is respelt with a final "y".
	i = function(self)
		local prefix = self.prefix
		local str = prefix.str
		if sub(str, -1) ~= "i" then
			return
		end
		prefix.str = sub(str, 1, -2) .. "y"
	end,
}
--- Splits the input into a prefix and a stem, phonemizes each separately and joins the results.
-- On entry `self.prefix` is the string captured from the rule: its first character is the
-- prefix length and the remaining characters are modifier letters. On exit `self.prefix` and
-- `self.stem` are the data tables that were phonemized, and `self.phonemes` holds the prefix
-- phonemes followed by the stem phonemes.
-- Stress: if the phonemes found so far contain no primary stress, or the "t" modifier is
-- present, the stem's stress is calculated on its own (stress_override); otherwise it is left
-- to be calculated together with the prefix.
function Phonemizer:handle_prefix()
	local spec = self.prefix
	local flags = sub(spec, 2)
	self.prefix_modifiers = flags
	-- The length character is passed to sub() as a string and converted by Lua.
	local prefix = {
		str = sub(self.str, 1, sub(spec, 1, 1)),
		-- The stem is treated as a "suffix" that has been removed.
		suffix_removed = true
	}
	self.prefix = prefix
	self.stem = {
		str = sub(self.str, #prefix.str + 1),
		prefix_removed = true,
		stress_override = self.max_stress < 4
	}
	-- Apply the modifiers in the order in which they appear.
	for n = 1, #flags do
		local handler = prefix_modifiers[sub(flags, n, n)]
		handler(self)
	end
	-- Phonemize the prefix afresh, then append the stem's phonemes to the same sequence.
	local combined = self:new(prefix):get_phonemes()
	self.phonemes = combined
	local stem_phonemes = self:new(self.stem):get_phonemes()
	for _, phoneme in ipairs(stem_phonemes) do
		phoneme.parent = combined
		insert(combined, phoneme)
	end
end
-- Handlers for the single-letter modifiers that can follow a suffix length in a rule.
-- Each handler receives the Phonemizer after `self.stem` has been set to the data table for
-- the word with its suffix removed.
local suffix_modifiers = {}

-- "a": record that the removed suffix starts with a vowel.
suffix_modifiers["a"] = function(self)
	self.stem.suffix_vowel = true
end

-- "d": record that the stem ends in a doubled letter.
suffix_modifiers["d"] = function(self)
	if sub(self.stem.str, -2, -2) == sub(self.stem.str, -1, -1) then
		self.stem.doubled_final_letter = true
	end
end

-- "e": restore a final "e" that was dropped when the suffix was attached.
-- If the stem ends in a character from m_data.Y followed by one from m_data.B, an "e" is
-- added unless the stem ends in one of m_data.add_e_exceptions. Otherwise an "e" is added
-- when the stem ends in one of m_data.add_e_additions.
-- At most one "e" is ever added: previously the additions were also tested after an "e" had
-- already been appended, and testing continued after a match, so the stem could gain more
-- than one "e".
suffix_modifiers["e"] = function(self)
	local stem = self.stem
	local str = stem.str
	local function add_e()
		stem.str = str .. "e"
		stem.e_added = true
	end
	if m_data.Y[sub(str, -2, -2)] and m_data.B[sub(str, -1, -1)] then
		for exception, len in pairs(m_data.add_e_exceptions) do
			if sub(str, -len) == exception then
				return
			end
		end
		add_e()
		return
	end
	for addition, len in pairs(m_data.add_e_additions) do
		if sub(str, -len) == addition then
			add_e()
			return
		end
	end
end

-- "f": not implemented yet.
suffix_modifiers["f"] = function(self)
	-- TODO
end

-- "i": a stem ending in "i" is respelt with a final "y".
suffix_modifiers["i"] = function(self)
	if sub(self.stem.str, -1, -1) == "i" then
		self.stem.str = sub(self.stem.str, 1, #self.stem.str - 1) .. "y"
	end
end

-- "m": clear the stem's suffix_removed flag.
suffix_modifiers["m"] = function(self)
	self.stem.suffix_removed = nil
end

-- "q": not implemented yet.
suffix_modifiers["q"] = function(self)
	-- TODO
end

-- "t": same handler as the prefix modifier "t".
suffix_modifiers["t"] = prefix_modifiers["t"]

-- "v": mark the stem as a verb.
suffix_modifiers["v"] = function(self)
	self.stem.pos = "verb"
end
--- Removes a suffix from the input and phonemizes the remaining stem.
-- On entry `self.suffix` is the string captured from the rule: its first character is the
-- suffix length and the remaining characters are modifier letters. On exit `self.stem` is the
-- data table for the stem and `self.phonemes` holds the stem's phonemes only.
function Phonemizer:handle_suffix()
	local spec = self.suffix
	-- The length character stays a string; Lua converts it in the arithmetic below.
	local suffix_len = sub(spec, 1, 1)
	local stem = {
		str = sub(self.str, 1, -suffix_len - 1),
		suffix_removed = true
	}
	self.stem = stem
	-- Apply the modifiers in the order in which they appear.
	for n = 2, #spec do
		local handler = suffix_modifiers[sub(spec, n, n)]
		handler(self)
	end
	self.phonemes = self:new(self.stem):get_phonemes()
end
--- Parses a string of phoneme symbols and appends one phoneme instance per symbol to
-- `self.phonemes`, updating the running totals `self.max_stress`, `self.stresses` and
-- `self.vowels`.
-- At each position the longest key of `phonemes` that matches the text is used.
-- @param new string of concatenated phoneme symbols.
-- Raises an error if the text at some position matches no phoneme symbol (this previously
-- failed with an unhelpful "attempt to index a nil value").
function Phonemizer:insert_phonemes(new)
	local i, unstressed = 1, nil
	while i <= #new do
		local best_match
		for k in pairs(phonemes) do
			if k == sub(new, i, i + #k - 1) and (
				not best_match or
				#k > #best_match
			) then
				best_match = k
			end
		end
		if not best_match then
			error(("no phoneme symbol matches position %d of %q"):format(i, new))
		end
		insert(self.phonemes, phonemes[best_match]:new())
		i = i + #best_match
		best_match = phonemes[best_match]
		if best_match.stress and best_match.level then
			-- A stress marker with a level: the vowel that follows it is not counted in
			-- self.stresses.
			self.max_stress = max(self.max_stress, best_match.level)
			unstressed = true
		elseif best_match.vowel then
			if not (unstressed or best_match.unstressed) then
				self.stresses = self.stresses + 1
			end
			unstressed = nil
			self.vowels = self.vowels + 1
		end
	end
end
-- Converts self.str into a sequence of phonemes and returns it.
-- The string is scanned from left to right. At each position every rule is scored and the
-- phonemes of the best-scoring rule are appended. A rule that captures a suffix causes the
-- stem to be phonemized separately first; a rule that captures a prefix hands the whole word
-- over to handle_prefix and ends the scan.
-- Stress is resolved at the end unless this object is the stem or prefix part of a larger
-- word and stress_override is not set.
function Phonemizer:get_phonemes()
-- Remember whether the input starts with an upper-case letter.
if umatch(sub(self.str, 1, 1), "^%u$") then
self.first_upper = true
end
-- Apply the replacements in m_data.replace; each key is used as a gsub pattern.
for k, v in pairs(m_data.replace) do
self.str = gsub(self.str, k, v)
end
self.head = 1
self.phonemes = setmetatable({}, Word)
self.stresses = 0
self.max_stress = 0
self.vowels = 0
-- Placeholder for a lookup in `list`; currently has no effect.
for i = list.n, 1, -2 do
if list[i] == self.str then
-- TODO
end
end
while self.head <= #self.str do
self.best_rule = nil
self.best_score = -1
-- Rules are stored flat, five slots per rule.
for i = 1, rules.n, 5 do
self:check_rules(i)
end
if self.best_rule then
-- self.best_rule is the slot holding the rule's phoneme string; the slot before it is
-- the context after the match, and the one before that is the main match.
if self.suffix then
self:handle_suffix()
end
if rules[self.best_rule] then
self:insert_phonemes(rules[self.best_rule])
-- If the post-rule contains #, replace the next "e" with "E".
if rules[self.best_rule - 1] and match(rules[self.best_rule - 1], "#") then
self.str = sub(self.str, 1, self.head - 1) ..
gsub(sub(self.str, self.head), "e", "E", 1)
end
end
-- Break after a prefix, since the remainder is handled by a recursive call.
if self.prefix then
self:handle_prefix()
break
end
-- Advance by the length of the main match.
self.head = self.head + #rules[self.best_rule - 2]
else
-- No rule matched here: skip one character.
self.head = self.head + 1
end
end
if (
self.stress_override or
not (self.prefix_removed or self.suffix_removed)
) then
self.phonemes:handle_stress()
end
return self.phonemes
end
-- A word is a sequence (array) of phonemes. `Word` is assigned here rather than declared,
-- because the variable is already declared earlier in the file.
Word = {}
Word.__index = Word

--- Builds the phoneme sequence for a spelling.
-- @param word the spelling to convert.
-- @return the sequence produced by running a new Phonemizer over `word`.
function Word:new(word)
	local phonemizer = Phonemizer:new{str = word}
	return (phonemizer:get_phonemes())
end
--- Iterator step used by Word:iterate.
-- Advances `self.i` to the next element that has the attribute named by `self.attr` (or
-- simply to the next element when no attribute is set).
-- @return the element at the new position, or nil when the end has been passed.
function Word:iter()
	local attr = self.attr
	local index = self.i
	local item
	repeat
		index = index + 1
		item = self[index]
	until not item or not attr or item[attr]
	self.i = index
	return item
end

--- Starts an iteration over the word for use in a generic `for` loop.
-- The position is kept in `self.i`, so a loop body may adjust it (e.g. after removing the
-- current element).
-- @param attr optional attribute name; when given, only elements with that attribute are visited.
-- @return the iterator function and the word itself.
function Word:iterate(attr)
	self.attr = attr
	self.i = 0
	return self.iter, self
end
--- Resolves the stress markers in the word into a numeric `level` on each syllabic vowel.
-- Levels as used here: -1 elided, 0 diminished, 1 unstressed, 2 no explicit stress,
-- 3 secondary, 4 primary, 5 primary with priority.
-- The stress markers themselves are removed from the word.
-- Vowels flagged `nonsyllabic` never receive a level; they are skipped wherever a level is
-- compared (such comparisons previously raised an error on nil). A missing neighbouring
-- vowel is treated as having level 0.
function Word:handle_stress()
	local max_stress = 0
	-- Level announced by the most recent stress marker; applies to the next syllabic vowel.
	local stress
	for phoneme in self:iterate() do
		if phoneme.type == "=" then
			-- Stress on previous vowel.
			local prev_vowel = self:next(-1, true, "vowel")
			if prev_vowel and prev_vowel.level and prev_vowel.level < 5 then
				prev_vowel.level = 4
				max_stress = max(max_stress, 4)
				-- Reduce any preceding primary stress phonemes.
				for j = self.i - 1, 1, -1 do
					if self[j] ~= prev_vowel and self[j].level == 4 then
						self[j].level = 3
					end
				end
			end
			remove(self, self.i)
			self.i = self.i - 1
		elseif phoneme.stress then
			stress = phoneme.level
			remove(self, self.i)
			self.i = self.i - 1
			if stress > max_stress then
				max_stress = stress
			elseif stress >= 4 then
				-- A primary stress that does not exceed the maximum seen so far is reduced.
				stress = max_stress - 1
			end
		elseif phoneme.vowel and not phoneme.nonsyllabic then
			if stress then
				phoneme.level = stress
			else
				phoneme.level = phoneme.unstressed and 1 or 2
				max_stress = max(max_stress, phoneme.level)
			end
			stress = nil
		end
	end
	-- Remove elided syllables, unless the following vowel is stressed, in which case the
	-- vowel is kept with no explicit stress.
	for phoneme in self:iterate("vowel") do
		if phoneme.level == -1 then
			local nxt = self:next(1, true, "vowel")
			local nxt_level = nxt and nxt.level or 0
			if nxt_level < 3 then
				remove(self, self.i)
				self.i = self.i - 1
			else
				phoneme.level = 2
			end
		end
	end
	-- Handle stressed syllables:
	-- If there's a primary stress with priority, reduce any other primary stresses to secondary.
	-- Add secondary stress to every unstressed syllable that isn't explicitly unstressed/diminished or adjacent to a stressed syllable. If primary stress has not yet been added, then the first match gets primary stress instead.
	for phoneme in self:iterate("vowel") do
		local level = phoneme.level
		if level == nil then
			-- Non-syllabic vowel: nothing to do.
		elseif level > 3 and max_stress == 5 then
			phoneme.level = level - 1
		elseif level == 2 then
			local prev, nxt = self:next(-1, true, "vowel"), self:next(1, true, "vowel")
			local prev_level = prev and prev.level or 0
			local nxt_level = nxt and nxt.level or 0
			if not (prev_level > 2 or nxt_level > 2) then
				phoneme.level = max_stress <= 3 and 4 or 3
				-- Never lower the maximum: a priority primary (5) must stay in force so that
				-- later primaries are still reduced.
				max_stress = max(max_stress, 4)
			end
		end
	end
	-- Handle unstressed and diminished syllables: first and last syllables can't be diminished, nor a penultimate syllable before an unstressed final syllable. Otherwise, an unstressed syllable is diminished.
	-- Note: this must be done after any explicit stresses have been added, since the stress of the following syllable is relevant.
	for phoneme in self:iterate("vowel") do
		local level = phoneme.level
		if level and level <= 2 then
			if self:nth(phoneme, "vowel") == 1 then
				phoneme.level = 1
			else
				local pos_from_end = self:nth(phoneme, "vowel", true)
				local keep = pos_from_end == -1
				if not keep and pos_from_end == -2 then
					local nxt = self:next(1, true, "vowel")
					keep = (nxt and nxt.level or 0) <= 2
				end
				phoneme.level = keep and 1 or 0
			end
		end
	end
end
-- Inserts a stress mark phoneme before the onset of every phoneme whose `level` is 3 or higher.
-- Level 3 gets the "," mark (secondary stress); any higher level gets the "'" mark.
-- The mark is placed:
--   * at the very start of the word, if the stressed phoneme is the word's first vowel;
--   * directly before the stressed phoneme, if the preceding phoneme is a vowel or has type ";";
--   * otherwise before the longest run of up to three preceding phonemes whose concatenated
--     string form is a key of `m_data.initials`, or directly before the stressed phoneme if no
--     such run exists.
-- Modifies the word in place and returns nothing.
function Word:handle_syllabification()
	local i = 0
	while i < #self do
		i = i + 1
		local phoneme = self[i]
		local level = phoneme.level
		-- `repeat ... until true` runs exactly once; `break` serves as "continue".
		repeat
			if not level or level < 3 then
				break
			end
			local stress = phonemes[level == 3 and "," or "'"]:new()
			if self:nth(phoneme, "vowel") == 1 then
				insert(self, 1, stress)
				-- Skip over the stressed phoneme, which has shifted one place to the right.
				i = i + 1
				break
			end
			local preceding = self[i - 1]
			if preceding.vowel or preceding.type == ";" then
				insert(self, i, stress)
				i = i + 1
				break
			end
			-- Look for the longest valid onset, trying three phonemes first, then two, then one.
			-- `onset` stays nil when nothing matches, so the fallback below is reachable
			-- (previously the last candidate tried was always treated as a match).
			-- NOTE(review): this assumes `m_data.initials` also lists single-phoneme onsets - confirm.
			local onset
			for j = 3, 1, -1 do
				if i > j then
					local cluster = {}
					for k = i - j, i - 1 do
						insert(cluster, tostring(self[k]))
					end
					if m_data.initials[concat(cluster)] then
						onset = cluster
						break
					end
				end
			end
			if not onset then
				insert(self, i, stress)
				i = i + 1
				break
			end
			-- If the phoneme before a multi-phoneme onset is flagged `short`, the first onset
			-- phoneme is left before the stress mark (presumably so that it closes the preceding
			-- syllable). The nil check covers an onset that starts at the first phoneme.
			local before_onset = self[i - #onset - 1]
			if #onset > 1 and before_onset and before_onset.short then
				remove(onset, 1)
			end
			insert(self, i - #onset, stress)
			i = i + 1
		until true
	end
end
-- Creates a new instance of the phoneme stored under key `phoneme` in `phonemes`, with this
-- word as its parent, and inserts it directly after the current position (`self.i`).
function Word:append(phoneme)
	local prototype = phonemes[phoneme]
	local added = prototype:new{parent = self}
	insert(self, self.i + 1, added)
end
-- Creates a new instance of the phoneme stored under key `phoneme` in `phonemes`, with this
-- word as its parent, and inserts it at the current position (`self.i`), which pushes the
-- current phoneme one place to the right. The new phoneme is made current while its `process`
-- handler (if any) runs; afterwards the cursor is advanced so that it points at the original
-- phoneme again.
function Word:prepend(phoneme)
	local added = phonemes[phoneme]:new{parent = self}
	insert(self, self.i, added)
	self.phoneme = added
	if added.process then
		added:process()
	end
	self.i = self.i + 1
	-- Keep `self.phoneme` in step with `self.i`: without this it would still refer to the
	-- inserted phoneme, and a later `self:next(0, ...)` by the caller would read the wrong one.
	self.phoneme = self[self.i]
end
-- Returns the nth phoneme relative to the current one (`self.i`) that has attribute `attr`.
-- A positive `n` searches forwards and a negative `n` searches backwards.
-- If `word` is truthy, the search is confined to this word; otherwise it carries on into the
-- neighbouring words via `next_word`.
-- If `word` is truthy and `attr` is omitted, the phoneme at offset `n` is returned directly.
-- If `n` is 0, the current phoneme is returned if `attr` is omitted or the phoneme has it;
-- otherwise the falsy attribute value is returned.
-- e.g. next(2, true, "vowel") returns the second vowel after the current phoneme in this word.
-- next(-1, false, "sibilant") returns the previous sibilant, looking into earlier words if needed.
function Word:next(n, word, attr)
	if word and not attr then
		return self[self.i + n]
	end
	if n == 0 then
		local current = self.phoneme
		return (not attr or current[attr]) and current
	end
	local forwards = n > 0
	local step = forwards and 1 or -1
	local limit = forwards and #self or 1
	local seen = 0
	for idx = self.i + step, limit, step do
		local candidate = self[idx]
		if not attr or candidate[attr] then
			seen = seen + step
			if seen == n then
				return candidate
			end
		end
	end
	-- Nothing found in this word.
	if word then
		return
	end
	return self:next_word(self.parent.i, n, attr, step, seen)
end
-- Continues a search started by `Word:next` in the neighbouring words of `self.parent`.
-- `i` is the index (in `self.parent`) of the word just searched, `inc` is the direction (1 or -1),
-- `n` is the target match number, `attr` is the attribute to match (any phoneme matches if
-- omitted), and `count` is the running number of matches so far (negative when going backwards).
-- Returns the matching phoneme, or nothing if the words run out first.
function Word:next_word(i, n, attr, inc, count)
	i = i + inc
	-- `word` must be local: it was previously assigned as an accidental global.
	local word = self.parent[i]
	-- Stop at the edge of the word list, and also at any entry that is not a word table
	-- (`export.show` may already have replaced a processed word with its output string,
	-- which cannot be searched).
	if type(word) ~= "table" then
		return
	end
	for j = n > 0 and 1 or #word, n > 0 and #word or 1, inc do
		local phoneme = word[j]
		if not attr or phoneme[attr] then
			count = count + inc
			if count == n then
				return phoneme
			end
		end
	end
	return word:next_word(i, n, attr, inc, count)
end
-- Counts the phonemes in this word that have attribute `attr`, and returns the count reached
-- at `phoneme`. With `from_end`, counting starts from the end of the word and the result is
-- negative. Returns nil if `phoneme` does not have `attr` or is not found in the word.
-- e.g. If `phoneme` is the second vowel, nth(phoneme, "vowel") will return 2.
-- If `phoneme` is the final vowel, nth(phoneme, "vowel", true) will return -1.
function Word:nth(phoneme, attr, from_end)
	local first, last, step = 1, #self, 1
	if from_end then
		first, last, step = #self, 1, -1
	end
	local tally = 0
	for idx = first, last, step do
		local candidate = self[idx]
		if candidate[attr] then
			tally = tally + step
			if candidate == phoneme then
				return tally
			end
		end
	end
end
-- Processes the word in place and returns its transcription as a string:
--   1. collapses runs of identical non-vowel phonemes and sets every phoneme's parent to this word;
--   2. runs each phoneme's `process` handler (if any), with `self.i` and `self.phoneme` tracking
--      the phoneme being handled;
--   3. inserts the stress marks;
--   4. joins each phoneme's `ipa` value, or its `type` if it has none.
function Word:process()
	-- Remove duplicate consonant phonemes.
	local pos = 1
	while pos <= #self do
		local current = self[pos]
		if pos > 1 and not current.vowel and current.type == self[pos - 1].type then
			-- Do not advance: the next phoneme has moved into this slot.
			remove(self, pos)
		else
			current.parent = self
			pos = pos + 1
		end
	end
	-- Handlers may insert or remove phonemes and move `self.i`, so the bound and the cursor are
	-- re-read from the word on every pass.
	self.i = 0
	while self.i < #self do
		self.i = self.i + 1
		local current = self[self.i]
		self.phoneme = current
		if current.process then
			current:process()
		end
	end
	self:handle_syllabification()
	local pieces = {}
	for _, item in ipairs(self) do
		insert(pieces, item.ipa or item.type)
	end
	return concat(pieces)
end
local export = {}

-- Entry point. `frame` is either the input string itself or a table with the input in
-- `frame.args[1]`. The input is split on every character that is not alphanumeric or an
-- apostrophe, each piece is turned into a Word and processed, and the results are returned
-- joined by spaces.
function export.show(frame)
	if type(frame) == "table" then
		frame = frame.args[1]
	end
	local words = split(frame, "[^%w']")
	for i, word in ipairs(words) do
		word = Word:new(word)
		word.parent = words
		words[i] = word
	end
	-- Collect the results separately instead of overwriting `words[i]`: words look into their
	-- neighbours through `words` (see `Word:next_word`), so already-processed entries must
	-- remain Word objects rather than turning into strings.
	local output = {}
	for i, word in ipairs(words) do
		-- `words.i` and `words.word` identify the word currently being processed.
		words.i = i
		words.word = word
		output[i] = word:process()
	end
	return concat(output, " ")
end

return export