-- Module:VL-translit
-- Jump to navigation
-- Jump to search
-- - The following documentation is located at Module:VL-translit/documentation. [edit]
-- - Useful links: subpage list • links • transclusions • testcases • sandbox
local la_utils = require("Module:la-utilities")
-- Table of public functions; entries are attached further down and the table
-- is returned at the end of the module.
local export = {}
-- Vowel rewrite rules, looked up by branch name. Every value is an ordered
-- list of {pattern, replacement} pairs. convert_word runs the "all" list and
-- then the list of the requested branch over each syllable, in list order, so
-- the position of a pair inside its list is significant.
local vowel_patterns = {}
do
	-- Applied regardless of the branch.
	local shared = {
		{"ā", "a"},
		{"ae", "ę"},
		{"áé", "ę́"},
		{"e", "ę"},
		{"o", "ǫ"},
	}
	local italo_western = {
		{"ē", "ẹ"},
		{"i", "ẹ"},
		{"ī", "i"},
		{"ō", "ọ"},
		{"u", "ọ"},
		{"ū", "u"},
	}
	local eastern = {
		{"ē", "ẹ"},
		{"i", "ẹ"},
		{"ī", "i"},
		{"ō", "o"},
		{"ū", "u"},
	}
	local sardinian = {
		{"ē", "e"},
		{"ẹ", "e"},
		{"ī", "i"},
		{"ō", "o"},
		{"ọ", "o"},
		{"ū", "u"},
	}

	vowel_patterns["all"] = shared
	-- Short codes and long names select the very same list object.
	for _, name in ipairs({"It-W", "Italo-Western", "Western"}) do
		vowel_patterns[name] = italo_western
	end
	for _, name in ipairs({"E", "Eastern", "Romanian"}) do
		vowel_patterns[name] = eastern
	end
	for _, name in ipairs({"S", "Sardinian"}) do
		vowel_patterns[name] = sardinian
	end
end
-- Maps input spellings to the phoneme symbols used internally.
-- segment_word takes the longest key that matches at the current position and
-- copies any character without an entry through unchanged.
local dictionary = {}
-- Spellings that stand for themselves.
for _, letter in ipairs({
	"a", "e", "i", "o", "u",
	"ā", "ē", "ī", "ō", "ū",
	"ae",
	"b", "d", "f",
	"c", "g", "v", "x",
	"'",
}) do
	dictionary[letter] = letter
end
-- Spellings that are rewritten to a different symbol.
dictionary["oe"] = "ē"
dictionary["ai"] = "aị"
dictionary["ei"] = "ėị"
dictionary["au"] = "aụ"
dictionary["eu"] = "ėụ"
dictionary["qu"] = "qŭ"
-- Gives a list set-like lookup as well: after the call, list[v] == true for
-- every element v, while the numbered entries stay in place.
local function with_membership(list)
	for index = 1, #list do
		list[list[index]] = true
	end
	return list
end

-- Phoneme symbols that count as a syllable nucleus ("-" is treated as one too).
local vowels = with_membership({
	"a", "e", "i", "o", "u",
	"ā", "ē", "ī", "ō", "ū",
	"ae", "oe", "aị", "ėị", "aụ", "ėụ",
	"-",
})

-- Consonant sequences accepted at the start of a syllable.
local onsets = with_membership({
	-- single consonants
	"b", "p", "d", "t",
	"g", "c", "cu", "qŭ",
	"f", "s", "z",
	"l", "m", "n", "r", "j", "v", "w",
	-- two-member clusters
	"bl", "pl", "br", "pr", "ps",
	"dr", "tr",
	"gl", "cl", "gr", "cr",
	"fl", "fr",
	-- clusters beginning with s
	"sp", "st", "sc", "scu", "sl", "sm", "sn", "su",
	"spr", "str", "scr",
	"spl", "scl",
})

-- Consonant sequences accepted at the end of a syllable.
local codas = with_membership({
	"b", "p", "d", "t", "g", "c",
	"f", "s", "z",
	"l", "m", "n", "r", "j",
	"sp", "st", "sc",
	"lp", "lt", "lc",
	"lb", "ld", "lg",
	"lf",
	"rp", "rt", "rc",
	"rb", "rd", "rg",
	"rf",
	"mp", "nt", "nc",
	"mb", "nd", "ng",
	"lm", "rl", "rm", "rn",
	"ps", "ts", "cs", "x", "ls", "ns", "rs",
	"lcs", "ncs", "rcs",
	"lms", "rls", "rms", "rns",
})
-- b between two vowels becomes v; an optional "'" right after the first vowel
-- is kept. Non-overlapping matching misses every other b in a sequence such
-- as -ababa- (common in the VL conditional), so the rule is listed twice on
-- purpose in the table below.
local intervocalic_b_to_v = {"([aeiouāēīōū])('?)b([aeiouāēīōū])", "%1%2v%3"}

-- Ordered {pattern, replacement} pairs that convert_word applies to the whole
-- word (not to single syllables) before the word is cut into phonemes.
local word_rules_start = {
	{"h", ""},
	{"k", "c"},
	-- disabled: {"w", "v"},
	{"y", "i"},
	{"ȳ", "ī"},
	{"x('?)s", "x%1"},
	{"x('?)([ct])", "s%1%2"},
	{"b([st])", "p%1"},
	{"d([st])", "t%1"},
	{"g([st])", "c%1"},
	{"n([bp])", "m%1"},
	-- disabled: {"qu", "qv"},
	-- disabled: {"ngu([aeiouāēīōū])", "ngv%1"},
	intervocalic_b_to_v,
	intervocalic_b_to_v, -- second pass, not a copy-paste error
	{"um$", "u"},
}
-- Keyed by a consonant cluster. convert_word replaces every occurrence of
-- "<cluster>'" (cluster directly followed by the stress mark) with the value,
-- i.e. the same cluster with the mark moved to an earlier position.
local stress_shift_rules = {
	-- mark moves in front of the whole cluster
	qu = "'qu", gu = "'gu", v = "'v",
	bl = "'bl", pl = "'pl", br = "'br", pr = "'pr",
	dr = "'dr", tr = "'tr",
	gl = "'gl", cl = "'cl", gr = "'gr", cr = "'cr",
	fl = "'fl", fr = "'fr",
	-- mark moves to after the first consonant
	ct = "c't", pt = "p't", gd = "g'd",
	sl = "s'l", sm = "s'm", sn = "s'n", su = "s'u",
	st = "s't", xt = "x't",
	ngu = "n'gu",
	spr = "s'pr", str = "s'tr", scr = "s'cr",
	spl = "s'pl", scl = "s'cl",
	-- mark moves to after the first two consonants
	nct = "nc't",
}
-- Ordered {pattern, replacement} pairs that convert_word applies to the
-- re-assembled word after the per-syllable vowel rules have run.
local word_rules_end = {
	-- A word-initial ĭ is written j.
	{"^ĭ", "j"},
	-- Disabled rules, kept for reference:
	--   {"g([ei])", "j%1"},
	--   {"dĭ", "j"},
	--   {"gĭ", "j"},
	--   {"z", "j"},

	-- Turn the specially marked letters back into plain ones.
	{"ė", "e"},
	{"ị", "i"},
	{"ụ", "u"},
	{"ĭ", "i"},
	{"ŭ", "u"},

	-- Simplify vowel sequences.
	{"ei", "i"},
	{"ii", "i"},
	{"ee$", "ie"},
	-- Word-final e after another (possibly accented) vowel becomes i.
	{"([aẹęeọǫou])(́?)e$", "%1%2i"},
}
-- Nasalisation rules per branch, used by nasalize_vowels. Each entry is a
-- {pattern, replacement} pair: the pattern matches a vowel, an optional
-- combining acute (captured as %1) and n or m; the replacement is the tilde
-- vowel followed by the captured accent.
local nasalized = {}
nasalized["It-W"] = {
	{"[ẹęeēi](́?)[nm]", "ẽ%1"},
	{"[ī](́?)[nm]", "ĩ%1"},
	{"[ū](́?)[nm]", "ũ%1"},
	{"[ọǫoōu](́?)[nm]", "õ%1"},
}
nasalized["E"] = {
	{"[ẹęeēi](́?)[nm]", "ẽ%1"},
	{"[ī](́?)[nm]", "ĩ%1"},
	{"[uū](́?)[nm]", "ũ%1"},
	{"[ọoō](́?)[nm]", "õ%1"},
}
nasalized["S"] = {
	{"[eē](́?)[nm]", "ẽ%1"},
	{"[iī](́?)[nm]", "ĩ%1"},
	{"[uū](́?)[nm]", "ũ%1"},
	{"[oō](́?)[nm]", "õ%1"},
}
-- Same long-form names that vowel_patterns accepts. Without these, a branch
-- name such as "Western" passes the vowel rules and then fails in
-- nasalize_vowels, because nasalized[family] would be nil.
nasalized["Italo-Western"] = nasalized["It-W"]
nasalized["Western"] = nasalized["It-W"]
nasalized["Eastern"] = nasalized["E"]
nasalized["Romanian"] = nasalized["E"]
nasalized["Sardinian"] = nasalized["S"]
--- Replaces vowel + nasal sequences with a nasalised (tilde) vowel.
-- Two contexts are handled: vowel + n/m at the very end of the word, and
-- vowel + n/m followed by s anywhere in the word. A combining acute on the
-- vowel is carried over to the nasalised vowel.
-- @param word    the converted word (string)
-- @param family  key into the `nasalized` table; ipairs raises an error when
--                the key is missing
-- @return the word with the nasal sequences replaced
local function nasalize_vowels(word, family)
	-- a/ā behave the same in every branch, so they are handled here.
	word = mw.ustring.gsub(word, "[aā](́?)[nm]$", "ã%1")
	-- The accent is accepted right after the vowel (where place_accent puts
	-- it) as well as after the nasal, mirroring the per-branch rule below.
	-- Previously only the position after the nasal was matched, so a stressed
	-- "a" before "ns" was never nasalised.
	word = mw.ustring.gsub(word, "[aā](́?)[nm](́?)s", "ã%1%2s")
	for _, rule in ipairs(nasalized[family]) do
		-- word-final nasal
		word = mw.ustring.gsub(word, rule[1] .. "$", rule[2])
		-- nasal before s; rule[1] already holds capture 1, hence %2 here
		word = mw.ustring.gsub(word, rule[1] .. "(́?)s", rule[2] .. "%2s")
	end
	return word
end
--- Cuts a word into a list of phoneme symbols.
-- At each position the longest key of `dictionary` that matches is consumed
-- and its mapped symbol is emitted; when no key matches, a single character is
-- copied through as it is.
-- @param word  string to segment
-- @return list of phoneme strings, in order
local function segment_word(word)
	local ulen, usub = mw.ustring.len, mw.ustring.sub
	local phonemes = {}
	while ulen(word) > 0 do
		-- Look for the longest dictionary key the rest of the word starts with.
		local best_key, best_len = nil, 0
		for key in pairs(dictionary) do
			local key_len = ulen(key)
			if key_len > best_len and usub(word, 1, key_len) == key then
				best_key, best_len = key, key_len
			end
		end
		if best_key then
			table.insert(phonemes, dictionary[best_key])
			word = usub(word, best_len + 1)
		else
			-- Unknown character: keep it unchanged.
			table.insert(phonemes, usub(word, 1, 1))
			word = usub(word, 2)
		end
	end
	return phonemes
end
--- Returns the consonants in front of the first vowel of a syllable, joined
-- into one string. The stress mark "'" is left out of the result. When the
-- syllable has no vowel, all of its non-"'" phonemes are returned.
-- @param syll  list of phoneme strings
local function get_onset(syll)
	local parts = {}
	for _, phoneme in ipairs(syll) do
		if vowels[phoneme] then
			break
		elseif phoneme ~= "'" then
			parts[#parts + 1] = phoneme
		end
	end
	return table.concat(parts)
end
--- Returns everything after the last vowel of a syllable, joined into one
-- string. Unlike get_onset, a stress mark "'" found there is included. When
-- the syllable has no vowel, the whole syllable is returned.
-- @param syll  list of phoneme strings
local function get_coda(syll)
	-- Walk backwards to find where the trailing non-vowel run begins.
	local first = #syll + 1
	while first > 1 and not vowels[syll[first - 1]] do
		first = first - 1
	end
	return table.concat(syll, "", first)
end
--- Returns the first phoneme of the syllable that is listed in `vowels`;
-- returns nothing when the syllable contains no vowel.
-- @param syll  list of phoneme strings
local function get_vowel(syll)
	for _, phoneme in ipairs(syll) do
		if vowels[phoneme] then
			return phoneme
		end
	end
end
--- Groups a list of phonemes into syllables.
-- Pass 1 closes a syllable after every vowel and opens a new one at every
-- stress mark "'". Pass 2 moves consonants back to the previous syllable until
-- each onset is listed in `onsets`. Pass 3 validates every onset and coda.
-- @param remainder  list of phoneme strings (left unmodified)
-- @return list of syllables, each a list of phoneme strings; a stress mark,
--         if any, is still present as a "'" element
-- Raises an error when the word has consonants but no vowel, or when an onset
-- or coda is not listed in `onsets` / `codas`.
local function split_syllables(remainder)
	local syllables = {}
	local syll = {}
	-- Iterate instead of repeatedly removing the first element: same order,
	-- linear time, and the caller's table is not emptied.
	for _, phoneme in ipairs(remainder) do
		if phoneme == "'" then
			if #syll > 0 then
				table.insert(syllables, syll)
			end
			syll = {"'"}
		elseif vowels[phoneme] then
			table.insert(syll, phoneme)
			table.insert(syllables, syll)
			syll = {}
		else
			table.insert(syll, phoneme)
		end
	end
	-- If there are phonemes left, then the word ends in a consonant
	-- Add them to the last syllable
	if #syll > 0 then
		if #syllables == 0 then
			-- Nothing to attach to: previously this failed inside table.insert
			-- with an unrelated-looking message.
			error("no vowel error:[" .. table.concat(syll) .. "]")
		end
		local last = syllables[#syllables]
		for _, phoneme in ipairs(syll) do
			table.insert(last, phoneme)
		end
	end
	-- Split consonant clusters between syllables.
	-- An explicit index is used because a syllable may be removed inside the
	-- loop; with ipairs the syllable after a removed one would be skipped.
	local i = 2
	while i <= #syllables do
		local previous = syllables[i - 1]
		local current = syllables[i]
		local onset = get_onset(current)
		-- Shift over consonants until the syllable onset is valid
		while not (onset == "" or onsets[onset]) do
			table.insert(previous, table.remove(current, 1))
			onset = get_onset(current)
		end
		-- If the preceding syllable still ends with a vowel, and the current one begins with s + another consonant, or with gn, then shift it over
		if get_coda(previous) == "" and ((current[1] == "s" and not vowels[current[2]]) or (current[1] == "g" and current[2] == "n")) then
			table.insert(previous, table.remove(current, 1))
		end
		if get_vowel(current) then
			i = i + 1
		else
			-- If there is no vowel at all in this syllable, merge it into the
			-- previous one and re-examine whatever now sits at index i.
			for _ = 1, #current do
				table.insert(previous, table.remove(current, 1))
			end
			table.remove(syllables, i)
		end
	end
	-- Every onset and coda must be a known cluster.
	for _, checked in ipairs(syllables) do
		local onset = get_onset(checked)
		local coda = get_coda(checked)
		if not (onset == "" or onsets[onset]) then
			require("Module:debug").track("VL-noun/bad onset")
			error("onset error:[" .. onset .. "]")
		end
		if not (coda == "" or codas[coda]) then
			require("Module:debug").track("VL-noun/bad coda")
			error("coda error:[" .. coda .. "]")
		end
	end
	return syllables
end
--- Determines which syllable carries the stress.
-- An explicit stress mark "'" wins: it is removed from its syllable and that
-- syllable's index is returned. Otherwise:
--   * fewer than two syllables: the syllable count is returned (0 or 1);
--   * two syllables: the first one;
--   * three or more: the penult, unless its last phoneme is one of a, e, i,
--     o, u (no macron) - then the antepenult, except when the first two
--     phonemes of the final syllable are b/d/g followed by l/r, in which case
--     the penult is kept.
-- @param syllables  list of syllables (lists of phoneme strings); modified in
--                   place when a stress mark is removed
-- @return 1-based index of the stressed syllable
local function detect_accent(syllables)
	-- Manual override
	for index, syll in ipairs(syllables) do
		for pos, phoneme in ipairs(syll) do
			if phoneme == "'" then
				table.remove(syll, pos)
				return index
			end
		end
	end

	local count = #syllables
	if count < 2 then
		return count
	end
	if count == 2 then
		return 1
	end

	-- Does the penultimate syllable end in a single vowel?
	local penult = syllables[count - 1]
	if not mw.ustring.find(penult[#penult], "^[aeiou]$") then
		return count - 1
	end
	local ult = syllables[count]
	if ult[2] and mw.ustring.find(ult[1] .. ult[2], "[bdg][lr]") then
		return count - 1
	end
	return count - 2
end
--- Adds a combining acute accent to the vowel(s) of a syllable.
-- If the syllable contains i followed by a/o/ā/ō, the accent goes on that
-- second vowel; otherwise every character from the vowel set below receives
-- one.
-- @param syllable  the syllable as a string
-- @return the accented syllable (unchanged when it contains no listed vowel)
local function place_accent(syllable)
	-- `local` added: the variable used to leak into the global environment.
	-- Special case: i before a or o
	local new_syllable = mw.ustring.gsub(syllable, "i([aoāō])", "i%1́")
	if syllable == new_syllable then
		new_syllable = mw.ustring.gsub(syllable, "([aeẹęioọǫuāēīōūėịụ-])", "%1́")
	end
	return new_syllable
end
--- Converts a single word.
-- Steps: optional prothetic i, whole-word start rules, repositioning of the
-- stress mark, segmentation, syllabification, stress detection, e/i glide
-- before a following a/o/u, per-syllable vowel rules, accent placement,
-- whole-word end rules and finally nasalisation.
-- @param word           one word without whitespace; "'" marks the stress
-- @param vowel_pattern  branch name; must be a key of both `vowel_patterns`
--                       and `nasalized`
-- @return the converted word (without the leading asterisk)
local function convert_word(word, vowel_pattern)
	-- Prothetic i before s + consonant.
	-- The resolved rule table is compared rather than the literal name, so the
	-- aliases "Italo-Western" and "Western" behave exactly like "It-W".
	if vowel_patterns[vowel_pattern] == vowel_patterns["It-W"] then
		word = mw.ustring.gsub(word, "^s([ptclmn])", "is%1")
	end
	-- do starting word-based rules
	for _, rule in ipairs(word_rules_start) do
		word = mw.ustring.gsub(word, rule[1], rule[2])
	end
	-- Move a stress mark written after a consonant cluster to its proper place.
	for cluster, shifted in pairs(stress_shift_rules) do
		word = mw.ustring.gsub(word, cluster .. "'", shifted)
	end
	-- Double consonant stress shifts: "tt'" -> "t't".
	-- The second half used to be the string literal "v" instead of the loop
	-- variable, which produced e.g. "t'v".
	for _, consonant in ipairs({"b", "c", "d", "f", "g", "l", "m", "n", "p", "r", "s", "t"}) do
		word = mw.ustring.gsub(word, consonant .. consonant .. "'", consonant .. "'" .. consonant)
	end
	local phonemes = segment_word(word)
	local syllables = split_syllables(phonemes)
	local accent = detect_accent(syllables)
	-- Check antepenult for e, i > j (written i)
	--[[local antepenult = syllables[#syllables - 2]
	local penult = syllables[#syllables - 1]
	if antepenult and penult then
	if syllables[accent] == antepenult and mw.ustring.find(antepenult[#antepenult], "^[eēiī]$") and mw.ustring.find(penult[#penult], "^[aāoō]$") then
	syllables[#syllables-2][#antepenult] = "ị"
	accent = accent + 1
	end
	end]]--
	-- A syllable-final e/i directly before a syllable starting with a/o/u
	-- becomes the glide ĭ; if that syllable was stressed, the stress moves on
	-- to the following syllable.
	for index, syll in ipairs(syllables) do
		local following = syllables[index + 1]
		if following then
			if mw.ustring.find(syll[#syll], "^[eēiī]$") and mw.ustring.find(following[1], "^[aāoōuū]$") then
				syll[#syll] = "ĭ"
				if syllables[accent] == syll then
					accent = accent + 1
				end
			end
		end
	end
	-- Apply the vowel rules syllable by syllable, replacing each phoneme list
	-- with its converted string.
	for index, syll in ipairs(syllables) do
		local text = table.concat(syll)
		for _, rule in ipairs(vowel_patterns["all"]) do
			text = mw.ustring.gsub(text, rule[1], rule[2])
		end
		for _, rule in ipairs(vowel_patterns[vowel_pattern]) do
			text = mw.ustring.gsub(text, rule[1], rule[2])
		end
		--[[if i ~= accent then
		syll = mw.ustring.gsub(syll, "ẹ", "e")
		syll = mw.ustring.gsub(syll, "ọ", "o")
		end]]
		if index == accent then
			text = place_accent(text)
		end
		syllables[index] = text
	end
	word = table.concat(syllables)
	for _, rule in ipairs(word_rules_end) do
		word = mw.ustring.gsub(word, rule[1], rule[2])
	end
	word = nasalize_vowels(word, vowel_pattern)
	return word
end
--- Converts every whitespace-separated word of the input.
-- @param words          string holding one or more words
-- @param vowel_pattern  branch name passed on to convert_word
-- @return "*" followed by the converted words joined with single spaces
function export.convert_words(words, vowel_pattern)
	-- `local` added: the table used to be created as a global on every call.
	local word_table = {}
	for word in mw.ustring.gmatch(words, "%S+") do
		table.insert(word_table, convert_word(word, vowel_pattern))
	end
	return "*" .. table.concat(word_table, " ")
end
-- Module result: the table of public functions.
return export