Module:se-IPA
Appearance
- The following documentation is located at Module:se-IPA/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module implements {{se-IPA}}.
local export = {}
local lang = require("Module:languages").getByCode("se")
local m_str_utils = require("Module:string utilities")
local find = m_str_utils.find
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local lower = m_str_utils.lower
local sub = m_str_utils.sub
local u = require("Module:string/char")
local BREVE = u(0x0306)
-- Lookup table from orthographic letter sequences to their IPA rendering.
-- Keys may be several characters long; get_phoneme scans this table and
-- always takes the longest key that matches at the current position, so a
-- digraph such as "kh" wins over the single letter "k".
local letters_phonemes = {
-- Length mark: a "ˈ" left in the consonant list is rendered as "ː"
["ˈ"] = "ː",
-- Monophthongs and consonants whose IPA differs from the letter itself.
-- For á and the diphthongs below there are three variants: the plain key,
-- the key followed by "ˈ" (appended by shift) and the key followed by a
-- combining breve (appended by shorten).
["a"] = "a", ["ạ"] = "a",
["á"] = "aː", ["áˈ"] = "aˑ", ["á" .. BREVE ] = "a",
["b"] = "b",
["c"] = "t͡s",
["č"] = "t͡ʃ",
["d"] = "d",
["đ"] = "ð",
["ẹ"] = "e", ["ē"] = "eː",
["g"] = "ɡ",
["ī"] = "iː",
["kh"] = "kʰ",
["ọ"] = "o", ["ō"] = "oː",
["ph"] = "pʰ",
["š"] = "ʃ",
["th"] = "tʰ",
["ŧ"] = "θ",
["ū"] = "uː",
["z"] = "d͡z",
["ž"] = "d͡ʒ",
-- Diphthongs (plain, "ˈ"-suffixed, breve-suffixed)
["ea"] = "ea̯", ["eaˈ"] = "e̯a", ["ea" .. BREVE] = "ĕă̯",
["ie"] = "ie̯", ["ieˈ"] = "i̯e", ["ie" .. BREVE] = "ĭĕ̯",
["oa"] = "oɑ̯", ["oaˈ"] = "o̯ɑ", ["oa" .. BREVE] = "ŏɑ̯̆",
["uo"] = "uo̯", ["uoˈ"] = "u̯o", ["uo" .. BREVE] = "ŭŏ̯",
-- Consonant + j digraphs
["dj"] = "ɟ",
["lj"] = "ʎ",
["nj"] = "ɲ",
-- h + sonorant sequences: single, doubled, and doubled with "ˈ" in between
["hj"] = "j̥", ["hjj"] = "j̥.j̥", ["hjˈj"] = "j̥ː.j̥",
["hl"] = "l̥", ["hll"] = "l̥.l̥", ["hlˈl"] = "l̥ː.l̥",
["hm"] = "m̥", ["hmm"] = "m̥.m̥", ["hmˈm"] = "m̥ː.m̥",
["hn"] = "n̥", ["hnn"] = "n̥.n̥", ["hnˈn"] = "n̥ː.n̥",
["hr"] = "r̥", ["hrr"] = "r̥.r̥", ["hrˈr"] = "r̥ː.r̥",
}
-- This adds letters_phonemes["e"] = "e", letters_phonemes["i"] = "i", etc.
-- (every letter in the string below is transcribed as itself).
for letter in gmatch("efhijklmnŋoprstuv", ".") do
letters_phonemes[letter] = letter
end
-- Preaspirated: for each of p t c č k, "h" + letter maps to "h" + phoneme,
-- and "h" + doubled letter maps to "hː" + phoneme.
for letter in gmatch("ptcčk", ".") do
letters_phonemes["h" .. letter] = "h" .. letters_phonemes[letter]
letters_phonemes["h" .. letter .. letter] = "hː" .. letters_phonemes[letter]
end
-- Splits one unit off the front of `remainder`.
-- The unit is the longest key of letters_phonemes that `remainder` starts
-- with; when no key matches, it is simply the first character.
-- Returns the unit followed by whatever is left of the string.
local function get_phoneme(remainder)
	local best, best_len = "", 0

	for key in pairs(letters_phonemes) do
		local key_len = len(key)
		-- Only a strictly longer key can replace the current candidate
		if key_len > best_len and sub(remainder, 1, key_len) == key then
			best, best_len = key, key_len
		end
	end

	if best_len == 0 then
		-- Unknown character: hand it back unchanged
		return sub(remainder, 1, 1), sub(remainder, 2)
	end

	return best, sub(remainder, best_len + 1)
end
-- Reads one syllable off the front of `remainder`: every leading non-vowel
-- unit, followed by at most one vowel unit.
-- Returns the syllable table {cons = {...; quantity = n}, vowel = "..."} and
-- the unread rest of the string, or nil for the rest once it is exhausted.
local function get_syllable(remainder)
	local syll = {cons = {}, vowel = ""}
	local cluster = syll.cons
	local unit

	-- Collect consonant units until a vowel (or the end of the string) is reached
	while find(remainder, "^([^aạáeẹēiīoọōuū]+)") do
		unit, remainder = get_phoneme(remainder)

		if unit == "nˈnj" then
			require("Module:debug").track("se-IPA/nnj")
		end

		if unit == "ˈ" then
			-- The mark itself is not stored; it only forces quantity 3
			cluster.quantity = 3
		else
			local previous = cluster[#cluster]

			if unit == "dj" or unit == "lj" then
				if previous == string.sub(unit, 1, 1) then
					-- "ddj"/"llj": the preceding plain letter is absorbed into the digraph
					cluster[#cluster] = unit
					cluster.quantity = 3
				else
					cluster[#cluster + 1] = unit
				end
			elseif unit == "nj" and previous == "n" then
				cluster[#cluster] = "nj"
			end

			-- Always appended, so a lone "dj"/"lj" ends up stored twice
			cluster[#cluster + 1] = unit
		end
	end

	if find(remainder, "^([aạáeẹēiīoọōuū]+)") then
		syll.vowel, remainder = get_phoneme(remainder)
	end

	if remainder == "" then
		remainder = nil
	end

	-- Determine consonant quantity, unless it was already fixed above
	if not cluster.quantity then
		if not cluster[2] then
			cluster.quantity = 1
		else
			local last, before_last = cluster[#cluster], cluster[#cluster - 1]
			-- Last entry ends in a doubled letter, or repeats the entry before it
			-- (a repeated single b, d, g, z or ž does not count)
			local doubled = find(last, "(.)%1$")
				or (last == before_last and not find(last, "^[bdgzž]$"))
			-- Stop followed by the matching nasal: pm, tn, tnj, kŋ
			local stop_nasal = (before_last == "p" and last == "m")
				or (before_last == "t" and (last == "n" or last == "nj"))
				or (before_last == "k" and last == "ŋ")

			cluster.quantity = (doubled or stop_nasal) and 2 or 3
		end
	end

	return syll, remainder
end
-- Split the word into syllables of C(C)V shape.
-- The word is lower-cased first, and an "i" directly after one of the vowels
-- a á e ē i ī o ō u ū is rewritten as "j". The returned list additionally
-- carries `count`, the number of entries excluding a vowel-less final entry.
local function split_syllables(remainder)
	local word = gsub(lower(remainder), "([aáeēiīoōuū])i", "%1j")
	local syllables = {}

	repeat
		local syll
		syll, word = get_syllable(word)
		syllables[#syllables + 1] = syll
	until not word

	local total = #syllables
	if syllables[total].vowel == "" then
		-- Trailing consonants without a vowel are not a syllable of their own
		total = total - 1
	end
	syllables.count = total

	return syllables
end
-- Returns `vowel` in its shortened spelling: a string consisting of exactly
-- one of ē ī ō ū loses its macron, and every occurrence of á, ea, ie, oa or
-- uo gets a combining breve appended.
local function shorten(vowel)
	local without_macron = {["ē"] = "e", ["ī"] = "i", ["ō"] = "o", ["ū"] = "u"}
	local breve_targets = {"á", "ea", "ie", "oa", "uo"}

	local result = gsub(vowel, "^[ēīōū]$", without_macron)
	for idx = 1, #breve_targets do
		local target = breve_targets[idx]
		result = gsub(result, target, target .. BREVE)
	end
	return result
end
-- Returns `vowel` with "ˈ" appended after every occurrence of á, ea, ie, oa
-- or uo; other vowels come back unchanged.
local function shift(vowel)
	local shiftable = {"á", "ea", "ie", "oa", "uo"}

	local result = vowel
	for idx = 1, #shiftable do
		local target = shiftable[idx]
		result = gsub(result, target, target .. "ˈ")
	end
	return result
end
-- Returns `vowel` in its long spelling: a string consisting of exactly one of
-- e i o u gains a macron, and any combining breve is removed.
local function lengthen(vowel)
	local with_macron = {["e"] = "ē", ["i"] = "ī", ["o"] = "ō", ["u"] = "ū"}

	local lengthened = gsub(vowel, "^[eiou]$", with_macron)
	-- Parentheses keep only the string, dropping gsub's replacement count
	return (gsub(lengthened, BREVE, ""))
end
-- Determine whether long vowels should be shortened before certain consonants.
-- `syll` is the syllable whose vowel is under consideration and `nextsyll`
-- the one after it; the decision depends on the first two consonant entries
-- and the vowel of `nextsyll`. Always returns a boolean.
local function should_shorten(syll, nextsyll)
	local first, second = nextsyll.cons[1], nextsyll.cons[2]

	if not first then
		return false
	end

	-- Long preaspirate (h + doubled p/t/c/č/k), or a doubled
	-- đ/f/l/m/n/ŋ/r/s/š/v with "ˈ" between the two letters
	if find(first, "^h([ptcčk])%1$") or find(first, "^([đflmnŋrsšv])ˈ%1$") then
		return true
	end

	-- Every remaining rule applies to ie and uo only
	if syll.vowel ~= "ie" and syll.vowel ~= "uo" then
		return false
	end

	if find(nextsyll.vowel, "^[áīū]$") then
		-- Geminate stop
		if find(first, "^([bcčdgkptzž])%1$") then
			return true
		end
		-- Glottalised nasal
		if first == "pm" or first == "tn" or first == "tnj" or first == "kŋ" then
			return true
		end
		-- Clusters, except when the second element is a strong-grade preaspirate
		if second and not find(second, "^h[ptcčk]$") then
			return true
		end
		return false
	elseif nextsyll.vowel == "a" then
		-- Geminate voiced stop
		if find(first, "^([bdgzž])%1$") then
			return true
		end
		-- Clusters, except when the second element is long, or a preaspirate,
		-- or a preglottalised nasal
		if second then
			local exempt = find(second, "(.)%1$")
				or find(second, "^h[ptcčk]$")
				or second == "pm" or second == "tn" or second == "tnj" or second == "kŋ"
			if not exempt then
				return true
			end
		end
	end

	return false
end
-- Rewrites the consonant entries and vowels of `syllables` in place, based on
-- each syllable's position within its foot. `syllables` is the list produced
-- by split_syllables (entries {cons = {...}, vowel = "..."} plus a `count`
-- field). Nothing is returned.
local function convert_spelling(syllables)
-- Position of the current syllable within its foot (1 = first syllable)
local foot = 0
for i, syll in ipairs(syllables) do
-- A vowel-less entry holds the consonants left after the last vowel:
-- a final t becomes ht, a final d becomes t, and this pass stops there.
if syll.vowel == "" then
if syll.cons[#syll.cons] == "t" then
syll.cons[#syll.cons] = "ht"
elseif syll.cons[#syll.cons] == "d" then
syll.cons[#syll.cons] = "t"
end
break
end
-- Empty placeholder when there is no following entry
local nextsyll = syllables[i + 1] or {cons = {}, vowel = ""}
foot = foot + 1
-- A third syllable stays in the foot only when it is the last counted
-- syllable of the word; otherwise it opens a new foot.
if foot == 3 and i ~= syllables.count then
foot = 1
end
-- Make i and u long in even syllables (unless the next syllable begins with j)
if foot == 2 and (syll.vowel == "i" or syll.vowel == "u") and nextsyll.cons[1] ~= "j" then
syll.vowel = lengthen(syll.vowel)
end
if #syll.cons == 1 then
if foot == 1 then
-- Postaspiration: a lone k, p or t is respelt kh, ph, th
syll.cons[1] = gsub(syll.cons[1], "^([kpt])$", "%1h")
elseif foot == 3 then
-- d is đ between two unstressed vowels
syll.cons[1] = gsub(syll.cons[1], "d", "đ")
end
elseif #syll.cons > 1 then
if syll.cons[#syll.cons] == syll.cons[#syll.cons - 1] and syll.cons[#syll.cons - 2] and find(syll.cons[#syll.cons - 2], "[cčkpsšt]$") then
-- Ungeminate last consonant after voiceless: the last two entries are
-- identical and the entry before them ends in c č k p s š t, so the
-- final entry is dropped
syll.cons[#syll.cons] = nil
elseif find(syll.cons[#syll.cons], "[cčkpsšt]$") then
-- The last entry itself ends in c č k p s š t: collapse a doubled
-- letter at its end to a single one
syll.cons[#syll.cons] = gsub(syll.cons[#syll.cons], "(.)%1$", "%1")
else
-- Preaspirate final voiceless consonant after voiced
-- NOTE(review): this branch is only reached when the last entry does NOT
-- end in c č k p s š t, yet both patterns below require it to consist of
-- c/č/k/p/t, so these substitutions look unreachable - confirm intent.
syll.cons[#syll.cons] = gsub(syll.cons[#syll.cons], "^([cčkpt])$", "h%1")
syll.cons[#syll.cons] = gsub(syll.cons[#syll.cons], "^([cčkpt])%1$", "h%1%1")
end
-- Devoice final geminates
-- NOTE(review): these compare against doubled-letter entries ("bb", ...);
-- letters_phonemes has no such keys, so get_phoneme would yield the two
-- letters as separate entries - verify whether these can ever match.
if syll.cons[#syll.cons] == "bb" then
syll.cons[#syll.cons] = "pp"
elseif syll.cons[#syll.cons] == "dd" then
syll.cons[#syll.cons] = "tt"
elseif syll.cons[#syll.cons] == "gg" then
syll.cons[#syll.cons] = "kk"
elseif syll.cons[#syll.cons] == "zz" then
syll.cons[#syll.cons] = "cc"
elseif syll.cons[#syll.cons] == "žž" then
syll.cons[#syll.cons] = "čč"
end
end
-- Devoice remaining single voiced consonants: b d g z ž become p t k c č,
-- except when the preceding entry is the same letter, or when the consonant
-- is the first entry and the second entry is the same letter (or, for
-- b/d/g, the nasal m / n, nj / ŋ respectively).
for j, cons in ipairs(syll.cons) do
if cons == "b" and syll.cons[j - 1] ~= "b" and (j ~= 1 or syll.cons[2] ~= "b" and syll.cons[2] ~= "m") then
syll.cons[j] = "p"
elseif cons == "d" and syll.cons[j - 1] ~= "d" and (j ~= 1 or syll.cons[2] ~= "d" and syll.cons[2] ~= "n" and syll.cons[2] ~= "nj") then
syll.cons[j] = "t"
elseif cons == "g" and syll.cons[j - 1] ~= "g" and (j ~= 1 or syll.cons[2] ~= "g" and syll.cons[2] ~= "ŋ") then
syll.cons[j] = "k"
elseif cons == "z" and syll.cons[j - 1] ~= "z" and (j ~= 1 or syll.cons[2] ~= "z") then
syll.cons[j] = "c"
elseif cons == "ž" and syll.cons[j - 1] ~= "ž" and (j ~= 1 or syll.cons[2] ~= "ž") then
syll.cons[j] = "č"
end
end
-- Regularise divergent spellings in clusters
--if #syll.cons > 2 then
-- error("Clusters with more than 2 consonants are not yet supported.")
--end
if foot == 2 and syll.cons.quantity == 3 then
-- Lengthen initial sonorant in quantity 3: "ˈ" is inserted right after
-- the first consonant entry
table.insert(syll.cons, 2, "ˈ")
end
-- Secondary stress: every foot-initial syllable after the first gets "ˌ",
-- placed before a single consonant or between the two entries of a
-- two-entry cluster (longer clusters get no mark)
if foot == 1 and i > 1 then
if #syll.cons == 1 then
table.insert(syll.cons, 1, "ˌ")
elseif #syll.cons == 2 then
table.insert(syll.cons, 2, "ˌ")
end
end
end
-- This needs to be a separate pass because otherwise unstressed ī and ū won't have been lengthened yet
for i, syll in ipairs(syllables) do
local nextsyll = syllables[i + 1] or {cons = {}, vowel = ""}
-- Shortening is currently disabled:
-- if should_shorten(syll, nextsyll) then
-- syll.vowel = shorten(syll.vowel)
-- A following ạ, ẹ or ọ makes this syllable's vowel take its "ˈ" variant
if find(nextsyll.vowel, "^[ạẹọ]$") then
syll.vowel = shift(syll.vowel)
end
end
end
-- Dialect-specific conversions.
-- Modifies `syllables` in place and returns nothing.
--
-- Western Finnmark dialect: when the last consonant entry of a syllable is
-- "ŋ" it is respelt "nj", and the consonant entry directly before it (looking
-- past an intervening "ˈ" length mark) is respelt to match: g -> d, k -> t,
-- ŋ -> nj.
--
-- Both positions now use the same pattern. Previously the branch without "ˈ"
-- matched only g and k even though its replacement table already listed ŋ, so
-- a plain "ŋŋ" came out as ŋ + nj while "ŋˈŋ" correctly became nj + ˈ + nj.
local function dialect(syllables)
	local respell = {["g"] = "d", ["k"] = "t", ["ŋ"] = "nj"}

	for _, syll in ipairs(syllables) do
		local cons = syll.cons
		if cons[1] and cons[#cons] == "ŋ" then
			local last = #cons
			cons[last] = "nj"

			-- The entry to adjust sits right before the final one, or one
			-- further back when a "ˈ" separates them
			local target = last - 1
			if cons[target] == "ˈ" then
				target = last - 2
			end

			if cons[target] then
				cons[target] = gsub(cons[target], "^[gkŋ]$", respell)
			end
		end
	end
end
-- Convert word to IPA.
-- Each syllable table in `syllables` is replaced, in place, by its IPA
-- string; the return value is all of those strings joined together and
-- prefixed with the primary stress mark.
local function to_IPA(syllables)
	for i, syll in ipairs(syllables) do
		local entries = syll.cons
		local vowelless = syll.vowel == ""

		for j, cons in ipairs(entries) do
			if vowelless and cons == "ht" then
				entries[j] = "h(t)"
			else
				local phoneme = letters_phonemes[cons]
				if phoneme then
					-- Position of the next consonant entry, looking past a "ˈ"
					local step = entries[j + 1] == "ˈ" and 2 or 1
					if string.find(phoneme, "͡", nil, true) and entries[j] == entries[j + step] then
						-- Drop the final part after the tie bar when the same
						-- entry follows again
						entries[j] = gsub(phoneme, "͡.*$", "")
					else
						entries[j] = phoneme
					end
				end
				-- Entries without a table entry are left as they are
			end
		end

		syll.vowel = letters_phonemes[syll.vowel] or syll.vowel
		syllables[i] = table.concat(entries) .. syll.vowel
	end

	return "ˈ" .. table.concat(syllables)
end
-- Template entry point. Takes the word from the first positional parameter of
-- the parent frame (defaulting to the current page title), runs it through
-- the syllabification, spelling and dialect steps, and returns the
-- "Kautokeino" accent qualifier, the formatted phonemic transcription and an
-- "N-syllable words" category, concatenated in that order.
function export.IPA(frame)
	local params = {
		[1] = {default = mw.title.getCurrentTitle().text},
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)

	local syllables = split_syllables(args[1])
	convert_spelling(syllables)
	dialect(syllables)

	local qualifier = require("Module:accent qualifier").format_qualifiers(lang, {"Kautokeino"})

	-- to_IPA replaces the syllable tables with strings but leaves `count` intact
	local transcription = require("Module:IPA").format_IPA_full {
		lang = lang,
		items = {{pron = "/" .. to_IPA(syllables) .. "/"}},
	}

	local format_categories = require("Module:utilities").format_categories
	local categories = format_categories(
		lang:getCanonicalName() .. " " .. tostring(syllables.count) .. "-syllable words",
		lang
	)

	return qualifier .. " " .. transcription .. categories
end
return export