Module:nn-pronunciation
Jump to navigation
Jump to search
- This module lacks a documentation subpage. Please create it.
- Useful links: subpage list • links • transclusions • testcases • sandbox
local export = {}
local lang = require("Module:languages").getByCode("is")
local sc = require("Module:scripts").getByCode("Latn")
--- Passes `text` to `tag_text` in "Module:script utilities", supplying this
-- file's `lang` and `sc` objects.
-- @param text  value forwarded as the first argument
-- @param face  value forwarded as the fourth argument
-- @return whatever the underlying `tag_text` returns
function export.tag_text(text, face)
    local script_utilities = require("Module:script utilities")
    return script_utilities.tag_text(text, lang, sc, face)
end
--- Builds a link for `term` through `full_link` in "Module:links".
-- The term is bundled with this file's `lang` and `sc` objects into a single
-- table, which is passed together with `face`.
-- @param term  value stored under the `term` key of the link data
-- @param face  value forwarded as the second argument of `full_link`
-- @return whatever `full_link` returns
function export.link(term, face)
    local link_data = { term = term, lang = lang, sc = sc }
    return require("Module:links").full_link(link_data, face)
end
-- Short aliases for the Unicode-aware string functions of mw.ustring.
local ustring = mw.ustring
local sub, find = ustring.sub, ustring.find
local gmatch, gsub = ustring.gmatch, ustring.gsub
local U = ustring.char

-- Combining diacritics and modifier letters that are appended to IPA symbols.
local nonsyllabic = U(0x32F) -- combining inverted breve below
local retracted = U(0x320) -- combining minus sign below
local voiceless = U(0x325) -- combining ring below
local voiceless_above = U(0x30A) -- combining ring above
local habove = "ʰ"
local long = "ː"
local primary_stress = "ˈ"

-- Character inventories and the patterns derived from them.
-- `consonant` matches exactly one listed consonant; `vowel` matches one or
-- more listed vowel symbols optionally followed by the non-syllabic mark.
local consonants = "bdðfghjklmnprstvxþ"
local consonant = ("[%s]"):format(consonants)
local vowels = "aɛɪiʏyœɔou"
local vowel = ("[%s]+[%s]?"):format(vowels, nonsyllabic)
-- Phonemic conversion data (WORK IN PROGRESS).
--
-- Declared `local` so the table does not leak into the global environment.
-- Sub-tables, as they are consulted by export.toIPA:
--   initial    spelling of the first one or two letters of a word that
--              starts with a consonant
--   trigraphs  three-letter sequences, tried first inside the word
--   digraphs   two-letter sequences, tried second
--   vowels     single vowel letters
--   before_ng  vowel output used when the next two letters are "nk" or "ng"
--   internal   remaining single letters
--   long       not read by export.toIPA in this file
local data_m = {
    ["initial"] = {
        ["b"] = "b",
        ["d"] = "d",
        ["g"] = "ɡ",
        ["p"] = "pʰ",
        ["t"] = "tʰ",
        ["k"] = "kʰ",
        -- ["f"] = "f",
        ["gj"] = "ʝ",
        ["kj"] = "ç",
        ["hv"] = "kv",
        ["þ"] = "θ"--[[ .. retracted]],
        ["hl"] = "l" .. voiceless,
        ["hn"] = "n" .. voiceless,
        ["hr"] = "r" .. voiceless,
        ["hj"] = "j",
        [""] = "",
    },
    ["internal"] = {
        -- ["ð"] = "ð" .. retracted,
        ["b"] = "b",
        ["d"] = "d",
        ["x"] = "ks",
        ["f"] = "f",
        ["þ"] = "θ"--[[ .. retracted]],
        -- Placeholder entry; it was previously written three times with the
        -- same key, which is equivalent to writing it once.
        [""] = "",
    },
    ["vowels"] = {
        ["a"] = "ɐ̞",
        ["å"] = "o̞",
        ["è"] = "ɛ̝",
        ["e"] = "e",
        ["é"] = "e",
        ["ì"] = "ɪ",
        ["i"] = "i",
        ["í"] = "i",
        ["ỳ"] = "ʏ",
        ["y"] = "y",
        ["ý"] = "y",
        ["ò"] = "ɞ̞",
        ["o"] = "u̞",
        ["ó"] = "u̞",
        ["ù"] = "ʊ",
        ["u"] = "ʉ",
        ["ú"] = "ʉ",
        ["æ"] = "æ",
        ["ø"] = "ø",
        ["ø̀"] = "œ",
    },
    ["before_ng"] = {
        ["a"] = "ɐ̞",
        ["e"] = "ɛ̝",
        ["u"] = "ʊ",
        ["i"] = "ɪ",
        ["y"] = "ʏ",
        ["ø"] = "œ",
    },
    ["digraphs"] = {
        ["bb"] = "bb",
        ["dd"] = "dd",
        ["kj"] = "ç",
        ["ll"] = "ll",
        ["rn"] = "rn",
        ["rl"] = "rl",
        ["sl"] = "sl",
        ["sn"] = "sn",
        -- NOTE: the preaspirated-stop loop that follows the data tables
        -- assigns digraphs["tn"] again, replacing this value.
        ["tn"] = "tn" .. voiceless,
        ["au"] = "œʊ" .. nonsyllabic,
        ["ei"] = "ɛ̝ɪ" .. nonsyllabic,
        ["øy"] = "œʏ" .. nonsyllabic,
    },
    ["trigraphs"] = {
        ["mnd"] = "md",
        ["mnt"] = "m" .. voiceless .. "t",
    },
    ["long"] = {
        ["a"] = "ɐ̞" .. long,
        ["ɛ"] = "ɛ̝" .. long,
    },
}
-- Phonetic conversion data.
--
-- Declared `local` so the table does not leak into the global environment.
-- Sub-tables, as they are consulted by export.toIPA:
--   initial     spelling of the first one or two letters of a word that
--               starts with a consonant
--   trigraphs   three-letter sequences, tried first inside the word
--   digraphs    two-letter sequences, tried second
--   vowels      single vowel letters
--   before_ng   vowel output used when the next two letters are "nk" or "ng"
--   longvowels  vowel outputs carrying the length mark, looked up by the
--               long-vowel branch of export.toIPA
--   internal    remaining single letters
--   long        not read by export.toIPA in this file
local data_t = {
    ["initial"] = {
        ["b"] = "b",
        ["d"] = "d",
        ["g"] = "ɡ",
        ["p"] = "pʰ",
        ["t"] = "tʰ",
        ["k"] = "kʰ",
        ["f"] = "f",
        ["gj"] = "ʝ",
        ["kj"] = "ç",
        ["hv"] = "kv",
        ["þ"] = "θ"--[[ .. retracted]],
        ["hl"] = "l" .. voiceless,
        ["hn"] = "n" .. voiceless,
        ["hr"] = "r" .. voiceless,
        ["hj"] = "j",
        [""] = "",
    },
    ["internal"] = {
        -- ["ð"] = "ð" .. retracted,
        ["b"] = "b",
        ["d"] = "d",
        ["x"] = "ks",
        ["f"] = "f",
        ["þ"] = "θ"--[[ .. retracted]],
        -- Placeholder entry; it was previously written three times with the
        -- same key, which is equivalent to writing it once.
        [""] = "",
    },
    ["vowels"] = {
        ["a"] = "ɐ̞",
        ["å"] = "o̞",
        ["è"] = "ɛ̝",
        ["e"] = "e",
        ["é"] = "e",
        ["ì"] = "ɪ",
        ["i"] = "i",
        ["í"] = "i",
        ["ỳ"] = "ʏ",
        ["y"] = "y",
        ["ý"] = "y",
        ["ò"] = "ɞ̞",
        ["o"] = "u̞",
        ["ó"] = "u̞",
        ["ù"] = "ʊ",
        ["u"] = "ʉ",
        ["ú"] = "ʉ",
        ["æ"] = "æ",
        ["ø"] = "ø",
        ["ø̀"] = "œ",
    },
    ["longvowels"] = {
        ["a"] = "ɐ̞" .. long,
        ["å"] = "o̞" .. long,
        ["è"] = "ɛ̝" .. long,
        ["e"] = "e" .. long,
        ["é"] = "e" .. long,
        ["i"] = "ɪ" .. long,
        ["y"] = "ɪ" .. long,
        ["í"] = "i" .. long,
        ["ý"] = "i" .. long,
        ["ò"] = "ɞ̞" .. long,
        ["o"] = "u̞" .. long,
        ["ó"] = "u̞" .. long,
        ["ù"] = "ʊ" .. long,
        ["u"] = "ʉ" .. long,
        ["ú"] = "ʉ" .. long,
        ["æ"] = "æ" .. long,
        ["ø̀"] = "œ" .. long,
        ["ö"] = "œ" .. long,
        ["ø"] = "ø" .. long,
    },
    ["before_ng"] = {
        ["a"] = "ɐ̞",
        ["e"] = "ɛ̝",
        ["u"] = "ʊ",
        ["i"] = "ɪ",
        ["y"] = "ʏ",
        ["ø"] = "œ",
    },
    ["digraphs"] = {
        ["bb"] = "b",
        ["dd"] = "d",
        ["kj"] = "ç",
        ["ll"] = "l",
        ["rn"] = "rn",
        ["rl"] = "rl",
        ["sl"] = "sl",
        ["sn"] = "sn",
        -- NOTE: the preaspirated-stop loop that follows the data tables
        -- assigns digraphs["tn"] again, replacing this value.
        ["tn"] = "tn" .. voiceless,
        ["au"] = "œʊ" .. nonsyllabic,
        ["ei"] = "ɛ̝ɪ" .. nonsyllabic,
        ["øy"] = "œʏ" .. nonsyllabic,
        ["oy"] = "ɞ̞ʏ" .. nonsyllabic,
    },
    ["trigraphs"] = {
        ["mnd"] = "md",
        ["mnt"] = "m" .. voiceless .. "t",
    },
    ["long"] = {
        ["a"] = "ɐ̞" .. long,
        ["ɛ"] = "ɛ̝" .. long,
    },
}
-- Register the preaspirated stops in both digraph tables. For each of p, t
-- and k two spellings are added: the doubled letter (pp, tt, kk) and the
-- letter followed by n (pn, tn, kn). The phonetic table marks preaspiration
-- with "ʰ", the phonemic table with "h". Entries already present under the
-- same key (such as "tn") are replaced.
for _, stop in ipairs({ "p", "t", "k" }) do
    local doubled = stop .. stop
    local before_n = stop .. "n"

    data_t.digraphs[doubled] = "ʰ" .. stop
    data_t.digraphs[before_n] = "ʰ" .. stop .. "n"

    data_m.digraphs[doubled] = "h" .. stop
    data_m.digraphs[before_n] = "h" .. stop .. "n"
end
-- Phonemic post-processing rules (WORK IN PROGRESS).
--
-- Declared `local` so the table does not leak into the global environment.
-- Each numbered set maps a ustring pattern to its replacement. export.toIPA
-- applies the sets in index order (ipairs); within one set the rules are
-- visited with pairs, so their relative order is unspecified.
local rules_m = {
    [1] = {
        ["(" .. primary_stress .. consonant .. "*" .. vowel .. ")nn"]
            = "%1tn"--[[ .. voiceless]],
        ["(" .. vowel .. ")" .. "g" .. "([aʏðlr])"] = "%1ɣ%2",
        ["(" .. vowel .. ")" .. "g" .. "([ji])"] = "%1j%2",
        ["(" .. vowel .. ")" .. "[kg]" .. "([ts])"] = "%1x%2",
        ["(" .. vowel .. ")" .. "p" .. "([tsk])"] = "%1f%2",
        ["ng([ls])"] = "ŋ%1",
    },
    [2] = {
        ["(u" .. nonsyllabic .. "?)[vɣ]"] = "%1",
        ["g"] = "k",
    },
    [3] = {
        -- These two patterns were previously listed twice each; a repeated
        -- key in a table constructor has no additional effect.
        ["k(ʰ?[ɛiɪ])"] = "c%1",
        ["k(ʰ?ai)"] = "c%1",
        ["kj"] = "c",
        ["jj"] = "i" .. nonsyllabic .. "j",
    },
    [4] = {
        ["nk"] = "ŋk",
        ["kc"] = "çç",
    },
}
-- Phonetic post-processing rules.
--
-- Declared `local` so the table does not leak into the global environment.
-- Each numbered set maps a ustring pattern to its replacement. export.toIPA
-- applies the sets in index order (ipairs); within one set the rules are
-- visited with pairs, so their relative order is unspecified.
local rules_t = {
    [1] = {
        ["(" .. consonant .. "*" .. vowel .. vowel .. ")nn"]
            = "%1tn" .. voiceless,
        ["(" .. vowel .. ")" .. "g" .. "([aʏðlr])"] = "%1ɣ%2",
        ["(" .. vowel .. ")" .. "g" .. "([ji])"] = "%1j%2",
        ["(" .. vowel .. ")" .. "[kg]" .. "([ts])"] = "%1x%2",
        ["(" .. vowel .. ")" .. "p" .. "([tsk])"] = "%1f%2",
        ["ng([ls])"] = "ŋ%1",
    },
    [2] = {
        ["nn"] = "n" .. long,
        ["(u" .. nonsyllabic .. "?)[vɣ]"] = "%1",
        ["g"] = "k",
    },
    [3] = {
        -- These two patterns were previously listed twice each; a repeated
        -- key in a table constructor has no additional effect.
        ["k(ʰ?[ɛiɪ])"] = "c%1",
        ["k(ʰ?ai)"] = "c%1",
        ["kj"] = "c",
        ["jj"] = "i" .. nonsyllabic .. "j",
    },
    [4] = {
        ["nk"] = "ŋk",
        ["kc"] = "c",
        ["pn"] = "pn" .. voiceless,
    },
}
-- Converts `term` with one data/rule pair. Shared by both transcription
-- modes of export.toIPA, which previously carried two copies of this logic.
--   term    string to transcribe (lower-cased before processing)
--   accent  when equal to "off", no primary stress mark is prepended
--   data    conversion tables (initial, trigraphs, digraphs, vowels,
--           before_ng, internal and, optionally, longvowels)
--   rules   array of pattern -> replacement sets applied after conversion
local function convert_with(term, accent, data, rules)
    local pieces = {}
    if accent ~= "off" then
        pieces[#pieces + 1] = primary_stress
    end

    local rest = mw.ustring.lower(term)

    -- A word-initial consonant is looked up in data.initial, preferring a
    -- two-letter spelling over a one-letter one; unknown consonants are
    -- copied through unchanged.
    local first = sub(rest, 1, 1)
    if find(first, consonant) then
        local first_two = sub(rest, 1, 2)
        if data.initial[first_two] then
            pieces[#pieces + 1] = data.initial[first_two]
            rest = sub(rest, 3)
        elseif data.initial[first] then
            pieces[#pieces + 1] = data.initial[first]
            rest = sub(rest, 2)
        else
            pieces[#pieces + 1] = first
            rest = sub(rest, 2)
        end
    end

    -- Consume the remainder left to right, longest spelling first.
    while mw.ustring.len(rest) > 0 do
        local one = sub(rest, 1, 1)
        local two = sub(rest, 1, 2)
        local three = sub(rest, 1, 3)

        if data.trigraphs[three] then
            pieces[#pieces + 1] = data.trigraphs[three]
            rest = sub(rest, 4)
        elseif data.digraphs[two] then
            pieces[#pieces + 1] = data.digraphs[two]
            rest = sub(rest, 3)
        elseif data.vowels[one] then
            local next_two = sub(rest, 2, 3)
            local second = sub(rest, 2, 2)
            local third = sub(rest, 3, 3)
            if data.before_ng[one] and (next_two == "nk" or next_two == "ng") then
                pieces[#pieces + 1] = data.before_ng[one]
            elseif data.longvowels
                and data.longvowels[one]
                and not data.vowels[second]
                and data.vowels[third]
            then
                -- Long vowel: the next character is not a vowel letter and
                -- the one after it is. This test used to read a third
                -- element of a two-element table (always nil), so the
                -- branch could never be taken.
                pieces[#pieces + 1] = data.longvowels[one]
            else
                pieces[#pieces + 1] = data.vowels[one]
            end
            -- NOTE(review): only one character is examined here, so a
            -- vowels key stored as more than one code point (a base letter
            -- plus a combining accent) would never match -- confirm.
            rest = sub(rest, 2)
        elseif data.internal[one] then
            pieces[#pieces + 1] = data.internal[one]
            rest = sub(rest, 2)
        else
            pieces[#pieces + 1] = one
            rest = sub(rest, 2)
        end
    end

    local ipa = table.concat(pieces)

    -- Rule sets run in index order; rules inside a set are visited with
    -- pairs, whose order is unspecified.
    for _, rule_set in ipairs(rules) do
        for pattern, replacement in pairs(rule_set) do
            ipa = gsub(ipa, pattern, replacement)
        end
    end

    -- Hyphens in the input are dropped from the result.
    ipa = gsub(ipa, "%-", "")
    return ipa
end

--- Transcribes `term` into IPA.
-- @param mode    "t" for phonetic or "m" for phonemic
-- @param term    string to transcribe
-- @param accent  pass "off" to omit the leading primary stress mark
-- @return the transcription as a string, without enclosing brackets/slashes
-- Raises an error when `term` is not a string or `mode` is neither "t" nor
-- "m" (the latter used to fail later with an unrelated gsub argument error).
function export.toIPA(mode, term, accent)
    if type(term) ~= "string" then
        error('The function "toIPA" requires a string argument.')
    end

    if mode == "t" then
        return convert_with(term, accent, data_t, rules_t)
    elseif mode == "m" then
        return convert_with(term, accent, data_m, rules_m)
    end

    error('The function "toIPA" requires the mode "t" (phonetic) or "m" (phonemic).')
end
--- Template entry point for the phonemic transcription.
-- Reads the parent frame's arguments through "Module:parameters":
--   1  term to transcribe; the current page title's text is used if absent
--   2  accent flag handed to export.toIPA
-- The transcription is wrapped in slashes and formatted by
-- `format_IPA_full` of "Module:IPA".
-- NOTE(review): the language code passed below is "is", while the page
-- header names this module nn-pronunciation -- confirm it is intended.
-- @param frame  frame object whose parent supplies the arguments
-- @return the value produced by `format_IPA_full`
function export.show_M(frame)
    local spec = { [1] = {}, [2] = {} }
    local page = mw.title.getCurrentTitle()
    local args = require("Module:parameters").process(frame:getParent().args, spec)

    local transcription = "/" .. export.toIPA("m", args[1] or page.text, args[2]) .. "/"

    local formatted = require("Module:IPA").format_IPA_full {
        lang = require("Module:languages").getByCode("is"),
        items = { { pron = transcription } },
    }
    return formatted
end
--- Template entry point for the phonetic transcription.
-- Reads the parent frame's arguments through "Module:parameters":
--   1  term to transcribe; the current page title's text is used if absent
--   2  accent flag handed to export.toIPA
-- The transcription is wrapped in square brackets and formatted by
-- `format_IPA_full` of "Module:IPA".
-- NOTE(review): the language code passed below is "is", while the page
-- header names this module nn-pronunciation -- confirm it is intended.
-- @param frame  frame object whose parent supplies the arguments
-- @return the value produced by `format_IPA_full`
function export.show_T(frame)
    local spec = { [1] = {}, [2] = {} }
    local page = mw.title.getCurrentTitle()
    local args = require("Module:parameters").process(frame:getParent().args, spec)

    local transcription = "[" .. export.toIPA("t", args[1] or page.text, args[2]) .. "]"

    local formatted = require("Module:IPA").format_IPA_full {
        lang = require("Module:languages").getByCode("is"),
        items = { { pron = transcription } },
    }
    return formatted
end
return export