-- Module:tyz-pron
-- Appearance
-- - This module lacks a documentation subpage. Please create it.
-- - Useful links: subpage list • links • transclusions • testcases • sandbox
local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local match = mw.ustring.match
local u = require("Module:string/char")
local export = {}
--àằầèềìòồờùừỳ áắấéếíóốớúứý ảẳẩẻểỉỏổởủửỷ ãẵẫẽễĩõỗỡũữỹ ạặậẹệịọộợụựỵ a̱ă̱â̱e̱ê̱i̱o̱ô̱ơ̱u̱ư̱y̱
-- Maps each combining tone mark (as found after NFD decomposition) to its
-- tone number. A syllable carrying none of these marks is tone 1.
local tone_diacritics = {
	["̀"] = 2, -- grave
	["́"] = 3, -- acute
	["̉"] = 4, -- hook above
	["̃"] = 5, -- tilde
	["̣"] = 6, -- dot below
	["̱"] = 7, -- macron below
}
-- Tone letters per variety; position N in each list is the contour of tone N
-- (tones are numbered 1 to 7).
local tone_contour = {
	tatd = { "˧˧", "˧˨", "˧˥", "˨˩˧", "˦ˀ˥", "˧˨ʔ", "˩˩" },
	tk = { "˦˥", "˩", "˦", "˨˦", "˦ˀ˥", "˩˧", "˧" },
}
-- IPA for each syllable onset, keyed by its spelling.
-- Every value is a pair indexed by the variety's column number
-- (second field of each `varieties` entry).
local initial_ipa = {
	-- labials
	b = { "ɓ", "ɓ" },
	bh = { "p", "bʱ" },
	bhj = { "pʲ","bʱʲ" },
	bj = { "ɓʲ", "ɓʲ" },
	f = { "f", "f" },
	m = { "m", "m" },
	mj = { "mʲ", "mʲ" },
	p = { "p", "p" },
	ph = { "pʰ", "pʰ" },
	phj = { "pʰʲ", "pʰʲ" },
	pj = { "pʲ", "pʲ" },
	v = { "v", "w" },
	vh = { "f", "v" },
	-- coronals
	d = { "z", "j" },
	dh = { "t", "dʱ" },
	["đ"] = { "ɗ", "ɗ" },
	l = { "l", "l" },
	n = { "n", "n" },
	r = { "r", "r" },
	s = { "ʂ", "ʂ" },
	sl = { "ɬ", "θ" },
	t = { "t", "t" },
	th = { "tʰ", "tʰ" },
	tr = { "ʈ", "ʈ" },
	x = { "s", "s" },
	z = { "ð", "ð" },
	-- palatals
	ch = { "c", "c" },
	nh = { "ɲ", "ɲ" },
	zh = { "c", "z" },
	-- velars and glottals
	c = { "k", "k" },
	g = { "ɣ", "ɣ" },
	gh = { "k", "gʱ" },
	h = { "h", "h" },
	k = { "k", "k" },
	kh = { "kʰ", "kʰ" },
	ng = { "ŋ", "ŋ" },
	ngh = { "ŋ", "ŋ" },
	q = { "k", "k" },
	qu = { "kw", "kw" },
	-- no written onset, and the explicit "-" onset that yields nothing
	[""] = { "ʔ", "ʔ" },
	["-"] = { "", "" },
}
-- IPA for each syllable rime (everything after the onset, tone mark removed),
-- keyed by its spelling. Every value is a pair indexed by the variety's column
-- number (second field of each `varieties` entry).
local final_ipa = {
	["a"] = { "aː", "aː" },
	["ac"] = { "aːk̚", "aːk̚" },
	["ach"] = { "ajk̟̚", "ajk̟̚" },
	["ai"] = { "aːj", "aːj" },
	["am"] = { "aːm", "aːm" },
	["an"] = { "aːn", "aːn" },
	["ang"] = { "aːŋ", "aːŋ" },
	["anh"] = { "ajŋ̟", "ajŋ̟" },
	["ao"] = { "aːw", "aːw" },
	["ap"] = { "aːp̚", "aːp̚" },
	["at"] = { "aːt̚", "aːt̚" },
	["au"] = { "aw", "aw" },
	["ay"] = { "aj", "aj" },
	["ăc"] = { "ak̚", "ak̚" },
	["ăm"] = { "am", "am" },
	["ăn"] = { "an", "an" },
	["ăng"] = { "aŋ", "aŋ" },
	["ăp"] = { "ap̚", "ap̚" },
	["ăt"] = { "at̚", "at̚" },
	["â"] = { "ə", "ə" },
	["âc"] = { "ək̚", "ək̚" },
	["âm"] = { "əm", "əm" },
	["ân"] = { "ən", "ən" },
	["âng"] = { "əŋ", "əŋ" },
	["âp"] = { "əp̚", "əp̚" },
	["ât"] = { "ət̚", "ət̚" },
	["âu"] = { "əw", "əw" },
	["âư"] = { "əɯ", "əɯ" },
	["ây"] = { "əj", "əj" },
	["e"] = { "ɛ", "ɛ" },
	["ec"] = { "ɛk̚", "ɛk̚" },
	["em"] = { "ɛm", "ɛm" },
	["en"] = { "ɛn", "ɛn" },
	["eng"] = { "ɛŋ", "ɛŋ" },
	["eo"] = { "ɛw", "ɛw" },
	["ep"] = { "ɛp̚", "ɛp̚" },
	["et"] = { "ɛt̚", "ɛt̚" },
	["ê"] = { "e", "e" },
	["êc"] = { "ek̚", "ek̚" },
	["êch"] = { "əjk̟̚", "əjk̟̚" },
	["êm"] = { "em", "em" },
	["ên"] = { "en", "en" },
	["êng"] = { "eŋ", "eŋ" },
	["ênh"] = { "əjŋ̟", "əjŋ̟" },
	["êp"] = { "ep̚", "ep̚" },
	["êt"] = { "et̚", "et̚" },
	["êu"] = { "ew", "ew" },
	["i"] = { "i", "i" },
	["ia"] = { "iə", "iə" },
	["ic"] = { "ïk̟̚", "ïk̟̚" },
	["ich"] = { "ïk̟̚", "ïk̟̚" },
	["iêc"] = { "iək̚", "iək̚" },
	["iêm"] = { "iəm", "iəm" },
	["iên"] = { "iən", "iən" },
	["iêng"] = { "iəŋ", "iəŋ" },
	["iêp"] = { "iəp̚", "iəp̚" },
	["iêt"] = { "iət̚", "iət̚" },
	["iêu"] = { "iəw", "iəw" },
	["im"] = { "im", "im" },
	["in"] = { "in", "in" },
	["inh"] = { "ïŋ", "ïŋ" },
	["ip"] = { "ip̚", "ip̚" },
	["it"] = { "it̚", "it̚" },
	["iu"] = { "iw", "iw" },
	["o"] = { "ɔ", "ɔ" },
	["oa"] = { "waː", "waː" },
	["oac"] = { "waːk̚", "waːk̚" },
	["oach"] = { "wajk̟̚", "wajk̟̚" },
	["oai"] = { "waːj", "waːj" },
	["oam"] = { "waːm", "waːm" },
	["oan"] = { "waːn", "waːn" },
	["oang"] = { "waːŋ", "waːŋ" },
	["oanh"] = { "wajŋ̟", "wajŋ̟" },
	["oao"] = { "waːw", "waːw" },
	["oap"] = { "waːp̚", "waːp̚" },
	["oat"] = { "waːt̚", "waːt̚" },
	["oay"] = { "waj", "waj" },
	["oă"] = { "wa", "wa" },
	["oăc"] = { "wak̚", "wak̚" },
	["oăm"] = { "wam", "wam" },
	["oăn"] = { "wan", "wan" },
	["oăng"] = { "waŋ", "waŋ" },
	["oăt"] = { "wat̚", "wat̚" },
	["oc"] = { "awk͡p̚", "awk͡p̚" },
	["oe"] = { "wɛ", "wɛ" },
	["oem"] = { "wɛm", "wɛm" },
	["oen"] = { "wɛn", "wɛn" },
	["oeo"] = { "wɛw", "wɛw" },
	["oet"] = { "wɛt̚", "wɛt̚" },
	["oi"] = { "ɔj", "ɔj" },
	["om"] = { "ɔm", "ɔm" },
	["on"] = { "ɔn", "ɔn" },
	["ong"] = { "awŋ͡m", "awŋ͡m" },
	["ooc"] = { "ɔk̚", "ɔk̚" },
	["oong"] = { "ɔŋ", "ɔŋ" },
	["op"] = { "ɔp̚", "ɔp̚" },
	["ot"] = { "ɔt̚", "ɔt̚" },
	["ô"] = { "o", "o" },
	["ôc"] = { "əwk͡p̚", "əwk͡p̚" },
	["ôi"] = { "oj", "oj" },
	["ôm"] = { "om", "om" },
	["ôn"] = { "on", "on" },
	["ông"] = { "əwŋ͡m", "əwŋ͡m" },
	["ôôc"] = { "ok̚", "ok̚" },
	["ôông"] = { "oŋ", "oŋ" },
	["ôp"] = { "op̚", "op̚" },
	["ôt"] = { "ot̚", "ot̚" },
	["ơ"] = { "əː", "əː" },
	["ơi"] = { "əːj", "əːj" },
	["ơm"] = { "əːm", "əːm" },
	["ơn"] = { "əːn", "əːn" },
	["ơng"] = { "əːŋ", "əːŋ" },
	["ơp"] = { "əːp̚", "əːp̚" },
	["ơt"] = { "əːt̚", "əːt̚" },
	["u"] = { "u", "u" },
	["ua"] = { "uə", "uə" },
	["uac"] = { "waːk̚", "waːk̚" },
	["uach"] = { "wajk̟̚", "wajk̟̚" },
	["uai"] = { "waːj", "waːj" },
	["uan"] = { "waːn", "waːn" },
	["uang"] = { "waːŋ", "waːŋ" },
	["uanh"] = { "wajŋ̟", "wajŋ̟" },
	["uao"] = { "waːw", "waːw" },
	["uap"] = { "waːp̚", "waːp̚" },
	["uat"] = { "waːt̚", "waːt̚" },
	["uau"] = { "waw", "waw" },
	["uay"] = { "waj", "waj" },
	["uă"] = { "wa", "wa" },
	["uăc"] = { "wak̚", "wak̚" },
	["uăm"] = { "wam", "wam" },
	["uăn"] = { "wan", "wan" },
	["uăng"] = { "waŋ", "waŋ" },
	["uăp"] = { "wap̚", "wap̚" },
	["uăt"] = { "wat̚", "wat̚" },
	["uâ"] = { "wə", "wə" },
	["uâc"] = { "wək̚", "wək̚" },
	["uân"] = { "wən", "wən" },
	["uâng"] = { "wəŋ", "wəŋ" },
	["uât"] = { "wət̚", "wət̚" },
	["uây"] = { "wəj", "wəj" },
	["uc"] = { "ʊwk͡p̚", "ʊwk͡p̚" },
	["ue"] = { "wɛ", "wɛ" },
	["uec"] = { "wɛk̚", "wɛk̚" },
	["uen"] = { "wɛn", "wɛn" },
	["ueo"] = { "wɛw", "wɛw" },
	["uep"] = { "wɛp̚", "wɛp̚" },
	["uet"] = { "wɛt̚", "wɛt̚" },
	["uê"] = { "we", "we" },
	["uêch"] = { "wəjk̟̚", "wəjk̟̚" },
	["uên"] = { "wen", "wen" },
	["uênh"] = { "wəjŋ̟", "wəjŋ̟" },
	["uêt"] = { "wet̚", "wet̚" },
	-- Off-glide written "w", matching êu → ew, iêu → iəw, ueo → wɛw, uyu → wiw.
	["uêu"] = { "wew", "wew" },
	["ui"] = { "uj", "uj" },
	["uin"] = { "win", "win" },
	["uit"] = { "wit̚", "wit̚" },
	["um"] = { "um", "um" },
	["un"] = { "un", "un" },
	["ung"] = { "ʊwŋ͡m", "ʊwŋ͡m" },
	["uô"] = { "uə", "uə" },
	["uôc"] = { "uək̚", "uək̚" },
	["uôi"] = { "uəj", "uəj" },
	["uôm"] = { "uəm", "uəm" },
	["uôn"] = { "uən", "uən" },
	["uông"] = { "uəŋ", "uəŋ" },
	["uôt"] = { "uət̚", "uət̚" },
	["uơ"] = { "wəː", "wəː" },
	["uơi"] = { "wəːj", "wəːj" },
	-- NOTE(review): the next two use "uə" although uơ/uơi above use "wəː" — confirm.
	["uơn"] = { "uən", "uən" },
	["uơt"] = { "uət̚", "uət̚" },
	["up"] = { "up̚", "up̚" },
	["ut"] = { "ut̚", "ut̚" },
	["uy"] = { "wi", "wi" },
	["uya"] = { "wiə", "wiə" },
	["uych"] = { "wïk̟̚", "wïk̟̚" },
	["uyn"] = { "win", "win" },
	["uich"] = { "wïk̟̚", "wïk̟̚" },
	["uyê"] = { "wiə", "wiə" },
	["uyên"] = { "wiən", "wiən" },
	-- NOTE(review): the two columns differ here, unlike every neighbouring entry — confirm.
	["uyênh"] = { "wiəŋ̟", "wəŋ" },
	["uyêt"] = { "wiət̚", "wiət̚" },
	["uynh"] = { "wïŋ̟", "wïŋ̟" },
	["uyp"] = { "wip̚", "wip̚" },
	["uyt"] = { "wit̚", "wit̚" },
	["uyu"] = { "wiw", "wiw" },
	["ư"] = { "ɨ", "ɨ" },
	["ưa"] = { "ɨə", "ɨə" },
	["ưc"] = { "ɨk̚", "ɨk̚" },
	["ưi"] = { "ɨj", "ɨj" },
	["ưm"] = { "ɨm", "ɨm" },
	["ưn"] = { "ɨn", "ɨn" },
	["ưng"] = { "ɨŋ", "ɨŋ" },
	["ươc"] = { "ɨək̚", "ɨək̚" },
	["ươi"] = { "ɨəj", "ɨəj" },
	["ươm"] = { "ɨəm", "ɨəm" },
	["ươn"] = { "ɨən", "ɨən" },
	["ương"] = { "ɨəŋ", "ɨəŋ" },
	["ươp"] = { "ɨəp̚", "ɨəp̚" },
	["ươt"] = { "ɨət̚", "ɨət̚" },
	["ươu"] = { "ɨəw", "ɨəw" },
	["ưt"] = { "ɨt̚", "ɨt̚" },
	["ưu"] = { "ɨw", "ɨw" },
	["y"] = { "i", "i" },
	["yêc"] = { "iək̚", "iək̚" },
	["yêm"] = { "iəm", "iəm" },
	["yên"] = { "iən", "iən" },
	["yêng"] = { "iəŋ", "iəŋ" },
	["yêp"] = { "iəp̚", "iəp̚" },
	["yêt"] = { "iət̚", "iət̚" },
	["yêu"] = { "iəw", "iəw" },
}
-- Onset spellings that are swapped for another `initial_ipa` key when the
-- syllable carries tone 2, 6 or 7 (see the lookup in export.ipa).
local voicing = {
	p = "bh",
	pj = "bhj",
	t = "dh",
	ch = "zh",
	c = "gh",
	k = "gh",
	f = "vh",
}
-- Supported varieties, keyed by their code. Each entry holds
-- { wikitext label, column number }, where the column number selects the
-- member of every initial_ipa/final_ipa pair and the output line position.
local varieties = {
	tatd = {
		"[[:en:w:Thạch An district|Thạch An]] – [[:en:w:Tràng Định district|Tràng Định]]",
		1,
	},
	tk = {
		"[[:en:w:Trùng Khánh district|Trùng Khánh]]",
		2,
	},
}
-- Character class matching any single tone mark. It is built from the keys of
-- tone_diacritics so the list of marks is defined in exactly one place.
local tone_mark_class
do
	local marks = {}
	for mark in pairs(tone_diacritics) do
		marks[#marks + 1] = mark
	end
	tone_mark_class = "[" .. table.concat(marks) .. "]"
end

-- Lower-cases `text`, turns hyphens into spaces and removes commas.
local function normalise(text)
	return (gsub(gsub(mw.ustring.lower(text), "%-", " "), "%,", ""))
end

-- Splits one written syllable into (initial, final, tone).
-- `initial` is the key to look up in initial_ipa (already swapped through
-- `voicing` when applicable), `final` is the toneless rime spelling and
-- `tone` is a number from 1 to 7.
local function split_syllable(syllable)
	-- Decompose so that the tone mark becomes a separate combining character.
	local decomposed = mw.ustring.toNFD(syllable)
	local tone = 1
	for mark, tone_num in pairs(tone_diacritics) do
		if match(decomposed, mark) then
			tone = tone_num
			break
		end
	end
	-- Drop the tone mark and recompose the vowel-quality diacritics.
	local bare = mw.ustring.toNFC((gsub(decomposed, tone_mark_class, "")))

	local initial = match(bare, "^g[bcdđfgjklmnpqrstvx]+")
		or match(bare, "^(g[hiy])[^cmnpt]")
		or match(bare, "^g")
		or match(bare, "^[bcdđfghjklmnpqrstvxz]+")
		or ""
	if match(bare, "^giê.") and bare ~= "giên" then
		initial = "d"
	end
	-- NOTE(review): the pattern is anchored at the END, so it only fires for the
	-- bare syllable "qu" (which then has an empty, unrecognised final). Other
	-- qu- syllables keep initial "q" and get their glide from the final table.
	-- Kept as is; confirm whether "^qu" was intended.
	if match(bare, "qu$") then
		initial = "qu"
	end
	-- The final is cut using the length of the spelling-based initial,
	-- before any voicing substitution changes that length.
	local final = sub(bare, mw.ustring.len(initial) + 1, -1)

	-- Tones 2, 6 and 7 switch certain onsets to their counterpart entry.
	if (tone == 2 or tone == 6 or tone == 7) and voicing[initial] then
		initial = voicing[initial]
	end

	-- An unmarked syllable whose final contains one of these letters is given
	-- tone 3 instead of tone 1.
	-- NOTE(review): the class also contains h, m, n and g, so nasal-final
	-- syllables are affected as well as stop-final ones — confirm this is intended.
	if tone == 1 and match(final, "[chmngpt]") then
		tone = 3
	end

	return initial, final, tone
end

-- Returns the IPA of `initial` for variety column `seq`.
-- Onsets missing from initial_ipa are treated as clusters and transcribed
-- letter by letter; raises an error if a letter has no entry.
local function initial_to_ipa(initial, seq)
	local entry = initial_ipa[initial]
	if entry then
		return entry[seq]
	end
	local pieces = {}
	for cc in mw.ustring.gcodepoint(initial) do
		local letter_entry = initial_ipa[u(cc)]
		if not letter_entry then
			-- Previously this surfaced as a bare "attempt to index nil" error.
			error(("Unrecognised initial: \"%s\""):format(initial))
		end
		pieces[#pieces + 1] = letter_entry[seq]
	end
	-- Re-join aspirated digraphs that the letter-by-letter pass split up.
	return (gsub(table.concat(pieces), "([cgknpt]h)", function(digraph)
		return initial_ipa[digraph][seq]
	end))
end

-- Returns the IPA of `final` for variety column `seq`; raises an error when
-- the final is not listed in final_ipa. `initial` is only used to recognise
-- the spellings "qua" and "qui", which borrow the "oa" and "uy" rimes.
local function final_to_ipa(initial, final, seq)
	local entry = final_ipa[final]
	if not entry then
		error(("Unrecognised final: \"%s\""):format(final))
	end
	local spelling = initial .. final
	return (gsub(entry[seq], "^([wu].+)", function(nucleus)
		if spelling == "qua" then
			return final_ipa["oa"][seq]
		elseif spelling == "qui" then
			return final_ipa["uy"][seq]
		end
		return nucleus
	end))
end

-- Template entry point: builds the pronunciation lines for every variety.
--
-- Reads the arguments of the calling template (frame:getParent().args):
--   * positional arguments: respellings to transcribe; when the first one is
--     absent the current page's subpage name is used instead;
--   * named arguments keyed by variety code ("tatd", "tk"): a ready-made
--     transcription that replaces the generated one, or "-" to omit the line.
-- Returns the wikitext of the list items, without a leading newline.
-- Raises an error for a syllable whose initial or final is not recognised.
function export.ipa(frame)
	local args = frame:getParent().args
	local pagename = normalise(mw.title.getCurrentTitle().subpageText)

	-- Respellings to transcribe.
	local p = {}
	if args[1] then
		for _, item in ipairs(args) do
			if item ~= "" then
				p[#p + 1] = normalise(item)
			end
		end
	else
		p[1] = pagename
	end

	-- output[loc] collects one transcription per respelling.
	local output = {}
	for loc in pairs(varieties) do
		output[loc] = {}
	end

	for _, word in ipairs(p) do
		local pronunciations = {}
		for loc in pairs(varieties) do
			pronunciations[loc] = {}
		end
		for syllable in mw.text.gsplit(word, " ", true) do
			local initial, final, tone = split_syllable(syllable)
			for loc, location in pairs(varieties) do
				local seq = location[2]
				table.insert(pronunciations[loc],
					initial_to_ipa(initial, seq) .. final_to_ipa(initial, final, seq) .. tone_contour[loc][tone])
			end
		end
		for loc in pairs(varieties) do
			table.insert(output[loc], table.concat(pronunciations[loc], " "))
		end
	end

	-- One line per variety, stored at the variety's own position so the result
	-- does not depend on the iteration order of pairs().
	local output_text = {}
	for loc, location in pairs(varieties) do
		local line = ""
		if args[loc] ~= "-" then
			local transcription = args[loc]
			if not transcription then
				transcription = table.concat(output[loc], "], [")
			end
			line = "\n* (''" .. location[1] .. "'') " ..
				"[[Wiktionary:International Phonetic Alphabet|IPA]]<sup>([[w:Tày_language|key]])</sup>: <span class=\"IPA\">[" ..
				transcription .. "]</span>"
		end
		output_text[location[2]] = line
	end

	-- Show the respelling when it differs from the page name (pagename is
	-- already lower-cased by normalise).
	if table.concat(p, "") ~= pagename then
		output_text[#output_text + 1] = "\n* ''Phonetic'': " .. (gsub(table.concat(p, ", "), "ŕ", "R"))
	end

	return (gsub(table.concat(output_text, ""), "^\n(.)", "%1"))
end
return export