Module:mk-pronunciation
Appearance
- The following documentation is located at Module:mk-pronunciation/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module converts Macedonian orthography to a phonetic transcription in the International Phonetic Alphabet.
However, IT IS NOT FULLY AUTOMATIC. For words with irregular stress or other idiosyncrasies, MANUAL RESPELLINGS ARE REQUIRED.
DO NOT ADD IT TO MACEDONIAN WORDS MISSING A TRANSCRIPTION WITHOUT CHECKING IT OR IF YOU ARE NOT PROFICIENT ENOUGH IN MACEDONIAN TO KNOW THE CORRECT PRONUNCIATION.
Testcases
11 of 63 tests failed. (refresh)
Text | Expected | Actual | |
---|---|---|---|
нананазад (nananazad) | naˈnanazat | naˈnanazat | |
Тласолтеотл (Tlasolteotl) | tɫasɔɫˈtɛɔtɫ̩ | tɫasɔɫˈtɛɔtɫ̩ | |
њутн (njutn) | ˈɲutn̩ | ˈɲutn̩ | |
беџ (bedž) | bɛt͡ʃ | bɛt͡ʃ | |
правци (pravci) | ˈpraft͡si | ˈpraft͡si | |
грозд (grozd) | ɡrɔst | ɡrɔst | |
надежта (nadežta) | ˈnadɛʃta | ˈnadɛʃta | |
бели (beli) | ˈbɛli | ˈbɛli | |
соседство (sosedstvo) | ˈsɔsɛtstvɔ | ˈsɔsɛtstvɔ | |
зима́ва (zimáva) | ziˈmava | ziˈmava | |
одва́j (odváj) | ɔˈdvaj | ɔˈdvaj | |
Мавританија (Mavritanija) | mavriˈtani(j)a | mavriˈtani(j)a | |
’рѓа (’rǵa) | ˈr̩ɟa | ˈr̩ɟa | |
бесчестен (besčesten) | ˈbɛʃt͡ʃɛstɛn | ˈbɛʃt͡ʃɛstɛn | |
бара (bara) | ˈbaɾa | ˈbaɾa | |
станбен (stanben) | ˈstambɛn | ˈstambɛn | |
конфузен (konfuzen) | ˈkɔɱfuzɛn | ˈkɔɱfuzɛn | |
рамка (ramka) | ˈramka | ˈramka | |
амфора (amfora) | ˈaɱfɔɾa | ˈaɱfɔɾa | |
емиграциски (emigraciski) | ɛmiˈɡrat͡siski | ɛmiˈɡrat͡siski | |
соучесништво (součesništvo) | sɔuˈt͡ʃɛsniʃtvɔ | sɔuˈt͡ʃɛsniʃtvɔ | |
подмножество (podmnožestvo) | pɔdˈmnɔʒɛstvɔ | pɔdˈmnɔʒɛstvɔ | |
грнчарство (grnčarstvo) | ˈɡr̩nt͡ʃarstvɔ | ˈɡr̩nt͡ʃarstvɔ | |
стокхолмски (stokholmski) | ˈstɔkxɔɫmski | ˈstɔkxɔɫmski | |
трамвајскиот (tramvajskiot) | traɱˈvajski(j)ɔt | traɱˈvajski(j)ɔt | |
одраниот (odraniot) | ɔˈdrani(j)ɔt | ɔˈdrani(j)ɔt | |
позлатува (pozlatuva) | pɔˈzɫatuva | pɔˈzɫatuva | |
остварува (ostvaruva) | ɔˈstvaɾuva | ɔˈstvaɾuva | |
дошколува (doškoluva) | dɔˈʃkɔɫuva | dɔʃˈkɔɫuva | |
потешкотија (poteškotija) | pɔtɛʃˈkɔti(j)a | pɔtɛʃˈkɔti(j)a | |
основање (osnovanje) | ɔˈsnɔvaɲɛ | ɔˈsnɔvaɲɛ | |
потковица (potkovica) | pɔtˈkɔvit͡sa | pɔtˈkɔvit͡sa | |
инјекција (injekcija) | inˈjɛkt͡si(j)a | inˈjɛkt͡si(j)a | |
отсјаите (otsjaite) | ɔtˈsjaitɛ | ɔtˈsjaitɛ | |
подморница (podmornica) | pɔdˈmɔrnit͡sa | pɔdˈmɔrnit͡sa | |
полудневниот (poludnevniot) | pɔɫuˈdnɛvni(j)ɔt | pɔɫuˈdnɛvni(j)ɔt | |
од играчка плачка (od igračka plačka) | ɔd ˈiɡrat͡ʃka ˈpɫat͡ʃka | ɔd ˈiɡrat͡ʃka ˈpɫat͡ʃka | |
од немај-каде (od nemaj-kade) | ɔd nɛˈmajkadɛ | ɔd ˈnɛmaj ˈkadɛ | |
од почит кон (od počit kon) | ɔt ˈpɔt͡ʃit kɔn | ɔt ˈpɔt͡ʃit kɔn | |
обновува (obnovuva) | ɔbˈnɔvuva | ɔbˈnɔvuva | |
облажува (oblažuva) | ɔˈbɫaʒuva | ɔˈbɫaʒuva | |
чувствителност (čuvstvitelnost) | t͡ʃufˈstvitɛɫnɔst | t͡ʃufˈstvitɛɫnɔst | |
конфли́кт (konflíkt) | kɔɱˈflikt | kɔɱˈflikt | |
комфорен (komforen) | ˈkɔɱfɔɾɛn | ˈkɔɱfɔɾɛn | |
бара преку леб погача (bara preku leb pogača) | ˈbaɾa ˈprɛku ˈlɛp ˈpɔɡat͡ʃa | ˈbaɾa ˈprɛku lɛp ˈpɔɡat͡ʃa | |
сѐ или ништо (sè ili ništo) | ˈsɛ ili ˈniʃtɔ | sɛ ili ˈniʃtɔ | |
сѐ уште (sè ušte) | ˈsɛ uʃtɛ | sɛ ˈuʃtɛ | |
илјадити (iljaditi) | iˈʎaditi | iˈʎaditi | |
Унгарија (Ungarija) | uŋˈɡaɾi(j)a | uŋˈɡaɾi(j)a | |
архиепископ (arhiepiskop) | arxiˈɛpiskɔp | arxiˈɛpiskɔp | |
комба́јн (kombájn) | kɔmˈbajn | kɔmˈbajn | |
мјаука (mjauka) | ˈmjauka | ˈmjauka | |
скејтборд (skejtbord) | ˈskɛjdbɔrt | ˈskɛjdbɔrt | |
жанр (žanr) | ˈʒanr̩ | ˈʒanr̩ | |
подредува (podreduva) | pɔdˈrɛduva | pɔˈdrɛduva | |
разликува (razlikuva) | razˈlikuva | raˈzlikuva | |
растворени (rastvoreni) | rasˈtvɔɾɛni | rasˈtvɔɾɛni | |
олеснување (olesnuvanje) | ɔlɛsˈnuvaɲɛ | ɔlɛˈsnuvaɲɛ | |
соткаено (sotkaeno) | sɔˈtkaɛnɔ | sɔtˈkaɛnɔ | |
повторливост (povtorlivost) | pɔˈftɔrlivɔst | pɔfˈtɔrlivɔst | |
од А до Ш (od A do Š) | ɔd ˈa dɔ ˈʃə | ɔd a dɔ ʃə | |
бездејствува (bezdejstvuva) | bɛzˈdɛjstvuva | bɛzˈdɛjstvuva | |
бошњачкиот (bošnjačkiot) | bɔʃˈɲat͡ʃki(j)ɔt | bɔʃˈɲat͡ʃki(j)ɔt |
local export = {}
local u = require("Module:string/char")
local rsubn = mw.ustring.gsub
local ulower = mw.ustring.lower
local m_syllables = require("Module:syllables")
local m_utils = require("Module:utilities")
local lang = require("Module:languages").getByCode("mk")
local AC = u(0x301)
local SYLLABIC = u(0x329)
local TIE = u(0x361)
-- Character-by-character lookup used by export.toIPA(): each (lowercased)
-- Macedonian letter or stress-marking symbol is replaced by the IPA string
-- given here. Characters that have no entry are left untouched.
local phonetic_chars_map = {
	-- Vowels (the grave-accented letters share the value of their plain form)
	["а"] = "a",
	["е"] = "ɛ",
	["ѐ"] = "ɛ",
	["и"] = "i",
	["ѝ"] = "i",
	["о"] = "ɔ",
	["у"] = "u",

	-- Apostrophes stand for a schwa
	["’"] = "ə",
	["‘"] = "ə",

	-- Stops
	["п"] = "p",
	["б"] = "b",
	["т"] = "t",
	["д"] = "d",
	["ќ"] = "c",
	["ѓ"] = "ɟ",
	["к"] = "k",
	["г"] = "ɡ",

	-- Fricatives
	["ф"] = "f",
	["в"] = "v",
	["с"] = "s",
	["з"] = "z",
	["ш"] = "ʃ",
	["ж"] = "ʒ",
	["х"] = "x",

	-- Affricates, written with a tie bar between the two symbols
	["ц"] = "t" .. TIE .. "s",
	["ѕ"] = "d" .. TIE .. "z",
	["ч"] = "t" .. TIE .. "ʃ",
	["џ"] = "d" .. TIE .. "ʒ",

	-- Nasals
	["м"] = "m",
	["н"] = "n",
	["њ"] = "ɲ",

	-- Liquids and glide
	["р"] = "r",
	["л"] = "ɫ",
	["љ"] = "ʎ",
	["ј"] = "j",

	-- Explicit stress markers: combining acute, backtick and slash
	[AC] = "ˈ",
	["`"] = "ˈ",
	["/"] = "ˈ",
}
-- Voiced obstruent -> voiceless counterpart, applied per character during
-- voicing assimilation. The tie bar maps to itself so that it passes through
-- the lookup unchanged.
local devoicing = {
	["b"] = "p",
	["d"] = "t",
	["ɟ"] = "c",
	["ɡ"] = "k",
	["z"] = "s",
	["ʒ"] = "ʃ",
	["v"] = "f",
	[TIE] = TIE,
}
-- IPA vowel symbols produced by phonetic_chars_map.
local vowel = "aɛiɔuə"
-- Everything that can mark a syllable nucleus: a vowel or the combining
-- "syllabic" diacritic that follows a syllabic consonant.
local vocalic = vowel .. SYLLABIC
-- The same set wrapped as a pattern character class.
local vocalic_c = ("[%s]"):format(vocalic)
-- Like rsubn(), but yields only the substituted string; the substitution
-- count is dropped (the extra parentheses truncate to one return value).
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end
-- Like rsubn(), but the second return value is a boolean that is true when
-- at least one substitution was made.
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end
-- Keep applying rsub() with the same pattern and replacement until a pass
-- leaves the string unchanged, then return that fixed point.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end
-- Convert Macedonian text (Cyrillic orthography, optionally with explicit
-- stress marks) into a phonetic IPA transcription string.
--
-- The conversion is a fixed pipeline of pattern substitutions; the order of the
-- statements matters because later rules operate on the output of earlier ones:
--   1. normalise case, punctuation and spacing, and mark word boundaries with "#";
--   2. map every character through phonetic_chars_map;
--   3. mark syllabic sonorants;
--   4. insert a stress mark where none was given and move it to the syllable onset;
--   5. apply palatalisation, voicing/sibilant/nasal assimilation, epenthesis
--      and /r/ allophony;
--   6. strip the "#" boundary markers.
--
-- @param text string to transcribe
-- @return the transcription, without surrounding brackets
function export.toIPA(text)
	text = mw.ustring.toNFC(ulower(text))

	-- convert commas and en/em dashes to text foot boundaries
	text = rsub(text, "%s*[,–—]%s*", " | ")
	-- question mark or exclamation point in the middle of a sentence -> text foot boundary
	text = rsub(text, "([^%s])%s*[!?]%s*([^%s])", "%1 | %2")
	text = rsub(text, "[!?]", "") -- eliminate remaining punctuation

	-- canonicalize multiple spaces and remove leading and trailing spaces
	local function canon_spaces(text)
		text = rsub(text, "%s+", " ")
		text = rsub(text, "^ ", "")
		text = rsub(text, " $", "")
		return text
	end

	-- Convert hyphens to spaces. FIXME: Prefixes and suffixes should be unstressed unless explicitly marked for stress.
	text = rsub(text, "%-", " ")
	-- canonicalize multiple spaces, which may have been introduced by hyphens.
	text = canon_spaces(text)
	-- Put # at word beginning and end and double ## at text/foot boundary beginning/end.
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"

	-- Letter-to-IPA mapping; characters without a table entry are kept as they are.
	text = rsub(text, ".", phonetic_chars_map)

	-- Syllabic sonorants
	-- A word consisting of a single sonorant that has a neighbouring word gets
	-- a schwa after it.
	text = rsub(text, "# #m#", "# #mə#")
	text = rsub(text, "#m# #", "#mə# #")
	text = rsub(text, "# #n#", "# #nə#")
	text = rsub(text, "#n# #", "#nə# #")
	text = rsub(text, "# #ɲ#", "# #ɲə#")
	text = rsub(text, "#ɲ# #", "#ɲə# #")
	text = rsub(text, "# #r#", "# #rə#")
	text = rsub(text, "#r# #", "#rə# #")
	text = rsub(text, "# #ɫ#", "# #ɫə#")
	-- This pattern used to end in " $", which can never match because the text
	-- always ends in "##"; it now follows the same "# #" shape as its siblings.
	text = rsub(text, "#ɫ# #", "#ɫə# #")
	text = rsub(text, "# #l#", "# #lə#")
	text = rsub(text, "#l# #", "#lə# #")
	text = rsub(text, "# #ʎ#", "# #ʎə#")
	text = rsub(text, "#ʎ# #", "#ʎə# #")
	text = rsub(text, "# #j#", "# #jə#")
	text = rsub(text, "#j# #", "#jə# #")
	-- A liquid/glide, or a nasal, with no vocalic segment on either side gets
	-- the syllabic diacritic. Repeated because matches can overlap.
	text = rsub_repeatedly(text, "([^" .. vocalic .. "ˈ])([rɫlʎj])([^" .. vocalic .. "])", "%1%2" .. SYLLABIC .. "%3")
	text = rsub_repeatedly(text, "([^" .. vocalic .. "rɫlʎjˈ])([mnɲ])([^" .. vocalic .. "rɫlʎmnɲj])", "%1%2" .. SYLLABIC .. "%3")
	-- schwa (from an apostrophe) + r is written as syllabic r
	text = rsub(text, "ər", "r" .. SYLLABIC)

	-- Mark stress
	-- Words without an explicit stress mark: the patterns are greedy, so the
	-- mark lands right after the third-from-last nucleus in words with three or
	-- more nuclei, otherwise right after the second-from-last in words with two.
	text = rsub(text, "(#[^#ˈ ]*" .. vocalic_c .. ")([^#ˈ ]*" .. vocalic_c .. "[^#ˈ ]*" .. vocalic_c .. "[^#ˈ ]*#)", "%1ˈ%2")
	text = rsub(text, "(#[^#ˈ ]*" .. vocalic_c .. ")([^#ˈ ]*" .. vocalic_c .. "[^#ˈ ]*#)", "%1ˈ%2")
	-- Move the mark from after the nucleus to before the syllable onset.
	text = rsub(text, "([szʃʒ]?[ptckbdɟɡfxmɱnɲ]?[mɱnɲv]?[rɫljʎ]?" .. vocalic_c .. ")ˈ", "ˈ%1")
	text = rsub(text, "([td]" .. TIE .. "[szʃʒ]?)ˈ", "ˈ%1")
	-- If only consonants separate the word start from the mark, put the mark at the word start.
	text = rsub(text, "#([^#aɛiɔuə" .. SYLLABIC .. " ]*)ˈ", "#ˈ%1")
	-- Cluster-specific corrections of the syllable boundary chosen above.
	text = rsub(text, "aˈst", "asˈt")
	text = rsub(text, "ˈbm", "bˈm")
	text = rsub(text, "ˈbn", "bˈn")
	text = rsub(text, "ˈbv", "bˈv")
	text = rsub(text, "ˈdm", "dˈm")
	text = rsub(text, "ˈdɲ", "dˈɲ")
	text = rsub(text, "ˈdvr", "dˈvr")
	text = rsub(text, "ˈdvɫ", "dˈvɫ")
	text = rsub(text, "ˈstm", "stˈm")
	text = rsub(text, "ˈfn", "fˈn")
	text = rsub(text, "ˈ[mɱn]v", "ɱˈv")
	text = rsub(text, "[ɫl]ˈj", "ˈʎ")
	text = rsub(text, "ˈzʎ", "zˈʎ")
	text = rsub(text, "ˈbj", "bˈj")
	text = rsub(text, "ˈdj", "dˈj")
	text = rsub(text, "ˈnj", "nˈj")
	text = rsub(text, "ˈnɫ", "nˈɫ")
	text = rsub(text, "ˈnr", "nˈr")
	text = rsub(text, "ˈzmj", "zˈmj")
	text = rsub(text, "ˈzmr", "zˈmr")
	text = rsub(text, "ˈzvr", "zˈvr")
	text = rsub(text, "ˈsfr", "sˈfr")
	text = rsub(text, "ˈʃx", "ʃˈx")
	text = rsub(text, "ˈʃɲ", "ʃˈɲ")
	text = rsub(text, "ˈʃk", "ʃˈk")
	text = rsub(text, "ˈxn", "xˈn")
	text = rsub(text, "ɛˈzd", "ɛzˈd")
	text = rsub(text, "r̩ˈt͡ʃk", "r̩t͡ʃˈk")
	text = rsub(text, "r̩ˈt͡sk", "r̩t͡sˈk")
	text = rsub(text, "r̩ˈzɡ", "r̩zˈɡ")
	text = rsub(text, "r̩ˈpn", "r̩pˈn")
	text = rsub(text, "r̩ˈst", "r̩sˈt")
	text = rsub(text, "aˈt͡sk", "at͡sˈk")
	text = rsub(text, "ɛˈt͡sk", "ɛt͡sˈk")
	text = rsub(text, "iˈt͡sk", "it͡sˈk")
	text = rsub(text, "ɔˈt͡sk", "ɔt͡sˈk")
	text = rsub(text, "uˈt͡sk", "ut͡sˈk")
	text = rsub(text, "uˈʃm", "uʃˈm")
	text = rsub(text, "iˈst", "isˈt")
	text = rsub(text, "naˈji", "najˈi")
	-- The word "или" is left unstressed.
	text = rsub(text, "#ˈiɫi#", "#ili#")
	-- A word consisting of a single obstruent gets a schwa after it.
	text = rsub(text, "#p#", "#pə#")
	text = rsub(text, "#b#", "#bə#")
	text = rsub(text, "#t#", "#tə#")
	text = rsub(text, "#d#", "#də#")
	text = rsub(text, "#c#", "#cə#")
	text = rsub(text, "#ɟ#", "#ɟə#")
	text = rsub(text, "#k#", "#kə#")
	text = rsub(text, "#ɡ#", "#ɡə#")
	text = rsub(text, "#f#", "#fə#")
	text = rsub(text, "#v#", "#və#")
	text = rsub(text, "#s#", "#sə#")
	text = rsub(text, "#z#", "#zə#")
	text = rsub(text, "#ʃ#", "#ʃə#")
	text = rsub(text, "#ʒ#", "#ʒə#")
	text = rsub(text, "#x#", "#xə#")
	text = rsub(text, "#t͡s#", "#t͡sə#")
	text = rsub(text, "#d͡z#", "#d͡zə#")
	text = rsub(text, "#t͡ʃ#", "#t͡ʃə#")
	text = rsub(text, "#d͡ʒ#", "#d͡ʒə#")

	-- Palatalisation
	text = rsub(text, "ɫ([iɛ])", "l%1")
	text = rsub(text, "ɫ([j])", "ʎ")

	-- Voicing assimilation
	-- A run of voiced obstruents directly before a voiceless one is devoiced.
	text = rsub(text, "([bdɟɡzʒv" .. TIE .. "]*)(ˈ?[ptcksʃfx])", function(a, b)
		return rsub(a, '.', devoicing) .. b end)
	-- Devoicing at the end of the text or of a foot.
	text = rsub(text, "b##", "p##")
	text = rsub(text, "d##", "t##")
	text = rsub(text, "ɟ##", "c##")
	text = rsub(text, "ɡ##", "k##")
	text = rsub(text, "z##", "s##")
	text = rsub(text, "ʒ##", "ʃ##")
	text = rsub(text, "v##", "f##")
	-- Across a word boundary: devoice before a voiceless obstruent. The paired
	-- rules before a voiced sound or sonorant replace the match with itself and
	-- so change nothing.
	text = rsub(text, "b# #(ˈ?)([ptcksʃfx])", "p# #%1%2")
	text = rsub(text, "b# #(ˈ?)([bdɟɡzʒvmɱnɲvrɫljʎ])", "b# #%1%2")
	text = rsub(text, "d# #(ˈ?)([ptcksʃfx])", "t# #%1%2")
	text = rsub(text, "d# #(ˈ?)([bdɟɡzʒvmɱnɲvrɫljʎ])", "d# #%1%2")
	text = rsub(text, "ɟ# #(ˈ?)([ptcksʃfx])", "c# #%1%2")
	text = rsub(text, "ɟ# #(ˈ?)([bdɟɡzʒvmɱnɲvrɫljʎ])", "ɟ# #%1%2")
	text = rsub(text, "ɡ# #(ˈ?)([ptcksʃfx])", "k# #%1%2")
	text = rsub(text, "ɡ# #(ˈ?)([bdɟɡzʒvmɱnɲvrɫljʎ])", "ɡ# #%1%2")
	text = rsub(text, "z# #(ˈ?)([ptcksʃfx])", "s# #%1%2")
	text = rsub(text, "z# #(ˈ?)([bdɟɡzʒvmɱnɲvrɫljʎ])", "z# #%1%2")
	text = rsub(text, "ʒ# #(ˈ?)([ptcksʃfx])", "ʃ# #%1%2")
	-- NOTE(review): a word-final "#" appears to always be followed by " " or
	-- "#", so this rule looks unreachable; kept as in the original -- confirm.
	text = rsub(text, "ʒ#(ˈ?)([ptcksʃfx])", "ʃ#%1%2")
	text = rsub(text, "ʒ# #(ˈ?)([bdɟɡzʒvmɱnɲvrɫljʎ])", "ʒ# #%1%2")
	text = rsub(text, "v# #(ˈ?)([ptcksʃfx])", "f# #%1%2")
	text = rsub(text, "v# #(ˈ?)([bdɟɡzʒvmɱnɲvrɫljʎ])", "v# #%1%2")
	-- A voiceless obstruent directly before a voiced one becomes voiced.
	text = rsub(text, "(p)(ˈ?)([bdɟɡzʒ])", "b%2%3")
	text = rsub(text, "(t)(ˈ?)([bdɟɡzʒ])", "d%2%3")
	text = rsub(text, "(c)(ˈ?)([bdɟɡzʒ])", "ɟ%2%3")
	text = rsub(text, "(k)(ˈ?)([bdɟɡzʒ])", "ɡ%2%3")
	text = rsub(text, "(s)(ˈ?)([bdɟɡzʒ])", "z%2%3")
	text = rsub(text, "(ʃ)(ˈ?)([bdɟɡzʒ])", "ʒ%2%3")
	text = rsub(text, "zt##", "st##")
	text = rsub(text, "ʒt##", "ʃt##")
	-- Repair affricates whose two halves ended up disagreeing in voicing.
	text = rsub(text, "d͡ʃ", "t͡ʃ")
	text = rsub(text, "t͡ʒ", "d͡ʒ")

	-- Sibilant assimilation
	text = rsub(text, "[sz](ˈ?[td]?" .. TIE .. "?)([ʃʒ])", "%2%1%2")

	-- Nasal assimilation
	text = rsub(text, "n([ɡkx]+)", "ŋ%1")
	text = rsub(text, "nˈ([ɡkx]+)", "ŋˈ%1")
	text = rsub(text, "n̩([ɡkx]+)", "ŋ̩%1")
	text = rsub(text, "n̩ˈ([ɡkx]+)", "ŋ̩ˈ%1")
	text = rsub(text, "n([bp]+)", "m%1")
	text = rsub(text, "nˈ([bp]+)", "mˈ%1")
	text = rsub(text, "n([cɟ]+)", "ɲ%1")
	text = rsub(text, "nˈ([cɟ]+)", "ɲˈ%1")
	text = rsub(text, "[nm]([fv]+)", "ɱ%1")
	text = rsub(text, "[nm]ˈ([fv]+)", "ɱˈ%1")

	-- Epenthesis
	-- Between i and a following non-i vowel the glide is shown as optional "(j)".
	text = rsub(text, "(i)j([aɛɔu])", "%1(j)%2")
	text = rsub(text, "(i)([aɛɔu])", "%1(j)%2")
	-- These two replace the match with itself, i.e. nothing changes when a
	-- stress mark separates the vowels.
	text = rsub(text, "(iˈ)j([aɛɔu])", "%1j%2")
	text = rsub(text, "(iˈ)([aɛɔu])", "%1%2")

	-- /r/ allophony
	-- r after a vowel becomes a tap, but is restored when a non-vowel follows.
	text = rsub(text, "([aɛiɔuə])r", "%1ɾ")
	text = rsub(text, "ɾ([^aɛiɔuə])", "r%1")

	-- Strip hashes
	text = rsub(text, "#", "")

	return text
end
-- Append a stress-position category to `cats` for the single-word
-- transcription `syllables`.
--
-- Everything up to and including the last stress mark is discarded, and the
-- remainder is passed to m_syllables.getVowels(); its result is compared with
-- 1, 2 and 3 (presumably a count of syllable nuclei -- confirm against
-- Module:syllables). Any other result adds no category.
--
-- Kept local and given an explicit output table so that the module no longer
-- creates or depends on global variables.
local function assign_stresscats(syllables, cats)
	-- gsub returns two values; only the substituted string is kept.
	local from_stress = mw.ustring.gsub(syllables, ".*ˈ", "")
	local count = m_syllables.getVowels(from_stress, lang)
	if count == 1 then
		table.insert(cats, "Macedonian oxytone terms")
	elseif count == 2 then
		table.insert(cats, "Macedonian paroxytone terms")
	elseif count == 3 then
		table.insert(cats, "Macedonian proparoxytone terms")
	end
end

-- Template entry point: transcribe the term and return the formatted IPA
-- followed by the formatted categories.
--
-- Parameters read from the parent frame:
--   1          the term to transcribe; when absent, "пример" is used on pages
--              in the Template namespace and the page title elsewhere
--   no_stress  boolean; when set, no stress-position category is added
function export.show(frame)
	local params = {
		[1] = {},
		["no_stress"] = {type = "boolean", default = false},
	}

	local title = mw.title.getCurrentTitle()
	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = args[1] or title.nsText == "Template" and "пример" or title.text

	local IPA = export.toIPA(term)

	-- Stress categories are only assigned to single-word transcriptions.
	local syllable_cats = {}
	if mw.ustring.find(IPA, " ") == nil and args.no_stress == false then
		assign_stresscats(IPA, syllable_cats)
	end

	IPA = "[" .. IPA .. "]"
	IPA = require("Module:IPA").format_IPA_full { lang = lang, items = {{ pron = IPA }} }

	return IPA .. m_utils.format_categories(syllable_cats, lang)
end
return export