Module:User:AmazingJus/af
Jump to navigation
Jump to search
- The following documentation is located at Module:User:AmazingJus/af/documentation. [edit] Categories were auto-generated by Module:documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
49 of 98 tests failed. (refresh)
Text | Expected | Actual | |
---|---|---|---|
![]() | Afrika | A‧fri‧ka | A‧fri‧ka |
![]() | Afrikaans | A‧fri‧kaans | A‧fri‧kaans |
![]() | Afrikaner | A‧fri‧ka‧ner | A‧fri‧ka‧ner |
![]() | Amerikaner | A‧me‧ri‧ka‧ner | A‧me‧ri‧ka‧ner |
![]() | André | An‧dré | An‧dré |
![]() | asyn | a‧syn | a‧syn |
![]() | belangrik | be‧lang‧rik | be‧lang‧rik |
![]() | berg | berg | berg |
![]() | berge | ber‧ge | ber‧ge |
![]() | berg+reeks | berg‧reeks | berg‧reeks |
![]() | bos+bedryf | bos‧be‧dryf | bos‧be‧dryf |
![]() | beskou | be‧skou | be‧skou |
![]() | beter | be‧ter | be‧ter |
![]() | beton | be‧ton | be‧ton |
![]() | betoon | be‧toon | be‧toon |
![]() | Botha | Bo‧tha | Bo‧tha |
![]() | braai | braai | braai |
![]() | Coetzee | Coet‧zee | Coet‧zee |
![]() | Coetzer | Coet‧zer | Coet‧zer |
![]() | dokumentasie | do‧ku‧men‧ta‧sie | do‧ku‧men‧ta‧sie |
![]() | du Plessis | du Ples‧sis | du Ples‧sis |
![]() | eggo | eg‧go | eg‧go |
![]() | feste | fes‧te | fes‧te |
![]() | geëet | ge‧eet | ge‧eet |
![]() | gegee | ge‧gee | ge‧gee |
![]() | ghitaar | ghi‧taar | ghi‧taar |
![]() | hondjie | hon‧djie | hon‧djie |
![]() | Jean Pierre | Jean Pierre | Je‧an Pier‧re |
![]() | Johannesburg | Jo‧han‧nes‧burg | Jo‧han‧nes‧burg |
![]() | karretjie | kar‧re‧tjie | kar‧re‧tjie |
![]() | klu[b] | klub | klub |
![]() | le Gran.ge | le Gran‧ge | le Gran‧ge |
![]() | Macedonië | Ma‧ce‧do‧ni‧e | Ma‧ce‧do‧ni‧e |
![]() | Nortje | Nor‧tje | Nor‧tje |
![]() | 'n | 'n | 'n |
![]() | onweer | on‧weer | on‧weer |
![]() | omstandigheid | om‧stan‧dig‧heid | om‧stan‧di‧gheid |
![]() | Paraguay | Pa‧ra‧guay | Pa‧ra‧gu‧a‧y |
![]() | Pretoria | Pre‧to‧ri‧a | Pre‧to‧ri‧a |
![]() | Schalk | Schalk | Schalk |
![]() | sjokolade | sjo‧ko‧la‧de | sjo‧ko‧la‧de |
![]() | s'n | s'n | s'n |
![]() | spieël | spie‧el | spie‧el |
![]() | Suid-Afrika | Suid-‧A‧fri‧ka | Suid-‧A‧fri‧ka |
![]() | vanaand | va‧naand | va‧naand |
![]() | Venesië | Ve‧ne‧si‧e | Ve‧ne‧si‧e |
![]() | vinger | ving‧er | ving‧er |
![]() | wîe | wî‧e | wî‧e |
![]() | zero | ze‧ro | ze‧ro |
Text | Expected | Actual | |
---|---|---|---|
![]() | Afrika | ˈɑː.fri.ka | ɑː.fri.kɑː |
![]() | Afrikaans | ˌa.friˈkɑ̃ːs, ˌa.friˈkɑːns | ɑː.fri.kɑːns |
![]() | Afrikaner | ˌa.friˈkɑː.nər | ɑː.fri.kɑː.nɛr |
![]() | Amerikaner | aˌmɪə̯.riˈkɑː.nər | ɑː.mɪə̯.ri.kɑː.nɛr |
![]() | André | ˈan.drəɪ̯ | an.dré |
![]() | asyn | aˈsəɪ̯n | ɑː.səɪ̯n |
![]() | belangrik | bəˈlaŋ.rək | be>.laŋ.rək |
![]() | berg | ˈbɛrχ | be>rχ |
![]() | berge | ˈbɛr.ɡə | be>r.ɡɪə̯ |
![]() | berg+reeks | ˈbɛrχ.rɪə̯ks | be>rχ.rɪə̯ks |
![]() | bos+bedryf | ˈbɔs.bəˌdrəɪ̯f | bɔs.bɪə̯.drəɪ̯f |
![]() | beskou | bəˈskœʊ̯ | be>.skœʊ̯ |
![]() | beter | ˈbɪə̯.tər | be>.tɛr |
![]() | beton | bəˈtɔn | be>.tɔn |
![]() | betoon | bəˈtʊə̯n | be>.tʊə̯n |
![]() | Botha | ˈbʊə̯.ta | bʊə̯.tɑː |
![]() | braai | brɑːɪ̯ | brɑːi |
![]() | Coetzee | kutˈseə̯ | kut.zɪə̯ |
![]() | Coetzer | ˈkut.sər | kut.zɛr |
![]() | dokumentasie | ˌdɔ.kju.mɛnˈtɑː.si, ˌdɔ.ky.mɛnˈtɑː.si | dʊə̯.ky.mɛn.tɑː.si |
![]() | du Plessis | dy.pləˈsi | dy plɛ.səs |
![]() | eggo | ˈɛ.χu | e.χu |
![]() | feste | ˈfɛs.tə | fɛs.tɪə̯ |
![]() | geëet | χəˈɪə̯t | χe>.ɪə̯t |
![]() | gegee | χəˈχɪə̯ | χe>.χɪə̯ |
![]() | ghitaar | ɡiˈtɑːr | ɡi.tɑːr |
![]() | hondjie | ˈɦœi̯ɲ.ci | ɦoŋ.ki |
![]() | Jean Pierre | anˈpiːr | jɪə̯.an pi.rɪə̯ |
![]() | Johannesburg | jʊə̯ˈɦa.nəsˌbœrχ | jʊə̯.ɦa.nɛs.bœrχ |
![]() | karretjie | ˈka.rəi̯.ci | ka.rɪə̯.ki |
![]() | klu[b] | klab, klœb | klub |
![]() | le Gran.ge | ləˈχran.si | lɪə̯ χran.χɪə̯ |
![]() | Macedonië | ˌma.səˈdʊə̯.ni.ə | mɑː.sɪə̯.dʊə̯.ni.ɪə̯ |
![]() | Nortje | nɔrˈkɪə̯ | nɔr.ʧɪə̯ |
![]() | 'n | ə(n) | ə(n) |
![]() | onweer | ˈɔn.vɪə̯r | ɔn.vɪə̯r |
![]() | omstandigheid | ɔmˈstan.dəχˌɦəɪ̯t | ɔm>.stan.di.ɡəɪ̯d |
![]() | Paraguay | ˈpa.ra.ɡwaɪ̯ | pɑː.rɑː.χy.ɑː.əɪ̯ |
![]() | Pretoria | prəˈtʊə̯.ri.a | prɪə̯.tʊə̯.ri.ɑː |
![]() | Schalk | skalk | skalk |
![]() | sjokolade | ˌʃɔ.kɔˈlɑː.də | ʃʊə̯.kʊə̯.lɑː.dɪə̯ |
![]() | s'n | sən | sən |
![]() | spieël | spiːl | spi.ɛl |
![]() | Suid-Afrika | səɪ̯tˈɑː.fri.ka | suɪ̯d-.ɑː.fri.kɑː |
![]() | vanaand | fəˈnɑːnt | vɑː.nɑːnd |
![]() | Venesië | vəˈniː.si.ə | vɪə̯.nɪə̯.si.ɪə̯ |
![]() | vinger | ˈfəŋ.ər | viŋ.ɛr |
![]() | wîe | ˈvəː.(ɦ)ə | vəː.ɪə̯ |
![]() | zero | ˈzɪə̯.ru | zɪə̯.ru |
local export = {}
local lang = require("Module:languages").getByCode("af")
local sc = require("Module:scripts").getByCode("Latn")
local hyph = require("Module:hyphenation")
local str = require("Module:string")
local tbl = require("Module:table")
-- Wrap `text` in script tagging for this module's language (af) and script
-- (Latn); `face` is forwarded unchanged to Module:script utilities.
function export.tag_text(text, face)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc, face)
end
-- Build a full link to `term` using this module's language and script;
-- `face` is forwarded unchanged to Module:links.
function export.link(term, face)
	local link_data = { term = term, lang = lang, sc = sc }
	return require("Module:links").full_link(link_data, face)
end
-- character constructor: called below with code points, e.g. u(0x0300)
local u = require("Module:string/char")
-- short local aliases for the Unicode-aware mw.ustring functions
local decomp = mw.ustring.toNFD -- decompose (NFD)
local recomp = mw.ustring.toNFC -- recompose (NFC)
local lower = mw.ustring.lower
local find = mw.ustring.find
local len = mw.ustring.len
local match = mw.ustring.match
local sub = mw.ustring.sub
local rsubn = mw.ustring.gsub -- returns the new string AND the replacement count
local rmatch = mw.ustring.gmatch
-- Like rsubn(), but returns only the substituted string: the surrounding
-- parentheses truncate the result list to its first value, dropping the count.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end
-- Keep applying rsub() with the same pattern and replacement until a pass
-- leaves the string unchanged, then return that fixed point.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end
-- list of constants
local GR = u(0x0300) -- combining grave accent
local AC = u(0x0301) -- combining acute accent
local CR = u(0x0302) -- combining circumflex accent
local DR = u(0x0308) -- combining diaeresis
-- all four combining marks, for use inside a pattern character class
local accents = GR .. AC .. CR .. DR
-- vowel and consonant letters, both cases, for use inside character classes
local vowels = "aeiouyAEIOUY"
-- the uppercase half previously lacked "R", so a capital R was not treated as
-- a consonant during syllabification
local cons = "bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ"
-- syllable break (‧) and word border (#) markers
local syll_boundary = "‧#"
-- Multi-letter graphemes (trigraphs first, then digraphs) mapped to their
-- phonetic value; this covers diphthongs and long vowels. The values marked
-- "placeholder" are single stand-in characters that pron() swaps for the real
-- vowel only after the single-letter vowel rules have been applied.
local graphemes = {
	-- trigraphs
	aai = "ɑːɪ̯",
	eeu = "iʊ̯",
	ieu = "iʊ̯",
	oei = "uɪ̯",
	ooi = "oːɪ̯",
	-- digraphs
	aa = "ɑː",
	ae = "ɑː",
	ai = "aɪ̯",
	au = "œʊ̯",
	ee = "ɪə̯",
	ei = "əɪ̯",
	eu = "iʊ̯",
	ie = "į", -- placeholder
	oe = "ů", -- placeholder
	oi = "ɔɪ̯",
	oo = "ʊə̯",
	ou = "œʊ̯",
	ui = "uɪ̯",
	uu = "ü", -- placeholder
}
-- Collect the grapheme keys and order them longest-first, so that trigraphs
-- are considered before the digraphs they contain. The relative order of
-- keys with equal length is whatever pairs() and table.sort happen to give.
local graphemes_sorted = {}
for grapheme in pairs(graphemes) do
	table.insert(graphemes_sorted, grapheme)
end
local function longer_first(a, b)
	return len(a) > len(b)
end
table.sort(graphemes_sorted, longer_first)
-- Lookup tables for groups of related sounds.
local sets = {
	-- single vowel letter -> { closed-syllable value, open-syllable value },
	-- indexed as [1] and [2] by pron()
	vowel_length = {
		a = { "a", "ɑː" },
		e = { "ɛ", "ɪə̯" },
		i = { "ə", "i" },
		o = { "ɔ", "ʊə̯" },
		u = { "œ", "y" },
	},
	-- { voiced, voiceless } consonant pairs
	cons_voice = {
		{ "b", "p" },
		{ "d", "t" },
		{ "ʤ", "ʧ" },
		{ "ɡ", "k" },
		{ "v", "f" },
		{ "z", "s" },
		{ "ʒ", "ʃ" },
	},
}
-- Affixes that syllabify() marks off from the stem: a prefix at the start of
-- a word gets ">" appended, a suffix at the end of a word gets "<" prepended.
local affixes = {
	prefixes = {
		"aan", "agter", "be", "deur", "er", "ge",
		"her", "om", "ont", "onder", "ver", "voor",
	},
	suffixes = {
		"agtig", "baar", "dom", "end", "heid",
		"lik", "loos", "nis", "sel", "skap",
	},
}
-- order both lists shortest-first; syllabify() stops at the first affix that
-- matches
table.sort(affixes.prefixes, function(a, b) return len(a) < len(b) end)
table.sort(affixes.suffixes, function(a, b) return len(a) < len(b) end)
-- words that carry no stress
local unstressed = { "die", "dit", "is", "nie", "'n" }
-- list of respelling substitutions
-- Each entry is {pattern, replacement, origin}. pron() walks the list in order
-- and applies an entry when its origin is "-" (applies to every word) or equals
-- the `orig` argument (e.g. "fr", "en", "af"). Once a pattern has been applied,
-- later entries with the identical pattern are skipped, so an origin-specific
-- rule must precede its generic "-" fallback.
-- Uppercase letters in a replacement (OU, EI, A, wA) keep that text from being
-- matched by the remaining rules in this list; pron() lowercases the term
-- after this pass, so they are then handled by the grapheme and vowel rules.
-- "#" marks a word border and "‧" a syllable break.
local subs = {
-- 'N
{"#'n#", "#ə(n)#", "-"}, -- pronounced /ə(n)/ as the article 'n
{"'n#", "ən#", "-"}, -- pronounced /ən/ otherwise
-- CH
{"ch", "ʃ", "fr"}, -- pronounced /ʃ/ in French loans
{"ch([" .. cons .. "]?[ei])", "χ%1", "-"}, -- pronounced /χ/ before an optional single consonant followed by "e" or "i"
{"ch", "k", "-"}, -- otherwise /k/
-- NG
{"ng", "ŋ", "-"}, -- pronounced /ŋ/
-- SH/SJ
{"s[hj]", "ʃ", "-"}, -- pronounced /ʃ/
-- DJ/TJ
{"[dt]jie", "kį", "-"}, -- suffix "djie"/"tjie" is pronounced /-ci/ ("į" is the temporary stand-in for "ie")
{"dj", "ʤ", "-"}, -- "dj" is otherwise /d͡ʒ/
{"tj", "ʧ", "-"}, -- "tj" is otherwise /t͡ʃ/
-- GH
{"gh", "ɡ", "-"}, -- pronounced /ɡ/
-- C
{"c([ei])", "s%1", "-"}, -- pronounced /s/ before "e" or "i"
{"c", "k", "-"}, -- otherwise /k/
-- G
{"g", "ɡ", "en"}, -- pronounced /ɡ/ in English loans
{"r‧ge", "r‧ɡe", "-"}, -- pronounced /ɡ/ between /r/ and /ə/
{"g", "χ", "-"}, -- otherwise /χ/
{"n(‧?[kɡ])", "ŋ%1", "-"}, -- /ŋ/ is an allophone of /n/ before /ɡ/ and /k/
-- V
{"v", "f", "af"}, -- pronounced /f/ in native words
-- W
{"w", "w", "en"}, -- pronounced /w/ in English loans
{"w", "v", "-"}, -- otherwise /v/
-- EAU
{"eaux?", "OU", "fr"}, -- pronounced /œʊ̯/ in French loans
-- OI
{"oi", "wA", "fr"}, -- pronounced /wa/ in French loans
-- X
{"#x", "#s", "-"}, -- pronounced /s/ word-initially
{"x", "ks", "-"}, -- otherwise /ks/
-- H
{"([" .. cons .. vowels .. "])h", "%1", "-"}, -- silent directly after a consonant or vowel letter (consonant digraph or syllable-final)
{"h", "ɦ", "-"}, -- otherwise /ɦ/
-- O
{"o([" .. syll_boundary .. "])", "OU%1", "en"}, -- pronounced /œʊ̯/ in open syllables in English loans
{"o#", "ů#", "-"}, -- otherwise /u/ in word-final position ("ů" is the temporary stand-in for "oe")
-- U
{"u([" .. cons .. "])", "A%1", "en"}, -- pronounced /a/ in closed syllables in English loans
{"u", "jů", "en"}, -- otherwise /ju/ in English loans
-- Y
{"y", "EI", "-"}, -- pronounced /əɪ̯/
-- circumflex accent
{CR, "ː", "-"} -- lengthens a vowel with its short quality
}
-- Wrap every known trigraph/digraph in braces, scanning left to right and
-- taking the longest grapheme that starts at each position. A letter is
-- claimed by at most one grapheme, so the result never contains nested braces
-- and does not depend on the relative order of equal-length graphemes.
-- All graphemes are plain ASCII, so scanning by byte is safe on UTF-8 text:
-- ASCII bytes never occur inside a multi-byte sequence.
local function mark_graphemes(text)
	local pieces = {}
	local i, n = 1, #text
	while i <= n do
		local hit
		-- graphemes_sorted is ordered longest-first, so trigraphs win
		for _, graph in ipairs(graphemes_sorted) do
			if string.sub(text, i, i + #graph - 1) == graph then
				hit = graph
				break
			end
		end
		if hit then
			pieces[#pieces + 1] = "{" .. hit .. "}"
			i = i + #hit
		else
			pieces[#pieces + 1] = string.sub(text, i, i)
			i = i + 1
		end
	end
	return table.concat(pieces)
end

-- syllabify words
-- `term` must already have every word wrapped in "#" border markers.
-- Returns the term in decomposed (NFD) form with "‧" between syllables; the
-- "#" borders and the affix markers ">" / "<" are left in place for the caller
-- to use or strip. `orig` and `pos` are accepted but not used here.
local function syllabify(term, orig, pos)
	-- decompose accents
	term = decomp(term)
	-- remove diaeresis and split syllable (note: diaeresis shouldn't be displayed in its hyphenation form)
	term = rsub(term, "([" .. vowels .. "])" .. DR, "‧%1")
	-- mark trigraphs and digraphs with curly braces
	term = mark_graphemes(term)
	-- add > and < for prefix and suffixes respectively; the affix is passed
	-- through the same grapheme marking as the term so that affixes containing
	-- a digraph ("heid", "baar", "aan", ...) can still be found
	for _, prefix in ipairs(affixes.prefixes) do
		local marked = "#" .. mark_graphemes(prefix)
		if find(term, marked, 1, true) then
			term = rsub(term, marked, marked .. ">")
			break
		end
	end
	for _, suffix in ipairs(affixes.suffixes) do
		local marked = mark_graphemes(suffix) .. "#"
		if find(term, marked, 1, true) then
			term = rsub(term, marked, "<" .. marked)
			break
		end
	end
	-- add dot before consonant + vowel
	term = rsub(term, "([" .. cons .. "]?{?)([" .. vowels .. "][" .. accents .. "]?)", "‧%1%2")
	-- remove any dots inside brackets
	term = rsub(term, "{[^}]*}", function(a) return rsub(a, "‧", "") end)
	-- shift dot before certain consonant clusters and digraphs
	term = rsub(term, "([bcfgkpvw])‧l", "‧%1l") -- clusters with l
	term = rsub(term, "([bcdfgkptwv])‧r", "‧%1r") -- clusters with r
	term = rsub(term, "([dst])‧j", "‧%1j") -- digraphs with j
	term = rsub(term, "([ckgt])‧h", "‧%1h") -- digraphs with h
	term = rsub(term, "n‧g", "ng‧") -- ng is syllable-final
	term = rsub(term, ">s‧", ">‧s") -- s can form a cluster after a prefix
	-- remove leading dots and brackets
	term = rsub(term, "#([^" .. vowels .. "]*)‧", "#%1")
	-- a "." typed in the input is an explicit syllable break
	term = rsub(term, "%.", "‧")
	term = rsub(term, "[{}+]", "") -- comment out to debug
	-- collapse runs of breaks left by the steps above
	return rsub_repeatedly(term, "‧‧", "‧")
end
-- hyphenation function
-- Accepts either a plain string or a table with an `args` field (in which
-- case args[1] is used). Returns the term in NFC form with "‧" between
-- syllables and all internal markers (#, [, ], <, >) removed.
-- TODO: the result is returned as plain text; formatting it through
-- Module:hyphenation is not implemented yet.
function export.hyphenation(term)
	if type(term) == "table" then
		term = term.args[1]
	end
	-- wrap each space-separated word in "#" border markers
	local bordered = rsub(term, "([^ ]+)", "#%1#")
	local syllabified = recomp(syllabify(bordered))
	return rsub(syllabified, "[#%[%]<>]", "")
end
-- pronunciation function
-- term: the spelling to convert (one or more space-separated words)
-- orig: origin code selecting origin-specific rules in `subs` (e.g. "fr",
--       "en", "af"); rules with origin "-" always apply
-- pos:  forwarded to syllabify(), otherwise unused here
-- Returns the transcription with "." between syllables. Stress is not marked.
local function pron(term, orig, pos)
	-- make text lowercase
	term = lower(term)
	-- mark word borders with #
	term = rsub(term, "([^ ]+)", "#%1#")
	-- syllabify term
	term = syllabify(term, orig, pos)
	-- substitute phonemes: a rule applies when its origin is "-" or equals
	-- `orig`; once a pattern has been applied, later rules with the same
	-- pattern are skipped so a generic fallback cannot undo a specific rule
	local subbed = {}
	for _, rule in ipairs(subs) do
		local from, to, origin = rule[1], rule[2], rule[3]
		if not subbed[from] and (origin == "-" or origin == orig) then
			term = rsub(term, from, to)
			subbed[from] = true
		end
	end
	-- make text lowercase again (rules use uppercase to protect their output)
	term = lower(term)
	-- substitute graphemes, longest first: iterating the table with pairs()
	-- visits keys in an unspecified order, so "aa" could consume part of "aai"
	for _, graph in ipairs(graphemes_sorted) do
		term = rsub(term, graph, graphemes[graph])
	end
	-- substitute single-letter vowels; the following character decides the
	-- quality. Consonants already rewritten to IPA by `subs` must count as
	-- closing the syllable too, otherwise the vowel letter is left untouched.
	local ipa_cons = "ŋχʃʤʧɡɦ"
	term = rsub(term, "([aeiou])([‧#ː" .. cons .. ipa_cons .. "])", function(a, b)
		local qualities = sets.vowel_length[a]
		if b == "‧" or b == "#" then
			return qualities[2] .. b -- for open syllables
		end
		return qualities[1] .. b -- for closed syllables
	end)
	-- replace į, ů, ü with their actual phonetic values
	term = rsub(term, "[įůü]", {["į"] = "i", ["ů"] = "u", ["ü"] = "y"})
	-- remove double consonants
	term = rsub(term, "(.)(‧?)%1", "%2%1")
	-- final adjustments
	term = rsub(term, "‧", ".")
	-- strip word borders, brackets and the affix markers left by syllabify()
	-- NOTE(review): a prefix vowel directly before ">" is still not converted
	-- by the vowel rules above; only the marker itself is removed here.
	return rsub(term, "[#%[%]<>]", "")
end
-- main export function
-- Accepts either a plain string or a table with an `args` field (in which
-- case args[1] is used) and returns pron() of it; `orig` and `pos` are
-- forwarded unchanged.
function export.toIPA(term, orig, pos)
	local input = term
	if type(input) == "table" then
		input = input.args[1]
	end
	return pron(input, orig, pos)
end
return export