Jump to content

Module:User:AmazingJus/af

From Wiktionary, the free dictionary

145 of 148 tests failed. (refresh)

TextExpectedActualComments
test_hyphen:
FailedAfrikaA‧fri‧ka‧A‧fri‧ka
FailedAfrikaansA‧fri‧kaans‧A‧fri‧kaans
FailedAfrikanerA‧fri‧ka‧ner‧A‧fri‧ka‧ner
FailedAmerikanerA‧me‧ri‧ka‧ner‧A‧me‧ri‧ka‧ner
Failedasyna‧syn‧a‧syn
Failedbelangrikbe‧lang‧rik‧be‧lang‧rik
Failedbergberg‧berg
Failedbergeber‧ge‧ber‧ge
Failedberg-reeksberg‧reeks‧berg-‧reeks
Failedbos-bedryfbos‧be‧dryf‧bos-‧be‧dryf
Failedbeskoube‧skou‧bes‧kou
Failedbe+terbe‧ter‧be+‧ter
Failedbetonbe‧ton‧be‧ton
Failedbetoonbe‧toon‧be‧toon
FailedBothaBo‧tha‧Bo‧tha
Failedbraaibraai‧braai
Faileddokumentasiedo‧ku‧men‧ta‧sie‧do‧ku‧men‧ta‧sie
Failedeggoeg‧go‧eg‧go
Failedfestefes‧te‧fes‧te
Failedgeëetge‧eet‧geë‧et
Failedgegeege‧gee‧ge‧gee
Failedghitaarghi‧taar‧ghi‧taar
Failedhondjiehon‧djie‧hon‧djie
FailedJohannesburgJo‧han‧nes‧burg‧Jo‧han‧nes‧burg
Failedkarretjiekar‧re‧tjie‧kar‧re‧tjie
Failedklu[b]klub‧klub
FailedMacedoniëMa‧ce‧do‧ni‧e‧Ma‧ce‧do‧nië
Passed'n'n'n
Failedonweeron‧weer‧on‧weer
Failedomstandigheidom‧stan‧dig‧heid‧oms‧tan‧di‧gheid
FailedParaguayPa‧ra‧guay‧Pa‧ra‧gu‧a‧y
FailedPretoriaPre‧to‧ri‧aP‧re‧to‧ri‧a
Failedsjokoladesjo‧ko‧la‧de‧sjo‧ko‧la‧de
Passeds'ns'ns'n
Failedspieëlspie‧els‧pieël
FailedSuid-AfrikaSuid-‧A‧fri‧ka‧Suid-‧A‧fri‧ka
Failedvanaandva‧naand‧va‧naand
FailedVenesiëVe‧ne‧si‧e‧Ve‧ne‧sië
Failedvingerving‧er‧ving‧er
Passedwîewî‧ewî‧e
Failedzeroze‧ro‧ze‧ro
FailedAndréAn‧dré‧André
FailedBarnardBar‧nard‧Bar‧nard
FailedBlignautBlig‧nautB‧lig‧naut
FailedBlignaultBlig‧naultB‧lig‧nault
FailedCilliersCil‧liers‧Cil‧liers
FailedCoetzeeCoet‧zee‧Coet‧zee
FailedCoetzerCoet‧zer‧Coet‧zer
Failedde Villiersde Vil‧liers‧de ‧Vil‧liers
Faileddu Plessisdu Ples‧sis‧du P‧les‧sis
Faileddu Preezdu Preez‧du P‧reez
Faileddu Toitdu Toit‧du ‧Toit
FailedFouchéFou‧ché‧Fouché
FailedFourieFou‧rie‧Fou‧rie
FailedGrovéGro‧véG‧rové
FailedJean PierreJean Pierre‧Je‧an ‧Pier‧re
FailedJoubertJou‧bert‧Jou‧bert
FailedLa.bus.chag.neLa‧bus‧chag‧ne‧La‧bus‧chag‧ne
FailedLa.bu.schagneLa‧bu‧schagne‧La‧bu‧s‧chag‧ne
Failedle Gran.gele Gran‧ge‧le G‧ran‧ge
Failedle Rouxle Roux‧le R‧oux
FailedMalanMa‧lan‧Ma‧lan
FailedMalherbeMal‧her‧be‧Mal‧her‧be
FailedMaraisMa‧rais‧Ma‧rais
FailedMeintjesMein‧tjes‧Mein‧tjes
FailedNaudéNau‧dé‧Naudé
FailedNortjeNor‧tje‧Nor‧tje
FailedPienaarPie‧naar‧Pie‧naar
FailedSchalkSchalkS‧chalk
FailedTerblancheTer‧blanche‧Ter‧blan‧che
FailedTheronThe‧ronT‧he‧ron
FailedViljoenVil‧joen‧Vil‧joen
FailedVisagieVi‧sa‧gie‧Vi‧sa‧gie
FailedViviersVi‧vi‧ers‧Vi‧viers
TextExpectedActualComments
test_pron:
FailedAfrikaˈɑː.fri.ka‧a‧fri‧ka
FailedAfrikaansˌa.friˈkɑ̃ːs, ˌa.friˈkɑːns‧a‧fri‧kaans
FailedAfrikanerˌa.friˈkɑː.nər‧a‧fri‧ka‧ner
FailedAmerikaneraˌmɪə̯.riˈkɑː.nər‧a‧me‧ri‧ka‧ner
Failedasynaˈsəɪ̯n‧a‧syn
Failedbelangrikbəˈlaŋ.rək‧be>‧lang‧rik
Failedbergˈbɛrχ‧berg
Failedbergeˈbɛr.ɡə‧ber‧ge
Failedberg-reeksˈbɛrχ.rɪə̯ks‧berg-‧reeks
Failedbos-bedryfˈbɔs.bəˌdrəɪ̯f‧bos-‧be>‧dryf
Failedbeskoubəˈskœʊ̯‧be>s‧kou
Failedbe+terˈbɪə̯.tər‧be+‧ter
Failedbetonbəˈtɔn‧be>‧ton
Failedbetoonbəˈtʊə̯n‧be>‧toon
FailedBothaˈbʊə̯.ta‧bo‧tha
Failedbraaibrɑːɪ̯‧braai
Faileddokumentasieˌdɔ.kju.mɛnˈtɑː.si, ˌdɔ.ky.mɛnˈtɑː.si‧do‧ku‧men‧ta‧sie
Failedeggoˈɛ.χu‧eg‧go
Failedfesteˈfɛs.tə‧fes‧te
Failedgeëetχəˈɪə̯t‧ge‧eet
Failedgegeeχəˈχɪə̯‧ge>‧gee
Failedghitaarɡiˈtɑːr‧ghi‧taar
Failedhondjieˈɦœi̯ɲ.ci‧hon‧djie
FailedJohannesburgjʊə̯ˈɦa.nəsˌbœrχ‧jo‧han‧nes‧burg
Failedkarretjieˈka.rəi̯.ci‧kar‧re‧tjie
Failedklu[b]klab, klœb‧klub
FailedMacedoniëˌma.səˈdʊə̯.ni.ə‧ma‧ce‧do‧ni‧e
Failed'nə(n)'n
Failedonweerˈɔn.vɪə̯r‧on‧weer
Failedomstandigheidɔmˈstan.dəχˌɦəɪ̯t‧om>s‧tan‧dig<‧heid
FailedParaguayˈpa.ra.ɡwaɪ̯‧pa‧ra‧gu‧a‧y
FailedPretoriaprəˈtʊə̯.ri.a‧pre‧to‧ri‧a
Failedsjokoladeˌʃɔ.kɔˈlɑː.də‧sjo‧ko‧la‧de
Faileds'nsəns'n
Failedspieëlspiːls‧pie‧el
FailedSuid-Afrikasəɪ̯tˈɑː.fri.ka‧suid-‧a‧fri‧ka
Failedvanaandfəˈnɑːnt‧va‧naand
FailedVenesiëvəˈniː.si.ə‧ve‧ne‧si‧e
Failedvingerˈfəŋ.ər‧ving‧er
Failedwîeˈvəː.(ɦ)ə‧wî‧e
Failedzeroˈzɪə̯.ru‧ze‧ro
FailedAndréˈan.drəɪ̯‧an‧dré
FailedBarnardˈbar.nart‧bar‧nard
FailedBlignautˈbləχ.nœʊ̯t, ˈbli.nœʊ̯‧blig‧naut
FailedBlignaultˈbləχ.nœʊ̯t, ˈbli.nœʊ̯‧blig‧nault
FailedCillierssəlˈjeə̯‧cil‧liers
FailedCoetzeekutˈseə̯‧coet‧zee
FailedCoetzerˈkut.sər‧coet‧zer
Failedde Villiersdə.fəlˈjeə̯‧de ‧vil‧liers
Faileddu Plessisdy.pləˈsi‧du ‧ples‧sis
Faileddu Preezdəˈpreə̯‧du ‧preez
Faileddu Toitdəˈtoːɪ̯‧du ‧toit
FailedFouchéfuˈʃeə̯‧fou‧ché
FailedFouriefuˈri‧fou‧rie
FailedGrovéχruˈveə̯‧gro‧vé
FailedJean Pierreanˈpiːr‧je‧an ‧pier‧re
FailedJoubertjuˈbæːr‧jou‧bert
FailedLa.bus.chag.nela.busˈkaχ.nə‧la‧bus‧chag‧ne
FailedLa.bu.schagneˈla.bu.ʃəɪ̯n‧la‧bu‧s‧chag‧ne
Failedle Gran.geləˈχran.si‧le ‧gran‧ge
Failedle Rouxləˈruː‧le ‧roux
FailedMalanmaˈlan, maˈlaŋ‧ma‧lan
FailedMalherbemalˈɦɛr.bə‧mal‧her‧be
FailedMaraismaˈrɛː‧ma‧rais
FailedMeintjesməɪ̯ɲˈcis‧mein‧tjes
FailedNaudénœʊ̯ˈdeə̯‧nau‧dé
FailedNortjenɔrˈkɪə̯‧nor‧tje
FailedPienaarˈpi.nɑːr‧pie‧naar
FailedSchalkskalks‧chalk
FailedTerblanchetərˈblɑːnʃ‧ter‧blan‧che
FailedTheront(ə)ˈron‧the‧ron
FailedViljoenfəlˈjun‧vil‧joen
FailedVisagiefəˈsɑː.χi, fəˈsɑː.si‧vi‧sa‧gie
FailedViviersfə.fəˈjeə̯‧vi‧viers

--[[
This module implements the template {{af-IPA}}.

Author: AmazingJus

Sources:
- Donaldson, Bruce C. (1993). A Grammar of Afrikaans.
- Wissing, Daan (2016). "Afrikaans phonology". Taalportaal.
--]]

local export = {}

local lang = require("Module:languages").getByCode("af")
local sc = require("Module:scripts").getByCode("Latn")
local hyph = require("Module:hyphenation")
local str = require("Module:string")
local tbl = require("Module:table")

-- wraps text in Afrikaans language/script tagging, delegating to
-- [[Module:script utilities]] with this module's lang and sc objects
function export.tag_text(text, face)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc, face)
end

-- builds a link to an Afrikaans term, delegating to [[Module:links]]
-- with this module's lang and sc objects
function export.link(term, face)
	local link_data = { term = term, lang = lang, sc = sc }
	return require("Module:links").full_link(link_data, face)
end

local u = require("Module:string/char")
local decomp = mw.ustring.toNFD
local recomp = mw.ustring.toNFC
local lower = mw.ustring.lower

local find = mw.ustring.find
local len = mw.ustring.len
local match = mw.ustring.match
local split = mw.text.split
local gsplit = mw.text.gsplit
local sub = mw.ustring.sub

local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.gmatch

-- wrapper around rsubn() that keeps only the substituted string
-- (the extra parentheses truncate the result list to its first value,
-- dropping the replacement count)
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end

-- keep applying rsub() until a pass leaves the string unchanged,
-- then return that stable string
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end

-- list of constants
local grave = u(0x0300) -- combining grave accent
local acute = u(0x0301) -- combining acute accent
local circ = u(0x0302) -- combining circumflex
local dia = u(0x0308) -- combining diaeresis
local syll = "‧" -- syllable dot

-- list of char classes (plain strings, wrapped in [] further down)
local accent = grave .. acute .. circ .. dia
local vowel = "aeiouyAEIOUY"
-- every consonant letter in both cases; uppercase "R" must be present,
-- otherwise a capitalised word such as "Roux" is not recognised as starting
-- with a consonant and the syllable dot lands after the "R"
local cons = "bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ"
local bound = syll .. "#" -- "#" is the word-border marker added by the callers

-- put them into classes
local A = "[" .. accent .. "]" -- all accents
local V = "[" .. vowel .. "]" -- all vowels
local non_V = "[^" .. vowel .. "]" -- all non-vowels
local C = "[" .. cons .. "]" -- all consonants
local non_C = "[^" .. cons .. "]" -- all non-consonants
local CV = "[" .. cons .. vowel .. "]" -- all consonants and vowels
local S = "[" .. bound .. "]" -- any syllable boundary

-- list of valid trigraphs and digraphs, including diphthongs and long vowels
-- keys are lowercase spellings, values are the IPA strings they stand for;
-- the values marked "temporary value" are single placeholder letters that the
-- (currently disabled) code in toIPA() maps to "i", "u" and "y" afterwards
local graphemes = {
	["aai"] = "ɑːɪ̯",
	["eeu"] = "iʊ̯",
	["ieu"] = "iʊ̯",
	["oei"] = "uɪ̯",
	["ooi"] = "oːɪ̯",
	["aa"] = "ɑː",
	["ae"] = "ɑː",
	["ai"] = "aɪ̯",
	["au"] = "œʊ̯",
	["ee"] = "ɪə̯",
	["ei"] = "əɪ̯",
	["eu"] = "iʊ̯",
	["ie"] = "į", -- temporary value
	["oe"] = "ů", -- temporary value
	["oi"] = "ɔɪ̯",
	["oo"] = "ʊə̯",
	["ou"] = "œʊ̯",
	["ui"] = "uɪ̯",
	["uu"] = "ü" -- temporary value
}
-- sort trigraphs and digraphs by descending length, so that trigraphs come
-- before digraphs; the comparator only looks at length, so the relative
-- order of keys with the same length is whatever pairs() and table.sort()
-- happen to produce
local graphemes_sorted = {}
for k, _ in pairs(graphemes) do
	table.insert(graphemes_sorted, k)
end
table.sort(graphemes_sorted, function(a, b) return len(a) > len(b) end)

-- list of various grapheme sets
-- NOTE(review): in the visible code only vowel_length is referenced, and only
-- from the disabled part of toIPA(); cons_voice is not referenced at all
local sets = {
	-- keyed by vowel letter; the disabled code in toIPA() picks element [1]
	-- before a consonant or length mark and element [2] before a syllable
	-- boundary or word border
	["vowel_length"] = { -- long-short vowels
		["a"] = {"a", "ɑː"},
		["e"] = {"ɛ", "ɪə̯"},
		["i"] = {"ə", "i"},
		["o"] = {"ɔ", "ʊə̯"},
		["u"] = {"œ", "y"}
	},
	-- each pair is {voiced, voiceless}
	["cons_voice"] = { -- voiced/voiceless consonants
		{"b", "p"},
		{"d", "t"},
		{"ʤ", "ʧ"},
		{"ɡ", "k"},
		{"v", "f"},
		{"z", "s"},
		{"ʒ", "ʃ"},
	}
}

-- list of defined affixes
-- each entry is a table whose first element is the affix spelling; the lists
-- are tried in order and apply_affixes() stops at the first valid entry
-- optional fields, both read by is_valid_affix():
--   restriction: pattern that the remainder of the word (after the prefix)
--                must match for the prefix to be accepted
--   pos:         pattern that must be found in the part-of-speech string
local affixes = {
	-- prefixes
	["pre"] = {
		{"aan"},
		{"agter"},
		{"be", restriction = "^[^" .. accent .. "eiu]"},
		{"deur"},
		{"er"},
		{"ge", restriction = "^[^" .. accent .. "eiu]"},
		{"her"},
		{"om"},
		{"ont"},
		{"onder"},
		{"van", pos = "d"},
		{"ver"},
		{"voor"}
	},
	-- suffixes
	["suf"] = {
		{"agtig"},
		{"baar"},
		{"dom"},
		{"end"},
		{"heid"},
		{"lik"},
		{"loos"},
		{"nis"},
		{"sel"},
		{"skap"}
	}
}

-- list of unstressed words
-- NOTE(review): not referenced anywhere in the visible part of this module
local unstressed = {
	"die",
	"dit",
	"is",
	"nie",
	"'n"
}

-- list of stressed endings (mostly in loanwords)
-- consumed in order by stress(); each entry is either a pattern string or a
-- table whose first element is the pattern, with extra fields:
--   orig:   the origin the ending is limited to (see the inline comments)
--   stress: "pre" -- presumably the stress falls on the syllable before the
--           ending; TODO confirm
local stressed_endings = {
	"aa[lt]",
	"aans?",
	"aard?",
	"ant",
	"asie",
	"a[mt]",
	"ee[lmnrst]?",
	"ein",
	{"el", orig = "loan"}, -- only in loanwords
	"ent",
	"eu[rst]",
	"e[kst]",
	"ieel",
	"ie[fklmn]",
	"ine",
	"ie[rt]",
	{"o", orig = "fr"}, -- only in french loanwords
	"oen",
	"on",
	"oo[fgilmnr]",
	{"sie", stress = "pre"},
	"teek",
	"teit",
	"uu[mrst]",
	"u",
	"y[ns]?",
}

-- list of respelling substitutions
-- each entry is {pattern, replacement, etymology}:
--   pattern:     pattern matched against the term ("#" is the word border,
--                "‧" the syllable dot)
--   replacement: replacement string (may use captures such as %1)
--   etymology:   language code the rule is limited to, or "-" for a default
--                rule
-- generate_subs() walks this list in order and keeps only the first
-- applicable rule for each distinct pattern, so an etymology-specific rule
-- must come before the default rule that shares its pattern
local subs = {
	-- 'N
	{"#'n#", "#ə(n)#", "-"}, -- pronounced /ə(n)/ as the article 'n
	{"'n#", "ən#", "-"}, -- pronounced /ən/ otherwise

	-- CH
	{"ch", "ʃ", "fr"}, -- pronounced /ʃ/ in french loans
	{"sch", "sk", "-"}, -- pronounced /sk/ in the sequence "sch"
	{"ch([" .. cons .. "]?[ei])", "χ%1", "-"}, -- pronounced /χ/ before optional consonant cluster and "e" or "i"
	{"ch", "k", "-"}, -- otherwise /k/

	-- NG
	{"ng", "ŋ", "-"}, -- pronounced /ŋ/

	-- SH/SJ
	{"s[hj]", "ʃ", "-"}, -- pronounced /ʃ/

	-- DJ/TJ
	{"[dt]jie", "kį", "-"}, -- pronounced /-ci/ in the suffix "-djie"/"-tjie"
	{"dj", "ʤ", "-"}, -- "dj" is otherwise /d͡ʒ/
	{"tj", "ʧ", "-"}, -- "tj" is otherwise /t͡ʃ/

	-- C
	{"c([ei])", "s%1", "-"}, -- pronounced /s/ before "e" or "i"
	{"c", "k", "-"}, -- otherwise /k/

	-- GH
	{"gh", "ɡ", "-"}, -- pronounced /ɡ/

	-- G
	{"g", "ɡ", "en"}, -- pronounced /ɡ/ in english loans
	{"r‧ge", "r‧ɡe", "-"}, -- pronounced /ɡ/ between /r/ and /ə/
	{"g", "χ", "-"}, -- otherwise /χ/
	{"n(‧?[kɡ])", "ŋ%1", "-"}, -- /ŋ/ is an allophone of /n/ before /ɡ/ and /k/

	-- V
	{"v", "f", "af"}, -- pronounced /f/ in native words

	-- W
	{"w", "w", "en"}, -- pronounced /w/ in english loans
	{"w", "v", "-"}, -- otherwise /v/

	-- EAU
	{"eaux?", "OU", "fr"}, -- pronounced /œʊ̯/ in french loans

	-- OI
	{"oi", "wA", "fr"}, -- pronounced /wa/ in french loans

	-- IJ
	{"ij(" .. non_V .. ")", "EI%1", "-"}, -- pronounced /əɪ̯/ in dutch-based names

	-- X
	{"#x", "#s", "-"}, -- pronounced /s/ word-initially
	{"x", "ks", "-"}, -- otherwise /ks/

	-- H
	{"(" .. CV .. ")h", "%1", "-"}, -- silent if part of consonant digraph or syllable-final
	{"h", "ɦ", "-"}, -- otherwise /ɦ/

	-- O
	{"o(" .. S .. ")", "OU%1", "en"}, -- pronounced /œʊ̯/ in open syllables in english loans
	{"o#", "ů#", "-"}, -- otherwise /u/ in word-final position

	-- U
	{"u(" .. C .. ")", "A%1", "en"}, -- pronounced /a/ in closed syllables in english loans
	{"u", "jů", "en"}, -- otherwise /ju/ in english loans

	-- Y
	{"y", "j", "en"}, -- pronounced /j/ in english loans
	{"y", "EI", "-"}, -- otherwise /əɪ̯/

	-- circumflex accent
	{circ, "ː", "-"} -- lengthens a vowel with its short quality
}

-- canonicalisation function
-- normalises raw input and returns it as an array of words; a comma becomes
-- a separate "|" element that stands for a pause
local function canonicalise(text)
	-- decompose accents, then make everything lowercase
	local normalised = lower(decomp(text))

	-- collapse runs of whitespace and trim both ends (removes extraneous spaces)
	normalised = rsub(normalised, "%s+", " ")
	normalised = rsub(normalised, "^ ", "")
	normalised = rsub(normalised, " $", "")

	-- treat commas as a pause
	normalised = rsub_repeatedly(normalised, "%s*,%s*", " | ")

	-- return as array of words
	return split(normalised, " ")
end

-- syllabification function
-- inserts the syllable dot (‧) into word and returns the result
-- word is expected to be NFD-decomposed (accents as combining characters)
-- etyl and pos are accepted for interface compatibility but are not used
-- the result keeps a dot in front of the first syllable of the word:
-- is_valid_onset() depends on that, so callers strip it where it is unwanted
local function syllabify(word, etyl, pos)
	-- remove diaeresis and split syllable (note: diaeresis shouldn't be displayed in its hyphenation form)
	word = rsub(word, "(" .. V .. ")" .. dia, syll .. "%1")

	-- mark trigraphs and digraphs with curly braces
	-- this is done in a single left-to-right scan that tries the longest
	-- graphemes first; substituting one grapheme at a time over the whole
	-- word could re-mark a digraph inside an already braced trigraph
	-- ("{{aa}i}"), depending on the unspecified order of equal-length entries
	-- in graphemes_sorted, which left a stray dot inside the trigraph
	local pieces = {}
	local i, n = 1, len(word)
	while i <= n do
		local found
		for _, graph in ipairs(graphemes_sorted) do
			if sub(word, i, i + len(graph) - 1) == graph then
				found = graph
				break
			end
		end
		if found then
			pieces[#pieces + 1] = "{" .. found .. "}"
			i = i + len(found)
		else
			pieces[#pieces + 1] = sub(word, i, i)
			i = i + 1
		end
	end
	word = table.concat(pieces)

	-- add dot before consonant + vowel
	word = rsub(word, "(" .. C .. "?{?" .. V .. A .. "?)", syll .. "%1")

	-- remove any dots inside brackets
	word = rsub(word, "{[^}]*}", function(a) return rsub(a, syll, "") end)

	-- shift dot before certain consonant clusters and digraphs
	-- (uppercase letters are listed too, so that capitalised onsets such as
	-- "Pr", "Bl" or "Th" are kept together like their lowercase counterparts)
	word = rsub(word, "([bcfgkpvwBCFGKPVW])‧l", syll .. "%1l") -- clusters with l
	word = rsub(word, "([bcdfgkptwvBCDFGKPTWV])‧r", syll .. "%1r") -- clusters with r
	word = rsub(word, "([dstDST])‧j", syll .. "%1j") -- digraphs with j
	word = rsub(word, "([ckgtCKGT])‧h", syll .. "%1h") -- digraphs with h
	word = rsub(word, "n‧g", "ng‧") -- ng is syllable-final

	-- turn explicit dots into syllable dots and remove the brackets
	-- (leading dots are deliberately kept, see the note at the top)
	-- word = rsub(word, "^(" .. non_V .. "*)" .. syll, "%1")
	word = rsub(word, "%.", syll)
	word = rsub(word, "[{}]", "") -- comment out to debug
	-- collapse runs of dots into one
	return rsub(word, syll .. "+", syll)
end

-- hyphen depth check function
-- returns the pattern for a literal hyphen at depth 1, and the empty string
-- at any other depth
local function is_hyphen_depth(depth)
	if depth == 1 then
		return "%-"
	end
	return ""
end

-- onset validation function
-- takes a syllabified string and reports whether it begins with a syllable
-- dot, or with "s" + dot + one of c, k, l, m, n, p, t (onsets starting with s)
local function is_valid_onset(syllabified)
	local starts_with_dot = find(syllabified, "^" .. syll)
	local starts_with_s_cluster = find(syllabified, "^s" .. syll .. "[cklmnpt]")
	return (starts_with_dot or starts_with_s_cluster) and true or false
end

-- rest of string function
-- returns the word with as many characters as the affix spelling has cut off
-- the front (affix_type "pre") or off the end (any other affix_type);
-- it does not check that the affix is actually present
local function get_rest_string(word, affix, affix_type)
	local affix_len = len(affix[1])
	if affix_type ~= "pre" then
		return sub(word, 1, -affix_len - 1)
	end
	return sub(word, affix_len + 1)
end

-- affix validation function
-- decides whether affix may be split off word; returns false when any check
-- fails, otherwise the position at which the affix pattern was found in word
-- (or false if it is not there at all)
local function is_valid_affix(word, affix, affix_type, pos, depth)
	local is_prefix = affix_type == "pre"

	-- what is left of the word once the affix is taken away
	local remainder = get_rest_string(word, affix, affix_type)

	-- the affix may be limited to certain parts of speech
	if affix.pos and not find(pos, affix.pos) then
		return false
	end

	-- checks that only concern prefixes
	if is_prefix then
		-- explicit non-boundaries
		if affix.restriction and not find(remainder, affix.restriction) then
			return false
		end
		-- the remainder must begin with a valid syllable onset
		if not is_valid_onset(syllabify(remainder)) then
			return false
		end
		-- explicit word boundary
		if find(remainder, "^%+") then
			return false
		end
		-- the remainder must contain a vowel
		if not find(remainder, V) then
			return false
		end
	end

	-- a remainder of one or two characters is too short
	if find(remainder, "^..?$") then
		return false
	end

	-- match hyphen at appropriate depth
	local hyphen = is_hyphen_depth(depth)

	-- prefixes are anchored at the start, suffixes at the end
	local pattern
	if is_prefix then
		pattern = "^" .. affix[1] .. hyphen
	else
		pattern = hyphen .. affix[1] .. "$"
	end
	return find(word, pattern) or false
end

-- affix application function
-- marks at most one prefix (with ">" after it) and at most one suffix (with
-- "<" before it) in word; at depth 1 the hyphen that joins the affix to the
-- word is replaced by the marker
local function apply_affixes(word, depth, pos)
	-- match hyphen at appropriate depth
	local hyphen = is_hyphen_depth(depth)

	-- spelling of the first affix in list that is valid for the current word
	local function first_valid(list, affix_type)
		for i = 1, #list do
			local candidate = list[i]
			if is_valid_affix(word, candidate, affix_type, pos, depth) then
				return candidate[1]
			end
		end
		return nil
	end

	-- process prefixes: add prefix marker >
	local prefix = first_valid(affixes.pre, "pre")
	if prefix then
		word = rsub(word, "^" .. prefix .. hyphen, prefix .. ">")
	end

	-- process suffixes (on the word as it stands after prefix marking):
	-- add suffix marker <
	local suffix = first_valid(affixes.suf, "suf")
	if suffix then
		word = rsub(word, hyphen .. suffix .. "$", "<" .. suffix)
	end

	return word
end

-- components parsing function
-- recursively breaks a word into components and syllabifies them:
--   depth 0 splits on double hyphens,
--   depth 1 marks hyphenated affixes and splits on single hyphens,
--   depth 2 marks non-hyphenated affixes and applies syllabification
-- depth defaults to 0 and pos to ".*"; any other depth returns word unchanged
local function split_components(word, depth, etyl, pos)
	-- initialise some variables
	depth = depth or 0
	pos = pos or ".*"

	-- split text on separator, handle every piece one level deeper and glue
	-- the results back together with joiner (a text without the separator
	-- is simply handled one level deeper as a whole)
	local function descend(text, separator, joiner)
		local pieces = split(text, separator)
		for i = 1, #pieces do
			pieces[i] = split_components(pieces[i], depth + 1, etyl, pos)
		end
		return table.concat(pieces, joiner)
	end

	if depth == 0 then
		-- handle double hyphen compounds first
		return descend(word, "%-%-", "--")
	elseif depth == 1 then
		-- explicitly mark ambiguous prefixes and suffixes joined by a hyphen
		-- with > and < respectively, then handle single hyphen compounds
		return descend(apply_affixes(word, depth, pos), "%-", "-")
	elseif depth == 2 then
		-- add > and < for non-hyphenated prefixes and suffixes, then syllabify
		return syllabify(apply_affixes(word, depth, pos), etyl, pos)
	end

	return word
end

-- component generation function
-- runs every word through split_components(), wraps each result in "#" word
-- borders and joins them with single spaces
local function to_components(words, etyl, pos)
	local marked = {}
	for i = 1, #words do
		marked[i] = "#" .. split_components(words[i], 0, etyl, pos) .. "#"
	end
	return table.concat(marked, " ")
end

-- hyphenation function
-- returns term with syllable dots (‧) inserted
-- term is either a string or a frame-like table whose args[1] holds the text
-- etyl and pos are accepted for interface compatibility but are not used
function export.hyphenation(term, etyl, pos)
	-- get user input as table
	if type(term) == "table" then
		term = term.args[1]
	end

	-- decompose accents: syllabify() looks for the combining diaeresis and for
	-- plain vowel letters followed by a combining accent, so precomposed
	-- letters such as "ë" or "é" would otherwise not be treated as vowels
	term = decomp(term)

	-- mark all word borders
	term = rsub(term, "([^ ]+)", "#%1#")

	-- apply syllabification
	term = syllabify(term)

	-- syllabify() puts a dot in front of the first vowel-bearing chunk of
	-- every word, which leaves either a leading dot ("‧A‧fri‧ka") or an
	-- onset cut off from its vowel ("P‧les‧sis"); a syllable cannot consist
	-- of consonants only, so the first dot of each word is dropped when
	-- nothing but non-vowels stands between it and the word border
	term = rsub(term, "#([^" .. vowel .. "#" .. syll .. "]*)" .. syll, "#%1")

	-- format hyphenation
	-- local data = { lang = lang, sc = sc, hyphs = {{hyph = rsub(syllabify(term), "[#%[%]<>]", ""), "%.")}} }

	-- return hyphen.format_hyphenations(data)

	-- recompose accents and remove the internal markers
	return rsub(recomp(term), "[#%[%]<>]", "")
end

-- generate substitutions function
-- builds the ordered list of {pattern, replacement} pairs that apply for the
-- given etymology: a rule is taken when it is a default rule ("-") or when
-- its etymology equals etyl (and etyl is not "-"); for each distinct pattern
-- only the first applicable rule is kept
-- term and pos are accepted but not used
local function generate_subs(term, etyl, pos)
	local to_sub, seen = {}, {}

	for i = 1, #subs do
		local rule = subs[i]
		local patt, repl, rule_etyl = rule[1], rule[2], rule[3]

		local applies = rule_etyl == "-" or (etyl ~= "-" and rule_etyl == etyl)
		if applies and not seen[patt] then
			to_sub[#to_sub + 1] = {patt, repl}
			seen[patt] = true
		end
	end

	return to_sub
end

-- stress assignment function
-- puts the stress mark (ˈ) in front of the first ending from stressed_endings
-- that closes a word of term (words end in "#"); when no ending applies, the
-- mark goes at the very start of term
-- etyl is the etymology code of the term (nil when not given); pos is
-- accepted for interface compatibility but is not used
local function stress(term, etyl, pos)
	-- words with certain endings are syllable-final stressed
	for _, entry in ipairs(stressed_endings) do
		-- an entry is either a plain pattern or a table {pattern, orig = ..., stress = ...}
		local is_table = type(entry) == "table"
		local ending = is_table and entry[1] or entry
		local orig = is_table and entry.orig or nil

		-- check the origin restriction of the ending, if any
		local applies
		if orig == "loan" then
			-- only in loanwords, i.e. an etymology is given and it is not afrikaans
			applies = etyl ~= nil and etyl ~= "af"
		elseif orig then
			-- only in loans from one particular language
			applies = etyl == orig
		else
			applies = true
		end

		-- an ending whose restriction is not met is skipped, so that later
		-- endings still get a chance
		if applies and find(term, ending .. "#") then
			if is_table and entry.stress == "pre" then
				-- NOTE(review): "pre" is taken to mean that the syllable before
				-- the ending carries the stress -- confirm
				local with_previous = "([^" .. bound .. "]*" .. syll .. ending .. "#)"
				if find(term, with_previous) then
					return rsub(term, with_previous, "ˈ%1")
				end
			end
			-- re-insert the matched text through a capture: the ending is a
			-- pattern, so it must not be used as replacement text itself
			return rsub(term, "(" .. ending .. "#)", "ˈ%1")
		end
	end

	-- add stress mark to first syllable if no ending was stressed
	return rsub(term, "^#", "#ˈ")
end

-- pronunciation function
-- work in progress: at present this canonicalises the text, marks its
-- components and syllables, and returns the result with the "#" word borders
-- and square brackets removed; stress assignment and the conversion to
-- phonemes are still disabled below
local function toIPA(text, etyl, pos)
	-- canonicalise the text into an array of words and mark the appropriate
	-- components in each of them
	local term = to_components(canonicalise(text), etyl, pos)

	-- add stress to term
	-- term = stress(term, etyl, pos)

	-- shift stress rightwards to a syllable boundary
	-- term = rsub(term, "([^" .. syll_boundary .. "]*)ˈ", "ˈ%1")

	--[[
	-- prepare table to substitute the appropriate phonemes based on etymology and part of speech
	local to_sub = generate_subs(term, etyl, pos)

	-- go over substitution table
	for _, s in ipairs(to_sub) do
		local k, v = s[1], s[2]
		rsub(term, k, v)
	end

	-- make text lowercase again
	term = lower(term)

	-- substitute graphemes
	for graph, phoneme in pairs(graphemes) do
		term = rsub(term, graph, phoneme)
	end

	-- substitute single-letter vowels
	term = rsub(term, "([aeiou])([‧#ː" .. cons .. "])", function(a, b)
		if match("[‧#]", b) then
			return sets.vowel_length[a][2] .. b -- for open syllables
		else
			return sets.vowel_length[a][1] .. b -- for closed syllables
		end
	end)

	-- replace į, ů, ü with their actual phonetic values
	term = rsub(term, "[įůü]", {["į"] = "i", ["ů"] = "u", ["ü"] = "y"})

	-- remove double consonants
	term = rsub(term, "(.)(‧?)%1", "%2%1")
	]]--

	-- final adjustments
	-- term = rsub(term, "‧", ".")

	-- strip the word borders and square brackets
	return rsub(term, "[#%[%]]", "")
end

-- main export function
-- term is either the text itself or a frame-like table whose args[1] holds
-- the text; the result is whatever toIPA() returns for it
function export.show(term, etyl, pos)
	local input = term
	-- get user input as table
	if type(input) == "table" then
		input = input.args[1]
	end
	return toIPA(input, etyl, pos)
end

return export