Module:User:AmazingJus/sco/testcases

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of AmazingJus, for his own experimentation. Items in this module may be added and removed at AmazingJus's discretion; do not rely on this module's stability.


local tests = require('Module:UnitTests')
local m_IPA = require('Module:User:AmazingJus/sco')
local link = require("Module:links")
local lang = require("Module:languages").getByCode("gd")
local sc = require("Module:scripts").getByCode("Latn")

local gsub = mw.ustring.gsub
local decomp = mw.ustring.toNFD
local recomp = mw.ustring.toNFC
local u = require("Module:string/char")

--- Wraps a transcription in the HTML span that carries the "IPA" class.
-- @param IPA transcription text to wrap
-- @return the text enclosed in <span class="IPA"> ... </span>
local function tag_IPA(IPA)
	local opening, closing = '<span class="IPA">', '</span>'
	return opening .. IPA .. closing
end

-- Character class of everything stripped from a respelling to recover the entry name:
-- the respelling markers $ ^ * _ = and the combining marks U+0302 through U+036F.
-- The range is spelled with codepoints rather than raw combining characters so that it
-- stays legible and cannot be silently altered by an editor or by Unicode normalisation.
-- U+0300 (grave) and U+0301 (acute) sit just below the range and are therefore kept.
local STRIPPED_CHARS = "[%$%^%*_=" .. u(0x0302) .. "-" .. u(0x036F) .. "]"

local COMBINING_GRAVE = u(0x0300)
local COMBINING_ACUTE = u(0x0301)

--- Derives the displayed entry name from a respelling.
-- The respelling is decomposed (NFD), the marker characters and all combining marks
-- other than grave and acute are removed, every acute is turned into a grave, and the
-- result is recomposed (NFC).
-- @param input respelling string, possibly containing marker characters and diacritics
-- @return the cleaned-up, NFC-normalised entry name
local function input_to_term(input)
	-- decompose so that each accent becomes a separate combining character
	local term = decomp(input)

	-- drop respelling markers and every combining mark except grave and acute
	term = gsub(term, STRIPPED_CHARS, "")

	-- replace acute accents with graves
	term = gsub(term, COMBINING_ACUTE, COMBINING_GRAVE)

	-- recompose all accents
	return recomp(term)
end

--- Runs a single pronunciation case and reports it through tests:equals.
-- With no explicit respelling, the first argument doubles as the respelling and the
-- displayed entry name is derived from it with input_to_term.
-- @param term      entry name, or the respelling itself when `input` is absent
-- @param expected  expected IPA string (wrapped in the IPA span before comparison)
-- @param input     optional respelling passed to the IPA module in place of `term`
-- @param pos       accepted (list entries pass e.g. "pl") but not read by this function
-- @param etyl      accepted but not read by this function
-- @return the value returned by tests:equals
function tests:check_output(term, expected, input, pos, etyl)
	local entry, respelling = term, input
	if not respelling then
		-- two-field case: the given string is the respelling; strip it to get the entry
		respelling = term
		entry = input_to_term(term)
	end

	-- only mention the respelling in the row label when it differs from the entry name
	local shown_input = nil
	if respelling ~= entry then
		shown_input = respelling
	end
	local ipa_source = shown_input or entry

	local linked = link.full_link({ term = entry, lang = lang, sc = sc })
	local label = linked
	if shown_input then
		label = linked .. " (input: <code>" .. shown_input .. "</code>)"
	end

	return tests:equals(
		label,
		tag_IPA(m_IPA.toIPA(ipa_source)),
		tag_IPA(expected)
	)
end

-- Pronunciation test suite: builds the list of cases and hands it to self:iterate
-- together with the name of the checking method, "check_output".
--
-- Each entry is a positional table matching check_output's parameters:
--   [1] entry name, or a respelling (may contain marker characters such as $ ^ * =
--       and extra diacritics, which input_to_term strips to obtain the entry name)
--   [2] expected IPA
--   [3] optional explicit respelling; when present, [1] is the plain entry name
--   [4] optional part-of-speech tag (only "pl" is used here); check_output accepts it
--       but does not read it
function tests:test_pron()
	local list = {
		-- misc
		{ "á", "ˈa" },
		{ "acras", "ˈaʰkɾəs̪" },
		{ "ailp", "ˈalp" },
		{ "ailt", "ˈaltʲ" },
		{ "aingeal", "ˈaiŋʲɡʲəl̪ˠ" },
		{ "aird", "ˈaːɾʃt̪" },
		{ "àite", "ˈaːʰtʲə" },
		{ "àlainn", "ˈaːl̪ˠɪɲ" },
		{ "ann", "ˈaun̪ˠ" },
		{ "baile", "ˈpalə" },
		{ "bainne", "ˈpaɲə" },
		{ "balach", "ˈpal̪ˠəx" },
		{ "balla", "ˈpal̪ˠə" },
		{ "bann", "ˈpaun̪ˠ" },
		{ "barr", "ˈpaːrˠ" },
		{ "barra", "ˈparˠə" },
		{ "bàta", "ˈpaːʰt̪ə" },
		{ "beagan", "ˈpekan" },
		{ "bealach", "ˈpjal̪ˠəx" },
		{ "bealltainn", "ˈpjaul̪ˠt̪ɪɲ" },
		{ "bean", "ˈpɛn" },
		{ "beò", "ˈpjɔː" },
		{ "bigein", "ˈpikʲəɲ" },
		{ "bó", "ˈpoː" },
		{ "bòid", "ˈpɔːtʲ" },
		{ "borb", "ˈpɔɾɔp" },
		{ "bord", "ˈpɔːɾʃt̪" },
		{ "bràmar", "ˈpɾaːməɾ" },
		{ "brice", "ˈpɾʲiʰkʲə" },
		{ "broinn", "ˈpɾəiɲ" },
		{ "cadal", "ˈkʰat̪əl̪ˠ" },
		{ "caibe", "ˈkʰapə" },
		{ "càil", "ˈkʰaːl" },
		{ "caill", "ˈkʰaiʎ" },
		{ "cailleach", "ˈkʰaʎəx" },
		{ "caillte", "ˈkʰaiʎtʲə" },
		{ "cainnt", "ˈkʰaiɲtʲ" },
		{ "càise", "ˈkʰaːʃə" },
		{ "cait", "ˈkʰɛʰtʲ" },
		{ "calltainn", "ˈkʰaul̪ˠt̪ɪɲ" },
		{ "cam", "ˈkʰaum" },
		{ "campa", "ˈkʰaumpə" },
		{ "cana", "ˈkʰanə" },
		{ "cànanan", "ˈkʰananən", nil, "pl" },
		{ "caraich", "ˈkʰaɾɪç" },
		{ "caran", "ˈkʰaɾan" },
		{ "cas", "ˈkʰas̪" },
		{ "casan", "ˈkʰas̪an" },
		{ "ceannaich", "ˈkʰʲan̪ˠɪç" },
		{ "ceannard", "ˈkʰʲan̪ˠəɾʃt̪" },
		{ "cearr", "ˈkʰʲaːrˠ" },
		{ "céile", "ˈkʰʲeːlə" },
		{ "ceist", "ˈkʰʲeʃtʲ" },
		{ "ceòl", "ˈkʰʲɔːl̪ˠ" },
		{ "ceu$d", "ˈkʰʲiat̪" },
		{ "ceum", "ˈkʰʲeːm" },
		{ "ciall", "ˈkʰʲiəl̪ˠ" },
		{ "cìoch", "ˈkʰʲiəx" },
		{ "cipean", "ˈkʰʲiʰpan" },
		{ "cìr", "ˈkʰʲiːɾʲ" },
		{ "ciùil", "ˈkʰʲuːl" },
		{ "ciurr", "ˈkʰʲuːrˠ" },
		{ "cneap", "ˈkʰɾʲɛʰp" },
		{ "cneasta", "ˈkʰɾʲes̪t̪ə" },
		{ "cnèatag", "ˈkʰɾʲiaʰt̪ak" },
		{ "cóig", "ˈkʰoːkʲ" },
		{ "coi^leach", "ˈkʰɤləx" },
		{ "coi^re", "ˈkʰɤɾʲə" },
		{ "coire", "ˈkʰɔɾʲə" },
		{ "corc", "ˈkʰɔɾʰk" },
		{ "crè", "ˈkʰɾʲɛː" },
		{ "creagan", "ˈkʰɾʲekən", nil, "pl" },
		{ "cuan", "ˈkʰuan" },
		{ "cuilc", "ˈkʰulʰkʲ" },
		{ "cuilean", "ˈkʰulan" },
		{ "cuine", "ˈkʰuɲə" },
		{ "cuip", "ˈkʰuiʰp" },
		{ "cuir", "ˈkʰuɾʲ" },
		{ "cùis", "ˈkʰuːʃ" },
		{ "cuideĭgin", "ˈkʰutʲekʲɪn" },
		{ "cum", "ˈkʰuːm" },
		{ "cumail", "ˈkʰumal" },
		{ "cupa", "ˈkʰuʰpə" },
		{ "dé", "ˈtʲeː" },
		{ "dèan", "ˈtʲian" },
		{ "dearg", "ˈtʲɛɾak" },
		{ "deich", "ˈtʲeç" },
		{ "deoch", "ˈtʲɔx" },
		{ "dia^", "ˈtʲia" },
		{ "dìcheall", "ˈtʲiːçəl̪ˠ" },
		{ "diù", "ˈtʲuː" },
		{ "diùid", "ˈtʲuːtʲ" },
		{ "dòchas", "ˈt̪ɔːxəs̪" },
		{ "dona", "ˈt̪ɔnə" },
		{ "dornan", "ˈt̪ɔːɾnan" },
		{ "dorra", "ˈt̪ɔrˠə" },
		{ "druim", "ˈt̪ɾɯim" },
		{ "duais", "ˈt̪uəʃ" },
		{ "duine", "ˈt̪ɯɲə" },
		{ "eadar", "ˈet̪əɾ" },
		{ "eallach", "ˈjal̪ˠəx" },
		{ "earrach", "ˈjarˠəx" },
		{ "earrann", "ˈjarˠən̪ˠ" },
		{ "eòin", "ˈjɔːɲ" },
		{ "eòlas", "ˈjɔːl̪ˠəs̪" },
		{ "eu^dail", "ˈeːt̪al" },
		{ "fada", "ˈfat̪ə" },
		{ "faoileag", "ˈfɯːlak" },
		{ "farsaing", "ˈfaɾs̪ɪŋʲɡʲ" },
		{ "fear", "ˈfɛɾ" },
		{ "fearna", "ˈfjaːɾnə" },
		{ "féill", "ˈfeːʎ" },
		{ "feòl", "ˈfjɔːl̪ˠ" },
		{ "feur", "ˈfiaɾ" },
		{ "fionnar", "ˈfjun̪ˠəɾ" },
		{ "fionntach", "ˈfjuːn̪ˠt̪əx" },
		{ "fios", "ˈfis̪" },
		{ "fiùran", "ˈfjuːɾan" },
		{ "fliuch", "ˈflux" },
		{ "follais", "ˈfɔl̪ˠɪʃ" },
		{ "fuirich", "ˈfuɾʲɪç" },
		{ "fulang", "ˈful̪ˠəŋɡ" },
		{ "gad", "ˈkat̪" },
		{ "gafann", "ˈkafən̪ˠ" },
		{ "gille", "ˈkʲiʎə" },
		{ "gnè", "ˈkɾʲɛː" },
		{ "gob", "ˈkop" },
		{ "goid", "ˈkɤtʲ" },
		{ "greim", "ˈkɾʲeim" },
		{ "guib", "ˈkɤp", "goi^b" },
		{ "Hearach", "ˈhɛɾəx" },
		{ "ionnsaich", "ˈjuːn̪ˠs̪ɪç" },
		{ "lachan", "ˈl̪ˠaxən", nil, "pl" },
		{ "lainnir", "ˈl̪ˠaɲɪɾʲ" },
		{ "le", "le" },
		{ "leig", "ˈʎekʲ" },
		{ "leum", "ˈʎeːm" },
		{ "lice", "ˈʎiʰkʲə" },
		{ "lionn", "ˈʎuːn̪ˠ" },
		{ "lionntachd", "ˈʎuːn̪ˠt̪əxk" },
		{ "lios", "ˈʎis̪" },
		{ "litir", "ˈʎiʰtʲɪɾʲ" },
		{ "loch", "ˈl̪ˠɔx" },
		{ "lochan", "ˈl̪ˠɔxan" },
		{ "luchag", "ˈl̪ˠuxak" },
		{ "mac", "ˈmaʰk" },
		{ "maise", "ˈmaʃə" },
		{ "màla", "ˈmaːl̪ˠə" },
		{ "maol", "ˈmɯːl̪ˠ" },
		{ "meall", "ˈmjaul̪ˠ" },
		{ "meòg", "ˈmjɔːk" },
		{ "meòir", "ˈmjɔːɾʲ" },
		{ "mic", "ˈmiʰkʲ" },
		{ "milis", "ˈmilɪʃ" },
		{ "mill", "ˈmiːʎ" },
		{ "mìltean", "ˈmiːltʲən", nil, "pl" },
		{ "mìos", "ˈmiəs̪" },
		{ "molta", "ˈmɔl̪ˠt̪ə" },
		{ "mucan", "ˈmuʰkən", nil, "pl" },
		{ "muinntir", "ˈmɯiɲtʲɪɾʲ" },
		{ "mura", "ˈmuɾə" },
		{ "murn", "ˈmuːɾn" },
		{ "neart", "ˈɲɛɾʃt̪" },
		{ "neul", "ˈɲial̪ˠ" },
		{ "norrag", "ˈn̪ˠɔrˠak" },
		{ "oillt", "ˈəiʎtʲ" },
		{ "òir", "ˈɔːɾʲ" },
		{ "pài^pear", "ˈpʰɛːʰpəɾ" },
		{ "peall", "ˈpʰjaul̪ˠ" },
		{ "peann", "ˈpʰjaun̪ˠ" },
		{ "peasair", "ˈpʰes̪ɪɾʲ" },
		{ "peinnseăn", "ˈpʰeiɲʃan" },
		{ "post", "ˈpʰɔs̪t̪" },
		{ "put", "ˈpʰuʰt̪" },
		{ "rinnear", "ˈrˠiɲəɾ" },
		{ "rionnag", "ˈrˠun̪ˠak" },
		{ "rudeigin", "ˈrˠut̪əkʲɪn" },
		{ "sàr", "ˈs̪aːɾ" },
		{ "sean", "ˈʃɛn" },
		{ "seinneadair", "ˈʃeɲət̪ɪɾʲ" },
		{ "seòl", "ˈʃɔːl̪ˠ" },
		{ "Seumas", "ˈʃeːməs̪" },
		{ "sgia^n", "ˈs̪kʲian" },
		{ "sinnsear", "ˈʃiːɲʃəɾ" },
		{ "sionnach", "ˈʃun̪ˠəx" },
		{ "siùcair", "ˈʃuːʰkɪɾʲ" },
		{ "siursach", "ˈʃuːɾs̪əx" },
		{ "sràc", "ˈs̪t̪ɾaːʰk" },
		{ "strì", "ˈs̪t̪ɾʲiː" },
		{ "sùil", "ˈs̪uːl" },
		{ "sunndach", "ˈs̪uːn̪ˠt̪əx" },
		{ "tasdan", "ˈt̪ʰas̪t̪an" },
		{ "teanga", "ˈtʰʲɛŋɡə" },
		{ "teannta", "ˈtʰʲaun̪ˠt̪ə" },
		{ "teine", "ˈtʰʲenə" },
		{ "till", "ˈtʰʲiːʎ" },
		{ "tìm", "ˈtʰʲiːm" },
		{ "tinn", "ˈtʰʲiːɲ" },
		{ "tinneas", "ˈtʰʲiɲəs̪" },
		{ "toil", "ˈt̪ʰɔl" },
		{ "tolltach", "ˈt̪ʰɔul̪ˠt̪əx" },
		{ "tom", "ˈt̪ʰɔum" },
		{ "torr", "ˈt̪ʰɔːrˠ" },
		{ "treu^n", "ˈt̪ʰɾʲeːn" },
		{ "tui$t", "ˈt̪ʰuʰtʲ" },
		{ "uaine", "ˈuaɲə" },
		{ "ugan", "ˈukan" },
		{ "uiseag", "ˈɯʃak" },
		{ "uisge", "ˈɯʃkʲə" },
		-- lenited combinations
		{ "adhbran", "ˈɤːpɾan" },
		{ "aghaidh", "ˈɤː.ɪ" },
		{ "aibhne", "ˈaivɲə" },
		{ "aimhreid", "ˈaivɾʲɪtʲ" },
		{ "àmhainn", "ˈaːvɪɲ" },
		{ "an-diugh", "ˈəɲˈtʲu" },
		{ "aoibhneas", "ˈɯivɲəs̪" },
		{ "bacadh", "ˈpaʰkəɣ" },
		{ "beatha", "ˈpɛhə" },
		{ "bogha", "ˈpo.ə" },
		{ "buidhe", "ˈpɯi.ə" },
		{ "cabhag", "ˈkʰavak" },
		{ "cabhlach", "ˈkʰaul̪ˠəx" },
		{ "càineadh", "ˈkʰaːɲəɣ" },
		{ "caoimhin", "ˈkʰɯːvɪɲ" },
		{ "caoimhneas", "ˈkʰɯivɲəs̪" },
		{ "caomh", "ˈkʰɯːv" },
		{ "carnadh", "ˈkʰaːɾnəɣ" },
		{ "cladhadh", "ˈkʰl̪ˠɤ.əɣ" },
		{ "cluinneadh", "ˈkl̪ˠɯɲəɣ" },
		{ "cnòthan", "ˈkɾɔː.ən", nil, "pl" },
		{ "comhairle", "ˈkʰo.əɾlə" },
		{ "cuimhne", "ˈkʰuiɲə" },
		{ "cuimhneachadh", "ˈkʰuiɲəxəɣ" },
		{ "cuireadh", "ˈkʰuɾʲəɣ" },
		{ "dearrsadh", "ˈtʲaːrˠs̪əɣ" },
		{ "dhà", "ˈɣaː" },
		{ "dhìol", "ˈʝiəl̪ˠ" },
		{ "dhìth", "ˈʝiː" },
		{ "dhuibh", "ˈɣɯiv" },
		{ "dibhe", "ˈtʲivə" },
		{ "doimhne", "ˈt̪ɔiɲə" },
		{ "doimhneachd", "ˈt̪ɔiɲəxk" },
		{ "doirbh", "ˈt̪ɤɾʲɤv" },
		{ "dragh", "ˈt̪ɾɤɣ" },
		{ "dubh", "ˈt̪u" },
		{ "fathan", "ˈfahan" },
		{ "feadh", "ˈfjɤɣ" },
		{ "fh=athast", "ˈha.əs̪t̪" },
		{ "fh=éin", "ˈheːn" },
		{ "fhios", "ˈis̪" },
		{ "fh=uair", "ˈhuəɾʲ" },
		{ "fiughar", "ˈfju.əɾ" },
		{ "foghnaidh", "ˈfoːnɪ" },
		{ "Gài^dheal", "ˈkɛː.əl̪ˠ" },
		{ "gealladh", "ˈkʲal̪ˠəɣ" },
		{ "geamhradh", "ˈkʲɛuɾəɣ" },
		{ "goibhle", "ˈkɤilə" },
		{ "goibhnean", "ˈkɤinən", nil, "pl" },
		{ "iarraidh", "ˈiərˠɪ" },
		{ "ioghnadh", "ˈiənəɣ" },
		{ "iubhar", "ˈju.əɾ" },
		{ "ladhran", "ˈl̪ˠɤːɾan" },
		{ "lagh", "ˈl̪ˠɤɣ" },
		{ "laghail", "ˈl̪ˠɤɣal" },
		{ "làimh", "ˈl̪ˠaiv" },
		{ "leabaidh", "ˈʎepɪ" },
		{ "leamhrag", "ˈʎɛuɾak" },
		{ "leotha", "ˈlɔ.ə" },
		{ "leth", "ˈʎe" },
		{ "liath", "ˈʎiə" },
		{ "maighdean", "ˈmɤitʲən" },
		{ "mholainn", "ˈvɔl̪ˠɪɲ" },
		{ "modh", "ˈmɔɣ" },
		{ "modhail", "ˈmɔɣal" },
		{ "nàbaidh", "ˈn̪ˠaːpi" },
		{ "nigh", "ˈɲiː" },
		{ "nigheann", "ˈɲiː.an̪ˠ" },
		{ "odhar", "ˈo.ər" },
		{ "oighre", "ˈɤiɾʲə" },
		{ "piuthar", "ˈpʰju.əɾ" },
		{ "pòsaidh", "ˈpʰɔːs̪ɪ" },
		{ "ràmh", "ˈrˠaːv" },
		{ "rìbhinn", "ˈrˠiːvɪɲ" },
		{ "ro bheò", "ˈrɔ vjɔː" },
		{ "ro thinn", "ˈrˠɔ hiːɲ" },
		{ "roghnaich", "ˈrˠoːnɪç" },
		{ "sàbh", "ˈs̪aːv" },
		{ "saighdear", "ˈs̪ɤitʲəɾ" },
		{ "samhradh", "ˈs̪auɾəɣ" },
		{ "seagh", "ˈʃɤɣ" },
		{ "sèimh", "ˈʃɛːv" },
		{ "siubhal", "ˈʃu.əl̪ˠ" },
		{ "sloighre", "ˈs̪l̪ˠɤiɾʲə" },
		{ "snaidhm", "ˈs̪n̪ˠaim" },
		{ "strath", "ˈs̪t̪ɾa" },
		{ "suidhe", "ˈs̪ɯi.ə" },
		{ "teth", "ˈtʰʲe" },
		{ "thig", "ˈhikʲ" },
		{ "tìodhlaic", "ˈtʰʲiəl̪ˠɪʰkʲ" },
		{ "tiugh", "ˈtʰʲu" },
		{ "tuagh", "ˈt̪ʰuəɣ" },
		{ "tuilleadh", "ˈt̪ʰɯʎəɣ" },
		{ "tumadh", "ˈt̪ʰuməɣ" },
		{ "ubh", "ˈu" },
		-- irregular epenthesis
		{ "cal*pa", "ˈkʰal̪ˠapə" },
		{ "ean*raich", "ˈɛnaɾɪç" },
		{ "for*fhais", "ˈfɔɾɔhɪʃ" },
		{ "Glas*chu", "ˈkl̪ˠas̪əxu" },
		{ "on*fhadh", "ˈɔnɔhəɣ" },
		{ "ul*fhart", "ˈul̪ˠuhəɾʃt̪" },
		-- non-initial stress
		{ "buntàta", "pən̪ˠˈt̪aːʰt̪ə" },
		{ "hăló", "haˈloː" },
		-- irregular pronunciations
		{ "Aŏnghas", "ˈɯnɯ.əs̪" },
		{ "banrigh", "ˈpaun̪ˠrɪ", "bannrigh" },
		{ "banrighinn", "ˈpaun̪ˠrɪ.ɪɲ", "bannrighinn" },
		{ "bliadhna", "ˈpliən̪ˠə", "bliadhnna" },
		{ "bràth", "ˈpɾaːx", "bràch" },
		{ "ceangal", "ˈkʲʰɛ.əl̪ˠ", "ceamhal" },
		{ "cothrom", "ˈkʰɔrˠəm", "cothrrom" },
		-- ...
		-- multiword
		{ "a a a", "ə‿ə‿ə" },
		{ "a dh'aithghearr", "ə‿ˈɣɛçər̪ˠ" },
		{ "a fharadh", "arəɣ" },
		{ "a h-athair", "ə‿ˈhahɪɾʲ" },
		{ "a h-uile", "ə‿ˈxulə", "a ch-uile" },
		{ "a laoidh", "ə‿ˈl̪ˠɯj" },
		{ "a mhàthair", "ə‿ˈvaːhɪɾʲ" },
		{ "a sheòladh", "ə‿ˈhjɔːl̪ˠəɣ" },
		{ "air an fhéill", "eɾʲ‿əɲ‿ˈeːʎ" },
		{ "air neo", "eɾʲ‿ɲɔː", "air neò" },
		{ "an duine agad", "ən̪ˠ‿ˈd̪ɯɲ‿ˈakət̪" },
		{ "bainne bainne, bainne", "ˈpaɲə ˈpaɲə | ˈpaɲə" },
		{ "bho leac", "vɔ‿lɛʰk" },
		{ "bho nàbaidh", "vɔ‿naːpɪ" },
		{ "bho phàiste", "vɔ‿faːʃtʲə" },
		{ "bho réic", "vɔ‿ɾeːʰkʲ" },
		{ "cuid-eigin", "ˈkʰutʲekʲɪn" },
		{ "dà bhealltainn", "t̪aː vjaul̪ˠt̪ɪɲ" },
		{ "dà leabaidh", "t̪aː lepɪ" },
		{ "dà mheall", "t̪aː vjaul̪ˠ" },
		{ "dà nead", "t̪aː nɛt̪" },
		{ "dà pheasair", "t̪aː fes̪ɪɾʲ" },
		{ "dà shìl", "t̪aː hiːʎ" },
		{ "de shiùcair", "tʲe hjuːʰkɪɾʲ" },
		{ "do mhealladh", "t̪ɔ vjal̪ˠəɣ" },
		{ "do rùn", "tɔ ˈɾuːn" },
		-- NOTE(review): "do thaigh" and "do theaghlach" each appear twice below with
		-- different expected IPA for the same input, so presumably at most one entry of
		-- each pair can pass — confirm which transcription is intended.
		{ "do thaigh", "t̪ə hɤj" },
		{ "do thaigh", "t̪ɔ hɤʝ" },
		{ "do theaghlach", "t̪ə hjɤːl̪ˠəx" },
		{ "do theaghlach", "t̪ɔ hjɔːl̪ˠəx" },
		{ "glé shean", "kleː ˈhɛn" },
		{ "glé shoilleir", "kleː ˈhɤʎɪɾʲ" },
		{ "glé thiugh", "kleː ˈhju" },
		{ "gu bràth", "kə‿ˈpɾaːx", "gu bràch" },
		{ "mo ghille", "mə ʝiʎə" },
		{ "mo ghoc", "mə ɣɔʰk" },
		{ "mo nàire", "mɔ naːɾʲɪ" },
		{ "mo nighean", "mɔ ni.an" },
		{ "mo pheata", "mɔ fɛʰt̪ə" },
		{ "mo phiuthar", "mɔ fju.əɾ" },
		{ "mo thòn", "mɔ hɔːn" },
		{ "ro shalach", "ɾɔ hal̪ˠəx" },
		{ "sa pheann", "s̪ə fjaun̪ˠ" },
		{ "sa phost", "s̪ə fɔs̪t̪" }
	}
	-- presumably runs check_output once per entry with the entry's fields as arguments
	-- (behaviour of iterate is defined in Module:UnitTests, not visible here)
	self:iterate(list, "check_output")
end

--[[
			Additions take this form –
		{ "entry name", "ˈIPA" },
		{ "", "ˈ" },
			or, if you are generating IPA from a respelling of the term –
		{ "entry name", "ˈIPA", "respelling" },
		{ "", "ˈ", "" },
			Make sure to include the comma after each entry, or the module will return an error.
]]--

-- Hand the tester object, now carrying check_output and test_pron, back to the caller.
return tests