Jump to content

Module:fo-pron

From Wiktionary, the free dictionary

This is a largely unfinished and rough module for generating IPA pronunciations for Faroese lemmas. It is mostly based on the Icelandic module and uses the rules given in the articles Faroese phonology and Faroese orthography. Improvements by other users are very welcome!


-- Table of functions made available to callers; returned at the end of the file.
local export = {}

-- Language ("fo") and script ("Latn") objects, fetched once and reused by the
-- tag_text and link helpers.
local lang = require("Module:languages").getByCode("fo")
local sc = require("Module:scripts").getByCode("Latn")

--- Tag a piece of text using the module-level language and script objects.
-- Delegates to `tag_text` in "Module:script utilities".
-- @param text  the text to tag
-- @param face  passed through unchanged as the last argument
-- @return whatever the delegated `tag_text` returns
function export.tag_text(text, face)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc, face)
end

--- Build a link for a term using the module-level language and script objects.
-- Delegates to `full_link` in "Module:links".
-- @param term  the term to link
-- @param face  passed through unchanged as the second argument
-- @return whatever the delegated `full_link` returns
function export.link(term, face)
	local link_data = { term = term, lang = lang, sc = sc }
	return require("Module:links").full_link(link_data, face)
end

-- Local aliases for the mw.ustring string functions used in this module.
local sub, find, gmatch, gsub =
	mw.ustring.sub, mw.ustring.find, mw.ustring.gmatch, mw.ustring.gsub

-- Phonetic symbols used when building transcriptions.
local U = require("Module:string/char")
-- Built from code point 0x325; appended after a sound to mark it voiceless.
local voiceless = U(0x325)
local habove, long, primary_stress = "ʰ", "ː", "ˈ"

-- Consonant letters, and a pattern character class matching any one of them.
local consonants = "bdðfghjklmnprstv"
local consonant = ("[%s]"):format(consonants)

-- Vowel symbols, and a pattern character class matching any one of them.
local vowels = "aɛeɪiʏyœøɔoʊu"
local vowel = ("[%s]"):format(vowels)

-- Grapheme-to-IPA lookup tables, indexed by literal spelling.
--   consonants   : consonant letters and clusters
--   short_vowels : vowel letters and digraphs, short realisation
--   long_vowels  : the same keys, long realisation
-- Declared `local` so the module does not leak a global named `data`.
local data = {
	["consonants"] = {
		["b"] = "p",
		["d"] = "t",
		["dj"] = "tʃ",
		["g"] = "k",
		["gj"] = "tʃ",
		["hj"] = "j",
		["hv"] = "kv",
		["k"] = "kʰ",
		["kj"] = "tʃʰ",
		["ll"] = "tl" .. voiceless,
		["nj"] = "ɲ",
		["ng"] = "ŋk",
		["nk"] = "ŋ̊kʰ",
		["p"] = "pʰ",
		["r"] = "ɹ",
		["sj"] = "ʃ",
		["skj"] = "ʃ",
		["stj"] = "ʃ",
		["t"] = "tʰ",
		["tj"] = "tʃʰ",
		-- Mapped to the empty string, i.e. dropped from the transcription.
		["ð"] = "",
	},
	["short_vowels"] = {
		["a"] = "a",
		["á"] = "ɔ",
		["e"] = "ɛ",
		["i"] = "ɪ",
		["í"] = "ʊi",
		["o"] = "ɔ",
		["ó"] = "œ",
		["u"] = "ʊ",
		["ú"] = "ʏ",
		["y"] = "ɪ",
		["ý"] = "ʊi",
		["æ"] = "a",
		["ø"] = "œ",
		["ei"] = "ai",
		["ey"] = "ɛi",
		["oy"] = "ɔi",
	},
	["long_vowels"] = {
		["a"] = "ɛa" .. long,
		["á"] = "ɔa" .. long,
		["e"] = "e" .. long,
		["i"] = "i" .. long,
		["í"] = "ʊi" .. long,
		["o"] = "o" .. long,
		["ó"] = "ɔu" .. long,
		["u"] = "u" .. long,
		["ú"] = "ʉu" .. long,
		["y"] = "i" .. long,
		["ý"] = "ʊi" .. long,
		["æ"] = "ɛa" .. long,
		["ø"] = "ø" .. long,
		["ei"] = "ai" .. long,
		["ey"] = "ɛi" .. long,
		["oy"] = "ɔi" .. long,
	},
}

-- Aspiration
-- For every stop (or stop + j) listed below, four extra entries are added to
-- data.consonants: a vowel-stop-vowel entry, a doubled-stop entry, a stop + n
-- entry, and a sonorant + stop entry.
-- NOTE(review): two of the four keys are written as patterns (character
-- classes and captures), but toIPA only indexes data.consonants with literal
-- substrings of the word, so those entries look unreachable — confirm intent.
local asp_letters = {"p", "t", "k", "kj", "tj"}

for index = 1, #asp_letters do
	local stop = asp_letters[index]
	local preaspirated = habove .. stop
	local following = "([aɛeœøɔo])"

	data.consonants[vowel .. stop .. following] = vowel .. preaspirated .. following
	data.consonants[stop .. stop] = preaspirated .. long
	data.consonants[stop .. "n"] = preaspirated .. "n"
	data.consonants["([mnɲŋɹl])" .. stop] = "%1" .. voiceless .. stop
end

-- Glide
-- For each letter below, entries are added to data.long_vowels describing
-- what replaces the letter between a long vowel and a following vowel
-- (j, w, v, or nothing, depending on the surrounding vowels).
-- NOTE(review): the keys are patterns, but toIPA only indexes
-- data.long_vowels with literal vowel spellings, so these entries look
-- unreachable. Several replacements also use %2 although the key contains a
-- single capture — confirm intent before wiring them up.
local glide_letters = {"ð", "g"}

for index = 1, #glide_letters do
	local glide = glide_letters[index]
	local before_vowel = glide .. vowel
	local after_low = "([ɛaːɔaːeːoːøː])" .. glide

	data.long_vowels["([iːʊiːɛiːɔiː])" .. before_vowel] = "%1j%2"
	data.long_vowels["([uːɔuːʉuː])" .. before_vowel] = "%1w%2"
	data.long_vowels[after_low .. "ɪ"] = "%1j%2"
	data.long_vowels[after_low .. "ʊ"] = "%1v%2"
	data.long_vowels[after_low .. "a"] = "%1a"
end

-- Ordered post-processing rules applied by toIPA with gsub.
-- The outer table is a sequence walked with ipairs, so groups run in numeric
-- order. Each group maps a pattern to its replacement and is walked with
-- pairs, whose order is unspecified; rules that must run in a fixed order
-- therefore live in different groups.
-- Declared `local` so the module does not leak a global named `rules`.
local rules = {
	-- A following voiceless consonant devoices l / ɹ ...
	[1] = {
		["([lɹ])" .. "([ptkʃfshʂ])"] = "%1" .. voiceless .. "%2",
	},
	-- ... and turns v into f. Only one capture exists, so it is %1.
	[2] = {
		["v" .. "([ptkʃfshʂ])"] = "f%1",
	},
	-- "sk" before a front vowel or j. It must run before the plain "k" rule,
	-- which would otherwise consume the k. Written as a literal sequence:
	-- the class "[sk]" would also match a bare s or a bare k.
	[3] = {
		["sk" .. "([iːɪeːɛj])"] = "ʃ%1",
	},
	-- Palatalisation of k / kʰ before a front vowel or j, and s -> ʃ between
	-- a vowel + i and a consonant. These three do not affect one another's
	-- matches, so sharing a group is order-safe.
	[4] = {
		["k" .. "([iːɪeːɛj])"] = "tʃ%1",
		["kʰ" .. "([iːɪeːɛj])"] = "tʃʰ%1",
		["(" .. vowel .. ")" .. "i" .. "s" .. "(" .. consonant .. ")"] = "%1ʃ%2",
	},
	-- Clusters beginning with r.
	[5] = {
		["r" .. "d"] = "ɻʈ",
		["r" .. "t"] = "ʂʈ",
		["r" .. "n"] = "tn" .. voiceless,
		["r" .. "l"] = "ɻɭ",
		["r" .. "s"] = "ʂː"
	},
	-- Vowel + gv / ggj sequences. The gv entries now spell out "kv" in the
	-- same way the ggj entries spell out "tʃː".
	-- NOTE(review): these keys contain orthographic vowels, but toIPA applies
	-- the rules after vowels have been converted to IPA, so they may never
	-- match — confirm where this group should run.
	[6] = {
		["ó" .. "gv"] = "ɛ" .. "kv",
		["ú" .. "gv"] = "ɪ" .. "kv",
		["ey" .. "ggj"] = "ɛ" .. "tʃː",
		["í" .. "ggj"] = "ʊ" .. "tʃː",
		["ý" .. "ggj"] = "ʊ" .. "tʃː",
		["ei" .. "ggj"] = "a" .. "tʃː",
		["oy" .. "ggj"] = "ɔ" .. "tʃː",
	}
}

--- Convert a Faroese spelling into an IPA string.
-- Steps: lower-case the term, convert a word-initial consonant (cluster) via
-- data.consonants, convert vowels via data.short_vowels / data.long_vowels
-- (other characters are copied through), apply every group in `rules` in
-- order, and finally strip hyphens.
-- @param mode    not used by this function; kept so the signature is unchanged
-- @param term    string, the spelling to convert
-- @param accent  pass "off" to omit the leading primary-stress mark
-- @return string, the transcription without surrounding brackets
-- Raises an error when `term` is not a string.
function export.toIPA(mode, term, accent)
	if type(term) ~= "string" then
		error('The function "toIPA" requires a string argument.')
	end

	local IPA = {}

	if accent ~= "off" then
		table.insert(IPA, primary_stress)
	end

	local working_string = mw.ustring.lower(term)
	local firstletter = sub(working_string, 1, 1)

	-- Word-initial consonant: take the longest cluster known to
	-- data.consonants (three, two, then one letter); otherwise copy the
	-- letter unchanged.
	if find(firstletter, consonant) then
		local output, consumed = firstletter, 1
		for length = 3, 1, -1 do
			local cluster = sub(working_string, 1, length)
			local mapped = data.consonants[cluster]
			-- Compare against nil: the empty string is a valid mapping.
			if mapped ~= nil then
				output, consumed = mapped, mw.ustring.len(cluster)
				break
			end
		end
		table.insert(IPA, output)
		working_string = sub(working_string, consumed + 1)
	end

	-- Remaining characters. This runs for vowel-initial words too, so the
	-- result is always a string by the time gsub is called on it.
	while mw.ustring.len(working_string) > 0 do
		-- Prefer a vowel digraph (ei, ey, oy) over a single letter.
		local grapheme = sub(working_string, 1, 2)
		if data.short_vowels[grapheme] == nil then
			grapheme = sub(working_string, 1, 1)
		end
		local size = mw.ustring.len(grapheme)

		if data.short_vowels[grapheme] then
			-- Long when followed by exactly one non-vowel and then a vowel.
			local next1 = sub(working_string, size + 1, size + 1)
			local next2 = sub(working_string, size + 2, size + 2)
			if data.long_vowels[grapheme]
				and not data.short_vowels[next1]
				and data.short_vowels[next2] then
				table.insert(IPA, data.long_vowels[grapheme])
			else
				table.insert(IPA, data.short_vowels[grapheme])
			end
		else
			table.insert(IPA, grapheme)
		end

		working_string = sub(working_string, size + 1)
	end

	IPA = table.concat(IPA)

	-- Groups run in order; rules inside one group run in unspecified order.
	for _, set_of_rules in ipairs(rules) do
		for regex, replacement in pairs(set_of_rules) do
			IPA = gsub(IPA, regex, replacement)
		end
	end

	IPA = gsub(IPA, "%-", "")

	return IPA
end

--- Template entry point: produce the formatted IPA for a term.
-- Parameter 1 of the parent frame is the term (defaults to the current page
-- title); parameter 2 is forwarded to toIPA as `accent`.
-- @param frame  the frame object of the invocation; its parent's args are read
-- @return the value returned by `format_IPA_full` in "Module:IPA"
function export.show(frame)
	local params = {
		[1] = {},
		[2] = {}
	}

	local title = mw.title.getCurrentTitle()

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = args[1] or title.text
	local accent = args[2]

	-- toIPA is declared as (mode, term, accent). Passing only (term, accent)
	-- would shift the arguments and make toIPA reject the call whenever
	-- parameter 2 is omitted, so the mode slot is filled explicitly.
	local ipa = export.toIPA(nil, term, accent)

	ipa = "[" .. ipa .. "]"
	ipa = require("Module:IPA").format_IPA_full {
		-- Reuse the module-level language object instead of fetching it again.
		lang = lang,
		items = {{ pron = ipa }},
	}

	return ipa
end

return export