Jump to content

Module:User:Erutuon/en-pronunciation

From Wiktionary, the free dictionary

IPA keyword
RP
æ trap
ɒ lot
aɪ price
aʊ mouth
ɑː bath, palm, start
e dress
ə comma, letter
ʌ strut
eə square
eɪ face
əʊ goat
ɜː nurse
ɪ kit
i happy
ɪə near
iː fleece
ɔɪ choice
ɔː force, north, thought
ʊ foot
ʊə cure
uː goose
SSB
a trap
ɑj price
aw mouth
ɑː bath, palm, start
ʌ strut
ə comma, letter
əw goat
əː nurse
ɪ kit
ɪj fleece, happy
ɪː near
ɔ lot
oj choice
oː force, north, thought
ɵ foot
ʉw goose
ɵː cure
ɛ dress
ɛj face
ɛː square
front central back
high /ɪj ɪː/ /ʉw ɵː/
mid /ɛj ɛː/ /əw əː/ /oj oː/
low /aw/ [aː] /ɑj ɑː/

local export = {}

local fun = require "Module:fun"

local usub = mw.ustring.sub
local ulen = mw.ustring.len

-- Sorts the array t in place with the default ordering and hands the same
-- table back, so that the call can be nested inside another expression.
local function tsort(t)
	table.sort(t)
	
	return t
end

-- Reverses key–value pairs: returns a new table in which every value of t is a
-- key that maps back to the key it was stored under in t.
--
-- If two keys of t hold the same value, the key that comes first in sorted
-- order wins (keys are grouped by type name; numbers and strings are then
-- compared with <). Sorting is needed because pairs() visits keys in an
-- unspecified order, which would otherwise make the result unpredictable
-- whenever values collide.
local function invert(t)
	-- Gather the keys first so that they can be visited in a fixed order.
	local keys = {}
	for k in pairs(t) do
		keys[#keys + 1] = k
	end
	
	table.sort(keys, function(a, b)
		local type_a, type_b = type(a), type(b)
		if type_a ~= type_b then
			return type_a < type_b
		end
		if type_a == "number" or type_a == "string" then
			return a < b
		end
		-- Other key types have no natural order; treat them as equivalent.
		return false
	end)
	
	local new_t = {}
	
	for _, k in ipairs(keys) do
		local v = t[k]
		-- Keep the first key seen for each value.
		if new_t[v] == nil then
			new_t[v] = k
		end
	end
	
	return new_t
end

-- Maps an IPA vowel symbol to the plain Latin letter that stands in for it
-- when transcriptions are compared for sorting. Symbols that are not listed
-- are left as they are by the code that consults this table.
local sort_value = {}

for letter, symbols in pairs {
	a = { "ɑ", "ɒ", "æ" },
	e = { "ɜ", "ʌ", "ə" },
	i = { "ɪ" },
	o = { "ɔ" },
	u = { "ʊ", "ɵ", "ʉ" }, -- ʉ stands in for ɵ
} do
	for _, symbol in ipairs(symbols) do
		sort_value[symbol] = letter
	end
end

-- Alternative names under which a transcription system may be requested;
-- each maps to the name used as a key in the table of systems.
local aliases = {
	["Lindsey"] = "SSB",
}

-- Vowel symbols of each transcription system, keyed by lexical-set keyword.
-- Both systems list the same keywords, which is what lets export.convert
-- translate a symbol from one system to the other by way of its keyword.
-- Within a system several keywords may share one symbol (for example palm,
-- bath and start).
-- NOTE(review): these tables are traversed with pairs() elsewhere in the
-- module; entry order is left untouched in case it influences tie-breaking.
local systems = {
	RP = { -- from [[w:Lexical set]]
		kit = "ɪ", dress = "e", trap = "æ", lot = "ɒ", strut = "ʌ", foot = "ʊ",
		palm = "ɑː", bath = "ɑː", nurse = "ɜː", fleece = "iː", face = "eɪ",
		thought = "ɔː", goat = "əʊ", goose = "uː", price = "aɪ", choice = "ɔɪ",
		mouth = "aʊ", near = "ɪə", square = "eə", start = "ɑː", north = "ɔː",
		force = "ɔː", cure = "ʊə", happy = "i", letter = "ə", comma = "ə",
	},
	SSB = { -- from http://englishspeechservices.com/blog/bre-vowel-symbols/
		kit = "ɪ", fleece = "ɪj", happy = "ɪj", near = "ɪː",
		trap = "a", price = "ɑj", mouth = "aw", palm = "ɑː", start = "ɑː", bath = "ɑː",
		lot = "ɔ", choice = "oj", thought = "oː", north = "oː", force = "oː",
		strut = "ʌ", letter = "ə", comma = "ə", goat = "əw", nurse = "əː",
		foot = "ɵ", goose = "ʉw", cure = "ɵː", -- also oː, ʉwə
		dress = "ɛ", face = "ɛj", square = "ɛː",
	}
}

-- Smoothing rules per transcription system, consumed by export.smooth.
-- Source: http://englishspeechservices.com/blog/smoothing-then-and-now/
-- Each key changes to its value. The schwa is omitted from the keys: iː and ɪj
-- stand for iːə and ɪjə (export.smooth appends the schwa, optionally preceded
-- by a period, when it builds its search pattern).
local smoothing = {
	RP = {
		["iː"] = "ɪə",
		["eɪ"] = "eə",
		["aɪ"] = "aə",
		["ɔɪ"] = "ɔə",
		["uː"] = "ʊə",
		-- NOTE(review): the RP lexical-set table in this module writes mouth
		-- as "aʊ", so this "ɑʊ" key will not match transcriptions that use
		-- that symbol — confirm which spelling is intended.
		["ɑʊ"] = "ɑə",
		["əʊ"] = "əə",
	},
	SSB = {
		["ɪj"] = "ɪː",
		["ɛj"] = "ɛː",
		["ɑj"] = "ɑː",
		["oj"] = "oː",
		["ʉw"] = "ɵː", -- Really, this should be ɵwə → ɵː. There is not a change in vowel quality. ɵw was changed to ʉw to distinguish the phoneme from əw.
		["aw"] = "aː", -- This sound is not considered a phoneme by Lindsey, so is not found in the lexical set list above.
		["əw"] = "əː",
	}
}

-- ASCII shortcuts for typing transcriptions, per transcription system: each
-- key is the typed sequence and each value is the IPA it stands for.
-- Rows are laid out in columns by vowel; several rows offer alternative
-- spellings for the same sounds (marked "which to use?").
local shortcuts = {
	SSB = {
		-- CUBE: capitalized vowel means stress
		a = "a",	e = "ɛ",	y = "ʌ",	i = "ɪ",	o = "ɔ",	u = "ɵ",	x = "ə",
		
		-- ij/iJ expand to the fleece vowel "ɪj"; with a bare "ɪ" they would
		-- merely duplicate the plain "i" shortcut above.
		-- NOTE(review): oj/oJ give "ɔj" although the SSB lexical-set table
		-- writes choice as "oj" — confirm which is intended.
		aj = "ɑj",	ej = "ɛj",				ij = "ɪj",	oj = "ɔj",
		aJ = "ɑj",	eJ = "ɛj",				iJ = "ɪj",	oJ = "ɔj", -- CUBE
		
								yw = "əw",
		aw = "aw",				ew = "əw",							uw = "ʉw",
								yW = "əw",							-- CUBE; which to use?
		aW = "aw",				eW = "əw",							uW = "ʉw",
		
		aa = "ɑː",	ee = "ɛː",	yy = "əː",	ii = "ɪː",	oo = "oː",	uu = "ɵː",
		aH = "ɑː",	eH = "ɛː",	yH = "əː",	iH = "ɪː",	oH = "oː",	uH = "ɵː", -- CUBE; which to use?
		
		T = "θ",	S = "ʃ",	C = "tʃ",	N = "ŋ",	-- CUBE
		D = "ð",	Z = "ʒ",	G = "dʒ",
	},
	RP = {
		a = "æ",	e = "e",	i = "i",	o = "ɒ",	u = "u",	y = "ʌ",	x = "ə",
		-- Capital I and U give the lax vowels. U was previously spelled with a
		-- lowercase "u", which duplicated the key on the line above and
		-- silently replaced its value.
								I = "ɪ",				U = "ʊ",
		
		au = "aʊ",	eu = "əʊ",				ou = "ɒʊ",	--[[ "ɑʊ"? ]]
		
		ai = "aɪ",	ei = "eɪ",				oi = "ɔɪ",
		
		ae = "ɑə",	ee = "eə",	ie = "ɪə",	oe = "ɔə",	ue = "ʊə",
		ay = "ɑə",	ey = "eə",	iy = "ɪə",	oy = "ɔə",	uy = "ʊə",	-- which to use?
	--	archaic		or "ɛə"?				archaic
		
		aa = "ɑː",				ii = "iː",	oo = "ɔː",	uu = "uː",
		aH = "ɑː",				iH = "iː",	oH = "ɔː",	uH = "uː",	yH = "ɜː",	-- CUBEish; which to use?
		
		T = "θ",	S = "ʃ",	C = "tʃ",	N = "ŋ",	-- CUBE
		D = "ð",	Z = "ʒ",	G = "dʒ",
	},
}

-- Resolves a transcription-system name and checks that it is known.
--
-- system: a system name or one of its aliases.
-- Returns the canonical name, i.e. a key of the systems table.
-- Raises an error naming the offending value if no such system exists.
local function validate_system(system)
	system = aliases[system] or system
	
	if not systems[system] then
		-- tostring keeps the message intact even when system is nil or is not
		-- a string.
		error("No system called " .. tostring(system) .. " is found in [[Module:User:Erutuon/en-pronunciation]].")
	end
	
	return system
end

-- Rewrites the vowel symbols in text from one transcription system to another.
--
-- text: the transcription to convert.
-- from, to: system names (or aliases); both are validated first.
-- Returns the converted string. At each position a two-character symbol of the
-- source system is tried before a one-character symbol; characters that are
-- not symbols of the source system are copied through unchanged.
function export.convert(text, from, to)
	from = validate_system(from)
	to = validate_system(to)
	
	local pieces = {}
	local last = ulen(text)
	-- Symbol → keyword for the source system; keyword → symbol for the target.
	local keyword_of = invert(systems[from])
	local target = systems[to]
	local pos = 1
	
	while pos <= last do
		local pair = usub(text, pos, pos + 1)
		local pair_keyword = keyword_of[pair]
		
		if pair_keyword then
			table.insert(pieces, target[pair_keyword])
			pos = pos + 2
		else
			local single = usub(text, pos, pos)
			table.insert(pieces, target[keyword_of[single]] or single)
			pos = pos + 1
		end
	end
	
	return table.concat(pieces)
end

-- Applies the smoothing rules of a transcription system to word.
--
-- word: the transcription to process.
-- system: a system name or alias; validated first.
-- Returns word with every rule vowel that is followed by a schwa (optionally
-- with a period in between) replaced by the rule's smoothed form.
function export.smooth(word, system)
	local rules = smoothing[validate_system(system)]
	
	-- Naive and inefficient: one pass over the whole word per rule.
	for vowel_preceding_schwa, smoothed in pairs(rules) do
		local pattern = vowel_preceding_schwa .. "%.?ə"
		-- Parentheses discard gsub's second result (the match count).
		word = (word:gsub(pattern, smoothed))
	end
	
	return word
end

-- Builds one row of wikitable markup from a list of cell contents:
-- | cell1 || cell2 || cell3 ...
local function make_row(cells)
	local function as_cell(cell)
		return "| " .. cell
	end
	
	-- Every cell carries a leading "| "; joining with " |" then produces the
	-- " || " separator between neighbouring cells.
	local prefixed = fun.map(as_cell, cells)
	
	return table.concat(prefixed, " |")
end
	
-- Wraps a transcription in a span carrying the "IPA" class.
local function tag(IPA_transcription)
	local opening, closing = '<span class="IPA">', '</span>'
	
	return opening .. IPA_transcription .. closing
end

-- Produces the string used to order a transcription: every UTF-8 character
-- that has an entry in sort_value is replaced by that entry, and all other
-- characters are kept as they are.
local function get_compare_value(IPA)
	-- One lead byte followed by any number of continuation bytes.
	local utf8_character = "[%z\1-\127\194-\244][\128-\191]*"
	local substituted = IPA:gsub(utf8_character, sort_value)
	
	return substituted
end

-- Ordering function for transcriptions: true when IPA1 belongs before IPA2,
-- judged by their compare values rather than by the raw strings.
local function compare(IPA1, IPA2)
	local first = get_compare_value(IPA1)
	local second = get_compare_value(IPA2)
	
	return first < second
end

-- Builds the body rows of the IPA/keyword wikitable for one system.
--
-- IPA_keyword_data: table mapping an IPA symbol to a list of keywords.
-- Returns wikitable markup with one row per symbol (symbol cell, then the
-- keywords joined by ", "), rows separated by "|-" lines; an empty string if
-- there is no data. Symbols are visited in the order given by compare.
-- Note that each keyword list is sorted in place.
--
-- A wrapped, multi-column layout was once sketched here, but the variable it
-- depended on was never defined, so that branch could not run and only read
-- an undefined global. It has been removed.
local function make_IPA_keyword_table(IPA_keyword_data)
	local rows = {}
	
	for IPA, keywords in require "Module:table".sortedPairs(IPA_keyword_data, compare) do
		rows[#rows + 1] = make_row {
			tag(IPA),
			table.concat(tsort(keywords), ", "),
		}
	end
	
	return table.concat(rows, "\n|-\n")
end

-- Renders a wikitable listing, for every transcription system, each vowel
-- symbol together with the lexical-set keywords it covers.
--
-- frame: the invoking frame; it is not consulted.
-- Returns the wikitable markup as a string.
--
-- Systems are emitted in sorted name order so that the output does not depend
-- on the unspecified traversal order of pairs(). If two names refer to the
-- very same table of lexical sets, only the first name in that order is shown.
function export.show(frame)
	local system_names = {}
	for system in pairs(systems) do
		system_names[#system_names + 1] = system
	end
	table.sort(system_names)
	
	local output = { '{| class="wikitable"', "! IPA !! keyword" }
	local already_seen = {}
	
	for _, system in ipairs(system_names) do
		local lexical_sets = systems[system]
		
		if not already_seen[lexical_sets] then
			already_seen[lexical_sets] = true
			
			-- Regroup keyword → symbol into symbol → list of keywords.
			local keywords_by_IPA = {}
			for keyword, IPA in pairs(lexical_sets) do
				local keywords = keywords_by_IPA[IPA]
				if not keywords then
					keywords = {}
					keywords_by_IPA[IPA] = keywords
				end
				table.insert(keywords, keyword)
			end
			
			table.insert(output, '|-\n! colspan="2" | ' .. system .. "\n|-")
			table.insert(output, make_IPA_keyword_table(keywords_by_IPA))
		end
	end
	
	table.insert(output, "|}")
	
	return table.concat(output, "\n")
end

-- Combines two tables into a new one without modifying either.
-- Raises an error if a key of t2 already has a non-nil value from t1.
local function merge(t1, t2)
	local merged = {}
	
	for key, value in pairs(t1) do
		merged[key] = value
	end
	
	for key, value in pairs(t2) do
		if merged[key] ~= nil then
			error("Table 1 has a value for " .. key .. ".")
		end
		merged[key] = value
	end
	
	return merged
end

-- Display text for each transcription system, keyed by system name.
local labels = {
	["RP"] = "[[w:Received Pronunciation|Received Pronunciation]]",
	-- probably not notable enough for a Wikipedia article yet
	["SSB"] = "Standard Southern British",
}

-- Template entry point: takes a transcription in one system (parameter RP or
-- SSB), derives the transcription for every system that was not supplied, and
-- returns a bulleted list with one labelled transcription per system.
--
-- frame: the invoking frame; its own arguments and those of its parent are
-- merged, so a parameter given in both places raises an error.
-- Raises an error if no transcription was supplied at all.
--
-- Systems and output lines are processed in sorted name order so that the
-- result does not depend on the unspecified traversal order of pairs().
function export.template(frame)
	local params = {
		RP = {},
		SSB = {},
	}
	
	local args = require("Module:parameters").process(merge(frame.args, frame:getParent().args), params)
	
	-- Lists the keys of t in a fixed order.
	local function sorted_keys(t)
		local keys = {}
		for key in pairs(t) do
			keys[#keys + 1] = key
		end
		table.sort(keys, function(a, b)
			return tostring(a) < tostring(b)
		end)
		return keys
	end
	
	-- The first supplied transcription is the source for all conversions.
	local provided_system = sorted_keys(args)[1]
	if provided_system == nil then
		error("No transcription was provided.")
	end
	
	local already_seen = {}
	for _, system in ipairs(sorted_keys(systems)) do
		local lexical_sets = systems[system]
		-- Skip systems that were supplied explicitly and names that share a
		-- table of lexical sets with one already handled.
		if not (already_seen[lexical_sets] or args[system]) then
			args[system] = export.convert(args[provided_system], provided_system, system)
		end
		already_seen[lexical_sets] = true
	end
	
	local output = {}
	for i, code in ipairs(sorted_keys(args)) do
		output[i] = "* " .. (labels[code] or code) .. ": " .. tag("/" .. args[code] .. "/")
	end
	
	return table.concat(output, "\n")
end

return export