Jump to content

Module:ote-IPA

From Wiktionary, the free dictionary

This module is not meant to be used directly. It is used by Template:ote-IPA; see there for usage.


-- Based on [[Module:es-pronunc]] by: Benwing
-- Adapted by Santi2222

local export = {}

local m_IPA = require("Module:IPA")

local lang = require("Module:languages").getByCode("ote")

-- Short aliases for the Unicode-aware Scribunto library functions used below.
local u = mw.ustring.char
local rsubn = mw.ustring.gsub
local rsplit = mw.text.split
local ulower = mw.ustring.lower

-- Combining diacritics, built from their code points so that they are
-- visible in the source. HIGH, LOW and RISING are appended to vowels in
-- export.IPA; BAR is matched after e/o/u in the input.
-- U+0301 COMBINING ACUTE ACCENT
local HIGH = u(0x0301) 
-- U+0300 COMBINING GRAVE ACCENT
local LOW = u(0x0300)
-- U+030C COMBINING CARON
local RISING = u(0x030C)
-- U+0331 COMBINING MACRON BELOW
local BAR = u(0x0331)

--- Substitute like rsubn(), but hand back only the resulting string.
-- The extra parentheses truncate the call to its first return value, so the
-- match count never leaks into the caller's argument or return lists.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end

--- Convert text in the entry orthography to an IPA transcription.
-- The transcription is done with an ordered series of substitutions; a few
-- one-character placeholder ("fake") symbols stand in for multi-character
-- IPA sequences until the very end, so that later rules can treat them as
-- single units.
-- @param text Text to transcribe; when nil, the current page title is used.
-- @return The transcription (without enclosing slashes), normalized to NFC.
function export.IPA(text)
	text = ulower(text or mw.title.getCurrentTitle().text)

	-- convert commas and en/em dashes to IPA foot boundaries
	text = rsub(text, "%s*[,–—]%s*", " | ")
	-- question mark or exclamation point in the middle of a sentence -> IPA foot boundary
	text = rsub(text, "([^%s])%s*[¡!¿?]%s*([^%s])", "%1 | %2")

	-- NOTE: a per-word pass (making certain monosyllabic words unstressed)
	-- is not implemented yet.

	-- convert hyphens to spaces
	text = rsub(text, "%-", " ")
	-- eliminate the remaining punctuation
	text = rsub(text, "[¡!¿?']", "")
	-- Canonicalize whitespace only now, after every step that can introduce
	-- or expose spaces (hyphens, removed punctuation): collapse runs of
	-- whitespace and strip leading/trailing spaces, so that no stray space
	-- survives into the output.
	text = rsub(text, "%s+", " ")
	text = rsub(text, "^ ", "")
	text = rsub(text, " $", "")

	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"

	-- transcription (with some fake symbols, marked "fs", for convenience)
	-- stops
	text = rsub(text, "ꞌ", "ʔ")
	text = rsub(text, "ch", "č") -- fs
	text = rsub(text, "ts", "c") -- fs
	text = rsub(text, "([ptcčk])ʔ", "%1ʼ") -- ejectives
	-- fricatives
	text = rsub(text, "b", "β")
	text = rsub(text, "ʔβ", "b") -- is /b/ the value of <ꞌb>?
	text = rsub(text, "d", "ð")
	text = rsub(text, "zy", "ʒ")
	text = rsub(text, "g", "ɡ")
	text = rsub(text, "f", "ɸ")
	text = rsub(text, "th", "θ")
	-- <x> must be converted before <j>, whose output is "x"
	text = rsub(text, "x", "ʃ")
	text = rsub(text, "j", "x")

	-- nasals: park plain <n> on a fake symbol so that <ñ> can become "n"
	text = rsub(text, "n", "ń") -- fs
	text = rsub(text, "ñ", "n")

	-- semivowels (done after <j> has been consumed above)
	text = rsub(text, "y", "j")

	-- vowels
	-- the vowels a, e, i, o, u and their forms with diacritics don't need any processing

	-- ü with a tone mark, which exists as a single precomposed character
	text = rsub(text, "ǘ", "ũ" .. HIGH)
	text = rsub(text, "ǜ", "ũ" .. LOW)
	text = rsub(text, "ǚ", "ũ" .. RISING)
	text = rsub(text, "ü", "ũ")

	-- ï + acute is likewise a single precomposed character (U+1E2F) in NFC
	-- text, so the plain "ï" rule below would never see it.
	text = rsub(text, u(0x1E2F), "ĩ" .. HIGH)

	text = rsub(text, "ë", "ẽ")
	text = rsub(text, "ä", "ã")
	text = rsub(text, "ï", "ĩ")

	-- letters with a bar; the tone is re-attached as an explicit combining
	-- mark so that the base vowel stays visible to the glide rule below
	text = rsub(text, "e" .. BAR, "æ")
	text = rsub(text, "é" .. BAR, "æ" .. HIGH)
	text = rsub(text, "è" .. BAR, "æ" .. LOW)
	text = rsub(text, "ě" .. BAR, "æ" .. RISING)

	text = rsub(text, "o" .. BAR, "ʌ")
	text = rsub(text, "ó" .. BAR, "ʌ" .. HIGH)
	text = rsub(text, "ò" .. BAR, "ʌ" .. LOW)
	text = rsub(text, "ǒ" .. BAR, "ʌ" .. RISING)

	text = rsub(text, "u" .. BAR, "ɨ")
	text = rsub(text, "ú" .. BAR, "ɨ" .. HIGH)
	text = rsub(text, "ù" .. BAR, "ɨ" .. LOW)
	text = rsub(text, "ǔ" .. BAR, "ɨ" .. RISING)

	-- collapse a tone mark that ended up doubled
	text = rsub(text, HIGH .. HIGH, HIGH)
	text = rsub(text, LOW .. LOW, LOW)
	text = rsub(text, RISING .. RISING, RISING)

	-- u before another vowel is a glide
	text = rsub(text, "u([æʌɨaeioáéíóàèìòǎěǐǒẽãĩ])", "w%1")

	-- convert fake symbols to the real ones
	local final_conversions = {
		["č"] = "t͡ʃ",
		["c"] = "t͡s",
		["ń"] = "n̪"
	}

	text = rsub(text, "[čcń]", final_conversions)
	-- remove dots (a literal "." in the input)
	text = rsub(text, "%.", "")

	-- remove # symbols at word and text boundaries
	text = rsub(text, "#", "")

	return mw.ustring.toNFC(text)
end

--- Template entry point: format the IPA line for an entry.
-- Reads the parent frame's arguments: the optional first positional argument
-- is the text to transcribe (falling back to the page title), and the
-- optional `pre` argument is text placed before the pronunciation.
-- @param frame The frame object passed in by the invoking template.
-- @return A bulleted line containing the formatted pronunciation.
function export.show(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = {},
		["pre"] = {},
	})

	local respelling = args[1] or mw.title.getCurrentTitle().text
	local items = {
		{ pron = "/" .. export.IPA(respelling) .. "/" },
	}

	-- Optional qualifier, separated from the pronunciation by one space.
	local prefix = ""
	if args.pre then
		prefix = args.pre .. " "
	end

	return "* " .. prefix .. m_IPA.format_IPA_full { lang = lang, items = items }
end

return export