Module:ro-pronunciation

The following documentation is located at Module:ro-pronunciation/documentation. ^[edit] Categories were auto-generated by Module:module categorization. ^[edit]

Useful links: subpage list • links • transclusions • testcases • sandbox

Testcases

40 of 40 tests failed. (refresh)

test_to_phonemic:
Text	Expected	Actual
copíl	koˈpil	kopˈil
copíi	koˈpij	kopˈiʲ
copíii	koˈpi.ji	kopˈiij
lupi	lupʲ	lˈupʲ
șárpe	ˈʃar.pe	ʃˈarpe
țáră	ˈt͡sa.rə	t͡sˈarə
înăuntru	ɨ.nəˈun.tru	ɨnəẃntru
xilofón	ksi.loˈfon	ksilofˈon
quarc	kwark	kẃark
fiert	fjert	fˈiert
viteză	viˈte.zə	vitˈezə
viteáză	viˈte̯a.zə	viteˈazə
minge	ˈmin.d͡ʒe	mˈind͡ʒe
mingeac	minˈd͡ʒe̯ak	mind͡ʒˈeak
ghețár	ɡeˈt͡sar	ɡet͡sˈar
ghíndă	ˈɡin.də	ɡˈində
jargón	ʒarˈɡon	ʒarɡˈon
cireáșă	t͡ʃiˈre̯a.ʃə	t͡ʃireˈaʃə
cétină	ˈt͡ʃe.ti.nə	t͡ʃˈetinə
chiar	kjar	kˈiar
chestie	ˈkes.ti.e	kˈestie
mlădíță	mləˈdi.t͡sə	mlədˈit͡sə
târșă	ˈtɨr.ʃə	tˈɨrʃə
oaie	ˈo̯a.je	oˈaie
râu	rɨw	rˈɨu
continuu	konˈti.nuw	kontˈinuw
câine	ˈkɨj.ne	kɨj́ne
mea	me̯a	mˈea
socoteai	so.koˈte̯aj	sokoteˈaʲ
leoaică	leˈo̯aj.kə	leoaj́kə
accelerasem	ak.t͡ʃe.leˈra.sem	akt͡ʃelerˈasem
creează	kreˈe̯a.zə	kreeˈazə
crează	ˈkre̯a.zə	kreˈazə
beau	be̯aw	beˈau
suiau	suˈjaw	swiˈau
piei	pjej	pjˈeʲ
pleoape	ˈple̯o̯a.pe	pleoˈape
creioane	kreˈjo̯a.ne	krejoˈane
sculptură	skulpˈtu.rə	skulp.tˈurə
poezíe	po.eˈzi.e	poezˈie

test_to_phonetic:
	Text	Expected	Actual

local u = require("Module:string/char")

-- Table of functions exposed by this module.
local export = {}

-- IPA symbols used in the output.
local stress = "ˈ"
local long = "ː"

-- Combining diacritics (U+0301 acute, U+0300 grave, U+0302 circumflex),
-- built with the helper loaded from Module:string/char.
local acute = u(0x301)
local grave = u(0x300)
local circumflex = u(0x302)

-- Character class matching either stress-marking accent.
local acute_or_grave = ("[%s%s]"):format(acute, grave)

-- Vowel letters as they appear after the orthographic substitutions
-- (ă → ə, â/î → ɨ), and the character classes derived from them.
local vowels = "aeiouəɨ"
local vowel = ("[%s]"):format(vowels)
local vowel_or_semivowel = ("[%sjw]"):format(vowels)
local not_vowel = ("[^%s]"):format(vowels)

local front = "[ij]"
-- U+031F, combining plus sign below.
local fronted = u(0x031F)
local voiced_consonant = "[bdɡlmnrvz]"

-- Maps each single-character affricate placeholder to its tie-bar spelling.
local full_affricates = {
	["ʦ"] = "t͡s",
	["ʣ"] = "d͡z",
	["ʧ"] = "t͡ʃ",
	["ʤ"] = "d͡ʒ",
}

-- Within the body of to_phonemic the single characters ʦ, ʣ, ʧ, ʤ stand for
-- the affricates t͡s, d͡z, t͡ʃ, d͡ʒ; they are expanded through full_affricates
-- at the end unless single_character_affricates is set.

-- Convert a Romanian spelling into a phonemic transcription string.
--
-- Parameters:
--   word                        Spelling to transcribe. The stressed vowel may
--                               be marked with an acute or grave accent; when
--                               no such accent is present and the word does not
--                               contain exactly one vowel, an acute is placed
--                               on the second-to-last vowel.
--   single_character_affricates If truthy, affricates are left as the single
--                               characters ʦ, ʣ, ʧ, ʤ instead of being
--                               expanded to their tie-bar forms.
--
-- Returns the transcription (without surrounding slashes).
-- Raises an error for the letter sequences "cg" and "gc".
function export.to_phonemic(word, single_character_affricates)
	-- Letter-level substitutions that must happen before decomposition,
	-- so that â, î, ă, ș, ț are still single precomposed characters.
	word = mw.ustring.lower(word)
		:gsub("'", ""):gsub("â", "ɨ"):gsub("î", "ɨ"):gsub("ă", "ə")
		:gsub("j", "ʒ"):gsub("ș", "ʃ"):gsub("ț", "ʦ")
		:gsub("cc", "kc"):gsub("uu", "uw")
	
	-- Decompose combining characters: for instance, è → e + ◌̀
	local decomposed = mw.ustring.toNFD(word):gsub("x", "ks"):gsub("y", "i")
		:gsub("ck", "k"):gsub("sh", "ʃ")

	-- Transcriptions must contain an acute or grave, to indicate stress position.
	-- This does not handle phrases containing more than one stressed word.
	-- Default to penultimate stress rather than throw error?
	if not mw.ustring.find(decomposed, acute_or_grave) then
		-- Allow monosyllabic unstressed words.
		-- Count with mw.ustring so that the two-byte vowels ə and ɨ are each
		-- counted once; the byte-oriented string.gsub would match their
		-- individual bytes against the class.
		local _, vowel_count = mw.ustring.gsub(decomposed, vowel, "%0")
		if vowel_count ~= 1 then
			-- Add acute accent on second-to-last vowel.
			decomposed = mw.ustring.gsub(decomposed, 
				"(" .. vowel .. ")(" .. not_vowel .. "*[iu]?" .. vowel .. not_vowel .. "*)$",
				"%1" .. acute .. "%2")
		end
	end
	
	local transcription = decomposed

	-- ci, gi + vowel
	-- Do ci, gi + e, é, è sometimes contain /j/?
	transcription = mw.ustring.gsub(transcription,
		"([cg])([cg]?)i(" .. vowel .. ")",
		function (letter, double, following)
			local affricate
			if letter == "c" then
				affricate = "ʧ"
			else
				affricate = "ʤ"
			end
			
			if double ~= "" then
				if double ~= letter then
					error("Invalid sequence " .. letter .. double .. ".")
				end
				
				affricate = affricate .. affricate
			end
			
			-- The i is only a spelling device here and is dropped.
			return affricate .. following
		end)
	
	-- Handle other cases of c, g.
	-- The first capture (the whole letter group) is not needed; an optional
	-- h after the letter is consumed and never re-emitted.
	transcription = mw.ustring.gsub(transcription,
		"(([cg])([cg]?)(h?))(.?)",
		function (_, first, double, h, following)
			-- Don't allow the combinations cg, gc.
			-- Or do something else?
			if double ~= "" and double ~= first then
				error("Invalid sequence " .. first .. double .. ".")
			end
			
			-- c, g is soft before e, i.
			local sound
			if (following == "e" or following == "i") and h ~= "h" then
				if first == "c" then
					sound = "ʧ"
				else
					sound = "ʤ"
				end
			else
				if first == "c" then
					sound = "k"
				else
					sound = "ɡ"
				end
			end
			
			if double ~= "" then
				sound = sound .. sound
			end
			
			return sound .. following
		end)
	
	-- ⟨qu⟩ represents /kw/.
	transcription = transcription:gsub("qu", "kw")
	
	-- Word-final i: palatalization mark after a consonant, with the
	-- sequences -ii and -iii adjusted afterwards.
	transcription = mw.ustring.gsub(transcription,  "i$", "ʲ")
	transcription = mw.ustring.gsub(transcription, "iiʲ$", "iji")
	transcription = mw.ustring.gsub(transcription, "iʲ$", "ij")

	local semivowels = { i = "j", u = "w" }

	-- u or i (without accent) before another vowel is a semivowel.
	transcription = mw.ustring.gsub(transcription,
		"([iu])(" .. vowel .. ")",
		function (glide, nucleus)
			return semivowels[glide] .. nucleus
		end)

	-- Likewise u or i directly after a vowel.
	transcription = mw.ustring.gsub(transcription,
		"(" .. vowel .. ")([iu])",
		function (nucleus, glide)
			return nucleus .. semivowels[glide]
		end)

	transcription = mw.ustring.gsub(transcription, "je$", "ie")
	
	-- Replace acute and grave with stress mark.
	-- The mark is placed immediately before the stressed vowel.
	transcription = mw.ustring.gsub(transcription,
		"(" .. vowel .. ")" .. acute_or_grave, stress .. "%1")
	
	-- Syllable breaks inside specific consonant clusters.
	-- NOTE(review): the patterns containing "c" look unreachable, because
	-- every c has been rewritten to k or ʧ above — presumably "k" was
	-- intended. Left unchanged here; verify before altering, since these
	-- rules also apply to word-final clusters.
	transcription = mw.ustring.gsub(transcription, "lpt", "lp.t")
	transcription = mw.ustring.gsub(transcription, "mpt", "mp.t")
	transcription = mw.ustring.gsub(transcription, "nct", "nc.t")
	transcription = mw.ustring.gsub(transcription, "ncʦ", "nc.ʦ")
	transcription = mw.ustring.gsub(transcription, "ncʃ", "nc.ʃ")
	transcription = mw.ustring.gsub(transcription, "ndv", "nd.v")
	transcription = mw.ustring.gsub(transcription, "rct", "rc.t")
	transcription = mw.ustring.gsub(transcription, "rtf", "rt.f")
	transcription = mw.ustring.gsub(transcription, "stm", "st.m")

	-- A single consonant from this set between two vowels starts a new
	-- syllable: V.CV. Character classes are required here; concatenating the
	-- bare vowel list would only match that literal string. The frontier
	-- %f leaves the second vowel unconsumed so that it can also be the
	-- first vowel of the next match (V.CV.CV).
	transcription = mw.ustring.gsub(transcription,
		"(" .. vowel .. ")([bkhdɡlmnrvz])%f" .. vowel,
		"%1.%2")


	-- Move stress before syllable onset, and add syllable breaks.
	-- This rule may need refinement.
--	transcription = mw.ustring.gsub(transcription,
--		"()(" .. not_vowel .. "?)([^" .. vowels .. stress .. "]*)(" .. stress
--			.. "?)(" .. vowel .. ")",
--		function (position, first, rest, syllable_divider, vowel)
--			-- beginning of word, that is, at the moment, beginning of string
--			if position == 1 then
--				return syllable_divider .. first .. rest .. vowel
--			end
--			if syllable_divider == "" then
--				syllable_divider = "."
--			end
--			if rest == "" then
--				return syllable_divider .. first .. vowel
--			else
--				return first .. syllable_divider .. rest .. vowel
--			end
--		end)
	
	if not single_character_affricates then
		-- Expand placeholders to tie-bar affricates. For a doubled affricate
		-- only the first character of the expansion precedes the divider.
		transcription = mw.ustring.gsub(transcription, "([ʦʣʧʤ])([%." .. stress .. "]*)([ʦʣʧʤ]*)",
			function (affricate1, divider, affricate2)
				local full_affricate = full_affricates[affricate1]
				
				if affricate2 ~= "" then
					return mw.ustring.sub(full_affricate, 1, 1) .. divider .. full_affricate
				end
				
				return full_affricate .. divider
			end)
	end
	
	-- Drop hyphens and leftover circumflexes. The letter h is kept: the h of
	-- ch, gh, sh has already been consumed above, so any h still present
	-- stands for the consonant /h/.
	transcription = mw.ustring.gsub(transcription, "[%-" .. circumflex .. "]", "")
	-- A stress mark already marks a syllable boundary.
	transcription = transcription:gsub("%.ˈ", "ˈ")
	
	return transcription
end

-- Template entry point: transcribes each word given as a positional
-- argument of the parent frame (defaulting to the current page title)
-- and passes the results to Module:IPA for formatting.
function export.show(frame)
	local m_IPA = require("Module:IPA")
	local m_parameters = require("Module:parameters")
	
	local parent_args = frame:getParent().args
	local param_spec = {
		-- words to transcribe
		[1] = { list = true, default = mw.title.getCurrentTitle().text },
	}
	local args = m_parameters.process(parent_args, param_spec)
	
	local Array = require("Module:array")
	
	-- Wrap one word's transcription in slashes as an item for Module:IPA.
	local function to_item(word)
		local phonemic = export.to_phonemic(word)
		return { pron = "/" .. phonemic .. "/" }
	end
	
	local items = Array(args[1]):map(to_item)
	local romanian = require("Module:languages").getByCode("ro")
	
	return m_IPA.format_IPA_full {
		lang = romanian,
		items = items,
	}
end

return export