Module:ro-pronunciation
Appearance
- The following documentation is located at Module:ro-pronunciation/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
Testcases
[edit] 40 of 40 tests failed. (refresh)
Text | Expected | Actual | |
---|---|---|---|
copíl | koˈpil | kopˈil | |
copíi | koˈpij | kopˈiʲ | |
copíii | koˈpi.ji | kopˈiij | |
lupi | lupʲ | lˈupʲ | |
șárpe | ˈʃar.pe | ʃˈarpe | |
țáră | ˈt͡sa.rə | t͡sˈarə | |
înăuntru | ɨ.nəˈun.tru | ɨnəẃntru | |
xilofón | ksi.loˈfon | ksilofˈon | |
quarc | kwark | kẃark | |
fiert | fjert | fˈiert | |
viteză | viˈte.zə | vitˈezə | |
viteáză | viˈte̯a.zə | viteˈazə | |
minge | ˈmin.d͡ʒe | mˈind͡ʒe | |
mingeac | minˈd͡ʒe̯ak | mind͡ʒˈeak | |
ghețár | ɡeˈt͡sar | ɡet͡sˈar | |
ghíndă | ˈɡin.də | ɡˈində | |
jargón | ʒarˈɡon | ʒarɡˈon | |
cireáșă | t͡ʃiˈre̯a.ʃə | t͡ʃireˈaʃə | |
cétină | ˈt͡ʃe.ti.nə | t͡ʃˈetinə | |
chiar | kjar | kˈiar | |
chestie | ˈkes.ti.e | kˈestie | |
mlădíță | mləˈdi.t͡sə | mlədˈit͡sə | |
târșă | ˈtɨr.ʃə | tˈɨrʃə | |
oaie | ˈo̯a.je | oˈaie | |
râu | rɨw | rˈɨu | |
continuu | konˈti.nuw | kontˈinuw | |
câine | ˈkɨj.ne | kɨj́ne | |
mea | me̯a | mˈea | |
socoteai | so.koˈte̯aj | sokoteˈaʲ | |
leoaică | leˈo̯aj.kə | leoaj́kə | |
accelerasem | ak.t͡ʃe.leˈra.sem | akt͡ʃelerˈasem | |
creează | kreˈe̯a.zə | kreeˈazə | |
crează | ˈkre̯a.zə | kreˈazə | |
beau | be̯aw | beˈau | |
suiau | suˈjaw | swiˈau | |
piei | pjej | pjˈeʲ | |
pleoape | ˈple̯o̯a.pe | pleoˈape | |
creioane | kreˈjo̯a.ne | krejoˈane | |
sculptură | skulpˈtu.rə | skulp.tˈurə | |
poezíe | po.eˈzi.e | poezˈie |
Text | Expected | Actual |
---|
local u = require("Module:string/char")
-- Table returned by this module; public functions are attached to it.
local export = {}

-- IPA marks.
local stress = "ˈ"
local long = "ː" -- not referenced by the code in this file

-- Combining diacritics produced by decomposing accented input.
local acute = u(0x301)
local grave = u(0x300)
local circumflex = u(0x302)

-- Pattern fragment: character class matching either stress accent.
local acute_or_grave = ("[%s%s]"):format(acute, grave)

-- Vowel symbols used in the working transcription, and pattern fragments
-- (character classes) derived from them.
local vowels = "aeiouəɨ"
local vowel = ("[%s]"):format(vowels)
local vowel_or_semivowel = ("[%sjw]"):format(vowels) -- not referenced in this file
local not_vowel = ("[^%s]"):format(vowels)

-- The next three are not referenced by the code in this file.
local front = "[ij]"
local fronted = u(0x031F)
local voiced_consonant = "[bdɡlmnrvz]"

-- Inside to_phonemic the single characters ʦ, ʣ, ʧ, ʤ stand for the
-- affricates t͡s, d͡z, t͡ʃ, d͡ʒ; this table maps each one to its full form.
local full_affricates = {
	["ʦ"] = "t͡s",
	["ʣ"] = "d͡z",
	["ʧ"] = "t͡ʃ",
	["ʤ"] = "d͡ʒ",
}
--- Convert a Romanian word to a phonemic IPA transcription.
--
-- Stress is taken from an acute or grave accent in `word`. When neither is
-- present and the word does not contain exactly one vowel, the second-to-last
-- vowel is stressed by default. The stress mark is emitted immediately before
-- the stressed vowel; the rule that would move it to the syllable onset and
-- add syllable breaks is commented out further down.
--
-- This does not handle phrases containing more than one stressed word.
--
-- @param word  string: the spelling to transcribe.
-- @param single_character_affricates  if truthy, affricates are left as the
--   single characters ʦ, ʣ, ʧ, ʤ instead of being expanded to
--   t͡s, d͡z, t͡ʃ, d͡ʒ.
-- @return string: the transcription, without surrounding slashes.
-- Raises an error for the letter sequences "cg" and "gc".
function export.to_phonemic(word, single_character_affricates)
	-- Map Romanian letters to working symbols. Only the first return value of
	-- the gsub chain (the string) is kept by the assignment.
	-- NOTE(review): these replacements match the letters as typed here; input
	-- where â or ă also carries a stress accent may arrive as a different
	-- precomposed character and not be matched - confirm.
	word = mw.ustring.lower(word)
		:gsub("'", "")
		:gsub("â", "ɨ")
		:gsub("î", "ɨ")
		:gsub("ă", "ə")
		:gsub("j", "ʒ")
		:gsub("ș", "ʃ")
		:gsub("ț", "ʦ")
		:gsub("cc", "kc")
		:gsub("uu", "uw")

	-- Decompose combining characters: for instance, è → e + ◌̀
	-- ⟨qu⟩ represents /kw/. It is rewritten here, before default stress is
	-- assigned, so that its u is neither counted as a vowel nor given the
	-- accent (previously "quarc" came out with a stray accent on w).
	local decomposed = mw.ustring.toNFD(word)
		:gsub("x", "ks")
		:gsub("y", "i")
		:gsub("ck", "k")
		:gsub("sh", "ʃ")
		:gsub("qu", "kw")

	-- Transcriptions must contain an acute or grave, to indicate stress position.
	-- Default to penultimate stress rather than throw error?
	if not mw.ustring.find(decomposed, acute_or_grave) then
		-- Allow monosyllabic unstressed words.
		-- The count must be Unicode-aware: the vowel class contains the
		-- multi-byte letters ə and ɨ, which a byte-oriented gsub would count
		-- once per byte.
		local vowel_count = select(2, mw.ustring.gsub(decomposed, vowel, ""))
		if vowel_count ~= 1 then
			-- Add acute accent on second-to-last vowel.
			decomposed = mw.ustring.gsub(decomposed,
				"(" .. vowel .. ")(" .. not_vowel .. "*[iu]?" .. vowel .. not_vowel .. "*)$",
				"%1" .. acute .. "%2")
		end
	end

	local transcription = decomposed

	-- ci, gi + vowel
	-- Do ci, gi + e, é, è sometimes contain /j/?
	transcription = mw.ustring.gsub(transcription,
		"([cg])([cg]?)i(" .. vowel .. ")",
		function (consonant, double, following_vowel)
			local out_consonant
			if consonant == "c" then
				out_consonant = "ʧ"
			else
				out_consonant = "ʤ"
			end
			if double ~= "" then
				if double ~= consonant then
					error("Invalid sequence " .. consonant .. double .. ".")
				end
				out_consonant = out_consonant .. out_consonant
			end
			return out_consonant .. following_vowel
		end)

	-- Handle other cases of c, g. The first capture (the whole consonant
	-- group) is unused; `h` is the optional h of ch/gh and is dropped from
	-- the output; `following` is the single character after the group.
	transcription = mw.ustring.gsub(transcription,
		"(([cg])([cg]?)(h?))(.?)",
		function (_, first, double, h, following)
			-- Don't allow the combinations cg, gc.
			-- Or do something else?
			if double ~= "" and double ~= first then
				error("Invalid sequence " .. first .. double .. ".")
			end
			-- c, g is soft before e, i (unless an h intervenes).
			local consonant
			if (following == "e" or following == "i") and h ~= "h" then
				if first == "c" then
					consonant = "ʧ"
				else
					consonant = "ʤ"
				end
			else
				if first == "c" then
					consonant = "k"
				else
					consonant = "ɡ"
				end
			end
			if double ~= "" then
				consonant = consonant .. consonant
			end
			return consonant .. following
		end)

	-- Word-final i. The optional capture lets a stress accent sit on the
	-- preceding i (as in "copíi", "copíii"); without it these rules were
	-- skipped for exactly the words that mark stress there.
	local optional_accent = "(" .. acute_or_grave .. "?)"
	transcription = mw.ustring.gsub(transcription, "i$", "ʲ")
	transcription = mw.ustring.gsub(transcription,
		"i" .. optional_accent .. "iʲ$", "i%1ji")
	transcription = mw.ustring.gsub(transcription,
		"i" .. optional_accent .. "ʲ$", "i%1j")

	-- Map the letter i or u to the corresponding glide.
	local function glide_for(letter)
		if letter == "i" then
			return "j"
		end
		return "w"
	end

	-- u or i (without accent) before another vowel is a semivowel.
	-- An accented i/u is followed by its combining accent, so it cannot match.
	transcription = mw.ustring.gsub(transcription,
		"([iu])(" .. vowel .. ")",
		function (letter, following_vowel)
			return glide_for(letter) .. following_vowel
		end)

	-- u or i (without accent) after another vowel is a semivowel.
	-- The accent is captured explicitly: an accented i/u is the stressed
	-- vowel and must stay a vowel, otherwise the accent is stranded on a
	-- glide and never becomes a stress mark.
	transcription = mw.ustring.gsub(transcription,
		"(" .. vowel .. ")([iu])" .. optional_accent,
		function (preceding_vowel, letter, accent)
			if accent ~= "" then
				return preceding_vowel .. letter .. accent
			end
			return preceding_vowel .. glide_for(letter)
		end)

	transcription = mw.ustring.gsub(transcription, "je$", "ie")

	-- Replace acute and grave with stress mark.
	transcription = mw.ustring.gsub(transcription,
		"(" .. vowel .. ")" .. acute_or_grave, stress .. "%1")

	-- Syllable breaks inside selected consonant clusters.
	-- NOTE(review): the rules containing "c" cannot match, because a c that
	-- follows n or r has already been rewritten to k or ʧ by the c/g handling
	-- above. Left unchanged pending a decision on the intended behaviour.
	transcription = mw.ustring.gsub(transcription, "lpt", "lp.t")
	transcription = mw.ustring.gsub(transcription, "mpt", "mp.t")
	transcription = mw.ustring.gsub(transcription, "nct", "nc.t")
	transcription = mw.ustring.gsub(transcription, "ncʦ", "nc.ʦ")
	transcription = mw.ustring.gsub(transcription, "ncʃ", "nc.ʃ")
	transcription = mw.ustring.gsub(transcription, "ndv", "nd.v")
	transcription = mw.ustring.gsub(transcription, "rct", "rc.t")
	transcription = mw.ustring.gsub(transcription, "rtf", "rt.f")
	transcription = mw.ustring.gsub(transcription, "stm", "st.m")

	-- NOTE(review): this pattern is built from the literal strings `vowels`
	-- and "(bkhdɡlmnrvz)" rather than character classes, so it only matches
	-- that exact literal text. It looks like a vowel-consonant-vowel break
	-- rule was intended - confirm before changing, since enabling it alters
	-- the output of most words.
	transcription = mw.ustring.gsub(transcription,
		"(" .. vowels .. ")" .. "(bkhdɡlmnrvz)" .. "(" .. vowels .. ")" ,
		function (first_vowel, consonant, second_vowel)
			return first_vowel .. "." .. consonant .. second_vowel
		end)

	-- Move stress before syllable onset, and add syllable breaks.
	-- This rule may need refinement.
	-- transcription = mw.ustring.gsub(transcription,
	-- 	"()(" .. not_vowel .. "?)([^" .. vowels .. stress .. "]*)(" .. stress
	-- 	.. "?)(" .. vowel .. ")",
	-- 	function (position, first, rest, syllable_divider, vowel)
	-- 		-- beginning of word, that is, at the moment, beginning of string
	-- 		if position == 1 then
	-- 			return syllable_divider .. first .. rest .. vowel
	-- 		end
	-- 		if syllable_divider == "" then
	-- 			syllable_divider = "."
	-- 		end
	-- 		if rest == "" then
	-- 			return syllable_divider .. first .. vowel
	-- 		else
	-- 			return first .. syllable_divider .. rest .. vowel
	-- 		end
	-- 	end)

	-- Expand the single-character affricates to their tie-bar forms. For a
	-- doubled affricate, the first one is reduced to the first character of
	-- the full form, and any divider between the two is kept in place.
	if not single_character_affricates then
		transcription = mw.ustring.gsub(transcription,
			"([ʦʣʧʤ])([%." .. stress .. "]*)([ʦʣʧʤ]*)",
			function (affricate1, divider, affricate2)
				local full_affricate = full_affricates[affricate1]
				if affricate2 ~= "" then
					return mw.ustring.sub(full_affricate, 1, 1) .. divider .. full_affricate
				end
				return full_affricate .. divider
			end)
	end

	-- Strip every remaining h, hyphen and circumflex.
	-- NOTE(review): this also removes an h that is not part of ch/gh/sh -
	-- confirm that is intended.
	transcription = mw.ustring.gsub(transcription, "[h%-" .. circumflex .. "]", "")

	-- A syllable break directly before the stress mark is redundant.
	transcription = transcription:gsub("%.ˈ", "ˈ")
	return transcription
end
--- Template entry point: transcribe each positional argument of the calling
-- template and hand the results to Module:IPA for formatting.
--
-- @param frame  the Scribunto frame; arguments are read from its parent.
-- @return whatever Module:IPA's format_IPA_full returns for the items.
function export.show(frame)
	local m_IPA = require("Module:IPA")

	local process = require("Module:parameters").process
	local args = process(frame:getParent().args, {
		-- words to transcribe; defaults to the current page title
		[1] = { list = true, default = mw.title.getCurrentTitle().text },
	})

	local Array = require("Module:array")

	-- Wrap one word's transcription in slashes, in the item shape passed on.
	local function to_item(word)
		local phonemic = export.to_phonemic(word)
		return { pron = "/" .. phonemic .. "/" }
	end

	local transcriptions = Array(args[1]):map(to_item)
	local romanian = require("Module:languages").getByCode("ro")

	return m_IPA.format_IPA_full {
		lang = romanian,
		items = transcriptions,
	}
end
return export