Module:pt-pronunc/testcases
Appearance
- The following documentation is located at Module:pt-pronunc/testcases/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • tested module • sandbox
1 of 0 tests failed. (refresh)
Text | Expected | Actual | Comments | |
---|---|---|---|---|
a | ||||
Script error during testing: Module:pt-pronunc:1616: bad argument #1 to 'ipairs' (table expected, got nil)stack traceback: [C]: ? [C]: in function 'ipairs' Module:pt-pronunc:1616: in function 'dostyle' Module:pt-pronunc:1736: in function 'express_styles' Module:pt-pronunc/testcases:400: in function 'func' Module:UnitTests:296: in function 'iterate' Module:pt-pronunc/testcases:590: in function <Module:pt-pronunc/testcases:589> (tail call): ? [C]: in function 'xpcall' Module:UnitTests:370: in function <Module:UnitTests:329> (tail call): ? mw.lua:527: in function <mw.lua:507> [C]: ? [C]: in function 'expandTemplate' mw.lua:333: in function 'expandTemplate' Module:documentation:896: in function 'chunk' mw.lua:527: in function <mw.lua:507> [C]: ? |
local tests = require("Module:UnitTests")
local m_pt_pronunc = require("Module:pt-pronunc")
local m_links = require("Module:links")
local m_table = require("Module:table")
local pt = require("Module:languages").getByCode("pt")
local rsplit = mw.text.split
local function tag_IPA(IPA)
return '<span class="IPA">' .. IPA .. '</span>'
end
local function link(text)
return m_links.full_link{ term = text, lang = pt }
end
local options = { display = tag_IPA }
local all_style_set = m_table.listToSet(m_pt_pronunc.all_styles)
--[=[
In the following examples, each line is either a section header beginning with a # or an example.
Examples consist of tab-separated fields. The first field is the actual spelling of the term in question.
Each following field consaists of a respelling associated with a particular style and the corresponding expected
IPA pronunciation. A style corresponds approximately to a particular dialect and is one of the following:
(1) A "basic style":
* "gbr" = "General" Brazilian
* "sp" = São Paulo
* "rio" = Rio de Janeiro
* "cpt" = Portugal outside of Lisbon
(2) A "combined style":
* "br" = Brazil = "gbr" + "sp" + "rio"
* "pt" = Portugal = "gpt" + "cpt"
* "all" = all basic styles
(3) A "style group", which is a list of hyphen-separated styles;
* e.g. "br-cpt" = "br" + "cpt" = "gbr" + "sp" + "rio" + "cpt"
If a combined style or style group is specified, the respelling applies to all individual styles.
The format of a respelling field is RESPELLING:IPA (respelling associated with all styles) or STYLE=RESPELLING:IPA
(respelling associated with the specified style). If RESPELLING is omitted (but the colon kept), the term's original
spelling is used. IPA in turn is either a single expected IPA pronunciation (between /.../ to represent phonemic
pronunciation or between [...] to represent phonetic pronunciation), a style-tagged pronunciation of the form
STYLE=PRONUN, or a semicolon-separated list of style-tagged pronunciations.
Examples:
* Hungria :[ũˈɡɾi.ɐ]
This means the word [[Hungria]], respelled the same way (note the omitted respelling before the colon), has the
expected phonetic pronunciation [ũˈɡɾi.ɐ] in all styles (dialects). The actual pronunciation for all styles will
be generated, and the phonetic output of each in turn will be compared against [ũˈɡɾi.ɐ]. Phonemic output will
not be checked.
* jogging br=djógguing:/ˈd͡ʒɔ.ɡĩ/
This means the word [[jogging]] has the respelling 'djógguing' and expected phonemic pronunciation /ˈd͡ʒɔ.ɡĩ/
in all Brazilian styles. Portugal styles are omitted and will not be checked.
* abrangência abrangêncya:br=/a.bɾɐ̃ˈʒẽ.sjɐ/;pt=[ɐ.βɾɐ̃ˈʒẽ.sjɐ]
This means the word [[abrangência]] has the respelling 'abrangêncya' for all styles, which in turn has the
expected phonemic pronunciation /a.bɾɐ̃ˈʒẽ.sjɐ/ in Brazil and expected phonetic pronunciation [ɐ.βɾɐ̃ˈʒẽ.sjɐ] in
Portugal. The phonemic pronunciation for all Brazilian styles ("General" Brazilian, Rio and São Paulo) will be
checked against /a.bɾɐ̃ˈʒẽ.sjɐ/, and the phonetic pronunciation for all Portugal styles (Lisbon as well a
non-Lisbon Central Portugal) will be checked against [ɐ.βɾɐ̃ˈʒẽ.sjɐ].
* ninguém :br-cpt=/nĩˈɡẽj̃/;lisbon=/nĩˈɡɐ̃j̃/
This means the word [[ninguém]], respelled the same way for all styles, has the phonemic pronunciation /nĩˈɡẽj̃/
in all styles other than Lisbon, and /nĩˈɡɐ̃j̃/ in Lisbon.
* long neck br=lòngh nécke,lòngue nécke:/ˌlõɡ ˈnɛ.ki/,/ˌlõ.ɡi ˈnɛ.ki/
This means the term [[long neck]] has two possible respellings 'lòngh nécke' and 'lòngue nécke' in Brazil, with
expected respective phonemic pronunciations /ˌlõɡ ˈnɛ.ki/ and /ˌlõ.ɡi ˈnɛ.ki/. Portugal styles will not be
checked.
* distinguir pt=distinguir:[diʃ.tĩˈɡiɾ] br=distingüir:gbr=[d͡ʒis.t͡ʃĩˈɡwi(h)];rio=[d͡ʒiʃ.t͡ʃĩˈɡwi(χ)];sp=[d͡ʒis.t͡ʃĩˈɡwi(ɾ)]
This means the word [[distinguir]] has respelling 'distinguir' in Portugal with expected phonetic pronunciation
[diʃ.tĩˈɡiɾ] in Portugal (both Lisbon and elsewhere in Central Portugal), but has the respelling 'distingüir'
in Brazil. The Brazilian respelling has different phonetic pronunciations [d͡ʒis.t͡ʃĩˈɡwi(h)] in "General" Brazilian,
[d͡ʒiʃ.t͡ʃĩˈɡwi(χ)] in Rio and [d͡ʒis.t͡ʃĩˈɡwi(ɾ)] in São Paulo.
]=]
local examples = [[
# a
as pessoas as pessôas:pt=[ɐʃ pɨˈso.ɐʃ]
às pessoas às pessôas:pt=[aʃ pɨˈso.ɐʃ]
saca :[ˈsa.kɐ]
acá :pt=[ɐˈka];br=[aˈka]
# b
baba :/ˈba.bɐ/
baba :br=[ˈba.bɐ];pt=[ˈba.βɐ]
a baba :br=[a ˈba.bɐ];pt=[ɐ ˈβa.βɐ]
braça :[ˈbɾa.sɐ]
abraça :br=[aˈbɾa.sɐ];pt=[ɐˈβɾa.sɐ]
a braça :br=[a ˈbɾa.sɐ];pt=[ɐ ˈβɾa.sɐ]
# cc
cóccix :br=/ˈkɔk.siks/
occitano br=occitano:/ok.siˈtɐ̃.nu/
# d
de :br=[d͡ʒi];pt=[dɨ]
praça de Londres :pt=[ˈpɾa.sɐ ðɨ ˈlõ.dɾɨʃ];rio=[ˈpɾa.sɐ d͡ʒi ˈlõ.dɾiʃ];gbr-sp=[ˈpɾa.sɐ d͡ʒi ˈlõ.dɾis]
Pedro Pêdro:br=[ˈpe.dɾu];pt=[ˈpe.ðɾu]
os dentes :rio=[uʒ ˈdẽ.t͡ʃiʃ];gbr-sp=[uz ˈdẽ.t͡ʃis];pt=[uʒ ˈðẽ.tɨʃ];pt=[uʒ ˈdẽ.tɨʃ]
adjetivo :br=/ad.ʒeˈt͡ʃi.vu/
Reguengos de Monsaraz :pt=[ʁɨˈɣẽ.ɡuʒ ðɨ mõ.sɐˈɾaʃ];pt=[ʁɨˈɣẽ.ɡuʒ dɨ mõ.sɐˈɾaʃ]
# e
elegante :pt=/i.lɨˈɡɐ̃.tɨ/
eterno etérno:pt=/iˈtɛɾ.nu/
pregar :pt=/pɾɨˈɡaɾ/;gbr-rio=/pɾeˈɡa(ʁ)/;sp=/pɾeˈɡa(ɾ)/
pregar pt=prègar:/ˌpɾɛˈɡaɾ/
eleito :br=/eˈlej.tu/
ema :br=/ˈẽ.mɐ/
exame ezame:pt=/iˈzɐ.mɨ/;br=/eˈzɐ̃.mi/
exames ezames:pt=/iˈzɐ.mɨʃ/;rio=/eˈzɐ̃.miʃ/;gbr-sp=/eˈzɐ̃.mis/
córtex :pt-sp=/ˈkɔɾ.tɛks/;rio-gbr=/ˈkɔʁ.tɛks/
desde pt=dêsde:/ˈdeʒ.dɨ/
mesmo pt=mêsmo:/ˈmeʒ.mu/
português :pt=/puɾ.tuˈɡeʃ/
vês :pt=/ˈveʃ/
líder :pt=/ˈli.dɛɾ/
cadáver :pt=/kɐˈda.vɛɾ/
repórter :pt=/ʁɨˈpɔɾ.tɛɾ/
# initial o
obra pt=óbra:/ˈɔ.bɾɐ/
obrar :pt=/ɔˈbɾaɾ/
olho pt=ôlho:/ˈo.ʎu/
olho pt=ólho:/ˈɔ.ʎu/
olhar :pt=/ɔˈʎaɾ/
odor pt=odôr:/ɔˈdoɾ/
obrigado :pt=/ɔ.bɾiˈɡa.du/
oculto :pt=/ɔˈkul.tu/
operar :pt=/ɔ.pɨˈɾaɾ/
oração :pt=/ɔ.ɾɐˈsɐ̃w̃/
Holanda :pt=/ɔˈlɐ̃.dɐ/
homófono :pt=/ɔˈmɔ.fu.nu/
hospital :pt=/ɔʃ.piˈtal/
O Porto O Pôrto:pt=[u ˈpoɾ.tu]
Os Lobos Os Lôbos:pt=[uʒ ˈlo.βuʃ]
# g
guerra guérra:/ˈɡɛ.ʁɐ/
a guerra a guérra:rio=[a ˈɡɛ.χɐ];sp-gbr=[a ˈɡɛ.hɐ];pt=[ɐ ˈɣɛ.ʁɐ]
guarda :rio-gbr=/ˈɡwaʁ.dɐ/;sp-pt=/ˈɡwaɾ.dɐ/
gelo gêlo:/ˈʒe.lu/
# l, lh
elo élo:/ˈɛ.lu/
velho vélho:/ˈvɛ.ʎu/
conselho consêlho:gpt=[kõˈsɐ(j).ʎu];br-cpt=/kõˈse.ʎu/
# Coda l
mal :pt=/ˈmal/;br=/ˈmaw/
mal :pt=[ˈmaɫ];br=[ˈmaʊ̯]
calca :pt=[ˈkaɫ.kɐ];br=[ˈkaʊ̯.kɐ]
alto :pt=[ˈaɫ.tu];br=[ˈaʊ̯.tu]
alpaca :pt=[aɫˈpa.kɐ];br=[aʊ̯ˈpa.kɐ]
anel anél:pt=/ɐˈnɛl/;br=/aˈnɛw/
anel anél:pt=[ɐˈnɛɫ];br=[aˈnɛʊ̯]
feltro féltro:pt=/ˈfɛl.tɾu/
helvético :pt=/ɛlˈvɛ.ti.ku/
relvado :pt=[ʁɛɫˈva.ðu];spt=[ʁɛɫˈva.du]
evitável :pt=/i.viˈta.vɛl/
Belmonte :pt=/bɛlˈmõ.tɨ/
palpável :br=[paʊ̯ˈpa.veʊ̯];pt=[paɫˈpa.vɛɫ]
bolsa bôlsa:br=/ˈbow.sɐ/;pt=/ˈbol.sɐ/
bolsa bôlsa:br=[ˈboʊ̯.sɐ];pt=[ˈboɫ.sɐ]
bolsada :pt=[boɫˈsa.ðɐ];spt=[boɫˈsa.dɐ]
polvilhar :pt=[poɫ.viˈʎaɾ]
molda mólda:pt=[ˈmɔɫ.dɐ]
moldar mọldar:pt=[mɔɫˈdaɾ]
soldado :pt=[soɫˈda.ðu];spt=[soɫˈda.du]
soldado sọldado:pt=[sɔɫˈda.ðu];spt=[sɔɫˈda.du]
álcool :pt=[aɫ.ˈkwɔɫ]
azul e branco :br=[aˈzuʊ̯ i ˈbɾɐ̃.ku];pt=[ɐˈzul i ˈβɾɐ̃.ku]
# Diphthong au
mau :/ˈmaw/
mau :pt=[ˈmaw];br=[ˈmaʊ̯]
auto :pt=/ˈaw.tu/
aumenta :pt=/awˈmẽ.tɐ/
# Diphthong ai
vai :/ˈvaj/
Cairo :/ˈkaj.ɾu/
airoso airôso:/ajˈɾo.zu/
bailado :pt=/bajˈla.du/
# m, n
ano :br=/ˈɐ̃.nu/;pt=/ˈɐ.nu/
cama :br=/ˈkɐ̃.mɐ/;pt=/ˈkɐ.mɐ/
entendo :br=/ĩˈtẽ.du/;pt=/ẽˈtẽ.du/
falámos :pt=/fɐˈla.muʃ/
andaime :pt=/ɐ̃ˈdaj.mɨ/;br=[ɐ̃ˈdɐ̃ɪ̯̃.mi]
andaime andáime:br=[ɐ̃ˈdaɪ̯.mi]
Jaime Jáime:br=/ˈʒaj.mi/
Jaime :br=/ˈʒɐ̃j̃.mi/;pt=/ˈʒaj.mɨ/
regímen :br=/ʁeˈʒĩ.mẽj̃/;pt=/ʁɨˈʒi.mɛn/
Renan :br=/ʁeˈnɐ̃/;pt=/ʁɨˈnan/
íon :br=/ˈi.õ/;pt=/ˈi.ɔn/
Bombaim :br=/bõ.baˈĩ/;pt=/bõ.bɐˈĩ/
Coimbra :br=/koˈĩ.bɾɐ/
saindo :br=/saˈĩ.du/;pt=/sɐˈĩ.du/
fauna :br=/ˈfɐ̃w̃.nɐ/
Iaundé :br=/ja.ũˈdɛ/;pt=/jɐ.ũˈdɛ/
# Final -em/-ens
bem :pt=/ˈbɐ̃j̃/
bens :pt=/ˈbɐ̃j̃ʃ/
também :br=/tɐ̃ˈbẽj̃/;pt=/tɐ̃ˈbɐ̃j̃/
ninguém :br=/nĩˈɡẽj̃/;pt=/nĩˈɡɐ̃j̃/
parabéns :gbr-sp=/pa.ɾaˈbẽj̃s/;rio=/pa.ɾaˈbẽj̃ʃ/;pt=/pɐ.ɾɐˈbɐ̃j̃ʃ/
# ng
abrangência abrangêncya:br=/a.bɾɐ̃ˈʒẽ.sjɐ/;pt=[ɐ.βɾɐ̃ˈʒẽ.sjɐ]
camping br=câmping:/ˈkɐ̃.pĩ/
doping br=dóping:/ˈdɔ.pĩ/
jogging br=djógguing:/ˈd͡ʒɔ.ɡĩ/
Beijing :br=/be(j)ˈʒĩ/
Wellington br=Wéllington:/ˈwɛ.lĩ.tõ/
Washington br=Wóshington:/ˈwɔ.ʃĩ.tõ/
distinguir pt=distinguir:[diʃ.tĩˈɡiɾ] br=distingüir:gbr=[d͡ʒis.t͡ʃĩˈɡwi(h)];rio=[d͡ʒiʃ.t͡ʃĩˈɡwi(χ)];sp=[d͡ʒis.t͡ʃĩˈɡwi(ɾ)]
Hungria :[ũˈɡɾi.ɐ]
interrobang pt=intẹrrobangue:/ĩ.tɛ.ʁuˈbɐ̃.ɡɨ/ br=interrobangue:/ĩ.te.ʁoˈbɐ̃.ɡi/
linguiça lingu.iça,lingüiça:/lĩ.ɡuˈi.sɐ/,/lĩˈɡwi.sɐ/
long neck br=lòngh nécke,lòngue nécke:/ˌlõɡ ˈnɛ.ki/,/ˌlõ.ɡi ˈnɛ.ki/
Los Angeles br=Lộs Ângeles:gbr-sp=/loz ˈɐ̃.ʒe.lis/;rio=/loz ˈɐ̃.ʒe.liʃ/
single br=síngol:/ˈsĩ.ɡow/
Stonehenge sp=Stòwnn.rrendj:[ˌstoʊ̯nˈhẽd͡ʒ]
viking br=víking,víkingue:/ˈvi.kĩ/,/ˈvi.kĩ.ɡi/
zângão :/ˈzɐ̃.ɡɐ̃w̃/
# nh
banho :br=/ˈbɐ̃.ɲu/;pt=/ˈbɐ.ɲu/
Congonhinhas sp=Còngonhinhas:/ˌkõ.ɡõˈɲĩ.ɲɐs/
Congonhinhas sp=Còngonhinhas:[ˌkõ.ɡõˈj̃ĩ.j̃ɐs]
nheengatu br=nhengatu:/ɲẽ.ɡaˈtu/
ainhum :br=/ɐ̃j̃ˈɲũ/
rainha :br=/ʁaˈĩ.ɲɐ/;pt=/ʁɐˈi.ɲɐ/
moinho br=moinho,muinho:/moˈĩ.ɲu/,/muˈĩ.ɲu/ pt=muinho,mu.inho:/ˈmwi.ɲu/,/muˈi.ɲu/
# q
ablaquear ablaquyar:pt=/ɐ.blɐˈkjaɾ/
acqua alta :pt=/ˈa.kwɐ ˈal.tɐ/
freqüentemente :br=/fɾeˌkwẽ.t͡ʃiˈmẽ.t͡ʃi/
obséquio obzéquyo:br=/obˈzɛ.kju/
quando :/ˈkwɐ̃.du/
que :br=/ki/;pt=/kɨ/
quebra-nozes québra-nózes:pt=[ˈkɛ.βɾɐ ˈnɔ.zɨʃ]
qüiproquó br=qüìproquó:/ˌkwi.pɾoˈkwɔ/
# r
bilro :rio=[ˈbiʊ̯.χu];pt=[ˈbiɫ.ʁu];gbr-sp=[ˈbiʊ̯.hu]
carro :rio=[ˈka.χu];pt=[ˈka.ʁu];gbr-sp=[ˈka.hu]
genro :rio=[ˈʒẽ.χu];pt=[ˈʒẽ.ʁu];gbr-sp=[ˈʒẽ.hu]
Israel Israél:pt=[iʒ.ʁɐˈɛɫ];gbr-sp=[iz.haˈɛʊ̯];rio=[iʒ.χaˈɛʊ̯]
redor redór:gbr=[heˈdɔh];rio=[χeˈdɔχ];sp=[heˈdɔɾ]
parte :rio=[ˈpaχ.t͡ʃi];gbr=[ˈpah.t͡ʃi];sp=[ˈpaɾ.t͡ʃi];pt=[ˈpaɾ.tɨ]
pardo :gbr=[ˈpaɦ.du];rio=[ˈpaʁ.du];sp=[ˈpaɾ.du];pt=[ˈpaɾ.ðu];spt=[ˈpaɾ.du]
workstation wồrkstêishon:gbr=/ˌwoʁksˈtej.ʃõ/;rio=/ˌwoʁkʃˈtej.ʃõ/
fazer fazêr:gbr-rio=/faˈze(ʁ)/;sp=/faˈze(ɾ)/;pt=/fɐˈzeɾ/
fazer a coisa fazêr a coisa:br=/faˈze(ɾ) a ˈkoj.zɐ/;pt=/fɐˈzeɾ ɐ ˈkoj.zɐ/
pôr :gbr-rio=/ˈpoʁ/;sp-pt=/ˈpoɾ/
mar :pt=/ˈmaɾ/ br=marh:gbr-rio=/ˈmaʁ/;sp=/ˈmaɾ/
# s
cansar :sp=/kɐ̃ˈsa(ɾ)/;gbr-rio=/kɐ̃ˈsa(ʁ)/
intransigente :br=/ĩ.tɾɐ̃.ziˈʒẽ.t͡ʃi/;pt=/ĩ.tɾɐ̃.ziˈʒẽ.tɨ/
passo :/ˈpa.su/
caso :/ˈka.zu/
mesmo mêsmo:rio-pt=/ˈmeʒ.mu/;gbr-sp=/ˈmez.mu/
lesma lêsma:pt=/ˈleʒ.mɐ/
está :rio=/iʃˈta/;pt=/(i)ʃˈta/;sp=/isˈta/
esbarra :rio=/iʒˈba.ʁɐ/;pt=/(i)ʒˈba.ʁɐ/;sp=/izˈba.ʁɐ/
os árvores :rio=/uz ˈaʁ.vo.ɾiʃ/;gbr=/uz ˈaʁ.vo.ɾis/;sp=/uz ˈaɾ.vo.ɾis/;pt=/uz ˈaɾ.vu.ɾɨʃ/
os habitantes :rio=/uz a.biˈtɐ̃.t͡ʃiʃ/;gbr-sp=/uz a.biˈtɐ̃.t͡ʃis/;pt=/uz ɐ.biˈtɐ̃.tɨʃ/
as gentes :rio=/a ˈʒẽ.t͡ʃiʃ/;gbr-sp=/az ˈʒẽ.t͡ʃis/;pt=/ɐ ˈʒẽ.tɨʃ/
os sucos :rio=/u ˈsu.kuʃ/;gbr-sp=/u ˈsu.kus/;pt=/uʃ ˈsu.kuʃ/
os pés os péss:rio-pt=/uʃ ˈpɛʃ/;gbr-sp=/us ˈpɛs/
nós :rio=/ˈnɔ(j)ʃ/;gbr-sp=/ˈnɔ(j)s/;pt=/ˈnɔʃ/
nós nóss:rio-pt=/ˈnɔʃ/;gbr-sp=/ˈnɔs/
nós nóhs:rio-pt=/ˈnɔʃ/;gbr-sp=/ˈnɔs/
excelente escelente:br=/e.seˈlẽ.t͡ʃi/
nascimento :br=/na.siˈmẽ.tu/;pt=/nɐʃ.siˈmẽ.tu/
sueca suéca:pt=/ˈswɛ.kɐ/
suave :pt=/ˈswa.vɨ/
stresse sstrésse:pt=/ˈstɾɛ.sɨ/
stress sstréç:pt=/ˈstɾɛs/
Donbass Donbáç:pt=/dõˈbas/
# x
xérox :br=/ˈʃɛ.ɾɔks/
baixo :br=/ˈba(j).ʃu/;pt=/ˈbaj.ʃu/
peixe :br=/ˈpe(j).ʃi/;cpt=/ˈpej.ʃɨ/;gpt=/ˈpɐj.ʃɨ/;spt=/ˈpe.ʃɨ/
peixe :br=[ˈpe(ɪ̯).ʃi];cpt=[ˈpej.ʃɨ];gpt=[ˈpɐj.ʃɨ];spt=[ˈpe.ʃɨ]
frouxo :gpt=/ˈfɾo(w).ʃu/
axé ashé:br=/aˈʃɛ/;pt=/ɐˈʃɛ/
# y
Itamaraty :br=/i.ta.ma.ɾaˈt͡ʃi/
Sydney Sýdjney:br=/ˈsid͡ʒ.nej/
# z
prazo :/ˈpɾa.zu/
dez déz:rio=/ˈdɛ(j)ʃ/;gbr-sp=/ˈdɛ(j)s/;pt=/ˈdɛʃ/
faz :rio=/ˈfa(j)ʃ/;gbr-sp=/ˈfa(j)s/;pt=/ˈfaʃ/
dez árvores déz árvores:rio=/ˈdɛ(j)z ˈaʁ.vo.ɾiʃ/;gbr=/ˈdɛ(j)z ˈaʁ.vo.ɾis/;sp=/ˈdɛ(j)z ˈaɾ.vo.ɾis/;pt=/ˈdɛz ˈaɾ.vu.ɾɨʃ/
dez habitantes déz habitantes:rio=/ˈdɛ(j)z a.biˈtɐ̃.t͡ʃiʃ/;gbr-sp=/ˈdɛ(j)z a.biˈtɐ̃.t͡ʃis/;pt=/ˈdɛz ɐ.biˈtɐ̃.tɨʃ/
dez gentes déz gentes:rio=/ˈdɛ(j) ˈʒẽ.t͡ʃiʃ/;gbr-sp=/ˈdɛ(j)z ˈʒẽ.t͡ʃis/;pt=/ˈdɛ ˈʒẽ.tɨʃ/
dez sucos déz sucos:rio=/ˈdɛ(j) ˈsu.kuʃ/;gbr-sp=/ˈdɛ(j) ˈsu.kus/;pt=/ˈdɛʃ ˈsu.kuʃ/
dez pés déz péss:rio=/ˈdɛ(j)ʃ ˈpɛʃ/;gbr-sp=/ˈdɛ(j)s ˈpɛs/;pt=/ˈdɛʃ ˈpɛʃ/
topázio :pt=/tuˈpa.zju/
disprósio :pt=/diʃˈpɾɔ.zju/
césio :pt=/ˈsɛ.zju/
Ásia Ásya:pt-rio=/ˈa.zjɐ/
casual :pt=/kɐˈzwal/
casuar :pt=/kɐˈzwaɾ/
zoar zuar:pt=/ˈzwaɾ/
# -mente
afortunadamente :gbr-rio=/a.foʁ.tuˌna.daˈmẽ.t͡ʃi/;sp=/a.foɾ.tuˌna.daˈmẽ.t͡ʃi/;pt=[ɐ.fuɾ.tuˌna.ðɐˈmẽ.tɨ];spt=[ɐ.fuɾ.tuˌna.dɐˈmẽ.tɨ]
alertamente alértamente:gbr-rio=/aˌlɛʁ.taˈmẽ.t͡ʃi/;sp=/aˌlɛɾ.taˈmẽ.t͡ʃi/;pt=/ɐˌlɛɾ.tɐˈmẽ.tɨ/
anticristãmente :rio=/ɐ̃.t͡ʃi.kɾiʃˌtɐ̃ˈmẽ.t͡ʃi/;gbr-sp=/ɐ̃.t͡ʃi.kɾisˌtɐ̃ˈmẽ.t͡ʃi/;pt=/ɐ̃.ti.kɾiʃˌtɐ̃ˈmẽ.tɨ/
comummente comum.mente:pt=/kuˌmũˈmẽ.tɨ/
dormente dormênte:br=/doʁˈmẽ.t͡ʃi/;sp=/doɾˈmẽ.t͡ʃi/;pt=/duɾˈmẽ.tɨ/
posteriormente posteriôrmente:gbr=/pos.te.ɾiˌoʁˈmẽ.t͡ʃi/;rio=/poʃ.te.ɾiˌoʁˈmẽ.t͡ʃi/;sp=/pos.te.ɾiˌoɾˈmẽ.t͡ʃi/;pt=/puʃ.tɨˌɾjoɾˈmẽ.tɨ/
somente sómente:br=/ˌsɔˈmẽ.t͡ʃi/;pt=/ˌsɔˈmẽ.tɨ/
simplesmente :pt=/ˌsĩ.plɨʒˈmẽ.tɨ/;rio=/ˌsĩ.pliʒˈmẽ.t͡ʃi/;gbr-sp=/ˌsĩ.plizˈmẽ.t͡ʃi/
# -zinho
balãozinho :br=/baˌlɐ̃w̃ˈzĩ.ɲu/
bauzinho baúzinho:br=/baˌuˈzĩ.ɲu/
coraçãozinho cồraçãozinho:br=/ˌko.ɾaˌsɐ̃w̃ˈzĩ.ɲu/
finalzinho :br=/fiˌnawˈzĩ.ɲu/;pt=/fiˌnalˈzi.ɲu/
homenzinho :br=/ˌõ.mẽj̃ˈzĩ.ɲu/
nenenzinho nenénzinho:br=/neˌnẽj̃ˈzĩ.ɲu/
pobrezinho póbrezinho:br=[ˌpɔ.bɾiˈzĩ.j̃u];pt=[ˌpɔ.βɾɨˈzi.ɲu]
sozinho sózinho:br=/ˌsɔˈzĩ.ɲu/;pt=/ˌsɔˈzi.ɲu/
vizinho br=vizínho:/viˈzĩ.ɲu/ pt=vizínho,vezínho:/viˈzi.ɲu/,/vɨˈzi.ɲu/ -- (This should be a different rule, see https://www.flip.pt/Duvidas-Linguisticas/Duvida-Linguistica/DID/1303)
# double letters
Accra :/ˈa.kɾɐ/
Aleppo Aléppo:br=/aˈlɛ.pu/
buffer bâfferh:gbr-rio=/ˈbɐ.feʁ/;sp=/ˈbɐ.feɾ/
cheddar chéddarh:gbr-rio=/ˈʃɛ.daʁ/;sp=/ˈʃɛ.daɾ/
Hanna br=Ranna:/ˈʁɐ̃.nɐ/
jazz djézz:gbr-sp=/ˈd͡ʒɛ(j)s/;rio=/ˈd͡ʒɛ(j)ʃ/
Minnesota Mìnnessôta:br=/ˌmi.neˈso.tɐ/
nutella nutélla:/nuˈtɛ.lɐ/
shopping br=shópping,shóppem:/ˈʃɔ.pĩ/,/ˈʃɔ.pẽj̃/
Yunnan :br=/juˈnɐ̃/;pt=/juˈnan/
# multiword expressions
água mole em pedra dura tanto bate até que fura água móle em pédra dura tanto bate até que fura:br=/ˈa.ɡwɐ ˈmɔ.li ẽj̃ ˈpɛ.dɾɐ ˈdu.ɾɐ ˈtɐ̃.tu ˈba.t͡ʃi aˈtɛ ki ˈfu.ɾɐ/;gpt=[ˈa.ɣwɐ ˈmɔ.l(ɨ) ɐ̃j̃ ˈpɛ.ðɾɐ ˈðu.ɾɐ ˈtɐ̃.tu ˈβa.t(ɨ) ɐ.ˈtɛ kɨ ˈfu.ɾɐ]
era só o que me faltava éra só o que me faltava:br=/ˈɛ.ɾɐ ˈsɔ u ki mi fawˈta.vɐ/
# nasalized vowels
mãe :/ˈmɐ̃j̃/
mãe :pt=[ˈmɐ̃j̃];br=[ˈmɐ̃ɪ̯̃]
põem põeem:br-cpt=/ˈpõj̃.ẽj̃/;gpt=/ˈpõj̃.ɐ̃j̃/
pãozão :/pɐ̃w̃ˈzɐ̃w̃/
Itapoã :br=/i.ta.poˈɐ̃/
# oral diphthongs
saiba :/ˈsaj.bɐ/
saiba :br=[ˈsaɪ̯.bɐ];pt=[ˈsaj.βɐ]
papeis :gpt=/pɐˈpɐjʃ/;cpt=/pɐˈpejʃ/
papéis :gpt=/pɐˈpɐjʃ/;cpt=/pɐˈpɛjʃ/
anéis :gpt=/ɐˈnɐjʃ/
noite :br=/ˈnoj.t͡ʃi/;pt=/ˈnoj.tɨ/
noite :br=[ˈnoɪ̯.t͡ʃi]
Paulo :/ˈpaw.lu/
Paulo :pt=[ˈpaw.lu];br=[ˈpaʊ̯.lu]
deusa :/ˈdew.zɐ/
deusa :br=[ˈdeʊ̯.zɐ]
ouro :br-pt=/ˈo(w).ɾu/;spt=/ˈo.ɾu/
ouro :br=[ˈo(ʊ̯).ɾu]
sair :gbr-rio=/saˈi(ʁ)/;sp=/saˈi(ɾ)/;pt=/sɐˈiɾ/
Raul :br=/ʁaˈuw/;pt=/ʁɐˈul/
queimar :gbr-rio=/kejˈma(ʁ)/;sp=/kejˈma(ɾ)/;cpt=/kejˈmaɾ/;gpt=/kɐjˈmaɾ/
baile :br=/ˈbaj.li/;pt=/ˈbaj.lɨ/
beira :br=/ˈbe(j).ɾɐ/;gpt=/ˈbɐj.ɾɐ/;cpt=/ˈbej.ɾɐ/
saia :/ˈsaj.ɐ/
saia :br=[ˈsaɪ̯.ɐ]
saiu :br=/saˈiw/;pt=/sɐˈiw/
saiu :br=[saˈiʊ̯]
saído :br=/saˈi.du/;pt=/sɐˈi.du/
sinusoidal sinusòidal:pt=/si.nuˌzɔjˈdal/
asteroidal asteròidal:pt=/ɐʃ.tɨˌɾɔjˈdal/
# hiatus
vieira :br=/viˈe(j).ɾɐ/;gpt=/ˈvjɐj.ɾɐ/;cpt=/ˈvjej.ɾɐ/
ia :/ˈi.ɐ/
iogurte br=i.ogurte,iogurte:gbr-rio=/i.oˈɡuʁ.t͡ʃi/,/joˈɡuʁ.t͡ʃi/;sp=/i.oˈɡuɾ.t͡ʃi/,/joˈɡuɾ.t͡ʃi/ pt=iọgurte:/jɔˈɡuɾ.tɨ/
]]
function tests:check_ipa(spelling, expected, comment)
local inputs = {}
for style, expected_obj in pairs(expected) do
inputs[style] = expected_obj.respellings
end
local expressed_styles = m_pt_pronunc.express_styles(inputs)
for _, style_group in ipairs(expressed_styles) do
for _, style_obj in ipairs(style_group.styles) do
options.comment = style_obj.tag and style_obj.tag .. (comment and "; " .. comment or "") or comment or ""
local function get_actual_ipas(ipa_type)
local actual_ipas = {}
for _, phonemic_phonetic in ipairs(style_obj.phonemic_phonetic) do
local ipa = phonemic_phonetic[ipa_type]
if ipa_type == "phonemic" then
ipa = "/" .. ipa .. "/"
else
ipa = "[" .. ipa .. "]"
end
table.insert(actual_ipas, ipa)
end
return table.concat(actual_ipas, ",")
end
-- Check if all the styles represented by this particular actual IPA have the same expected IPA.
-- If so, we can display a single test line (whether or not the actual and expected match).
-- Otherwise, display each style individually.
local matches = true
local matching_respellings = nil
local matching_expected_ipas = nil
local matching_ipa_type = nil
for _, represented_style in ipairs(style_obj.represented_styles) do
if not expected[represented_style] then
error("Internal error: Didn't generate IPA for style '" .. represented_style .. "'")
end
local this_respellings = table.concat(expected[represented_style].respellings, ",")
local this_expected_ipas = table.concat(expected[represented_style].ipas, ",")
local this_ipa_type = expected[represented_style].type
if not matching_expected_ipas then
matching_respellings = this_respellings
matching_expected_ipas = this_expected_ipas
matching_ipa_type = this_ipa_type
elseif matching_respellings ~= this_respellings or matching_expected_ipas ~= this_expected_ipas or
matching_ipa_type ~= this_ipa_type then
matches = false
break
end
end
if matches then
self:equals(
link(spelling) .. (matching_respellings == spelling and "" or ", respelled " .. matching_respellings),
get_actual_ipas(matching_ipa_type),
matching_expected_ipas,
options
)
else
for _, represented_style in ipairs(style_obj.represented_styles) do
if not expected[represented_style] then
error("Internal error: Didn't generate IPA for style '" .. represented_style .. "'")
end
local this_respellings = table.concat(expected[represented_style].respellings, ",")
local this_expected_ipas = table.concat(expected[represented_style].ipas, ",")
local this_ipa_type = expected[represented_style].type
options.comment = m_pt_pronunc.all_style_descs[represented_style] .. (comment and "; " .. comment or "")
self:equals(
link(spelling) .. (this_respellings == spelling and "" or ", respelled " .. this_respellings),
get_actual_ipas(this_ipa_type),
this_expected_ipas,
options
)
end
end
end
end
end
local function parse(examples)
-- The following is a list of parsed examples where each element is a three-element list of
-- {SPELLING, EXPECTED, COMMENT}. SPELLING is the actual spelling of the term; EXPECTED is a table giving
-- the respellings and associated expected IPA, and COMMENT is an optional comment (if given starting with a
-- # sign after a given line) or nil. EXPECTED is a table whose keys are basic styles, e.g. "rio", "lisbon",
-- and values are a table with keys 'respellings' (one or more respellings), 'ipas' (corresponding IPA values)
-- and 'type' ("phonemic" or "phonetic").
local parsed_examples = {}
-- Throw away comments starting with -- and snarf each line.
for line in examples:gsub("%s*%-%-[^\n]*", ""):gmatch "[^\n]+" do
-- Trim whitespace at beginning and end.
line = line:gsub("^%s*(.-)%s*$", "%1")
if line ~= "" then -- skip blank lines
local function err(msg)
error(msg .. ": " .. line)
end
local function rsplit2(term, regex)
local splitvals = rsplit(term, regex)
if #splitvals ~= 2 then
err("Expected two parts in '" .. term .. "' when split by '" .. regex .. "'")
end
return splitvals
end
if line:find("^#") then
-- Line beginning with # is a section header.
line = line:gsub("^#%s*", "")
table.insert(parsed_examples, line)
else
local function expand_styles(styles)
local expansion = {}
for _, style in ipairs(rsplit(styles, "%-")) do
if all_style_set[style] then
table.insert(expansion, style)
elseif m_pt_pronunc.all_style_groups[style] then
for _, basic in ipairs(m_pt_pronunc.all_style_groups[style]) do
table.insert(expansion, basic)
end
else
err("Unrecognized style '" .. style .. "'")
end
end
return expansion
end
local parts = rsplit(line, "\t")
local spelling = parts[1]
local expected = {}
local comment
for i=2,#parts do
local part = parts[i]
if part:find("^#") then
if i ~= #parts then
err("Comment .. " .. part .. " should be last element on the line")
end
comment = part
break
end
local respelling, styled_ipas = unpack(rsplit2(part, ":"))
local styles
if respelling:find("=") then
styles, respelling = unpack(rsplit2(respelling, "="))
else
styles = "all"
end
if respelling == "" then
respelling = spelling
end
respelling = rsplit(respelling, ",")
styles = expand_styles(styles)
local style_set = m_table.listToSet(styles)
for _, styled_ipa in ipairs(rsplit(styled_ipas, ";")) do
local ipa_styles, ipas
if styled_ipa:find("=") then
ipa_styles, ipas = unpack(rsplit2(styled_ipa, "="))
ipa_styles = expand_styles(ipa_styles)
else
ipa_styles = styles
ipas = styled_ipa
end
ipas = rsplit(ipas, ",")
local ipa_type
for _, ipa in ipairs(ipas) do
local this_ipa_type
if ipa:find("^/.*/$") then
this_ipa_type = "phonemic"
elseif ipa:find("^%[.*%]$") then
this_ipa_type = "phonetic"
else
err("IPA " .. ipa .. " should be surrounded with /.../ or [...]")
end
if not ipa_type then
ipa_type = this_ipa_type
elseif ipa_type ~= this_ipa_type then
err("All IPA values " .. table.concat(ipa, ",") ..
" specified for this style should agree in being phonemic or phonetic")
end
end
for _, ipa_style in ipairs(ipa_styles) do
if not style_set[ipa_style] then
err("Style '" .. ipa_style .. "' not listed among respelling styles " ..
table.concat(styles, ","))
end
expected[ipa_style] = {respellings = respelling, ipas = ipas, type = ipa_type}
end
end
end
if not next(expected) then
err("No expected pronunciations given")
end
table.insert(parsed_examples, {spelling, expected, comment})
end
end
end
return parsed_examples
end
function tests:test()
self:iterate(parse(examples), "check_ipa")
end
return tests