Module:cy-IPA/sandbox
Appearance
- This module sandbox lacks a documentation subpage. Please create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox of
--[=====[
Currently missing:
* Dialects: should include North Wales, South Wales and standard vs. colloquial variants of each. These parameters are optional when there is no difference
* ch, dd, ff, ng, ll, ph, th should be treated as single letters, all other
consonant combinations should not
* Function for de-aspiration of b, d, g in /sC/ clusters and word finally, but
remaining distinct from aspirated p, t, k
* Function to treat all voicing assimilation as becoming voiceless rather than
progressive or regressive assimilation
* Function for pre-consonantal obstruent devoicing of d, g, b, s
* y should be treated as /ə/, unless in a final syllable when it is /ɨ/ or /ɨː/
depending on vowel length. This ensures it stays separate from u /ɨ/ or /ɨː/
* y (in final syllables) and u merge with i in South Wales, including in diphthongs
* ae should be treated as /ɑːɨ/ in final syllables and /eːɨ̯/ elsewhere
* ng is usually ŋ (marked for alphabetisation as g~ already) but may be ŋɡ (not
considered one letter) especially in compound words
* Function to reduce double letters, after appropriate consideration of effects on vowel length
* Function to convert ⟨ai, au⟩ in final unstressed syllables to /ɛ/ in colloquial Welsh
* Function to convert ⟨ai, au, e⟩ in final unstressed syllables to /a/ in colloquial Northern Welsh
* Many other diphthongs (including stressed) are smoothed in South Welsh - need to research
* Rule to determine stress - always penultimate syllable, unless there is a stressed
suffix such as -(h)áu or the word is a recent loanword
* Rules to determine when to make vowels short vs. long. The best way to do this
is by taking South Welsh as normative wrt. vowel length and North Welsh as
normative wrt vowel quality (and length in diphthongs). Whichever of the two
has a long vowel before a cluster should be normative in this respect.
* An input whether the word is a recent loan from English might make a lot of exceptions
predictable/automatable, e.g. words with atypical short and long vowels or stress
* Rules to determine when to make vowels short vs. long. There will need to be
ways to override this, e.g. by adding a circumflex to long vowels and a grave to short vowels.
Some defaults:
- vowels should be short if unstressed or /ə/
- vowels should be long in a stressed open syllable (unless non-final in North Welsh)
- vowels should be long in a stressed final syllable before /b, ch, d, dd, g, f, ff, g, h, l, n, r, ph, s, th/
- note that exceptions to the above are common for /l, n, r/
- vowels should also be long in stressed open syllables before /b, ch, d, dd, g, f, ff, g, h, l, n, r, ph, th/
but NOT /s/ (except in North Wales, where all non-final vowels are short)
- all other vowels should be short, especially when an aspirated stop and some
liquid consonants follow /c, m, ll, ng, nn, p, rr, t/
- vowels should generally be short before clusters, with well-defined exceptions
- Vowels in North Welsh are long in stressed final syllables before /sC, ɬC/ clusters - should form part of the norm with South Welsh automatically derived from it
- Diphthongs with long vowels in North Welsh (only in final syllables)
include ae /ɑːɨ̯, eːɨ̯/, aw /ɑːu̯/, ew /eːu̯/, ey /e.ɨ̯/, oe /ɔːɨ̯/, ou /ɔːɨ̯/- should form part of the norm with South Welsh automatically derived from it
- syllables with secondary stress should be treated as if stressed
--]=====]
local export = {}
local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
local usub = mw.ustring.sub
local ulen = mw.ustring.len
-- Substitute via rsubn(), handing back only the resulting string; the
-- replacement count that rsubn() also returns is dropped by the parentheses.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end
-- Keep applying rsub() with the same pattern and replacement until a pass
-- leaves the string unchanged, then return that stable string.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end
-- "If not empty": map the empty string to nil and pass every other value
-- through untouched.
local function ine(x)
	if x ~= "" then
		return x
	end
	return nil
end
-- Combining diacritics, built from their code points.
local AC = u(0x0301) -- U+0301 combining acute accent
local GR = u(0x0300) -- U+0300 combining grave accent
local BREVE = u(0x0306) -- U+0306 combining breve
-- Acute + grave, as bare character-set contents (no brackets).
local stress_accent = AC .. GR
-- Pattern matching exactly one acute or grave.
local stress_accent_c = "[" .. stress_accent .. "]"
-- Acute + grave + breve, as bare character-set contents.
local accent = stress_accent .. BREVE
-- Pattern matching zero or more of acute/grave/breve (so it can match empty).
local accents_r = "[" .. accent .. "]*"
local DIA = u(0x0308) -- U+0308 combining diaeresis
-- Character-set contents: the accented vowel letters listed here plus the
-- combining marks in `accent`.
-- NOTE(review): unaccented a/e/i/o/u/w/y are not in this set, so vowel_c does
-- not match them and cons_c does. export.IPA also normalises its input to NFD
-- before matching, so the accented letters listed here presumably only match
-- where that function recomposes them -- confirm whether this is intended.
local vowel = "àáâäèéêëìíîïòóôöùúûẁẃŵẅüỳýŷÿ" .. accent
-- Pattern matching one character from `vowel`.
local vowel_c = "[" .. vowel .. "]"
-- Pattern matching one character that is not in `vowel` and is not ".",
-- the boundary marker "⁀", a space, "-", "(" or ")".
local cons_c = "[^" .. vowel .. ".⁀ %-()]"
-- Leftover from Module:de-IPA; not referenced elsewhere in the visible code.
-- Kept because Welsh has i-umlaut (i-affection), so it may become useful.
local front_vowel = "eiyæœ"
local front_vowel_c = "[" .. front_vowel .. "]"
-- Grapheme-to-phone lookup used by export.IPA.
--
-- The outer key is a single character. Its value is one of:
--   * a string: the phone for that character;
--   * an array of strings: several phones emitted in order;
--   * a keyed table: candidate spellings that start with that character,
--     each mapped to a string or an array as above. export.IPA picks the
--     longest candidate that matches at the current position.
--
-- Entries for a-f have been adapted for Welsh; everything after that is
-- still inherited from Module:de-IPA and describes German, not Welsh.
--
-- Each outer key must appear only once: in a table constructor a repeated
-- key silently replaces the earlier value. A stray German `["f"] = "f"`
-- entry used to follow the Welsh "f" table and wiped out its f -> v and
-- ff -> f mappings; it has been removed.
--
-- NOTE(review): ["dd"] = "d" and ["ch"] = "ç" look like German leftovers
-- too (Welsh dd/ch are normally transcribed differently) -- confirm.
local sequences = {
	["a"] = {
		["a"] = "a";
		["à"] = "a";
		["á"] = "a";
		["â"] = "a";
		["ä"] = "a";
		["ae"] = "ɑːɨ̯";
		["ai"] = "ai̯";
		["au"] = "aɨ̯";
		["aw"] = "ɑːu̯";
	};
	["b"] = {
		["b"] = "b";
	};
	["c"] = {
		["c"] = "k";
		["ch"] = "ç";
	};
	["d"] = {
		["d"] = "d";
		["dd"] = "d";
	};
	["e"] = {
		["e"] = "ɛ";
		["è"] = "ɛ";
		["é"] = "eː";
		["ê"] = "eː";
		["ë"] = "e";
		["ei"] = "ɛi̯";
		["eu"] = "əɨ̯";
		["ew"] = "eːu̯";
		["ey"] = "aɨ̯";
	};
	["f"] = {
		["f"] = "v";
		["ff"] = "f";
	}; -- Here, Arafsymudwr stopped editing and what follows is from Module:de-IPA
	["g"] = "ɡ";
	["h"] = "h";
	["i"] = {
		["i"] = "ɪ";
		["ie"] = "iː";
	};
	["j"] = "j";
	["k"] = {
		["k"] = "k";
		["kk"] = "k";
		["ck"] = "k";
	};
	["l"] = "l";
	["m"] = "m";
	["n"] = {
		["n"] = "n";
		["ng"] = "ŋ";
		["nn"] = "n";
	};
	["o"] = {
		["oo"] = "oː";
		["os"] = { "ɔ", "s" };
		["o"] = "ɔ";
	};
	["ö"] = {
		-- XXX: sometimes /øː/
		["ö"] = "œ";
		["ös"] = { "œ", "s" };
	};
	["p"] = {
		["ph"] = "f";
		["pp"] = "p";
		["p"] = "p";
	};
	["q"] = {
		["qu"] = { "k", "f" };
		["q"] = "k"; -- XXX
	};
	["r"] = {
		-- XXX: /ʀ/? /r/?; sometimes /ɐ/ ("Uhr"); also /ər/ ("oder")
		["r"] = "r";
		["rr"] = "r";
	};
	["s"] = {
		["s"] = "s";
		["sch"] = "ʃ";
		["sp"] = { "ʃ", "p" };
		["ss"] = "s";
		["st"] = { "ʃ", "t" };
	};
	["t"] = {
		["t"] = "t";
		["tsch"] = "t͡ʃ";
		["tt"] = "t";
		["tion"] = { "t͡s", "i̯", "o", "n" };
	};
	["u"] = {
		["u"] = "ʊ";
		["uch"] = { "ʊ", "x" };
	};
	["ü"] = {
		["ü"] = "yː";
		["üh"] = "yː";
	};
	["v"] = "f";
	["w"] = "ʋ";
	["x"] = { "k", "s" }; -- XXX
	["y"] = "i";
	["z"] = "z"; -- already converted from s
	["ß"] = "s";
	["́"] = "ˈ"; -- FIXME
	["-"] = {};
}
--- Produce a (work-in-progress) IPA transcription of a term.
--
-- Accepts either plain arguments or a single table with an `args` field
-- (e.g. a Scribunto frame); in the table form the parameters are read from
-- `args[1]`, `args.orig` and `args.pos`, with empty strings treated as absent.
--
-- @param text  the term to transcribe, or the table described above; when no
--              term is supplied the current page title is used
-- @param orig  accepted but currently unused
-- @param pos   accepted but currently unused
-- @return the transcription string, with the internal word-boundary markers
--         and hyphens removed
function export.IPA(text, orig, pos)
	if type(text) == 'table' then
		text, orig, pos = ine(text.args[1]), ine(text.args.orig), ine(text.args.pos)
	end
	text = text or mw.title.getCurrentTitle().text
	text = ulower(text)

	-- Decompose so base letters and combining marks can be handled separately.
	text = mw.ustring.toNFD(text)
	-- Move a combining diaeresis in front of any acute/grave/breve so that it
	-- sits directly after its base vowel, then recompose a/o/u + diaeresis.
	-- Only a real diaeresis triggers this. The German respelling rule that
	-- also turned "ae", "oe", "ue" into umlauted vowels has been dropped: it
	-- rewrote Welsh digraphs before lookup, so the "ae" entry in `sequences`
	-- could never match.
	text = rsub(text, "(" .. accents_r .. ")(" .. DIA .. ")", "%2%1")
	text = rsub(text, "([aou])" .. DIA, {a="ä", o="ö", u="ü"})
	-- put breves before acute/grave accents
	text = rsub(text, "(" .. stress_accent_c .. ")" .. BREVE, BREVE .. "%1")

	-- To simplify checking for word boundaries, ⁀ is added at the beginning
	-- and end of every word here and stripped again at the very end.
	text = rsub(text, "%s*,%s*", '⁀⁀ | ⁀⁀')
	text = rsub(text, "%s+", '⁀ ⁀')
	text = rsub(text, "%-+", '⁀-⁀')
	text = '⁀⁀' .. text .. '⁀⁀'

	-- Spelling pre-processing inherited from the German module.
	-- NOTE(review): these rewrites (ch/sch/ck/z, and s -> z before a vowel)
	-- look German-specific -- confirm which of them Welsh actually needs.
	text = rsub(text, "([aou]" .. accents_r .. ")" .. "ch", "%1χ")
	text = rsub(text, "sch", "ʃ")
	text = rsub(text, "ch", "ç")
	text = rsub(text, "ck", "kk")
	text = rsub(text, "z", "c")
	text = rsub(text, "s(" .. vowel_c .. ")", "z%1")

	-- NOTE: a devoicing step for b/d/g/r before a consonant or boundary used
	-- to follow here. It referenced `cons_or_boundary_c` and `devoiced_cons`,
	-- neither of which is defined in this module, so every call raised an
	-- error while the pattern was being built. It is omitted until
	-- Welsh-specific devoicing rules (listed as missing in the header notes)
	-- are written.

	-- Convert letters to phonemes: walk the string, at each position taking
	-- the longest spelling listed in `sequences` for the current character.
	local phones, i, n = {}, 1, ulen(text)
	while i <= n do
		local bid = ulower(usub(text, i, i))
		local value = sequences[bid]
		-- A keyed table (no array part) lists candidate multi-character
		-- spellings; an array is a ready-made list of phones.
		if (type(value) == 'table') and not value[1] then
			local bidl = ulen(bid)
			for seq in pairs(value) do
				local seql = ulen(seq)
				-- Only strictly longer candidates can replace the current
				-- best, so the result does not depend on pairs() order.
				if seql > bidl then
					if (ulower(usub(text, i, i + seql - 1)) == seq) then
						bid = seq
						bidl = ulen(bid)
					end
				end
			end
			value = value[bid]
		end
		if type(value) == 'string' then
			table.insert(phones, value)
		elseif not value then
			-- No mapping: pass the character through unchanged.
			table.insert(phones, bid)
		else
			for _, phone in ipairs(value) do
				table.insert(phones, phone)
			end
		end
		-- Skip past everything the chosen spelling consumed.
		i = i + ulen(bid)
	end
	text = table.concat(phones)

	-- remove hyphens and word-boundary markers
	text = rsub(text, '[⁀%-]', '')
	return text
end
return export