Module:grc-pronunciation/sandbox
- The following documentation is located at Module:grc-pronunciation/sandbox/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox of (diff)
Data for the sandbox module: Module:grc-pronunciation/sandbox/data.
Testcases
[edit]ἄγριος (ágrios) | Mark the vowel length of the ambiguous vowels ἄ and ι by adding a macron after each one if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. |
ἄναρθρος (ánarthros) | Mark the vowel length of the ambiguous vowels ἄ and α by adding a macron after each one if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. |
ἄνθρωπος (ánthrōpos) | Mark the vowel length of the ambiguous vowel ἄ by adding a macron after it if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. |
ᾰ̓́νθρωπος (ánthrōpos) | |
ἀρχιμανδρῑ́της (arkhimandrī́tēs) | |
Αὖλος (Aûlos) | |
Γᾱ́δ (Gā́d) | |
γαῖᾰ (gaîa) | |
γένος (génos) | |
Δῐονῡ́σῐᾰ (Dionū́sia) | |
ἐγγενής (engenḗs) | |
ἔγγονος (éngonos) | |
ἔγκειμαι (énkeimai) | |
ἔκγονος (ékgonos) | |
ἔκδικος (ékdikos) | Mark the vowel length of the ambiguous vowel ι by adding a macron after it if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. |
ἐκφῠ́ω (ekphúō) | |
ἔμβρυον (émbruon) | Mark the vowel length of the ambiguous vowel υ by adding a macron after it if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. |
ἐρετμόν (eretmón) | |
ἐρρήθη (errhḗthē) | |
Ζεύς (Zeús) | |
Ἡρᾰκλέης (Hērakléēs) | |
Θρᾷξ (Thrâix) | |
Κιλικίᾱ (Kilikíā) | Mark the vowel length of the ambiguous vowels ι, ι and ί by adding a macron after each one if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. |
μάχη (mákhē) | Mark the vowel length of the ambiguous vowel ά by adding a macron after it if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. |
ναῦς (naûs) | |
οἷαι (hoîai) | |
πᾶς (pâs) | |
πατρίς (patrís) | Mark the vowel length of the ambiguous vowels α and ί by adding a macron after each one if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. |
Πηληϊάδης (Pēlēïádēs) | Mark the vowel length of the ambiguous vowels ϊ and ά by adding a macron after each one if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. |
πρᾶγμα (prâgma) | Mark the vowel length of the ambiguous vowel α by adding a macron after it if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. |
σβέννῡμῐ (sbénnūmi) | |
σημεῖον (sēmeîon) | |
σμῑκρός (smīkrós) | |
τάττω (táttō) | Mark the vowel length of the ambiguous vowel ά by adding a macron after it if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. |
τὴν ᾰ̓οιδήν (tḕn aoidḗn) | |
τμῆμα (tmêma) | Mark the vowel length of the ambiguous vowel α by adding a macron after it if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. |
φιλίᾳ (philíāi) | Mark the vowel length of the ambiguous vowels ι and ί by adding a macron after each one if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. |
χάσμα (khásma) | Mark the vowel length of the ambiguous vowels ά and α by adding a macron after each one if it is long, or a breve if it is short. By default, Module:grc-pronunciation assumes it is short if unmarked. |
χέω (khéō) | |
ᾠδῇ (ōidêi) |
-- Table of functions exported by this module (returned at the end of the file).
local export = {}
local strip_accent = require('Module:grc-accent').strip_accent
-- [[Module:grc-utilities]] converts sequences of diacritics to the order required by this module,
-- then replaces combining macrons and breves with spacing ones.
local m_utils = require("Module:grc-utilities")
local rearrangeDiacritics = m_utils.pronunciationOrder
local m_utils_data = require("Module:grc-utilities/data")
local chars = m_utils_data.named
-- Per-character pronunciation data; also holds the character classes used by is() under .chars.
local m_data = mw.loadData("Module:grc-pronunciation/sandbox/data")
local m_IPA = require("Module:IPA")
local m_a = require("Module:accent qualifier")
local lang = require("Module:languages").getByCode("grc")
local sc = require("Module:scripts").getByCode("polytonic")
local full_link = m_utils.link
local tag_text = m_utils.tag
-- Period codes for which a pronunciation can be generated; convert_term walks this list in order
-- and starts emitting at the requested period.
local periods = {'cla', 'koi1', 'koi2', 'byz1', 'byz2'}
-- Subset of the periods shown in the collapsed one-line view built by make_table.
local inlinePeriods = {'cla', 'koi2', 'byz2'}
-- Short aliases for the Unicode-aware string functions used throughout the module.
local rsplit = mw.text.split
local rfind = mw.ustring.find
local usub = mw.ustring.sub
local rmatch = mw.ustring.match
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local ulower = mw.ustring.lower
local U = mw.ustring.char
--[==[
	Return the single character of `s` at character position `i`
	(we fetch a single character at a time very often).

	Any out-of-bounds position gives ''. Positions below 1 are rejected
	explicitly: mw.ustring.sub treats a negative index as counting back from
	the end of the string, so without the guard a look-behind past the start
	of the string would return a real character instead of ''.
]==]
local function fetch(s, i)
	if i < 1 then
		return ''
	end
	return usub(s, i, i)
end
--Combining diacritics are tricky.
-- Named constants for the IPA marks this module compares against or looks for
-- while syllabifying. Several of them (the pitch marks) are not referenced in
-- the code below; presumably they document characters used by the data module
-- -- TODO confirm.
local tie = U(0x35C) -- tie bar
local nonsyllabic = U(0x32F) -- combining inverted breve below
local high = U(0x341) -- combining acute tone mark
local low = U(0x340) -- combining grave tone mark
local rising = U(0x30C) -- combining caron
local falling = chars.Latin_circum -- combining circumflex
local midHigh = U(0x1DC4) -- mid–high pitch
local midLow = U(0x1DC6) -- mid–low pitch
local highMid = U(0x1DC7) -- high–mid pitch
local voiceless = U(0x325) -- combining ring below
local aspirated = 'ʰ'
-- Spacing (non-combining) length marks; hasMacronBreve checks for these after a vowel.
local macron = '¯'
local breve = '˘'
--[==[
	Test whether `text` as a whole matches the character class named `X`.

	`X` is a key of m_data.chars. For "frontDiphth" and "Greekdiacritic" the
	stored value is used as a complete pattern (anchored at both ends); for
	every other class it is treated as the contents of a bracket set, so
	`text` must be exactly one character from that set.

	Returns false when either argument is nil/false; otherwise returns the
	result of mw.ustring.find (a position when it matches, nil when it
	does not). Raises an error, attributed to the caller, if no class named
	`X` exists.
]==]
local function is(text, X)
	if not text or not X then
		return false
	end
	-- `pattern` must be local: it was previously written to the global table.
	local pattern = m_data.chars[X] or error('No data for "' .. X .. '".', 2)
	if X == "frontDiphth" or X == "Greekdiacritic" then
		pattern = "^" .. pattern .. "$"
	else
		pattern = "^[" .. pattern .. "]$"
	end
	return rfind(text, pattern)
end
-- Shared test for isIDiphth / isUDiphth: the character after `index` is the
-- plain letter `base` once accents are stripped, and its data entry is not
-- flagged with a diaeresis.
local function is_diphthong_second_element(term, index, base)
	local candidate = fetch(term, index + 1)
	return strip_accent(candidate) == base and not m_data[candidate].diaer
end

-- Predicates that data conditions can invoke by name through the "~" operator
-- handled in decode. Each receives the term and a character position.
local env_functions = {
	-- The position is followed by a front vowel, or by a front diphthong
	-- whose second letter is not an iota with diaeresis.
	preFront = function(term, index)
		local following = fetch(term, index + 1)
		local afterFollowing = fetch(term, index + 2)
		local frontVowel = is(strip_accent(following), "frontVowel")
		if frontVowel then
			return frontVowel
		end
		return is(strip_accent(following .. afterFollowing), "frontDiphth")
			and not is(afterFollowing, "iDiaer")
	end,
	-- The next character is an iota without diaeresis.
	isIDiphth = function(term, index)
		return is_diphthong_second_element(term, index, 'ι')
	end,
	-- The next character is an upsilon without diaeresis.
	isUDiphth = function(term, index)
		return is_diphthong_second_element(term, index, 'υ')
	end,
	-- The next character is a spacing macron or breve.
	hasMacronBreve = function(term, index)
		local mark = fetch(term, index + 1)
		return mark == macron or mark == breve
	end,
}
--[==[
	Evaluate a condition string from the data module at position `x` of `term`.

	Compound conditions:
	* A plus sign (+) means "and", a slash (/) means "or".
	* The condition is split at its FIRST operator; the part to the right
	  (which may contain further operators) is evaluated recursively.
	  There is therefore no precedence between + and /: "a+b/c" is read
	  as a and (b or c), and "a/b+c" as a or (b and c).

	Simple conditions start with a number giving the character under
	consideration (-1 is the previous character, 0 the current, 1 the next):
	* Equals sign (=) checks whether that character is equal to the
	  character after the sign.
	* Period (.) plus a word looks the word up in that character's entry in
	  the data table.
	* Tilde (~) plus a name calls the function of that name in
	  env_functions on that position, if the function exists.

	Returns a truthy value when the condition holds and a falsy one otherwise;
	a condition matching none of the forms above yields nothing.
]==]
local function decode(condition, x, term)
	if rfind(condition, '[+/]') then
		-- Lazy match up to the first slash or plus sign; everything after it
		-- (including any further operators) lands in `right`.
		local left, operator, right = rmatch(condition, "^([^/+]-)([/+])(.*)$")
		if not (left or right) then
			error('Condition "' .. tostring(condition) .. '" is improperly formed')
		end
		if operator == '/' then -- logical operator: or
			return decode(left, x, term) or decode(right, x, term)
		elseif operator == '+' then -- logical operator: and
			return decode(left, x, term) and decode(right, x, term)
		end
		return
	end

	if rfind(condition, '=') then -- check character identity
		local parts = rsplit(condition, "=")
		-- parts[1] is the offset as a string; the addition converts it to a number.
		return parts[2] == fetch(term, x + parts[1]) -- out of bounds fetch gives ''
	end

	if rfind(condition, '%.') then -- check character quality
		local parts = rsplit(condition, "%.")
		local entry = m_data[fetch(term, x + parts[1])]
		return entry and entry[parts[2]] or false
	end

	if rfind(condition, '~') then -- check character(s) using function
		local parts = rsplit(condition, "~")
		local handler = env_functions[parts[2]]
		return handler and handler(term, x + parts[1]) or false
	end
end
--[==[
	Resolve a data value `p` to a concrete result for position `x` of `term`.

	* A string or number is returned unchanged.
	* A table is treated as a sequence of alternatives tried in order:
	  - a string or number entry is returned immediately (a default);
	  - a table entry is a pair { condition, result }. If decode() accepts
	    the condition, the result is returned when it is a string, and is
	    otherwise resolved recursively.
	  If no alternative applies, nothing is returned.
	* Any other type raises an error.
]==]
local function check(p, x, term)
	if type(p) == 'string' or type(p) == 'number' then
		return p
	elseif type(p) == 'table' then --This table is sequential, with a variable number of entries.
		for _, possP in ipairs(p) do
			if type(possP) == 'string' or type(possP) == 'number' then
				return possP
			elseif type(possP) == 'table' then --This table is paired, with two values: a condition and a result.
				-- Kept local so the pair does not leak into the global table.
				local rawCondition, rawResult = possP[1], possP[2]
				if decode(rawCondition, x, term) then
					return (type(rawResult) == 'string') and rawResult or check(rawResult, x, term)
				end
			end
		end
	else
		error('"p" is of unrecognized type ' .. type(p))
	end
end
--[==[
	Convert `term` to unsyllabified IPA for each period from `periodstart`
	onward.

	`periodstart` is one of the codes in `periods`; when it is nil or '',
	every period is generated. Returns two values: a table mapping each
	generated period to { IPA = <string> }, and the list of generated periods
	in order.
]==]
local function convert_term(term, periodstart)
	if not term then error('The variable "term" in the function "convert_term" is nil.') end

	-- With no starting period given, output begins at the first period.
	local started = not (periodstart and periodstart ~= "")
	local IPAs, outPeriods = {}, {}
	for _, period in ipairs(periods) do
		if period == periodstart then
			started = true
		end
		if started then
			IPAs[period] = {}
			table.insert(outPeriods, period)
		end
	end

	local length = ulen(term)
	local x = 1
	while x <= length do
		local data = m_data[fetch(term, x)]
		-- Characters without a data entry are skipped silently.
		if data then
			-- data.pre, when present, decides how many following characters
			-- are consumed together with this one.
			local advance = data.pre and check(data.pre, x, term) or 0
			-- Use the multicharacter entry's pronunciation if there is one,
			-- otherwise fall back to the single character's.
			local p = (advance ~= 0) and m_data[usub(term, x, x + advance)].p or data.p
			for _, period in ipairs(outPeriods) do
				table.insert(IPAs[period], check(p[period], x, term))
			end
			x = x + advance
		end
		x = x + 1
	end

	--Concatenate the IPAs
	for _, period in ipairs(outPeriods) do
		IPAs[period] = { IPA = table.concat(IPAs[period], '')}
	end
	return IPAs, outPeriods
end
--[==[
	Return the index of the last character belonging to the current syllable.

	`nVowel` is the position of the next vowel (or stress mark) in `word`;
	`wordEnd` is true when there is none, in which case the syllable runs to
	the end of the word. Otherwise the characters immediately before `nVowel`
	decide how many of them are left for the following syllable.
]==]
local function find_syllable_break(word, nVowel, wordEnd)
	if not word then error('The variable "word" in the function "find_syllable_break" is nil.') end
	if wordEnd then
		return ulen(word)
	end

	-- Character `n` places before the next vowel.
	local function back(n)
		return fetch(word, nVowel - n)
	end

	if is(back(1), "liquid") then
		-- obstruent (optionally aspirated) + liquid stays together
		if is(back(2), "obst") then
			return nVowel - 3
		end
		if back(2) == aspirated and is(back(3), "obst") then
			return nVowel - 4
		end
		return nVowel - 2
	end
	if is(back(1), "cons") then
		return nVowel - 2
	end
	if back(1) == aspirated and is(back(2), "obst") then
		return nVowel - 3
	end
	if back(1) == voiceless and back(2) == 'r' then
		return nVowel - 3
	end
	return nVowel - 1
end
--[==[
	Split one word of IPA into syllables.

	Returns the word with '.' between syllables and the stress mark 'ˈ' moved
	to the start of its syllable (or dropped when the word is a single
	syllable). Returns nil for an empty word.

	A word, or trailing remainder, that contains no vowel at all is emitted
	unchanged as a final syllable; previously the search for a vowel never
	terminated on such input.
]==]
local function syllabify_word(word)
	local syllables = {}
	--[[	cVowel means "current vowel", nVowel "next vowel",
			sBreak "syllable break".	]]--
	local cVowel, nVowel, sBreak, stress, wordEnd, searching
	-- Declared local so they no longer leak into the global table.
	local letter, cVowelFound
	while word ~= '' do
		cVowel, nVowel, sBreak, stress = false, false, false, false

		--First thing is to find the first vowel.
		searching = 1
		cVowelFound = false
		while not cVowel do
			letter = fetch(word, searching)
			local nextLetter = fetch(word, searching + 1)
			if cVowelFound then
				if (is(letter, "vowel") and nextLetter ~= nonsyllabic) or is(letter, "cons") or letter == '' or letter == 'ˈ' then
					cVowel = searching - 1
				elseif is(letter, "diacritic") then
					searching = searching + 1
				elseif letter == tie then
					-- A tie bar joins the vowel to what follows, so keep looking.
					cVowelFound = false
					searching = searching + 1
				else
					searching = searching + 1
				end
			elseif letter == '' then
				-- Ran off the end of the word without finding a vowel.
				break
			else
				if is(letter, "vowel") then
					cVowelFound = true
				elseif letter == 'ˈ' then
					stress = true
				end
				searching = searching + 1
			end
		end

		if not cVowel then
			-- No vowel in what is left: keep it as it stands and stop.
			table.insert(syllables, word)
			break
		end

		--Next we try and find the next vowel or the end.
		searching = cVowel + 1
		while (not nVowel) and (not wordEnd) do
			letter = fetch(word, searching)
			if is(letter, "vowel") or letter == 'ˈ' then
				nVowel = searching
			elseif letter == '' then
				wordEnd = true
			else
				searching = searching + 1
			end
		end

		--Finally we find the syllable break point.
		sBreak = find_syllable_break(word, nVowel, wordEnd)

		--Pull everything up to and including the syllable Break.
		local syllable = usub(word, 1, sBreak)

		--If there is a stress accent, then we need to move it to the
		--beginning of the syllable, unless it is a monosyllabic word,
		--in which case we remove it altogether.
		if stress then
			if next(syllables) or syllable ~= word then
				syllable = 'ˈ' .. rsubn(syllable, 'ˈ', '')
			else
				syllable = rsubn(syllable, 'ˈ', '')
			end
			stress = false
		end
		table.insert(syllables, syllable)
		word = usub(word, sBreak + 1)
	end

	local out = nil
	if #syllables > 0 then
		out = table.concat(syllables, '.')
		-- The stress mark already marks a syllable boundary.
		out = rsubn(out, '%.ˈ', 'ˈ')
	end
	return out
end
--[==[
	Syllabify the IPA string of every period in `periods`.

	Each IPAs[period].IPA is split on spaces, every word is passed through
	syllabify_word, and the non-nil results are joined back with single
	spaces. The table is modified in place and also returned.
]==]
local function syllabify(IPAs, periods)
	for _, period in ipairs(periods) do
		local entry = IPAs[period]
		local words = {}
		for _, word in ipairs(rsplit(entry.IPA, ' ')) do
			local syllabified = syllabify_word(word)
			-- syllabify_word gives nil for an empty word (e.g. doubled spaces).
			if syllabified then
				table.insert(words, syllabified)
			end
		end
		entry.IPA = table.concat(words, ' ')
	end
	return IPAs
end
--[==[
	Build the preview-only editor note asking for vowel lengths to be marked.

	`ambig` is the list of ambiguous vowels found in the term. Returns '' when
	the list is empty; otherwise returns a <p class="previewonly"> paragraph
	naming the vowels, worded for one or for several vowels as appropriate.
]==]
local function make_ambig_note(ambig)
	-- The table ambig is filled with all the ambiguous vowels that have been found in the term.
	local ambig_note = ''
	if #ambig > 0 then
		-- { plural ending plus space, pronoun } chosen by the number of vowels.
		local agr = (#ambig > 1) and { 's ', 'each one' } or { ' ', 'it' }
		-- The paragraph is closed with </p> only: no <span> is ever opened,
		-- so the stray </span> that used to precede it has been dropped.
		ambig_note = '\n<p class="previewonly">Mark the vowel length of the ambiguous vowel' .. agr[1]
			.. mw.text.listToText(ambig) .. ' by adding a macron after ' .. agr[2]
			.. ' if it is long, or a breve if it is short. By default, [[Module:grc-pronunciation]] assumes it is short if unmarked.'
			.. '<br/><small>[This message shows only in preview mode.]</small></p>\n'
	end
	return ambig_note
end
--[==[
	Produce the final wikitext for the pronunciation box.

	`IPAs` maps period codes to { IPA = <string> }, `ambig` is the list of
	ambiguous vowels (passed on to make_ambig_note), and `periods` is the list
	of periods that were generated.

	The result is a "vsSwitcher" div holding two views: a collapsed one-line
	view with the pronunciations of the periods in `inlinePeriods` joined by
	arrows, and an expanded view with one bulleted line per generated period
	followed by the ambiguity note.
]==]
local function make_table(IPAs, ambig, periods)
	--Final format
	local inlineProns = {}
	local listOfProns = {}
	local fullProns = {}
	local periods2 = {} -- set of the generated periods, for lookup below

	for _, period in ipairs(periods) do
		table.insert(fullProns, '* ' .. m_a.format_qualifiers(lang, {"grc-" .. period}) .. ' ' ..
			m_IPA.format_IPA_full { lang = lang, items = {{pron = '/' .. IPAs[period].IPA .. '/'}} })
		periods2[period] = true
	end

	for _, period in ipairs(inlinePeriods) do
		if periods2[period] then
			local pron = '/' .. IPAs[period].IPA .. '/'
			table.insert(inlineProns, {pron = pron})
			table.insert(listOfProns, pron)
		end
	end

	-- Character count of the one-line view, used to size the box.
	-- (table.concat always returns a string, so no fallback is needed; the
	-- former trailing `or ""` could never take effect because `..` binds
	-- more tightly than `or`.)
	local inlineIPAlength = ulen("IPA(key): " .. table.concat(listOfProns, ' → '))

	local inline = '<div class="vsShow" style="display:none">\n* ' .. m_IPA.format_IPA_full {
		lang = lang,
		items = inlineProns,
		separator = " → ",
	} .. "</div>"

	local full = '<div class="vsHide">\n' .. table.concat(fullProns, '\n') .. make_ambig_note(ambig) .. '</div>'

	-- Width is estimated as 0.68em per character of the one-line view.
	return '<div class="vsSwitcher" data-toggle-category="pronunciations" style="width: ' .. inlineIPAlength * 0.68 .. 'em;"><span class="vsToggleElement" style="float: right;"> </span>' .. inline .. full .. '</div>'
end
--[==[
	Entry point: build the pronunciation box for a term.

	`frame` is either a Scribunto frame (arguments are then read from the
	parent frame) or a plain table of arguments, as passed by export.example.
	Parameters: `w` (alias 1) is the term, defaulting to the current page
	title; `period` is the starting period, defaulting to "cla".
]==]
function export.create(frame)
	local params = {
		[1] = {alias_of = 'w'},
		["w"] = {default = mw.title.getCurrentTitle().text},
		["period"] = {default = "cla"},
	}
	local process = require("Module:parameters").process
	local args = process(frame.getParent and frame:getParent().args or frame, params)

	local period = args.period
	local word = ulower(args.w)

	-- Ambiguous vowel lengths are only reported when starting from the
	-- first ("cla") period.
	local ambig = {}
	if period == "cla" then
		ambig = m_utils.findAmbig(word)
	end

	-- Normalise final sigma and rho with smooth breathing before conversion.
	word = rsubn(word, 'ς', 'σ')
	word = rsubn(word, 'ῤ', 'ρ')
	word = rearrangeDiacritics(word)

	local IPAs, outPeriods = convert_term(word, period)
	return make_table(syllabify(IPAs, outPeriods), ambig, outPeriods)
end
--[==[
	Build a wikitable of example pronunciations (used for documentation).

	Parameter 1 of the parent frame is a list of terms separated by a comma
	and whitespace. Each term may be followed by " (period=XXX)" to choose the
	starting period; without it "cla" is used. Every row shows a link to the
	term and the output of export.create for it.
]==]
function export.example(frame)
	local output = { '{| class="wikitable"' }

	local params = {
		[1] = {}
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)

	local terms = mw.text.split(args[1], ",%s+")

	-- ipairs, not pairs: the rows must come out in the order the terms were given.
	for _, term in ipairs(terms) do
		local period = rmatch(term, "%(period ?= ?([^%)]+)%)") or "cla"
		-- Text before " (" if a parenthesised part is present, else the whole term.
		local entry = rmatch(term, "([^%(]+) %(") or term
		local link = full_link(entry)
		local IPA = export.create{ entry, ["period"] = period }
		table.insert(output, "\n|-\n| " .. link .. " || " .. IPA)
	end

	table.insert(output, "\n|}")

	return table.concat(output)
end
return export
--Things we still need:
--Voicing of sigma around (after?) voiced stops.
--Proper alerts for editors, especially on ambiguous vowels.