- The following documentation is located at Module:User:Sarri.greek/grk-pronunciation/documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
User:Sarri.greek (CAT) » Module grk-pronunciation doc :: data doc »» invoked at Template grk-IPA-mod »»» applied in table Template grk-IPA, Template gkm-IPA
This is Module:grc-pronunciation & Module:grc-pronunciation/data
- TEST MEDIEVAL with only the two lines, no Hide/Show »» at table Template gkm-IPA
- TODO check some clusters like ks ps zm (do not break), and initial μπ ντ γκ b d g .
-- 2024.04.24. [[wikt:en:User:Sarri.greek]] my notes with !!, todo with ??
-- this is [[Module:grc-pronunciation]]
-- Now, default is med1+med2. Also view period=cla (all periods)
-- TESTS raw at [[Template:User:Sarri.greek/grk-IPA-mod]] [[]]
-- TEST for med at [[Template:User:Sarri.greek/gkm-IPA]]
STRUCTURE: This module produces at the moment consecutive lines for different language codes with pronunciations.
Each line has 3 things / 'elements':
1) the period-label (linked) e.g. period-label-cla, period-label-koi1... (example: 5th c. BCE Attic)
2) the IPA-label+key (linked IPA<sup>key</sup>): e.g. IPA-label-cla, IPA-label-koi1...
3) the actual IPA e.g. IPA-cla, IPA-koi1....
At the moment these three are unbreakable. There are no args for the editor.
One cannot call only one element. ONLY 3) or ONLY 1).
One cannot omit an element e.g. with?? [[Module:IPA]] has something like split_output "raw"
One cannot override IPA.
One cannot add a second IPA.
One cannot add inline notes before or after.
NEED for structure
* 1 Detach elements. Especially, detach the IPAs as named parameters
Need args for editor.
tested: cla-only, el-only etc.
* 2 OVERRIDE must be possible by editor, especially for med2, el loanwords or minor corrections
el: if override cannot be done, then we CANNOT have modern Greek IPA
because it is impossible to predict some pronunciations
The same goes for some med2 (with synizesis) it may or it may not have synizesis
depending on register.
* 3 second ipa (for optional second pronunciation)
* -- NOTES for every period line
-- now done with tables at templates
Then, editor can make a Template with any combination of the above
Make a line for ONE period: inline (no break lines)
period-label-cla .. IPA-label-cla .. IPA-cla in one line (no break lines)
add parameters for all elements
Combinations: functions for one period only
Period-titles as cla-only, el-only, from Module:User:Sarri.greek/accent qualifier/data
Combinations of many lines: if a next line follows, put <br>
But now, we need the same width column for Period-titles
Better do it at a template
Function: IPA only (for inflectional tables. It could be placed under the forms or under transliterations)
PROBLEMS at IPA = > > > > specific letters or clusters problems at DATA page. see /data page for /ks ps zm/ and others
* ?? if input initial ΆάΈέΉήΊίΌόΎύΏώ / ΑαΕεΗηΙιΟοΥυΩω / Αί αί Εί εί Οί οί Ού ού / Αύ αύ Εύ εύ Ηύ ηύ then, show warning for period=el
(not Polyt? But it works equally well for monotonic ell too)
or a message: Do you wish only el...
local export = {}
local m_data = mw.loadData("Module:User:Sarri.greek/grk-pronunciation/data") --!! mw.loadData("Module:grc-pronunciation/data")
local mark_implied_length = require('Module:grc-accent').mark_implied_length
local strip_accent = require('Module:grc-accent').strip_accent
local m_general_utils = require("Module:utilities")
-- [[Module:grc-utilities]] converts sequences of diacritics to the order required by this module,
-- then replaces combining macrons and breves with spacing ones.
local m_utils = require("Module:grc-utilities")
local m_utils_data = require("Module:grc-utilities/data")
-- NOTE(review): the right-hand side of this assignment was missing in this copy
-- of the module. It is presumably the link helper of Module:grc-utilities
-- (m_utils, required just above) -- confirm before saving.
local full_link = m_utils.link
local tag_text = m_utils.tag
local diacritics = m_utils_data.diacritics
local rearrangeDiacritics = m_utils.pronunciationOrder
local m_IPA = require("Module:IPA")
local m_a = require("Module:User:Sarri.greek/accent qualifier") --!! require("Module:accent qualifier")
--!! put these under periods, according to each language
local lang = require("Module:languages").getByCode("grc") --!!this works for all but not Links to Wikipedias
local sc = require("Module:scripts").getByCode("Polyt") --!!this works fine for polytonic and monotonic
local periods = {'cla', 'koi1', 'koi2', 'med1', 'med2', 'el'} -- was {'cla', 'koi1', 'koi2', 'byz1', 'byz2'}
local inlinePeriods = {'med1', 'med2'} --!! was = {'cla', 'koi2', 'byz2'}
--!! add params for inline notes?
--!! there are commands, not some made up names
local title = mw.title.getCurrentTitle()
local pagename = title.text
local namespace = title.nsText
local rsplit = mw.text.split
local rfind = mw.ustring.find
local usub = mw.ustring.sub
local rmatch = mw.ustring.match
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len --
local ulower = mw.ustring.lower
local U = mw.ustring.char
--- Return the i-th UTF-8 character of s.
-- A dedicated helper because a single character is fetched so often.
-- @param s string to read from (UTF-8 encoded)
-- @param i 1-based character index; a number or a string convertible to one
-- @return the character at position i, or '' when i is 0 or out of bounds
-- Raises an error when i cannot be converted to a number.
local function fetch(s, i)
	i = tonumber(i)

	if type(i) ~= "number" then
		error("fetch requires a number or a string equivalent to a number as its second argument.")
	end

	if i == 0 then
		return ""
	end

	-- Walk the string one UTF-8 sequence at a time (a lead byte followed by
	-- any continuation bytes) and stop at the requested position.
	local n = 0
	for character in string.gmatch(s, "[\1-\127\194-\244][\128-\191]*") do
		n = n + 1
		if n == i then
			return character
		end
	end

	-- out of bounds fetch gives ''
	return ""
end
--!! these are IPA things.
--Combining diacritics are tricky.
-- Marks used by the syllabification code below and (presumably) by the data
-- module's transcriptions. U is mw.ustring.char, so each U(0x...) call yields
-- the character with that code point.
local tie = U(0x35C) -- tie bar
local nonsyllabic = U(0x32F) -- combining inverted breve below
local high = U(0x341) -- combining acute tone mark
local low = U(0x340) -- combining grave tone mark
local rising = U(0x30C) -- combining caron
local falling = diacritics.Latin_circum -- combining circumflex
local midHigh = U(0x1DC4) -- mid–high pitch
local midLow = U(0x1DC6) -- mid–low pitch
local highMid = U(0x1DC7) -- high–mid pitch
local voiceless = U(0x325) -- combining ring below
local aspirated = 'ʰ'
-- Spacing (non-combining) length marks; hasMacronBreve compares fetched
-- characters against these two.
local macron = '¯'
local breve = '˘'
--?? what does this do?
--!! ['frontDiphth'] = "[αο]ι", ['Greekdiacritic'] = m_utils_data.all,
--- Test whether text belongs to the character class named X.
-- The class definitions come from m_data.chars. Most entries are bare lists of
-- characters and get wrapped in a bracket set; "frontDiphth" and
-- "Greekdiacritic" are already complete patterns and are only anchored.
-- @param text string to test (normally a single character)
-- @param X key into m_data.chars
-- @return false when either argument is missing; otherwise the result of
--         mw.ustring.find (truthy on a match, nil on no match)
-- Raises an error (blamed on the caller) when m_data.chars has no entry for X.
local function is(text, X)
	if not text or not X then
		return false
	end

	-- `pattern` is declared local here; without the declaration it leaked into
	-- the global environment.
	local pattern = m_data.chars[X] or error('No data for "' .. X .. '".', 2)
	if X == "frontDiphth" or X == "Greekdiacritic" then
		pattern = "^" .. pattern .. "$"
	else
		pattern = "^[" .. pattern .. "]$"
	end

	return mw.ustring.find(text, pattern)
end
--!! env = environment MAKE diphthongs αι ει?? οι, αυ ευ ηυ but not when iota has dialytics (diaeresis)
--!! ['frontVowel'] = "ιηευ", ['frontDiphth'] = "[αο]ι", ['iDiaer'] = "ϊΐῒῗ",
-- Environment tests that a "~name" condition can call (see decode).
-- Each function receives the term and a character index, and inspects the
-- character(s) FOLLOWING that index.
local env_functions = {
	-- True when the next letter is a front vowel, or the next two letters form
	-- a front diphthong whose second letter does not carry a diaeresis.
	preFront = function(term, index)
		local letter1, letter2 = fetch(term, index + 1), fetch(term, index + 2)
		return is(strip_accent(letter1), "frontVowel") or (is(strip_accent(letter1 .. letter2), "frontDiphth") and not is(letter2, "iDiaer"))
	end,
	-- True when the next letter is an iota (ignoring accents) without diaeresis.
	isIDiphth = function(term, index)
		local letter = fetch(term, index + 1)
		return strip_accent(letter) == 'ι' and not m_data[letter].diaer
	end,
	-- True when the next letter is an upsilon (ignoring accents) without diaeresis.
	isUDiphth = function(term, index)
		local letter = fetch(term, index + 1)
		return strip_accent(letter) == 'υ' and not m_data[letter].diaer
	end,
	-- True when the next character is a spacing macron or breve.
	hasMacronBreve = function(term, index)
		return fetch(term, index + 1) == macron or fetch(term, index + 1) == breve
	end,
}
--- Evaluate a condition string from the data module against a position in term.
--
-- Compound conditions: when the condition contains + ("and") or / ("or"), it
-- is split at the FIRST such operator. The left part is evaluated, then the
-- remainder is evaluated recursively, so chains group to the right and the
-- two operators have no relative precedence.
--
-- Simple conditions ("if-statements"):
--  * A leading number selects the character under consideration, relative to
--    x: -1 is the previous character, 0 the current one, 1 the next.
--  * Equals sign (=): is that character equal to the given character?
--  * Period (.) plus a word: look up that field in the character's entry in
--    the data table.
--  * Tilde (~) plus a name: call the function of that name in env_functions
--    on that position, if the function exists.
--
-- @param condition condition string
-- @param x index in term of the character currently being converted
-- @param term the term being converted
-- @return a truthy or falsy result; nil when the condition has none of the
--         recognised forms
local function decode(condition, x, term)
	if mw.ustring.find(condition, '[+/]') then
		-- Find slash or plus sign preceded by something else, and followed by anything
		-- (including another sequence of slash or plus sign and something else).
		local subcondition1, sep, subcondition2 = mw.ustring.match(condition, "^([^/+]-)([/+])(.*)$")

		if not (subcondition1 or subcondition2) then
			error('Condition "' .. tostring(condition) .. '" is improperly formed')
		end

		if sep == '/' then		-- logical operator: or
			return decode(subcondition1, x, term) or decode(subcondition2, x, term)
		elseif sep == '+' then	-- logical operator: and
			return decode(subcondition1, x, term) and decode(subcondition2, x, term)
		end
	elseif mw.ustring.find(condition, '=') then				-- check character identity
		-- offset is a numeric string; Lua coerces it in the additions below.
		local offset, char = unpack(mw.text.split(condition, "="))
		-- Debug logging only on module and template pages.
		if namespace == "Module" or namespace == "Template" then
			mw.log(term, offset, char, x + offset, fetch(term, x + offset), char == fetch(term, x + offset) )
		end
		return char == fetch(term, x + offset) -- out of bounds fetch gives ''
	elseif mw.ustring.find(condition, '%.') then			-- check character quality
		local offset, quality = unpack(mw.text.split(condition, "%."))
		local character = fetch(term, x + offset)
		return m_data[character] and m_data[character][quality] or false
	elseif mw.ustring.find(condition, '~') then				-- check character(s) using function
		local offset, func = unpack(mw.text.split(condition, "~"))
		return env_functions[func] and env_functions[func](term, x + offset) or false
	end
end
--- Resolve a data entry to a concrete value for position x of term.
-- @param p a string or number (returned as is), or a sequential table of
--          alternatives. Each alternative is either a string/number (an
--          unconditional result) or a pair { condition, result }; the first
--          alternative that applies wins.
-- @param x index in term of the character under consideration
-- @param term the term being converted
-- @return the resolved value, or nil when p is a table and no alternative applies
-- Raises an error when p is neither a string, a number nor a table.
local function check(p, x, term)
	if type(p) == 'string' or type(p) == 'number' then
		return p
	elseif type(p) == 'table' then   --This table is sequential, with a variable number of entries.
		for _, possP in ipairs(p) do
			if type(possP) == 'string' or type(possP) == 'number' then
				return possP
			elseif type(possP) == 'table' then   --This table is paired, with two values: a condition and a result.
				-- Declared local; these two names used to leak as globals.
				local rawCondition, rawResult = possP[1], possP[2]
				if decode(rawCondition, x, term) then
					-- A non-string result is itself an entry to resolve.
					return (type(rawResult) == 'string') and rawResult or check(rawResult, x, term)
				end
			end
		end
	else
		error('"p" is of unrecongized type ' .. type(p))
	end
end
--?? handle lines/periods separately too?
--!! add notes NEED somewhere here
--- Convert a prepared term into raw (unsyllabified) IPA for each period.
-- @param term the term, already normalised by the caller
-- @param periodstart optional period code; output starts at this period and
--        covers every later one in `periods`. nil or '' means all periods.
-- @return IPAs: table mapping period code -> { IPA = string }
-- @return outPeriods: the period codes that were produced, in order
-- Raises an error when term is nil.
local function convert_term(term, periodstart)
	if not term then error('The variable "term" in the function "convert_term" is nil.') end

	local IPAs = {}
	local outPeriods = {}

	-- Without a starting period, collect from the very first period.
	local start
	if periodstart and periodstart ~= "" then
		start = false
	else
		start = true
	end

	for _, period in ipairs(periods) do
		if period == periodstart then
			start = true
		end
		if start then
			IPAs[period] = {}
			table.insert(outPeriods, period)
		end
	end

	local length, x, advance, letter, p = mw.ustring.len(term), 1, 0, '', nil
	while x <= length do
		letter = fetch(term, x)
		local data = m_data[letter]
		if not data then		-- no data found
			-- explicit pass: characters without data produce no output
		else
			-- check to see if a multicharacter search is warranted
			advance = data.pre and check(data.pre, x, term) or 0
			-- When it is, use the entry for the whole sequence instead.
			p = (advance ~= 0) and m_data[mw.ustring.sub(term, x, x + advance)].p or data.p
			for _, period in ipairs(outPeriods) do
				table.insert(IPAs[period], check(p[period], x, term))
			end
			-- Skip the extra characters consumed by the sequence.
			x = x + advance
		end
		x = x + 1
	end

	--Concatenate the IPAs
	for _, period in ipairs(outPeriods) do
		IPAs[period] = { IPA = table.concat(IPAs[period], '')}
	end

	return IPAs, outPeriods
end
--- Find the index of the last character of the current syllable.
-- @param word the remaining IPA of the word being syllabified
-- @param nVowel index of the next vowel (or stress mark) after the current
--        syllable's vowel; not used when wordEnd is set
-- @param wordEnd truthy when no further vowel follows
-- @return index of the last character belonging to the current syllable
-- Raises an error when word is nil.
local function find_syllable_break(word, nVowel, wordEnd)
	if not word then error('The variable "word" in the function "find_syllable_break" is nil.') end

	--[==[ Author's draft, NOT active. It cannot run as written: `period` is
	not available in this function, the first line has no `then`, and
	mw.ustring.match takes the pattern as its second argument.
	--!! unbreakable or special consonants for el, ...
	--!! ks ps & zm at el, med2, (med1?), I don't know about koi and grc
	--!! check example λοκσι λοξι
	elseif period == 'el' or period == 'med2' or period == 'med1'
		if mw.ustring.match(word, nVowel - 1, "z") then
		if mw.ustring.match(word, nVowel - 2, "m") then
		return nVowel - 4
	--!! consDoule (ks ps if they represent ξ ψ but not κσ)
	--!! if el τσ τζ = with ties t͡s d͡z
	--!! if med, el αυφ αυβ do not repeat aff avv Same for ευ ηυ
	]==]

	if wordEnd then
		-- Last syllable: it takes everything that is left.
		return mw.ustring.len(word)
	elseif is(fetch(word, nVowel - 1), "liquid") then
		-- Obstruent (optionally aspirated) + liquid goes with the next syllable.
		if is(fetch(word, nVowel - 2), "obst") then
			return nVowel - 3
		elseif fetch(word, nVowel - 2) == aspirated and is(fetch(word, nVowel - 3), "obst") then
			return nVowel - 4
		else
			return nVowel - 2
		end
	elseif is(fetch(word, nVowel - 1), "cons") then
		return nVowel - 2
	elseif fetch(word, nVowel - 1) == aspirated and is(fetch(word, nVowel - 2), "obst") then
		return nVowel - 3
	elseif fetch(word, nVowel - 1) == voiceless and fetch(word, nVowel - 2) == 'r' then
		return nVowel - 3
	else
		-- No consonant before the next vowel: break right before it.
		return nVowel - 1
	end
end
--- Split the IPA of one word into syllables.
-- Syllables are joined with '.', and a stress mark 'ˈ' is moved to the start
-- of its syllable (replacing the '.' there). A monosyllabic word loses its
-- stress mark.
-- @param word IPA string of a single word
-- @return the syllabified string, or nil when word is ''
local function syllabify_word(word)
	local syllables = {}
	--[[	cVowel means "current vowel", nVowel "next vowel",
			sBreak "syllable break".				]]--
	local cVowel, nVowel, sBreak, stress, wordEnd, searching
	-- Declared local; these two names used to leak as globals.
	local cVowelFound, letter

	while word ~= '' do
		cVowel, nVowel, sBreak, stress = false, false, false, false

		--First thing is to find the first vowel.
		searching = 1
		cVowelFound = false
		while not cVowel do
			letter = fetch(word, searching)
			local nextLetter = fetch(word, searching + 1)
			if cVowelFound then
				if (is(letter, "vowel") and nextLetter ~= nonsyllabic) or is(letter, "cons") or letter == '' or letter == 'ˈ' then
					cVowel = searching - 1
				elseif is(letter, "diacritic") then
					searching = searching + 1
				elseif letter == tie then
					-- A tie bar joins the vowel to what follows: keep looking.
					cVowelFound = false
					searching = searching + 1
				else
					searching = searching + 1
				end
			else
				if is(letter, "vowel") then
					cVowelFound = true
				elseif letter == 'ˈ' then
					stress = true
				elseif letter == '' then
					-- Ran off the end without finding any vowel. Treat what is
					-- left as one syllable instead of searching forever.
					cVowel = searching - 1
					wordEnd = true
				end
				searching = searching + 1
			end
		end

		--Next we try and find the next vowel or the end.
		searching = cVowel + 1
		while (not nVowel) and (not wordEnd) do
			letter = fetch(word, searching)
			if is(letter, "vowel") or letter == 'ˈ' then
				nVowel = searching
			elseif letter == '' then
				wordEnd = true
			else
				searching = searching + 1
			end
		end

		--?? keep ψ = ps and ξ = ks toghther .ps. .ks. at med1 med2 el?
		--Finally we find the syllable break point.
		sBreak = find_syllable_break(word, nVowel, wordEnd)

		--Pull everything up to and including the syllable Break.
		local syllable = usub(word, 1, sBreak)

		--If there is a stress accent, then we need to move it to the
		--beginning of the syllable, unless it is a monosyllabic word,
		--in which case we remove it altogether.
		if stress then
			if next(syllables) or syllable ~= word then
				syllable = 'ˈ' .. rsubn(syllable, 'ˈ', '')
			else
				syllable = rsubn(syllable, 'ˈ', '')
			end
			stress = false
		end

		table.insert(syllables, syllable)
		word = usub(word, sBreak + 1)
	end

	local out = nil

	if #syllables > 0 then
		out = table.concat(syllables, '.')
		-- A stress mark already marks the syllable boundary.
		out = rsubn(out, '%.ˈ', 'ˈ')
	end
	return out
end
--- Syllabify the IPA of every requested period, word by word.
-- @param IPAs table mapping period code -> { IPA = string }; updated in place
-- @param periods list of period codes to process
-- @return the same IPAs table
local function syllabify(IPAs, periods)
	for _, period in ipairs(periods) do
		local ipa = {}
		for _, word in ipairs(rsplit(IPAs[period].IPA, ' ')) do
			local word_ipa = syllabify_word(word)
			-- syllabify_word returns nil for an empty word; drop those.
			if word_ipa then
				table.insert(ipa, word_ipa)
			end
		end
		IPAs[period].IPA = table.concat(ipa, ' ')
	end
	return IPAs
end
--??TODO is everything automatically brachy for koi1 koi2 med1 med2?
--- Build the preview-only note asking the editor to mark ambiguous vowel lengths.
-- @param ambig list of the ambiguous vowels that were found in the term
-- @param ambig_letter_list currently unused
-- @return '' when there is nothing ambiguous; otherwise the note followed by
--         the tracking category
local function make_ambig_note(ambig, ambig_letter_list)
	-- The table ambig is filled with all the ambiguous vowels that have been found in the term.
	local ambig_note = ''
	if ambig and #ambig > 0 then
		-- Singular/plural wording.
		local agr = (#ambig > 1) and { 's ', 'each one' } or { ' ', 'it' }

		-- The paragraph is now closed with </p>; it was left open before.
		ambig_note = '\n<p class="previewonly">Mark the vowel length for 5th century Attic of the ambiguous vowel' .. agr[1]
			.. mw.text.listToText(ambig) .. ' by adding a macron after ' .. agr[2]
			.. ' if it is long, or a breve if it is short. By default, [[Module:grc-pronunciation]] assumes it is short if unmarked.'
			.. '<br/><small>[This message shows only in preview mode.]</small></p>'
			--??TODO Take this Category off? no, but it should only apply to cla.
			.. m_general_utils.format_categories(
				{ 'Ancient Greek terms with incomplete pronunciation' }, lang)
	end
	return ambig_note
end
--- Assemble the final wikitext: a one-line summary plus one line per period.
-- @param IPAs table mapping period code -> { IPA = string }
-- @param ambig list of ambiguous vowels (passed on to make_ambig_note)
-- @param periods list of period codes to show, in order
-- @param ambig_letter_list passed on to make_ambig_note
-- @return wikitext string
local function make_table(IPAs, ambig, periods, ambig_letter_list)
	--Final format
	local inlineProns = {}
	local listOfProns = {}
	local fullProns = {}
	local periods2 = {}

	--[==[ Author's draft for per-period notes, NOT active. It cannot run as
	written: notes_full and inlineNotes are not defined, and a string is
	concatenated with a table.
	--!! add notes
	--?? need periodnotes td next to eadh periodline local inline_period_notes = {}
	local listOfNotes = {}
	... .. notes_full({{inline_notes = ' || ' .. {period .. '-note'} }})
	for _, period in ipairs(periods) do
		if periods2[period] then
			local inline_notes = ' || ' .. {period .. '-note'}
			table.insert(inlineNotes, {inline_notes = inline_notes})
			table.insert(listOfNotes, inline_notes)
	]==]

	for _, period in ipairs(periods) do
		--!! I change grc to grk at --{'grc-' .. period})
		-- NOTE(review): the call that formats the period label was missing in
		-- this copy (only its argument and closing parenthesis survived).
		-- m_a.show is an assumption -- confirm against the accent-qualifier module.
		table.insert(fullProns, '* ' .. m_a.show({'grk-' .. period}) .. ' ' .. m_IPA.format_IPA_full { lang = lang, items = {{pron = '/' .. IPAs[period].IPA .. '/'}} })
		periods2[period] = true
	end

	-- The summary line shows only the inline periods that were requested.
	for _, period in ipairs(inlinePeriods) do
		if periods2[period] then
			local pron = '/' .. IPAs[period].IPA .. '/'
			table.insert(inlineProns, {pron = pron})
			table.insert(listOfProns, pron)
		end
	end

	--?? is this the length of IPA or the titles or both? I fixed the titles, small and balanced.
	--?? THE IPA length it is too big. Need autofit? and after it a note for eachline?
	--?? and manual like med1=xyz
	-- Box width in em, estimated from character counts (0.68 em per character).
	-- med2 is absent when only 'el' was requested; fall back to the last
	-- requested period so that IPAs.med2 is never indexed while nil.
	local widthSource = IPAs.med2 or IPAs[periods[#periods]]
	local widthIPA = widthSource and widthSource.IPA or ""
	local inlineIPAlength = math.floor(math.max(
		mw.ustring.len("IPA(key): " .. table.concat(listOfProns, ' → ')) * 0.68,
		mw.ustring.len("(15th c. Medieval of Constantinople) IPA(key): /" .. widthIPA .. "/") * 0.68
	))

	local inline = '\n<div class="vsShow" style="display:none">\n* ' .. m_IPA.format_IPA_full { lang = lang, items = inlineProns, separator = ' → ' } .. '</div>'

	--!! ambiguous for cal
	local full = '\n<div class="vsHide">\n' .. table.concat(fullProns, '\n') .. make_ambig_note(ambig, ambig_letter_list) .. '</div>'

	--!! I do not want More/Less hide/show
	--!! take off switcher -- <div class="vsSwitcher"
	--?? is the IPA length here? It is too big. And I need a note/per line. And a manual med2=xyz What is toggle... float right??
	return '<div data-toggle-category="pronunciations" style="width: ' .. inlineIPAlength .. 'em; max-width:100%;"><span class="vsToggleElement" style="float: right;"> </span>' .. inline .. full .. '</div>'
end
--!! make period default = med1 at [[Tempalte:gkm-IPA]], not here
--- Entry point: build the pronunciation block for a term.
-- @param frame a Scribunto frame (the arguments are read from its parent), or
--        a plain table of arguments when called from Lua, as export.example does
--        Argument 1 is the term (default: the page name); `period` is the
--        first period to show (default "cla", i.e. all periods).
-- @return wikitext produced by make_table
-- Raises an error when a macron or breve follows ε, ο, η or ω.
function export.create(frame)
	--?? if ["period"] == 'med1' then default = "med1" else {default = "med1"} end},
	local params = {
		[1] = {default = pagename},
		["period"] = {default = "cla"},
	}
	--!! change "grc-pronunciation", "create")
	-- NOTE(review): this names "gkm-pronunciation" while export.example names
	-- "grk-pronunciation" -- confirm which one is intended.
	local args = require("Module:parameters").process(frame.getParent and frame:getParent().args or frame, params, nil, "User:Sarri.greek/gkm-pronunciation", "create")
	local term = ulower(args[1])

	-- Normalise the diacritics and mark implied vowel lengths; log the change
	-- when normalisation altered the input.
	local old = term
	term = m_utils.standardDiacritics(term)
	term = mark_implied_length(term)
	if mw.ustring.toNFD(old) ~= term then
		mw.log(old .. " > " .. term)
	end

	local decomposed = mw.ustring.toNFD(term)
	if rfind(decomposed, "[εοηω]" .. m_utils_data.diacritic .. "*[" .. diacritics.spacing_macron .. diacritics.spacing_breve .. diacritics.breve .. diacritics.macron .. "]") then
		error("Macrons and breves cannot be placed after the letters ε, ο, η, or ω.")
	end

	-- Ambiguous vowel lengths are only looked up when output starts at "cla".
	local ambig, ambig_letter_list
	if args.period == "cla" then
		ambig, ambig_letter_list = m_utils.findAmbig(term)
	end

	term = rsubn(term, 'ς', 'σ')
	term = rsubn(term, 'ῤ', 'ρ')
	term = rearrangeDiacritics(term)

	local IPAs, periods = convert_term(term, args.period)
	IPAs = syllabify(IPAs, periods)

	return make_table(IPAs, ambig, periods, ambig_letter_list)
end
--- Entry point: build a wikitable of example terms with their pronunciations.
-- Argument 1 (read from the parent frame) is a comma-separated list of terms;
-- a term may be followed by " (period=xxx)" to choose the starting period.
-- @param frame Scribunto frame
-- @return wikitext of the table
function export.example(frame)
	--!! adding little title without bullet
	-- Not used yet (see the question on the next line).
	local little_title = '<div style="width:500px; padding-top:0px; padding-bottom:0px; background:#fafafa; font-size:11px; line-height:105%;"><i>An approximation of non-dialectal pronunciation.</i> (? = uncertain or debated)</div>\n'
	local output = { '{| class="wikitable"' } --?? why cannot i put here '|' .. little_title?

	local params = {
		[1] = {}
	}
	--!! changed "grc-pronunciation", "example") --!! it works WHATEVER i write
	local args = require("Module:parameters").process(frame:getParent().args, params, nil, "User:Sarri.greek/grk-pronunciation", "example")

	local terms = mw.text.split(args[1], ",%s+")

	-- ipairs keeps the rows in the order the terms were given; pairs does not
	-- guarantee any order.
	for _, term in ipairs(terms) do
		--?? cla? I am not changing this, as everything works fine with it.
		local period = rmatch(term, "%(period ?= ?([^%)]+)%)") or "cla"
		-- The entry is the text before " (", or the whole term without one.
		local entry = rmatch(term, "([^%(]+) %(") or term or error('No term found in "' .. term .. '".')
		local link = full_link(entry)
		local IPA = export.create{ entry, ["period"] = period }
		--?? local periodnotes = ''
		table.insert(output, "\n|-\n| " .. link .. " || " .. IPA) --?? " || " .. periodnotes
	end

	table.insert(output, "\n|}")

	return table.concat(output)
end
return export
--Things we still need:
--Voicing of sigma around (after?) voiced stops.
--Proper alerts for editors, especially on ambiguous vowels.
--?? that IPA is too long. Put notes at end like qual
--?? present lines/periods independently too
--?? el (perhaps med1 too) must have manual option for override (for loanwords)
--?? allow manuals e.g. med2=xxxx med2-note=as in Cretan dialect