Module:sa-pronunc/sandbox2
Appearance
- The following documentation is located at Module:sa-pronunc/sandbox2/documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox of (diff)
-- Table of functions this module exposes (returned at the end of the file).
local export = {}
-- Short aliases for the Unicode-aware string helpers of the mw.ustring library.
local u = mw.ustring.char
local gsub = mw.ustring.gsub
-- Combining diacritics, built from their code points. They are spliced into
-- IPA strings and into ustring patterns further down.
-- U+0301 combining acute accent; substituted for RISING in the Vedic phonemic output.
local HIGH = u(0x0301)
-- U+030C combining caron; placed on the vowel of the accented syllable.
local RISING = u(0x030C)
-- U+0300 combining grave accent; placed on the vowel of the syllable before the accent.
local LOW = u(0x0300)
-- U+1DC8 combining grave-acute-grave; placed on the vowel of the syllable after the accent.
local PEAKING = u(0x1DC8)
-- U+0302 combining circumflex accent; not referenced elsewhere in this file.
local FALLING = u(0x0302)
-- U+0361 combining double inverted breve; not referenced elsewhere in this file.
local COARTIC = u(0x0361)
-- U+032A combining bridge below; the dental mark carried by t̪, d̪, n̪, l̪, s̪.
local DENTAL = u(0x032A)
-- U+0306 combining breve; the flap mark carried by ɭ̆.
local FLAP = u(0x0306)
-- U+031A combining left angle above; the "no audible release" mark added by abhinidhana().
local NORELEASE = u(0x031A)
-- U+0329 combining vertical line below; the syllabicity mark of r̩ and l̩.
local SYLLABIC = u(0x0329)
-- U+0303 combining tilde; the nasalisation mark.
local NASAL = u(0x0303)
local m_IPA = require("Module:IPA")
local lang = require("Module:languages").getByCode("sa")
local m_a = require("Module:accent qualifier")
-- Devanagari consonant letters mapped to their IPA values.
-- convert_word() looks entries up here and appends the inherent vowel "ɐ"
-- unless the following character is a key of `diacritics`.
-- Note that the last key on the ळ line is a three-character sequence
-- (letter + virama + ह), unlike every other key, which is a single letter.
local consonants = {
-- velar series
["क"] = "k", ["ग"] = "ɡ", ["ख"] = "kʰ", ["घ"] = "ɡʱ", ["ङ"] = "ŋ",
-- palatal series
["च"] = "c", ["ज"] = "ɟ", ["छ"] = "cʰ", ["झ"] = "ɟʱ", ["ञ"] = "ɲ",
-- dental series
["त"] = "t̪", ["द"] = "d̪", ["थ"] = "t̪ʰ", ["ध"] = "d̪ʱ", ["न"] = "n̪",
-- retroflex series
["ट"] = "ʈ", ["ड"] = "ɖ", ["ठ"] = "ʈʰ", ["ढ"] = "ɖʱ", ["ण"] = "ɳ",
-- labial series
["प"] = "p", ["ब"] = "b", ["फ"] = "pʰ", ["भ"] = "bʱ", ["म"] = "m",
-- semivowels and liquids
["य"] = "j", ["र"] = "ɽ", ["ल"] = "l̪", ["व"] = "w", ["ळ"] = "ɭ̆", ["ळ्ह"] = "ɭ̆ʱ",
-- sibilants and h
["श"] = "ɕ", ["ष"] = "ʂ", ["स"] = "s̪", ["ह"] = "ɦ",
}
-- Dependent vowel signs (matras) mapped to their IPA values.
-- The final entry is the virama, which maps to the empty string: its presence
-- after a consonant suppresses the inherent vowel in convert_word().
local diacritics = {
["ा"] = "ɑː", ["ि"] = "i", ["ी"] = "iː", ["ु"] = "u", ["ू"] = "uː", ["ृ"] = "r̩", ["ॄ"] = "r̩ː",
["ॢ"] = "l̩", ["ॣ"] = "l̩ː", ["े"] = "ɐɪ", ["ै"] = "ɑːɪ", ["ो"] = "ɐʊ", ["ौ"] = "ɑːʊ", ["्"] = "",
}
-- Set of vowel phonemes (as produced by `diacritics`, `tt` and the inherent "ɐ").
-- syllabify() closes the current syllable whenever it meets one of these.
local vowel_list = {
["ɐ"] = true, ["ɑː"] = true, ["i"] = true, ["iː"] = true, ["u"] = true, ["uː"] = true, ["r̩"] = true, ["r̩ː"] = true,
["l̩"] = true, ["l̩ː"] = true, ["ɐɪ"] = true, ["ɑːɪ"] = true, ["ɐʊ"] = true, ["ɑːʊ"] = true,
}
-- Set of stop phonemes (plain and aspirated, voiceless and voiced), one row
-- per place of articulation. shift_to_codas() consults it when a
-- three-consonant onset begins with two stops.
local stop_list = {
["k"] = true, ["ɡ"] = true, ["kʰ"] = true, ["ɡʱ"] = true,
["c"] = true, ["ɟ"] = true, ["cʰ"] = true, ["ɟʱ"] = true,
["t̪"] = true, ["d̪"] = true, ["t̪ʰ"] = true, ["d̪ʱ"] = true,
["ʈ"] = true, ["ɖ"] = true, ["ʈʰ"] = true, ["ɖʱ"] = true,
["p"] = true, ["b"] = true, ["pʰ"] = true, ["bʱ"] = true,
}
-- Sonority rank of every consonant phoneme (1 = least sonorous, 8 = most).
-- shift_to_codas() uses the ranks to find the least sonorous member of an
-- onset cluster of four or more consonants. A phoneme missing from this
-- table yields nil there, and comparing nil with a number raises an error.
local consonant_sonority = {
-- voiceless stops and affricates
["k"] = 1, ["kʰ"] = 1,
["c"] = 1, ["cʰ"] = 1,
["t̪"] = 1, ["t̪ʰ"] = 1,
["ʈ"] = 1, ["ʈʰ"] = 1,
["p"] = 1, ["pʰ"] = 1,
-- voiceless fricatives (including the visarga "h" and its Vedic variants "x", "ɸ")
["ɕ"] = 2, ["ʂ"] = 2, ["s̪"] = 2, ["h"] = 2, ["x"] = 2, ["ɸ"] = 2,
-- voiced stops and affricates
["ɡ"] = 3, ["ɡʱ"] = 3,
["ɟ"] = 3, ["ɟʱ"] = 3,
["d̪"] = 3, ["d̪ʱ"] = 3,
["ɖ"] = 3, ["ɖʱ"] = 3,
["b"] = 3, ["bʱ"] = 3,
-- voiced fricatives
["ɦ"] = 4,
-- nasals (the last two are the chandrabindu and anusvara placeholders from `tt`)
["ŋ"] = 5, ["ɲ"] = 5, ["n̪"] = 5, ["ɳ"] = 5, ["m"] = 5, ["m̐"] = 5, ["ṃ"] = 5,
-- flaps
["ɽ"] = 6,
-- laterals
["l̪"] = 7, ["ɭ̆"] = 7, ["ɭ̆ʱ"] = 7,
-- glides
["j"] = 8, ["w"] = 8,
}
-- Remaining Devanagari characters handled by convert_word(): independent
-- vowel letters and the signs that are neither consonants nor matras.
-- The chandrabindu and anusvara values are placeholders that later
-- substitutions (in convert_word() and anusvara()) resolve.
local tt = {
-- vowels (independent letters)
["अ"] = "ɐ", ["आ"] = "ɑː", ["इ"] = "i", ["ई"] = "iː", ["उ"] = "u", ["ऊ"] = "uː", ["ऋ"] = "r̩", ["ॠ"] = "r̩ː",
["ऌ"] = "l̩", ["ॡ"] = "l̩ː", ["ए"] = "ɐɪ", ["ऐ"] = "ɑːɪ", ["ओ"] = "ɐʊ", ["औ"] = "ɑːʊ",
-- visarga
["ः"] = "h",
-- chandrabindu
["ँ"] = "m̐",
-- anusvara
["ं"] = "ṃ",
-- avagraha (maps to the empty string, i.e. it has no sound of its own)
['ऽ'] = "",
-- Vedic extensions (mapped to the velar and bilabial fricatives)
['ᳵ'] = "x", ['ᳶ'] = "ɸ",
}
-- Vowel phoneme -> the same vowel carrying the RISING tone mark.
-- The keys are exactly those of vowel_list; the mark is inserted directly
-- after the first code point, i.e. before any syllabicity mark, length mark
-- or diphthong glide (e.g. "ɑːɪ" becomes "ɑ" .. RISING .. "ːɪ").
local rising_vowel = {}
for vowel in pairs(vowel_list) do
    rising_vowel[vowel] = (gsub(vowel, "^(.)", "%1" .. RISING))
end
-- Vowel phoneme -> the same vowel carrying the LOW tone mark.
-- The keys are exactly those of vowel_list; the mark is inserted directly
-- after the first code point, i.e. before any syllabicity mark, length mark
-- or diphthong glide (e.g. "r̩ː" becomes "r" .. LOW .. "̩ː").
local low_vowel = {}
for vowel in pairs(vowel_list) do
    low_vowel[vowel] = (gsub(vowel, "^(.)", "%1" .. LOW))
end
-- Vowel phoneme -> the same vowel carrying the PEAKING tone mark.
-- The keys are exactly those of vowel_list; the mark is inserted directly
-- after the first code point, i.e. before any syllabicity mark, length mark
-- or diphthong glide (e.g. "ɐʊ" becomes "ɐ" .. PEAKING .. "ʊ").
local peaking_vowel = {}
for vowel in pairs(vowel_list) do
    peaking_vowel[vowel] = (gsub(vowel, "^(.)", "%1" .. PEAKING))
end
--- Re-attach leading onset consonants to the preceding syllable as its coda
-- (Weerasinghe-Wasala-Gamage method).
-- @param syllables sequence of syllables; each syllable is a sequence of
--   phoneme strings whose last element is the vowel
-- @return the same `syllables` table, modified in place
local function shift_to_codas(syllables)
    -- How many leading consonants of `syll` belong to the previous syllable.
    local function coda_count(syll)
        local size = #syll
        if size < 3 then
            -- a single onset consonant (or none) stays where it is
            return 0
        end
        if size == 3 then
            -- V.CCV => VC.CV
            return 1
        end

        local prevocalic = syll[size - 1]
        local before_r_or_y = prevocalic == "ɽ" or prevocalic == "j"

        if size == 4 then
            if before_r_or_y or (stop_list[syll[1]] and stop_list[syll[2]]) then
                -- V.CCrV / V.CCyV => VC.CrV / VC.CyV; two initial stops => VC.CCV
                return 1
            end
            -- V.CCCV => VCC.CV
            return 2
        end

        -- four or more consonants
        if before_r_or_y then
            return size - 3
        end
        -- otherwise cut at the consonant of least sonority, scanning leftwards
        local count = size - 1
        local lowest = consonant_sonority[prevocalic]
        for pos = size - 1, 1, -1 do
            local sonority = consonant_sonority[syll[pos]]
            if sonority < lowest then
                count = pos
                lowest = sonority
            end
        end
        return count
    end

    -- the first syllable has no predecessor to receive a coda
    for idx = 2, #syllables do
        for _ = 1, coda_count(syllables[idx]) do
            table.insert(syllables[idx - 1], table.remove(syllables[idx], 1))
        end
    end
    return syllables
end
--- Group a phoneme list into syllables, apply the Vedic pitch accent and the
-- Classical stress mark, and join everything into one string.
-- @param remainder sequence of phoneme strings; it is consumed (emptied)
-- @param accent 1-based index of the accented syllable, or nil for none;
--   values that are not a valid syllable index are ignored
-- @return the syllables joined with ".", with "ˈ" prefixed to the stressed one
local function syllabify(remainder, accent)
    local syllables = {}
    local syll = {}
    while #remainder > 0 do
        local phoneme = table.remove(remainder, 1)
        table.insert(syll, phoneme)
        if vowel_list[phoneme] then
            -- a vowel closes the syllable
            table.insert(syllables, syll)
            syll = {}
        end
    end
    -- store whatever consonants remain after the last vowel
    local final_cons = syll

    -- A word without any vowel has no syllable to attach anything to;
    -- return its consonants as they are instead of indexing syllables[0].
    if #syllables == 0 then
        return table.concat(final_cons, "")
    end

    -- Vedic pitch accent (skipped unless `accent` is a valid syllable index)
    if accent ~= nil and accent >= 1 and accent <= #syllables and accent % 1 == 0 then
        -- replace the vowel (always the last element) by its toned variant
        local function retone(index, tone_table)
            local target = syllables[index]
            target[#target] = tone_table[target[#target]]
        end
        retone(accent, rising_vowel)
        if accent - 1 > 0 then -- sannatara takes precedence
            retone(accent - 1, low_vowel)
        end
        if accent + 1 <= #syllables then -- then svarita
            retone(accent + 1, peaking_vowel)
        end
    end

    syllables = shift_to_codas(syllables)

    -- A short vowel is one of ɐ i u r̩ l̩, optionally carrying a tone mark.
    -- The tone tables put the tone mark BEFORE the syllabicity mark, so both
    -- marks are accepted in any order here.
    local short_vowel_patt = "^[ɐiurl][" .. SYLLABIC .. RISING .. LOW .. PEAKING .. "]*$"
    -- a syllable is heavy when its final segment is not a short vowel
    local function is_heavy(index)
        local target = syllables[index]
        return mw.ustring.match(target[#target], short_vowel_patt) == nil
    end

    -- Classic stress accent
    local num_sylls = #syllables
    local stressed
    if num_sylls == 2 then
        stressed = 1
    elseif num_sylls == 3 then
        -- stress the second syllable if it is heavy, else the first
        stressed = is_heavy(2) and 2 or 1
    elseif num_sylls >= 4 then
        if is_heavy(num_sylls - 1) then
            stressed = num_sylls - 1
        elseif is_heavy(num_sylls - 2) then
            stressed = num_sylls - 2
        else
            stressed = num_sylls - 3
        end
    end
    if stressed then
        table.insert(syllables[stressed], 1, 'ˈ')
    end

    -- If there are phonemes left, then the word ends in a consonant:
    -- add them to the last syllable
    for _, phoneme in ipairs(final_cons) do
        table.insert(syllables[#syllables], phoneme)
    end
    for i, parts in ipairs(syllables) do
        syllables[i] = table.concat(parts, "")
    end
    return table.concat(syllables, ".")
end
-- Realisation of the anusvara placeholder before a given consonant.
-- Keys are the consonant letter plus its dental mark, if any, exactly as
-- assembled in anusvara() from the captures `cons .. mark`.
local anu_to_nasals = {
-- before fricatives and ɽ (the original author labels this group "earlier")
["s̪"] = "ŋ̊",
["ɕ"] = "ŋ̊",
["ʂ"] = "ŋ̊",
["h"] = "ŋ̊",
["ɦ"] = "ŋ",
["ɽ"] = "ŋ",
-- before stops: the nasal of the same place of articulation (labelled "later")
["k"] = "ŋ", ["ɡ"] = "ŋ",
["c"] = "ɲ", ["ɟ"] = "ɲ",
["t̪"] = "n̪", ["d̪"] = "n̪",
["ʈ"] = "ɳ", ["ɖ"] = "ɳ",
["p"] = "m", ["b"] = "m",
}
--- Resolve the anusvara placeholder "ṃ" in a transcription.
-- 1. At the very end of the text it becomes "m".
-- 2. Before a stop, fricative or ɽ (optionally across a space, syllable dot
--    or stress mark) it becomes the nasal listed in anu_to_nasals.
-- 3. Anywhere else it disappears and nasalises the preceding vowel.
-- @param text transcription as produced by convert_words()
-- @return the transcription with the placeholders resolved
local function anusvara(text)
    -- Optional tone mark. This has to be a character class: concatenating the
    -- three marks without brackets would demand RISING followed by LOW, which
    -- never occurs, and step 3 would then never match.
    local tone = "[" .. RISING .. LOW .. PEAKING .. "]?"

    text = gsub(text, "ṃ$", "m")
    text = gsub(
        text,
        "ṃ([ %.ˈ]?)([kɡtdʈɖcɟpbsɕʂhɦɽ])(" .. DENTAL .. "?)",
        function(div, cons, mark)
            local nasal = anu_to_nasals[cons .. mark]
            if nasal == nil then
                -- no entry (e.g. a letter without its dental mark): leave the
                -- match untouched rather than concatenating nil
                return "ṃ" .. div .. cons .. mark
            end
            return nasal .. div .. cons .. mark
        end
    )
    -- NOTE(review): toned r̩/l̩ carry the tone before the syllabicity mark and
    -- are therefore not matched by this pattern - confirm whether that matters.
    text = gsub(
        text,
        "([ɐɑiurleo])(" .. SYLLABIC .. "?)(" .. tone .. ")(ː?)([ɪʊ]?)ṃ",
        "%1%2" .. NASAL .. "%3%4%5"
    )
    return text
end
--- Transcribe one Devanagari word into syllabified IPA.
-- @param word the word in Devanagari
-- @param accent 1-based index of the accented syllable, or nil
-- @return the transcription; anusvara placeholders are still unresolved
local function convert_word(word, accent)
    -- split the word into code points
    local chars = {}
    gsub(word, ".", function(c) table.insert(chars, c) end)

    local t = {}
    local i = 1
    while i <= #chars do
        local c = chars[i]
        -- Try a three-character consonant key first (letter + virama + letter,
        -- such as the aspirated ळ sequence in `consonants`); a lookup of single
        -- characters alone could never reach such a key.
        local width = 3
        local cons = chars[i + 2] and consonants[c .. chars[i + 1] .. chars[i + 2]]
        if not cons then
            width = 1
            cons = consonants[c]
        end

        if cons then
            table.insert(t, cons)
            -- inherent vowel, unless a vowel sign or virama follows
            if not diacritics[chars[i + width]] then
                table.insert(t, "ɐ")
            end
        else
            local phoneme = diacritics[c] or tt[c]
            -- Virama and avagraha map to "": emit nothing for them, otherwise
            -- the syllabifier would count the empty string as a consonant.
            if phoneme and phoneme ~= "" then
                table.insert(t, phoneme)
            end
        end
        i = i + width
    end

    word = syllabify(t, accent)
    -- a stress mark already marks the syllable boundary
    word = gsub(word, "%.ˈ", "ˈ")
    -- chandrabindu: drop the placeholder and nasalise the preceding vowel.
    -- The optional tone mark has to be a character class; the three marks
    -- concatenated without brackets would never match.
    word = gsub(
        word,
        "([ɐɑiurleo])(" .. SYLLABIC .. "?)([" .. RISING .. LOW .. PEAKING .. "]?)(ː?)([ɪʊ]?)m̐",
        "%1%2" .. NASAL .. "%3%4%5"
    )
    return word
end
--- Transcribe a space-separated phrase word by word.
-- @param words the phrase in Devanagari
-- @param accents table mapping word position to the accented syllable index
--   of that word (holes allowed); nil is treated as "no accents"
-- @return the transcriptions of the words joined by single spaces
local function convert_words(words, accents)
    accents = accents or {}
    local result = {}
    local word_num = 0
    for word in mw.text.gsplit(words, " ") do
        word_num = word_num + 1
        result[word_num] = convert_word(word, accents[word_num])
    end
    -- returned directly: the original stored this in an undeclared (global) `text`
    return table.concat(result, " ")
end
--- Run the phonological processes shared by all dialects.
-- At present the only such process is the resolution of anusvara.
-- @param text transcription from convert_words()
-- @return the processed transcription
local function phon_procs(text)
    local processed = anusvara(text) -- Anusvāra
    return processed
end
--- Mark a stop as unreleased when another stop follows it.
-- The two stops may be separated by a space, a syllable dot or a stress
-- mark; the first stop may carry the dental mark.
-- @param text transcription
-- @return the transcription with NORELEASE inserted after such stops
local function abhinidhana(text)
    local stop = "[kɡtdʈɖcɟpb]"
    local pattern = "(" .. stop .. ")(" .. DENTAL .. "?)([ %.ˈ]?)(" .. stop .. ")"
    local marked = gsub(text, pattern, "%1%2" .. NORELEASE .. "%3%4")
    return marked
end
-- Superscript form of each vowel letter. make_dialects() appends it after a
-- word-final "h" in the Classical phonetic transcription, keyed by the vowel
-- letter that precedes the "h".
local superscript = {
["ɐ"] = "ᵄ",
["ɑ"] = "ᵅ",
["e"] = "ᵉ",
["o"] = "ᵒ",
["i"] = "ⁱ",
["u"] = "ᵘ",
}
--- Derive the Vedic and Classical transcriptions from the common one.
-- @param text transcription after phon_procs(), with tone marks and "ˈ"
-- @return table with the keys 'rig' and 'cla'; each value is a table with the
--   fields `label`, `phonemic` and `phonetic`
local function make_dialects(text)
    -- Nasalized yama: insert "ⁿ" between a stop (or ɦ) and a following nasal.
    local function nasalized_yama(str)
        str = gsub(
            str,
            "([kɡtdʈɖcɟpb])(" .. DENTAL .. "?)([ʰʱ]?)([ %.ˈ]?)([nŋɲɳm])",
            "%1%2%3ⁿ%4%5"
        )
        str = gsub(str, "(ɦ)([ %.ˈ]?)([nɳm])", "%1ⁿ%2%3")
        return str
    end

    -- Echo vowel: repeat the preceding vowel, superscripted, after a final "h".
    -- `sep` is the captured trailing space, or nil at the end of the text.
    local function echo_vowel(vow, nas, length, glide, sep)
        return vow .. nas .. length .. glide .. "h" .. superscript[vow] .. (sep or "")
    end

    local dialects = {}

    -- Rigvedic Sanskrit: no stress, so turn stress marks back into syllable dots
    local rig_phnm = text
    rig_phnm = gsub(rig_phnm, "^ˈ", "")
    rig_phnm = gsub(rig_phnm, "ˈ", ".")
    rig_phnm = gsub(rig_phnm, " %.", " ")
    local rig_phnt = abhinidhana(rig_phnm)
    -- visarga alternation
    rig_phnt = gsub(rig_phnt, "h([ %.ˈ]?)([p])", "ɸ%1%2")
    rig_phnt = gsub(rig_phnt, "h([ %.ˈ]?)([k])", "x%1%2")
    -- nasalized semivowels
    rig_phnt = gsub(
        rig_phnt,
        "([ŋɲnɳm])(" .. DENTAL .. "?)([ %.ˈ]?)([lɭɪʊ])([" .. DENTAL .. FLAP .. "]?)(ʱ?)",
        "%4%5" .. NASAL .. "%3%4%5%6"
    )
    rig_phnt = nasalized_yama(rig_phnt)
    -- remove sannatara and svarita from phonemic (the phonetic form keeps them)
    rig_phnm = gsub(rig_phnm, "[" .. LOW .. PEAKING .. "]", "")
    rig_phnm = gsub(rig_phnm, RISING, HIGH)
    dialects['rig'] = {
        label = "Vedic",
        phonemic = rig_phnm,
        phonetic = rig_phnt,
    }

    -- Classical Sanskrit
    local cla_phnm = text
    -- Strip every tone mark. The nasalisation substitutions emit NASAL before
    -- the tone mark, so a pattern that only removes a tone directly after the
    -- vowel letter would let such tones through and would also keep the
    -- diphthong rewrites below from matching.
    cla_phnm = gsub(cla_phnm, "[" .. RISING .. LOW .. PEAKING .. "]", "")
    cla_phnm = gsub(cla_phnm, "ɐ(" .. NASAL .. "?)ɪ", "e%1ː")
    cla_phnm = gsub(cla_phnm, "ɐ(" .. NASAL .. "?)ʊ", "o%1ː")
    cla_phnm = gsub(cla_phnm, "ɑ(" .. NASAL .. "?)ː([ɪʊ])", "ɑ%1%2")
    cla_phnm = gsub(cla_phnm, "w", "ʋ")
    local cla_phnt = abhinidhana(cla_phnm)
    -- disabled in the original:
    -- cla_pron = gsub(cla_pron, "r̩(" .. NASAL .. "?)(" .. RISING .. "?)(ː?)", "ɽi%1%2%3")
    -- cla_pron = gsub(cla_pron, "l̩(" .. NASAL .. "?)(" .. RISING .. "?)(ː?)", "l̪i%1%2%3")
    cla_phnt = nasalized_yama(cla_phnt)
    -- echo vowel after a visarga at the end of the text and before a space
    cla_phnt = gsub(cla_phnt, "([ɐɑeoiu])(" .. NASAL .. "?)(ː?)([ɪʊ]?)h$", echo_vowel)
    cla_phnt = gsub(cla_phnt, "([ɐɑeoiu])(" .. NASAL .. "?)(ː?)([ɪʊ]?)h( )", echo_vowel)
    dialects['cla'] = {
        label = "Classical Sanskrit",
        phonemic = cla_phnm,
        phonetic = cla_phnt,
    }

    return dialects
end
--- Render the dialect transcriptions as wikitext.
-- With `novedic` set, a single labelled Classical line is returned. Otherwise
-- the result is a collapsible box showing the Classical transcription, which
-- expands to one labelled line per dialect (Vedic, then Classical).
-- @param dialects table returned by make_dialects()
-- @param novedic truthy to leave out the Vedic transcription
-- @return wikitext string
local function make_table(dialects, novedic)
    -- /phonemic/ always; [phonetic] only when it differs from the phonemic form
    local function pron_items(dial)
        local entry = dialects[dial]
        local items = {{pron = '/' .. entry.phonemic .. '/'}}
        if entry.phonemic ~= entry.phonetic then
            table.insert(items, {pron = '[' .. entry.phonetic .. ']'})
        end
        return items
    end

    -- bullet line consisting of the dialect label and its transcriptions
    local function labelled_line(dial)
        local items = pron_items(dial)
        return table.concat{
            '\n* ',
            m_a.format_qualifiers(lang, {dialects[dial].label}),
            ' ',
            m_IPA.format_IPA_full { lang = lang, items = items },
        }
    end

    if novedic then
        return labelled_line('cla')
    end

    -- always-visible part: Classical transcription without a label
    local inline = table.concat{
        '\n* ',
        m_IPA.format_IPA_full { lang = lang, items = pron_items('cla') },
    }

    -- collapsed part: every dialect, labelled
    local full = {'\n<div class="mw-collapsible-content">\n----\n'}
    for _, dial in ipairs({'rig', 'cla'}) do
        table.insert(full, labelled_line(dial))
    end
    table.insert(full, '</div>')

    return table.concat{
        '<div class="toccolours mw-collapsible mw-collapsed" style="width:600px; font-size:100%">',
        inline,
        table.concat(full, ""),
        '</div>',
    }
end
--- Template entry point: transcribe the given word(s) and render the result.
-- Reads the arguments of the parent frame:
--   w (alias: 1) - the text to transcribe; defaults to the current page title
--   a            - list of numbers, holes allowed: accented syllable per word
--   novedic      - boolean: leave out the Vedic transcription
-- @param frame the frame object passed in by the caller
-- @return wikitext produced by make_table()
function export.show(frame)
    local spec = {
        [1] = {alias_of = 'w'},
        w = {default = mw.title.getCurrentTitle().text},
        a = {list = true, allow_holes = true, type = 'number'},
        novedic = {type = 'boolean'}
    }
    local args = require("Module:parameters").process(frame:getParent().args, spec)

    -- Devanagari -> syllabified IPA -> shared phonology -> per-dialect forms
    local transcription = phon_procs(convert_words(args.w, args.a))
    return make_table(make_dialects(transcription), args.novedic)
end
return export