-- Module:hy-pronunciation/sandbox
-- Appearance
-- - This module sandbox lacks a documentation subpage. Please create it.
-- - Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox of (diff)
-- Table that collects this module's public functions.
local export = {}
-- Single characters that map to IPA sounds, keyed first by pronunciation
-- system ("east" / "west") and then by the source character.
-- A character mapped to "" is deleted from the transcription.
--
-- The hyphen "-" used to be listed twice in each sub-table (once as " " and
-- once as ""). Lua leaves the assignment order for repeated constructor keys
-- undefined, so only one entry is kept: the later one (""), which is the one
-- that takes effect in the reference implementation.
-- NOTE(review): if hyphenated compounds are meant to be split into separate
-- words, the intended value is " " instead -- confirm with the module authors.
local phonetic_chars_map = {
	-- Eastern Armenian
	east = {
		["ա"]="ɑ", ["բ"]="b", ["գ"]="ɡ", ["դ"]="d", ["ե"]="ɛ", ["զ"]="z",
		["է"]="ɛ", ["ը"]="ə", ["թ"]="tʰ", ["ժ"]="ʒ", ["ի"]="i", ["լ"]="l",
		["խ"]="χ", ["ծ"]="t͡s", ["կ"]="k", ["հ"]="h", ["ձ"]="d͡z", ["ղ"]="ʁ",
		["ճ"]="t͡ʃ", ["մ"]="m", ["յ"]="j", ["ն"]="n", ["շ"]="ʃ", ["ո"]="ɔ",
		["չ"]="t͡ʃʰ", ["պ"]="p", ["ջ"]="d͡ʒ", ["ռ"]="r", ["ս"]="s", ["վ"]="v",
		["տ"]="t", ["ր"]="ɾ", ["ց"]="t͡sʰ", ["ւ"]="v", ["փ"]="pʰ", ["ք"]="kʰ",
		["օ"]="ɔ", ["ֆ"]="f", ["՚"]="", ["-"]=""
	},
	-- Western Armenian
	west = {
		["ա"]="ɑ", ["բ"]="pʰ", ["գ"]="kʰ", ["դ"]="tʰ", ["ե"]="ɛ", ["զ"]="z",
		["է"]="ɛ", ["ը"]="ə", ["թ"]="tʰ", ["ժ"]="ʒ", ["ի"]="i", ["լ"]="l",
		["խ"]="χ", ["ծ"]="d͡z", ["կ"]="ɡ", ["հ"]="h", ["ձ"]="t͡sʰ", ["ղ"]="ʁ",
		["ճ"]="d͡ʒ", ["մ"]="m", ["յ"]="j", ["ն"]="n", ["շ"]="ʃ", ["ո"]="ɔ",
		["չ"]="t͡ʃʰ", ["պ"]="b", ["ջ"]="t͡ʃʰ", ["ռ"]="ɾ", ["ս"]="s", ["վ"]="v",
		["տ"]="d", ["ր"]="ɾ", ["ց"]="t͡sʰ", ["ւ"]="v", ["փ"]="pʰ", ["ք"]="kʰ",
		["օ"]="ɔ", ["ֆ"]="f", ["՚"]="", ["-"]=""
	},
}
-- Character sequences that map to IPA sounds, keyed first by pronunciation
-- system ("east" / "west") and then by a ustring pattern. Each value is a
-- gsub replacement: either a string, or a function that receives the
-- pattern's capture and returns the replacement (or nothing to leave the
-- matched text untouched).
-- NOTE(review): the rules are stored under string keys, so code that walks
-- them with pairs() sees them in unspecified order; 'ու' and '(.?)յու'
-- overlap, so the result can depend on that order.
local phonetic_2chars_map = {
	east = {
		['ու'] = 'u',
		-- diphthongization in the following combinations: [իե] = [jɛ], [իա] = [jɑ]
		['իե'] = 'jɛ',
		['իա'] = 'jɑ'
	},
	west = {
		['ու'] = 'u',
		['էօ'] = 'œ',
		['(.?)յու'] = function(before)
			-- յու becomes ʏ if not in the initial position and if not
			-- preceded by [ɑɛəɔiu] (or whitespace).
			-- This will not catch initial position in a word
			-- that follows a vowel-initial word.
			-- NOTE(review): the class lists IPA vowels; confirm whether
			-- `before` can still be an Armenian-script vowel at this stage.
			if not (before == '' or mw.ustring.find(before, '[%sɑɛəɔiu]')) then
				-- The match includes the captured preceding character, so
				-- it has to be put back; returning only 'ʏ' would delete it.
				return before .. 'ʏ'
			end
			-- Falling through returns nothing, which keeps the original text.
		end,
		-- diphthongization in the following combinations: [իե] = [jɛ], [իա] = [jɑ]
		['իե'] = 'jɛ',
		['իա'] = 'jɑ'
	},
}
-- Add an IPA stress mark to one whitespace-free, already transcribed word.
-- Returns nil for words with at most one vowel, which makes gsub keep the
-- word unchanged.
--
-- The stress is always on the last syllable not formed by schwa [ə]. In some
-- rare cases the stress is not on the last syllable. In such cases the
-- stressed vowel is marked by the Armenian stress character <՛>, e.g. մի՛թե. So:
-- 1) Find the vowel followed by <՛>. If none, jump to step 2.
-- 2) Find the last non-schwa vowel, i.e. [ɑɛɔiu].
-- 3) Put the stress mark before the consonant(s) preceding that vowel, then
--    move it so that only one consonant stays in the stressed syllable's
--    onset and so that it never splits an affricate tie bar.
local function add_stress(chunk)
	-- Do not add a stress mark for monosyllabic words.
	local _, vowel_count = mw.ustring.gsub(chunk, "[ɑɛəɔiu]", "%0")
	if vowel_count <= 1 then
		return nil
	end

	-- Explicit stress: the vowel followed by <՛>; the mark itself is consumed.
	local stressed, explicit = mw.ustring.gsub(chunk, "([^ɑɛɔiuə]*[ɑɛɔiuə])՛", "ˈ%1")
	if explicit == 0 then
		-- Word ends in a syllable with a non-schwa vowel.
		stressed = mw.ustring.gsub(stressed, "([^ɑɛɔiuə]*[ɑɛɔiu][^ɑɛɔiuə]*)$", "ˈ%1")
		-- Word ends in a schwa syllable: stress the syllable before it.
		stressed = mw.ustring.gsub(stressed, "([^ɑɛɔiuə]*[ɑɛəɔiu]?[ɑɛɔiu][^ɑɛɔiuə]*ə[^ɑɛɔiuə]*)$", "ˈ%1")
	end
	-- After a vowel, leave all but the last consonant in the previous syllable.
	stressed = mw.ustring.gsub(stressed, "([ɑɛəɔiu])ˈ([^ɑɛɔiuə]+)([^ɑɛɔiuəːˈʰ])", "%1%2ˈ%3")
	-- Never place the stress mark inside an affricate (after the tie bar).
	stressed = mw.ustring.gsub(stressed, "(.)͡ˈ", "ˈ%1͡")
	return stressed
end

-- Convert Armenian-script text to an IPA transcription.
-- @param word    text in Armenian script; it is lower-cased before processing.
-- @param system  key of the mapping tables to use ("east" or "west").
-- @return the transcription, with a stress mark added to every word that has
--         more than one vowel.
-- Raises an error when `system` is not a key of both mapping tables.
function export._pronunciation(word, system)
	if not (phonetic_chars_map[system] and phonetic_2chars_map[system]) then
		error("Invalid system " .. tostring(system))
	end

	local phonetic = mw.ustring.lower(word)

	-- Long consonants that are orthographically geminated.
	phonetic = mw.ustring.gsub(phonetic, "(.)%1", "%1ː")

	-- Multi-character sequences.
	-- NOTE(review): pairs() applies these rules in unspecified order, and some
	-- of the patterns overlap, so the outcome may depend on that order.
	for pat, repl in pairs(phonetic_2chars_map[system]) do
		phonetic = mw.ustring.gsub(phonetic, pat, repl)
	end

	-- ե and ո are pronounced as jɛ and vɔ word-initially.
	-- The "^" anchor ties these rules to the start of the whole input, so
	-- only the first word is affected.
	phonetic = mw.ustring.gsub(phonetic, "^ե", "յէ")
	phonetic = mw.ustring.gsub(phonetic, "^ո", "վօ")
	-- except when followed by another վ.
	phonetic = mw.ustring.gsub(phonetic, "^վօվ", "օվ")

	-- Single characters; anything without an entry in the table is kept as is.
	phonetic = mw.ustring.gsub(phonetic, '.', phonetic_chars_map[system])

	-- assimilation: nasal + velar plosives = velar nasal + velar plosives
	phonetic = mw.ustring.gsub(phonetic, "n([ɡkχ]+)", "ŋ%1")

	-- pseudo-palatalization under the influence of Russian [COLLOQUIAL, NOT STANDARD]
	--phonetic = mw.ustring.gsub(phonetic, "tj", "t͡sj")
	--phonetic = mw.ustring.gsub(phonetic, "tʰj", "t͡sʰj")
	--phonetic = mw.ustring.gsub(phonetic, "dj", "d͡zj")

	-- trilling of ɾ in some positions [COLLOQUIAL, NOT STANDARD]
	--phonetic = mw.ustring.gsub(phonetic, "ɾt", "rt")

	-- Stress each word on its own. The callback must work on the matched word
	-- only: using the whole string here would replace every word with a copy
	-- of the full transcription.
	phonetic = mw.ustring.gsub(phonetic, "%S+", add_stress)

	return phonetic
end
-- Public entry point; accepts either a frame object or plain Lua arguments.
-- @param word    either the text to transcribe, or a frame table. For a frame
--                the text is read from its first positional argument, falling
--                back to the parent frame's first positional argument, and
--                `system` is read from the frame's `system` argument.
-- @param system  key of the mapping tables to use; defaults to "east" when
--                missing or empty, for both calling styles.
-- @return the result of export._pronunciation(word, system).
-- Raises an error when no text was supplied.
function export.pronunciation(word, system)
	if type(word) == "table" then
		local frame = word
		word = frame.args[1]
		if not word then
			-- Only consult the parent frame when the direct argument is absent.
			local parent = frame:getParent()
			word = parent and parent.args[1]
		end
		system = frame.args.system
	end

	-- An omitted or blank system selects the default instead of failing
	-- later with "Invalid system".
	if system == nil or system == "" then
		system = "east"
	end

	if not word or (word == "") then
		error("Please put the word as the first positional parameter!")
	end

	return export._pronunciation(word, system)
end
-- Hand the table of public functions back to whoever loads this module.
return export