Jump to content

Module:ne-IPA-verb

From Wiktionary, the free dictionary


-- IPA pronunciation generation for Nepali verb forms (Devanagari input)

-- Module table; export.tr is attached to it below and it is returned at the end of the file.
local export = {}
-- Local aliases for the Unicode-aware string functions of Scribunto's mw.ustring library.
local gsub = mw.ustring.gsub
local match = mw.ustring.match

-- Lookup table from Devanagari characters (optionally followed by a nukta) to their output symbols.
-- export.tr passes it as the replacement table to gsub with the pattern '.़?', so every key is
-- either a single code point or a code point followed by the nukta sign.
local conv = {
-- consonants
['क'] = 'k', ['ख'] = 'kʰ', ['ग'] = 'ɡ', ['घ'] = 'ɡʱ', ['ङ'] = 'ŋ',
['च'] = 't͡s', ['छ'] = 't͡sʰ', ['ज'] = 'd͡z', ['झ'] = 'd͡zʱ', ['ञ'] = 'n',
['ट'] = 'ʈ', ['ठ'] = 'ʈʰ', ['ड'] = 'ɖ', ['ढ'] = 'ɖʱ', ['ण'] = 'ɳ',
['त'] = 't̪', ['थ'] = 't̪ʰ', ['द'] = 'd̪', ['ध'] = 'd̪ʱ', ['न'] = 'n',
['प'] = 'p', ['फ'] = 'pʰ', ['ब'] = 'b', ['भ'] = 'bʱ', ['म'] = 'm',
['य'] = 'j', ['र'] = 'r', ['ल'] = 'l', ['व'] = 'w',
['श'] = 's', ['ष'] = 's', ['स'] = 's', ['ह'] = 'ɦ',

-- additional consonants (nukta forms and other extra letters)
['क़'] = 'q', ['ख़'] = 'x', ['ग़'] = 'ɣ', ['ऴ'] = 'ɭ',
['ळ'] = 'ɭ', ['ज़'] = 'z', ['श़'] = 'ʒ', ['झ़'] = 'ʒ',
['ड़'] = 'ɽ', ['ढ़'] = 'ɽʱ', ['फ़'] = 'f', ['थ़'] = 'θ',
['द़'] = 'ð', ['ऩ'] = 'n̪', ['ऱ'] = 'ɹ', ['ॽ'] = "ʔ",
['व़'] = 'v', ['ॹ'] = 'ʒ',
-- dependent vowel signs (matras)
['ि'] = 'i', ['ु'] = 'u', ['े'] = 'e', ['ो'] = 'o', ['ा'] = 'ä', ['ी'] = 'i', ['ू'] = 'u', ['ृ'] = 'ri', ['ॄ'] = 'ri', ['ॢ'] = 'liɾi', ['ॣ'] = 'liɾi', ['ै'] = 'ʌi̯', ['ौ'] = 'ʌu̯', ['ॉ'] = 'ɔ', ['ॅ'] = 'æ',
-- independent vowel letters
['अ'] = 'ʌ', ['इ'] = 'i', ['उ'] = 'u', ['ए'] = 'e', ['ओ'] = 'o', ['आ'] = 'ä', ['ई'] = 'i', ['ऊ'] = 'u', ['ऋ'] = 'ri', ['ॠ'] = 'ri', ['ऌ'] = 'liɾi', ['ॡ'] = 'liɾi', ['ऐ'] = 'ʌi̯', ['औ'] = 'ʌu̯', ['ऑ'] = 'ɔ', ['ॲ'] = 'æ', ['ऍ'] = 'æ',
-- chandrabindu (becomes a combining tilde)
['ँ'] = '̃',
-- anusvara (fallback; export.tr rewrites most anusvaras before this table is applied)
['ं'] = 'ṃ',
-- visarga
['ः'] = 'ː',
-- virama (dropped)
['्'] = '',
-- om
['ॐ'] = 'oːm',
-- zero-width non-joiner (the key is the invisible character U+200C)
['‌'] = ' ͜ ',
-- zero-width joiner (the key is the invisible character U+200D)
['‍'] = 'ʌ',
-- diphthong marker
['ॱ'] = '̯',
-- numerals
['०'] = '0', ['१'] = '1', ['२'] = '2', ['३'] = '3', ['४'] = '4', ['५'] = '5', ['६'] = '6', ['७'] = '7', ['८'] = '8', ['९'] = '9',
-- punctuation
['।'] = '.', -- danda
['॥'] = '.', -- double danda
['+'] = '', -- compound separator

-- abbreviation sign
['॰'] = '.',
}

-- Maps a consonant to the nasal character that export.tr substitutes for an
-- anusvara adjacent to it. The table is filled from groups keyed by the
-- resulting nasal; each consonant appears in exactly one group.
local nasal_assim = {}
do
	local groups = {
		{ 'ङ', { 'क', 'ख', 'ग', 'घ', 'श', 'ह' } },
		{ 'ञ', { 'च', 'छ', 'ज', 'झ' } },
		{ 'ण', { 'ट', 'ठ', 'ड', 'ढ' } },
		{ 'म', { 'प', 'फ', 'ब', 'भ', 'म', 'य', 'र', 'व' } },
		{ 'न', { 'त', 'थ', 'द', 'ध', 'न', 'ष', 'स' } },
		-- ल is the only consonant mapped to chandrabindu instead of a nasal letter
		{ 'ँ', { 'ल' } },
	}
	for _, group in ipairs(groups) do
		local nasal, consonants = group[1], group[2]
		for _, consonant in ipairs(consonants) do
			nasal_assim[consonant] = nasal
		end
	end
end
-- Set of three-code-point sequences (consonant, virama, consonant) that
-- export.tr looks up when deciding whether to keep a word-final ʌ. The lookup
-- is done on the reversed word, so keys are in reversed character order.
local perm_cl = {}
for _, cluster in ipairs({ 'ज्न', 'ज्ञ', 'ट्र', 'ड्र', 'ट्स', 'ड्स', 'स्ड' }) do
	perm_cl[cluster] = true
end

-- Character inventories used inside pattern character classes.
-- special_cons is all_cons without ङ and ञ.
local all_cons = 'कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह'
local special_cons = 'कखगघचछजझटठडढणतथदधनपफबभमयरलवशषसह'
-- Note the naming: `vowel` holds Latin "a" plus the dependent vowel signs,
-- while `vowel_sign` holds the independent vowel letters.
local vowel = 'aिुृेोाीूैौॉॅॆॊॄॢॣ'
local vowel_sign = 'अइउएओआईऊऋॠॡऌऐऔऑऍ'

-- syncope_pattern matches C V C V C ʌ C V C V, where C is a consonant with an
-- optional preceding nukta and V is a vowel with an optional preceding
-- anusvara/chandrabindu. Each C and V is its own capture (nine in total), so a
-- replacement of '%1'..'%9' drops just the ʌ. export.tr applies it to the
-- reversed word, which is why the optional marks come before their base.
local syncope_pattern
do
	local cons_cap = '(़?[' .. all_cons .. '])'
	local vowel_cap = '([ंँ]?[' .. vowel .. vowel_sign .. '])'
	syncope_pattern = table.concat({
		cons_cap, vowel_cap, cons_cap, vowel_cap, cons_cap,
		'ʌ',
		cons_cap, vowel_cap, cons_cap, vowel_cap,
	})
end

-- Consonant inventories for koka_pattern. nor_cons lists some letters more than
-- once; that is harmless because it is only used inside a character class.
local nor_cons, sp_cons = 'कखगघङचछजझञटठडढतथदधपफबभशषसयरलवणनमयरलवनम', 'कलम'
-- `vowel` and `vowel_sign` are the file-level locals declared alongside
-- syncope_pattern; they are reused here rather than re-declared, which would
-- only shadow them with identical values.
local koka_sign = 'ोीाैे'
-- koka_pattern matches, in this order: one of the koka_sign vowel signs, a
-- consonant from sp_cons (optional preceding nukta), ʌ, a consonant from
-- nor_cons other than य (optional preceding nukta), and a vowel (optional
-- preceding anusvara/chandrabindu). The four captures exclude the ʌ, so
-- replacing with '%1%2%3%4' deletes it. export.tr applies it to the reversed word.
local koka_pattern = '([' .. koka_sign .. '])(़?[' .. sp_cons .. '])ʌ(़?[' .. gsub(nor_cons, "य", "") .. '])([ंँ]?[' .. vowel .. vowel_sign .. '])'

--- Reverse a string one Unicode code point at a time.
-- Combining marks are separate code points, so in the result they come before
-- the character they followed in the input.
-- @param text string to reverse
-- @return the reversed string
local function rev_string(text)
	local length = mw.ustring.len(text)
	local reversed = {}
	for i = length, 1, -1 do
		-- slot 1 receives the last code point, slot `length` the first
		reversed[length - i + 1] = mw.ustring.sub(text, i, i)
	end
	return table.concat(reversed)
end
--- Convert Nepali text written in Devanagari into an IPA string.
-- The conversion runs in three stages:
--   1. an explicit ʌ is inserted after every consonant that carries no vowel
--      sign or virama;
--   2. each Devanagari word is reversed, has ʌ-deletion and anusvara rules
--      applied, is reversed back and substituted into the text, followed by a
--      set of ending-specific rewrites;
--   3. the remaining Devanagari is mapped through `conv` and a long, ordered
--      list of rewrite rules is applied to the resulting symbols.
-- The rules are order-dependent; do not reorder them.
-- @param text string to convert
-- @param lang not used by this function
-- @param sc   not used by this function
-- @return the converted string, normalized to NFC
function export.tr(text, lang, sc)
	-- Stage 1: make the inherent vowel explicit. A consonant (with optional
	-- nukta) that is not followed by a vowel sign or virama gets ʌ appended.
	text = gsub(text, '([' .. all_cons .. ']़?)([' .. vowel .. '्]?)', function(c, d)
		return c .. (d == "" and 'ʌ' or d) end)

	-- Stage 2: per-word processing. gmatch iterates over the string as it was
	-- when the loop started, while `text` itself is rewritten inside the loop.
	for word in mw.ustring.gmatch(text, "[ऀ-ॿʌ]+") do
		local orig_word = word
		-- Work on the reversed word so that '^' anchors at the word's end.
		word = rev_string(word)

		-- Word-final ʌ: drop it unless one of the listed contexts holds.
		-- In the reversed word, `opt` is the nukta (if any), `first` the last
		-- consonant, `second` and `third` the characters before it.
		word = gsub(word, '^ʌ(़?)([' .. all_cons .. '])(.)(.?)', function(opt, first, second, third)
			return (((match(first, '[' .. special_cons .. ']') and match(second, 'ं')
				or match(first, '[' .. special_cons .. ']') and match(second, '्') and not perm_cl[first..second..third])
				or match(first .. second, 'य[aिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔʌ]') or match(first .. second, 'ह[अaिुृेोाीूैौॉॅॆॊआईउऊइएऐओऔʌ]'))
				and 'ʌ' or "") .. opt .. first .. second .. third end)

		-- Delete the ʌ matched by syncope_pattern until no match remains.
		while match(word, syncope_pattern) do
			word = gsub(word, syncope_pattern, '%1%2%3%4%5%6%7%8%9')
		end
		-- Likewise for koka_pattern.
		while match(word, koka_pattern) do
			word = gsub(word, koka_pattern, '%1%2%3%4')
		end

		-- Anusvara. In the reversed word `succ` is the character that follows
		-- the anusvara in the original order and `prev` the one before it.
		-- Word-final after ʌ it becomes म + virama; word-final after a vowel
		-- sign it becomes a combining tilde; otherwise it becomes the nasal
		-- listed for `succ` in nasal_assim, or a combining tilde if none.
		word = gsub(word, '(.?)ं(.)', function(succ, prev)
			return succ .. (succ..prev == "ʌ" and "्म" or
				(succ == "" and match(prev, '[' .. vowel .. ']') and "̃" or nasal_assim[succ] or "̃")) .. prev end)

		-- Put the processed word back. orig_word consists only of Devanagari
		-- code points and ʌ, none of which are pattern magic characters.
		text = gsub(text, orig_word, rev_string(word))

		-- Ending-specific rewrites: each ending is handled at the end of the
		-- text ('$') and before a space (requiring three preceding characters).
		text = gsub(text, 'ईन$', 'inʌ')
		text = gsub(text, '(...)ईन ', '%1inʌ ')
		text = gsub(text, 'इन$', 'inʌ')
		text = gsub(text, '(...)इन ', '%1inʌ ')
		text = gsub(text, 'ैन$', 'ʌi̯nʌ')
		-- Fixed: the pattern used to repeat the vowel sign four times and so
		-- could never match the ending handled by the '$' rule above.
		text = gsub(text, '(...)ैन ', '%1ʌi̯nʌ ')
		text = gsub(text, 'उन$', 'unʌ')
		text = gsub(text, '(...)उन ', '%1unʌ ')
		text = gsub(text, 'ुन$', 'unʌ')
		text = gsub(text, '(...)ुन ', '%1unʌ ')
		text = gsub(text, 'िन$', 'inʌ')
		text = gsub(text, '(...)िन ', '%1inʌ ')
		text = gsub(text, 'िछ$', 'it͡sʰʌ')
		text = gsub(text, '(...)िछ ', '%1it͡sʰʌ ')
		text = gsub(text, 'उछ$', 'ut͡sʰʌ')
		text = gsub(text, '(...)उछ ', '%1ut͡sʰʌ ')
		text = gsub(text, 'इछ$', 'it͡sʰʌ')
		text = gsub(text, '(...)इछ ', '%1it͡sʰʌ ')
		text = gsub(text, 'एछ$', 'et͡sʰʌ')
		text = gsub(text, 'ेछ$', 'et͡sʰʌ')
		text = gsub(text, '(...)ेछ ', '%1et͡sʰʌ ')
		text = gsub(text, '(...)ेन ', '%1enʌ ')
		text = gsub(text, 'ेन$', 'enʌ')
		text = gsub(text, '(...)एन ', '%1enʌ ')
		text = gsub(text, 'एर$', 'eɾʌ')
		text = gsub(text, '(...)एर ', '%1eɾʌ ')
		text = gsub(text, 'ेर$', 'eɾʌ')
		text = gsub(text, '(...)ेर ', '%1eɾʌ ')
		text = gsub(text, 'एन$', 'enʌ')
		text = gsub(text, 'उँछ$', 'ũt͡sʰʌ')
		text = gsub(text, '(...)उँछ ', '%1ũt͡sʰʌ ')
		text = gsub(text, 'ज्ञ', 'ɡi̯')
	end

	-- Stage 3: map every remaining character (with optional nukta) through
	-- conv; characters without an entry are left unchanged.
	text = gsub(text, '.़?', conv)
	-- Placement of the combining tilde on ʌ + glide sequences.
	text = gsub(text, 'ʌ([iu])̯̃', 'ʌ̃%1̯̃')
	text = gsub(text, '([ʌ])̃([iu])̯', '%1̃%2̯')
	text = gsub(text, "[<>]", "")
	text = gsub(text, "ॱ", "")

	-- Restore a final ʌ after specific endings (end of text / before a space).
	text = gsub(text, 'dʌt͡sʰ$', 'dʌt͡sʰʌ')
	text = gsub(text, '(...)dʌt͡sʰ ', '%1dʌt͡sʰʌ ')
	text = gsub(text, 'ʌi̯n$', 'ʌi̯nʌ')
	text = gsub(text, '(...)ʌi̯n ', '%1ʌi̯nʌ ')
	text = gsub(text, 'nʌt͡sʰ$', 'nʌt͡sʰʌ')
	text = gsub(text, '(...)nʌt͡sʰ ', '%1nʌt͡sʰʌ ')

	-- w → b and jʌ/ji rewrites in the listed contexts. '()' is a position
	-- capture, which is why the replacements start at %2 in those rules.
	text = gsub(text, 'wʌi̯', 'bʌi̯')
	text = gsub(text, 'w$', 'b')
	text = gsub(text, '()jʌ([cspdtngkbrzjyṇṣśṇɾṅñṃ])', 'e%2')
	text = gsub(text, '([aʌäāiīuūeoŏĕɔæɛʌ̃ä̃ĩũī̃ū̃ẽõɔ̃e̤])([gɡbtṭdmhncjvwṛṛ̃ṅɽśṣɳszʐlkpḍç])(̞?)([ʰhʱ]?)([wvb])([ʌaä])',
	          '%1%2%2%4o')  ----new
	text = gsub(text, 'ji([cspdtngkbrzjyṇṣśṇɾṅñṃ])', 'i%1')
	-- Mark the first of two adjacent stops with ̚.
	text = gsub(text, '([kɡtdʈṭpb])(̪?)([ʰʱ]?)([ %.ˈ]?)([kɡtdṭʈpb])([ʰʱ]?)', '%1%2%3̚%4%5%6') ------new
	text = gsub(text, '(...)w ', '%1b ')
	text = gsub(text, '([rʌäiueo])([r])w', '%1rb')
	text = gsub(text, 'w([iewuojr])', 'b%1')
	text = gsub(text, '([w])ʌ([krjtcṅñysśdpɦhn])([tnrṇṣcśkghjɦsueoayd])', 'bʌ%2%3')
	text = gsub(text, '([w])ä([cgjṇtdmyshɦśṣn])', 'bä%2')
	text = gsub(text, '([w])ä([rɾ])([tdābuṇɦṣh])', 'bä%2%3')
	text = gsub(text, '([w])ä([l])([m])', 'bä%2%3')
	text = gsub(text, '([w])ʌ([sśṣṅñṃyjpdtnc])', 'bʌ%2')
	text = gsub(text, '([ʌäiueoŏĕ])([nl])([td]̪)', '%1%2̪%3') -- dental assimilation
	text = gsub(text, '([ʌäiueoŏĕ])n([ʈɖ])', '%1ɳ%2') -- retroflex assimilation
	text = gsub(text, '([l])([ʈɖ])', 'ɭ%2')
	-- r between the listed vowel symbols becomes ɾ.
	text = gsub(text, '([ʌʌ̃äaāiuūəãā̃ī̃ĩũū̃ẽõeeo̯o ̤])r([ʌʌ̃äaāiīuūəãā̃ī̃ĩũū̃ẽõeeyo̯o])', '%1ɾ%2')
	text = gsub(text, '()([śsnlcjzkhptdgb])([vw])([aāäiīuūoeĩ])([cspdtngkbrjyṇṣśɾṅñṃ])', '%2w%4%5')

	text = gsub(text, '([n])([ʌ])d͡z([ʱ]?)([ʌäiueo])', '%1%2d͡z%3%4')

	-- Rewrites of stops and affricates after a vowel symbol.
	text = gsub(text, '([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])([k])([ʰ])', '%1k̞ʰ')
	text = gsub(text, '([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])([d]͡)(z)([ʱ]?)', '%1(d)z')
	text = gsub(text, '([spdtzʱʰɦgkbrjyɖʈṣśɾṃ̪])([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])b(ʱ)([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤jː])', '%1%2b%4')
	text = gsub(text, '([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])pʰ', '%1ɸ')
	text = gsub(text, '([spdtzʱʰɦgkbrjyɖʈṇṣśɾṃ̪])([ʌäiueoɔæɛʌ̃ä̃ĩũẽõjɔ̃e̤ː])d̪ʱ', '%1%2d̪')
	text = gsub(text, '([spdtzʱʰɦgkbrjyɖʈṇṣśɾṃ̪])([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃je̤ː])ɡ(ʱ?)', '%1%2ɡ̞')
	-- Doubled affricates: the first becomes an unreleased stop.
	text = gsub(text, 't͡st͡s(ʰ?)', 't̚t͡s%1')
	text = gsub(text, 'd͡zd͡z(ʱ?)', 'd̚d͡z%1')

	-- Rewrites involving ɦ. In the first rule the '%2' inside the pattern is
	-- a back-reference: ɦ must be followed by the same vowel that precedes it.
	text = gsub(text, '([ spdtzʱʰɦgkbrjyɖʈṇṣśṇɾṅñṃ̪])([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤])ɦ%2', '%1%2̤ː')
	text = gsub(text, '([ʌʌ̃aã])ɦ([äāā̃ä̃])', '%2̤ː')
	text = gsub(text, '([äāā̃ä̃])ɦ([ʌʌ̃aã])', '%1̤ː')
	--text = gsub(text, '([ʌäeoɔæɛʌ̃ä̃ẽõɔ̃e̤])ɦ([iuĩũ])', '%1%2')
	text = gsub(text, '([iĩ])ɦ([ũu])', '%1%2')
	text = gsub(text, '([uũ])ɦ([iĩ])', '%1%2')
	text = gsub(text, '([uũ])ɦ([äʌ])', '%1%2')
	--text = gsub(text, '([ʌʌ̃iĩ])ɦ([eẽoõ])', '%1%2̤')
	text = gsub(text, '([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̯e̤ː])ɖ(ʱ?)(j?)', '%1ɽ%3')
	text = gsub(text, '([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])ɦr([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])', '%1ɾ%2')
	text = gsub(text, '([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])ɦ([n])([ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])', '%1̤ː%2%3')
	text = gsub(text, '([spdtzʱʰɦgkbrjyṇṣśʈɖnṇɾṅñṃ̪ʌäiueoɔæɛʌ̃ä̃ĩũẽõɔ̃e̤ː])jʌ', '%1e')
	text = gsub(text, 'ɦri', 'ri')

	-- Final clean-up of specific endings and stray mark sequences.
	text = gsub(text, 'kʌn$', 'kʌnʌ')
	text = gsub(text, '(...)kʌn ', '%1kʌnʌ ')
	text = gsub(text, 'nʌʌ$', 'nʌ')
	text = gsub(text, 'ñɡi̯', 'ŋgi̯')
	text = gsub(text, 'ː̃', 'ː')
	text = gsub(text, 'ː̤ː', 'ː')
	return mw.ustring.toNFC(text)
end
-- Hand the module table (holding export.tr) back to whoever loads this module.
return export