Module:User:Sinonquoi/ks-pa-translit
Jump to navigation
Jump to search
- This module sandbox lacks a documentation subpage. You may create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
This is a private module sandbox of Sinonquoi, for their own experimentation. Items in this module may be added and removed at Sinonquoi's discretion; do not rely on this module's stability.
local u = mw.ustring.char -- unicode
local gsub = mw.ustring.gsub -- string manipulation
-- TODO
-- 1. [HACK] Aspirate aspirables
-- 2. Vowels with fixed characters (ā, ạ̄, ū, o, ō, e, ē, ọ, ọ̄)
-- 3. Initial variants of vowels (with alef)
-- 4. Medial variants of vowels (diacritics or standalone)
-- 5. Final variants of vowels (same as medial except: e, ē)
-- 6. Treat final yē and vāv differently depending on what comes before
-- 7. Treat final hē with vowel diacritic
-- 8. Fixed consonants and provided vowels
-- 9. [DONE] Disregard hat for nūn
-- 10. [DONE] Work around hat for rē
-- 11. [DONE] yē with hat is a palatal
-- 12. [DONE] Kashmiri yē medial is 'a; final '
-- 12b. Check support for compound words
-- 13. Add vowels to consonants
-- 14. Add vowels to dual role characters
-- How to add vowels
-- Make list of consonants and vowels
-- Check if conditions are met (C+V; alef initial+V; vowel carrier + V)
-- Change
-- Table returned by this module; public functions are attached to it.
local export = {}

-- Letters and the aspiration mark used by the aspiration step, which is
-- still commented out in export.transliterate.
local aspirable = "پتٹچژک"
local aspirate_h = "ھ"

-- Letters for which a trailing inverted hat is dropped when it follows a
-- vowel diacritic (see export.transliterate).
local hattable = "یر"

-- Vowel diacritics handled so far: U+064E fatha, U+064F damma, U+0650 kasra,
-- U+0654 hamza above, U+0655 hamza below, U+065F wavy hamza below.
-- Every code point listed here must also have an entry in vowels_conv.
local vowel_diacritics = u(0x064E) .. u(0x064F) .. u(0x0650) .. u(0x0654) .. u(0x0655) .. u(0x065F)

-- DIACRITICS
local v_sign = u(0x065A) -- V
local inverted_v_sign = u(0x065B) -- inverted V
local hats = v_sign .. inverted_v_sign

-- Letter -> Latin transliteration.
local conv = {
	['ب'] = 'b', ['پ'] = 'p', ['ت'] = 't', ['ٹ'] = 'ṭ', ['ث'] = 's',
	['ج'] = 'j', ['چ'] = 'c', ['ح'] = 'h', ['خ'] = 'kh',
	['د'] = 'd', ['ڈ'] = 'ḍ', ['ذ'] = 'z',
	-- rre is given its own output (ṛ) so that it no longer collides with ddal (ḍ)
	['ر'] = 'r', ['ڑ'] = 'ṛ', ['ز'] = 'z', ['ژ'] = 'ċ',
	['س'] = 's', ['ش'] = 'ś', ['ص'] = 's', ['ض'] = 'z',
	['ط'] = 't', ['ظ'] = 'z',
	['ع'] = 'ʿ', ['غ'] = 'ġ',
	['ف'] = 'f', ['ق'] = 'q',
	['ک'] = 'k', ['گ'] = 'g',
	['ل'] = 'l', ['م'] = 'm', ['ن'] = 'n',
	['ھ'] = 'h',
	-- treatment varies
	['ه'] = 'h',
	-- extended set
	['ی'] = 'y', ['و'] = 'v',
}

-- Vowel diacritic -> Latin transliteration (keys match vowel_diacritics).
local vowels_conv = {
	[u(0x064E)] = 'a', [u(0x064F)] = 'u', [u(0x0650)] = 'i', [u(0x0654)] = 'ạ', [u(0x0655)] = 'ụ', [u(0x065F)] = 'ụ̄',
}

-- Character-class contents used to build patterns: plain consonants, and the
-- same list extended with vāv and yē.
local consonants = 'بپتٹثجچحخدڈذرڑزژسشصضطظعغفقکگلمنھ'
local consonants_extended = 'بپتٹثجچحخدڈذرڑزژسشصضطظعغفقکگلمنھوی'
--- Transliterate Kashmiri text written in Perso-Arabic script.
-- Work in progress: the substitutions below are applied in order, and any
-- character that no step matches is left in the output unchanged.
-- @param text the string to transliterate
-- @return the string after all substitutions have been applied
function export.transliterate(text)
	-- ASPIRATE
	-- text = gsub(text, aspirable .. aspirate_h, "hhhh")

	-- REMOVE HAT FROM NŪN and RĒ
	text = gsub(text, '([نر])' .. inverted_v_sign, "%1")

	-- C2=r/palatal
	-- Drop an inverted hat that follows a hattable letter plus vowel diacritic.
	text = gsub(text, '([' .. hattable .. '])([' .. vowel_diacritics .. '])' .. inverted_v_sign, "%1%2")

	-- YĒ with INVERTED HAT
	text = gsub(text, 'ی' .. inverted_v_sign, "'")

	-- FINAL HALF-YĒ IS A PALATAL
	text = gsub(text, 'ؠ$', "'")

	-- BEFORE A SPACE
	text = gsub(text, 'ؠ[ ]+', "' ")

	-- MEDIAL HALF-YĒ IS 'a
	-- Each match consumes the consonant after the half-yē, so that consonant
	-- cannot also start the next match (C ؠ C ؠ C). Repeat until nothing is
	-- left to replace; every replacement removes one half-yē, so this ends.
	local medial_half_ye = '([' .. consonants .. '])ؠ([' .. consonants .. '])'
	local replaced
	repeat
		text, replaced = gsub(text, medial_half_ye, "%1'a%2")
	until replaced == 0

	-- CONSONANT + VOWEL
	-- Both captures are guaranteed keys of conv / vowels_conv because the
	-- character classes are built from the same letters.
	text = gsub(text,
		'([' .. consonants_extended .. '])([' .. vowel_diacritics .. '])',
		function(c, v)
			return conv[c] .. vowels_conv[v]
		end)

	-- text = gsub(text, '([' .. vowel_diacritics .. '])', vowels_conv)

	-- FINAL HE + VOWEL
	-- text = gsub(text, 'ہ([' .. short_vowels_list .. '])$', short_vowels)

	-- Remaining bare consonants; reuse the shared list instead of a second
	-- hand-maintained copy of the same letters.
	text = gsub(text, '[' .. consonants .. ']', conv)

	return text
end
return export