Module:pa-Arab-translit
Appearance
- The following documentation is located at Module:pa-Arab-translit/documentation. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox (diff)
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:pa-Arab-translit/testcases.
Functions
tr(text, lang, sc)
- Transliterates a given piece of `text` written in the script specified by the code `sc`, and language specified by the code `lang`.
- When the transliteration fails, returns `nil`.
Introduction
....
Exceptions
The following words do not need any diacritics:
Notes
- نْ should be written as ن٘ when it is from an inherited nasal vowel (i.e. most cases)
- Shahmukhi distinguishes the Sukoon/Jazm diacritic (ءْ) from the absence of a diacritic (ء):
- Sukoon/Jazm should be used where there is a true consonant cluster, such as in تْرے (trai), بْھرا (bhrā), گْراں (grāṉ) etc.
- The diacritic should be absent where there is a weak or semi-deleted schwa ([ə̆] or [ᵊ], rarely also [ɪ̆] and [ʊ̆]), most commonly at the end of words such as in مَجّھ (majjhă), ناࣇ (nāḷĭ) etc. but also medially as in چَھڈّݨا (chaḍḍăṇā), دَسّݨا (dassăṇā) etc.
- Final ہ (h) should be written double if it joins to the previous letter, such as in مُون٘ہہ (mūṉh), ایہہ (eh) but not اوہ (oh). This does not apply when final ہ (h) is used as a vowel, e.g. in چُوچَہ (cūcā) or بارَہ (bārhā̃).
To do list
- Sort out any problems with diphthongs
- make sure all diphthongs are represented correctly
- change īū -> iyū
- Arabic loanwords
- al and non al words
- archaic ha murtaba tah
- Diacritics detector
- Distinguish final ـہ used as a schwa from final ـہ in loanwords: no diacritic is needed when it is a schwa ('..a'), but one is needed when it is a loanword ('..ah')
Working Template Examples
Headword-line templates
With a replaced header:
{{pa-noun|gur=ਮਸੀਤ|g=f|head=مَسِیت}}
Check: مسیت (example can't be shown on module documentation)
Without (needing to) replace the header:
{{pa-noun|g=f|gur=ਅਲਮਾਰੀ}}
Check: الماری (example can't be shown on module documentation)
Other templates
{{ux|pa|مَیں کَلّھ لَندَن جا رہا واں۔|I am going to London tomorrow.}}:
مَیں کَلّھ لَندَن جا رِہا واں۔
- maiṉ kallh landan jā rihā vāṉ.
- I am going to London tomorrow.
Status
Last updated: 23/08/2021
-- String helpers come from the shared string-utilities module.
local m_str_utils = require("Module:string utilities")
local U, gsub = m_str_utils.char, m_str_utils.gsub

-- Table returned to callers of this module.
local export = {}

-- Short-vowel marks and the gemination mark, built from their code points.
local zabar, zer, pesh = U(0x64E), U(0x650), U(0x64F)
local tashdid = U(0x651) -- also called shadda
local jazm = "ْ"

-- Individual letters that the rewrite rules refer to by name.
local he = "ہ"
local ain = "ع"
local alif = "ا"
local ye = "ی"
local ye2 = "ئ"
local ye3 = "ے"
local vao = "و"
local aspirate = "ھ"
local nasal = "ں"

-- Letter sets; each is spliced between "[" and "]" to form a pattern character class.
-- The three consonant sets are not identical, so they are kept as separate strings.
local consonants = "بٻبپتثجڄڄچحخدݙذرزژسشصضطظعغفقکڳگلࣇمنںݨوہھٹڈڑ"
local consonantS = "بٻبپتثجڃڄچحخدݙذرزژسشصضطظعغفقکگڳلࣇمنݨہھٹڈڑ"
local consonantS2 = "یٻببپتثجڃڄچحخدݙذرزژسشصضطظعغفقکڳگلࣇمنݨوہھٹڈڑ"
local vowels = "اآیئےۓوؤ"
local hes = "ہح"
local diacritics = "َُِّْٰ"
local ZZP = "َُِ"
-- Per-character fallback table: whatever the ordered rewrite rules leave
-- untouched is looked up here, one character at a time.
local mapping = {
	-- letters
	["آ"] = "ā",
	["ب"] = "b",
	["ٻ"] = "ḇ",
	["پ"] = "p",
	["ت"] = "t",
	["ٹ"] = "ṭ",
	["ث"] = "s̱",
	["ج"] = "j",
	["ڄ"] = "ǰ",
	["چ"] = "c",
	["ح"] = "ḥ",
	["خ"] = "x",
	["د"] = "d",
	["ڈ"] = "ḍ",
	["ݙ"] = "ḏ",
	["ذ"] = "ẕ",
	["ر"] = "r",
	["ڑ"] = "ṛ",
	["ز"] = "z",
	["ژ"] = "ž",
	["س"] = "s",
	["ش"] = "ś",
	["ص"] = "ṣ",
	["ض"] = "ẓ",
	["ط"] = "t̤",
	["ظ"] = "z̤",
	["ع"] = "ʻ",
	["غ"] = "ġ",
	["ف"] = "f",
	["ق"] = "q",
	["ک"] = "k",
	["گ"] = "g",
	["ڳ"] = "g̈",
	["ݨ"] = "ṇ",
	["ࣇ"] = "ḷ",
	["ل"] = "l",
	["م"] = "m",
	["ن"] = "n",
	["و"] = "v",
	["ہ"] = "h",
	["ی"] = "y",
	["ں"] = "ṉ",
	["ھ"] = "h",
	["ؤ"] = "'o",
	["ۓ"] = "-ye",
	["ۀ"] = "h-e", -- he ye (in izafat)

	-- diacritics
	[zabar] = "a",
	[zer] = "i",
	[pesh] = "u",
	[jazm] = "", -- also sukun - no vowel
	[U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner)

	-- ligatures
	["ﻻ"] = "lā",
	["ﷲ"] = "allāh",

	-- kashida
	["ـ"] = "-", -- kashida, no sound

	-- numerals
	["۰"] = "0",
	["۱"] = "1",
	["۲"] = "2",
	["۳"] = "3",
	["۴"] = "4",
	["۵"] = "5",
	["۶"] = "6",
	["۷"] = "7",
	["۸"] = "8",
	["۹"] = "9",

	-- punctuation (leave on separate lines)
	["۔"] = ".", -- full stop
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = "“", -- quotation mark
	["»"] = "”", -- quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimals
	["٬"] = ",", -- thousand
}
--- Transliterates Punjabi text written in Shahmukhi (Arabic script) into Latin.
-- The text is pushed through an ordered chain of `gsub` rewrites that handle
-- multi-character contexts (diphthongs, initial alif, aspirates, tashdeed,
-- tanween, ain, izafat, ...). Whatever is left is then mapped one character at
-- a time through `mapping`, and a few Latin-side clean-ups finish the job.
-- Rule order is significant: later rules only see what earlier rules left behind.
-- @param text the string to transliterate
-- @param lang language code; not read by this function
-- @param sc script code; not read by this function
-- @return the rewritten text
function export.tr(text, lang, sc)
	-- EXCEPTIONS - leave as they are, unless they have been sorted out elsewhere
	text = gsub(text, '([' .. consonants .. '])' .. ye .. vao .. nasal, "%1eyoṉ") -- needs to be fixed
	text = gsub(text, '([' .. consonants .. '])' .. ye .. vao, "%1eyo") -- needs to be fixed
	text = gsub(text, '([' .. consonants .. '])' .. ye .. '([' .. consonants .. '])' , "%1e%2") -- needs to be fixed
	text = gsub(text, '([' .. consonants .. '])' .. ye .. alif, "%1eyā") -- needs to be fixed
	text = gsub(text, zabar .. aspirate .. "(ی)" , "hai") -- needs to be fixed
	text = gsub(text, zabar .. aspirate .. "(و)" , "hau") -- needs to be fixed
	-- Fixed digraphs and whole words; the longer sequences come before the bare letter.
	text = gsub(text, "ئے", "'e")
	text = gsub(text, "َے", "ai")
	text = gsub(text, "ے", "e")
	text = gsub(text, "ہہ", "h")
	text = gsub(text, "اے", "e")
	text = gsub(text, "اَے", "ai")
	text = gsub(text, "ن٘", "ṉ")
	text = gsub(text, "اللہ", "allāh")
	text = gsub(text, "ؤ" .. pesh, "ū") -- needs to be fixed
	--text = gsub(text, "ُھوِیں", "vīṉ")
	text = gsub(text, "([" .. pesh .. aspirate .. "])" .. "وِیں", "%1vīṉ")
	-- diacritics
	text = gsub(text, "([" .. consonants .. "])" .. zer .. ye .. alif, "%1īyā")
	text = gsub(text, pesh .. vao .. jazm .. "", "ū")
	text = gsub(text, "([" .. consonants .. "])" .. zabar .. vao, "%1au")
	text = gsub(text, "([" .. consonants .. "])" .. zabar .. ye, "%1ai")
	text = gsub(text, "([" .. alif .. consonants .. "])" .. zabar .. ye3, "%1ai")
	text = gsub(text, "([" .. consonants .. "])" .. zer .. ye, "%1ī")
	text = gsub(text, jazm .. alif, "ā")
	text = gsub(text, ye2 .. zer .. ye, "'ī")
	text = gsub(text, "" .. alif .. ye2 .. "([" .. consonants .. "])", "ā'i%1") -- needs to be fixed
	-- Initial alif
	text = gsub(text, alif .. zer, "ī")
	text = gsub(text, alif .. zabar .. '([' .. consonantS .. '])', "a%1")
	text = gsub(text, alif .. zabar .. vao .. jazm .. "", "au")
	text = gsub(text, alif .. vao .. jazm .. "", "o")
	text = gsub(text, alif .. zabar .. ye .. jazm .. "", "ai")
	text = gsub(text, alif .. ye .. jazm .. "", "e")
	text = gsub(text, alif .. pesh .. vao, "ū")
	text = gsub(text, alif .. pesh .. vao .. jazm .. "", "ū")
	text = gsub(text, alif .. pesh, "u")
	-- do-chashme-he zabar, zer, pesh / no need to mess about
	--- works for short vowels
	text = gsub(text, "([" .. ZZP .. "])" .. aspirate, "h%1")
	text = gsub(text, pesh .. aspirate .. vao .. jazm .. "", "ū")
	text = gsub(text, zabar .. aspirate .. vao .. jazm .. "", "ai")
	text = gsub(text, '([' .. consonants .. '])' .. aspirate .. ye .. jazm .. "", "%1he")
	-- Tashdeed
	text = gsub(text, '([' .. consonantS2 .. '])' .. tashdid, "%1%1")
	text = gsub(text, '([' .. consonantS2 .. '])' .. tashdid .. '([' .. ZZP .. '])', "%1%1%2")
	text = gsub(text, '([' .. ZZP .. '])' .. ye .. '([' .. ZZP .. '])' .. tashdid, "%1yy%2")
	text = gsub(text, '([' .. ZZP .. '])' .. vao .. '([' .. ZZP .. '])' .. tashdid, "%1vv%2")
	-- For some reason the tashdeed gets pushed after the other diacritics, so this line is necessary for tashdeed to work with other diacritics
	text = gsub(text, '([' .. consonants .. '])' .. '([' .. ZZP .. '])' .. tashdid, "%1%1%2")
	-- tanween diacritic / no need to mess about
	text = gsub(text, '([' .. consonants .. '])' .. 'ً' .. alif, "%1an")
	text = gsub(text, alif .. 'ً', "an")
	text = gsub(text, '([' .. consonants .. '])' .. 'ً', "%1an")
	-- khari zabar -- / no need to mess about
	text = gsub(text, '([' .. vowels .. '])' .. 'ٰ', "á")
	text = gsub(text, '([' .. consonants .. '])' .. 'ٰ' .. '([' .. vowels .. '])', "%1á")
	-- ain
	-- Every ain rule emits ʻ (U+02BB), the same character `mapping` assigns to ع,
	-- so the letter is romanised identically whichever rule happens to match.
	text = gsub(text, alif .. ain , "āʻ")
	text = gsub(text, ain .. alif .. '([' .. consonants .. '])', "ʻā%1")
	text = gsub(text, '([' .. consonants .. '])' .. ain .. he, "%1ʻa")
	text = gsub(text, '([' .. consonants .. '])' .. '([' .. zer .. pesh .. ']?)' .. ain, "%1%2ʻ")
	text = gsub(text, ain .. zabar .. vao .. '([' .. consonants .. '])', "ʻau%1")
	text = gsub(text, ain .. zabar .. ye .. '([' .. consonants .. '])', "ʻai%1")
	text = gsub(text, ain .. zer .. '([' .. consonants .. '])', "ʻi%1")
	text = gsub(text, ain .. pesh .. '([' .. consonants .. '])', "ʻu%1")
	text = gsub(text, ain .. zer .. ye .. '([' .. consonants .. '])', "ʻī%1")
	text = gsub(text, ain .. pesh .. vao .. '([' .. consonantS .. '])', "ʻū%1")
	--- alif
	text = gsub(text, '([' .. consonants .. '])' .. zabar .. alif, "%1ā")
	text = gsub(text, '([' .. consonants .. '])' .. alif, "%1ā")
	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. alif, "%1%1ā")
	-- Vao
	text = gsub(text, vao .. '([' .. ZZP .. '])', "v%1")
	text = gsub(text, 'ُو', "ū")
	text = gsub(text, '([' .. consonants .. '])' .. zabar .. vao .. alif, "%1avā")
	-- medial/final consonants
	--- (e) -- works
	text = gsub(text, '([' .. consonants .. '])' .. ye .. jazm .. '([' .. consonants .. '])', "%1e%2")
	text = gsub(text, '([' .. consonants .. '])' .. ye3, "%1e")
	text = gsub(text, '([' .. consonants .. '])' .. zabar .. ye3, "%1ai")
	--- izafat
	text = gsub(text, '([' .. consonants .. '])' .. zer .. " ", "%1-e ")
	--- he
	text = gsub(text, zabar .. he .. zer .. ye, "ahī")
	text = gsub(text, zabar .. he .. alif, "ahā")
	text = gsub(text, zabar .. he .. '([' .. consonants .. vowels .. '])', "ah%1")
	--- vao
	text = gsub(text, '([' .. consonants .. '])' .. vao, "%1o")
	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. vao, "%1%1o")
	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. zer .. ye .. jazm .. alif, "%1%1īyā")
	text = gsub(text, zer .. ye .. jazm .. alif, "īyā")
	text = gsub(text, zer .. ye .. alif, "iyā")
	--- ye
	text = gsub(text, ye .. zabar .. alif, "yā")
	text = gsub(text, '([' .. consonants .. zer .. '])' .. ye, "%1ī")
	text = gsub(text, "ۂ", "a-e")
	-- Everything not consumed above is looked up character by character;
	-- characters without an entry in `mapping` are left in place.
	text = gsub(text, '.', mapping)
	-- Changed these to 'iy(*)', because they will be used for with ی, which are normally written as 'iy'
	text = gsub(text, 'īā', "iyā")
	text = gsub(text, 'īa', "iya")
	text = gsub(text, 'aa', "ā")
	-- Clean-up of mixed Arabic/Latin sequences that survive the per-character pass.
	text = gsub(text, 'ئy', "'ī")
	text = gsub(text, "" .. 'ئے', "'ye")
	text = gsub(text, "īے", "iye")
	text = gsub(text, "iīe", "iye")
	text = gsub(text, "īe", "iye")
	text = gsub(text, "iīv", "iyo")
	text = gsub(text, "ئiy", "'ī")
	return text
end
return export