Module:ps-translit
Jump to navigation
Jump to search
- The following documentation is generated by Module:documentation/functions/translit. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox (diff)
This module will transliterate Pashto language text per WT:PS TR.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:ps-translit/testcases.
Functions
[edit]tr(text, lang, sc)
- Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
- When the transliteration fails, returns nil.
-- Shared string helpers; only `char` and `gsub` are taken from it in this module.
local m_str_utils = require("Module:string utilities")
-- U(codepoint) is used below to build a diacritic string from its Unicode code point.
local U = m_str_utils.char
-- gsub performs every substitution in export.tr.
-- NOTE(review): presumably a Unicode-aware gsub (the patterns put multi-byte
-- characters inside [...] sets) — confirm against Module:string utilities.
local gsub = m_str_utils.gsub
-- Table returned at the end of the file as the module's public interface.
local export = {}
-- Vowel marks; `mapping` turns them into "a", "ĭ", "ŭ" and "ë" respectively.
local zwar = U(0x64E)
local zer = U(0x650)
local pesh = U(0x64F)
local tashdid = U(0x651) -- also called shadda
local zwarakay = U(0x659) -- Pashto /ə/
-- Mapped to the empty string in `mapping` (marks the absence of a vowel).
local jazm = "ْ"
-- Individual letters referred to by name in the substitution rules.
local he = "ه"
local ain = 'ع'
local alif = 'ا'
local ye = 'ي'
-- Two characters; only ever used inside a [...] set, i.e. "either of these".
local ye2 = "ےی"
local ye3 = 'ې'
local waw = "و"
-- Not referenced anywhere in the visible code.
local nasal = 'ں'
-- Letter sets, spliced into [...] pattern sets by export.tr.
-- `consonants` includes waw and he.
local consonants = "بپتټثجځچڅحخدډذرړزژږسشښصضطظعغفقکګلمنڼوه"
-- Same as `consonants` but without waw.
local consonantS = "بپتټثجځچڅحخدډذرړزژږسشښصضطظعغفقکګلمنڼه"
-- Same as `consonants` plus ye.
local consonantS2 = "بپتټثجځچڅحخدډذرړزژږسشښصضطظعغفقکګلمنڼوهي"
-- Vowel letters, including alif.
local vowels = "اآیېيۍئےو"
-- Same as `vowels` but without alif.
local vowels2 = "آیېيۍئےو"
-- waw and ye, the letters that can stand for either a consonant or a vowel.
local semivowels = "وي"
-- Not referenced anywhere in the visible code.
local hes = "هح"
-- Combining marks used as a [...] set.
-- NOTE(review): appears to contain every mark handled by this module — verify the exact code points before editing.
local diacritics = "َُِّْٰٙ"
-- NOTE(review): by its name and contents this looks like zwar, zer, pesh and zwarakay only — confirm.
local ZZPZ = "َُِٙ"
-- Character-by-character fallback table.
-- export.tr applies it with gsub(text, '.', mapping) after all context-sensitive
-- rules have run, so each key is a single character and each value is the text
-- that replaces it. Characters that have no entry are left as they are.
local mapping = {
	-- letters
	["آ"] = 'â', ["ب"] = 'b', ["پ"] = 'p', ["ت"] = 't', ["ټ"] = 'ṭ', ["ث"] = 's̱',
	["ج"] = 'j', ["ځ"] = 'ź', ["چ"] = 'č', ["څ"] = 'ś', ["ح"] = 'ḥ', ["خ"] = 'x',
	["د"] = 'd', ["ډ"] = 'ḍ', ["ذ"] = 'ẕ', ["ر"] = 'r', ['ړ'] = "ṛ", ["ز"] = 'z', ["ژ"] = 'ž', ["ږ"] = 'ǵ',
	["س"] = 's', ["ش"] = 'š', ["ښ"] = 'x̌', ["ص"] = 'ṣ', ["ض"] = 'ẓ',
	["ط"] = 't̤', ["ظ"] = 'z̤', ["ع"] = 'ʻ', ["غ"] = 'ǧ', ["ف"] = 'f', ["ق"] = 'q',
	["ک"] = 'k', ["ګ"] = 'g', ["ڼ"] = 'ṇ',
	["ل"] = 'l', ["م"] = 'm', ["ن"] = 'n', ["و"] = 'w', ["ه"] = 'h', ["ي"] = 'y', ["ں"] = 'ṉ',
	["ؤ"] = "wë", ["ۍ"] = "ëy", ["ئ"] = 'ëy', ["ې"] = 'e', ["ۀ"] = 'ë', ["ی"] = 'y', ["ے"] = 'y',
	-- diacritics
	[zwar] = "a",
	[zer] = "ĭ",
	[pesh] = "ŭ",
	[zwarakay] = "ë",
	[jazm] = "", -- also sukun - no vowel
	[U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner)
	-- ligatures
	-- Long a is written "â" everywhere else in this module (آ above, and the
	-- spelled-out الله rule in export.tr), so the ligatures use "â" as well.
	["ﻻ"] = "lâ",
	["ﷲ"] = "allâh",
	-- kashida
	["ـ"] = "-", -- kashida, no sound
	-- numerals
	["۱"] = "1", ["۲"] = "2", ["۳"] = "3", ["۴"] = "4", ["۵"] = "5",
	["۶"] = "6", ["۷"] = "7", ["۸"] = "8", ["۹"] = "9", ["۰"] = "0",
	-- punctuation (leave on separate lines)
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = '“', -- quotation mark
	["»"] = '”', -- quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimals
	["٬"] = ",", -- thousand
}
--- Transliterates Pashto text into Latin script.
-- The text is passed through a fixed sequence of substitutions, so the order
-- of the statements matters: each rule sees the output of the ones before it.
-- Whatever is left after the context-sensitive rules is converted character
-- by character through `mapping`; characters without an entry stay unchanged.
-- @param text the string to transliterate
-- @param lang language code; not used by this function
-- @param sc script code; not used by this function
-- @return the transliterated string
function export.tr(text, lang, sc)
	-- define the "end" of a word
	-- A literal "#" in the input is protected first, because "#" is used as the
	-- word-boundary marker until the markers are stripped again further down.
	text = gsub(text, "#", "HASHTAG")
	text = gsub(text, " | ", "# | #")
	text = gsub(text, "\n" , "#".."\n" .. "#")
	text = "##" .. gsub(text, " ", "# #") .. "##"
	-- hashtags now mark the beginning and end of a word
	-- EXCEPTIONS - leave as they are, unless they have been sorted out elsewhere
	text = gsub(text, "ن٘", "ṉ")
	text = gsub(text, "الله", "allâh")
	-- diacritics
	text = gsub(text, pesh .. waw .. jazm .. "", "u")
	text = gsub(text, jazm .. alif, "â")
	-- Initial alif
	-- (these patterns carry no "#" anchor, so they match alif in any position)
	text = gsub(text, alif .. zwar .. '([' .. consonantS .. '])', "a%1")
	text = gsub(text, alif .. zer .. ye .. jazm .. "", "i")
	text = gsub(text, alif .. zer, "ĭ")
	text = gsub(text, alif .. waw .. jazm .. "", "o")
	text = gsub(text, alif .. pesh .. waw, "u")
	-- NOTE(review): this rule cannot match — the rule directly above has already
	-- replaced every alif + pesh + waw. Kept as-is rather than guessing at the
	-- intended order.
	text = gsub(text, alif .. pesh .. waw .. jazm .. "", "u")
	text = gsub(text, alif .. pesh, "ŭ")
	-- Tashdeed (doubles the preceding letter)
	text = gsub(text, '([' .. consonantS2 .. '])' .. tashdid, "%1%1")
	text = gsub(text, '([' .. consonantS2 .. '])' .. tashdid .. '([' .. ZZPZ .. '])', "%1%1%2")
	text = gsub(text, '([' .. ZZPZ .. '])' .. ye .. '([' .. ZZPZ .. '])' .. tashdid, "%1yy%2")
	text = gsub(text, '([' .. ZZPZ .. '])' .. waw .. '([' .. ZZPZ .. '])' .. tashdid, "%1ww%2")
	-- For some reason the tashdeed gets pushed after the other diacritics, so this line is necessary for tashdeed to work with other diacritics
	text = gsub(text, '([' .. consonants .. '])' .. '([' .. ZZPZ .. '])' .. tashdid, "%1%1%2")
	-- tanween diacritic / no need to mess about
	text = gsub(text, '([' .. consonants .. '])' .. 'ً' .. alif, "%1an")
	text = gsub(text, alif .. 'ً', "an")
	text = gsub(text, '([' .. consonants .. '])' .. 'ً', "%1an")
	-- tall zwar -- / no need to mess about
	-- (the captured vowel letter is dropped from the output in both rules)
	text = gsub(text, '([' .. vowels .. '])' .. 'ٰ', "á")
	text = gsub(text, '([' .. consonants .. '])' .. 'ٰ' .. '([' .. vowels .. '])', "%1á")
	-- ʻain
	-- The ʻain is written with U+02BB here, the same character that `mapping`
	-- and every other ʻain rule below produce.
	text = gsub(text, alif .. ain, "âʻ")
	text = gsub(text, ain .. alif .. '([' .. consonants .. '])', "ʻâ%1")
	text = gsub(text, '([' .. consonants .. '])' .. ain .. he, "%1ʻa")
	text = gsub(text, '([' .. consonants .. '])' .. '([' .. zer .. pesh .. ']?)' .. ain, "%1%2ʻ")
	text = gsub(text, ain .. zer .. '([' .. consonants .. '])', "ʻĭ%1")
	text = gsub(text, ain .. pesh .. '([' .. consonants .. '])', "ʻŭ%1")
	text = gsub(text, ain .. zer .. ye .. '([' .. consonants .. '])', "ʻi%1")
	text = gsub(text, ain .. pesh .. waw .. '([' .. consonantS .. '])', "ʻu%1")
	--- alif
	text = gsub(text, '([' .. consonants .. '])' .. zwar .. alif, "%1â")
	text = gsub(text, '([' .. consonantS2 .. '])' .. alif, "%1â")
	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. alif, "%1%1â")
	-- word-initial alif followed by a vowel letter ("#" marks the word start)
	text = gsub(text, "#" .. alif .. ye, "i")
	text = gsub(text, "#" .. alif .. waw, "o")
	text = gsub(text, "#" .. alif .. ye3, "e")
	text = gsub(text, '([' .. consonantS2 .. '])' .. alif .. ye .. waw, "%1âyo")
	-- waw
	text = gsub(text, waw .. '([' .. ZZPZ .. '])', "w%1")
	text = gsub(text, 'ُو', "u")
	-- medial/final consonants
	--- (e) -- works
	text = gsub(text, '([' .. consonants .. '])' .. ye .. jazm .. '([' .. consonants .. '])', "%1i%2")
	--- he
	text = gsub(text, '([' .. consonantS2 .. '])' .. zwar .. he .. zer .. ye, "%1ahi")
	text = gsub(text, '([' .. consonantS2 .. '])' .. zwar .. he .. alif, "%1ahâ")
	text = gsub(text, zwar .. he .. '([' .. consonants .. vowels .. '])', "ah%1")
	text = gsub(text, '([' .. consonantS2 .. '])' .. zwar .. he, "%1ah")
	-- word-final he after a consonant is written "a"
	text = gsub(text, '([' .. consonantS .. '])' .. he .. "#", "%1a")
	text = gsub(text, jazm .. waw .. he, "wa")
	text = gsub(text, jazm .. ye .. he, "ya")
	text = gsub(text, '([' .. vowels2 .. diacritics .. '])' .. '([' .. semivowels .. '])' .. he .. "#", "%1%2a")
	text = gsub(text, '([' .. consonantS .. '])' .. waw .. he .. "#", "%1oh")
	text = gsub(text, '([' .. consonantS .. '])' .. ye .. he .. "#", "%1ih")
	--- waw
	text = gsub(text, '([' .. consonants .. '])' .. waw, "%1o")
	text = gsub(text, ye .. waw, "yo")
	text = gsub(text, '([' .. consonantS2 .. '])' .. tashdid .. waw, "%1%1o")
	text = gsub(text, zer .. ye .. waw, "io")
	text = gsub(text, '([' .. consonantS2 .. '])' .. ye .. waw .. "#", "%1yo")
	text = gsub(text, '([' .. consonantS2 .. '])' .. ye .. waw .. '([' .. consonantS .. '])', "%1yo%2")
	text = gsub(text, '([' .. consonantS2 .. '])' .. ye .. waw .. jazm, "%1iw")
	text = gsub(text, '([' .. diacritics .. '])' .. '([' .. semivowels .. '])' .. waw, "%1%2o")
	text = gsub(text, "#" .. ye .. waw, "yo")
	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. zer .. ye .. jazm .. alif, "%1%1iyâ")
	text = gsub(text, '([' .. consonants .. '])' .. zer .. ye .. alif, "%1iâ")
	--- ye
	text = gsub(text, ye .. zwar .. alif, "yâ")
	text = gsub(text, '([' .. consonants .. zer .. '])' .. ye .. '([' .. consonantS .. '])', "%1i%2")
	text = gsub(text, '([' .. consonants .. zer .. '])' .. ye .. "#", "%1i")
	text = gsub(text, '([' .. consonantS .. '])' .. '([' .. ye2 .. '])', "%1a%2")
	text = gsub(text, '([' .. diacritics .. '])' .. '([' .. semivowels .. '])' .. '([' .. ye2 .. '])', "%1%2a%3")
	text = gsub(text, "#" .. '([' .. semivowels .. '])' .. '([' .. ye2 .. '])', "%1a%2")
	-- get rid of hashtags (not needed)
	text = gsub(text, "#", "")
	-- restore the "#" characters that were part of the original input
	text = gsub(text, "HASHTAG", "#")
	-- convert every remaining character that has an entry in `mapping`
	text = gsub(text, '.', mapping)
	-- tidy up sequences produced by the character-by-character pass
	text = gsub(text, 'ĭy', "i")
	text = gsub(text, 'ŭw', "u")
	text = gsub(text, 'ĭi', "i")
	text = gsub(text, 'ŭu', "u")
	-- alif has no entry in `mapping`; drop one that is directly followed by "a"
	text = gsub(text, "اa", "a")
	text = gsub(text, 'aa', "â")
	--
	return text
end
-- Hand the table holding tr() back to whoever loads this module.
return export