-- Module:User:OblivionKhorasan/pnb-testing-2
-- Appearance
-- - This module sandbox lacks a documentation subpage. Please create it.
-- - Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
local m_str_utils = require("Module:string utilities")
local U = m_str_utils.char
local gsub = m_str_utils.gsub
-- Table returned by this module; public functions are attached to it.
local export = {}

-- Short-vowel marks, built from code points. `mapping` transliterates them
-- as "a", "i" and "u" respectively.
local zabar = U(0x64E)
local zer = U(0x650)
local pesh = U(0x64F)
local tashdid = U(0x651) -- also called shadda
local jazm = "ْ" -- mapped to the empty string by `mapping`
local he = "ہ"

-- Characters that export.tr wraps in "#" word-boundary markers. The string is
-- spliced into a pattern character class, hence the "%" escapes.
local punctuation = "%-:%(%)%[%]*&٫؛؟،ـ«\".\'!»٪؉۔"
local numbers = "۱۲۳۴۵۶۷۸۹۰"

-- Individual letters referenced by the context-sensitive rules.
local ain = 'ع'
local alif = 'ا'
local ye = 'ی'
local ye2 = 'ے'
local ye3 = 'ئ'
local vaw = "و"
local nasal = 'ں'
local nunghunna = 'ن٘'
local aspirate = 'ھ'

-- Character sets; each is meant to be spliced into a "[...]" pattern class.
local consonants = "بٻپتٹثجڄچحخدڊڈڋݙذرڑزژسشصضطظعغفقکݢگڳلࣇمنݨوہھئیں"
-- `consonants` without "ہ"
local nonhe = "بٻپتٹثجڄچحخدڊڈڋݙذرڑزژسشصضطظعغفقکݢگڳلࣇمنݨوھئیں"
-- `consonants` without "ی"
local nonye = "بٻپتٹثجڄچحخدڊڈڋݙذرڑزژسشصضطظعغفقکݢگڳلࣇمنݨوہھئں"
-- `consonants` without "و". The leading "ب" was previously missing from this
-- set, unlike in `nonhe` and `nonye`; it is restored here.
local nonvaw = "بٻپتٹثجڄچحخدڊڈڋݙذرڑزژسشصضطظعغفقکݢگڳلࣇمنݨہھئیں"
-- `consonants` plus "ا", "ے" and "ؤ"
local anything = "بٻپتٹثجڄچحخدڊڈڋݙذرڑزژسشصضطظعغفقکݢگڳلࣇمنݨوہھئیںاےؤ"
local vowels = "ایئےۓوؤ"
local indvowels = "آایےوؤ"
local semivowel = "یو"
local hes = "ہح"
-- Combining marks that may follow a consonant.
local diacritics = "ًَُِّْٰ"
-- The three short-vowel marks only.
local ZZP = "َُِ"
-- Single-character transliteration table. export.tr passes every character
-- that survives its context-sensitive rules through this table; characters
-- without an entry are left unchanged.
local mapping = {
	-- letters
	["آ"] = "ā",
	["ب"] = "b",
	["ٻ"] = "b̤",
	["پ"] = "p",
	["ت"] = "t",
	["ٹ"] = "ṭ",
	["ث"] = "s̱",
	["ج"] = "j",
	["ڄ"] = "j̈",
	["چ"] = "c",
	["ح"] = "ḥ",
	["خ"] = "x",
	["د"] = "d",
	["ڈ"] = "ḍ",
	["ݙ"] = "d̤",
	["ذ"] = "ẕ",
	["ر"] = "r",
	["ڑ"] = "ṛ",
	["ز"] = "z",
	["ژ"] = "ž",
	["س"] = "s",
	["ش"] = "ś",
	["ص"] = "ṣ",
	["ض"] = "ẓ",
	["ط"] = "t̤",
	["ظ"] = "z̤",
	["ع"] = "ʻ",
	["غ"] = "ġ",
	["ف"] = "f",
	["ق"] = "q",
	["ک"] = "k",
	["گ"] = "g",
	["ڳ"] = "g̈",
	["ݢ"] = "ŋ",
	["ݨ"] = "ṇ",
	["ࣇ"] = "ḷ",
	["ل"] = "l",
	["م"] = "m",
	["ن"] = "n",
	["و"] = "v",
	["ہ"] = "h",
	["ی"] = "y",
	["۔"] = ".",
	["ں"] = "ṉ",
	-- "H" is a placeholder; export.tr turns it into an apostrophe at the end
	["ئ"] = "H",
	["أ"] = "",

	-- diacritics
	[zabar] = "a",
	[zer] = "i",
	[pesh] = "u",
	[jazm] = "", -- also sukun - no vowel
	[U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner)

	-- ligatures
	["ﻻ"] = "lā",
	["ﷲ"] = "allāh",

	-- kashida
	["ـ"] = "-", -- kashida, no sound

	-- numerals
	["۱"] = "1",
	["۲"] = "2",
	["۳"] = "3",
	["۴"] = "4",
	["۵"] = "5",
	["۶"] = "6",
	["۷"] = "7",
	["۸"] = "8",
	["۹"] = "9",
	["۰"] = "0",

	-- punctuation (leave on separate lines)
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = "“", -- quotation mark
	["»"] = "”", -- quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimals
	["٬"] = ",", -- thousand
}
--- Transliterate Arabic-script text into Latin script.
-- The text is first segmented with "#" word-boundary markers, then run
-- through an ordered list of context-sensitive substitutions, and finally
-- every remaining character is looked up in `mapping`. Rule order matters:
-- each rule sees the output of all earlier rules.
-- @param text  the string to transliterate
-- @param lang  language code (not used by this function)
-- @param sc    script code (not used by this function)
-- @return the transliterated string
function export.tr(text, lang, sc)
	-- Capturing character classes reused by many rules below.
	local C = '([' .. consonants .. '])'
	local V = '([' .. ZZP .. '])'

	-- Define the "end" of a word. Literal "#" is protected first so that "#"
	-- can be used as the boundary marker.
	text = gsub(text, "#", "HASHTAG")
	text = gsub(text, " | ", "# | #")
	text = gsub(text, "\n", "#\n#")
	text = gsub(text, "([" .. punctuation .. "])", "#%1#")
	text = "##" .. gsub(text, " ", "# #") .. "##"
	-- hashtags now mark the beginning and end of a word

	-- EXCEPTIONS - leave as they are, unless they have been sorted out elsewhere
	text = gsub(text, "الله", "allāh")

	-- Aspirate following a consonant + diacritic: emit "h" before the diacritic.
	text = gsub(text, C .. '([' .. diacritics .. '])' .. aspirate, "%1h%2")

	-- Alif carrying a short-vowel mark: drop the alif, keep the mark.
	text = gsub(text, alif .. V, "%1")
	-- Word-initial alif + vaw / alif + ye.
	text = gsub(text, "#" .. alif .. vaw, "o")
	-- The character after the ye is captured only as context and must be put
	-- back; previously it was dropped, deleting the following consonant.
	text = gsub(text, "#" .. alif .. ye .. '([' .. jazm .. consonants .. '])', "e%1")
	text = gsub(text, "#" .. alif .. ye .. alif, "eyā")

	-- Tashdeed: double the consonant. (A separate consonant + tashdeed + vowel
	-- rule is unnecessary, since the first rule already yields that result.)
	text = gsub(text, C .. tashdid, "%1%1")
	text = gsub(text, V .. ye .. V .. tashdid, "%1yy%2")
	-- For some reason the tashdeed gets pushed after the other diacritics, so
	-- this line is necessary for tashdeed to work with other diacritics.
	text = gsub(text, C .. V .. tashdid, "%1%1%2")

	-- Tanween diacritic.
	text = gsub(text, C .. 'ً' .. alif, "%1an")
	text = gsub(text, alif .. 'ً', "an")
	text = gsub(text, C .. 'ً', "%1an")

	-- Tall zabar, written either after or before its carrier vowel letter;
	-- the carrier letter itself is not transliterated.
	text = gsub(text, '([' .. vowels .. '])' .. 'ٰ', "á")
	text = gsub(text, C .. 'ٰ' .. '([' .. vowels .. '])', "%1á")

	-- Ain. Every rule emits the same "ʻ" that `mapping` uses for a bare ain.
	text = gsub(text, alif .. ain, "āʻ")
	text = gsub(text, ain .. alif .. C, "ʻā%1")
	text = gsub(text, C .. '([' .. zer .. pesh .. ']?)' .. ain, "%1%2ʻ")
	-- Long vowels must be tried before short ones: ye and vaw are themselves
	-- in `consonants`, so the short-vowel rules would otherwise consume them.
	text = gsub(text, ain .. zer .. ye .. C, "ʻī%1")
	text = gsub(text, ain .. pesh .. vaw .. C, "ʻū%1")
	text = gsub(text, ain .. zer .. C, "ʻi%1")
	text = gsub(text, ain .. pesh .. C, "ʻu%1")

	-- Alif after a consonant or diacritic.
	text = gsub(text, '([' .. consonants .. diacritics .. '])' .. alif, "%1ā")

	-- Medial ye between consonants (with or without jazm) reads "e".
	text = gsub(text, C .. ye .. jazm .. C, "%1e%2")
	text = gsub(text, C .. ye .. C, "%1e%2")
	-- NOTE(review): ye is in `consonants`, so the alif rule above has already
	-- rewritten every alif that follows a ye; this pattern looks unable to
	-- match at this position. Confirm the intended order before relying on it.
	text = gsub(text, '([' .. nonye .. '])' .. ye .. alif, "%1eyā")

	-- Bari ye.
	text = gsub(text, zabar .. ye2, "ai")
	text = gsub(text, ye2, "e")

	-- Get rid of the boundary markers and restore literal "#".
	text = gsub(text, "#", "")
	text = gsub(text, "HASHTAG", "#")

	-- Map every remaining character, then resolve what the mapping leaves
	-- behind: "n" + combining mark, and the "H" placeholder.
	text = gsub(text, '.', mapping)
	text = gsub(text, 'n٘', "ṉ")
	text = gsub(text, 'H', "'")

	return text
end
return export