-- Module:User:Babr/Sandbox
-- Jump to navigation
-- Jump to search
-- - This module sandbox lacks a documentation subpage. You may create it.
-- - Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
-- This is a private module sandbox of Babr, for their own experimentation. Items in this module may be added and removed at Babr's discretion; do not rely on this module's stability.
-- Table that collects this module's public interface.
-- NOTE(review): presumably returned at the end of the file - not visible in this section.
local export = {}
-- Shared string helpers. The short aliases below are what the rest of the
-- file uses instead of spelling out m_string_utils.<name> each time.
local m_string_utils = require("Module:string utilities")
local gcodepoint = m_string_utils.gcodepoint
local rfind = m_string_utils.find
local rsubn = m_string_utils.gsub
local rmatch = m_string_utils.match
local rsplit = m_string_utils.split
-- U is called below with a single numeric code point (e.g. U(0x64B)) to
-- obtain the corresponding character as a string.
local U = m_string_utils.char
-- Named characters used throughout the transliteration tables below.
-- Related marks are declared together; every name keeps its original value.

-- Nunation marks; the trailing comment gives their romanisations in order.
local fatHataan, Dammataan, kasrataan = U(0x64B), U(0x64C), U(0x64D) -- an, un, in
-- Short-vowel marks (romanised a, i, u in the mapping table).
local zabar, zer, pesh = U(0x64E), U(0x650), U(0x64F)
-- Other combining marks.
local tashdid = U(0x651) -- also called shadda
local jazm = U(0x652)
local highhmz = U(0x654)
local dagger_alif = U(0x670)
-- Invisible formatting characters.
local zwnj = U(0x200C)
local lrm, rlm = U(0x200E), U(0x200F) -- left-to-right mark, right-to-left mark
-- Letters specified by code point.
local he, marbuta = U(0x647), U(0x629)
-- Letters specified literally.
local alif = "ا"
local alif_madd = "آ"
local hamza = "ء"
local hamza_yaa = "ئ"
local hamza_waaw = "ؤ"
local waaw = "و"
local yaa = "ی" -- farsi ye
local returning_yaa = "ے" -- baRi ye
-- Alif wasla (U+0671). It must be declared before the table constructor
-- below: using an undeclared name as a key evaluates to nil and aborts the
-- module with "table index is nil".
local alif_wasla = U(0x671)

-- Character-to-romanisation lookup table. Keys are single characters
-- (letters, diacritics, digits, punctuation); values are the Latin text
-- substituted for them. An empty string means the character is dropped.
local mapping = {
	-- letters
	["آ"] = "ā",
	["ب"] = "b",
	["پ"] = "p",
	["ت"] = "t",
	["ث"] = "s",
	["ج"] = "j",
	["چ"] = "č",
	["ح"] = "h",
	["خ"] = "x",
	["د"] = "d",
	["ذ"] = "z",
	["ر"] = "r",
	["ز"] = "z",
	["ژ"] = "ž",
	["س"] = "s",
	["ش"] = "š",
	["ص"] = "s",
	["ض"] = "z",
	["ط"] = "t",
	["ظ"] = "z",
	["غ"] = "ğ",
	["ف"] = "f",
	["ق"] = "q",
	["ک"] = "k",
	["گ"] = "g",
	["ل"] = "l",
	["م"] = "m",
	["ن"] = "n",
	["و"] = "ō",
	["ی"] = "ē",
	["۔"] = ".",
	["ه"] = "h",
	["ع"] = "'",
	["ء"] = "'",
	["ئ"] = "'",
	["ؤ"] = "'",
	["أ"] = "'",
	-- diacritics
	[zabar] = "a",
	[zer] = "i",
	[pesh] = "u",
	[fatHataan] = "an",
	[kasrataan] = "in",
	[Dammataan] = "un",
	[jazm] = "", -- also sukun - no vowel
	[zwnj] = "-", -- ZWNJ (zero-width non-joiner)
	[highhmz] = "-yi",
	-- ligatures
	["ﻻ"] = "lā",
	["ﷲ"] = "allāh",
	-- kashida
	["ـ"] = "‐", -- kashida, no sound
	-- alif_wasla
	[alif_wasla] = "", -- nothing
	-- numerals
	["۱"] = "1",
	["۲"] = "2",
	["۳"] = "3",
	["۴"] = "4",
	["۵"] = "5",
	["۶"] = "6",
	["۷"] = "7",
	["۸"] = "8",
	["۹"] = "9",
	["۰"] = "0",
	-- punctuation (leave on separate lines)
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = "“", -- quotation mark
	["»"] = "”", -- quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimals
	["٬"] = ",", -- thousands separator
	-- regional characters (FOR VERY SPECIFIC USECASES)
	["ټ"] = "ṭ",
	["ٹ"] = "ṭ",
	["ډ"] = "ḍ",
	["ڈ"] = "ḍ",
	-- balti
	-- cant do anything about ژ because it conflicts with persian
	["ڃ"] = "ž",
	["ڇ"] = "č̣",
	["ڑ"] = "ṛ",
	["ڗ"] = "dz",
	["ݜ"] = "ṣ",
	["ݨ"] = "ng",
	["ݩ"] = "ny",
	["ھ"] = "h",
	["ے"] = "e",
}
-- Character inventories. Several of these strings contain pattern escapes
-- (e.g. "%(" and "%s"), so they are written to be spliced into pattern
-- character classes, as space_like_class does at the end of this group.
local sun_letters = "تثدذرزسشصضطظلن"
local punctuation = ":%(%)%[%]*&٫؛؟،ـ«\".'!»٪؉۔`,/–—%{%}"
local numbers = "۱۲۳۴۵۶۷۸۹۰"
local balticonsonants = "ڃڇڑڗݜݨݩǩ" -- for any other languages using this module
local consonants_needing_vowels = "بپتټٹثجچحخدډڈذرزژسشصضطظعغفقکگلمنؤهئء" .. balticonsonants
-- Consonant set extended with alif madda, waaw and yaa. This previously
-- concatenated an undeclared name (`malif`), which is nil and raised
-- "attempt to concatenate a nil value" while the module was loading; the
-- alif madda constant declared earlier in the file is used instead.
-- NOTE(review): "r"/"l" presumably mean right-hand / left-hand context - confirm against the code that uses these.
local rconsonants = consonants_needing_vowels .. alif_madd .. "وی"
local lconsonants = consonants_needing_vowels -- yaa and waaw can be vowels w/o diacritics
-- Whitespace, apostrophe and double quote, plus ZWNJ in the class form.
local space_like = "%s'" .. '"'
local space_like_class = "[" .. space_like .. zwnj .. "]"