-- Module:typing-aids/data/saz
--
-- The following documentation is located at Module:typing-aids/data/saz/documentation. [edit]
-- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
-- This module needs documentation.
-- Please document this module by describing its purpose and usage on the documentation page.
-- Table returned by this module; each rule set is stored under a string key.
local data = {}

-- Shorthand for building a string from Unicode code points.
local U = mw.ustring.char

-- Signs referenced by the rules, given as code points.
local anusvAra, visarga = U(0xA880), U(0xA882)
local hAru, virAma = U(0xA8B4), U(0xA8C4)
local acute = U(0x301)		-- combining acute
local avagraha = "ऽ"

-- Every letter that the rules treat as a consonant.
local consonants = "ꢒꢓꢔꢕꢖꢗꢘꢙꢚꢛꢜꢝꢞꢟꢠꢡꢢꢣꢤꢥꢦꢧꢨꢩꢪꢫꢬꢭꢮꢯꢰꢱꢲꢳ"
-- Pattern fragment: one consonant letter, optionally followed by hāru.
-- NOTE(review): presumably evaluated by a UTF-8-aware matcher (mw.ustring), so
-- that the set and the trailing "?" act on whole characters -- confirm in the caller.
local consonant = string.format("[%s]%s?", consonants, hAru)

-- Ordered list of {pattern, replacement} pairs that turn romanized input into
-- the target script. Order is significant throughout: longer romanizations are
-- listed before the shorter ones they contain, and the pattern-based rules at
-- the end operate on the already-converted letters.
-- NOTE(review): presumably each pair is applied in sequence with a
-- gsub-style call by the consuming module -- confirm there.
data["saz"] = {
	-- Vowels and modifiers. Do the diphthongs and diaereses first.
	{"ai", "ꢎ"},
	{"au", "ꢑ"},
	{"ä", "ꢂ"},
	{"ö", "ꢏ"},
	{"ï", "ꢄ"},
	{"ü", "ꢆ"},
	{"a", "ꢂ"},
	{"ā", "ꢃ"},
	{"i", "ꢄ"},
	{"ī", "ꢅ"},
	{"u", "ꢆ"},
	{"ū", "ꢇ"},
	{"e", "ꢌ"},
	{"ē", "ꢍ"},
	{"o", "ꢏ"},
	{"ō", "ꢐ"},
	{"ṝ", "ꢉ"},
	{"ṛ", "ꢈ"},
	{"r̥", "ꢈ"},
	{"ḹ", "ꢋ"},
	{"l̥", "ꢊ"},
	{"(ꢂ)[%-/]([ꢄꢆ])", "%1%2"},		-- a-i, a-u for ꢂꢄ, ꢂꢆ; must follow rules for "ai", "au"

	-- Two-letter consonants must go before h.
	{"kh", "ꢓ"},
	{"gh", "ꢕ"},
	{"ch", "ꢘ"},
	{"jh", "ꢚ"},
	{"ṭh", "ꢝ"},
	{"ḍh", "ꢟ"},
	{"th", "ꢢ"},
	{"dh", "ꢤ"},
	{"ph", "ꢧ"},
	{"bh", "ꢩ"},
	{"h", "ꢲ"},

	-- Other stops.
	{"k", "ꢒ"},
	{"g", "ꢔ"},
	{"c", "ꢗ"},
	{"j", "ꢙ"},
	{"ṭ", "ꢜ"},
	{"ḍ", "ꢞ"},
	{"t", "ꢡ"},
	{"d", "ꢣ"},
	{"p", "ꢦ"},
	{"b", "ꢨ"},

	-- Hāru. These carry a combining mark after the base letter, so they must
	-- be converted before the plain "n", "m", "r", "l" rules below.
	{"n̤", "ꢥ" .. hAru},
	{"m̤", "ꢪ" .. hAru},
	{"r̤", "ꢬ" .. hAru},
	{"l̤", "ꢭ" .. hAru},

	-- Nasals. Each literal rule is listed once: a repeated rule could never
	-- match, because the first pass already replaces every occurrence.
	{"ṅ", "ꢖ"},
	{"ñ", "ꢛ"},
	{"ṇ", "ꢠ"},
	{"n", "ꢥ"},
	{"m", "ꢪ"},

	-- Remaining consonants.
	{"y", "ꢫ"},
	{"r", "ꢬ"},
	{"l", "ꢭ"},
	{"v", "ꢮ"},
	{"ś", "ꢯ"},
	{"ṣ", "ꢰ"},
	{"s", "ꢱ"},
	{"ḷ", "ꢳ"},

	{"ṃ", anusvAra},
	{"ḥ", visarga},
	{"'", avagraha},
	-- Insert virāma between adjacent consonants.
	-- This rule must be applied twice because a consonant may only be in one capture per operation,
	-- so "CCC" will only recognize the first two consonants. Must follow all consonant conversions.
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	-- A consonant at the very end of the text also receives virāma.
	{"(" .. consonant .. ")$", "%1" .. virAma},
	-- Strip any combining acute that is still present.
	{acute, ""},
}

-- Independent vowel letter -> the diacritic form used after a consonant.
local vowels = {
	["ꢄ"] = U(0xA8B6),
	["ꢆ"] = U(0xA8B8),
	["ꢈ"] = U(0xA8BA),
	["ꢊ"] = U(0xA8BC),
	["ꢌ"] = U(0xA8BE),
	["ꢍ"] = U(0xA8BF),
	["ꢏ"] = U(0xA8C1),
	["ꢐ"] = U(0xA8C2),
	["ꢃ"] = U(0xA8B5),
	["ꢅ"] = U(0xA8B7),
	["ꢇ"] = U(0xA8B9),
	["ꢉ"] = U(0xA8BB),
	["ꢋ"] = U(0xA8BD),
	["ꢎ"] = U(0xA8C0),
	["ꢑ"] = U(0xA8C3),
}

-- Append one rule per vowel: a consonant followed by the independent letter
-- becomes the consonant followed by the diacritic. Appending places these
-- rules after all consonant conversions, which they depend on. Each rule
-- targets a different letter, so the traversal order of pairs() is immaterial.
do
	local rules = data["saz"]
	for letter, sign in pairs(vowels) do
		rules[#rules + 1] = {string.format("(%s)%s", consonant, letter), "%1" .. sign}
	end
end

-- Final rule: remove the letter for "a" (ꢂ) when it directly follows a consonant.
-- It has to run after independent vowels are converted to diacritics,
-- or "aï", "aü" won't work.
data["saz"][#data["saz"] + 1] = {"(" .. consonant .. ")ꢂ", "%1"}

-- [[w:Harvard-Kyoto]] to [[w:International Alphabet of Sanskrit Transliteration]]
-- The replacements are split into three numbered stages. A sequence that
-- contains a shorter key sits in an earlier stage than that key
-- ("lRR" in stage 1, "lR" and "RR" in stage 2, "R" in stage 3).
data["saz-tr"] = {
	-- Stage 1
	{
		A = "ā",
		I = "ī",
		U = "ū",
		E = "ē",
		O = "ō",
		J = "ñ",
		T = "ṭ",
		D = "ḍ",
		N = "ṇ",
		G = "ṅ",
		z = "ś",
		S = "ṣ",
		M = "ṃ",
		H = "ḥ",
		lRR = "ḹ",
		["/"] = acute,
	},
	-- Stage 2
	{
		["n:"] = "n̤",
		["m:"] = "m̤",
		["r:"] = "r̤",
		["l:"] = "l̤",
		lR = "l̥",
		RR = "ṝ",
	},
	-- Stage 3
	{
		R = "ṛ",
		L = "ḷ",
	},
}

-- Hand the assembled rule sets back to whoever loads this module.
return data