Module:typing-aids/data/bho

This module lacks a documentation subpage. Please create it.
Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
-- Table returned at the end of the module; the rule sets are stored in it by key.
local data = {}

local U = require("Module:string/char")

-- Signs produced from their code points.
local candrabindu, anusvAra, visarga = U(0x11080), U(0x11081), U(0x11082)
local virAma, nuktA = U(0x110B9), U(0x110BA)
local acute = U(0x301)		-- combining acute

-- Given as a literal character instead of a code point.
local avagraha = "ऽ"

-- `consonants` is a character range meant to sit inside a pattern set;
-- `consonant` matches one character of that range with an optional nukta after it.
local consonants = "𑂍-𑂯"
local consonant = string.format("[%s]%s?", consonants, nuktA)

-- Substitution rules for typing Bhojpuri. Each entry is a
-- {pattern, replacement} pair; patterns may contain captures and anchors, and
-- replacements may refer to captures with %1, %2.
-- NOTE(review): the code that consumes this list is not in this file. The
-- ordering comments below assume the entries are applied one after another,
-- in list order, to the same text -- confirm against the consumer.
data["bho"] = {
	-- Vowels and modifiers. Do the diphthongs and diaereses first.
	{"ai", "𑂊"},
	{"au", "𑂌"},
	{"ä", "𑂃"},
	{"ö", "𑂋"},
	{"ï", "𑂅"},
	{"ü", "𑂇"},
	{"a", "𑂃"},
	{"ā", "𑂄"},
	{"i", "𑂅"},
	{"ī", "𑂆"},
	{"u", "𑂇"},
	{"ū", "𑂈"},
	{"e", "𑂉"},
	{"o", "𑂋"},
	-- {"ṝ", ""},
	-- {"ṛ", "𑂩𑂱"},
	-- {"r̥", "𑂩𑂱"},
	-- {"ḹ", ""},
	-- {"ḷ", ""},
	{"(𑂃)[%-/]([𑂅𑂇])", "%1%2"},		-- a-i, a-u for अइ, अउ; must follow rules for "ai", "au"

	-- Two-letter consonants must go before h.
	{"kh", "𑂎"},
	{"gh", "𑂐"},
	{"ch", "𑂓"},
	{"jh", "𑂕"},
	{"ṭh", "𑂘"},
	{"ḍh", "𑂛"},
	{"ɽh", "𑂜"},
	{"th", "𑂟"},
	{"dh", "𑂡"},
	{"ph", "𑂤"},
	{"bh", "𑂦"},
	{"h", "𑂯"},

	-- Other stops.
	{"k", "𑂍"},
	{"g", "𑂏"},
	{"c", "𑂒"},
	{"j", "𑂔"},
	{"ṭ", "𑂗"},
	{"ḍ", "𑂙"},
	{"ɽ", "𑂚"},
	{"t", "𑂞"},
	{"d", "𑂠"},
	{"p", "𑂣"},
	{"b", "𑂥"},

	-- Nasals.
	{"ṅ", "𑂑"},
	{"ñ", "𑂖"},
	{"ṇ", "𑂝"},
	{"n", "𑂢"},
	{"m", "𑂧"},

	-- Remaining consonants.
	{"y", "𑂨"},
	{"r", "𑂩"},
	{"l", "𑂪"},
	{"v", "𑂫"},
	{"ś", "𑂬"},
	{"ṣ", "𑂭"},
	{"s", "𑂮"},

	{"ṃ", anusvAra},
	{"ḥ", visarga},
	{"'", avagraha},
	{"~", candrabindu},
	-- This rule must be applied twice because a consonant may only be in one capture per operation,
	-- so "CCC" will only recognize the first two consonants. Must follow all consonant conversions.
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	-- A consonant left at the very end of the text gets a virama.
	{"(" .. consonant .. ")$", "%1" .. virAma},
	-- Drop any combining acute that is still present.
	{acute, ""},
}

-- Maps each independent vowel letter to the vowel sign that replaces it
-- when it directly follows a consonant.
local vowels = {
	["𑂅"] = U(0x110B1),
	["𑂆"] = U(0x110B2),
	["𑂇"] = U(0x110B3),
	["𑂈"] = U(0x110B4),
	["𑂉"] = U(0x110B5),
	["𑂊"] = U(0x110B6),
	["𑂋"] = U(0x110B7),
	["𑂌"] = U(0x110B8),
	["𑂄"] = U(0x110B0),
	-- ["𑂩𑂱"] = U(0x110C2),
	-- ["ॠ"] = "",
}

do
	local rules = data["bho"]
	-- Capture group for a single consonant, shared by every rule added here.
	local capturedConsonant = "(" .. consonant .. ")"

	-- Turn an independent vowel that follows a consonant into its diacritic.
	-- These rules have to come after all consonant conversions.
	for letter, sign in pairs(vowels) do
		rules[#rules + 1] = {capturedConsonant .. letter, "%1" .. sign}
	end

	-- Keep this rule last, after the independent vowels have become diacritics;
	-- otherwise "aï", "aü" won't work.
	rules[#rules + 1] = {capturedConsonant .. "𑂃", "%1"}
end

-- [[w:Harvard-Kyoto]] to [[w:International Alphabet of Sanskrit Transliteration]]
-- Three lookup tables stored at indices 1, 2 and 3.
-- NOTE(review): presumably the consumer applies them in index order -- confirm.
data["bho-tr"] = {
	-- Index 1
	{
		A = "ā",
		I = "ī",
		U = "ū",
		J = "ñ",
		T = "ṭ",
		D = "ḍ",
		N = "ṇ",
		G = "ṅ",
		z = "ś",
		S = "ṣ",
		M = "ṃ",
		H = "ḥ",
		--["lRR"] = "ḹ",
		["/"] = acute,
	},
	-- Index 2
	{
		_rh_ = "ɽh",
		-- ["lR"] = "ḷ",
		-- ["RR"] = "ṝ",
	},
	-- Index 3
	{
		_r_ = "ɽ",
		-- NOTE(review): the "ṛ" rule in data["bho"] is commented out, so this
		-- output looks like it is left unconverted -- confirm this is intended.
		R = "ṛ",
	},
}

return data