-- Module:Linb-utilities
--
-- From Wiktionary, the free dictionary.


-- Table of functions made available by this module (returned at the end of the file).
local export = {}

-- Local aliases for the Unicode-aware string functions used throughout the module.
local gsub = mw.ustring.gsub
local u = mw.ustring.char

-- Numeric value of each numeral sign, used by export.tr to add up a numeral.
-- The values cover the units (1-9), tens (10-90), hundreds (100-900),
-- thousands (1000-9000) and ten-thousands (10000-90000).
-- The empty-string key maps to 0 so that a place that is absent from a numeral
-- (an empty pattern capture) contributes nothing to the sum.
local dig_chars = {
	[""] = 0,
	["๐„‡"] = 1,
	["๐„ˆ"] = 2,
	["๐„‰"] = 3,
	["๐„Š"] = 4,
	["๐„‹"] = 5,
	["๐„Œ"] = 6,
	["๐„"] = 7,
	["๐„Ž"] = 8,
	["๐„"] = 9,
	["๐„"] = 10,
	["๐„‘"] = 20,
	["๐„’"] = 30,
	["๐„“"] = 40,
	["๐„”"] = 50,
	["๐„•"] = 60,
	["๐„–"] = 70,
	["๐„—"] = 80,
	["๐„˜"] = 90,
	["๐„™"] = 100,
	["๐„š"] = 200,
	["๐„›"] = 300,
	["๐„œ"] = 400,
	["๐„"] = 500,
	["๐„ž"] = 600,
	["๐„Ÿ"] = 700,
	["๐„ "] = 800,
	["๐„ก"] = 900,
	["๐„ข"] = 1000,
	["๐„ฃ"] = 2000,
	["๐„ค"] = 3000,
	["๐„ฅ"] = 4000,
	["๐„ฆ"] = 5000,
	["๐„ง"] = 6000,
	["๐„จ"] = 7000,
	["๐„ฉ"] = 8000,
	["๐„ช"] = 9000,
	["๐„ซ"] = 10000,
	["๐„ฌ"] = 20000,
	["๐„ญ"] = 30000,
	["๐„ฎ"] = 40000,
	["๐„ฏ"] = 50000,
	["๐„ฐ"] = 60000,
	["๐„ฑ"] = 70000,
	["๐„ฒ"] = 80000,
	["๐„ณ"] = 90000,
}

-- Data for every supported sign, keyed by the sign itself.
-- Fields of each entry:
--   id  : the sign's number in the conventional "*NN" numbering (always present);
--   tl  : the transliteration, when the sign has one;
--   vas : true for signs that are labelled with a superscript "VAS".
-- Some ideograms use the same glyph as a syllabogram. Their keys are prefixed
-- with "$" so that both readings can live in the table under different keys;
-- without the prefix the ideogram entry would silently overwrite the
-- syllabogram entry, because a Lua table holds only one value per key.
local chars = {
	-- SYLLABOGRAMS --
	["๐€…"] = { id = "1", tl = "da" },
	["๐€ซ"] = { id = "2", tl = "ro" },
	["๐€ž"] = { id = "3", tl = "pa" },
	["๐€ณ"] = { id = "4", tl = "te" },
	["๐€ต"] = { id = "5", tl = "to" },
	["๐€™"] = { id = "6", tl = "na" },
	["๐€‡"] = { id = "7", tl = "di" },
	["๐€€"] = { id = "8", tl = "a" },
	["๐€ฎ"] = { id = "9", tl = "se" },
	["๐€„"] = { id = "10", tl = "u" },
	["๐€ก"] = { id = "11", tl = "po" },
	["๐€ฐ"] = { id = "12", tl = "so" },
	["๐€•"] = { id = "13", tl = "me" },
	["๐€ˆ"] = { id = "14", tl = "do" },
	["๐€—"] = { id = "15", tl = "mo" },
	["๐€ฃ"] = { id = "16", tl = "qa" },
	["๐€ผ"] = { id = "17", tl = "za" },
	["๐"] = { id = "18" },
	["๐‘"] = { id = "19" },
	["๐€ฟ"] = { id = "20", tl = "zo" },
	["๐€ฅ"] = { id = "21", tl = "qi" },
	["๐’"] = { id = "22" },
	["๐€˜"] = { id = "23", tl = "mu" },
	["๐€š"] = { id = "24", tl = "ne" },
	["๐€"] = { id = "25", tl = "a2" },
	["๐€ฌ"] = { id = "26", tl = "ru" },
	["๐€ฉ"] = { id = "27", tl = "re" },
	["๐€‚"] = { id = "28", tl = "i" },
	["๐†"] = { id = "29", tl = "pu2" },
	["๐€›"] = { id = "30", tl = "ni" },
	["๐€ญ"] = { id = "31", tl = "sa" },
	["๐€ฆ"] = { id = "32", tl = "qo" },
	["๐‰"] = { id = "33", tl = "ra3" },
	["๐“"] = { id = "34" },
	-- *35 is a flipped variant of *34
	["๐€"] = { id = "36", tl = "jo" },
	["๐€ด"] = { id = "37", tl = "ti" },
	["๐€"] = { id = "38", tl = "e" },
	["๐€ "] = { id = "39", tl = "pi" },
	["๐€น"] = { id = "40", tl = "wi" },
	["๐€ฏ"] = { id = "41", tl = "si" },
	["๐€บ"] = { id = "42", tl = "wo" },
	["๐"] = { id = "43", tl = "a3" },
	["๐€"] = { id = "44", tl = "ke" },
	["๐€†"] = { id = "45", tl = "de" },
	["๐€‹"] = { id = "46", tl = "je" },
	["๐”"] = { id = "47" },
	["๐…"] = { id = "48", tl = "nwa" },
	["๐•"] = { id = "49" },
	["๐€ข"] = { id = "50", tl = "pu" },
	["๐€‰"] = { id = "51", tl = "du" },
	["๐€œ"] = { id = "52", tl = "no" },
	["๐€ช"] = { id = "53", tl = "ri" },
	["๐€ท"] = { id = "54", tl = "wa" },
	["๐€"] = { id = "55", tl = "nu" },
	["๐–"] = { id = "56" },
	["๐€Š"] = { id = "57", tl = "ja" },
	["๐€ฑ"] = { id = "58", tl = "su" },
	["๐€ฒ"] = { id = "59", tl = "ta" },
	["๐€จ"] = { id = "60", tl = "ra" },
	["๐€ƒ"] = { id = "61", tl = "o" },
	["๐‡"] = { id = "62", tl = "pte" },
	["๐—"] = { id = "63" },
	["๐˜"] = { id = "64" },
	["๐€Ž"] = { id = "65", tl = "ju" },
	["๐‹"] = { id = "66", tl = "ta2" },
	["๐€‘"] = { id = "67", tl = "ki" },
	["๐Š"] = { id = "68", tl = "ro2" },
	["๐€ถ"] = { id = "69", tl = "tu" },
	["๐€’"] = { id = "70", tl = "ko" },
	["๐ƒ"] = { id = "71", tl = "dwe" },
	["๐€Ÿ"] = { id = "72", tl = "pe" },
	["๐€–"] = { id = "73", tl = "mi" },
	["๐€ฝ"] = { id = "74", tl = "ze" },
	["๐€ธ"] = { id = "75", tl = "we" },
	["๐ˆ"] = { id = "76", tl = "ra2" },
	["๐€"] = { id = "77", tl = "ka" },
	["๐€ค"] = { id = "78", tl = "qe" },
	["๐™"] = { id = "79" },
	["๐€”"] = { id = "80", tl = "ma" },
	["๐€“"] = { id = "81", tl = "ku" },
	["๐š"] = { id = "82" },
	["๐›"] = { id = "83" },
	-- *84
	["๐‚"] = { id = "85", tl = "au" },
	["๐œ"] = { id = "86" },
	["๐Œ"] = { id = "87", tl = "twe" },
	-- *88
	["๐"] = { id = "89" },
	["๐„"] = { id = "90", tl = "dwo" },
	["๐"] = { id = "91", tl = "two" },
	-- IDEOGRAMS --
	["๐‚€"] = { id = "100", tl = "VIR" },
	-- *101 is a variant of *100
	["๐‚"] = { id = "102", tl = "MUL" },
	-- *103 is a variant of *102
	["๐‚‚"] = { id = "104", tl = "CERV" },
	["๐‚ƒ"] = { id = "105", tl = "EQU" },
	["๐‚„"] = { id = "105f", tl = "EQU<sup>f</sup>" },
	["๐‚…"] = { id = "105m", tl = "EQU<sup>m</sup>" },
	["$๐€ฅ"] = { id = "106", tl = "OVIS" }, -- same as *21
	["๐‚†"] = { id = "106f", tl = "OVIS<sup>f</sup>" },
	["๐‚‡"] = { id = "106m", tl = "OVIS<sup>m</sup>" },
	["$๐’"] = { id = "107", tl = "CAP" }, -- same as *22
	["๐‚ˆ"] = { id = "107f", tl = "CAP<sup>f</sup>" },
	["๐‚‰"] = { id = "107m", tl = "CAP<sup>m</sup>" },
	["$๐‚"] = { id = "108", tl = "SUS" }, -- same as *85
	["๐‚Š"] = { id = "108f", tl = "SUS<sup>f</sup>" },
	["๐‚‹"] = { id = "108m", tl = "SUS<sup>m</sup>" },
	["$๐€˜"] = { id = "109", tl = "BOS" }, -- same as *23
	["๐‚Œ"] = { id = "109f", tl = "BOS<sup>f</sup>" },
	["๐‚"] = { id = "109m", tl = "BOS<sup>m</sup>" },
	["๐„ฟ"] = { id = "110", tl = "Z" },
	["๐„พ"] = { id = "111", tl = "V" },
	["๐„ผ"] = { id = "112", tl = "T" },
	["๐„ฝ"] = { id = "113", tl = "S" },
	["๐„ป"] = { id = "114", tl = "Q" },
	["๐„บ"] = { id = "115", tl = "P" },
	["๐„น"] = { id = "116", tl = "N" },
	["๐„ธ"] = { id = "117", tl = "M" },
	["๐„ท"] = { id = "118", tl = "L" },
	["๐‚Ž"] = { id = "120", tl = "GRA" },
	["๐‚"] = { id = "121", tl = "HORD" },
	["๐‚"] = { id = "122", tl = "OLIV" },
	["๐‚‘"] = { id = "123", tl = "AROM" },
	-- *124 is a flipped variant of *125, traditionally transliterated as PYC
	["๐‚’"] = { id = "125", tl = "CYP" },
	-- *126
	["๐‚“"] = { id = "127", tl = "kapo" },
	["๐‚”"] = { id = "128", tl = "kanako" },
	["$๐€Ž"] = { id = "129", tl = "FAR" }, -- same as *65
	["๐‚•"] = { id = "130", tl = "OLE" },
	["๐‚–"] = { id = "131", tl = "WINE" },
	["๐‚—"] = { id = "132" },
	["๐‚˜"] = { id = "133", tl = "arepa" },
	-- *134
	["๐‚™"] = { id = "135", tl = "meri" },
	["๐‚š"] = { id = "140", tl = "AES" },
	["๐‚›"] = { id = "141", tl = "AUR" },
	["๐‚œ"] = { id = "142" },
	-- *143
	-- The "$" prefix keeps this entry from overwriting the syllabogram *33 (ra3),
	-- which uses the very same glyph.
	["$๐‰"] = { id = "144", tl = "CROC" }, -- same as *33
	["๐‚"] = { id = "145", tl = "LANA" },
	["๐‚ž"] = { id = "146" },
	-- *147, *148, *149
	["๐‚Ÿ"] = { id = "150" },
	["๐‚ "] = { id = "151", tl = "CORN" },
	["๐‚ก"] = { id = "152" },
	["๐‚ข"] = { id = "153" },
	["๐‚ฃ"] = { id = "154" },
	["๐ƒž"] = { id = "155", vas = true },
	["๐‚ค"] = { id = "156", tl = "turo2" },
	["๐‚ฅ"] = { id = "157" },
	["๐‚ฆ"] = { id = "158" },
	["๐‚ง"] = { id = "159", tl = "TELA" },
	["๐‚จ"] = { id = "160" },
	["๐‚ฉ"] = { id = "161" },
	["๐‚ช"] = { id = "162", tl = "TUN" },
	["๐‚ซ"] = { id = "163", tl = "ARM" },
	["๐‚ฌ"] = { id = "164" },
	["๐‚ญ"] = { id = "165" },
	["๐‚ฎ"] = { id = "166" },
	["๐‚ฏ"] = { id = "167" },
	["๐‚ฐ"] = { id = "168" },
	["๐‚ฑ"] = { id = "169" },
	["๐‚ฒ"] = { id = "170" },
	["๐‚ณ"] = { id = "171" },
	["๐‚ด"] = { id = "172" },
	["๐‚ต"] = { id = "173", tl = "LUNA" },
	["๐‚ถ"] = { id = "174" },
	-- *175
	["๐‚ท"] = { id = "176", tl = "ARB" },
	["๐‚ธ"] = { id = "177" },
	["๐‚น"] = { id = "178" },
	["๐‚บ"] = { id = "179" },
	["๐‚ป"] = { id = "180" },
	["๐‚ผ"] = { id = "181" },
	["๐‚ฝ"] = { id = "182" },
	["๐‚พ"] = { id = "183" },
	["๐‚ฟ"] = { id = "184" },
	["๐ƒ€"] = { id = "185" },
	-- *186, *187, *188
	["๐ƒ"] = { id = "189" },
	["๐ƒ‚"] = { id = "190" },
	["๐ƒƒ"] = { id = "191", tl = "GAL" },
	["๐ƒŸ"] = { id = "200", vas = true },
	["๐ƒ "] = { id = "201", vas = true },
	["๐ƒก"] = { id = "202", vas = true },
	["๐ƒข"] = { id = "203", vas = true },
	["๐ƒฃ"] = { id = "204", vas = true },
	["๐ƒค"] = { id = "205", vas = true },
	["๐ƒฅ"] = { id = "206", vas = true },
	["๐ƒฆ"] = { id = "207", vas = true },
	["๐ƒง"] = { id = "208", vas = true },
	["๐ƒจ"] = { id = "209", tl = "AMPH", vas = true },
	["๐ƒฉ"] = { id = "210", vas = true },
	["๐ƒช"] = { id = "211", vas = true },
	["๐ƒซ"] = { id = "212", vas = true },
	["๐ƒฌ"] = { id = "213", vas = true },
	["๐ƒญ"] = { id = "214", vas = true },
	["๐ƒฎ"] = { id = "215", vas = true },
	["๐ƒฏ"] = { id = "216", vas = true },
	["๐ƒฐ"] = { id = "217", vas = true },
	["๐ƒฑ"] = { id = "218", vas = true },
	["๐ƒฒ"] = { id = "219", vas = true },
	["๐ƒ„"] = { id = "220" },
	["๐ƒณ"] = { id = "221", vas = true },
	["๐ƒด"] = { id = "222", vas = true },
	["๐ƒ…"] = { id = "225", tl = "ALV" },
	["๐ƒต"] = { id = "226", vas = true },
	["๐ƒถ"] = { id = "227", vas = true },
	["๐ƒท"] = { id = "228", vas = true },
	["๐ƒธ"] = { id = "229", vas = true },
	["๐ƒ†"] = { id = "230", tl = "HAS" },
	["๐ƒ‡"] = { id = "231", tl = "SAG" },
	["๐ƒˆ"] = { id = "232" },
	["๐ƒ‰"] = { id = "233", tl = "PUG" },
	["๐ƒŠ"] = { id = "234" },
	-- *235
	["๐ƒ‹"] = { id = "236", tl = "GUP" },
	-- *237, *238, *239
	["๐ƒŒ"] = { id = "240", tl = "BIG" },
	["๐ƒ"] = { id = "241", tl = "CUR" },
	["๐ƒŽ"] = { id = "242", tl = "CAPS" },
	["๐ƒ"] = { id = "243", tl = "ROTA" },
	-- *244, variant of *243 ?
	["๐ƒ"] = { id = "245" },
	["๐ƒ‘"] = { id = "246" },
	["๐ƒ’"] = { id = "247", tl = "dipte" },
	["๐ƒ“"] = { id = "248" },
	["๐ƒ”"] = { id = "249" },
	["๐ƒน"] = { id = "250", vas = true },
	["๐ƒ•"] = { id = "251" },
	["๐ƒ–"] = { id = "252" },
	["๐ƒ—"] = { id = "253" },
	["๐ƒ˜"] = { id = "254", tl = "JAC" },
	["๐ƒ™"] = { id = "255" },
	["๐ƒš"] = { id = "256" },
	["๐ƒ›"] = { id = "257" },
	["๐ƒœ"] = { id = "258" },
	["๐ƒ"] = { id = "259" },
	["๐ƒบ"] = { id = "305", vas = true },
}

--- Transliterates Linear B text.
-- Steps, in order:
--   1. U+200D is rewritten as "+" and U+200C as "$";
--   2. a space is inserted before a numeral that directly follows an ideogram
--      or a syllabogram; any other spaceless junction between the sign classes
--      listed in `space_between` raises an error;
--   3. adjacent syllabograms are joined with hyphens;
--   4. every run of numeral signs is replaced by its decimal value;
--   5. every remaining sign is replaced by its transliteration or, when it has
--      none, by "*" followed by its id (plus a superscript VAS where flagged).
-- @param text  the text to transliterate
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return the transliterated text
-- Raises an error on an apparently missing space, on a run of numeral signs
-- that is not in descending place order, and on a sign missing from `chars`.
function export.tr(text, lang, sc)
	-- See below at <export.makeDisplayText> for explanations.
	text = gsub(gsub(text, u(0x200D), "+"), u(0x200C), "$")
	-- Space between ideograms, syllabograms, and digits, or throw an error if there is apparently a missing space.
	-- TODO: Have a thorough check whether certain spaceless combination are really impossible, and adjust the error throwing if not.
	-- Classes, by index: 1 = ideograms, 2 = "$"-prefixed ideograms, 3 = syllabograms, 4 = digits.
	local space_between = { "[๐‚€-๐ƒบ๐„ท-๐„ฟ]", "%$[๐€ฅ๐’๐‚๐€˜๐€Ž]", "[๐€€-๐]", "[๐„‡-๐„ณ]" }
	for i_a, v_a in ipairs(space_between) do
		for i_b, v_b in ipairs(space_between) do
			-- Skip syllabogram+syllabogram and digit+digit: neighbouring syllabograms
			-- are hyphenated below, and neighbouring digits make up a single numeral.
			if i_a ~= i_b or i_a < 3 or i_b < 3 then
				text = gsub(text, "("..v_a..")("..v_b..")", function(a, b)
					if i_b < 4 then -- if the second char is not a digit, there's a missing space.
						error("There seems to be a missing space.")
					end
					return a.." "..b
				end)
			end
		end
	end
	-- Put a hyphen between syllabograms.
	-- Two passes are needed: a sign consumed as the second half of one match
	-- cannot also start the next match within the same pass.
	for _ = 1, 2 do
		text = gsub(text, "([๐€€-๐])([๐€€-๐])", "%1-%2")
	end
	-- Numerals.
	text = gsub(text, "[๐„‡-๐„ณ]+", function(str)
		-- At most one sign per decimal place, highest place first; an absent
		-- place is captured as "" and counts as 0.
		local ret, count = gsub(str, "^([๐„ซ-๐„ณ]?)([๐„ข-๐„ช]?)([๐„™-๐„ก]?)([๐„-๐„˜]?)([๐„‡-๐„]?)$", function (u5, u4, u3, u2, u1)
			return dig_chars[u5] + dig_chars[u4] + dig_chars[u3] + dig_chars[u2] + dig_chars[u1]
		end)
		if count ~= 1 then
			error("The number " .. str .. " is weird. Are you sure?")
		end
		return ret
	end)
	-- Syllabograms and ideograms.
	text = gsub(text, "%$?[๐€€-๐ƒบ๐„ท-๐„ฟ]", function(x_text)
		-- Declared local so that the lookup result does not leak into the global environment.
		local entry = chars[x_text]
		if entry == nil then error("The symbol " .. x_text .. " is not recognised.") end
		return entry.tl or ("*"..entry.id..(entry.vas and "<sup>VAS</sup>" or ""))
	end)
	-- The end.
	return text
end

--- Produces the display form of `text`.
-- Every "+" becomes U+200D and every "$" becomes U+200C.
-- @param text  the text as entered
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return the text with both substitutions applied
function export.makeDisplayText(text, lang, sc)
	-- This is admittedly a workaround: there currently appears to be no way of
	-- hiding certain characters from the displayed text while still keeping them
	-- available to the transliteration.
	-- TODO: MOD:languages should probably be updated to allow display-text
	-- replacements that run after transliteration.
	local with_plus_replaced = gsub(text, "%+", u(0x200D))
	local display = gsub(with_plus_replaced, "%$", u(0x200C))
	return display
end

--- Produces the entry name for `text` by deleting every "+" and "$".
-- @param text  the text as entered
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return the text without any "+" or "$" characters
function export.makeEntryName(text, lang, sc)
	-- Assign to a single local to discard gsub's second result (the match count).
	local entry_name = gsub(text, "[+$]", "")
	return entry_name
end

-- Hand the table of exported functions back to whoever loads this module.
return export