Module:Linb-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module will transliterate text in the Linear B script. It is used to transliterate Mycenaean Greek. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:Linb-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc and in the language specified by the code lang.
When the transliteration fails, returns nil.

local export = {}

-- Lookup table from a single Linear B sign to its Latin transliteration.
--
-- Keys are the 4-byte UTF-8 encodings of the code points U+10000..U+1005D,
-- written as decimal byte escapes so that the table survives any re-encoding
-- of this source file (literal glyphs are easily corrupted into look-alike or
-- even duplicate keys). The code point of each key is given in the trailing
-- comment. Code points that are skipped below are unassigned in Unicode.
--
-- Values starting with "*" are sign numbers, used for signs that are
-- transliterated by number rather than by a syllabic value.
local chars = {
	["\240\144\128\128"] = "a", -- U+10000
	["\240\144\128\129"] = "e", -- U+10001
	["\240\144\128\130"] = "i", -- U+10002
	["\240\144\128\131"] = "o", -- U+10003
	["\240\144\128\132"] = "u", -- U+10004

	["\240\144\128\133"] = "da", -- U+10005
	["\240\144\128\134"] = "de", -- U+10006
	["\240\144\128\135"] = "di", -- U+10007
	["\240\144\128\136"] = "do", -- U+10008
	["\240\144\128\137"] = "du", -- U+10009

	["\240\144\128\138"] = "ja", -- U+1000A
	["\240\144\128\139"] = "je", -- U+1000B
	-- ji not in Unicode (U+1000C is unassigned)
	["\240\144\128\141"] = "jo", -- U+1000D
	["\240\144\128\142"] = "ju", -- U+1000E

	["\240\144\128\143"] = "ka", -- U+1000F
	["\240\144\128\144"] = "ke", -- U+10010
	["\240\144\128\145"] = "ki", -- U+10011
	["\240\144\128\146"] = "ko", -- U+10012
	["\240\144\128\147"] = "ku", -- U+10013

	["\240\144\128\148"] = "ma", -- U+10014
	["\240\144\128\149"] = "me", -- U+10015
	["\240\144\128\150"] = "mi", -- U+10016
	["\240\144\128\151"] = "mo", -- U+10017
	["\240\144\128\152"] = "mu", -- U+10018

	["\240\144\128\153"] = "na", -- U+10019
	["\240\144\128\154"] = "ne", -- U+1001A
	["\240\144\128\155"] = "ni", -- U+1001B
	["\240\144\128\156"] = "no", -- U+1001C
	["\240\144\128\157"] = "nu", -- U+1001D

	["\240\144\128\158"] = "pa", -- U+1001E
	["\240\144\128\159"] = "pe", -- U+1001F
	["\240\144\128\160"] = "pi", -- U+10020
	["\240\144\128\161"] = "po", -- U+10021
	["\240\144\128\162"] = "pu", -- U+10022

	["\240\144\128\163"] = "qa", -- U+10023
	["\240\144\128\164"] = "qe", -- U+10024
	["\240\144\128\165"] = "qi", -- U+10025
	["\240\144\128\166"] = "qo", -- U+10026
	-- qu not in Unicode (U+10027 is unassigned)

	["\240\144\128\168"] = "ra", -- U+10028
	["\240\144\128\169"] = "re", -- U+10029
	["\240\144\128\170"] = "ri", -- U+1002A
	["\240\144\128\171"] = "ro", -- U+1002B
	["\240\144\128\172"] = "ru", -- U+1002C

	["\240\144\128\173"] = "sa", -- U+1002D
	["\240\144\128\174"] = "se", -- U+1002E
	["\240\144\128\175"] = "si", -- U+1002F
	["\240\144\128\176"] = "so", -- U+10030
	["\240\144\128\177"] = "su", -- U+10031

	["\240\144\128\178"] = "ta", -- U+10032
	["\240\144\128\179"] = "te", -- U+10033
	["\240\144\128\180"] = "ti", -- U+10034
	["\240\144\128\181"] = "to", -- U+10035
	["\240\144\128\182"] = "tu", -- U+10036

	["\240\144\128\183"] = "wa", -- U+10037
	["\240\144\128\184"] = "we", -- U+10038
	["\240\144\128\185"] = "wi", -- U+10039
	["\240\144\128\186"] = "wo", -- U+1003A
	-- wu not in Unicode (U+1003B is unassigned)

	["\240\144\128\188"] = "za", -- U+1003C
	["\240\144\128\189"] = "ze", -- U+1003D
	-- zi not in Unicode (U+1003E is unassigned)
	["\240\144\128\191"] = "zo", -- U+1003F
	-- zu not in Unicode

	["\240\144\129\128"] = "ha", -- U+10040
	["\240\144\129\129"] = "ai", -- U+10041
	["\240\144\129\130"] = "au", -- U+10042
	["\240\144\129\131"] = "dwe", -- U+10043
	["\240\144\129\132"] = "dwo", -- U+10044
	["\240\144\129\133"] = "nwa", -- U+10045 (sign *48; previously mislabelled "nwo")
	["\240\144\129\134"] = "phu", -- U+10046
	["\240\144\129\135"] = "pte", -- U+10047
	["\240\144\129\136"] = "rya", -- U+10048
	["\240\144\129\137"] = "rai", -- U+10049
	["\240\144\129\138"] = "ryo", -- U+1004A
	["\240\144\129\139"] = "tya", -- U+1004B
	["\240\144\129\140"] = "twe", -- U+1004C
	["\240\144\129\141"] = "two", -- U+1004D

	["\240\144\129\144"] = "*18", -- U+10050
	["\240\144\129\145"] = "*19", -- U+10051
	["\240\144\129\146"] = "*22", -- U+10052
	["\240\144\129\147"] = "*34", -- U+10053
	["\240\144\129\148"] = "*47", -- U+10054
	["\240\144\129\149"] = "*49", -- U+10055
	["\240\144\129\150"] = "*56", -- U+10056
	["\240\144\129\151"] = "*63", -- U+10057
	["\240\144\129\152"] = "*64", -- U+10058
	["\240\144\129\153"] = "*79", -- U+10059
	["\240\144\129\154"] = "*82", -- U+1005A
	["\240\144\129\155"] = "*83", -- U+1005B
	["\240\144\129\156"] = "*86", -- U+1005C
	["\240\144\129\157"] = "*89", -- U+1005D

	-- explicit morpheme boundary: a hyphen typed in the input is mapped to a
	-- backtick placeholder so that it can be told apart from the hyphens that
	-- export.tr inserts between signs
	["-"] = "`",
}

--- Transliterates Linear B text into Latin syllable values.
--
-- Every UTF-8 character of the input is looked up in `chars`; characters
-- without an entry are passed through unchanged. The resulting pieces are
-- joined with hyphens, and hyphens adjacent to whitespace are removed so that
-- words stay separated by the original whitespace only.
--
-- @param text string to transliterate
-- @param lang language code (accepted for interface compatibility; not used)
-- @param sc script code (accepted for interface compatibility; not used)
-- @return the transliterated string
function export.tr(text, lang, sc)
	local pieces = {}
	local n = 0

	-- UTF-8 character pattern: one lead byte (ASCII including NUL, or a
	-- multi-byte lead 194..244) followed by any number of continuation bytes.
	for c in string.gmatch(text, "[%z\1-\127\194-\244][\128-\191]*") do
		n = n + 1
		pieces[n] = chars[c] or c
	end

	-- Joining puts a hyphen on both sides of every whitespace character.
	-- Strip the optional hyphen on each side of every single whitespace
	-- character, so that leading/trailing whitespace and runs of whitespace
	-- do not leave stray hyphens behind.
	text = string.gsub(table.concat(pieces, "-"), "%-?(%s)%-?", "%1")
	-- A backtick is the placeholder for a hyphen typed in the input; collapse
	-- it together with the joining hyphens around it into a single hyphen.
	text = string.gsub(text, "%-?`%-?", "-")

	return text
end

return export