Jump to content

Module:R:CAL

From Wiktionary, the free dictionary

This module is used in {{R:CAL}}.

Testcases

{{#invoke:R:CAL|title|bnt%40xwr)|A}}
bnt**ḥwrˀ

URL

Title


-- Table of functions made available to {{#invoke:}}; returned at the end of
-- the module.
local export = {}

-- URI-decoding helper provided by Module:string utilities.
local decode_uri = require("Module:string utilities").decode_uri

-- Title object of the current page; its text is used as the fallback
-- headword when no word is passed in.
local title = mw.title.getCurrentTitle()

-- Character tables looked up by encoding name (e.g. 'CAL_code', 'translit').
-- NOTE(review): the tables are presumably parallel lists in which equal
-- indices denote the same letter -- confirm against Module:R:CAL/data.
local data = mw.loadData 'Module:R:CAL/data'

-- Finds the position of the first element of the sequence `arr` that equals
-- `val`. Yields -1 when no element matches.
local function indexof(arr, val)
	local position = -1
	for index, item in ipairs(arr) do
		if item == val then
			position = index
			break
		end
	end
	return position
end

-- Normalizes an empty string to nil so that `or` fallbacks apply to it;
-- every other value is passed through untouched.
local function if_not_empty(val)
	if val ~= '' then
		return val
	end
	return nil
end

-- Converts `word` from one encoding to another, character by character.
--
-- word:   the string to convert.
-- source: name of the encoding `word` is written in (a key of `data`).
-- target: name of the encoding to convert to (a key of `data`).
--
-- Returns the converted string. Characters that are not found in the source
-- table, or that have no counterpart in the target table, are kept as they
-- are.
local function convert(word, source, target)
	-- Keep the encoding names in `source`/`target` and the looked-up
	-- character tables in separate locals, so the name is still available
	-- for the check below.
	local source_chars, target_chars = data[source], data[target]

	-- The pattern matches one UTF-8 encoded character at a time: an ASCII or
	-- lead byte followed by any number of continuation bytes.
	local converted = word:gsub('[%z\1-\127\194-\244][\128-\191]*',
		function (char)
			-- indexof yields -1 for an unknown character; that index is
			-- expected to be absent from the table, so `char` is kept.
			return target_chars[indexof(source_chars, char)] or char
		end)

	-- "@" is displayed as the two-character sequence "**", which the
	-- per-character pass above cannot map back, so it is restored here.
	-- The check must be made against the encoding *name*: the previous
	-- comparison with the undefined global `CAL_code` (nil) never succeeded.
	if target == 'CAL_code' then
		converted = converted:gsub('%*%*', '@')
	end

	return converted
end

-- Percent-encodes `str` by handing it to mw.uri.encode in "PATH" mode.
local function percent_encode(str)
	local encoded = mw.uri.encode(str, "PATH")
	return encoded
end

-- Guesses which encoding `word` is written in and returns its name:
-- 'CAL_code', 'Syriac', 'Hebrew' or 'translit'. The checks run in that
-- order, so a word mixing Syriac and Hebrew letters is reported as 'Syriac'.
local function detect_encoding(word)
	-- No byte above 127 means the word is pure ASCII.
	local has_non_ascii = word:find('[\128-\255]')
	if not has_non_ascii then
		return 'CAL_code'
	end

	-- Byte 220 is the leading byte of the UTF-8 encoding of codepoints
	-- U+0700-U+073F, which includes the basic Syriac letters
	-- (U+0710-U+072E).
	if word:find('\220') then
		return 'Syriac'
	end

	-- Byte 215 is the leading byte of the UTF-8 encoding of codepoints
	-- U+05C0-U+05FF, which includes the basic Hebrew letters
	-- (U+05D0-U+05EA).
	if word:find('\215') then
		return 'Hebrew'
	end

	-- Any other non-ASCII text is treated as transliteration.
	return 'translit'
end

-- Builds the cal.huc.edu "oneentry" URL for a word.
--
-- frame.args[1] or frame.args.entry: the word; defaults to the current page
--     title when both are missing or empty.
-- frame.args[2] or frame.args.POS: the part-of-speech code; defaults to "N".
--
-- The word is URI-decoded, converted to CAL code from whichever encoding is
-- detected, and URI-encoded again before being placed in the URL.
function export.URL(frame)
	local args = frame.args
	local lemma = if_not_empty(args[1]) or if_not_empty(args.entry) or title.text
	local part_of_speech = if_not_empty(args[2]) or if_not_empty(args.POS) or "N"

	lemma = decode_uri(lemma, "PATH")

	local cal_code = convert(lemma, detect_encoding(lemma), 'CAL_code')
	local encoded_lemma = mw.uri.encode(cal_code)

	return string.format(
		"http://cal.huc.edu/oneentry.php?lemma=%s+%s&cits=all",
		encoded_lemma,
		part_of_speech)
end

-- Produces the display form (transliteration) of a word.
--
-- frame.args[1]: the word; defaults to the current page title when missing
--     or empty.
--
-- A word containing "%" is URI-decoded first. Every "#" followed by digits
-- is turned into a superscript of those digits, and the result is converted
-- to the 'translit' encoding from whichever encoding is detected.
function export.title(frame)
	local args = frame.args
	local text = if_not_empty(args[1]) or title.text

	local is_percent_encoded = text:find('%%')
	if is_percent_encoded then
		text = mw.uri.decode(text)
	end

	-- Homonym numbers such as "#2" become superscripts.
	text = text:gsub('#(%d+)', '<sup>%1</sup>')

	local source_encoding = detect_encoding(text)
	return convert(text, source_encoding, 'translit')
end

return export