Jump to content

Module:R:Perseus/sandbox

From Wiktionary, the free dictionary


local export = {}

-- Collisions contained in submodules of [[Module:R:Perseus/collision-data]].
local m_params = require("Module:parameters")
local m_utils = require("Module:grc-utilities")
local m_scripts = require("Module:scripts")
local m_script_utils = require("Module:script utilities")
local m_languages = require("Module:languages")

local tag_greek = m_utils.tag

-- Passes `text` to m_script_utils.tag_text together with the language object
-- for code "la" and the script object for code "Latn", and returns whatever
-- tag_text returns.
local function tag_latin(text)
	return m_script_utils.tag_text(
		text,
		m_languages.getByCode("la"),
		m_scripts.getByCode("Latn"),
		nil
	)
end

-- Records the tracking key "R:Perseus/<code>" through Module:debug.
-- Always returns the empty string so that format_perseus_wikilink can use the
-- call directly as its (empty) output.
local function track(code)
	local m_debug = require('Module:debug')
	m_debug.track('R:Perseus/' .. code)
	return ''
end

-- Lower-cases `w` (byte-wise, via string.lower) and replaces every space with
-- a hyphen.
-- Returns exactly one value: string.gsub also yields the replacement count,
-- which must not leak to callers that forward all results (tail calls,
-- argument lists, table constructors).
local function lower_dashed(w)
	local dashed = string.gsub(string.lower(w), " ", "-")
	return dashed
end

-- Converts `x` to Unicode Normalization Form D and deletes every run of
-- characters matching %W.
-- NOTE(review): despite the name this strips every character matching %W,
-- so spaces and punctuation are removed as well; presumably the combining
-- marks produced by the decomposition also fall in that class under
-- mw.ustring - confirm.
-- Returns exactly one value: the replacement count that gsub also yields is
-- discarded so it cannot leak into a caller's argument or result list.
local function remove_diacritics(x)
	local stripped = mw.ustring.gsub(mw.ustring.toNFD(x), '%W+', "")
	return stripped
end

-- maybe there is a better way to do this
-- Converts `w` with polytonic_to_perseus_betacode from
-- [[Module:R:Perseus/polytonic-to-perseus-betacode]]; the module is required
-- on each call rather than at load time.
local function beta(w)
	local converter = require("Module:R:Perseus/polytonic-to-perseus-betacode")
	return converter.polytonic_to_perseus_betacode(w)
end

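--[[ Resources:
	template name, with "R:" removed = {
		[1] = Perseus resource id (the document number used in entry URLs),
		[2] = name of the collision-data submodule for this resource, or nil if it has none,
		[3] = function applied to the entry code before it is placed in the URL, or nil,
		[4] = suffix appended to the entry code; only used when [3] is set,
		[5] = language name
	}
	]]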
-- Entry-code suffix for the "Smith's Persons" resource, stored in `a`, which
-- the resources table below reads. This allows the optional selection of a
-- different bio number: if the calling template's second parameter already
-- contains "-bio-", nothing is appended; in every other case (including when
-- the parameter is absent) the default "-bio-1" is appended.
-- Declared local so the module does not create a global variable.
local a = '-bio-1'
do
	local frame = mw.getCurrentFrame()
	-- getParent() yields nil when the module is not invoked from a template;
	-- guard so that loading the module does not raise in that case.
	local parent = frame and frame:getParent()
	local entry_code = parent and parent.args[2]
	-- Plain (non-pattern) search: in a Lua pattern the trailing "-" of
	-- '-bio-' is a lazy quantifier on "o", so the pattern would match any
	-- "-bi" rather than the literal text "-bio-".
	if entry_code and string.find(entry_code, '-bio-', 1, true) then
		a = ''
	end
end
-- Settings for each supported template, keyed by the template name without its
-- "R:" prefix. Every value is a positional list:
--   [1] Perseus resource id
--   [2] name of the collision-data submodule, or nil
--   [3] function applied to the entry code when building the URL, or nil
--   [4] suffix appended to the entry code after [3] has been applied
--   [5] language name
local resources = {
	["L&S"] = { "1999.04.0059", "LS", nil, nil, "latin" },
	["Elementary Lewis"] = { "1999.04.0060", "EL", nil, nil, "latin" },
	["Peck"] = { "1999.04.0062", nil, lower_dashed, "-harpers", "latin" },
	["PersEnc"] = { "1999.04.0004", nil, lower_dashed, "", "latin" },
	["Stillwell"] = { "1999.04.0006", "PECS", lower_dashed, "", "latin" },
	["Platner"] = { "1999.04.0054", "TDAR", lower_dashed, "", "latin" },
	["Smith's Antiquities"] = { "1999.04.0063", nil, lower_dashed, "-cn", "latin" },
	-- `a` is the suffix chosen at module load from the template's parameter 2.
	["Smith's Persons"] = { "1999.04.0104", nil, lower_dashed, a, "latin" },
	["Smith's Geography"] = { "1999.04.0064", nil, lower_dashed, "-geo", "latin" },
	["LSJ"] = { "1999.04.0057", "LSJ", nil, nil, "greek" },
	["Middle Liddell"] = { "1999.04.0058", "ML", nil, nil, "greek" },
	["Harpocration"] = {
		"2013.01.0002",
		nil,
		-- Strip with remove_diacritics first, then lower-case and dash.
		function(w)
			return lower_dashed(remove_diacritics(w))
		end,
		"",
		"greek",
	},
	["Autenrieth"] = { "1999.04.0073", "Autenrieth", nil, nil, "greek" },
	["Slater"] = { "1999.04.0072", "Slater", nil, nil, "greek" },
	["Zoega"] = { "2003.02.0002", "Zoega", nil, nil, "non" },
}

-- Returns the language name (field [5]) stored in `resources` for `template`.
-- Raises if `template` has no entry in `resources`.
local function get_language(template)
	local settings = resources[template]
	return settings[5]
end

-- Reports whether `x` is flagged in the collision data of `template`.
--
-- The data is loaded from "Module:R:Perseus/collision-data/<index name>",
-- where the index name is field [2] of the template's `resources` entry.
-- Returns false when the template has no collision data, and true only when
-- the looked-up value is exactly `true`.
local function is_collision(x, template)
	local settings = resources[template]
	local collisions_index = settings[2]
	if not collisions_index then
		return false
	end

	-- The lookup key is the post-processed form of `x` when the resource has
	-- a post-processing function (field [3]), and `x` itself otherwise.
	-- Previously the function value itself was used as the key, which can
	-- never match a collision entry.
	-- NOTE(review): assumes collision keys are stored in post-processed
	-- form - confirm against the collision-data submodules.
	local key = x
	local postprocess = settings[3]
	if postprocess then
		key = postprocess(x)
	end

	return mw.loadData("Module:R:Perseus/collision-data/" .. collisions_index)[key] == true
end

-- Builds the Perseus URL for one entry.
--
-- beta_or_latin: entry code as given (beta code or Latin headword).
-- template:      key into `resources`.
-- redirect:      when truthy, produce a "resolveform" lookup URL carrying the
--                resource's language; otherwise link to the entry inside the
--                resource's document.
--
-- If the resource defines a post-processing function (field [3]), the entry
-- code is passed through it and the suffix (field [4], default '') is
-- appended; without such a function the code is used unchanged and the suffix
-- is ignored.
local function format_perseus_url(beta_or_latin, template, redirect)
	-- Only Harpocration URLs carry a ":letter=" segment, built from the
	-- upper-cased first byte of the stripped entry code.
	local letter_part = ""
	if template == 'Harpocration' then
		local first = string.sub(remove_diacritics(beta_or_latin), 1, 1)
		letter_part = ":letter=" .. string.upper(first)
	end

	local settings = resources[template]
	local document_id = settings[1] or ''

	local language_part = ''
	if redirect then
		language_part = '&lang=' .. get_language(template)
	end

	local query = beta_or_latin
	local transform = settings[3]
	if transform ~= nil then
		query = transform(beta_or_latin) .. (settings[4] or '')
	end

	local prefix
	if redirect then
		prefix = 'http://www.perseus.tufts.edu/hopper/resolveform?type=exact&lookup='
	else
		prefix = 'http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:'
			.. document_id .. letter_part .. ':entry='
	end

	return prefix .. query .. language_part
end

-- Returns true when the script that m_scripts.findBestScript selects for
-- `text` (with the language object for code "grc") has the code "polytonic".
local function is_polytonic(text)
	local grc = m_languages.getByCode("grc")
	local best_script = m_scripts.findBestScript(text, grc)
	return best_script:getCode() == "polytonic"
end

-- Produces the wikitext external link "[<url> <tagged title>] in " for one
-- entry.
--
-- The title is tagged with tag_greek for 'greek' resources and tag_latin for
-- 'latin' ones; any other language leaves it untagged. A Greek title that
-- is_polytonic rejects is tracked as 'wrong-script'.
-- When `beta_or_latin` is the empty string no link is produced: the case is
-- tracked as 'no Perseus link' and the empty string is returned.
local function format_perseus_wikilink(title, beta_or_latin, template, redirect)
	local language = get_language(template)
	local label = title

	if language == 'greek' then
		if not is_polytonic(label) then
			-- [[Special:WhatLinksHere/Wiktionary:Tracking/R:Perseus/wrong-script]]
			track('wrong-script')
		end
		label = tag_greek(label)
	elseif language == 'latin' then
		label = tag_latin(label)
	end

	-- Checked only after tagging so the tracking above still runs.
	if beta_or_latin == '' then
		return (track('no Perseus link'))
	end

	local url = format_perseus_url(beta_or_latin, template, redirect)
	return '[' .. url .. ' ' .. label .. '] in '
end

-- Works out the display word and Perseus entry code for a Greek template.
-- Raises when the parameters are inconsistent or when no Greek word can be
-- determined from the parameters or the page name.
local function resolve_greek_entry(args, title, template)
	local template_link = '{{[[Template:R:' .. template .. '|R:' .. template .. ']]}}'
	local word = args[2]
	local perseus_code

	if word and not is_polytonic(word) then
		error('Second parameter of ' .. template_link .. ' should be a Greek word.')
	end

	-- Parameter 1 is either the Greek word itself or a Perseus entry code.
	local first = args[1]
	if first then
		if not is_polytonic(first) then
			perseus_code = first
		elseif word then
			error('Second parameter of ' .. template_link ..
					' is a Greek word, so first parameter must be Perseus entry code.')
		else
			word = first
		end
	end

	-- No word supplied: fall back to the page name, or to a placeholder when
	-- the page is in the Template namespace.
	if not word then
		local pagename = title.text
		if is_polytonic(pagename) then
			word = pagename
		elseif title.nsText == "Template" then
			word = 'λέξις'
		else
			error(template_link .. ' needs manual input: pagename is not Greek.')
		end
	end

	return word, perseus_code or beta(word)
end

-- Template entry point. Reads parameters 1 and 2 from the parent frame,
-- derives the resource from the calling template's title (with the
-- "Template:R:" prefix and any "/sandbox" suffix removed) and returns the
-- formatted Perseus link, or "" when the word equals the template name.
function export.create(frame)
	local parent = frame:getParent()
	local args = m_params.process(parent.args, {
		[1] = {}, -- Perseus code or word
		[2] = {}, -- word; only for Greek templates?
	})

	local template = string.gsub(parent:getTitle(), "^Template:R:", "")
	template = string.gsub(template, "/sandbox$", "")
	local is_greek = get_language(template) == 'greek'

	if is_greek and args[2] and not args[1] then
		error('Parameter 2 should be placed in parameter 1.')
	end

	local title = mw.title.getCurrentTitle()

	local word, perseus_code
	if is_greek then
		word, perseus_code = resolve_greek_entry(args, title, template)
	else
		-- Every non-Greek resource defaults both values to the page name.
		word = args[1] or title.text
		perseus_code = args[2] or title.text
	end

	-- The collision lookup is only consulted when no parameter was given.
	local redirect = not (args[1] or args[2]) and is_collision(word, template)

	if word == template then
		return ""
	end
	return format_perseus_wikilink(word, perseus_code, template, redirect)
end

return export