-- Module:interlinear
-- From Wiktionary, the free dictionary.


local export = {}
local m_data = mw.loadData("Module:interlinear/data")
local m_languages = require("Module:languages")
local m_script_utilities = require("Module:script utilities")

-- Replacement text for each character that must not appear literally in the
-- generated HTML/wikitext.
local escape_entities = {
	["&"] = "&amp;",
	['"'] = "&quot;",
	["|"] = "&#124;",
	["<"] = "&lt;",
	[">"] = "&gt;",
}

-- Escape the characters & " | < > in `x` so the text can be embedded in the
-- generated markup.
-- All five characters are ASCII, so a byte-level string.gsub is safe on UTF-8
-- input. Replacing them in a single pass also guarantees that the "&" of an
-- entity produced here is never escaped a second time.
-- Returns the escaped string (exactly one value).
local function escape(x)
	-- Parentheses drop gsub's second return value (the substitution count).
	return (string.gsub(x, '[&"|<>]', escape_entities))
end

-- Render one element of a gloss cell.
-- `gloss` is either a plain string (emitted escaped) or a table with the
-- fields `gloss` (abbreviation id, looked up in m_data) and `negate`
-- (boolean; prefixes the label with "N" and the tooltip with "non-").
-- `categories` is a list that receives a tracking category name when the
-- abbreviation's data entry is flagged `ambiguous`.
-- Returns the markup for the element.
local function render_gloss(gloss, categories)
	if type(gloss) == "string" then
		return escape(gloss)
	end

	local gloss_id = gloss.gloss
	local gloss_data = m_data[gloss_id]

	if not gloss_data then
		-- Unknown abbreviation: there is no tooltip to show, but the negation
		-- marker must still be displayed, otherwise "!X" would render as "X".
		local unknown_label = gloss_id
		if gloss.negate then
			unknown_label = "N" .. unknown_label
		end
		return '<small>' .. escape(unknown_label) .. '</small>'
	end

	local label = gloss_data.label or gloss_id
	local tooltip = gloss_data.tooltip or ""

	if gloss.negate then
		label = "N" .. label
		if #tooltip > 0 then tooltip = "non-" .. tooltip end
	end

	if gloss_data.ambiguous then
		table.insert(categories, "Interlinear glosses with ambiguous gloss abbreviations")
	end

	-- NOTE(review): the tooltip is inserted into the title attribute as-is;
	-- presumably the data module only contains attribute-safe text - confirm.
	return '<small><abbr title="' .. tooltip .. '">' .. escape(label) .. '</abbr></small>'
end

-- Resolve the transliteration for a single word.
-- `tr` may be:
--   * false        - no transliteration for this word;
--   * true or nil  - ask `lang:transliterate(word, sc)` for one;
--   * anything else (normally a string) - used as given.
-- Returns two values: whether the resulting transliteration is non-empty,
-- and the transliteration itself ("" when there is none).
local function get_tr(tr, word, lang, sc)
	if tr == false then
		return false, ""
	end

	local translit = tr
	if translit == nil or translit == true then
		translit = lang:transliterate(word, sc) or ""
	end

	local present = translit ~= ""
	return present, translit
end

-- Build the HTML table for an interlinear gloss.
-- Fields read from `data`:
--   words              list of source-text words (one table cell each); an
--                      empty list makes the function return ""
--   glosses            list of gloss lists, each element being something
--                      render_gloss accepts
--   lang               language object (findBestScript, transliterate)
--   sc                 optional script object; detected from the first
--                      non-empty word when absent
--   tr                 optional list of per-word transliteration entries as
--                      understood by get_tr; when nil, transliterations are
--                      generated if sc:isTransliterated() is true; an empty
--                      list disables the transliteration row
--   top                optional text spanning all columns above the words
--   headers            optional list of header cells
--   morphology         optional list of morpheme cells (linked via
--                      Module:links)
--   word_translations  optional list of per-word translation cells
--   translation        optional free translation spanning all columns
-- Returns the table markup followed by any category markup.
function export.interlinear_gloss(data)
	if #data.words == 0 then
		return ""
	end

	local lang = data.lang
	local sc = data.sc
	if not sc then
		-- Use the first non-empty word (or the last word if all are empty)
		-- to detect the script.
		local first = 1
		while first < #data.words and data.words[first] == "" do first = first + 1 end
		sc = lang:findBestScript(data.words[first])
	end

	local tr_ref = data.tr
	local has_tr
	if tr_ref == nil then
		has_tr = sc:isTransliterated()
		tr_ref = {}
	else
		has_tr = #tr_ref > 0
	end

	-- Width of the rows that span the whole table.
	local columns = math.max(#data.words, #data.glosses)

	-- Collect the output pieces and join them once at the end instead of
	-- repeatedly re-allocating a growing string.
	local out = { '<table class="interlinear-gloss"><tr>' }
	local function put(s)
		out[#out + 1] = s
	end

	-- NOTE(review): the span classes below are spelled "intetlinear-...",
	-- unlike the table class "interlinear-gloss". They are left unchanged
	-- because the stylesheet that targets them is not visible here.
	if data.top then
		put('<td colspan="' .. columns .. '"><span class="intetlinear-gloss-top">' .. data.top .. '</span></td></tr><tr>')
	end

	if data.headers then
		for _, header in ipairs(data.headers) do
			put('<td><span class="intetlinear-gloss-header">' .. header .. "</span></td>")
		end
		put("</tr><tr>")
	end

	local trs = {}
	local tr_ok = false
	local categories = {}

	for i, word in ipairs(data.words) do
		put("<td>" .. m_script_utilities.tag_text(word, lang, sc) .. "</td>")
		if has_tr then
			local ok, tr = get_tr(tr_ref[i], word, lang, sc)
			tr_ok = tr_ok or ok
			trs[i] = tr
		end
	end

	-- The transliteration row is emitted only if at least one word has a
	-- non-empty transliteration.
	if tr_ok then
		put("</tr><tr>")
		for _, tr in ipairs(trs) do
			put("<td>" .. m_script_utilities.tag_translit(tr, lang, "term") .. "</td>")
		end
	end

	if data.morphology then
		local m_links = require("Module:links")
		put("</tr><tr>")

		for _, morph in ipairs(data.morphology) do
			-- Text that already contains a wikilink is passed as the term;
			-- anything else is passed as display text only.
			if mw.ustring.find(morph, "[[", nil, true) then
				put("<td>" .. m_links.full_link{ term = morph, lang = lang, sc = sc, tr = "-" } .. "</td>")
			else
				put("<td>" .. m_links.full_link{ alt = morph, lang = lang, sc = sc, tr = "-" } .. "</td>")
			end
		end
	end

	put("</tr><tr>")

	for _, glosses in ipairs(data.glosses) do
		put("<td>")
		for _, gloss in ipairs(glosses) do
			put(render_gloss(gloss, categories))
		end
		put("</td>")
	end

	if data.word_translations then
		put("</tr><tr>")
		for _, wt in ipairs(data.word_translations) do
			-- split_trs turns "+" / "-" entries into booleans. Those cannot
			-- be concatenated, so render them as an empty cell instead of
			-- raising an error.
			if wt == true or wt == false then
				wt = ""
			end
			put("<td>" .. wt .. "</td>")
		end
	end

	if data.translation then
		put('</tr><tr><td colspan="' .. columns ..
			'"><span class="intetlinear-gloss-translation-double-quote">"</span><span class="intetlinear-gloss-translation">' ..
			data.translation .. '</span><span class="intetlinear-gloss-translation-double-quote">"</span></td>')
	end

	put('</tr></table>')

	local category_text = ""
	if #categories > 0 then
		category_text = require("Module:utilities").format_categories(categories, lang)
	end

	return table.concat(out) .. category_text
end

-- Split `text` into words.
-- Words are separated by whitespace. Whitespace inside a {...} group does
-- not split. When a word begins with "{" and ends with the matching "}",
-- those outer braces are removed from the result. A "}" that has no open
-- "{" ends the current word like whitespace does.
-- Raises an error if a "{" is never closed.
-- Returns a list of strings; empty words are never produced.
local function split_words(text)
	-- All three loop variables must be locals. The previous declaration
	-- `local pos = 1, mend, found` declared only `pos`, so `mend` and
	-- `found` were written as globals.
	local pos, mend, found = 1, nil, nil
	local len = mw.ustring.len(text)
	local search = "([{%s}])"
	local words = {}
	local braces = 0        -- current nesting depth of { }
	local word_start = 1    -- index of the first character of the current word
	local last_brace = nil  -- position of the most recent "}" that closed depth 0
	local going = true

	while going do
		pos, mend, found = mw.ustring.find(text, search, pos)
		if pos == nil then
			if braces > 0 then error("Unterminated { on the first or second line") end
			-- Pretend there is a space just past the end so the final word
			-- is flushed by the same code path.
			found = " "
			pos, mend = len + 1, len + 1
			going = false
		end

		if found == "{" then
			if braces == 0 and word_start == pos then
				-- Opening brace at the very start of a word: drop it.
				word_start = word_start + 1
			end
			braces = braces + 1
		elseif found == "}" and braces > 0 then
			braces = braces - 1
			if braces == 0 then
				last_brace = pos
			end
		elseif --[[ whitespace and ]] braces == 0 then
			-- end word
			local word_end = mend - 1
			if word_start <= word_end then
				if word_end == last_brace then
					-- The word ends with the closing brace: drop it.
					word_end = last_brace - 1
				end
				table.insert(words, mw.ustring.sub(text, word_start, word_end))
			end
			word_start = mend + 1
			last_brace = nil
		end

		pos = mend + 1
	end

	return words
end

-- Parse a gloss line into per-word lists of gloss elements.
-- The line is first divided with split_words. Within each word, every run
-- matching [0-9A-Z]+ is treated as a gloss abbreviation:
--   * prefixed with "^" it is kept as a literal string (without the "^");
--   * otherwise it becomes { gloss = <run>, negate = <true if prefixed "!"> }.
-- Text between abbreviations is kept as plain strings.
-- Returns a list with one list of elements per word.
local function split_glosses(text)
	local glosses_all = {}

	for _, word in ipairs(split_words(text)) do
		local glosses = {}
		-- Declare every loop variable as a local. The previous declaration
		-- `local pos = 1, mend, gloss` declared only `pos`, leaving `mend`,
		-- `prefix` and `gloss` as globals.
		local pos, mend, prefix, gloss = 1, nil, nil, nil
		local last = 1  -- index of the first character not yet emitted

		while true do
			pos, mend, prefix, gloss = mw.ustring.find(word, "([!^]?)([0-9A-Z]+)", pos)
			if pos == nil then break end

			if last < pos then
				-- Plain text preceding the abbreviation.
				table.insert(glosses, mw.ustring.sub(word, last, pos - 1))
			end

			if prefix == "^" then
				table.insert(glosses, gloss)
			else
				table.insert(glosses, { gloss = gloss, negate = prefix == "!" })
			end

			last = mend + 1
			pos = last
		end

		-- Trailing plain text after the last abbreviation.
		if last <= mw.ustring.len(word) then
			table.insert(glosses, mw.ustring.sub(word, last))
		end
		table.insert(glosses_all, glosses)
	end

	return glosses_all
end

-- Parse a line of per-word entries in which "+" and "-" are placeholders.
-- A line consisting solely of "-" yields an empty list. Otherwise the line
-- is divided with split_words and each word is mapped as follows:
--   "+" -> true, "-" -> false, anything else -> the word itself.
-- Returns the resulting list.
local function split_trs(text)
	if text == "-" then
		return {}
	end

	local placeholders = { ["+"] = true, ["-"] = false }
	local entries = {}

	for _, token in ipairs(split_words(text)) do
		local flag = placeholders[token]
		if flag ~= nil then
			entries[#entries + 1] = flag
		else
			entries[#entries + 1] = token
		end
	end

	return entries
end

-- Template entry point.
-- Reads the parent frame's arguments: 1 = language code, 2 = words,
-- 3 = glosses, 4 = optional translation, plus the optional named parameters
-- tr, wt, morph, top, header and sc. The arguments are parsed into the data
-- table expected by export.interlinear_gloss, whose result is returned.
-- Raises an error when sc is given but is not a known script code.
function export.interlinear_t(frame)
	local spec = {
		[1] = {required = true},
		[2] = {required = true},
		[3] = {required = true},
		[4] = {},

		["tr"] = {},
		["wt"] = {},
		["morph"] = {},
		["top"] = {},
		["header"] = {},
		["sc"] = {},
	}

	local args = require("Module:parameters").process(frame:getParent().args, spec)

	local lang = m_languages.getByCode(args[1])
	if not lang then
		lang = m_languages.err(args[1], 1)
	end

	local sc = nil
	local sc_code = args["sc"]
	if sc_code then
		sc = require("Module:scripts").getByCode(sc_code)
		if not sc then
			error("The script code \"" .. sc_code .. "\" is not valid.")
		end
	end

	-- Apply `splitter` only when the optional argument was supplied.
	local function split_optional(value, splitter)
		if value then
			return splitter(value)
		end
		return nil
	end

	local gloss_data = {}
	gloss_data.words = split_words(args[2])
	gloss_data.glosses = split_glosses(args[3])
	gloss_data.tr = split_optional(args.tr, split_trs)
	gloss_data.morphology = split_optional(args.morph, split_words)
	gloss_data.word_translations = split_optional(args.wt, split_trs)
	gloss_data.headers = split_optional(args.header, split_words)
	gloss_data.translation = args[4]
	gloss_data.top = args.top
	gloss_data.lang = lang
	gloss_data.sc = sc

	return export.interlinear_gloss(gloss_data)
end

return export