Jump to content

Module:dialect synonyms

From Wiktionary, the free dictionary

See {{dialect synonyms}}.


--[==[
mod: intitle:dial -intitle:data -intitle:documentation -intitle:zh
Module:zh-Christian-syn
]==]

local export = {}
local m_links = require("Module:links")
local m_languages = require("Module:languages")
local m_scripts = require("Module:scripts")
local m_table = require("Module:table")

local langs = {}

local elements = {}

-- Create the outer <table> element for the synonyms table, carrying the
-- CSS classes used for styling and for MediaWiki's collapsible behaviour.
-- `data` is accepted for symmetry with the other element builders but is not read.
elements.table = function(data)
	local tbl = mw.html.create("table")
	local classes = {"dial-syn", "wikitable", "mw-collapsible", "mw-collapsed"}
	for _, class in ipairs(classes) do
		tbl:addClass(class)
	end
	return tbl
end

-- Build the title row: a single <th> spanning every column.
-- Reads `data.columns` (for the span width), `data.colour` and `data.title`.
elements.head_a = function(data)
	local row = mw.html.create("tr")
	local cell = row:tag("th")
	cell:attr("colspan", #data.columns)
	cell:css("background-color", data.colour)
	cell:wikitext(data.title)
	return row
end

-- Build the column-header row: one <th> per entry of `data.columns`,
-- each coloured with `data.colour`.
elements.head_b = function(data)
	local row = mw.html.create("tr")
	for _, column_title in ipairs(data.columns) do
		local cell = row:tag("th")
		cell:css("background-color", data.colour)
		cell:wikitext(column_title)
	end
	return row
end

-- Build the right-aligned row holding the "view map" and "edit data" links.
-- Reads `data.columns` (for the span width), `data.colour`,
-- `data.view_map` and `data.edit_link` (both page titles).
elements.row_view_map = function(data)
	local links = string.format("[[%s|view map]]; [[%s|edit data]]", data.view_map, data.edit_link)

	local row = mw.html.create("tr")
	local cell = row:tag("td")
	cell:attr("colspan", #data.columns)
	cell:css("text-align", "right")
	cell:css("background-color", data.colour)
	cell:wikitext(links)

	return row
end

-- Return wikitext inviting the reader to create the missing data module
-- `dpath`; the external link opens the editor preloaded with `preload_path`.
local function prompt_create_data(dpath, preload_path)
	local query = {action='edit', preload=preload_path}
	local edit_url = tostring(mw.uri.fullUrl(dpath, query))
	return string.format("→Create [%s %s]?", edit_url, dpath)
end

--[==[
Format one synonym entry as a wikitext link, followed by a qualifier if one is given.

Parameters:
* `data_variety`: the variety (tree leaf) the word belongs to. Fields read here:
  `code`, `code_main`, `name`, `nolink`, plus `scripts` (for `ar`) and `parent` (for `ko`).
* `data`: either a table such as { word = "Word", ety_n = 5, tr = "tr", q = "qual" },
  or the shorthand string "Word_5/tr:qual", which is parsed into such a table.
  A table argument is modified in place (`lang`, `term`, `alt` and possibly `sc`/`tr` are set).

Returns the formatted wikitext string.
Raises an error (via assert) for an `ar` term whose detected script is not listed in `data_variety.scripts`.
]==]
function export.format_word(data_variety, data)
	-- XXX: $ety_n assumes that $language is the first section on a page
	-- and that we can safely say "foo_2" and link to [[foo#Etymology_2]].
	-- this is a fragile assumption,
	-- and we should be using [[T:senseid]].

	if type(data) == 'string' then
		data = { word = data }
		-- peel the optional parts off from the right: ":qualifier", then "/translit", then "_N".
		-- the patterns are greedy, so each split happens at the last separator
		local head, tail = mw.ustring.match(data.word, '^(.+):(.+)$')
		if head then data.word, data.q = head, tail end
		head, tail = mw.ustring.match(data.word, '^(.+)/(.+)$')
		if head then data.word, data.tr = head, tail end
		head, tail = mw.ustring.match(data.word, '^(.+)_(%d+)$')
		if head then data.word, data.ety_n = head, tail end
	end

	-- re-use language objects if possible
	-- $langs is a module-level cache shared by all calls
	if not langs[data_variety.code] then
		langs[data_variety.code] = m_languages.getByCode(data_variety.code, nil, true)
	end

	data.lang = langs[data_variety.code]
	data.term, data.alt = data.word, data.word
	if data.ety_n then data.term = ('%s#Etymology %s'):format(data.term, data.ety_n) end
	if data_variety.nolink then data.term = nil end
	data.alt = data.alt .. (data.ety_n and ('<sub>%s</sub>'):format(data.ety_n) or '')

	-- unlinked entries (e.g. the "[N/A]" placeholder) have no term; the script check and
	-- the automatic transliteration below need one and would otherwise raise an error on nil
	local has_term = (data.term ~= nil)

	-- XXX: this code probably shouldn't be located in this module, or at least wholly in this function
	if (data_variety.code_main == 'ar') and has_term then
		-- verify script validity
		local valid_script = nil
		local best_script = m_scripts.findBestScriptWithoutLang(data.term):getCode()
		for _,sc in ipairs(data_variety.scripts) do
			if best_script == sc then
				valid_script = sc
				break
			end
		end
		assert(valid_script, ("Invalid script for term `%s` of the dialect `%s`"):format(data.term,data_variety.name))
		data.sc = m_scripts.getByCode(valid_script)

	-- [[Module:ko-dial-syn]]
	-- [[Template:ko-dial-syn#To-do]]
	elseif (data_variety.code_main == 'ko') then
		-- add IPA font for 계림유사
		if (data_variety.name == 'Old Korean-JLLS') and (data.tr) then
			data.tr = tostring(mw.html.create('span')
				:wikitext(data.tr)
				:addClass('IPA')
			:done())
		end

		-- the flags below select which dialect-specific adjustments are applied
		-- to the text that is handed to the transliteration module
		local bool_no_auto_translit = (
			({['oko']=1, ['okm']=1, ['jje']=1})[data_variety.code]
		)
		local bool_ae_e_merger = (
			(data_variety.name == 'Seoul') or
			(data_variety.code == 'ko-se' and not ({['Jinju']=1, ['Sancheong']=1, ['Hamyang']=1, ['Namhae']=1, ['Hadong']=1, ['Sacheon']=1, ['Goseong']=1, ['Tongyeong']=1})[data_variety.name]) or
			(data_variety.name == 'Samcheok') or
			(data_variety.name == 'Gochang') or
			(data_variety.parent.name == 'South Jeolla' and not ({['Gurye']=1, ['Gwangyang']=1, ['Goheung']=1, ['Boseong']=1})[data_variety.name])
		)
		local bool_eo_eu_merger = (
			(data_variety.code == 'ko-se' and not ({['Uljin']=1, ['Bonghwa']=1, ['Hamyang']=1})[data_variety.name])
		)
		local bool_nasal = (
			(data_variety.code == 'ko-se') or
			(data_variety.parent.name == 'Yeongdong') or
			(data_variety.parent.name == 'South Hwanghae') or
			(data_variety.parent.name == 'North Hwanghae') or
			(data_variety.code == 'ko-nw') or
			(data_variety.code == 'ko-ne')
		)
		local bool_pitch = (
			({['ko-ne']=1, ['ko-se']=1, ['ko-yuk']=1})[data_variety.code] or
			({['Donghae']=1, ['Samcheok']=1, ['Taebaek']=1, ['Yeongwol']=1, ['Gangneung']=1})[data_variety.name]
		)
		local bool_length = true

		-- only transliterate automatically when no manual transliteration was supplied
		if has_term and (not data.tr) and (not bool_no_auto_translit) and (bool_ae_e_merger or bool_eo_eu_merger or bool_pitch or bool_length) then
			local m_ko_translit = require("Module:ko-translit")
			local term_for_translit = data.term

			-- transform text in preparation for transliteration
			-- (the vowel replacements work on decomposed jamo, hence the NFD/NFC round trip)
			if (bool_ae_e_merger) then
				term_for_translit = mw.ustring.toNFC(
					mw.ustring.gsub(mw.ustring.toNFD(term_for_translit), "[ᅢᅫ]", {["ᅢ"]="ᅦ", ["ᅫ"]="ᅰ"})
				)
			end
			if (bool_eo_eu_merger) then
				term_for_translit = mw.ustring.toNFC(
					mw.ustring.gsub(mw.ustring.toNFD(term_for_translit), "ᅥ", "ᅳ")
				)
			end
			if (bool_nasal) then
				-- in the displayed/linked text, "~" is replaced by the final jamo "ᆼ";
				-- $term_for_translit keeps the "~" so that it can become a tilde below
				data.term = mw.ustring.toNFC(
					mw.ustring.gsub(mw.ustring.toNFD(data.term), "~", "ᆼ")
				)
				data.alt = mw.ustring.toNFC(
					mw.ustring.gsub(mw.ustring.toNFD(data.alt), "~", "ᆼ")
				)
			end
			if (bool_pitch) then
				-- "'" marks a high-pitched syllable; it must not show up in the link or display text
				data.term = mw.ustring.gsub(data.term, "'", "")
				data.alt = mw.ustring.gsub(data.alt, "'", "")

				-- assume pitch accent
				if not string.find(term_for_translit, "'") then
					-- gyeongsang
					if data_variety.code == 'ko-se' then
						term_for_translit = mw.ustring.gsub(term_for_translit, "([^~@][~@]*)([^~@][~@]*)$", "'%1%2") -- penultimate
					-- hamgyong
					elseif data_variety.code == 'ko-ne' then
						term_for_translit = mw.ustring.gsub(term_for_translit, "([^~@][~@]*)$", "'%1") -- final
					end
				end

				if (bool_length) then
					term_for_translit = mw.ustring.gsub(term_for_translit, "'@", "@'") -- fix a weird markup order, just in case
				end
				-- tag every syllable with an explicit high (↑) or low (↓) marker
				term_for_translit = mw.ustring.gsub(term_for_translit, "('?)([^~@][~@]*)", function(bool_is_high, syllable)
					return syllable .. (bool_is_high ~= "" and "↑" or "↓")
				end)
			end
			if (bool_length) then
				-- XXX: ":" is currently used for word notes; this code tentatively uses "@" instead for development
				data.term = mw.ustring.gsub(data.term, "@", "")
				data.alt = mw.ustring.gsub(data.alt, "@", "")
			end

			-- transliterate
			data.tr = m_ko_translit.tr(term_for_translit)
			-- order diacritics for aesthetics
			data.tr = mw.ustring.gsub(data.tr, "([@↓↑~]+)", function(d)
				return ""
					.. (mw.ustring.match(d, "~") or "")
					.. (mw.ustring.match(d, "[↓↑]") or "")
					.. (mw.ustring.match(d, "@") or "")
			end)
			-- digraph vowels; closed syllables; other modified syllables
			--                                   1 v1     2 v2      3 c                       4         5
			data.tr = mw.ustring.gsub(data.tr, "([aeiou])([aeiou]?)([bcdfghjklmnpqrstvwxyz]*)([↓↑~]+)(@?)", "%1%4%2%5%3")
			-- transform the temporary markers into their final characters
			data.tr = mw.ustring.gsub(data.tr, "[@↓↑~]", {["@"]=":", ["↑"]="́", ["↓"]="̀", ["~"]="̃"})
		end
	end

	-- create full link
	-- place annotation after the word due to
	-- notes such as those found at [[舅父]],
	-- which appear to rely on direction
	local word = m_links.full_link(data)
	if data.q then
		word = word .. " " .. require("Module:qualifier").format_qualifier({ data.q })
	end

	return word
end

-- Run every entry of `data_variety.syns` through export.format_word and
-- return the formatted strings as a new array, in the same order.
local function format_syns(data_variety)
	local formatted = {}
	for _, entry in ipairs(data_variety.syns) do
		local link = export.format_word(data_variety, entry)
		formatted[#formatted + 1] = link
	end
	return formatted
end

--[==[
Entry point for {{dialect synonyms}}: builds the collapsible table of dialectal synonyms.

Arguments, read from the parent frame:
* 1: language code, optionally with a qualifier ("zh:regional"); selects the language data module.
* 2: name of the synonyms data module; defaults to the text of the current page title.
* "dpath syns": full title of an alternative synonyms module (for testing).

Returns the table's HTML followed by the output of [[Module:TemplateStyles]], or,
when a data module is missing, wikitext prompting the reader to create it.
Raises an error if the synonyms module has neither `.syns` nor `.list`.
]==]
function export.show(frame)
	local params = {
		[1] = { required = true, default = "und" },
		[2] = { default = mw.title.getCurrentTitle().text },
		['dpath syns'] = {}, -- for testing: use arbitrary module. like [[Module:sandbox]] or something
	}
	local args = require("Module:parameters").process(frame:getParent().args, params, nil, "dialect synonyms", "show")

	-- data modules
	local dpath = "Module:dialect synonyms"
	local dpath_mul = dpath .. "/" .. "mul"
	local dpath_lang = dpath .. "/" .. args[1]
	local dpath_syns = dpath_lang  .. "/" .. args[2]
	if args['dpath syns'] then dpath_syns = args['dpath syns'] end
	local dpath_map = "Template:dialect synonym map/" .. args[1] .. "/" .. args[2]
	local data_mul = require(dpath_mul)
	local data_lang = mw.title.new(dpath_lang).exists and require(dpath_lang) or nil
	local data_syns = mw.title.new(dpath_syns).exists and require(dpath_syns) or nil

	-- prompt creation of missing data modules
	if (not data_lang) then
		return prompt_create_data(dpath_lang, 'Module:dialect synonyms/und')
	end
	if (not data_syns) then
		return prompt_create_data(dpath_syns, 'Module:dialect synonyms/' .. args[1] .. '/')
	end

	-- throw error if synonyms table does not even exist
	if (not data_syns.syns) and (not data_syns.list) then
		error(('Could not find .syns in [[%s]].'):format(dpath_syns))
	end

	-- create $data_lang.title fallback
	if not data_lang.title then
		data_lang.title = "Dialectal synonyms of %s"
	end
	-- create $data_lang.columns fallback
	if not data_lang.columns then
		data_lang.columns = {"Variety", "Location", "Words"}
	end
	-- create $data_lang.notes fallback
	if not data_lang.notes then
		data_lang.notes = {}
	end

	-- TEMP: backwards compatibility with the original [[Module:zh-dial-syn]] format
	if (not data_syns.syns) and (data_syns.list) then
		data_syns.syns = data_syns.list
		data_syns.list = nil
	end
	if (not data_syns.gloss) and (data_syns.syns.meaning) then
		data_syns.gloss = data_syns.syns.meaning
		data_syns.syns.meaning = nil
	end
	if (not data_syns.note) and (data_syns.syns.note) then
		data_syns.note = data_syns.syns.note
		data_syns.syns.note = nil
	end

	-- initialize $dial_syn_table
	local dial_syn_table = elements.table()
	:done()

	-- initialize $lang, $main_word, $main_word_link
	-- for $lang="zh:regional", the data modules are named in English,
	-- so the $lang of $main_word is "en";
	-- and the language of the words is "zh", not "zh:regional"
	local lang, lang_qualifier = string.match(args[1], '^(.+):(.+)$')
	lang = lang or args[1]
	local main_word = args[2]
	main_word = mw.ustring.gsub(main_word, "%-[%d]$", "") -- "媽媽-2"→"媽媽"
	local main_word_link = m_links.full_link({
		lang = m_languages.getByCode(lang_qualifier and "en" or lang),
		term = main_word,
		gloss = data_syns.gloss,
	}, "term")

	-- create and add headers for $dial_syn_table
	dial_syn_table
		:node(elements.head_a({
			columns = data_lang.columns,
			title = (data_lang.title):format(main_word_link),
			colour = data_mul.colours["head_a"],
		}))
		:node(elements.row_view_map({
			view_map = dpath_map,
			edit_link = dpath_syns,
			columns = data_lang.columns,
			colour = data_mul.colours["head_a"], -- ?
		}))
		:node(elements.head_b({
			columns = data_lang.columns,
			colour = data_mul.colours["head_a"],
		}))
	:done()

	-- add words to tree
	-- trim tree
	-- ----
	-- enter data tree
	-- first pass: returns the (annotated) node, or nil when the node has no words at all
	local function recurse(data_variety)
		if #data_variety == 0 then
			-- no array part = this node is a leaf
			-- add words to data tree
			-- first word is an empty string = no words entered
			if (data_syns.syns[data_variety.name]) and (data_syns.syns[data_variety.name][1] ~= '') then
				data_variety.syns = data_syns.syns[data_variety.name]
			end

			-- word fallback with $data_variety.default
			if (data_variety.default) and (not data_variety.syns) then
				if data_variety.default == 'module name' then
					data_variety.syns = {args[2]}
				end
			end

			-- word is n/a
			if (data_variety.syns) and (data_variety.syns[1] == '-') then
				data_variety.code = 'en'
				data_variety.nolink = true
				data_variety.syns[1] = {word = "<i><small>[N/A]</small></i>"}
			end

			-- erase barren leaf from tree
			if (not data_variety.syns) then
				--data_variety = nil
				return
			end

			-- format words
			data_variety.syns = format_syns(data_variety)

			-- prepare header text
			if (not data_variety.text_display) then
				data_variety.text_display = data_variety.english or data_variety.name
				data_variety.text_display = mw.ustring.gsub(data_variety.text_display, '(%(.+%))', '<small>%1</small>')

				if data_variety.code_main == 'ar' and data_variety.wikidata then
					-- XXX: also probably shouldn't be in this module
					-- parenthesise arabic/alt name
					local alt_name = nil
					if not data_variety.suppress_arabic then
						alt_name = mw.wikibase.getLabelByLang(data_variety.wikidata, 'ar')
					elseif data_variety.suppress_arabic ~= 'Y' then
						alt_name = data_variety.suppress_arabic
					end
					alt_name = alt_name and (' (%s)'):format(alt_name) or ''
					data_variety.text_display = data_variety.text_display .. alt_name

					local en_site_link = data_variety.link or mw.wikibase.getSitelink(data_variety.wikidata, 'enwiki')
					--local ar_site_link = nil
					--if not en_site_link then
						--ar_site_link = mw.wikibase.getSitelink(data_variety.wikidata, 'arwiki')
					--end
					-- declared local so that it does not leak into the global environment
					local site_link = en_site_link
					--if ar_site_link then
						--site_link = 'ar:'..ar_site_link
					--end
					if site_link then
						data_variety.text_display = ('[[w:%s|%s]]'):format(site_link, data_variety.text_display)
					end
				elseif (data_variety.link) then
					data_variety.text_display = ('[[w:%s|%s]]'):format(data_variety.link, data_variety.text_display)
				end
			end
		else
			-- prepare header text
			if (data_variety.name) and (not data_variety.text_display) then
				data_variety.text_display = data_variety.english or data_variety.name
				data_variety.text_display = mw.ustring.gsub(data_variety.text_display, '(%(.+%))', '<small>%1</small>')
				if (data_variety.link) then
					data_variety.text_display = ('[[w:%s|%s]]'):format(data_variety.link, data_variety.text_display)
				end
			end

			-- add fallback iso code
			if not data_variety.code then
				data_variety.code = (data_variety.parent and data_variety.parent.code or lang)
			end

			-- search for tree leaves
			for i, _ in ipairs(data_variety) do
				-- children inherit $code and $colour from their parent unless they set their own
				data_variety[i].parent = data_variety
				data_variety[i].code_main = args[1]
				data_variety[i].code = (data_variety[i].code or data_variety.code)
				data_variety[i].colour = (data_variety[i].colour or data_variety.colour)
				-- recurse
				data_variety[i] = recurse(data_variety[i])
				-- count
				if data_variety[i] then
					if data_variety[i].leaf_count then
						data_variety.leaf_count = (data_variety.leaf_count or 0) + data_variety[i].leaf_count
					else
						data_variety.leaf_count = (data_variety.leaf_count or 0) + 1
					end
				end
			end
			-- erase now-barren sub-tree from tree
			if #m_table.numKeys(data_variety) == 0 then
				data_variety = nil
			end
		end
		return data_variety
	end
	data_lang.varieties = recurse(data_lang.varieties)

	-- add rows to $dial_syn_table
	-- ----
	-- enter data tree
	-- second pass: this deliberately shadows the first `recurse` above.
	-- $tr is the row under construction, handed down so that a group header
	-- and the first leaf below it end up in the same <tr>
	local function recurse(data_variety, tr)
		-- empty table with no words,
		-- do not try to look for keys, do not try to count keys
		if not data_variety then
			return
		end
		-- if there are no sub-groups
		if #m_table.numKeys(data_variety) == 0 then
			if not tr then
				tr = mw.html.create('tr')
				:done()
			end

			-- add the location + word cells
			tr
				:tag('th')
					:attr('colspan', data_variety.colspan)
					:css("background-color", data_variety.colour)
					:tag('span') -- for CSS `position: sticky`
						:wikitext(data_variety.text_display)
					:done()
				:done()
				:tag('td')
					:css("background-color", data_variety.colour)
					:wikitext(table.concat(data_variety.syns, ', '))
				:done()
			:done()

			dial_syn_table
				:node(tr)
			:done()
		else
			-- declared local: each recursion level keeps its own counter
			-- instead of sharing (and leaking) a global
			local sibling_i = 1
			-- init the row and add headers.
			-- a group header (the thing that has rowspan) must belong to a tr of a leaf,
			-- but only if that leaf is the first leaf among its siblings.
			-- imagine the formatting of native mediawiki tables
			-- (not tr): as when recursing for the first time, or when it has been 'nil'-ed
			-- (data_variety.parent): don't generate a th for the root. empty, useless and unwanted.
			if (not tr) and (data_variety.parent) then
				tr = mw.html.create('tr')
				:done()
			end
			if (tr) and (sibling_i == 1) then
				tr
					:tag('th')
						:attr('rowspan', data_variety.leaf_count)
						:attr('colspan', data_variety.colspan)
						:css("background-color", data_variety.colour)
						:tag('span') -- for CSS `position: sticky`
							:wikitext(data_variety.text_display)
						:done()
					:done()
				:done()
			end
			-- search for tree leaves
			-- use sparseIpairs() because the deletion of things above leaves gaps in the array
			-- and lua does not continue looping if i + 1 does not exist
			-- use sibling_i because sparseIpairs() provides the original i, not necessarily beginning at 1
			for i, _ in m_table.sparseIpairs(data_variety) do
				-- only the first child continues the current row; later children start their own
				if sibling_i > 1 then
					tr = nil
				end
				data_variety[i] = recurse(data_variety[i], tr)
				sibling_i = sibling_i + 1
			end
		end
		return data_variety
	end
	recurse(data_lang.varieties)

	-- add notes to $dial_syn_table
	-- ----
	-- insert synonyms note
	if data_syns.note and data_syns.note ~= "" then
		table.insert(data_lang.notes, 1, data_syns.note)
	end
	-- add notes to $dial_syn_table
	for _, note in ipairs(data_lang.notes) do
		dial_syn_table
			:tag('tr')
				:tag('td')
					:attr("colspan", #data_lang.columns)
					:wikitext(note)
				:done()
			:done()
		:done()
	end

	-- return
	return tostring(dial_syn_table) .. require("Module:TemplateStyles")("Template:dialect synonyms/styles.css")
end

return export