Jump to content

Module:jje

From Wiktionary, the free dictionary

This module is in beta stage.
Its interface has been stabilised, but the module may still contain errors. Do not deploy widely until the module has been tested.

local export = {}

local m_str_utils = require("Module:string utilities")

local codepoint = m_str_utils.codepoint
local concat = table.concat
local floor = math.floor
local gmatch = m_str_utils.gmatch
local gsplit = m_str_utils.gsplit
local gsub = m_str_utils.gsub
local insert = table.insert
local len = m_str_utils.len
local match = m_str_utils.match
local min = math.min
local remove = table.remove
local sub = m_str_utils.sub
local u = m_str_utils.char
local upper = m_str_utils.upper

local lang = require("Module:languages").getByCode("jje")
local HangChars = require("Module:scripts").getByCode("Hang"):getCharacters()
local HaniChars = require("Module:scripts").getByCode("Hani"):getCharacters()

-- Created from Module:ko

-- Builds the floating "Hanja in this term" table for the current page.
-- Every character of the page title that is not in the Hani character set is
-- discarded; each remaining character becomes one linked table cell.
function export.hanjatab()
	local page_title = mw.title.getCurrentTitle().text
	-- Assigning to a local keeps only the first gsub result (the string).
	local han_only = gsub(page_title, '[^' .. HaniChars .. ']', '')
	local cells = gsub(han_only, '(.)', '<td style="padding:0.5em;">[[%1#Jeju|%1]]</td>')

	return concat({
		'<table class="floatright wikitable" style="text-align:center; font-size:small;"><tr><th colspan="',
		len(han_only),
		'" style="font-weight:normal;">[[hanja|Hanja]] in this term</th></tr><tr lang="jje" class="Kore" style="font-size:2em; background:white; line-height:1em;">',
		cells,
		'</tr></table>',
	})
end

-- Strips every character belonging to the Hang character set from the first
-- frame argument and returns what is left.
function export.remove_hangeul(f)
	local hangul_class = '[' .. HangChars .. ']'
	-- The parentheses drop gsub's second result (the substitution count) so
	-- that exactly one value is returned.
	return (gsub(f.args[1], hangul_class, ''))
end

-- Wraps every occurrence of the current page name inside the first frame
-- argument in ''' (wiki bold) markers.
-- Nothing is changed when the text does not contain the page name or already
-- contains an apostrophe.
function export.boldify(f)
	local pagename = mw.title.getCurrentTitle().text
	local hangul = f.args[1]

	-- The page name must be matched literally: escape pattern magic characters
	-- for the search and '%' for the replacement string.
	local literal_name = gsub(pagename, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0")
	local bold_name = "'''" .. gsub(pagename, "%%", "%%%%") .. "'''"

	if match(hangul, literal_name) and not match(hangul, "'") then
		hangul = gsub(hangul, literal_name, bold_name)
	end
	return hangul
end

-- Prepares the first frame argument for display as a usage example.
-- 1. If the text contains the current page name and no apostrophe yet, every
--    occurrence of the page name is wrapped in ''' markers.
-- 2. ''' markers are then turned alternately into <b> and </b>.
-- 3. All "^" characters are removed.
function export.usex_hangul(f)
	local pagename = mw.title.getCurrentTitle().text
	local hangul = f.args[1]

	-- The page name must be matched literally: escape pattern magic characters
	-- for the search and '%' for the replacement string.
	local literal_name = gsub(pagename, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0")
	local bold_name = "'''" .. gsub(pagename, "%%", "%%%%") .. "'''"

	if match(hangul, literal_name) and not match(hangul, "'") then
		hangul = gsub(hangul, literal_name, bold_name)
	end

	-- One pass per ''' marker found in the text as it stands now; each pass
	-- replaces the first remaining marker, alternating opening and closing tags.
	local i = 1
	local front, back = '<b>', '</b>'
	for _ in gmatch(hangul, "'''") do
		hangul = gsub(hangul, "'''", (i % 2 == 1 and front or back), 1)
		i = i + 1
	end

	hangul = gsub(hangul, '%^', '')
	return hangul
end

-- Formats a Jeju term mention: linked term, then (hanja, transliteration,
-- gloss) in parentheses, then an optional italic note.
--
-- `arg` is an argument table. Fields read here:
--   [1]..[4]  free-order values; each non-empty one is classified below as the
--             Hangul word, a transliteration, a hanja form or a gloss
--   [5], note optional note ([5] wins over `note`)
--   gloss     explicit gloss (the field is cleared on `arg` after being read)
--   tr        explicit transliteration, overriding any detected one
--
-- Returns the formatted wikitext/HTML string.
function export.link(arg)
	local args, distances = {}, {}
	local m_pron = require("Module:jje-translit")
	local curr_distance, closest_match = 1000, 0
	local word, translit, definition, hanja, note = false, false, false, false, false
	
	-- Collect the non-empty positional parameters, closing up any gaps.
	for i = 1, 4, 1 do
		if arg[i] and arg[i] ~= "" then
			insert(args, arg[i])
		end
	end

	-- The parameter with the most Hang characters is taken as the word.
	local curr_hangul_level, closest_hangul = 0, 0
	for i, parameter in ipairs(args) do
		local _, tentative_hangul_level = gsub(parameter, "[" .. HangChars .. "]", "")
		if tentative_hangul_level > curr_hangul_level then
			curr_hangul_level = tentative_hangul_level
			closest_hangul = i
		end
	end

	if curr_hangul_level > 0 then
		word = args[closest_hangul]
		remove(args, closest_hangul)
	end
	
	-- Edit distance between two strings, computed byte by byte with the usual
	-- insert/delete/substitute dynamic-programming table.
	local function compute_distance(str1, str2)
		local len1, len2 = #str1, #str2
		local char1, char2, distance = {}, {}, {}
		
		str1:gsub('.', function (c)
			insert(char1, c) end)
		
		str2:gsub('.', function (c)
			insert(char2, c) end)
		
		for i = 0, len1 do
			distance[i] = {}
		end
		
		for i = 0, len1 do
			distance[i][0] = i
		end
		
		for i = 0, len2 do
			distance[0][i] = i
		end
		
		for i = 1, len1 do
			for j = 1, len2 do
				distance[i][j] = min(
					distance[i-1][j] + 1,
					distance[i][j-1] + 1,
					distance[i-1][j-1] + (char1[i] == char2[j] and 0 or 1)
				)
			end
		end
	
		return distance[len1][len2]
	end
	
	local m_link = require("Module:links")
	-- Automatic transliteration of the word (or of the first raw argument when
	-- no Hangul word was found); used to recognise a hand-written
	-- transliteration among the remaining parameters and as the fallback.
	local test_translit = m_pron.tr_revised(m_link.remove_links(word or arg[1])) or ""
	
	if arg[5] and arg[5] ~= "" then
		note = arg[5]
	elseif arg["note"] and arg["note"] ~= "" then
		note = arg["note"]
	end
	
	if arg["gloss"] then
		definition = arg["gloss"]
		arg["gloss"] = nil
	end
	
	-- Among parameters with no Hang/Hani characters, find the one closest to
	-- the automatic transliteration.
	for i, parameter in ipairs(args) do
		if not match(parameter, '[' .. HangChars .. HaniChars .. ']') then
			local tentative_distance = compute_distance(test_translit, parameter)
			if tentative_distance < curr_distance then
				curr_distance = tentative_distance
				closest_match = i
			end
		end
	end

	if curr_distance < 3 and #args > 1 then
		translit = args[closest_match]
		remove(args, closest_match)
	end

	-- NOTE(review): removing inside ipairs shifts later entries down, so the
	-- entry right after a removed one is not examined. Left as is because the
	-- intended handling of several hanja parameters is not evident here.
	for i, parameter in ipairs(args) do
		if match(parameter, "[" .. HaniChars .. "]") then
			hanja = parameter
			remove(args, i)
		end
	end
	
	if not hanja and not word then
		word = args[1]
		remove(args, 1)
	end
	
	-- Whatever is left is transliteration and/or gloss.
	if #args > 1 then
		translit = args[1]
		definition = args[2]
	
	elseif #args > 0 then
		if definition then
			translit = args[1]
		else
			definition = args[1]
		end
	end
	
	-- Auto-link each run of Hani and Hang characters unless the hanja already
	-- contains link brackets; doubled brackets from repeated runs are collapsed.
	if hanja and not match(hanja, "[%[%]]") then
		for hanja_word in gmatch(hanja, "[" .. HaniChars .. "]+") do
			hanja = gsub(hanja, hanja_word, "[[" .. hanja_word .. "]]")
		end
		for hangul_word in gmatch(hanja, "[" .. HangChars .. "]+") do
			hanja = gsub(hanja, hangul_word, "[[" .. hangul_word .. "]]")
		end
		hanja = gsub(hanja, "%[%[%[%[", "[[")
		hanja = gsub(hanja, "%]%]%]%]", "]]")
	end

	if arg["tr"] or translit or test_translit then
		translit = '<span lang="jje-Latn" class="mention-tr tr Latn">' .. 
			(arg["tr"] or translit or test_translit) .. "</span>"
	end
	
	if definition then
		if not match(definition, "^''.+''$") then definition = "“" .. definition .. "”" end
	end
	
	if hanja then
		hanja = '<span lang="jje" class="Kore">' .. m_link.language_link{lang = lang, term = hanja} .. '</span>'
	end

	-- `word` is still false when only a hanja form was supplied; the result
	-- below falls back to the hanja link in that case, so the word formatting
	-- must be skipped rather than run on a non-string.
	if word then
		word = gsub(word, "%^", "")
		
		if not match(word, "[%[%]]") then
			if match(word, "^—.+—$") then
				word = gsub(word, "—(.+)—", "[[-%1-|—%1—]]")
				
			elseif match(word, "^—.+$") then
				word = gsub(word, "—(.+)", "[[-%1|—%1]]")
				
			elseif match(word, "^.+—$") then
				word = gsub(word, "(.+)—", "[[%1다|%1—]]")
			
			elseif match(word, "^%*") then
				word = gsub(word, "%*", "")
			
			else
				word = "[[" .. word .. "]]"
			end
		end
	end
	
	-- insert() with a nil value adds nothing, so absent pieces are skipped.
	local info = {}
	insert(info, word and (hanja or nil) or nil)
	insert(info, translit or nil)
	insert(info, definition or nil)

	local result = word
		and
			("<span lang=\"jje\" class=\"Kore\">" .. word .. "</span>")
		or
			('<span lang="jje" class="Kore">' .. m_link.language_link{lang = lang, term = hanja} .. '</span>')
	
	if #info > 0 then
		result = result .. " (" .. concat(info, ", ") .. ")"
	end
	
	if note then
		result = result .. " (<i>" .. note .. "</i>)"
	end
	
	return result
end

-- Template entry point: runs export.link on the arguments of the parent frame
-- (the template that invoked this module).
function export.link_t(frame)
	local parent_args = frame:getParent().args
	return export.link(parent_args)
end

-- Generates the wikitext skeleton of a new Jeju entry for the current page.
--
-- Arguments are read from the parent frame:
--   1, 3, 5      part-of-speech codes of up to three sections (see genTitle)
--   2, 4, 6      the matching definitions
--   e, h, nat, ee/el, c1..c4, hj1..hj4, t1..t4   etymology inputs (see add_etym)
--   head, irreg  passed to the headword template
--   cat, pic, capt, wp   category, image, image caption, Wikipedia box
--   alt, syn, ant, der, rel, also (+ numbered continuations)   term lists
-- Parameters for the second and third etymology section use the same names
-- prefixed with "2" / "3". Supplying any of 2e, 2h, 2nat, 2ee, 2c1, 2p or
-- multiEtym switches the output to numbered "Etymology N" sections.
--
-- Returns the generated wikitext as one string.
function export.new(frame)
	local title = mw.title.getCurrentTitle().text
	local args = frame:getParent().args
	local poses = { args[1] or "", args[3] or (args[4] and "" or false), args[5] or (args[6] and "" or false) }
	local defs = { (args[2] ~= "" and args[2]), (args[4] ~= "" and args[4]), (args[6] ~= "" and args[6]) }
	local etym = args["e"] or false
	local head = args["head"] or false
	local cat = args["cat"] or false
	local image = args["pic"] or false
	local caption = args["capt"] or false
	local pedia = args["wp"] or false
	local irreg = args["irreg"] or false
	
	local result = ""
	
	-- Section heading for a POS code; unknown codes are capitalised as given.
	local function genTitle(text)
		local pos_title = {
			[""] = "Noun", ["n"] = "Noun", ["pn"] = "Proper noun", ["propn"] = "Proper noun", ["pron"] = "Pronoun",
			["v"] = "Verb", ["a"] = "Adjective", ["adj"] = "Adjective", ["adv"] = "Adverb",
			["prep"] = "Preposition", ["postp"] = "Postposition", ["conj"] = "Conjunction",
			["part"] = "Particle", ["suf"] = "Suffix",
			["prov"] = "Proverb", ["id"] = "Idiom", ["ph"] = "Phrase", ["intj"] = "Interjection", ["interj"] = "Interjection",
			["cl"] = "Classifier", ["cls"] = "Classifier", ["num"] = "Numeral", ["abb"] = "Abbreviation",
			["det"] = "Determiner", ["deter"] = "Determiner", ["root"] = "Root",
		};
		return pos_title[text] or upper(sub(text, 1, 1)) .. sub(text, 2, -1)
	end
	
	-- Headword-template suffix ("{{jje-<suffix>}}") for a POS code; unknown
	-- codes fall back to "pos|<code>".
	local function genHead(text)
		local pos_head = {
			[""] = "noun", ["n"] = "noun", ["pn"] = "proper noun", ["propn"] = "proper noun", ["v"] = "verb", ["a"] = "adj", ["adv"] = "adv",
			["postp"] = "pos|post", ["conj"] = "pos|con", ["part"] = "pos|particle", ["pron"] = "pos|pronoun",
			["prov"] = "proverb", ["id"] = "pos|idiom", ["ph"] = "pos|phrase", ["intj"] = "interj",
			["abb"] = "pos|abbr", ["cl"] = "pos|cls", ["det"] = "det", ["deter"] = "det", ["root"] = "root", ["num"] = "num",
		};
		return pos_head[text] or "pos|" .. text
	end
	
	-- Emits a heading plus one "* {{l|jje|...}}" line for args[class],
	-- args[class .. 2], args[class .. 3], ...; ":" in a value becomes "|".
	-- Returns "" when args[class] is absent.
	local function other(class, title, args, level)
		local code = ""
		if args[class] then
			code = code .. "\n\n" .. level .. title .. level .. "\n* {{l|jje|" .. gsub(args[class], ":", "|") .. "}}"
			local i = 2
			while args[class .. i] do
				code = code .. "\n* {{l|jje|" .. gsub(args[class .. i], ":", "|") .. "}}"
				i = i + 1
			end
		end
		return code
	end
	
	local multiEtym = false
	if args["2e"] or args["2h"] or args["2nat"] or args["2ee"] or args["2c1"] or args["2p"] or args["multiEtym"] then
		multiEtym = true
	end
	
	-- Builds the "|name=value" parameter string for {{jje-IPA}} from the
	-- arguments of the given etymology section; proper nouns force cap=y.
	local function iterate_param(args, genPos, etymNo)
		if genPos == "proper noun" then args[(etymNo > 1 and etymNo or "") .. "cap"] = "y" end
		local text = ""
		for _, arg in ipairs( { "l", "com", "nn", "ui", "nobc", "cap", "ni", "bcred", "a", "uie" } ) do
			local arg_temp
			if etymNo > 1 then arg_temp = etymNo .. arg else arg_temp = arg end
			if args[arg_temp] then text = text .. "|" .. arg .. "=" .. args[arg_temp] end
		end
		return text
	end
	
	-- Builds the etymology text of section `etymNo`. The first available source
	-- wins: explicit text (e), Sino-Jeju (h), native (nat), borrowing (ee),
	-- compound (c1...), then automatic affixation deduced from the page title.
	local function add_etym(args, etymNo)
		local etymText = ""
		local n = etymNo > 1 and etymNo or ""
		local hj1 = args[n.."hj1"] and "(" .. args[n.."hj1"] .. ")" or ""
		if args[n.."e"] then etymText = etymText .. args[n.."e"]

		-- TO DO: Create jje-etym-sino & jje-etym-native Templates

		elseif args[n.."h"] then etymText = etymText .. "{{ko-etym-sino|" .. args[n.."h"] .. (args[n.."he"] and "|" .. args[n.."he"] or "") .. "}}."
		elseif args[n.."nat"] then etymText = etymText .. "{{ko-etym-native|" .. gsub(args[n.."nat"], ",", "|") .. "}}"
		elseif args[n.."ee"] then etymText = etymText .. "From {{bor|jje|" .. (args[n.."el"] or "en") .. "|" .. args[n.."ee"] .. "}}."
		elseif args[n.."c1"] then etymText = etymText .. 
			"{{compound|jje|" .. args[n.."c1"] .. hj1 .. (args[n.."t1"] and "|t1=" .. args[n.."t1"] or "") ..
			"|" .. args[n.."c2"] .. (args[n.."hj2"] and "(" .. args[n.."hj2"] .. ")" or "") .. (args[n.."t2"] and "|t2=" .. args[n.."t2"] or "") ..
			(args[n.."c3"] and "|" .. args[n.."c3"] .. (args[n.."hj3"] and "(" .. args[n.."hj3"] .. ")" or "") .. (args[n.."t3"] and "|t3=" .. args[n.."t3"] or "") or "") .. 
			(args[n.."c4"] and "|" .. args[n.."c4"] .. (args[n.."hj4"] and "(" .. args[n.."hj4"] .. ")" or "") .. (args[n.."t4"] and "|t4=" .. args[n.."t4"] or "") or "") .. "}}."
		elseif match(title, "[ᄒᆞ뒈롭]다$") then
			local derives_adj = genHead(poses[1]) == "adj"
			local suffix_data = {
				['ᄒᆞ다'] = "|t2=to do|pos2=[[light verb]] deriving " .. (derives_adj and "adjectives" or "active verbs"),
				['뒈다'] = "|t2=to become|pos2=[[light verb]] deriving " .. (derives_adj and "adjectives" or "passive verbs"),
				['롭다'] = "|pos2=suffix deriving adjectives",
			}

			-- Find which known suffix the title ends in by comparing against
			-- the keys themselves. A fixed two-character slice is not safe:
			-- the ᄒᆞ다 key may be longer than two characters (ᄒᆞ is written
			-- with conjoining jamo), in which case the lookup below would miss.
			local suffix
			for candidate in pairs(suffix_data) do
				if sub(title, -len(candidate), -1) == candidate then
					suffix = candidate
					break
				end
			end
			-- No known suffix: fall back to the last two characters.
			suffix = suffix or sub(title, -2, -1)

			etymText = etymText
				.. "{{af|jje"
				.. "|" .. sub(title, 1, -len(suffix) - 1) .. hj1
				.. "|-" .. suffix
				.. (args["c1r"] and "|pos1=root" or "")
				.. (args["t1"] and "|t1=" .. args["t1"] or "")
				.. (suffix_data[suffix] or "")
				.. "}}"
				.. "."
		elseif match(title, "시키다$") or match(title, "스럽다$") then
			etymText = etymText
				.. "{{af|jje"
				.. "|" .. sub(title, 1, -4) .. hj1
				.. "|-" ..  sub(title, -3, -1)
				.. (args["c1r"] and "|pos1=root" or "")
				.. (args["t1"] and "|t1=" .. args["t1"] or "")
				.. "}}"
				.. "."
		end
		return etymText
	end
	
	-- Wraps the definition of a "root" section in {{jje-root of}}. Skipped when
	-- there is no definition, so the {{rfdef}} fallback is used instead of
	-- concatenating a missing value.
	local function wrap_root(index)
		if genHead(poses[index]) == "root" and defs[index] then
			defs[index] = "{{jje-root of|" .. defs[index] .. "}}"
		end
	end
	
	result = result .. "==Jeju=="
	if pedia then result = result .. "\n{{wp|lang=ko" .. (pedia ~= "y" and "|" .. pedia or "") .. "}}" end
	if image then result = result .. "\n[[File:" .. image .. "|thumb|right|250px|" .. (caption or title) .. ".]]" end
	result = result .. other("alt", "Alternative forms", args, "===")
	
	local autoEtym = false
	if match(title, "[ᄒᆞ뒈롭]다$") or match(title, "시키다$") or match(title, "스럽다$") then autoEtym = true end
	if args["e"] or args["2e"] or args["3e"] or args["h"] or args["nat"] or args["ee"] or args["c1"] or autoEtym or multiEtym then
		etym = "\n\n===Etymology" .. (multiEtym and " 1" or "") .. "===\n"
		etym = etym .. add_etym(args, 1)
	end
	
	if etym then result = result .. etym end
	-- Sub-sections sit one level deeper when etymologies are numbered.
	local level = multiEtym and "====" or "==="
	result = result .. other("1alt", "Alternative forms", args, "====")
	
	result = result .. "\n\n" .. level .. "Pronunciation" .. level ..
	"\n{{jje-IPA" .. iterate_param(args, genHead(poses[1]), 1) .. "}}"
	wrap_root(1)
	result = result .. "\n\n" .. level .. genTitle(poses[1]) .. level .. "\n{{jje-" .. genHead(poses[1]) ..
	(head and ("|head=" .. head) or "") .. (args["h"] and ("|hanja=" .. args["h"]) or "") ..
	(irreg and "|irreg=y" or "") .. "}}\n\n# " .. (defs[1] or "{{rfdef|jje}}")
	
	-- Expands a shorthand "der" value made only of the letters h/d/s into full
	-- derived terms (title + optional hanja + the matching light verb), stored
	-- as der, der2, der3, ... for the given etymology section.
	local function add_der(args, etymNo, hanja)
		local n = etymNo > 1 and etymNo or ""
		local translDer = { ["h"] = "ᄒᆞ다", ["d"] = "뒈다", ["s"] = "시키다" }
		if args[n .. "der"] and gsub(args[n .. "der"], "[sdh]", "") == "" then
			local i = 1
			for ch in gsplit(args[n .. "der"], "") do
				args[n .. "der" .. (i == 1 and "" or i)] = title .. (hanja and "(" .. hanja .. ")" or "") .. translDer[ch]
				i = i + 1
			end
		end
		return args
	end
	
	args = add_der(args, 1, args["h"] or false)
	result = result .. other("syn", "=Synonyms=", args, level)
	result = result .. other("ant", "=Antonyms=", args, level)
	result = result .. other("der", "=Derived terms=", args, level)
	result = result .. other("rel", "=Related terms=", args, level)
	result = result .. other("also", "=See also=", args, level)
	-- Revisit once jje-conj module is eventually made
	--[[if genHead(poses[1]) == "adj" or genHead(poses[1]) == "verb" then
		result = result .. "\n\n=" .. level .. "Conjugation" .. level .. "=\n{{jje-conj/" .. genHead(poses[1]) .. (irreg and "|irreg=y" or "") .. "}}"
	end]]
	
	-- Appends the second or third section. With numbered etymologies it gets
	-- its own etymology, pronunciation and term lists; otherwise only a
	-- level-3 POS heading, headword line and definition.
	local function add_section(index)
		local pos = poses[index]
		if not pos then return end
		
		local section_hanja = args[index .. "h"]
		local headword = "\n{{jje-" .. genHead(pos) ..
			(head and ("|head=" .. head) or "") .. (section_hanja and ("|hanja=" .. section_hanja) or "") ..
			"}}\n\n# "
		
		if multiEtym then
			result = result .. "\n\n===Etymology " .. index .. "===\n" .. add_etym(args, index)
			level = "===="
			result = result .. other(index .. "alt", "Alternative forms", args, level)
			
			result = result .. "\n\n" .. level .. "Pronunciation" .. level ..
				"\n{{jje-IPA" .. iterate_param(args, genHead(pos), index) .. "}}"
			wrap_root(index)
			result = result .. "\n\n" .. level .. genTitle(pos) .. level .. headword .. (defs[index] or "{{rfdef|jje}}")
			args = add_der(args, index, section_hanja or false)
			result = result .. other(index .. "syn", "=Synonyms=", args, level)
			result = result .. other(index .. "ant", "=Antonyms=", args, level)
			result = result .. other(index .. "der", "=Derived terms=", args, level)
			result = result .. other(index .. "rel", "=Related terms=", args, level)
			result = result .. other(index .. "also", "=See also=", args, level)
			-- Revisit once jje-conj module is eventually made
			--[[if genHead(pos) == "adj" or genHead(pos) == "verb" then
				result = result .. "\n\n=" .. level .. "Conjugation" .. level .. "=\n{{jje-conj/" .. genHead(pos) .. (irreg and "|irreg=y" or "") .. "}}"
			end]]
		else
			result = result .. "\n\n===" .. genTitle(pos) .. "===" .. headword .. (defs[index] or "{{rfdef|jje}}")
			-- Revisit once jje-conj module is eventually made
			--[[if genHead(pos) == "adj" or genHead(pos) == "verb" then
				result = result .. "\n\n====Conjugation====\n{{jje-conj/" .. genHead(pos) .. (irreg and "|irreg=y" or "") .. "}}"
			end]]
		end
	end
	
	add_section(2)
	add_section(3)
	
	if cat then
		result = result .. "\n\n{{C|jje|" .. cat .. "}}"
	end
	
	return result
end

-- Splits `syllable` into a table with `initial`, `vowel` and `final` fields.
--
-- A string containing a character in the range 가-힣 is decomposed
-- arithmetically from the code point returned by codepoint(): offset from
-- 0xAC00, 588 per initial and 28 per vowel; a zero final gives "".
-- Any other string is tested against the jamo classes below in order; the
-- first match puts the string in that slot and "Ø" in the other two.
-- Strings matching nothing yield initial "Ø", vowel " ", final "X".
function export.decompose_jamo(syllable)
	if match(syllable, "[가-힣]") then
		local cp = codepoint(syllable)
		if not cp then
			return { "", "", "" }
		end

		local offset = cp - 0xAC00
		local lead_index = floor(offset / 588)
		local vowel_index = floor((offset % 588) / 28)
		local tail_index = offset % 28

		local tail = ""
		if tail_index ~= 0 then
			tail = u(0x11A7 + tail_index)
		end

		return {
			initial = u(0x1100 + lead_index),
			vowel = u(0x1161 + vowel_index),
			final = tail,
		}
	end

	-- { character class, slot that receives the input }, tested in this order.
	local jamo_classes = {
		{ "[ᄀ-ᄒ]", "initial" },
		{ "[ᅡ-ᆢ]", "vowel" },
		{ "[ᆨ-ᇝ]", "final" },
		{ "[ퟍ]", "final" },
		{ "[ᆦힲ]", "vowel" },
		{ "[ㄱ-ㆎ]", "final" },
	}

	for _, entry in ipairs(jamo_classes) do
		if match(syllable, entry[1]) then
			local parts = { initial = "Ø", vowel = "Ø", final = "Ø" }
			parts[entry[2]] = syllable
			return parts
		end
	end

	return { initial = "Ø", vowel = " ", final = "X" }
end

return export