Jump to content

Module:ja-acc-multi

From Wiktionary, the free dictionary

See Template:ja-acc-multi for usage and examples.


-- Wikitext link shown as the default accent location qualifier.
local loc_link_str = '[[w:Tokyo dialect|Tokyo]]'
-- Wikitext links used to name each pitch-accent pattern in the output.
local heiban_link_str = '[[平板型|Heiban]]'
local atamadaka_link_str = '[[頭高型|Atamadaka]]'
local odaka_link_str = '[[尾高型|Odaka]]'
local nakadaka_link_str = '[[中高型|Nakadaka]]'

local export = {}

local m_str_utils = require("Module:string utilities")
local find = m_str_utils.find
local usub = m_str_utils.sub
local ulen = m_str_utils.len
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local gsplit = m_str_utils.gsplit

local lang = require("Module:languages").getByCode("ja")
local m_accent = require("Module:accent qualifier")

local kana_to_romaji = require("Module:Hrkt-translit").tr

local range = mw.loadData("Module:ja/data/range")
local a_kana = range.vowels.a
local i_kana = range.vowels.i
local u_kana = range.vowels.u
local e_kana = range.vowels.e
local o_kana = range.vowels.o
local n_kana = range.vowels.n
local submoraic_kana = range.submoraic_kana
local submoraic_kana_pattern = "[" .. submoraic_kana .. "]"

--from [[Module:ja-pron]]
-- Maps a short reference code (as written in the accent_ref parameter) to the
-- name of the reference template that is wrapped in a <ref> tag for it.
local ref_template_name_data = {
	DJR = "R:Daijirin",
	DJR4 = "R:Daijirin4",
	DJS = "R:Daijisen",
	KDJ = "R:Kokugo Dai Jiten",
	NHK = "R:NHK Hatsuon",
	NHK16 = "R:NHK2016",
	NKD2 = "R:Nihon Kokugo Daijiten 2 Online",
	SMK2 = "R:Shinmeikai2",
	SMK5 = "R:Shinmeikai5",
	SMK7 = "R:Shinmeikai7",
	SMK8 = "R:Shinmeikai8",
	SKK8 = "R:Sankoku8",
	ZAJ = "R:Zenkoku Akusento Jiten",
	JEL = "R:Kenkyusha JEL Pocket",
	JAC = "R:ja:JAccent",
}

--from [[Module:ja-pron]]
-- Build the footnote markup for a comma-separated list of accent references.
--
-- Every item of `text` is handled in one of three ways:
--   * a key of `ref_template_name_data` becomes a named <ref> tag whose
--     content is a call to the matching reference template;
--   * any other item that contains the substring "ref" is treated as raw
--     wikitext and expanded with frame:preprocess;
--   * anything else produces no output and is recorded through the
--     "ja-pron/unrecognized ref" tracking page.
--
-- @param frame  frame object; only extensionTag and preprocess are called on it
-- @param text   string of reference items separated by ","
-- @return       the generated pieces concatenated into one string
--               ("" when no item was recognised)
local function add_acc_refs(frame, text)
	local output = {}
	for ref_name in gsplit(text, ",") do
		mw.log(ref_name)
		local ref_template_name = ref_template_name_data[ref_name]
		if ref_template_name then
			output[#output + 1] = frame:extensionTag("ref", "{{" .. ref_template_name .. "}}", {name = ref_name})
		-- This used to call a bare `match`, which is never defined in this
		-- module, so reaching this branch raised "attempt to call global
		-- 'match' (a nil value)". A plain-text search for "ref" gives the
		-- intended truth value without needing any extra import.
		elseif string.find(ref_name, "ref", 1, true) then
			output[#output + 1] = frame:preprocess(ref_name)
		else
			-- [[Special:WhatLinksHere/Wiktionary:Tracking/ja-pron/unrecognized ref]]
			require("Module:debug").track("ja-pron/unrecognized ref")
		end
	end
	return table.concat(output)
end

-- Convert a string made up solely of decimal digits into a number.
--
-- Despite the name, "0" is accepted (the pattern only requires one or more
-- digits); signs, decimal points, surrounding whitespace and the empty
-- string are all rejected.
--
-- @param str  the text to convert
-- @return     the numeric value of `str`
-- Raises an error naming the offending string when `str` is not all digits.
local function string_to_positive_integer(str)
	if string.match(str, "^%d+$") then
		return tonumber(str)
	end
	error('String "' .. str .. '" is not a positive integer')
end

--from [[Module:ja-pron]]
-- Tone-mark lookup used when `high` is truthy: each plain vowel maps to its
-- acute-accented form, each macron vowel to two acute-accented vowels.
local romaji_high_replacement_table = {
	["a"] = "á",
	["e"] = "é",
	["i"] = "í",
	["o"] = "ó",
	["u"] = "ú",
	["ā"] = "áá",
	["ē"] = "éé",
	["ī"] = "íí",
	["ō"] = "óó",
	["ū"] = "úú",
}
-- Same lookup for the non-high case, using grave accents instead.
local romaji_low_replacement_table = {
	["a"] = "à",
	["e"] = "è",
	["i"] = "ì",
	["o"] = "ò",
	["u"] = "ù",
	["ā"] = "àà",
	["ē"] = "èè",
	["ī"] = "ìì",
	["ō"] = "òò",
	["ū"] = "ùù",
}

-- Romanise `kana` and decorate the result with tone marks.
--
-- @param kana  kana text handed to kana_to_romaji
-- @param high  truthy: use acute marks (and "ń"); otherwise grave marks (and "ǹ")
-- @param dev   truthy: wrap every vowel and every n (marked or not) in
--              <del>…</del> (presumably marking a devoiced mora; confirm
--              against the template documentation)
-- @return      the decorated romaji string
local function kana_to_romaji_acc(kana, high, dev)
	-- Pick the vowel table and the marked "n" once, then run one pipeline.
	local tone_marks, marked_n
	if high then
		tone_marks, marked_n = romaji_high_replacement_table, "ń"
	else
		tone_marks, marked_n = romaji_low_replacement_table, "ǹ"
	end

	local text = kana_to_romaji(kana)
	-- Characters that are not keys of the table are left unchanged.
	text = gsub(text, ".", tone_marks)
	-- An "n" in front of a consonant, apostrophe or space, or at the very
	-- end of the string, receives the tone mark too.
	text = gsub(text, "n([bcdfghjkmnprstvw%'z ])", marked_n .. "%1")
	text = gsub(text, "n$", marked_n)

	if dev then
		text = gsub(text, "[aeiounáéíóúńàèìòùǹ]", "<del>%1</del>")
	end
	return text
end

-- Template entry point: renders one bulleted pitch-accent line for a phrase
-- that is supplied as several kana segments.
--
-- Arguments are taken from the parent frame and validated by
-- Module:parameters. In the keys below "\1" stands where the numeric index
-- goes (presumably Module:parameters' placeholder convention; confirm there):
--   kanaN          kana text of segment N
--   kanaN_accent   accent of segment N (alias kanaN_acc): either a string of
--                  digits, the word "particle", or two digit strings
--                  separated by ">>" (base accent >> shifted accent)
--   kanaN_dev      numbers of the morae of segment N that get the dotted
--                  "dev" styling
--   accent_ref     reference items passed to add_acc_refs (alias acc_ref)
--   accent_loc     location qualifier, defaults to the Tokyo link
--                  (alias acc_loc)
--   accent_note    free text appended after the references (alias acc_note)
--
-- Returns a string of the form "<ul><li>…</li></ul>" containing, in order:
-- the location qualifier, the kana with overline spans, the tone-marked
-- romaji in brackets, the accent names and numbers, the references and the
-- note.
--
-- Raises an error when kana or kana_accent is missing, when an accent value
-- is malformed, or when an accent number exceeds the segment's mora count.
function export.show(frame)
	local params = {
		["kana\1"] = {list = true, require_index = true, disallow_holes = true},
		["kana\1_accent"] = {list = true, sublist = "%s*>>%s*", require_index = true, disallow_holes = true},
		["kana\1_acc"] = {alias_of = "kana\1_accent", list = true, sublist = "%s*>>%s*", require_index = true, disallow_holes = true},
		["kana\1_dev"] = {list = true, sublist = true, type = "number", require_index = true, allow_holes = true},
		["accent_ref"] = {sublist = true},
		["acc_ref"] = {alias_of = "accent_ref", sublist=true},
		["accent_loc"] = {default = loc_link_str},
		["acc_loc"] = {alias_of = "accent_loc", default = loc_link_str},
		["accent_note"] = { },
		["acc_note"] = {alias_of = "accent_note"}
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)
	
	-- Pieces of the final wikitext; concatenated once at the very end.
	local output_stringbuffer = {}
	
	local kanas = args.kana
	local kana_accs = args.kana_accent
	local kana_devs = args.kana_dev
	
	table.insert(output_stringbuffer, "<ul><li>")
	
	-- The line starts with the formatted location qualifier and a space.
	local acc_loc = args["accent_loc"]
	local acc_loc_text = m_accent.format_qualifiers(lang, {acc_loc})
	table.insert(output_stringbuffer, acc_loc_text)
	table.insert(output_stringbuffer, " ")
	
	if kanas == nil or #kanas == 0 then
		error("Parameters kana is required")
	end
	if kana_accs == nil or #kana_accs == 0 then
		error("Parameters kana_accent is required")
	end
	
	-- Replace each raw accent entry (a list of one or two strings) in place
	-- with a record: {is_particle = true}, {accent = n}, or
	-- {accent = n, shifted_accent = m}.
	for i, _ in ipairs(kanas) do
		local kana_accs_array = kana_accs[i]
		if kana_accs_array == nil or #kana_accs_array == 0 then
			error("Parameter kana_accent " .. i .. " is required")
		end
		
		if #kana_accs_array == 1 then
			local acc = kana_accs_array[1]
			if acc == "particle" then
				kana_accs[i] = {is_particle = true}
			else
				kana_accs[i] = {accent = string_to_positive_integer(acc)}
			end
		elseif #kana_accs_array == 2 then
			kana_accs[i] = {
				accent = string_to_positive_integer(kana_accs_array[1]),
				shifted_accent = string_to_positive_integer(kana_accs_array[2])
			}
		else
			error("Invalid >>accent")
		end
	end
	
	-- Four parallel buffers are filled while walking the segments: the kana
	-- markup, the romaji pieces, the accent-pattern names and the accent
	-- numbers.
	local kana_stringbuffer = {}
	local romaji_stringbuffer = {}
	local accname_stringbuffer = {}
	local accnumber_stringbuffer = {}
	-- highspan draws a top border over its content; downstepspan draws a
	-- short right-hand border inside it; devspan is a dotted gray outline.
	local highspan = '<span style="border-top:1px solid;position:relative;padding:1px;">'
	local downstepspan = '<span style="position:absolute;top:0;bottom:67%;right:0%;border-right:1px solid;"></span>'
	local devspan = '<span style="border:1px dotted gray; border-radius:50%;">'
	local spanend = '</span>'
	-- Passed as `high` to kana_to_romaji_acc for every mora emitted. Apart
	-- from the short atamadaka window noted below, it is true exactly while
	-- a highspan is open in kana_stringbuffer. It carries over from one
	-- segment to the next.
	local high_mora_state = false
	
	table.insert(kana_stringbuffer, '<span lang="ja" class="Jpan">')
	for i, kana in ipairs(kanas) do
		-- NOTE(review): ipairs never yields a nil value, so this guard
		-- cannot fire.
		if kana == nil then
			error("Parameters kana is required")
		end
		
		local kana_acc_info = kana_accs[i]
		
		--format accent kana morae in accordance with [[Module:ja-pron]]
		-- う/ウ after an o-row kana and い/イ after an e-row kana are rewritten
		-- as ー; "." and spaces are dropped.
		local acc_morae_kana = kana
		acc_morae_kana = gsub(acc_morae_kana, "([" .. o_kana .. "][゙゚]?)[うウ]", "%1ー")
		acc_morae_kana = gsub(acc_morae_kana, "([" .. e_kana .. "][゙゚]?)[いイ]", "%1ー")
		acc_morae_kana = gsub(acc_morae_kana, "%.", "")
		acc_morae_kana = gsub(acc_morae_kana, "% ", "")
		-- Split into morae: a character matching submoraic_kana_pattern is
		-- glued onto the preceding entry instead of starting a new one.
		local acc_morae = {}
		for char in gmatch(acc_morae_kana, ".") do
			if #acc_morae > 0 and find(char, submoraic_kana_pattern) then
				acc_morae[#acc_morae] = acc_morae[#acc_morae] .. char
			else
				acc_morae[#acc_morae + 1] = char
			end
		end
		
		--format kana, for romaji kana_to_romaji, in accordance with [[Module:ja-pron]]
		-- Same long-vowel normalisation as above, after which every ー is
		-- replaced by an explicit vowel kana chosen by the row of the
		-- preceding kana (or by a repeat of the preceding n-class kana), so
		-- that each mora can be romanised independently.
		local romaji_morae_kana = kana
		romaji_morae_kana = gsub(romaji_morae_kana, "([" .. o_kana .. "][゙゚]?)[うウ]", "%1ー")
		romaji_morae_kana = gsub(romaji_morae_kana, "([" .. e_kana .. "][゙゚]?)[いイ]", "%1ー")
		romaji_morae_kana = gsub(romaji_morae_kana, "([" .. o_kana .. "][゙゚]?)ー", "%1お")
		romaji_morae_kana = gsub(romaji_morae_kana, "([" .. e_kana .. "][゙゚]?)ー", "%1え")
		romaji_morae_kana = gsub(romaji_morae_kana, "([" .. u_kana .. "][゙゚]?)ー", "%1う")
		romaji_morae_kana = gsub(romaji_morae_kana, "([" .. i_kana .. "][゙゚]?)ー", "%1い")
		romaji_morae_kana = gsub(romaji_morae_kana, "([" .. a_kana .. "][゙゚]?)ー", "%1あ")
		romaji_morae_kana = gsub(romaji_morae_kana, "([" .. n_kana .. "][゙゚]?)ー", "%1%1")
		romaji_morae_kana = gsub(romaji_morae_kana, "%.", "")
		-- Spaces are kept here (unlike in acc_morae): a space is appended to
		-- the preceding mora so it survives into the romaji, and a leading
		-- space is discarded. The mora count therefore matches acc_morae.
		local romaji_morae = {}
		for char in gmatch(romaji_morae_kana, ".") do
			local is_space = char == " "
			if #romaji_morae > 0 and (is_space or find(char, submoraic_kana_pattern)) then
				romaji_morae[#romaji_morae] = romaji_morae[#romaji_morae] .. char
			elseif not is_space then
				romaji_morae[#romaji_morae + 1] = char
			end
		end
		-- A one-mora particle beginning with は is romanised as if it were わ.
		if kana_acc_info.is_particle and #romaji_morae == 1 and usub(romaji_morae[1], 1, 1) == "は" then
			romaji_morae[1] = gsub(romaji_morae[1], "は", "わ")
		end
		
		-- Both splits must agree, since they are indexed together below.
		if #acc_morae ~= #romaji_morae then
			error("Internal error")
		end
		local n_morae = #acc_morae
		
		-- Turn the list of mora numbers from kanaN_dev into a lookup set.
		local kana_dev = kana_devs[i]
		local kana_dev_table = {}
		if kana_dev then
			for _, dev in ipairs(kana_dev) do
				kana_dev_table[dev] = true
			end
		end
		
		-- kana_acc drives the rendering (the shifted accent when one was
		-- given); kana_base_acc is what gets named and numbered in the
		-- summary. Both are nil for a particle.
		local kana_acc = kana_acc_info.shifted_accent or kana_acc_info.accent
		local kana_base_acc = kana_acc_info.accent
		
		if not kana_acc_info.is_particle then
			if kana_acc > n_morae then
				error("Accent " .. kana_acc .. " is larger than morae " .. kana)
			end
			if kana_base_acc > n_morae then
				error("Accent " .. kana_base_acc .. " is larger than morae " .. kana)
			end
		end
		
		-- Emit mora `index` into both buffers: the kana (inside devspan when
		-- flagged in kana_dev_table) and its romaji, toned according to the
		-- current high_mora_state.
		local function do_insert_mora_index(index)
			if kana_dev_table[index] then
				table.insert(kana_stringbuffer, devspan)
				table.insert(kana_stringbuffer, acc_morae[index])
				table.insert(kana_stringbuffer, spanend)
			else
				table.insert(kana_stringbuffer, acc_morae[index])
			end
			table.insert(romaji_stringbuffer, kana_to_romaji_acc(romaji_morae[index], high_mora_state, kana_dev_table[index]))
		end
		
		if kana_acc == nil then -- pitch accent: particle
			-- A particle continues at whatever pitch the previous segment
			-- ended on; no span is opened or closed.
			for j = 1, n_morae do
				do_insert_mora_index(j)
			end
		elseif kana_acc == 0 then -- pitch accent: Heiban
			-- First mora outside any highspan, every following mora inside
			-- a highspan that is left open at the end of the segment.
			if high_mora_state then
				table.insert(kana_stringbuffer, spanend)
				high_mora_state = false
			end
			do_insert_mora_index(1)
			table.insert(kana_stringbuffer, highspan)
			high_mora_state = true
			for j = 2, n_morae do
				do_insert_mora_index(j)
			end
		else -- pitch accent: Atamadaka, Nakadaka, or Odaka
			if kana_acc == 1 then
				-- First mora is high. The flag is cleared right after it
				-- although the span stays open until the downstep markup
				-- below closes it; no mora is emitted in between.
				if not high_mora_state then
					table.insert(kana_stringbuffer, highspan)
					high_mora_state = true
				end
				do_insert_mora_index(1)
				high_mora_state = false
			else
				-- First mora outside the highspan, then the highspan opens.
				if high_mora_state then
					table.insert(kana_stringbuffer, spanend)
					high_mora_state = false
				end
				do_insert_mora_index(1)
				table.insert(kana_stringbuffer, highspan)
				high_mora_state = true
			end
			for j = 2, kana_acc do
				do_insert_mora_index(j)
			end
			-- Downstep after mora number kana_acc: marker in the romaji,
			-- vertical tick plus end of the highspan in the kana.
			table.insert(romaji_stringbuffer, "ꜜ")
			table.insert(kana_stringbuffer, downstepspan)
			table.insert(kana_stringbuffer, spanend)
			high_mora_state = false
			for j = kana_acc + 1, n_morae do
				do_insert_mora_index(j)
			end
		end
		-- Separators between this segment and the next one.
		if i ~= #kanas then
			-- No "・" is placed in front of a particle. When a highspan is
			-- open it is closed before the "・" and reopened after it, so
			-- the separator itself is not overlined.
			if not kana_accs[i + 1].is_particle then
				if high_mora_state then
					table.insert(kana_stringbuffer, spanend)
					table.insert(kana_stringbuffer, "・")
					table.insert(kana_stringbuffer, highspan)
				else
					table.insert(kana_stringbuffer, "・")
				end
			end
			-- A kana argument whose last byte is "." suppresses the space
			-- that otherwise separates the romaji of two segments.
			if string.sub(kana, string.len(kana), string.len(kana)) ~= "." then
				table.insert(romaji_stringbuffer, " ")
			end
		end
		
		-- Name of the pattern, chosen from the base accent. For a particle
		-- this falls through to the last branch but the value is unused.
		local link_str = nil
		if kana_base_acc == 0 then
			link_str = heiban_link_str
		elseif kana_base_acc == 1 then
			link_str = atamadaka_link_str
		elseif kana_base_acc == n_morae then
			link_str = odaka_link_str
		else
			link_str = nakadaka_link_str
		end
		
		-- When the rendered accent differs from the base accent an arrow is
		-- appended to the name and number: "ꜛ" if the rendered accent is 0,
		-- "ꜜ" otherwise.
		local shift_arrow = ""
		if kana_acc and kana_base_acc and kana_acc ~= kana_base_acc then
			if kana_acc == 0 then
				shift_arrow = "ꜛ"
			else
				shift_arrow = "ꜜ"
			end
		end
		
		-- Particles contribute nothing to the name/number summary.
		if not kana_acc_info.is_particle then
			table.insert(accname_stringbuffer, link_str .. shift_arrow)
			table.insert(accnumber_stringbuffer, "[" .. kana_base_acc .. shift_arrow .. "]")
		end
	end
	
	-- Drop every highspan that is immediately followed by spanend (an empty
	-- span). Walking backwards keeps the indices still to be visited valid
	-- while entries are removed.
	for i = #kana_stringbuffer, 1, -1 do
		if kana_stringbuffer[i] == highspan and kana_stringbuffer[i + 1] == spanend then
			table.remove(kana_stringbuffer, i + 1)
			table.remove(kana_stringbuffer, i)
		end
	end
	
	-- A highspan opened as the very last entry has no content: remove it.
	if kana_stringbuffer[#kana_stringbuffer] == highspan then
		table.remove(kana_stringbuffer, #kana_stringbuffer)
		high_mora_state = false
	end
	
	-- Close a highspan that is still open after the final segment.
	if high_mora_state then
		table.insert(kana_stringbuffer, spanend)
		high_mora_state = false
	end
	
	-- Closes the outer lang="ja" span opened before the loop.
	table.insert(kana_stringbuffer, spanend)
	
	--patch sokuon in romaji_stringbuffer, e.g. "'k" to "kk"
	-- An entry that is exactly "'" is replaced by "t" when the next entry
	-- starts with "c", or by the next entry's first letter when that letter
	-- is one of the consonants listed here; otherwise it is left alone.
	local sokuon_first_char_array = {
		"b", "d", "f", "g", "h", "j", "k", "m", "p", "q", "r", "s", "t", "v", "w", "y", "z"}
	local sokuon_first_char_table = {}
	for _, str in ipairs(sokuon_first_char_array) do
		sokuon_first_char_table[str] = true
	end
	for i, romaji in ipairs(romaji_stringbuffer) do
		if romaji == "'" then
			-- NOTE(review): this local shadows the built-in `next` for the
			-- rest of the enclosing block.
			local next = romaji_stringbuffer[i + 1]
			if next then
				if string.len(next) > 0 then
					local first_char = usub(next, 1, 1)
					if first_char == "c" then
						romaji_stringbuffer[i] = "t"
					elseif sokuon_first_char_table[first_char] then
						romaji_stringbuffer[i] = first_char
					end
				end
			end
		end
	end
	
	--patch n-apostrophe in romaji_stringbuffer, e.g. "んあ" -> "n'a"
	-- An entry that is exactly "n", "ń" or "ǹ" gets "'" appended when the
	-- next entry starts with one of the vowels listed here or with "y".
	local nn_apostrophe_first_char_array = {
		"a", "e", "i", "o", "u", "ā", "ē", "ī", "ō", "ū",
		"á", "é", "í", "ó", "ú", "à", "è", "ì", "ò", "ù", "y" }
	local nn_apostrophe_first_char_table = {}
	for _, str in ipairs(nn_apostrophe_first_char_array) do
		nn_apostrophe_first_char_table[str] = true
	end
	for i, romaji in ipairs(romaji_stringbuffer) do
		if romaji == "n" or romaji == "ń" or romaji == "ǹ" then
			local next = romaji_stringbuffer[i + 1]
			if next then
				if string.len(next) > 0 then
					local first_char = usub(next, 1, 1)
					if nn_apostrophe_first_char_table[first_char] then
						romaji_stringbuffer[i] = romaji .. "'"
					end
				end
			end
		end
	end
	
	--build the final output_stringbuffer
	
	-- 1. the kana markup
	for _, str in ipairs(kana_stringbuffer) do
		table.insert(output_stringbuffer, str)
	end
	-- 2. the romaji, wrapped as <samp>[…]</samp>
	if #romaji_stringbuffer > 0 then
		table.insert(output_stringbuffer, ' <span class="Latn"><samp>[')
		for i, str in ipairs(romaji_stringbuffer) do
			table.insert(output_stringbuffer, str)
		end
		table.insert(output_stringbuffer, "]</samp></span>")
	end
	-- 3. "(name + name – [n]-[n])": pattern names joined by " + ", then the
	--    bracketed accent numbers joined by "-"
	if #accname_stringbuffer > 0 and #accnumber_stringbuffer > 0 then
		table.insert(output_stringbuffer, " (")
		for i, str in ipairs(accname_stringbuffer) do
			table.insert(output_stringbuffer, str)
			if i ~= #accname_stringbuffer then
				table.insert(output_stringbuffer, " + ")
			end
		end
		table.insert(output_stringbuffer, " – ")
		for i, str in ipairs(accnumber_stringbuffer) do
			table.insert(output_stringbuffer, str)
			if i ~= #accnumber_stringbuffer then
				table.insert(output_stringbuffer, "-")
			end
		end
		table.insert(output_stringbuffer, ")")
	end
	
	-- 4. footnote references, one add_acc_refs call per list item
	local accent_ref = args["accent_ref"]
	if accent_ref then
		for _, accref in ipairs(accent_ref) do
			local accref_text = add_acc_refs(frame, accref)
			table.insert(output_stringbuffer, accref_text)
		end
	end
	
	-- 5. the optional free-text note, preceded by a space
	local accent_note = args["accent_note"]
	if accent_note then
		table.insert(output_stringbuffer, " ")
		table.insert(output_stringbuffer, accent_note)
	end
	
	table.insert(output_stringbuffer, "</li></ul>")
	
	return table.concat(output_stringbuffer)
end

return export