Module:ja-acc-multi
Appearance
- The following documentation is located at Module:ja-acc-multi/documentation. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
See Template:ja-acc-multi for usage and examples.
-- Wikitext link labels used in the rendered output: the default value of the
-- accent_loc parameter and the names of the four accent classes reported by
-- export.show.
local loc_link_str = "[[w:Tokyo dialect|Tokyo]]"
local heiban_link_str = "[[平板型|Heiban]]"
local atamadaka_link_str = "[[頭高型|Atamadaka]]"
local odaka_link_str = "[[尾高型|Odaka]]"
local nakadaka_link_str = "[[中高型|Nakadaka]]"
-- Table returned at the end of the module; export.show is its only visible entry.
local export = {}
-- String helpers taken from [[Module:string utilities]]. They are applied to
-- kana and accented romaji below, so they are presumably Unicode-aware
-- counterparts of the string library -- confirm against that module.
-- NOTE(review): ulen is not referenced anywhere in the visible code.
local m_str_utils = require("Module:string utilities")
local find = m_str_utils.find
local usub = m_str_utils.sub
local ulen = m_str_utils.len
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local gsplit = m_str_utils.gsplit
-- Language object for Japanese, passed to the accent qualifier formatter.
local lang = require("Module:languages").getByCode("ja")
local m_accent = require("Module:accent qualifier")
-- Kana -> romaji transliteration function.
local kana_to_romaji = require("Module:Hrkt-translit").tr
-- Kana range data. The vowels.* values are concatenated into "[...]" character
-- classes in export.show, so they are expected to be strings of kana --
-- presumably grouped by the vowel of the mora; verify in [[Module:ja/data/range]].
local range = mw.loadData("Module:ja/data/range")
local a_kana = range.vowels.a
local i_kana = range.vowels.i
local u_kana = range.vowels.u
local e_kana = range.vowels.e
local o_kana = range.vowels.o
local n_kana = range.vowels.n
-- Kana that do not start a mora of their own: export.show appends any character
-- matching this class to the preceding mora.
local submoraic_kana = range.submoraic_kana
local submoraic_kana_pattern = "[" .. submoraic_kana .. "]"
--from [[Module:ja-pron]]
-- Maps the short reference codes accepted in the accent_ref parameter to the
-- name of the reference template that add_acc_refs wraps in a <ref> tag.
-- The code itself is reused as the name attribute of that <ref> tag.
local ref_template_name_data = {
["DJR"] = "R:Daijirin",
["DJR4"] = "R:Daijirin4",
["DJS"] = "R:Daijisen",
["KDJ"] = "R:Kokugo Dai Jiten",
["NHK"] = "R:NHK Hatsuon",
["NHK16"] = "R:NHK2016",
["NKD2"] = "R:Nihon Kokugo Daijiten 2 Online",
["SMK2"] = "R:Shinmeikai2",
["SMK5"] = "R:Shinmeikai5",
["SMK7"] = "R:Shinmeikai7",
["SMK8"] = "R:Shinmeikai8",
["SKK8"] = "R:Sankoku8",
["ZAJ"] = "R:Zenkoku Akusento Jiten",
["JEL"] = "R:Kenkyusha JEL Pocket",
["JAC"] = "R:ja:JAccent",
}
--from [[Module:ja-pron]]
--- Build the reference markup for a comma-separated list of accent references.
--
-- Each item of `text` is handled in one of three ways:
--   * a code found in ref_template_name_data becomes a <ref> extension tag
--     whose content is the matching reference template and whose name
--     attribute is the code;
--   * any other item containing the literal text "ref" (e.g. a hand-written
--     <ref>...</ref>) is passed through frame:preprocess;
--   * anything else is dropped from the output and recorded on the
--     "ja-pron/unrecognized ref" tracking page.
--
-- @param frame the Scribunto frame used to expand tags and wikitext
-- @param text  comma-separated reference codes and/or raw ref wikitext
-- @return the concatenated markup of all recognised references
local function add_acc_refs(frame, text)
	local concat = table.concat
	local insert = table.insert
	local output = {}
	for ref_name in gsplit(text, ",") do
		-- Debug trace, visible in the Scribunto debug console.
		mw.log(ref_name)
		local ref_template_name = ref_template_name_data[ref_name]
		if ref_template_name then
			insert(output, frame:extensionTag("ref", "{{" .. ref_template_name .. "}}", {name = ref_name}))
		-- The original called an undefined global `match` here, which raised
		-- "attempt to call a nil value" for every code missing from the table.
		-- A plain (non-pattern) substring search is all that is needed.
		elseif string.find(ref_name, "ref", 1, true) then
			insert(output, frame:preprocess(ref_name))
		else
			-- [[Special:WhatLinksHere/Wiktionary:Tracking/ja-pron/unrecognized ref]]
			require("Module:debug").track("ja-pron/unrecognized ref")
		end
	end
	return concat(output)
end
--- Convert a string consisting only of decimal digits into a number.
-- Despite the name, "0" is accepted as well: the only check is that the whole
-- string is one or more digits.
-- @param str the text to convert
-- @return the numeric value of `str`
-- Raises an error when `str` contains anything other than digits (or is empty).
local function string_to_positive_integer(str)
	if string.find(str, "^%d+$") then
		return tonumber(str)
	end
	error('String "' .. str .. '" is not a positive integer')
end
--from [[Module:ja-pron]]
-- Lookup tables used by kana_to_romaji_acc as the replacement argument of a
-- per-character gsub: each plain or macron vowel maps to its accented form,
-- a macron vowel becoming two accented vowels.
-- High-pitch morae use the acute accent ...
local romaji_high_replacement_table = {
["a"] = "á", ["e"] = "é", ["i"] = "í", ["o"] = "ó", ["u"] = "ú",
["ā"] = "áá", ["ē"] = "éé", ["ī"] = "íí", ["ō"] = "óó", ["ū"] = "úú" }
-- ... and low-pitch morae use the grave accent.
local romaji_low_replacement_table = {
["a"] = "à", ["e"] = "è", ["i"] = "ì", ["o"] = "ò", ["u"] = "ù",
["ā"] = "àà", ["ē"] = "èè", ["ī"] = "ìì", ["ō"] = "òò", ["ū"] = "ùù" }
--- Transliterate one chunk of kana and mark its pitch on the romaji.
-- Vowels are replaced through the high (acute) or low (grave) replacement
-- table; an "n" that stands before a consonant, apostrophe or space, or at
-- the end of the chunk, receives the same accent. When `dev` is truthy every
-- vowel and every n (plain or accented) is additionally wrapped in <del>.
-- @param kana the kana to transliterate
-- @param high truthy for high pitch, falsy for low pitch
-- @param dev  truthy to apply the <del> wrapping
-- @return the accent-marked romaji
local function kana_to_romaji_acc(kana, high, dev)
	-- Select the vowel table and the accented n once instead of duplicating
	-- the substitutions in two branches.
	local vowel_marks = high and romaji_high_replacement_table or romaji_low_replacement_table
	local marked_n = high and "ń" or "ǹ"

	local result = gsub(kana_to_romaji(kana), ".", vowel_marks)
	result = gsub(result, "n([bcdfghjkmnprstvw%'z ])", marked_n .. "%1")
	result = gsub(result, "n$", marked_n)

	if dev then
		result = gsub(result, "[aeiounáéíóúńàèìòùǹ]", "<del>%1</del>")
	end
	return result
end
-- Template entry point: renders the pitch accent of a phrase given as a
-- sequence of kana words, each with its own accent number.
-- Arguments are read from the parent frame (the template call) and validated
-- by [[Module:parameters]].
-- Returns a single-item HTML list ("<ul><li>...</li></ul>") containing, in
-- order: the accent location qualifier, the kana with overline/downstep
-- spans, the accent-marked romaji, the accent class names and numbers, any
-- references and an optional note.
-- Raises an error when kana or accents are missing or an accent is invalid.
function export.show(frame)
-- Parameter specification. NOTE(review): "\1" presumably marks where the list
-- index goes in the parameter name (kana1, kana1_accent, ...) -- confirm
-- against Module:parameters.
-- kanaN_accent is split on ">>" into one or two values (base accent and
-- shifted accent); kanaN_dev is a sublist of numbers used as mora indices.
local params = {
["kana\1"] = {list = true, require_index = true, disallow_holes = true},
["kana\1_accent"] = {list = true, sublist = "%s*>>%s*", require_index = true, disallow_holes = true},
["kana\1_acc"] = {alias_of = "kana\1_accent", list = true, sublist = "%s*>>%s*", require_index = true, disallow_holes = true},
["kana\1_dev"] = {list = true, sublist = true, type = "number", require_index = true, allow_holes = true},
["accent_ref"] = {sublist = true},
["acc_ref"] = {alias_of = "accent_ref", sublist=true},
["accent_loc"] = {default = loc_link_str},
["acc_loc"] = {alias_of = "accent_loc", default = loc_link_str},
["accent_note"] = { },
["acc_note"] = {alias_of = "accent_note"}
}
local args = require("Module:parameters").process(frame:getParent().args, params)
-- All output fragments are collected here and concatenated once at the end.
local output_stringbuffer = {}
local kanas = args.kana
local kana_accs = args.kana_accent
local kana_devs = args.kana_dev
-- The line starts with the accent location formatted as a qualifier.
table.insert(output_stringbuffer, "<ul><li>")
local acc_loc = args["accent_loc"]
local acc_loc_text = m_accent.format_qualifiers(lang, {acc_loc})
table.insert(output_stringbuffer, acc_loc_text)
table.insert(output_stringbuffer, " ")
if kanas == nil or #kanas == 0 then
error("Parameters kana is required")
end
if kana_accs == nil or #kana_accs == 0 then
error("Parameters kana_accent is required")
end
-- Normalise every accent entry in place, replacing the raw string list with
-- a record:
--   {"particle"} -> {is_particle = true}
--   {"N"}        -> {accent = N}
--   {"N", "M"}   -> {accent = N, shifted_accent = M}   (written "N>>M")
for i, _ in ipairs(kanas) do
local kana_accs_array = kana_accs[i]
if kana_accs_array == nil or #kana_accs_array == 0 then
error("Parameter kana_accent " .. i .. " is required")
end
if #kana_accs_array == 1 then
local acc = kana_accs_array[1]
if acc == "particle" then
kana_accs[i] = {is_particle = true}
else
kana_accs[i] = {accent = string_to_positive_integer(acc)}
end
elseif #kana_accs_array == 2 then
kana_accs[i] = {
accent = string_to_positive_integer(kana_accs_array[1]),
shifted_accent = string_to_positive_integer(kana_accs_array[2])
}
else
error("Invalid >>accent")
end
end
-- Separate buffers for the four parts of the line that are built in parallel.
local kana_stringbuffer = {}
local romaji_stringbuffer = {}
local accname_stringbuffer = {}
local accnumber_stringbuffer = {}
-- HTML fragments: highspan draws a top border over high morae, downstepspan
-- draws a short right border at the point where the pitch falls, devspan draws
-- a dotted circle around morae listed in kanaN_dev.
local highspan = '<span style="border-top:1px solid;position:relative;padding:1px;">'
local downstepspan = '<span style="position:absolute;top:0;bottom:67%;right:0%;border-right:1px solid;"></span>'
local devspan = '<span style="border:1px dotted gray; border-radius:50%;">'
local spanend = '</span>'
-- True while a highspan is open in kana_stringbuffer; also selects the high or
-- low romaji accent marks. It is deliberately carried over from one word to
-- the next.
local high_mora_state = false
table.insert(kana_stringbuffer, '<span lang="ja" class="Jpan">')
for i, kana in ipairs(kanas) do
-- NOTE(review): ipairs stops at the first nil value, so this check looks
-- unreachable.
if kana == nil then
error("Parameters kana is required")
end
local kana_acc_info = kana_accs[i]
--format accent kana morae in accordance with [[Module:ja-pron]]
-- Display form: o-row kana + う and e-row kana + い are shown with the long
-- mark ー; "." and spaces are removed.
local acc_morae_kana = kana
acc_morae_kana = gsub(acc_morae_kana, "([" .. o_kana .. "][゙゚]?)[うウ]", "%1ー")
acc_morae_kana = gsub(acc_morae_kana, "([" .. e_kana .. "][゙゚]?)[いイ]", "%1ー")
acc_morae_kana = gsub(acc_morae_kana, "%.", "")
acc_morae_kana = gsub(acc_morae_kana, "% ", "")
-- Split into morae: a submoraic character joins the mora before it.
local acc_morae = {}
for char in gmatch(acc_morae_kana, ".") do
if #acc_morae > 0 and find(char, submoraic_kana_pattern) then
acc_morae[#acc_morae] = acc_morae[#acc_morae] .. char
else
acc_morae[#acc_morae + 1] = char
end
end
--format kana, for romaji kana_to_romaji, in accordance with [[Module:ja-pron]]
-- Romaji form: after the same long-vowel normalisation, every ー is expanded
-- into an explicit vowel kana chosen by the class of the preceding kana (for
-- the n class the kana itself is doubled); "." is removed but spaces are kept.
local romaji_morae_kana = kana
romaji_morae_kana = gsub(romaji_morae_kana, "([" .. o_kana .. "][゙゚]?)[うウ]", "%1ー")
romaji_morae_kana = gsub(romaji_morae_kana, "([" .. e_kana .. "][゙゚]?)[いイ]", "%1ー")
romaji_morae_kana = gsub(romaji_morae_kana, "([" .. o_kana .. "][゙゚]?)ー", "%1お")
romaji_morae_kana = gsub(romaji_morae_kana, "([" .. e_kana .. "][゙゚]?)ー", "%1え")
romaji_morae_kana = gsub(romaji_morae_kana, "([" .. u_kana .. "][゙゚]?)ー", "%1う")
romaji_morae_kana = gsub(romaji_morae_kana, "([" .. i_kana .. "][゙゚]?)ー", "%1い")
romaji_morae_kana = gsub(romaji_morae_kana, "([" .. a_kana .. "][゙゚]?)ー", "%1あ")
romaji_morae_kana = gsub(romaji_morae_kana, "([" .. n_kana .. "][゙゚]?)ー", "%1%1")
romaji_morae_kana = gsub(romaji_morae_kana, "%.", "")
-- Split into morae as above; a space is attached to the preceding mora (and a
-- leading space is dropped) so that the count matches acc_morae.
local romaji_morae = {}
for char in gmatch(romaji_morae_kana, ".") do
local is_space = char == " "
if #romaji_morae > 0 and (is_space or find(char, submoraic_kana_pattern)) then
romaji_morae[#romaji_morae] = romaji_morae[#romaji_morae] .. char
elseif not is_space then
romaji_morae[#romaji_morae + 1] = char
end
end
-- A one-mora particle starting with は is transliterated as わ.
if kana_acc_info.is_particle and #romaji_morae == 1 and usub(romaji_morae[1], 1, 1) == "は" then
romaji_morae[1] = gsub(romaji_morae[1], "は", "わ")
end
-- Both splits must agree, because they are indexed in parallel below.
if #acc_morae ~= #romaji_morae then
error("Internal error")
end
local n_morae = #acc_morae
-- Set of mora indices listed in kanaN_dev for this word.
local kana_dev = kana_devs[i]
local kana_dev_table = {}
if kana_dev then
for _, dev in ipairs(kana_dev) do
kana_dev_table[dev] = true
end
end
-- kana_acc drives the rendering (the shifted accent when one is given);
-- kana_base_acc is the accent reported in the class name and number.
-- Both are nil for a particle.
local kana_acc = kana_acc_info.shifted_accent or kana_acc_info.accent
local kana_base_acc = kana_acc_info.accent
if not kana_acc_info.is_particle then
if kana_acc > n_morae then
error("Accent " .. kana_acc .. " is larger than morae " .. kana)
end
if kana_base_acc > n_morae then
error("Accent " .. kana_base_acc .. " is larger than morae " .. kana)
end
end
-- Emits mora `index` to the kana buffer (inside a devspan when flagged) and
-- its romaji, accented according to the current high_mora_state.
local function do_insert_mora_index(index)
if kana_dev_table[index] then
table.insert(kana_stringbuffer, devspan)
table.insert(kana_stringbuffer, acc_morae[index])
table.insert(kana_stringbuffer, spanend)
else
table.insert(kana_stringbuffer, acc_morae[index])
end
table.insert(romaji_stringbuffer, kana_to_romaji_acc(romaji_morae[index], high_mora_state, kana_dev_table[index]))
end
-- A particle has no accent of its own: all its morae continue in whatever
-- state the previous word left behind.
if kana_acc == nil then -- pitch accent: particle
for j = 1, n_morae do
do_insert_mora_index(j)
end
-- Accent 0: first mora low, every following mora high, no downstep.
elseif kana_acc == 0 then -- pitch accent: Heiban
if high_mora_state then
table.insert(kana_stringbuffer, spanend)
high_mora_state = false
end
do_insert_mora_index(1)
table.insert(kana_stringbuffer, highspan)
high_mora_state = true
for j = 2, n_morae do
do_insert_mora_index(j)
end
else -- pitch accent: Atamadaka, Nakadaka, or Odaka
if kana_acc == 1 then
-- Accent 1: the first mora is high; reuse an already open highspan.
if not high_mora_state then
table.insert(kana_stringbuffer, highspan)
high_mora_state = true
end
do_insert_mora_index(1)
high_mora_state = false
else
-- Accent 2 or more: first mora low, then high up to the accented mora.
if high_mora_state then
table.insert(kana_stringbuffer, spanend)
high_mora_state = false
end
do_insert_mora_index(1)
table.insert(kana_stringbuffer, highspan)
high_mora_state = true
end
for j = 2, kana_acc do
do_insert_mora_index(j)
end
-- Mark the fall after the accented mora and close the highspan; the remaining
-- morae are low.
table.insert(romaji_stringbuffer, "ꜜ")
table.insert(kana_stringbuffer, downstepspan)
table.insert(kana_stringbuffer, spanend)
high_mora_state = false
for j = kana_acc + 1, n_morae do
do_insert_mora_index(j)
end
end
-- Word boundary: unless the next word is a particle, insert "・" in the kana,
-- placed outside the overline by closing and reopening the highspan. The
-- romaji gets a space unless the raw kana ends with ".".
if i ~= #kanas then
if not kana_accs[i + 1].is_particle then
if high_mora_state then
table.insert(kana_stringbuffer, spanend)
table.insert(kana_stringbuffer, "・")
table.insert(kana_stringbuffer, highspan)
else
table.insert(kana_stringbuffer, "・")
end
end
if string.sub(kana, string.len(kana), string.len(kana)) ~= "." then
table.insert(romaji_stringbuffer, " ")
end
end
-- Accent class name by base accent: 0 Heiban, 1 Atamadaka, last mora Odaka,
-- anything else Nakadaka. (Computed for particles too, but not used for them.)
local link_str = nil
if kana_base_acc == 0 then
link_str = heiban_link_str
elseif kana_base_acc == 1 then
link_str = atamadaka_link_str
elseif kana_base_acc == n_morae then
link_str = odaka_link_str
else
link_str = nakadaka_link_str
end
-- When the rendered accent differs from the base accent, append an arrow:
-- "ꜛ" if it shifted to 0, "ꜜ" otherwise.
local shift_arrow = ""
if kana_acc and kana_base_acc and kana_acc ~= kana_base_acc then
if kana_acc == 0 then
shift_arrow = "ꜛ"
else
shift_arrow = "ꜜ"
end
end
if not kana_acc_info.is_particle then
table.insert(accname_stringbuffer, link_str .. shift_arrow)
table.insert(accnumber_stringbuffer, "[" .. kana_base_acc .. shift_arrow .. "]")
end
end
-- Drop empty highspans (an opening tag immediately followed by its closing
-- tag). Iterating backwards keeps the indices still to be visited valid after
-- each removal.
for i = #kana_stringbuffer, 1, -1 do
if kana_stringbuffer[i] == highspan and kana_stringbuffer[i + 1] == spanend then
table.remove(kana_stringbuffer, i + 1)
table.remove(kana_stringbuffer, i)
end
end
-- A highspan opened at the very end has no content: remove it instead of
-- closing it.
if kana_stringbuffer[#kana_stringbuffer] == highspan then
table.remove(kana_stringbuffer, #kana_stringbuffer)
high_mora_state = false
end
-- Close a highspan that is still open ...
if high_mora_state then
table.insert(kana_stringbuffer, spanend)
high_mora_state = false
end
-- ... and then the outer lang="ja" span.
table.insert(kana_stringbuffer, spanend)
--patch sokuon in romaji_stringbuffer, e.g. "'k" to "kk"
-- An element that is exactly "'" is replaced by the first letter of the next
-- element when that letter is in the list below, or by "t" before "c".
local sokuon_first_char_array = {
"b", "d", "f", "g", "h", "j", "k", "m", "p", "q", "r", "s", "t", "v", "w", "y", "z"}
local sokuon_first_char_table = {}
for _, str in ipairs(sokuon_first_char_array) do
sokuon_first_char_table[str] = true
end
for i, romaji in ipairs(romaji_stringbuffer) do
if romaji == "'" then
-- NOTE(review): this local shadows the builtin `next` inside the block.
local next = romaji_stringbuffer[i + 1]
if next then
if string.len(next) > 0 then
local first_char = usub(next, 1, 1)
if first_char == "c" then
romaji_stringbuffer[i] = "t"
elseif sokuon_first_char_table[first_char] then
romaji_stringbuffer[i] = first_char
end
end
end
end
end
--patch n-apostrophe in romaji_stringbuffer, e.g. "んあ" -> "n'a"
-- An element that is exactly n/ń/ǹ gets a trailing apostrophe when the next
-- element starts with a vowel (plain, macron or accented) or "y".
local nn_apostrophe_first_char_array = {
"a", "e", "i", "o", "u", "ā", "ē", "ī", "ō", "ū",
"á", "é", "í", "ó", "ú", "à", "è", "ì", "ò", "ù", "y" }
local nn_apostrophe_first_char_table = {}
for _, str in ipairs(nn_apostrophe_first_char_array) do
nn_apostrophe_first_char_table[str] = true
end
for i, romaji in ipairs(romaji_stringbuffer) do
if romaji == "n" or romaji == "ń" or romaji == "ǹ" then
-- NOTE(review): shadows the builtin `next`, as in the sokuon loop.
local next = romaji_stringbuffer[i + 1]
if next then
if string.len(next) > 0 then
local first_char = usub(next, 1, 1)
if nn_apostrophe_first_char_table[first_char] then
romaji_stringbuffer[i] = romaji .. "'"
end
end
end
end
end
--build the final output_stringbuffer
-- Kana first, then the romaji in square brackets.
for _, str in ipairs(kana_stringbuffer) do
table.insert(output_stringbuffer, str)
end
if #romaji_stringbuffer > 0 then
table.insert(output_stringbuffer, ' <span class="Latn"><samp>[')
for i, str in ipairs(romaji_stringbuffer) do
table.insert(output_stringbuffer, str)
end
table.insert(output_stringbuffer, "]</samp></span>")
end
-- Accent summary: class names joined by " + ", an en dash, then the bracketed
-- accent numbers joined by "-".
if #accname_stringbuffer > 0 and #accnumber_stringbuffer > 0 then
table.insert(output_stringbuffer, " (")
for i, str in ipairs(accname_stringbuffer) do
table.insert(output_stringbuffer, str)
if i ~= #accname_stringbuffer then
table.insert(output_stringbuffer, " + ")
end
end
table.insert(output_stringbuffer, " – ")
for i, str in ipairs(accnumber_stringbuffer) do
table.insert(output_stringbuffer, str)
if i ~= #accnumber_stringbuffer then
table.insert(output_stringbuffer, "-")
end
end
table.insert(output_stringbuffer, ")")
end
-- References: each accent_ref item is expanded by add_acc_refs.
local accent_ref = args["accent_ref"]
if accent_ref then
for _, accref in ipairs(accent_ref) do
local accref_text = add_acc_refs(frame, accref)
table.insert(output_stringbuffer, accref_text)
end
end
-- Optional free-text note, appended verbatim after a space.
local accent_note = args["accent_note"]
if accent_note then
table.insert(output_stringbuffer, " ")
table.insert(output_stringbuffer, accent_note)
end
table.insert(output_stringbuffer, "</li></ul>")
return table.concat(output_stringbuffer)
end
return export