Module:User:Dine2016/ja-kanjitab
Appearance
- This module sandbox lacks a documentation subpage. You may create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
This is a private module sandbox of Dine2016, for his own experimentation. Items in this module may be added and removed at Dine2016's discretion; do not rely on this module's stability.
local export = {}
local m_utilities = require("Module:utilities")
local m_ja = require("Module:ja")
local title = mw.title.getCurrentTitle()
local PAGENAME = title.text
local NAMESPACE = title.nsText
local lang = require("Module:languages").getByCode("ja")
-- TODO: centralize internal tables here
-- Body of a character class (callers wrap it in "[...]" or "[^...]") that
-- matches kanji. Ranges, in order:
--   㐀-䶵                         U+3400..U+4DB5
--   一-鿌                         U+4E00..U+9FCC
--   \239\164\128-\239\171\153     U+F900..U+FAD9  (written as UTF-8 bytes)
--   𠀀-\240\175\168\159           U+20000..U+2FA1F (upper bound as UTF-8 bytes)
-- The final range needs an explicit upper bound: a trailing "-" inside a
-- character class is a literal hyphen, which would make "-" count as kanji and
-- leave every supplementary-plane character above U+20000 unmatched.
local kanji_pattern = "㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-\240\175\168\159"
-- Caption links shown beneath each kanji cell, indexed by grade number 1..9.
local kanji_grade_links = {}
-- 1..6: the six school grades all point at the same Wikipedia article and
-- differ only in the digit shown.
for grade = 1, 6 do
	kanji_grade_links[grade] = "[[:w:Kyōiku_kanji|Grade: " .. grade .. "]]"
end
kanji_grade_links[7] = "[[:w:Jōyō kanji|Grade: S]]"
kanji_grade_links[8] = "[[:w:Jinmeiyō kanji|Jinmeiyō]]"
kanji_grade_links[9] = "[[:w:Hyōgai kanji|Hyōgaiji]]"
--- Wrap a Japanese term in a Jpan-tagged span.
-- The current page's own name is rendered in bold without a link; any other
-- term becomes a link to the "Japanese" section of its entry.
-- @param text  string: the term to display
-- @return string: wikitext for the term
local function link(text)
	-- Compare with PAGENAME (a string, possibly overridden via |pagename=).
	-- Comparing with `title`, the mw.title object, can never be true for a
	-- string, which left the bold self-reference branch unreachable.
	if text == PAGENAME then
		return '<span lang="ja" class="Jpan" style="font-weight: bold;">' .. text .. '</span>'
	end
	return '<span lang="ja" class="Jpan">' .. '[[' .. text .. '#Japanese|' .. text .. ']]' .. '</span>'
end
-- export this?
-- Per-character shinjitai -> kyūjitai map used by shin_to_kyu. It lives at
-- file level so the table is built once per module load instead of on every
-- call. Entries whose key and value are the same character are no-ops
-- (presumably compatibility ideographs folded together by Unicode
-- normalization of the page text — TODO confirm).
local shin_to_kyu_map = {
	["亜"] = "亞", ["悪"] = "惡", ["圧"] = "壓", ["囲"] = "圍", ["医"] = "醫", ["為"] = "爲",
	["壱"] = "壹", ["逸"] = "逸", ["隠"] = "隱", ["栄"] = "榮", ["営"] = "營", ["衛"] = "衞",
	["駅"] = "驛", ["謁"] = "謁", ["円"] = "圓", ["塩"] = "鹽", ["縁"] = "緣", ["艶"] = "艷",
	["応"] = "應", ["欧"] = "歐", ["殴"] = "毆", ["桜"] = "櫻", ["奥"] = "奧", ["横"] = "橫",
	["温"] = "溫", ["穏"] = "穩", ["仮"] = "假", ["価"] = "價", ["禍"] = "禍", ["画"] = "畫",
	["会"] = "會", ["悔"] = "悔", ["海"] = "海", ["絵"] = "繪", ["壊"] = "壞", ["懐"] = "懷",
	["慨"] = "慨", ["概"] = "槪", ["拡"] = "擴", ["殻"] = "殼", ["覚"] = "覺", ["学"] = "學",
	["岳"] = "嶽", ["楽"] = "樂", ["喝"] = "喝", ["渇"] = "渴", ["褐"] = "褐", ["缶"] = "罐",
	["巻"] = "卷", ["陥"] = "陷", ["勧"] = "勸", ["寛"] = "寬", ["漢"] = "漢", ["関"] = "關",
	-- 既 -> 旣 (it was previously mapped to 卽, which is the old form of 即)
	["歓"] = "歡", ["観"] = "觀", ["気"] = "氣", ["祈"] = "祈", ["既"] = "旣", ["帰"] = "歸",
	["器"] = "器", ["偽"] = "僞", ["戯"] = "戲", ["犠"] = "犧", ["旧"] = "舊", ["拠"] = "據",
	["挙"] = "擧", ["虚"] = "虛", ["峡"] = "峽", ["挟"] = "挾", ["狭"] = "狹", ["郷"] = "鄕",
	["響"] = "響", ["暁"] = "曉", ["勤"] = "勤", ["謹"] = "謹", ["区"] = "區", ["駆"] = "驅",
	["勲"] = "勳", ["薫"] = "薰", ["径"] = "徑", ["茎"] = "莖", ["恵"] = "惠", ["掲"] = "揭",
	["渓"] = "溪", ["経"] = "經", ["蛍"] = "螢", ["軽"] = "輕", ["継"] = "繼", ["鶏"] = "鷄",
	["芸"] = "藝", ["撃"] = "擊", ["欠"] = "缺", ["研"] = "硏", ["県"] = "縣", ["倹"] = "儉",
	["剣"] = "劍", ["険"] = "險", ["圏"] = "圈", ["検"] = "檢", ["献"] = "獻", ["権"] = "權",
	["顕"] = "顯", ["験"] = "驗", ["厳"] = "嚴", ["広"] = "廣", ["効"] = "效", ["恒"] = "恆",
	["黄"] = "黃", ["鉱"] = "鑛", ["号"] = "號", ["国"] = "國", ["黒"] = "黑", ["穀"] = "穀",
	["砕"] = "碎", ["済"] = "濟", ["斎"] = "齋", ["剤"] = "劑", ["殺"] = "殺", ["雑"] = "雜",
	["参"] = "參", ["桟"] = "棧", ["蚕"] = "蠶", ["惨"] = "慘", ["賛"] = "贊", ["残"] = "殘",
	["糸"] = "絲", ["祉"] = "祉", ["視"] = "視", ["歯"] = "齒", ["児"] = "兒", ["辞"] = "辭",
	["湿"] = "濕", ["実"] = "實", ["写"] = "寫", ["社"] = "社", ["者"] = "者", ["煮"] = "煮",
	["釈"] = "釋", ["寿"] = "壽", ["収"] = "收", ["臭"] = "臭", ["従"] = "從", ["渋"] = "澁",
	["獣"] = "獸", ["縦"] = "縱", ["祝"] = "祝", ["粛"] = "肅", ["処"] = "處", ["暑"] = "暑",
	["署"] = "署", ["緒"] = "緖", ["諸"] = "諸", ["叙"] = "敍", ["将"] = "將", ["祥"] = "祥",
	["称"] = "稱", ["渉"] = "涉", ["焼"] = "燒", ["証"] = "證", ["奨"] = "獎", ["条"] = "條",
	["状"] = "狀", ["乗"] = "乘", ["浄"] = "淨", ["剰"] = "剩", ["畳"] = "疊", ["縄"] = "繩",
	["壌"] = "壤", ["嬢"] = "孃", ["譲"] = "讓", ["醸"] = "釀", ["触"] = "觸", ["嘱"] = "囑",
	["神"] = "神", ["真"] = "眞", ["寝"] = "寢", ["慎"] = "愼", ["尽"] = "盡", ["図"] = "圖",
	["粋"] = "粹", ["酔"] = "醉", ["穂"] = "穗", ["随"] = "隨", ["髄"] = "髓", ["枢"] = "樞",
	["数"] = "數", ["瀬"] = "瀨", ["声"] = "聲", ["斉"] = "齊", ["静"] = "靜", ["窃"] = "竊",
	["摂"] = "攝", ["節"] = "節", ["専"] = "專", ["浅"] = "淺", ["戦"] = "戰", ["践"] = "踐",
	["銭"] = "錢", ["潜"] = "潛", ["繊"] = "纖", ["禅"] = "禪", ["祖"] = "祖", ["双"] = "雙",
	["壮"] = "壯", ["争"] = "爭", ["荘"] = "莊", ["捜"] = "搜", ["挿"] = "插", ["巣"] = "巢",
	["曽"] = "曾", ["痩"] = "瘦", ["装"] = "裝", ["僧"] = "僧", ["層"] = "層", ["総"] = "總",
	["騒"] = "騷", ["増"] = "增", ["憎"] = "憎", ["蔵"] = "藏", ["贈"] = "贈", ["臓"] = "臟",
	["即"] = "卽", ["属"] = "屬", ["続"] = "續", ["堕"] = "墮", ["対"] = "對", ["体"] = "體",
	["帯"] = "帶", ["滞"] = "滯", ["台"] = "臺", ["滝"] = "瀧", ["択"] = "擇", ["沢"] = "澤",
	["担"] = "擔", ["単"] = "單", ["胆"] = "膽", ["嘆"] = "嘆", ["団"] = "團", ["断"] = "斷",
	["弾"] = "彈", ["遅"] = "遲", ["痴"] = "癡", ["虫"] = "蟲", ["昼"] = "晝", ["鋳"] = "鑄",
	["著"] = "著", ["庁"] = "廳", ["徴"] = "徵", ["聴"] = "聽", ["懲"] = "懲", ["勅"] = "敕",
	["鎮"] = "鎭", ["塚"] = "塚", ["逓"] = "遞", ["鉄"] = "鐵", ["点"] = "點", ["転"] = "轉",
	["伝"] = "傳", ["都"] = "都", ["灯"] = "燈", ["当"] = "當", ["党"] = "黨", ["盗"] = "盜",
	["稲"] = "稻", ["闘"] = "鬭", ["徳"] = "德", ["独"] = "獨", ["読"] = "讀", ["突"] = "突",
	["届"] = "屆", ["難"] = "難", ["弐"] = "貳", ["悩"] = "惱", ["脳"] = "腦", ["覇"] = "霸",
	["拝"] = "拜", ["廃"] = "廢", ["売"] = "賣", ["梅"] = "梅", ["麦"] = "麥", ["発"] = "發",
	["髪"] = "髮", ["抜"] = "拔", ["繁"] = "繁", ["晩"] = "晚", ["蛮"] = "蠻", ["卑"] = "卑",
	["秘"] = "祕", ["碑"] = "碑", ["浜"] = "濱", ["賓"] = "賓", ["頻"] = "頻", ["敏"] = "敏",
	["瓶"] = "甁", ["侮"] = "侮", ["福"] = "福", ["払"] = "拂", ["仏"] = "佛", ["併"] = "倂",
	["並"] = "竝", ["塀"] = "塀", ["餅"] = "餠", ["辺"] = "邊", ["変"] = "變", ["勉"] = "勉",
	["歩"] = "步", ["宝"] = "寶", ["豊"] = "豐", ["褒"] = "襃", ["墨"] = "墨", ["翻"] = "飜",
	["毎"] = "每", ["万"] = "萬", ["満"] = "滿", ["免"] = "免", ["麺"] = "麵", ["弥"] = "彌",
	["黙"] = "默", ["訳"] = "譯", ["薬"] = "藥", ["与"] = "與", ["予"] = "豫", ["余"] = "餘",
	["誉"] = "譽", ["揺"] = "搖", ["様"] = "樣", ["謡"] = "謠", ["来"] = "來", ["頼"] = "賴",
	["乱"] = "亂", ["覧"] = "覽", ["欄"] = "欄", ["竜"] = "龍", ["隆"] = "隆", ["虜"] = "虜",
	["両"] = "兩", ["猟"] = "獵", ["緑"] = "綠", ["涙"] = "淚", ["塁"] = "壘", ["類"] = "類",
	["礼"] = "禮", ["励"] = "勵", ["戻"] = "戾", ["霊"] = "靈", ["齢"] = "齡", ["暦"] = "曆",
	["歴"] = "歷", ["恋"] = "戀", ["練"] = "練", ["錬"] = "鍊", ["炉"] = "爐", ["労"] = "勞",
	["郎"] = "郞", ["朗"] = "朗", ["廊"] = "廊", ["楼"] = "樓", ["録"] = "錄", ["湾"] = "灣",
	-- TODO: add Z-variants like 青/靑, 説/說
}

--- Convert a shinjitai spelling to kyūjitai, character by character.
-- Characters without an entry in the map are left unchanged.
-- @param shin  string: the shinjitai spelling
-- @return string: the converted spelling, or nil when `shin` contains a
--         character whose old form is ambiguous, so the caller has to supply
--         the kyūjitai manually
local function shin_to_kyu(shin)
	local ambiguous_char = '[弁芸缶]' -- TODO: check if there is more
	if mw.ustring.find(shin, ambiguous_char) then return nil end
	-- The parentheses drop gsub's second return value (the match count).
	return (mw.ustring.gsub(shin, '.', shin_to_kyu_map))
end
--- Build the display specification of a kanjitab from processed arguments.
-- @param args  table with the fields read below: s / k / es (shinjitai,
--              kyūjitai and extended-shinjitai spellings, '-' meaning "not
--              given"), [1] (list of readings with a `maxindex` field),
--              yomi (reading-type codes) and alt (comma-separated variants).
-- @return table with the following fields:
--   kanji_length    -- the number of kanji in the term
--   kanji           -- e.g. { '追払', '追拂' }
--   kanji_types     -- e.g. { 'shin', 'kyu' } (or 位: { 'shinkyu' }, 儘/侭: { 'kyu', 'extshin' })
--   kanji_spellings -- e.g. { '追い払う', '追い拂う' }
--   yomi            -- a list of { original reading, actual reading, okurigana } of the kanji spans,
--                      e.g. { { 'お', 'お', 'い' }, { 'はら', 'はら', 'う' } }
--   yomi_types      -- a list of reading types, each formatted like { 'on', span=1 } or
--                      { 'jukujikun', span=2 }; the spans add up to kanji_length
--   altforms        -- e.g. { { '追いはらう', type='' }, { '追払う', '追拂う', type='irregular okurigana' },
--                      { '追ひ払ふ', '追ひ拂ふ', type='historical kana' } }
-- Raises an error when the jitai combination is unsupported, automatic
-- conversion fails, the term has no kanji, a reading or reading type cannot
-- be parsed, or the spans do not add up to the number of kanji.
local function genspec(args)
	---------------------------------------------------------------------
	-- 1. Decide which written forms (jitai) the table shows.
	---------------------------------------------------------------------
	local kanji_types = {}
	local kanji_spellings = {}
	local shin = args.s; if shin == '-' then shin = nil end
	local kyu = args.k; if kyu == '-' then kyu = nil end
	local extshin = args.es; if extshin == '-' then extshin = nil end
	-- When only the "other" form is given, the page name supplies this one.
	if kyu and not shin and not extshin then shin = PAGENAME end
	if extshin and not shin and not kyu then kyu = PAGENAME end
	if shin and kyu and not extshin then
		kanji_types = { 'shin', 'kyu' }
		kanji_spellings = { shin, kyu }
	elseif kyu and extshin and not shin then
		kanji_types = { 'kyu', 'extshin' }
		kanji_spellings = { kyu, extshin }
	elseif not kyu and not extshin then
		if not shin then shin = PAGENAME end
		local converted = shin_to_kyu(shin)
		if not converted then
			error('automatic jitai conversion of ' .. shin .. ' failed, please supply the kyujitai manually')
		end
		kanji_types = { 'shin', 'kyu' }
		kanji_spellings = { shin, converted }
	else
		error('combination of kanji types not yet supported') -- shin + kyu + extshin can be supported if needed
	end
	-- Identical spellings collapse into a single combined row.
	if #kanji_spellings == 2 and kanji_spellings[1] == kanji_spellings[2] then
		kanji_types = { 'shinkyu' }
		kanji_spellings = { kanji_spellings[1] }
	end

	---------------------------------------------------------------------
	-- 2. Reduce every spelling to its kanji only.
	---------------------------------------------------------------------
	local kanji = {}
	for _, spelling in ipairs(kanji_spellings) do
		-- expand the iteration mark: a kanji followed by 々 becomes that kanji twice
		local kj = mw.ustring.gsub(spelling, '([' .. kanji_pattern .. '])々', '%1%1')
		kj = mw.ustring.gsub(kj, '[^' .. kanji_pattern .. ']', '')
		table.insert(kanji, kj)
	end
	local kanji_length = mw.ustring.len(kanji[1])
	if kanji_length == 0 then
		-- kanji[1] is empty here by definition, so report the spelling instead
		error('there is no kanji in the term ' .. kanji_spellings[1])
	end

	---------------------------------------------------------------------
	-- 3. Parse the readings: "original>actual.okurigana", where ">actual"
	--    and ".okurigana" are optional.
	---------------------------------------------------------------------
	local yomi = {}
	for i = 1, args[1].maxindex do
		local ym = args[1][i] or ''
		if not mw.ustring.find(ym, '%.') then ym = ym .. '.' end
		if not mw.ustring.find(ym, '>') then ym = mw.ustring.gsub(ym, '^(.*)%.(.*)$', '%1>%1.%2') end
		local _, _, original, actual, okurigana = mw.ustring.find(ym, '^(.*)>(.*)%.(.*)$')
		if not original then
			-- e.g. a "." placed before the ">"; fail here with a clear message
			-- rather than later with a nil concatenation in the renderer
			error('cannot parse the reading "' .. tostring(args[1][i]) .. '"')
		end
		table.insert(yomi, { original, actual, okurigana })
	end

	---------------------------------------------------------------------
	-- 4. Parse the reading types.
	---------------------------------------------------------------------
	local yomi_types = {}
	local yt = args['yomi'] or ''
	local yomi_code = {
		o = 'on', on = 'on',
		kanon = 'kanon',
		goon = 'goon',
		toon = 'toon',
		kan = 'kanyoon', kanyo = 'kanyoon', kanyoon = 'kanyoon',
		k = 'kun', kun = 'kun',
		juku = 'jukujikun', jukuji = 'jukujikun', jukujikun = 'jukujikun', -- j alone is jūbakoyomi (on+kun) for backward compatibility
		irr = 'irregular', irreg = 'irregular', irregular = 'irregular',
		p = 'phonetic', phonetic = 'phonetic',
		nanori = 'nanori',
		[''] = '', none = '',
	}
	-- whole-word shorthands: j = on,kun and y = kun,on
	if yt == 'j' then yt = 'on,kun' elseif yt == 'y' then yt = 'kun,on' end
	-- Compact syntax: when there is no comma and the value (minus a trailing
	-- span number) is not itself a known code, treat it as a run of
	-- single-letter codes with optional spans, e.g. 'k2o' -> 'k2,o'.
	local yt_without_span = mw.ustring.gsub(yt, '^(.-)[0-9]*$', '%1')
	if not mw.ustring.find(yt, ',') and not yomi_code[yt_without_span] then
		local separated = mw.ustring.gsub(yt, '([a-z][0-9]*)', '%1,')
		yt = mw.ustring.gsub(separated, ',$', '')
	end
	yt = mw.text.split(yt, ',')
	for _, item in ipairs(yt) do
		local _, _, code, span = mw.ustring.find(item, '^([a-z]*)([0-9]*)$')
		local yomi_type = yomi_code[code] or error('cannot recognize yomi type "' .. tostring(args['yomi']) .. '"')
		table.insert(yomi_types, { yomi_type, span = tonumber(span) or 1 })
	end
	-- allow using a single yomi type for the whole kanji
	-- Case 1: {{ja-kanjitab|かん|れん|yomi=on|s=関連}} -> on, on
	-- Case 2: {{ja-kanjitab|やまと|yomi=juku|s=大和}} -> jukujikun with span 2
	if #yomi_types == 1 and yomi_types[1].span == 1 and kanji_length > 1 then
		local yomi_jukujikun_type = { ['jukujikun'] = true, ['irregular'] = true, ['phonetic'] = true }
		if yomi_jukujikun_type[yomi_types[1][1]] then -- Case 2
			yomi_types[1].span = kanji_length
		else -- Case 1
			local repeated_type = yomi_types[1][1]
			local repeated_span = yomi_types[1].span
			for _ = 2, kanji_length do table.insert(yomi_types, { repeated_type, span = repeated_span }) end
		end
	end
	-- check the sum of the spans
	local span_sum = 0
	for _, yomi_type in ipairs(yomi_types) do span_sum = span_sum + yomi_type.span end
	if span_sum ~= kanji_length then
		error('the |yomi= parameter does not match the number of kanji: ' .. span_sum .. ' vs ' .. kanji_length)
	end

	---------------------------------------------------------------------
	-- 5. Parse the alternative forms: comma-separated "form-typecode".
	---------------------------------------------------------------------
	local altforms = {}
	local alt_code = {
		['io'] = 'irregular okurigana',
		['hk'] = 'historical kana',
		['vk'] = 'variant kana',
		['ok'] = 'obsolete kana', -- e.g. 用ひる
	}
	for _, item in ipairs(mw.text.split(args.alt or '', ',')) do
		if item ~= '' then
			-- plain find: "-" is a pattern quantifier character
			if not mw.ustring.find(item, '-', 1, true) then item = item .. '-' end
			-- greedy first capture, so the split happens at the last hyphen
			local _, _, form, form_type = mw.ustring.find(item, '^(.*)%-(.*)$')
			-- unknown codes are kept verbatim as the displayed type
			if alt_code[form_type] then form_type = alt_code[form_type] end
			-- syntax borrowed from {{zh-l}}: *字体 suppresses automatic conversion, and 字体/字體 provides manual conversions
			if mw.ustring.find(form, '%*') then
				local without_marker = mw.ustring.gsub(form, '%*', '')
				table.insert(altforms, { without_marker, type = form_type })
			elseif mw.ustring.find(form, '/') then
				local forms = mw.text.split(form, '/')
				forms['type'] = form_type
				table.insert(altforms, forms)
			else
				-- fall back to the form itself when conversion is refused
				local alt_kyu = shin_to_kyu(form) or form
				if form == alt_kyu then
					table.insert(altforms, { form, type = form_type })
				else
					table.insert(altforms, { form, alt_kyu, type = form_type })
				end
			end
		end
	end

	return {
		kanji_length = kanji_length,
		kanji = kanji,
		kanji_types = kanji_types,
		kanji_spellings = kanji_spellings,
		yomi = yomi,
		yomi_types = yomi_types,
		altforms = altforms,
	}
end
--- Render a kanjitab specification as wikitext table markup.
-- @param spec       table with the fields kanji_length, kanji, kanji_types,
--                   kanji_spellings, yomi, yomi_types and altforms
-- @param collapsed  currently unused; kept so existing callers keep working
-- @return string: the wikitext of the table
local function gentable(spec, collapsed)
	-- caption shown under each reading, keyed by reading type
	local yomi_link = {
		on = "[[音読み#Japanese|on’yomi]]",
		kanon = "[[漢音#Japanese|kan’on]]",
		goon = "[[呉音#Japanese|goon]]",
		kanyoon = "[[慣用音#Japanese|kan’yōon]]",
		toon = "[[唐音#Japanese|tōon]]",
		kun = "[[訓読み#Japanese|kun’yomi]]",
		jukujikun = "[[熟字訓#Japanese|jukujikun]]",
		irregular = "irregular",
		phonetic = "phonetic",
		nanori = "[[名乗り#Japanese|nanori]]",
		[""] = "",
	}
	-- row header text, keyed by jitai type
	local jitai_link = {
		shin = '[[shinjitai]]',
		kyu = '[[kyūjitai]]',
		shinkyu = '[[shinjitai|shin.]] and [[kyūjitai|kyū.]]',
		extshin = '[[shinjitai]] ([[wikipedia:Extended shinjitai|extended]])',
	}

	-- Pieces are collected and joined once at the end instead of growing one
	-- string with repeated concatenation.
	local out = {}
	local function emit(piece) out[#out + 1] = piece end

	emit('{| align=right style="clear: right;margin: 1em;border-collapse: collapse;text-align: center"\n!\n')

	-- generate the yomi tabs
	for i, yomi_type in ipairs(spec.yomi_types) do
		local yomi = spec.yomi[i] or { '', '', '' }
		-- the okurigana, if any, follows each reading in parentheses
		local okurigana = (yomi[3] == '') and '' or ('(' .. yomi[3] .. ')')
		local yomi_text = yomi[1] .. okurigana
		if yomi[1] ~= yomi[2] then
			-- original reading > actual reading
			yomi_text = yomi_text .. ' > ' .. yomi[2] .. okurigana
		end
		emit('! style="padding: 0.5em;border: 1px solid #aaa;background:#F5F5DC;font-weight: normal;font-size: 85%;" colspan=' .. yomi_type.span .. '|<span class="Jpan" lang="ja">' .. yomi_text .. '</span><br>' .. (yomi_link[yomi_type[1]] or '') .. '\n')
	end

	-- generate the main jitai lines
	for i = 1, #spec.kanji_types do
		emit('|- style="line-height:1.3"\n')
		-- cell showing the jitai type and the spelling
		emit('! style="padding: 0.5em;border: 1px solid #aaa;background: #E0FFFF;font-weight: normal;font-size: smaller;" |' .. jitai_link[spec.kanji_types[i]])
		local kanji_spelling = spec.kanji_spellings[i]
		local spelling_length = mw.ustring.len(kanji_spelling)
		-- a one-character spelling is not repeated in the header; long
		-- spellings get a smaller font
		if spelling_length > 6 then
			emit(' <span style="font-size:100%">(' .. link(kanji_spelling) .. ')</span>')
		elseif spelling_length > 1 then
			emit(' <span style="font-size:140%">(' .. link(kanji_spelling) .. ')</span>')
		end
		emit('\n')
		-- cells showing the kanji
		local kanji_string = spec.kanji[i]
		for j = 1, mw.ustring.len(kanji_string) do
			local kanji = mw.ustring.sub(kanji_string, j, j)
			emit('| style="padding: 0.5em; background-color:white;border-right: 1px solid #aaa;border-bottom: 1px solid #aaa;" | <span style="font-size:x-large">' .. link(kanji) .. '</span>')
			-- Leave the grade line out when no caption exists for the value
			-- returned by m_ja.kanji_grade; concatenating nil would raise.
			local grade_link = kanji_grade_links[m_ja.kanji_grade(kanji)]
			if grade_link then
				emit('<br><small>' .. grade_link .. '</small>')
			end
			emit('\n')
		end
	end

	-- alt forms cell
	if #spec.altforms > 0 then
		emit('|-\n| style="background-color:white;border: 1px solid #aaa; font-size:90%;" colspan=' .. (spec.kanji_length + 1) .. '|\n<b>Variant forms</b><br>')
		local lines = {}
		for _, altform in ipairs(spec.altforms) do
			local links = {}
			for _, form in ipairs(altform) do table.insert(links, link(form)) end
			local line = table.concat(links, '/')
			if altform['type'] and altform['type'] ~= '' then
				line = line .. ' <span style="font-size:70%"><i>' .. altform['type'] .. '</i></span>'
			end
			table.insert(lines, line)
		end
		emit(table.concat(lines, '<br>') .. '\n')
	end

	emit('|}\n')
	return table.concat(out)
end
--- Template entry point: validate the parent frame's arguments and return the
-- rendered kanjitab.
-- @param frame  the frame object; the arguments are read from its parent
-- @return string: wikitext produced by gentable
function export.show(frame)
	local params = {}
	params[1] = { list = true, allow_holes = true, allow_empty = true }
	params["s"] = {}
	params["k"] = {}
	params["es"] = {}
	params["alt"] = {}
	params["sort"] = {}
	params["yomi"] = {}
	params["pagename"] = {}

	local process = require("Module:parameters").process
	local args, unrecognized_args = process(frame:getParent().args, params, true)

	-- Any unrecognized parameter is fatal; the first one encountered is reported.
	for key in pairs(unrecognized_args) do
		local additional = ""
		if mw.ustring.sub(key, 1, 1) == "y" then
			additional = " Perhaps you meant “yomi”?"
		end
		error("“" .. key .. "” is not a recognized parameter." .. additional)
	end

	-- |pagename= replaces the file-level PAGENAME, but is refused in the main
	-- namespace.
	local pagename_override = args.pagename
	if pagename_override then
		if NAMESPACE == "" then
			error("The pagename parameter should not be used in entries, as it is only for testing.")
		end
		PAGENAME = pagename_override
	end

	local spec = genspec(args)
	return gentable(spec)
end
--- Debug helper: log the table generated for one hard-coded sample term.
-- The block comment below holds further sample inputs that are disabled.
function export:test()
	--[[
	mw.logObject(genspec({
		[1] = { 'き', 'つ>づ.く', maxindex = 2 },
		s = '気付く',
		yomi = 'ok',
		alt = '気づく,気ずく-ik',
	}))
	mw.logObject(genspec({
		[1] = { 'とう', 'きょう', 'けしき', maxindex = 3 },
		s = '東京景色',
		yomi = 'kan,go,j2',
	}))
	mw.logObject(genspec({
		[1] = { 'まま', maxindex = 1 },
		k = '儘',
		es = '侭',
		yomi = 'k',
	}))
	mw.logObject(genspec({
		[1] = { maxindex = 0 },
		s = 'かんじなし',
	}))
	mw.logObject(genspec({
		[1] = { 'やまと', maxindex = 1 },
		s = '大和',
		yomi = 'j',
	}))
	]]--
	-- Active sample: six kanji, six readings, one type code for all of them.
	local sample_args = {
		[1] = { 'こく', 'さい', 'おん', 'せい', 'き', 'ごう', maxindex = 6 },
		s = '国際音声記号',
		yomi = 'o',
	}
	local spec = genspec(sample_args)
	local wikitext = gentable(spec)
	mw.log(wikitext)
end
return export