Module:category tree/poscatboiler/data/lang-specific/jpx

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module handles generating the descriptions and categorization for category pages of Japonic languages of the format "LANGNAME LABEL", where LANGNAME is the name of a Japonic language (e.g. Japanese) and LABEL can be any text. Examples of the same kind of category handled by other language-specific modules are Category:Bulgarian conjugation 2.1 verbs and Category:Russian velar-stem neuter-form nouns. This module is part of the poscatboiler system, which is a general framework for generating the descriptions and categorization of category pages.

For more information, see Module:category tree/poscatboiler/data/lang-specific/documentation.

NOTE: If you add a new language-specific module, you must add the language code to the list at the top of Module:category tree/poscatboiler/data/lang-specific in order for the module to be recognized.


-- Table of functions exported by this module (returned at the end of the file).
local export = {}

-- Local aliases for standard library functions used below.
local concat = table.concat
local insert = table.insert
-- Sort-key builder applied to a single kanji when it is used as a parent sort key.
local Hani_sort = require("Module:Hani-sortkey").makeSortKey
local sort = table.sort

-- Script objects used to decide how a reading or kanji is tagged for display.
local Hira = require("Module:scripts").getByCode("Hira")
local Jpan = require("Module:scripts").getByCode("Jpan")
-- Transliterates kana readings for the `tr` field of reading links.
local kana_to_romaji = require("Module:Hrkt-translit").tr
-- Number-spelling data and helpers (ones_position, ones_position_ord, spell_number).
local m_numeric = require("Module:ConvertNumeric")
-- Unicode-aware pattern matching, needed because labels contain kana and kanji.
local rmatch = mw.ustring.match

-- Per-yomi-type data tables, looked up by yomi type code (e.g. yomi_data.on).
local yomi_data = require("Module:kanjitab/data")


-- Register the fixed-name category labels provided by this module.
--
-- Parameters:
--   labels: table to populate. Keys are label texts; values are label spec
--           tables with a `description`, `parents` and optional fields such as
--           `additional`, `topright`, `toc_template`, `breadcrumb`, `hidden`
--           and `can_be_empty`.
--   lang:   language object. Only `lang:getCode()` is used here, to add a
--           pointer to [[Wiktionary:Japanese language]] for Japanese ("ja").
--
-- Strings use the {{{langname}}}, {{{langcode}}} and {{{langcat}}} placeholders.
-- NOTE(review): these are presumably substituted by the calling framework; the
-- substitution itself is not visible in this file.
function export.add_labels(labels, lang)
	-- Extra pointer shown on the hiragana/katakana categories for Japanese only.
	local ja_pointer = lang:getCode() == "ja" and "For more information, see [[Wiktionary:Japanese language]].\n\n" or ""

	labels["adnominals"] = {
		description = "{{{langname}}} adnominals, or {{ja-r|連%体%詞|れん%たい%し}}, which modify nouns, and do not conjugate or [[predicate#Verb|predicate]].",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["hiragana"] = {
		description = "{{{langname}}} terms with hiragana {{mdash}} {{ja-r|平%仮%名|ひら%が%な}} {{mdash}} forms, sorted by conventional hiragana sequence. The hiragana form is a [[phonetic]] representation of that word. " ..
		"Wiktionary represents {{{langname}}}-language segments in three ways: in normal form (with [[kanji]], if appropriate), in [[hiragana]] " ..
		"form (this differs from kanji form only when the segment contains kanji), and in [[romaji]] form.",
		additional = ja_pointer .. "''See also'' [[:Category:{{{langname}}} katakana]]",
		toc_template = "categoryTOC-hiragana",
		parents = {
			{name = "{{{langcat}}}", raw = true},
			"Category:Hiragana script characters",
		}
	}

	labels["historical hiragana"] = {
		description = "{{{langname}}} historical [[hiragana]].",
		additional = "''See also'' [[:Category:{{{langname}}} historical katakana]].",
		toc_template = "categoryTOC-hiragana",
		parents = {
			"hiragana",
			{name = "{{{langcat}}}", raw = true},
			"Category:Hiragana script characters",
		}
	}

	labels["katakana"] = {
		description = "{{{langname}}} terms with katakana {{mdash}} {{ja-r|片%仮%名|かた%か%な}} {{mdash}} forms, sorted by conventional katakana sequence. Katakana is used primarily for transliterations of foreign words, including old Chinese Hanzi not used in [[shinjitai]].",
		additional = ja_pointer .. "''See also'' [[:Category:{{{langname}}} hiragana]]",
		toc_template = "categoryTOC-katakana",
		parents = {
			{name = "{{{langcat}}}", raw = true},
			"Category:Katakana script characters",
		}
	}

	labels["historical katakana"] = {
		description = "{{{langname}}} historical [[katakana]].",
		additional = "''See also'' [[:Category:{{{langname}}} historical hiragana]].",
		toc_template = "categoryTOC-katakana",
		parents = {
			"katakana",
			{name = "{{{langcat}}}", raw = true},
			"Category:Katakana script characters",
		}
	}

	labels["terms spelled with mixed kana"] = {
		description = "{{{langname}}} terms which combine [[hiragana]] and [[katakana]] characters, potentially with [[kanji]] too.",
		parents = {
			{name = "{{{langcat}}}", raw = true},
			"hiragana",
			"katakana",
		},
	}

	labels["honorifics"] = {
		topright = "{{wikipedia|Honorific speech in Japanese}}",
		description = "{{{langname}}} [[honorific]]s.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["humble language"] = {
		description = "{{{langname}}} humble terms, or {{ja-r|謙%譲%語|けん%じょう%ご}}, which is a type of honorific speech that lowers the speaker in relation to the listener.",
		parents = "honorifics",
	}

	labels["respectful language"] = {
		description = "{{{langname}}} respectful terms, or {{ja-r|尊%敬%語|そん%けい%ご}}, which is a type of honorific speech that elevates the listener in relation to the speaker.",
		parents = "honorifics",
	}

	labels["kanji by reading"] = {
		description = "{{{langname}}} kanji categorized by reading.",
		parents = {{name = "Han characters", sort = "reading"}},
	}

	labels["makurakotoba"] = {
		topright = "{{wikipedia|Makurakotoba}}",
		description = "{{{langname}}} idioms used in poetry to introduce specific words.",
		parents = {"idioms"},
	}

	labels["terms by kanji readings"] = {
		description = "{{{langname}}} categories grouped with regard to the readings of the kanji with which they are spelled.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["terms by reading pattern"] = {
		description = "{{{langname}}} categories with terms grouped by their reading patterns.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	-- Substitute a yomi category name into a label template such as
	-- "terms read with {{{yomi_catname}}}". The outer parentheses discard
	-- gsub's second return value (the number of replacements).
	local function fill_yomi_catname(category, yomi_catname)
		return (category:gsub("{{{yomi_catname}}}", yomi_catname))
	end

	-- Build a sorted list of category links for every on'yomi subtype other
	-- than plain "on'yomi" and other than `cat_yomi_type` (the type of the
	-- category the list will be shown on).
	--   category:      label template containing {{{yomi_catname}}}
	--   category_type: field of each yomi data table holding the category name
	--                  ("reading_category" or "kanji_category")
	local function handle_onyomi_list(category, category_type, cat_yomi_type)
		local onyomi, seen = {}, {}
		for _, yomi in pairs(yomi_data) do
			-- Several keys of yomi_data may refer to the same table; list each
			-- table only once.
			if not seen[yomi] then
				seen[yomi] = true
				-- A false (or absent) category name means the yomi type has no
				-- category of this kind.
				local yomi_catname = yomi.onyomi and yomi[category_type]
				local yomi_type = yomi.type
				if yomi_catname and yomi_type ~= "on'yomi" and yomi_type ~= cat_yomi_type then
					insert(onyomi, "[[:Category:{{{langname}}} " .. fill_yomi_catname(category, yomi_catname) .. "]]")
				end
			end
		end
		sort(onyomi)
		return onyomi
	end

	-- Add one label per yomi type that has a category name in `category_type`.
	--   category:      label template containing {{{yomi_catname}}}
	--   category_type: "reading_category" or "kanji_category"
	--   parent:        label of the parent category
	--   description:   start of the description; the yomi link (or category
	--                  name) and optional yomi description are appended
	local function add_yomi_category(category, category_type, parent, description)
		local seen = {}
		for _, yomi in pairs(yomi_data) do
			local yomi_catname = yomi[category_type]
			-- Skip yomi types without a category of this kind, and tables
			-- already handled under another key (they would produce the same label).
			if yomi_catname and not seen[yomi] then
				seen[yomi] = true
				local yomi_type = yomi.type
				local yomi_desc = yomi.link or yomi_catname
				if yomi.description then
					yomi_desc = yomi_desc .. "; " .. yomi.description
				end
				local label = {
					description = description .. " " .. yomi_desc .. ".",
					breadcrumb = yomi_type,
					parents = {{name = parent, sort = yomi_catname}},
				}
				if yomi.onyomi then
					local onyomi = handle_onyomi_list(category, category_type, yomi_type)

					-- Only announce a list when there is something to list;
					-- otherwise the text would end in an empty bullet.
					if #onyomi > 0 then
						label.additional = "Categories of terms with " ..
							(yomi_type == "on'yomi" and "more" or "other") ..
							" specific types of on'yomi readings can be found in the following categories:\n* " .. concat(onyomi, "\n* ")
					end

					-- On'yomi subtypes are additionally filed under the general
					-- on'yomi category, which becomes their first parent.
					if yomi_type ~= "on'yomi" then
						insert(label.parents, 1, {
							name = fill_yomi_catname(category, yomi_data.on[category_type]),
							sort = yomi_catname
						})
					end
				end
				labels[fill_yomi_catname(category, yomi_catname)] = label
			end
		end
	end

	add_yomi_category(
		"terms read with {{{yomi_catname}}}",
		"reading_category",
		"terms by reading pattern",
		"{{{langname}}} terms exhibiting"
	)

	add_yomi_category(
		"terms spelled with kanji with {{{yomi_catname}}} readings",
		"kanji_category",
		"terms by kanji reading type",
		"{{{langname}}} categories with terms that are spelled with one or more kanji exhibiting"
	)

	labels["terms with missing yomi"] = {
		description = "{{{langname}}} terms where at least one [[Appendix:Japanese glossary#yomi|yomi]] is missing from {{tl|{{{langcode}}}-kanjitab}}.",
		hidden = true,
		can_be_empty = true,
		parents = {"entry maintenance"},
	}

	labels["terms by kanji reading type"] = {
		description = "{{{langname}}} categories with terms grouped with regard to the types of readings of the kanji with which " ..
		"they are spelled; broadly, those of Chinese origin, {{ja-r|音|おん}} readings, and those of non-Chinese origin, {{ja-r|訓|くん}} readings.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["terms spelled with ateji"] = {
		topright = "{{wikipedia|Ateji}}",
		description = "{{{langname}}} terms containing one or more [[Appendix:Japanese glossary#ateji|ateji]] {{mdash}} {{ja-r|当て字|あてじ}} {{mdash}} which are [[kanji]] used to represent sounds rather than meanings (though meaning may have some influence on which kanji are chosen).",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	-- Uses {{{langname}}} like every other label, so the description is correct
	-- for languages other than Japanese as well.
	labels["terms spelled with daiyōji"] = {
		description = "{{{langname}}} terms spelled using [[Appendix:Japanese glossary#daiyouji|daiyōji]], categorized using {{temp|ja-daiyouji}}.",
		parents = {"terms by etymology"},
	}

	labels["terms spelled with jukujikun"] = {
		description = "{{{langname}}} terms containing one or more [[Appendix:Japanese glossary#jukujikun|jukujikun]] {{mdash}} {{ja-r|熟%字%訓|じゅく%じ%くん}} {{mdash}} which are [[kanji]] used to represent meanings rather than sounds.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["terms spelled with jōyō kanji"] = {
		topright = "{{wikipedia|Jōyō kanji}}",
		description = "{{{langname}}} terms spelled with at least one kanji, where all kanji in the terms are included on the official list of {{ja-r|常%用 漢%字|じょう%よう かん%じ}}.",
		additional = "See also [[:Category:{{{langname}}} terms spelled with non-jōyō kanji]].",
		parents = {{name = "terms by orthographic property", sort = "jōyō"}},
	}

	labels["terms spelled with non-jōyō kanji"] = {
		topright = "{{wikipedia|Jōyō kanji}}",
		description = "{{{langname}}} terms spelled with at least one kanji not included in the official list of {{ja-r|常%用 漢%字|じょう%よう かん%じ}}.",
		additional = "See also [[:Category:{{{langname}}} terms spelled with jōyō kanji]].",
		parents = {{name = "terms by orthographic property", sort = "non-jōyō"}},
	}

	-- Subcategories of the non-jōyō category, one per kanji class.
	for _, non_joyo_type in ipairs {"hyōgaiji", "jinmeiyō"} do
		labels["terms spelled with " .. non_joyo_type .. " kanji"] = {
			description = "{{{langname}}} terms spelled with " .. non_joyo_type .. " kanji.",
			parents = {{name = "terms spelled with non-jōyō kanji", sort = non_joyo_type}},
		}
	end

	-- Subcategories of the jōyō category for grades 1 through 6, named with the
	-- ordinal word taken from m_numeric.ones_position_ord and sorted by grade number.
	for i = 1, 6 do
		local ord = m_numeric.ones_position_ord[i]
		labels["terms spelled with " .. ord .. " grade kanji"] = {
			description = "{{{langname}}} terms spelled with " .. ord .. " grade kanji.",
			parents = {{name = "terms spelled with jōyō kanji", sort = i}},
		}
	end

	labels["terms spelled with secondary school kanji"] = {
		description = "{{{langname}}} terms spelled with secondary school kanji.",
		parents = {{name = "terms spelled with jōyō kanji", sort = "secondary school"}},
	}

	labels["terms with multiple readings"] = {
		description = "{{{langname}}} terms with multiple pronunciations (hence multiple [[kana]] spellings).",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["kanji readings by number of morae"] = {
		description = "{{{langname}}} categories grouped with regard to the number of morae in their kanji readings.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["single-kanji terms"] = {
		description = "{{{langname}}} terms written as a single kanji.",
		parents = {"terms by orthographic property", {name = "character counts", sort = " "}},
	}

	labels["verbs without transitivity"] = {
		description = "{{{langname}}} verbs missing the <code>tr=</code> parameter in the headword template.",
		hidden = true,
		can_be_empty = true,
		parents = {"entry maintenance"},
	}

	labels["yojijukugo"] = {
		topright = "{{wikipedia|Yojijukugo}}",
		description = "{{{langname}}} four-[[kanji]] compound terms, {{ja-r|四%字 熟%語|よ%じ じゅく%ご}}, with idiomatic meanings; typically derived from Classical Chinese, Buddhist scripture or traditional Japanese proverbs.",
		additional = "Compare [[w:Chengyu|chengyu]] in Sinitic languages.",
		parents = {"idioms"},
	}
end



-- Register the pattern-based category handlers provided by this module.
--
-- Parameters:
--   handlers: list to which handler functions are appended. Each handler
--             receives `data` (this module reads `data.label`, `data.lang` and
--             `data.args`) and returns nil when the label is not its own, or a
--             label spec table (description, parents, breadcrumb, optionally
--             displaytitle/additional) when it is.
--   lang:     language object, used to tag and link readings.
--   m_lang:   language-specific helper module; only `m_lang.count_morae` is
--             used here.
--
-- Handlers that read `data.args` return `true` as a second value.
-- NOTE(review): presumably this tells the caller that the arguments were
-- processed by the handler -- confirm against the poscatboiler documentation.
function export.add_handlers(handlers, lang, m_lang)
	-- Reverse lookup from a spelled-out number ("one", "two", ...) to its value.
	-- FIXME: Only works for 0 through 19.
	local word_to_number = {}
	for k, v in pairs(m_numeric.ones_position) do
		word_to_number[v] = k
	end

	-- Validate a spelled-out count against the plural suffix it was captured
	-- with (`plural` is "" for the singular form). Returns the numeric value, or
	-- nil when 'one' is paired with a plural, another number is paired with a
	-- singular, or the word is not a known number.
	local function parse_count(count, plural)
		if (count == "one") ~= (plural == "") then
			return nil
		end
		return word_to_number[count]
	end

	-- Capture for a kana reading at the end of a label.
	local kana_capture = "([-ぁ-ー𛀁𛀆]+)"

	-- Recognized period qualifiers that may precede a reading type.
	local periods = {
		historical = true,
		ancient = true,
	}

	-- Interpret the `period` and `reading_type` captures of a label.
	-- Returns:
	--   * nothing usable (nil) when `period` is non-empty but not a recognized period;
	--   * nil, reading_type when there is no period and the "reading type" is
	--     itself a period word (the word is returned as plain text, not a link);
	--   * period_text, reading_type_link otherwise, where period_text is the
	--     period followed by a space (nil when there is no period) and
	--     reading_type_link is a glossary link for the reading type.
	local function get_period_text_and_reading_type_link(period, reading_type)
		period = period ~= "" and period or nil
		if period and not periods[period] then
			return nil
		end
		local period_text = period and period .. " " or nil

		-- Allow periods (historical or ancient) by themselves; they will parse as reading types.
		if not period and periods[reading_type] then
			return nil, reading_type
		end

		local reading_type_link = "[[Appendix:Japanese glossary#" .. reading_type .. "|" .. reading_type .. "]]"
		return period_text, reading_type_link
	end

	-- Choose the script object for a string: Jpan when, after removing ASCII
	-- bytes, any character outside the Hira character set remains; Hira otherwise.
	local function get_sc(str)
		-- The extra parentheses keep only gsub's first result.
		local non_ascii = (str:gsub('[%z\1-\127]', ''))
		return rmatch(non_ascii, '[^' .. Hira:getCharacters() .. ']') and Jpan or Hira
	end

	-- Wrap a reading (or kanji) in script tagging for display.
	local function get_tagged_reading(reading)
		return require("Module:script utilities").tag_text(reading, lang, get_sc(reading))
	end

	-- Build a formatted link for a reading.
	--   reading:    the kana reading; '.', '-' and spaces are removed for display
	--   historical: passed on as the `hist` option of the transliteration
	--   link:       optional link target; defaults to the displayed reading
	local function get_reading_link(reading, historical, link)
		local display = reading:gsub('[%.%- ]', '')
		return require("Module:links").full_link({
			lang = lang,
			sc = get_sc(reading),
			term = link or display,
			alt = display,
			tr = kana_to_romaji((reading:gsub('%-', '')), lang:getCode(), nil, {hist = historical}),
		}, 'term')
	end

	-- True for reading types that end in "on" preceded by at least one more
	-- byte, i.e. anything like "goon" but not plain "on".
	local function is_on_subtype(reading_type)
		return reading_type:find(".on$") ~= nil
	end


	-- "terms written with COUNT Han script character(s)"
	insert(handlers, function(data)
		local count, plural = data.label:match("^terms written with (.+) Han script character(s?)$")
		local num = count and parse_count(count, plural)
		if not num then
			return nil
		end
		return {
			description = "{{{langname}}} terms written with " .. count .. " Han script character" .. plural .. " (also known as [[kanji]]).",
			breadcrumb = num,
			parents = {{name = "character counts", sort = num}},
		}
	end)


	-- "kanji readings with COUNT mora(e)"
	insert(handlers, function(data)
		local count, plural = data.label:match("^kanji readings with (.+) mora(e?)$")
		local num = count and parse_count(count, plural)
		if not num then
			return nil
		end
		return {
			description = "{{{langname}}} kanji readings containing " .. count .. " mora" .. plural .. ".",
			breadcrumb = num,
			parents = {{name = "kanji readings by number of morae", sort = num}},
		}
	end)


	-- "kanji with [PERIOD ]TYPE reading READING"
	insert(handlers, function(data)
		local label_pref, period, reading_type, reading = rmatch(data.label, "^(kanji with ([a-z]-) ?([%a']+) reading )" .. kana_capture .. "$")
		-- `period` is "" (still truthy) when the label has no period word, so
		-- this only tests whether the pattern matched.
		if not period then
			return nil
		end
		local period_text, reading_type_link = get_period_text_and_reading_type_link(period, reading_type)
		if not reading_type_link then
			return nil
		end

		-- Compute parents.
		local parents = {
			{name = "kanji by " .. (period_text or "") .. reading_type .. " reading", sort = (data.lang:makeSortKey(reading))}
		}
		if is_on_subtype(reading_type) then
			insert(parents, {name = "kanji with " .. (period_text or "") .. "on reading " .. reading, sort = reading_type})
		elseif period_text then
			insert(parents, {name = "kanji with " .. period_text .. "reading " .. reading, sort = reading_type})
		end
		if not period_text then
			insert(parents, {name = "kanji read as " .. reading, sort = reading_type})
		end

		local tagged_reading = get_tagged_reading(reading)
		return {
			description = "{{{langname}}} [[kanji]] with the " .. (period_text or "") .. reading_type_link .. " reading " ..
				get_reading_link(reading, period_text) .. ".",
			displaytitle = "{{{langname}}} " .. label_pref .. tagged_reading,
			breadcrumb = tagged_reading,
			parents = parents,
		}
	end)


	-- "kanji by [PERIOD ]TYPE reading"
	insert(handlers, function(data)
		local period, reading_type = rmatch(data.label, "^kanji by ([a-z]-) ?([%a']+) reading$")
		if not period then
			return nil
		end
		local period_text, reading_type_link = get_period_text_and_reading_type_link(period, reading_type)
		if not reading_type_link then
			return nil
		end

		-- Compute parents. The first parent depends on the kind of reading type.
		local parents = {}
		if is_on_subtype(reading_type) then
			insert(parents, {name = "kanji by " .. (period_text or "") .. "on reading", sort = reading_type})
		elseif period_text then
			insert(parents, {name = "kanji by " .. reading_type .. " reading", sort = period})
		else
			insert(parents, {name = "kanji by reading", sort = reading_type})
		end
		if period_text then
			insert(parents, {name = "kanji by " .. period_text .. "reading", sort = reading_type})
		end

		-- Compute description.
		local description = "{{{langname}}} [[kanji]] categorized by " .. (period_text or "") .. reading_type_link .. " reading."
		return {
			description = description,
			breadcrumb = (period_text or "") .. reading_type,
			parents = parents,
		}
	end)


	-- "kanji read as READING"; accepts the optional argument histconsol=.
	insert(handlers, function(data)
		local label_pref, reading = rmatch(data.label, "^(kanji read as )" .. kana_capture .. "$")
		if not reading then
			return nil
		end
		local params = {
			["histconsol"] = {},
		}
		local args = require("Module:parameters").process(data.args, params)
		local parents = {{name = "kanji by reading", sort = (data.lang:makeSortKey(reading))}}
		local addl
		local period_text
		if args.histconsol then
			period_text = "historical"
			-- Link to the category of the modern reading in the same language
			-- as this category, rather than always to the Japanese one.
			local modern_category = ("Category:%s kanji read as %s"):format(data.lang:getCanonicalName(), args.histconsol)
			addl = ("This is a [[Wikipedia:Historical kana orthography|historical]] [[Wikipedia:Kanazukai|reading]], now " ..
			"consolidated with the [[Wikipedia:Modern kana usage|modern reading]] of " ..
			get_reading_link(args.histconsol, nil, modern_category) .. ".")
		end

		local tagged_reading = get_tagged_reading(reading)
		return {
			description = "{{{langname}}} [[kanji]] read as " .. get_reading_link(reading, period_text) .. ".",
			additional = addl,
			displaytitle = "{{{langname}}} " .. label_pref .. tagged_reading,
			breadcrumb = tagged_reading,
			parents = parents,
		}, true
	end)


	-- "terms spelled with kanji read as READING"
	insert(handlers, function(data)
		local label_pref, reading = rmatch(data.label, "^(terms spelled with kanji read as )" .. kana_capture .. "$")
		if not reading then
			return nil
		end

		-- Compute parents.
		local sort_key = (data.lang:makeSortKey(reading))
		local mora_count = m_lang.count_morae(reading)
		local mora_count_words = m_numeric.spell_number(tostring(mora_count))
		-- Singular only for exactly one mora: the "kanji readings with COUNT
		-- mora(e)" handler requires the plural for every other count.
		local mora_suffix = mora_count == 1 and "" or "e"
		local parents = {
			{name = "terms by kanji readings", sort = sort_key},
			{name = "kanji readings with " .. mora_count_words .. " mora" .. mora_suffix, sort = sort_key},
			{name = "kanji read as " .. reading, sort = " "},
		}

		local tagged_reading = get_tagged_reading(reading)
		return {
			description = "{{{langname}}} terms that contain kanji that exhibit a reading of " .. get_reading_link(reading) ..
			" in those terms prior to any sound changes.",
			displaytitle = "{{{langname}}} " .. label_pref .. tagged_reading,
			breadcrumb = tagged_reading,
			parents = parents,
		}
	end)


	-- "terms spelled with KANJI read as READING"; requires one or more yomi
	-- types as numbered arguments. Raises an error when none is given, when a
	-- yomi type is unknown, or when it has no kanji category.
	insert(handlers, function(data)
		local kanji, reading = rmatch(data.label, "^terms spelled with (.) read as " .. kana_capture .. "$")
		if not kanji then
			return nil
		end
		local params = {
			[1] = {list = true},
		}
		local args = require("Module:parameters").process(data.args, params)
		if #args[1] == 0 then
			error("For categories of the form \"" .. data.lang:getCanonicalName() ..
				" terms spelled with KANJI read as READING\", at least one reading type (e.g. {{code|kun}} or {{code|on}}) must be specified using 1=, 2=, 3=, etc.")
		end

		-- The same sort key is used for every reading-based parent.
		local reading_sort_key = (data.lang:makeSortKey(reading))
		local yomi_types, parents = {}, {}
		for _, yomi in ipairs(args[1]) do
			local yomi_info = yomi_data[yomi]
			if not yomi_info then
				error("The yomi type \"" .. yomi .. "\" is not recognized.")
			end
			local category = yomi_info.kanji_category
			if not category then
				error("The yomi type \"" .. yomi .. "\" is not valid for this type of category.")
			end
			insert(yomi_types, "<i>" .. yomi_info.type .. "</i> reading")
			insert(parents, {
				name = "terms spelled with kanji with " .. category .. " readings",
				sort = reading_sort_key
			})
		end

		local yomi_types_text = require("Module:table").serialCommaJoin(yomi_types, {conj = "or"})

		-- The kanji and the reading categories come before the yomi-type parents.
		insert(parents, 1, {name = "terms spelled with " .. kanji, sort = reading_sort_key})
		insert(parents, 2, {name = "terms spelled with kanji read as " .. reading, sort = Hani_sort(kanji)})

		local tagged_kanji = get_tagged_reading(kanji)
		local tagged_reading = get_tagged_reading(reading)
		return {
			description = "{{{langname}}} terms spelled with {{l|{{{langcode}}}|" .. kanji .. "}} with its " ..
				yomi_types_text .. " of " .. get_reading_link(reading) .. ".",
			displaytitle = "{{{langname}}} terms spelled with " .. tagged_kanji .. " read as " .. tagged_reading,
			breadcrumb = "read as " .. tagged_reading,
			parents = parents,
		}, true
	end)


	-- "terms with KANJI replaced by daiyōji DAIYOJI"; requires the argument
	-- sort= and raises an error when it is missing.
	insert(handlers, function(data)
		local kanji, daiyoji = rmatch(data.label, "^terms with (.) replaced by daiyōji (.)$")
		if not kanji then
			return nil
		end
		local params = {
			["sort"] = {},
		}
		local args = require("Module:parameters").process(data.args, params)
		if not args.sort then
			error("For categories of the form \"" .. data.lang:getCanonicalName() ..
				" terms with KANJI replaced by daiyōji DAIYOJI\", the sort key must be specified using sort=")
		end

		local tagged_kanji = get_tagged_reading(kanji)
		local tagged_daiyoji = get_tagged_reading(daiyoji)
		return {
			description = "{{{langname}}} terms with {{l|{{{langcode}}}|" .. kanji .. "}} replaced by [[Appendix:Japanese glossary#daiyouji|daiyōji]] {{l|{{{langcode}}}|" .. daiyoji .. "}}.",
			displaytitle = "{{{langname}}} terms with " .. tagged_kanji .. " replaced by daiyōji " .. tagged_daiyoji,
			breadcrumb = tagged_kanji .. " replaced by daiyōji " .. tagged_daiyoji,
			parents = {{name = "terms spelled with daiyōji", sort = args.sort}},
		}, true
	end)
end


return export