Module:User:Suzukaze-c/zh-l

The following documentation is located at Module:User:Suzukaze-c/zh-l/documentation. ^[edit] Categories were auto-generated by Module:documentation. ^[edit]

Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox

What I don't like about {{zh-l}} automatically picking up Mandarin is that pinyin may be mistaken for POJ or Cantonese Yale (or the other way around).

中國／中国 (M. Zhōngguó; M-S. Zong1gue2; DG. Җун1гуй2/Җун1гуә2; C. zung1 gwok3; C-T. zuung1 gok2; MD. Dṳ̆ng-guók; MN. ml,tw,pn:Tiong-kok; MN-T. dong1 gog4, “China”, proper noun, literally “Middle Country”)

英國／英国 (M. Yīngguó)
英國／英国
英國／英国 (C. jing1 gwok3; MN. Eng-kok)
英國／英国 (C. jing1 gwok3; MN. Eng-kok, “the UK”)
英國／英国 (“the UK”)
英國／英国 (M. Yīngguó, “the UK”)

CD (M. xīdì,xīdī,sēidì,py=CD)
CD
CD (“gloss”)
CD (C. si1 di1)
CD (C. si1 di1, “gloss”)

附著 (M. fùzhuó)
fixme: bad simplified form 附著／附著 (M. fùzhuó)
fixme: bad simplified form 擦／擦 (M. cā)
附著／附着 (M. fùzhuó)
附著／附着
臺灣話／台灣話／台湾话 / 台湾话／台湾话／台湾话 (M. Táiwānhuà)
臺灣話／台灣話／台湾话 / 台湾话／台湾话／台湾话 (M. Táiwānhuà)
潮陽區／潮阳区 (M. Cháoyáng qū//ōu; C. ciu4 joeng4 keoi1//au1; MN. xm,tw:Tiô-iûⁿ xm,qz,jj,tw:khu/zz:khi//xm,zz,tw:o͘/qz:io/xm,qz,zz:au)
潮陽區／潮阳区 (C. ciu4 joeng4 keoi1//au1)
華盛頓州西雅圖／华盛顿州西雅图
華盛頓州西雅圖／华盛顿州西雅图 (M. Huázhōu//Huàzhōu Xīyǎtú)
華盛頓州西雅圖／华盛顿州西雅图

著 (M. zhe; C. zoek6, verb particle)
著／著 (M. zhe; C. zoek6, verb particle)
附著／附著 (“fixme too”)

開／开
*原嚟如此
*原 (“maybe there shouldn't be a romanization here? or there should be an asterisk in the pinyin/gloss”)

Lua error at line 19: attempt to index field '?' (a nil value)
Lua error at line 82: attempt to index a nil value
英國／英国 (M. Rìběn; C. me1 waa2, “override param 1 with manual tr”)
Lua error at line 95: "md" pronunciation not found for 科普!
我去！／我去！ (M. wǒ qù)
～縣／～县
臺灣話／台灣話 / 台湾话／台湾话
卡／卡
開／开 ([[|MC.]] y, “maybe leave this job to t:ltc-l”)

-- Table of functions exported by this module (returned at the end of the file).
local export = {}

-- Module:zh; used below for its `ts` and `ts_determ` functions.
local M = require("Module:zh")
-- Link formatting helpers (`full_link`, `remove_links`, `format_link_annotations`).
local m_links = require("Module:links")
local m_languages = require("Module:languages")
-- Provides `tag_text`, used when the term is displayed without a link.
local m_script_utilities = require("Module:script utilities")
-- Provides `extract_roman`, used to look up romanizations for a page title.
local m_test1 = require("Module:User:Suzukaze-c/zh-extract")

-- Language object for code "zh"; passed to the link/script helpers.
local lang = m_languages.getByCode("zh")
-- Per-variety info table, indexed by variety code; the fields read in this
-- module are `rom_w` and `var`.
local varinfo = mw.loadData("Module:User:Suzukaze-c/zh/data/info").data

-- Short aliases for frequently used library functions.
local match = mw.ustring.match
local gsub = mw.ustring.gsub
local split = mw.text.split

-- Character class used to detect whether a parameter contains Han characters.
local match_Han = '[㐀-鿕𠀀-𬺡]'

--- Build the wikitext label that precedes a romanization.
-- The label is a wikilink to `varinfo[abbr].rom_w` whose display text is the
-- upper-cased code followed by a full stop, wrapped in an <abbr> element whose
-- tooltip is `varinfo[abbr].var`.
-- @param abbr variety code used as a key into `varinfo` (e.g. "m", "c")
-- @return wikitext string
-- Raises an error if `abbr` is nil or has no entry in `varinfo`; previously
-- this surfaced as "attempt to index field '?' (a nil value)".
local function abbr_gen(abbr)
	local info = abbr and varinfo[abbr]
	if not info then
		error('Unrecognized variety code "' .. tostring(abbr) .. '".')
	end
	local page, tooltip, upper = info['rom_w'], info['var'], mw.ustring.upper(abbr)
	return '[[' .. page .. '|<abbr title="' .. tooltip .. '">' .. upper .. '.</abbr>]]'
end

--- Template entry point: format a link to a Chinese term with romanizations.
--
-- Arguments are read from the parent frame (the template invocation):
--   1, 2, 3  Either (word, gloss) or (varieties, word, gloss). If parameter 1
--            contains a Han character, or parameter 2 is absent, parameter 1
--            is taken as the word and the variety list defaults to "m".
--            Varieties are a comma-separated list of variety codes.
--   pos, lit Passed through to the link annotations.
--   tr       Manual romanization: "/"-separated "variety:romanization" pairs.
--            When given, no automatic lookup is performed.
--   s        If set, always append the form returned by M.ts(word).
--
-- An "@" in the word is stripped and suppresses linking; a "*" in the word
-- also suppresses linking (but is kept in the displayed text).
--
-- @param frame Scribunto frame object
-- @return formatted wikitext
-- Raises an error if parameter 1 is missing, if a requested variety has no
-- pronunciation for a looked-up page, or if a |tr= segment is malformed.
function export.link(frame)
	local args = frame:getParent().args

	-- Without this guard a missing parameter reaches mw.ustring.match(nil, ...)
	-- and produces an opaque "bad argument" error.
	if not args[1] then
		error('Parameter 1 (the word, or the list of varieties) is required.')
	end

	local varieties, word, gloss
	if match(args[1], match_Han) then
		-- variety specification has been left out; $1 is definitely a word here as it is in the Han script
		varieties = 'm'
		word = args[1]
		gloss = args[2] or false
	elseif not args[2] then
		-- we have been given only a word, POSSIBLY in the Latin script, and nothing else
		varieties = 'm'
		word = args[1]
		gloss = false
	else
		-- args[2] is known to be present in this branch
		varieties = args[1]
		word = args[2]
		gloss = args[3] or false
	end
	local pos = args["pos"] or false
	local lit = args["lit"] or false
	local manual_roman = args["tr"] or false
	local force_simp = args["s"] or false

	varieties = split(varieties, ",", true)

	-- link repression
	-- (declared local: the original leaked `no_link` as a global)
	local no_link = false
	if match(word, "@") then
		word = gsub(word, "@", "")
		no_link = true
	end
	if match(word, "%*") then
		-- the usual linguistic *
		no_link = true
	end

	-- cleanup: normalise ASCII slashes to the fullwidth separator
	word = gsub(word, "%/", "／")

	-- Work out which page titles to look romanizations up on.
	local filtered = gsub(word, '[^㐀-鿕𠀀-𬺡A-Za-z0-9|%[%]／-]', '') -- filter out things like punctuation
	local lookup_targets
	if match(word, "／") then
		-- allow roman to be picked up even with explicit alternate forms:
		-- only the first listed form is looked up
		lookup_targets = { split(filtered, "／", true)[1] }
	elseif match(word, "%[%[") then
		-- we have been given multiple terms
		filtered = gsub(filtered, "|[^%]]+", "") -- remove link titles if present
		filtered = gsub(filtered, "[%[%]]", " ") -- replace all square brackets with spaces
		filtered = gsub(filtered, " +", " ") -- reduce consecutive spaces
		filtered = mw.text.trim(filtered) -- remove excess spaces
		lookup_targets = split(filtered, " ", true) -- now we have a table of each linked item (theoretically)
	else
		lookup_targets = { filtered }
	end

	-- check if all pages exist
	-- mw.title.new returns nil for an invalid title (e.g. an empty string after
	-- filtering); treat that as "does not exist" instead of indexing nil.
	local pages_exist = true
	for _, target in ipairs(lookup_targets) do
		local title = mw.title.new(target)
		if not (title and title.exists) then
			pages_exist = false
		end
	end

	-- extract every pronunciation for every word
	local roman_for_each_word = {}
	if not manual_roman and pages_exist and varieties[1] ~= '' then
		for i, target in ipairs(lookup_targets) do
			local roman_all = m_test1.extract_roman(target, 1)
			local found = {}
			for _, variety in ipairs(varieties) do
				local roman = roman_all[variety]
				if not roman then
					error('"'..variety..'" pronunciation not found for [['..target..']]!')
				end
				found[variety] = roman
			end
			roman_for_each_word[i] = found
		end
	end

	-- Assemble the transliteration string: one "LABEL roman roman ..." group
	-- per variety, groups joined with "; ". `tr` is false when there is none.
	local tr
	if roman_for_each_word[1] then
		local groups = {}
		for j, variety in ipairs(varieties) do
			local pieces = {}
			for i in ipairs(lookup_targets) do
				pieces[i] = roman_for_each_word[i][variety]
			end
			groups[j] = abbr_gen(variety) .. ' ' .. table.concat(pieces, ' ')
		end
		tr = table.concat(groups, '; ')
	elseif manual_roman then
		local groups = {}
		for _, set in ipairs(split(manual_roman, "/", true)) do
			local variety, roman = match(set, "(.+):(.+)")
			if not variety then
				-- previously a nil variety was passed on and failed obscurely
				error('Malformed tr segment "' .. set .. '"; expected "variety:romanization".')
			end
			groups[#groups + 1] = abbr_gen(variety) .. ' ' .. roman
		end
		tr = table.concat(groups, '; ')
	else
		tr = false
	end

	-- finalize link
	if match(word, "%[%[") then
		-- "[[美國]][[聖地亞戈]]"→"[[美國]][[聖地亞戈]]／[[美国]][[圣地亚戈]]"
		word = word .. "／" .. M.ts(word)
	elseif match(word, "／") then
		-- "臺灣話／台灣話／台湾话"→"[[臺灣話]]／[[台灣話]]／[[台湾话]]"
		word = '[[' .. gsub(word, "／", "]]／[[") .. ']]'
	elseif M.ts_determ(word) == "trad" or force_simp then
		-- "附著"→"[[附著]]／[[附着]]"
		word = "[[" .. word .. "]]／[[" .. M.ts(word) .. "]]"
	end

	-- build the link
	local terminfo = {lang = lang, term = word, tr = tr, gloss = gloss, pos = pos, lit = lit}
	if no_link then
		-- "easier to destroy than create": strip the links that were just built
		local plain = m_links.remove_links(word)
		return m_script_utilities.tag_text(plain, lang) .. m_links.format_link_annotations(terminfo)
	end
	return m_links.full_link(terminfo)
end

return export