Module:User:Suzukaze-c/zh-extract
Appearance
- The following documentation is located at Module:User:Suzukaze-c/zh-extract/documentation. [edit] Categories were auto-generated by Module:documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
{{#invoke:User:Suzukaze-c/zh-extract|extract_roman|度}}
{{#invoke:User:Suzukaze-c/zh-extract|extract_roman|度|yes}}
{{#invoke:User:Suzukaze-c/zh-extract|extract_roman|蘋|yes}}
-- Table of public functions; returned at the end of the module.
local export = {}
-- Short aliases for the mw.ustring / mw.text library functions used below.
local replace = mw.ustring.gsub
local match = mw.ustring.match
local itermatch = mw.ustring.gmatch
local split = mw.text.split
-- idea: +simplified? since we're pulling stuff from the page anyway
-- Separator placed between the values of the same parameter when several
-- {{zh-pron}} boxes are combined and the caller supplies no separator.
local default_set_separator = '//'
--- Extract the parameters of the {{zh-pron}} template(s) found on a page.
--
-- The function can be called in two ways:
--   * from Lua, as extract_roman(word, combine, set_separator);
--   * with a single table that has an `args` field (as supplied by
--     {{#invoke:}}), in which case args[1], args[2] and args[3] are used as
--     the three parameters and the result is returned as a dump string.
--
-- @param word           Title of the page to read (or the table described above).
-- @param combine        If truthy, merge the parameters of every {{zh-pron}}
--                       box on the page; otherwise only the first box is used.
--                       NOTE(review): an empty argument coming from wikitext is
--                       the string '' and is therefore truthy.
-- @param set_separator  Text inserted between values of the same parameter
--                       when combining; defaults to `default_set_separator`.
-- @return In Lua mode: a table mapping parameter name to value (nil when the
--         page has no recognised {{zh-pron}} box and `combine` is falsy).
--         In table/`args` mode: that value passed through the `dump` function
--         of `module:debug`.
-- Raises an error when no title is given, when the page content cannot be
-- read, or when {{zh-pron}} is present but no box ends with a `cat` param.
function export.extract_roman(word, combine, set_separator)
	local plaintext = false

	if type(word) == 'table' then
		plaintext = true
		word, combine, set_separator = word.args[1], word.args[2], word.args[3]
	end

	-- Fail with a readable message instead of a concatenation error below.
	if word == nil then
		error('extract_roman: no page title was given')
	end

	mw.log('PROCESSING: [[' .. word .. ']]')

	-- mw.title.new returns nil for an invalid title, so it must be checked
	-- before calling getContent on it.
	local title = mw.title.new(word)
	local content = title and title:getContent()
	if not content then
		error('the [[' .. word .. ']] entry does not exist!?')
	end

	-- Mark the start and the end of every {{zh-pron}} box with sentinel
	-- characters so that the boxes can be cut out with a simple pattern.
	content = replace(content, "{{zh%-pron", "ⓐⓐⓐⓐⓐ")
	content = replace(content, "(|cat=[a-z,:]*)\n?}}\n", "%1ⓩⓩⓩⓩⓩ") -- making assumptions about formatting

	-- A start marker without any end marker means the closing pattern never
	-- matched, i.e. no box ends with a `cat` parameter.
	if match(content, "ⓐ") and not match(content, "ⓩ") then
		error("please add the cat param to zh-pron at [[" .. word .. "]]")
	end

	-- Convert each {{zh-pron}} instance to a table subsumed in $each
	local each = {}
	for innards in itermatch(content, "ⓐⓐⓐⓐⓐ([^ⓩ]+)ⓩⓩⓩⓩⓩ") do
		local box = {}
		local items = split(innards, "\n|")
		-- The first piece is whatever precedes the first "\n|"; discard it.
		table.remove(items, 1)

		for _, item in ipairs(items) do
			local param, value = match(item, "^([^=]+)=(.*)$")
			-- Lines without "=" yield no match; skip them rather than
			-- indexing the table with nil.
			if param then
				box[param] = value
			end
		end

		each[#each + 1] = box
	end

	-- If told to combine tables, then combine each $each sub-table into a mega-table,
	-- otherwise return the data of the first {{zh-pron}} instance
	local roman_final
	if combine then
		roman_final = {}

		-- make $roman_final[param] a table containing every non-empty $value
		for _, etable in ipairs(each) do
			for param, value in pairs(etable) do
				if not roman_final[param] then
					roman_final[param] = {}
				end
				if value ~= '' then
					table.insert(roman_final[param], value)
				end
			end
		end

		-- flatten $roman_final[param] into text
		local separator = set_separator or default_set_separator
		for param, values in pairs(roman_final) do
			roman_final[param] = table.concat(values, separator)
		end
	else
		roman_final = each[1]
	end

	if plaintext then
		return require('module:debug').dump(roman_final)
	else
		return roman_final
	end
end
-- Hand the table of public functions back to whoever loads this module.
return export