Module:encodings
Appearance
- The following documentation is located at Module:encodings/documentation. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
The module defines a set of "encoders", each of which encodes text into a given encoding. More encoders can be added to the module as necessary.
encode
Function encode(text, encoding)
Takes 'text' in UTF-8 encoding, converts it into 'encoding', %-encodes the result, and returns the resulting string.
Explanation
- UTF-8: the wiki software and all its pages and output use UTF-8.
- %-encoding: 1) the encoded text is bound to contain byte sequences that are invalid UTF-8, and Scribunto does not allow modules to return invalid UTF-8 text (it replaces any invalid bytes in the output with � [U+FFFD REPLACEMENT CHARACTER]); 2) the primary use of this function is to encode text for use in URLs (external links) on sites that use older encodings.
Custom encoder methods
Number difference
- Paste the input and output hex numbers into LibreOffice Calc and sort by input
- Convert base-16 [hexadecimal] to base-10 [decimal] (e.g. with onlinenumbertools.com, toolslick.com, or see below custom converter)
- Calculate difference (using references)
- Sort by difference, color the background of repeating differences, sort by input
Custom converter
<!DOCTYPE html>
<html>
<head>
<script>
/**
 * Converts every line of #text0 from the base given in #base0 to the base
 * given in #base1 and writes the results, line for line, into #text1.
 *
 * A line produces an empty output line when it is not a valid number in the
 * source base (empty, contains other characters, or contains a digit that is
 * too large for that base). When either base is not an integer in 2..36 (the
 * range supported by parseInt/toString) every output line is empty.
 *
 * Note: the conversion goes through a JavaScript Number, so values above
 * 2^53 lose precision.
 */
function baseconvert() {
	var DIGITS = "0123456789abcdefghijklmnopqrstuvwxyz";
	// Explicit radix 10 so that a value such as "0x10" is not read as hexadecimal.
	var base0 = parseInt( document.querySelector("#base0").innerText, 10 );
	var base1 = parseInt( document.querySelector("#base1").innerText, 10 );
	var dText0 = document.querySelector("#text0");
	var dText1 = document.querySelector("#text1");
	var ns = dText0.value.split("\n"); // numbers, one per line
	// NaN fails every comparison, so unparsable bases are rejected here too.
	var basesOk = base0 >= 2 && base0 <= 36 && base1 >= 2 && base1 <= 36;
	var out = [];

	// True when `s` is non-empty and consists only of digits valid in base0.
	// Without this check parseInt would yield NaN ("Z" in base 16) or silently
	// drop the tail of the input ("12G" in base 16 -> 0x12).
	function isNumberInBase0(s) {
		if (s.length === 0)
			return false;
		for (var k = 0; k < s.length; k++) {
			var d = DIGITS.indexOf( s.charAt(k).toLowerCase() );
			if (d < 0 || d >= base0)
				return false;
		}
		return true;
	}

	for (var i = 0; i < ns.length; i++) {
		if ( basesOk && isNumberInBase0(ns[i]) )
			out.push( parseInt(ns[i], base0).toString(base1) ); // XX > 10 > ZZ
		else
			out.push( "" ); // keep input and output lines aligned
	}
	dText1.value = out.join("\n");

	// Shrink both textareas first so that scrollHeight reflects the content
	// height, then grow them to fit (variable textarea height).
	dText0.style["height"] = "32px";
	dText1.style["height"] = "32px";
	dText0.style["height"] = dText0.scrollHeight + "px";
	dText1.style["height"] = dText1.scrollHeight + "px";
}
// Once the DOM is ready, convert the initial content and then re-convert on
// every edit of the input text or of either (contenteditable) base field.
// Previously only #text0 was observed, so changing a base left stale output.
document.addEventListener("DOMContentLoaded", function() {
	baseconvert();
	var sources = ["#text0", "#base0", "#base1"];
	for (var i = 0; i < sources.length; i++)
		document.querySelector(sources[i]).addEventListener("input", baseconvert);
});
</script>
<style>
/* Page layout: a two-column grid with an 8px gap between cells. */
body {background: lightgray; display: grid; grid-template-columns: auto auto; gap: 8px;}
/* The description spans all columns. */
#desc {grid-column: 1 / -1;}
/* Description and both base fields are centred within their grid cells. */
#desc, #base0, #base1 {justify-self: center; width: auto;}
/* Both textareas use a monospace font and cannot be resized by the user. */
#text0, #text1 {font-family: monospace, monospace; resize: none;}
</style>
</head>
<body>
<!-- Heading shown above the two columns. -->
<div id="desc">Convert from left to right</div>
<!-- Editable source base (#base0) and target base (#base1); baseconvert() reads their text. -->
<div id="base0" contenteditable>16</div>
<div id="base1" contenteditable>10</div>
<!-- #text0: input numbers, one per line; #text1: read-only converted output. -->
<textarea id="text0">BADCAFE</textarea>
<textarea id="text1" readonly></textarea>
</body>
</html>
{{R:IEC2}}
(Catalan IEC dictionary) required input in ISO 8859-1 encoding:
{{#invoke:encodings|encode|abundància|ISO 8859-1}}
- abund%E0ncia (< abundància)
See also
- {{urlencode:text|QUERY}} (abund%C3%A0ncia < abundància)
- {{PAGENAMEE}}
-- Table of functions exported by this module (returned at the end of the file).
local export = {}
-- Shared string helpers; only `gcodepoint` and `char` are used here.
local m_str_utils = require("Module:string utilities")
local char = string.char
local concat = table.concat
-- Iterator used by the encoders to walk over the codepoints of a string.
local gcodepoint = m_str_utils.gcodepoint
local insert = table.insert
-- Called with a codepoint when building error messages (presumably returns
-- the corresponding character as a string).
local u = m_str_utils.char
-- Encoder registry: encoding name -> function(text) returning the encoded
-- byte string. Looked up by export.encode.
local encoders = {}
-- ISO 8859-1 encoder: every codepoint below 256 becomes the byte with the
-- same value; any codepoint from 256 upwards raises an error.
encoders["ISO 8859-1"] = function(text)
	local bytes, count = {}, 0
	for codepoint in gcodepoint(text) do
		if codepoint >= 256 then
			error('Invalid ISO 8859-1 character "' .. u(codepoint) .. '".')
		end
		count = count + 1
		bytes[count] = char(codepoint)
	end
	return concat(bytes)
end
-- cp1251 (Windows-1251) encoder. [[d:Q1748665|cp1251]]
--
-- Maps each codepoint of `text` to its single cp1251 byte and returns the
-- resulting byte string. Codepoints that have no cp1251 representation are
-- replaced with "?".
--
-- Fixes relative to the earlier arithmetic implementation:
--  * U+0450 fell into the contiguous А-я range and produced the value 256,
--    which made string.char raise an error; it now yields "?".
--  * Codepoints U+0080-U+00BF were passed through unchanged although only a
--    few of them (e.g. U+00A0, U+00A7, U+00AB) share their byte value with
--    cp1251; the others (e.g. U+00A3 "£", whose byte 0xA3 means "Ј" in
--    cp1251) were silently mis-encoded. They now yield "?".
--  * The lookup table is built once when the module loads, not on every call.
do
	-- Unicode codepoints of the cp1251 bytes 0x80-0xBF, in byte order.
	-- `false` marks the unassigned byte 0x98.
	local upper_half = {
		0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021, -- 0x80-0x87
		0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F, -- 0x88-0x8F
		0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, -- 0x90-0x97
		false,  0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F, -- 0x98-0x9F
		0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7, -- 0xA0-0xA7
		0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407, -- 0xA8-0xAF
		0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7, -- 0xB0-0xB7
		0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457, -- 0xB8-0xBF
	}

	-- Reverse lookup: codepoint -> cp1251 byte for the bytes 0x80-0xBF.
	local to_byte = {}
	for i, codepoint in ipairs(upper_half) do
		if codepoint then
			to_byte[codepoint] = 0x7F + i
		end
	end

	encoders["cp1251"] = function(text)
		local ret = {}
		for cp in gcodepoint(text) do
			local byte
			if cp < 0x80 then
				-- ASCII is identical in cp1251.
				byte = cp
			elseif cp >= 0x0410 and cp <= 0x044F then
				-- А-я occupy the bytes 0xC0-0xFF contiguously.
				byte = cp - 0x0350
			else
				byte = to_byte[cp] or 63 -- '?'
			end
			insert(ret, char(byte))
		end
		return concat(ret)
	end
end
--- Encode `text` (UTF-8) into `encoding` and percent-encode the result.
--
-- Can be called from Lua as `export.encode(text, encoding)` or from wikitext
-- as `{{#invoke:encodings|encode|text|encoding}}`; in the latter case the
-- first argument is a frame table and the values are read from its `args`.
--
-- @param text      string to encode, or the frame when invoked from wikitext
-- @param encoding  name of a registered encoder, e.g. "ISO 8859-1" or "cp1251"
-- @return the encoded bytes passed through `mw.uri.encode` (called without an
--         explicit encoding type, so its default applies)
-- Raises an error when no encoder is registered for `encoding`; the encoders
-- themselves may also raise errors for characters they cannot represent.
function export.encode(text, encoding)
	if type(text) == "table" then
		-- Invoked via #invoke: validate and extract the two positional
		-- parameters. The text may be empty, the encoding may not be omitted.
		local params = {
			[1] = {required = true, allow_empty = true},
			[2] = {required = true},
		}
		local args = require("Module:parameters").process(text.args, params)
		text = args[1]
		encoding = args[2]
	end
	local encoder = encoders[encoding]
	if not encoder then
		-- tostring() keeps the intended message when `encoding` is nil or not
		-- a string; concatenating such a value directly would itself fail.
		error("No encoder exists for the encoding \"" .. tostring(encoding) .. "\".")
	end
	-- The raw encoded bytes are generally not valid UTF-8, so they must be
	-- percent-encoded before being returned.
	return mw.uri.encode(encoder(text))
end
return export