-- Module:chemical formula
-- Jump to navigation
-- Jump to search
-- This module lacks a documentation subpage. Please create it.
-- Useful links: subpage list • links • transclusions • testcases • sandbox
-- [[w:Module:Chem2]]
local decode_entities = require("Module:string utilities").decode_entities
local getArgs = require('Module:Arguments').getArgs
local export = {} -- module's table
-- Symbols that are eligible for automatic wiki-linking.
-- Keys are the symbols as they appear in a formula; values are descriptive
-- names. The renderer in this file only tests whether a key is present.
local am = {
    -- Chemical elements, in order of atomic number
    H = "Hydrogen",
    He = "Helium",
    Li = "Lithium",
    Be = "Beryllium",
    B = "Boron",
    C = "Carbon",
    N = "Nitrogen",
    O = "Oxygen",
    F = "Fluorine",
    Ne = "Neon",
    Na = "Sodium",
    Mg = "Magnesium",
    Al = "Aluminium",
    Si = "Silicon",
    P = "Phosphorus",
    S = "Sulfur",
    Cl = "Chlorine",
    Ar = "Argon",
    K = "Potassium",
    Ca = "Calcium",
    Sc = "Scandium",
    Ti = "Titanium",
    V = "Vanadium",
    Cr = "Chromium",
    Mn = "Manganese",
    Fe = "Iron",
    Co = "Cobalt",
    Ni = "Nickel",
    Cu = "Copper",
    Zn = "Zinc",
    Ga = "Gallium",
    Ge = "Germanium",
    As = "Arsenic",
    Se = "Selenium",
    Br = "Bromine",
    Kr = "Krypton",
    Rb = "Rubidium",
    Sr = "Strontium",
    Y = "Yttrium",
    Zr = "Zirconium",
    Nb = "Niobium",
    Mo = "Molybdenum",
    Tc = "Technetium",
    Ru = "Ruthenium",
    Rh = "Rhodium",
    Pd = "Palladium",
    Ag = "Silver",
    Cd = "Cadmium",
    In = "Indium",
    Sn = "Tin",
    Sb = "Antimony",
    Te = "Tellurium",
    I = "Iodine",
    Xe = "Xenon",
    Cs = "Caesium",
    Ba = "Barium",
    La = "Lanthanum",
    Ce = "Cerium",
    Pr = "Praseodymium",
    Nd = "Neodymium",
    Pm = "Promethium",
    Sm = "Samarium",
    Eu = "Europium",
    Gd = "Gadolinium",
    Tb = "Terbium",
    Dy = "Dysprosium",
    Ho = "Holmium",
    Er = "Erbium",
    Tm = "Thulium",
    Yb = "Ytterbium",
    Lu = "Lutetium",
    Hf = "Hafnium",
    Ta = "Tantalum",
    W = "Tungsten",
    Re = "Rhenium",
    Os = "Osmium",
    Ir = "Iridium",
    Pt = "Platinum",
    Au = "Gold",
    Hg = "Mercury (element)",
    Tl = "Thallium",
    Pb = "Lead",
    Bi = "Bismuth",
    Po = "Polonium",
    At = "Astatine",
    Rn = "Radon",
    Fr = "Francium",
    Ra = "Radium",
    Ac = "Actinium",
    Th = "Thorium",
    Pa = "Protactinium",
    U = "Uranium",
    Np = "Neptunium",
    Pu = "Plutonium",
    Am = "Americium",
    Cm = "Curium",
    Bk = "Berkelium",
    Cf = "Californium",
    Es = "Einsteinium",
    Fm = "Fermium",
    Md = "Mendelevium",
    No = "Nobelium",
    Lr = "Lawrencium",
    Rf = "Rutherfordium",
    Db = "Dubnium",
    Sg = "Seaborgium",
    Bh = "Bohrium",
    Hs = "Hassium",
    Mt = "Meitnerium",
    Ds = "Darmstadtium",
    Rg = "Roentgenium",
    Cn = "Copernicium", -- IUPAC symbol for element 112
    Nh = "Nihonium",
    Fl = "Flerovium",
    Mc = "Moscovium",
    Lv = "Livermorium",
    Ts = "Tennessine",
    Og = "Oganesson",
    -- Legacy key: copernicium used to be listed here under "Cp", which is not
    -- its IUPAC symbol. Kept so formulae that already rely on it still link.
    Cp = "Copernicium",
    -- Groups etc with element-like names
    Bn = 'Benzyl group',
    Bz = 'Benzoyl group',
    D = 'Deuterium',
    Et = 'Ethyl group',
    Ln = 'Lanthanide',
    Nu = 'Nucleophile',
    Ph = 'Phenyl group',
    R = 'Substituent',
    T = 'Tritium',
    Tf = 'Trifluoromethylsulfonyl group',
    X = 'Halogen',
}
-- Token type tags returned by the tokenizer as the first value of each token.
-- The numbers only need to be distinct from one another.

-- Plain pieces of a formula
local T_ELEM, T_NUM = 0, 1                 -- symbol such as "Na"; number
local T_OPEN, T_CLOSE = 2, 3               -- open '('; close ')'

-- Charges
local T_PM_CHARGE = 4                      -- + or −
local T_CHARGE = 8                         -- charge (x+), (x-)
local T_SUF_CHARGE, T_SUF_CHARGE2 = 10, 12 -- suffix and charge, e.g. 2+ from H2+;
                                           -- suffix and (charge), e.g. 2(2+) from He2(2+)

-- Crystal parts
local T_WATER, T_CRYSTAL = 6, 9            -- .xH2O where x is a number; .x

-- Backslash escapes
local T_SPECIAL, T_SPECIAL2 = 14, 16       -- \x, e.g. \d for double bond (=);
                                           -- \y{x}, e.g. \i{12} for isotope with mass number 12

-- Arrows
local T_ARROW_R, T_ARROW_EQ = 17, 18       -- match: -> ; match: <->

-- Explicit sub/superscripts
local T_UNDERSCORE, T_CARET = 19, 20       -- _{ ... } ; ^{ ... }

-- Links and fall-through
local T_LINKOPEN = 21                      -- opening of a link, always like "[[target|" even if the source wasn't
local T_NOCHANGE = 30                      -- anything else, like ☃
-- Build the markup for a superscript, a subscript, or a stacked pair.
--   up   : text for the upper position ("" for none)
--   down : text for the lower position ("" for none)
-- An empty `up` always yields a <sub> element (even if `down` is empty too);
-- otherwise an empty `down` yields a <sup> element; with both present the two
-- are wrapped together in a "template-chem2-su" span.
-- NOTE(review): this function is declared as a global, not a local.
function su(up, down)
    local has_up, has_down = up ~= "", down ~= ""
    if has_up and has_down then
        return string.format(
            '<span class="template-chem2-su"><span>%s</span><span>%s</span></span>',
            up, down
        )
    elseif has_up then
        return string.format('<sup class="template-chem2-sup">%s</sup>', up)
    end
    return string.format('<sub class="template-chem2-sub">%s</sub>', down)
end
-- Returns the separator written in front of crystal / crystal-water parts.
-- NOTE(review): this function is declared as a global, not a local.
function DotIt()
    local separator = '·'
    return separator
end
-- Tokenizer for formula markup.
--   f : the formula string
-- Returns an iterator function. Each call returns the next token as
-- (type, text), where `type` is one of the T_* constants, and returns
-- (nil, nil) once the whole string has been consumed, so it can drive a
-- generic `for` loop.
-- Rules are tried strictly in the order written below; the first one that
-- matches at the current position wins, so the order is significant.
-- NOTE(review): this function is declared as a global, not a local.
function item(f)
    local i = 1  -- byte position of the next unread character of f
    local t, x   -- type and text of the token currently being recognised

    -- Try `pattern` at position i unless an earlier rule has already matched.
    local function try(pattern, kind)
        if not x then
            x = f:match(pattern, i)
            if x then t = kind end
        end
    end

    return function ()
        t, x = nil, nil
        if (i == 1) and f:match('^[0-9]', i) then
            -- coefficient at the very start of the formula (no leading space needed)
            x = f:match('^[%d.]+', i); t = T_NOCHANGE
            i = i + x:len()
        elseif i <= f:len() then
            try('^%s+[%d.]+', T_NOCHANGE)           -- coefficient (needs a space first)
            try('^%s[+]', T_NOCHANGE)               -- " +" as in H2O + H2O
            try('^%&%#[%w%d]+%;', T_NOCHANGE)       -- &#...;
            try('^%<%-%>', T_ARROW_EQ)              -- <->
            try('^%-%>', T_ARROW_R)                 -- ->
            try('^%u%l*', T_ELEM)                   -- symbols like Aaaaa
            try('^%d+[+-]', T_SUF_CHARGE)           -- x+, x-
            try('^%d+%(%d*[+-]%)', T_SUF_CHARGE2)   -- x(y+/-), x(+/-)
            try('^%(%d*[+-]%)', T_CHARGE)           -- (x+) (xx+), (x-) (xx-)
            try('^[%d.]+', T_NUM)                   -- number

            -- escape: [[[X or [X yields a literal '[' (relevant to auto-linking)
            if not x and (f:match('^%[%[%[[^[]', i) or f:match('^%[[^[]', i)) then
                i = i + 1
                return T_OPEN, '['
            end

            -- wiki link: always reported as "[[target|"
            if not x and f:sub(i, i + 1) == '[[' then
                local target = f:match('^%[%[([^]|]*)', i)
                local len = target:len() + 3
                if f:sub(len + i, len + i) == ']' then
                    -- We're going to read the link twice, once as target and once as
                    -- chemical markup, e.g. [[CH3]] => "[[CH3|", "CH3]]"
                    i = i + 2
                else
                    i = i + len
                end
                return T_LINKOPEN, '[[' .. target .. '|'
            end

            -- Lua patterns have no alternation, so a '|' inside a character
            -- class is a literal member; the bracket classes list only brackets.
            try('^[({%[]', T_OPEN)                  -- ( { [
            try('^[)}%]]', T_CLOSE)                 -- ) } ]
            try('^[+-]', T_PM_CHARGE)               -- + or -
            try('^%*[%d.]*H2O', T_WATER)            -- crystal water
            try('^%*[%d.]*', T_CRYSTAL)             -- crystal
            try('^[\\].{%d+}', T_SPECIAL2)          -- \y{x}
            try('^[\\].', T_SPECIAL)                -- \x
            try('^_{[^}]*}', T_UNDERSCORE)          -- _{...}
            try('^%^{[^}]*}', T_CARET)              -- ^{...}
            try('^.', T_NOCHANGE)                   -- the rest, one byte at a time

            if x then
                i = i + x:len()
            else
                -- defensive: '^.' matches whenever i <= #f, so this should not happen
                error("Invalid character in formula! : "..f)
            end
        end
        return t, x
    end
end
-- Render formula markup as wikitext/HTML.
--   args[1]    : the formula markup (defaults to '')
--   args.link  : when non-empty, the whole formula is wrapped in [[link|...]]
--   args.auto  : when non-empty, the first occurrence of each symbol listed in
--                `am` is turned into a wiki link to that symbol
-- Returns a string: a <templatestyles> tag followed by the rendered formula
-- inside <span class="chemf nowrap">.
function export._chem(args)
    local f = args[1] or ''
    f = decode_entities(f) -- handle entity input (like &minus;): decode right away
    f = string.gsub(f, "–", "-") -- replace en dash with hyphen
    f = string.gsub(f, "−", "-") -- replace minus sign with hyphen

    local link = args['link'] or ""
    local auto = args['auto'] or ""
    local seen = {}      -- symbols that have already been auto-linked
    local _debug = false -- when true, dump the token stream instead of rendering

    -- Output pieces are collected here and joined once at the end.
    local out = {}
    local function emit(s)
        out[#out + 1] = s
    end

    -- Turn hyphens into real minus signs (extra parentheses drop gsub's count).
    local function minus(s)
        return (s:gsub("-", "−"))
    end

    -- Output for single-character escapes \x; unknown escapes produce nothing.
    local special = {
        s = "−",         -- single bond
        d = "=",         -- double bond
        t = "≡",         -- triple bond
        q = "≣",         -- quadruple bond
        h = "η",         -- η, hapticity
        ["*"] = "*",     -- normal *
        ["-"] = "-",     -- -
        ["\\"] = "\\",   -- \
        ["'"] = "&#39;", -- html-code for '
    }

    if link ~= '' then emit("[[" .. link .. "|") end -- wikilink start [[link|

    for t, x in item(f) do
        if _debug then
            emit(("\n* %d %s"):format(t, x))
        elseif t == T_ELEM then
            if auto ~= '' and am[x] and not seen[x] then
                emit(("[[%s|%s]]"):format(x, x))
                seen[x] = true
            else
                emit(x)
            end
        elseif t == T_NUM then
            emit(su("", x))
        elseif t == T_LINKOPEN or t == T_OPEN or t == T_CLOSE or t == T_NOCHANGE then
            -- "[[Link|", brackets and everything not captured by another rule
            emit(x)
        elseif t == T_PM_CHARGE then
            emit(su(minus(x), ""))
        elseif t == T_SUF_CHARGE then
            -- x looks like "2+": digits become the subscript, sign the superscript
            emit(su(minus(x:match("[+-]")), x:match("%d+")))
        elseif t == T_SUF_CHARGE2 then
            -- x looks like "2(2+)": count below, bracketed charge above
            local count, charge = x:match("^(%d+)%((%d*[+-])%)$")
            emit(su(minus(charge), count))
        elseif t == T_CHARGE then
            -- x looks like "(2+)" or "(+)"; the digits are optional
            local digits, sign = x:match("^%((%d*)([+-])%)$")
            emit("<sup>" .. digits .. minus(sign) .. "</sup>")
        elseif t == T_CRYSTAL then
            emit(DotIt() .. x:sub(2)) -- x always starts with '*'; drop it
        elseif t == T_SPECIAL then
            local piece = special[x:sub(2, 2)] -- x from \x
            if piece then emit(piece) end
        elseif t == T_SPECIAL2 then -- \y{x}
            local parameter = x:sub(2, 2) -- y from \y{x}
            if parameter == "h" then
                emit("η<sup>" .. x:match('%d+') .. "</sup>-") -- hapticity
            elseif parameter == "m" then
                emit("μ<sub>" .. x:match('%d+') .. "</sub>-") -- mu ([[bridging ligand]])
            end
        elseif t == T_WATER then
            -- x is "*" .. optional count .. "H2O"; the count may be empty
            emit(DotIt() .. x:match("^%*([%d.]*)") .. "H<sub>2</sub>O")
        elseif t == T_UNDERSCORE then
            emit(su("", minus(x:sub(3, -2)))) -- x contains _{string}
        elseif t == T_CARET then
            emit(su(minus(x:sub(3, -2)), "")) -- x contains ^{string}
        elseif t == T_ARROW_R then
            emit(" → ")
        elseif t == T_ARROW_EQ then
            emit(" ⇌ ")
        else
            error('unreachable - ???') -- in fact, unreachable
        end
    end

    if link ~= '' then emit("]]") end -- wikilink closing ]]

    return mw.getCurrentFrame():extensionTag("templatestyles", "", {src = "chemical formula/styles.css"}) ..
        '<span class="chemf nowrap">' .. table.concat(out) .. '</span>'
end
-- Entry point for #invoke: extracts the arguments from `frame` with getArgs
-- and passes them to export._chem, returning its result.
function export.chem(frame)
    return export._chem(getArgs(frame))
end
return export