User:Fenakhay/cleanup.js

Note: You may have to bypass your browser’s cache to see the changes. In addition, after saving a sitewide CSS file such as MediaWiki:Common.css, it will take 5-10 minutes before the changes take effect, even if you clear your cache.
Mozilla / Firefox / Safari: hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (Command-R on a Macintosh);
Konqueror and Chrome: click Reload or press F5;
Opera: clear the cache in Tools → Preferences;
Internet Explorer: hold Ctrl while clicking Refresh, or press Ctrl-F5.
This user script lacks a documentation subpage. Please create it.
Useful links: root page • root page’s subpages • links • redirects • your own
/*
 * My set of wikitext cleanup buttons that perform tedious editing tasks.
 * The buttons are inserted above the textbox when certain conditions are
 * fulfilled using [[User:Erutuon/scripts/CleanupButtons.js]].
 */

/* jshint boss: true, esversion: 6, eqeqeq: true, varstmt: true, unused: true, undef: true */
/* globals $, CleanupButtons, mw */
// <nowiki>


if ( [ "edit", "submit" ].includes(mw.config.get("wgAction"))
		&& mw.config.get("wgPageContentModel") === "wikitext"
		// Not in edit conflict view.
		&& !document.querySelector(".mw-twocolconflict-changes-col")) {
$.when(
	$.getScript("//en.wiktionary.org/w/index.php?title=User:Erutuon/scripts/CleanupButtons.js&action=raw&ctype=text/javascript"),
	$.ready
).done(function () {
"use strict";

// Page metadata, read once from the MediaWiki configuration.
const pageName = mw.config.get("wgPageName");
const categories = mw.config.get("wgCategories");
const namespaceNumber = mw.config.get("wgNamespaceNumber");

// Namespaces 0 and 118 are the ones treated as holding entries;
// namespace 10 is treated as the template namespace.
const entryspace = [ 0, 118 ].includes(namespaceNumber);
const isTemplate = ( namespaceNumber === 10 );

// Matches a single Han (Chinese) character; ranges are taken from
// [[Module:Unicode data/scripts]]. Astral characters are spelled out as
// surrogate pairs because JSHint understands neither the "u" flag nor
// Unicode codepoint escapes.
const HaniRegex = /(?:[\u2E80-\u303F\u337B-\u337F\u3400-\u9FFF]|[\uD840-\uD879][\uDC00-\uDFFF]|\uD87A[\uDC00-\uDFE0])/;

// Part-of-speech (and similar) header names, one per line. The list is
// turned into a regex alternation below, so longer names that begin with
// a shorter entry must stay above it (e.g. "Adjectival noun" before
// "Adjective").
const POSHeaders = `Abbreviation
Acronym
Adjectival noun
Adjective
Adnominal
Adverb
Affix
Article
Circumfix
Classifier
Combining form
Conjugation
Conjunction
Contraction
Counter
Declension
Definitions
Determiner
Diacritical mark
Gerund
Hanja
Hanzi
Idiom
Infix
Initialism
Interfix
Interjection
Kanji
Letter
Ligature
Logogram
Noun
Number
Numeral
Ordinal number
Participle
Particle
Phrase
Postposition
Predicative
Prefix
Preposition
Prepositional phrase
Pronoun
Pronunciation
Proper noun
Proverb
Punctuation mark
Relative
Romanization
Root
Stem
Suffix
Syllable
Symbol
Verb
Verbal noun`;

// Matches the tail of a PoS header ("Noun==="), the line that follows it
// (the headword template) and the newlines up to the first definition
// line (one starting with "#").
//   $1: header name plus closing equals signs
//   $2: the line after the header
// Backslashes are doubled because this is a string literal: a bare "\s"
// would collapse to "s" and the pattern would read "s*=+" instead of
// "\s*=+".
// NOTE(review): there is no "g" flag, so String.prototype.replace only
// rewrites the first header in an entry — confirm that this is intended.
const POSHeaderAndTemplate = new RegExp("((?:"
	+ POSHeaders.replace(/\n/g, "|")
	+ ")\\s*=+)[ \\t]*\\n+([^\\n]+)\\n+(?=#)");

/*
 * Returns a copy of the wikitext `entry` with its whitespace tidied.
 * The rewrites are applied strictly in the order listed, each one working
 * on the output of the previous one.
 */
function normalize(entry) {
	const rewrites = [
		// Tabs become spaces.
		[ /\t/g, " " ],
		// A space follows a run of list markers at the start of a line.
		[ /^([;:#*]+)(?=[^;:#*\s])/gm, "$1 " ],
		// Exactly one blank line before a line starting with "==".
		[ /(?:\n *)*\n==/g, "\n\n==" ],
		// ...except that text preceding the first header is separated
		// from it by a single newline (no "m" flag: ^ is start of text).
		[ /^((?!=)[^\n]*(?:\n(?!=)[^\n]*?)*?)\n+==/, "$1\n==" ],
		// No blank line between a header and the non-header line after it.
		[ /==\n\n+(?!=)/g, "==\n" ],
		// Single newline between PoS header and headword template;
		// two newlines after headword template.
		[ POSHeaderAndTemplate, "$1\n$2\n\n" ],
		// Drop a "----" separator at the very start or very end
		// (first match only).
		[ /^\n*----+\n*|\n+----+\s*$/, "" ],
		// Remaining separators become "----" with a blank line either side.
		[ /\n+----+\s+/g, "\n\n----\n\n" ],
		// Strip spaces at the beginning and end of lines.
		[ /\n +| +\n/g, "\n" ],
		// Remove the spaces between a header's equals signs and its title.
		[ /(^|\n)(=+) *([^\n]+?) *\2(\n|$)/g, "$1$2$3$2$4" ],
	];
	
	let text = entry;
	for ( const [ pattern, replacement ] of rewrites )
		text = text.replace(pattern, replacement);
	
	return text;
}

// Also exposed as a global on window.
window.normalize = normalize;

const cleanupFunctions = [
// Template for adding new rules: copy this object, fill in the fields and
// put a real regular expression where the `//g,` placeholder is.
// Fields used by the rules in this file:
//   condition        boolean, or function (content) returning one
//   textBoxIncludes  string or regex looked for in the edit box
//   button           object with the button label in `text`
//   minorEdit        boolean flag
//   func             function (content) returning the rewritten wikitext
// The first two are handed to CleanupButtons.evaluateConditions and the
// whole object to addButton (both defined in CleanupButtons.js, not here).
{
	condition: false,
	textBoxIncludes: "",
	button: { text: "" },
	minorEdit: false,
	func:
		function (content) {
			// Unused in the template; kept for rules that want to compare
			// the text before and after.
			const oldContent = content;
			// Number of replacements made, reported to the user below.
			let count = 0;
			
			content = content.replace(
				// NOTE: as written, the next line is parsed as a line comment
				// (it starts with "//"), so replace() receives only the
				// callback. Type the pattern between the two slashes.
				//g,
				function(wholeMatch)
				{
					++count;
					
					return wholeMatch;
				}
			);
			
			CleanupButtons.notifyReplacements(count);
			
			return content;
		}
},
{
	condition: entryspace,
	textBoxIncludes: /[\u0080-\u009F]/,
	button: { text: "C1 controls" },
	minorEdit: true,
	func:
		function (content) {
			const oldContent = content;
			
			content = content.replace(/\u0096(&ndash;|–)|(&ndash;|–)\u0096/g, "$1");
			content = content.replace(/\u0097(&mdash;|—)|(&mdash;|—)\u0097/g, "$1");
			content = content.replace(/\u0092/g, "’");
			content = content.replace(/\u0096/g, "–");
			content = content.replace(/\u0097/g, "—");
			content = content.replace(/[\u0080-\u009F]/g, "");
			
			return content;
		}
},
{
	condition: function(entry) {
		return entryspace && normalize(entry) !== entry;
	},
	button: { text: "normalize" },
	minorEdit: true,
	func:
		function (content) {
			const oldContent = content;
			
			content = normalize(content);
			
			return content;
		}
},
{
	textBoxIncludes: /\[https?:\/\/en\.wikipedia\.org\/wiki\//,
	button: { text: "fix Wikipedia links" },
	minorEdit: true,
	func:
		function (content) {
			let count = 0;
			
			content = content.replace(
				/\[https?:\/\/en\.wikipedia\.org\/wiki\/([^ ]+) ([^\]]+)\]/g,
				function(wholeMatch, fullPageName, linkText)
				{
					++count;
					fullPageName = fullPageName.replace(/_/g, ' ');
					linkText = linkText.replace(/_/g, ' ');
					
					if (fullPageName === linkText)
						return `{{w|${fullPageName}}}`;
					else
						return `[[w:${fullPageName}|${linkText}]]`;
				}
			);
			
			CleanupButtons.notifyReplacements(count);
			
			return content;
		}
},
// One-page rule: rewrites the wikitables on Appendix:French_doublets as
// calls to a "{{/table|cols=N ...}}" subpage template.
// The replacements below depend on each other's output; keep their order.
{
	condition: pageName === "Appendix:French_doublets",
	button: { text: "templatize doublet tables" },
	minorEdit: false,
	func:
		function(content) {
			// Unused; kept from the rule template.
			const oldContent = content;
			
			// Escape refs, which contain links that shouldn't be modified.
			// Each escaped piece is stored in `escaped` and replaced in the
			// text by the placeholder %%<index>%%.
			const escaped = [];
			let i = 0;
			
			// Replaces every match of `regexString` (compiled with the "g"
			// flag) in `text` by a numbered placeholder and returns the result.
			const escape = function (text, regexString) {
				const regex = new RegExp(regexString, "g");
				text = text.replace(
					regex,
					function(match) {
						escaped[i] = match;
						let replacement = "%%" + i + "%%";
						i += 1;
						return replacement;
					}
				);
				return text;
			};
			
			// Only <ref>...</ref> pairs whose body contains no "<" are escaped.
			content = escape(content, "<ref[^>]*>[^<]+<\\/ref>");
			
			// Rows and cells
			// Row separators ("|-") are removed; header cells ("! "), cells
			// ("| ") and inline cell separators ("||", with an optional
			// following en dash) all become a single "|".
			content = content.replace(/\n\|-/g, "\n");
			content = content.replace(/\n! |\n\| | ?\|\| –?/g, "|");
			
			// Link templates
			// Text following a link template after ", " is put in parentheses.
			content = content.replace(/(\{\{(?:l|m)[^}]+\}\}), ([\w, ]+)/g, "$1 ($2)");
			// {{l|xx|term}} / {{m|xx|term}} become the bare term, or a
			// [[...]] link when further parameters (a "|") are present.
			content = content.replace(
				/\{\{(?:l|m)\|[a-z-]+\|([^}]+)}}/g,
				function (wholeMatch, link) {
					if ( link.includes("|") )
						return "[[" + link + "]]";
					else
						return link;
				});
			
			// Table to template
			// `headers` is the rest of the "{|" line from its first "|" on;
			// `firstLine` is the line after it.
			content = content.replace(
				/\n\{\|[^|\n]+(.+)\n(.+)/g,
				function (wholeMatch, headers, firstLine) {
					// NOTE(review): the pattern's "(.+)" needs at least one
					// character, so firstLine can never be "" here.
					if ( firstLine === "" )
						mw.notify("Malformed table: " + wholeMatch);
					
					// Number of "|" characters in the first line.
					const cols = firstLine.replace(/[^|]+/g, "").length;
					return "\n{{/table|cols=" + cols + "\n" + headers + "\n" + firstLine;
				});
			
			// Table end becomes template end.
			content = content.replace(/\n\|\}/g, "\n}}");
			
			// Remove italics inside parentheses.
			content = content.replace(/\(''([^']+)''\)/g, "($1)");
			
			// Unescape
			content = content.replace(
				/%%(\d+)%%/g,
				function(wholematch, number) {
					return escaped[Number(number)];
				});
			
			return content;
		}
},
{
	textBoxIncludes: /{{grc-[^}]+head=/,
	button: { text: "update grc headword" },
	minorEdit: true,
	func:
		function (content) {
			const oldContent = content;
			
			content = content.replace(
				/\{\{(grc-[^|}]+)([^}]*)\|head=([^|}]+)/g,
				"{{$1|$3$2"
			);
			
			return content;
		}
},
{
	textBoxIncludes: /'[ἀ-῾]|[ἀ-῾]'/,
	button: { text: "curly apostrophe in Ancient Greek" },
	minorEdit: true,
	func:
		function (content) {
			let count = 0;
			
			const letter = "[Α-Ωα-ω]";
			// macron, breve, rough breathing, smooth breathing,
			// diaeresis, acute, grave, circumflex
			const diacritic = "[\u0304\u0306\u0314\u0313\u0308\u0301\u0300\u0342]";
			const apostrophe = "['᾿ʼ᾽]";
			// Actually letter plus any sequence of diacritics plus apostrophe
			// followed by spacing character or end of string.
			const letterPlusApostrophe = new RegExp ("(" + letter + diacritic + "*)" + apostrophe + "(?=\\s|$)", "g");
			// Actually spacing character or pipe plus apostrophe
			// followed by letter.
			const apostrophePlusLetter = new RegExp ("(\\s|\\|)" + apostrophe + "(?=" + letter + ")", "g");
			const curlyApostrophe = "’";
			
			content = content.replace(
				/\{\{Q[^}]+\}\}/g,
				function(wholeMatch) {
					return wholeMatch.replace(
						/quote=[^|}]+/g,
						function (wholeMatch) {
							return wholeMatch
								.normalize("NFD")
								.replace(
									letterPlusApostrophe,
									function(wholeMatch, letter) {
										++count;
										return letter + curlyApostrophe;
									})
								.replace(
									apostrophePlusLetter,
									function(wholeMatch, before) {
										++count;
										return before + curlyApostrophe;
									})
								.normalize("NFC");
						});
				});
			
			CleanupButtons.notifyReplacements(count);
			
			return content;
		}
},
{
	condition: function (content) {
		return content.includes("==Ancient Greek") &&
			(content.includes('==Noun') || content.includes('==Proper noun'));
	},
	button: { text: "update declension" },
	minorEdit: false,
	func:
		function (content) {
			const oldContent = content;
			
			content = content.replace(
				/Inflection(?===)/g,
				"Declension"
			);
			
			content = content.replace(
				/References(?===)/g,
				"Further reading"
			);
			
			let genitive = content.match(/\{\{grc-(?:noun|proper noun)\|([^|]+)/);
			if (genitive !== null)
				genitive = genitive[1];
			
			content = content.replace(
				/\{\{grc-decl[^|]+(\|[^}]+)\}\}/g,
				function (wholeMatch, templateContent) {
					const out = [];
					templateContent.replace(
						/\|(?:([^=|]+)=)?([^|]+)/g,
						function (wholeMatch, key, value) {
							if (key === "form")
								out.push(`|${key}=${value}`);
						});
					out.push(`|${pageName}${genitive ? "|" + genitive : ""}`);
					return `{{grc-decl${out.join("")}}}`;
				});
			
			return content;
		}
},
{
	condition: false,
	textBoxIncludes: /0x[0-9a-f]+/,
	button: { text: "uppercase hexadecimal" },
	minorEdit: false,
	func:
		function (content) {
			return content.replace(
				/0x([0-9a-f]+)/g,
				function(wholeMatch, digits) {
					return `0x${digits.toUpperCase()}`;
				});
		}
},
{
	condition: function (content) {
		const jaReadingsTemplate = content.match(/\{\{ja-readings[^}]+\}\}/);
		if (jaReadingsTemplate === null)
			return null;
		
		if (jaReadingsTemplate[0].match(/\[\[/))
			return true;
		else
			return false;
	},
	button: { text: "update {{ja-readings}}" },
	minorEdit: false,
	func:
		function (content) {
			const oldContent = content;
			
			content = content.replace(
				/(?:\*\s+)?\{\{ja-readings/g,
				"{{subst:#invoke:User:Suzukaze-c/02|test_13"
			);
			
			return content;
		}
},
// Cleans up Ancient Greek non-lemma entries that still carry a manual
// "Ancient Greek ... forms" category: rewrites the headword line, drops
// the category, and updates definition lines and alternative forms.
// In several places text is wrapped in {{subst:chars|grc|2=...}} when its
// NFD-decomposed form contains α, ι or υ.
// `count` is the total of all callback invocations that bump it; it is
// what gets reported to the user at the end.
{
	textBoxIncludes: /Ancient Greek (?:(?:proper )?noun|adjective|verb) forms/,
	button: { text: "cleanup" },
	minorEdit: true,
	func:
		function (content) {
			let count = 0;
			
			// Headword line: the line after a level-3 Noun/noun/Proper noun/
			// Adjective/Verb header is replaced by
			// {{head|grc|<lowercased PoS> form}}, with |head= built from the
			// page name when the old line contains α, ι or υ.
			content = content.replace(
				/(=== ?((?:Proper )?[Nn]oun|Adjective|Verb) ?===\n)(.+)/g,
				function(wholeMatch, header, POS, headword) {
					++count;
					
					let head = "";
					if ( headword.normalize("NFD").match(/[αιυ]/) )
						head = "|head={{subst:chars|grc|2=" + pageName + "}}";
					
					POS = POS.toLowerCase();
					
					return header + "{{head|grc|" + POS + " form" + head + "}}";
				});
			
			// Manual category links (with an optional sort key) and the
			// whitespace around them collapse to one blank line.
			content = content.replace(
				/\s*\[\[Category:Ancient Greek (?:(?:proper )?noun|adjective|verb) forms(?:\|[^\]]+)?\]\]\s*/g,
				function() {
					++count;
					
					return "\n\n";
				});
			
			// Definition text: "." does not cross newlines, so each match is
			// confined to a single line. The counter is bumped for every
			// match, whether or not anything inside it changes.
			content = content.replace(
				/(?:# .+)+/g,
				function(wholeMatch) {
					++count;
					
					// Wrap the parameter that follows "inflection of|"
					// (optionally after "lang=grc|") when it contains α, ι or υ.
					return wholeMatch.replace(
						/(inflection of(?:\|lang=grc)?\|)([^|]+)/g,
						function(wholeMatch, before, word) {
							if ( wholeMatch.normalize("NFD").match(/[αιυ]/) ) {
								word = "{{subst:chars|grc|2=" + word + "}}";
								return before + word;
							} else
								return wholeMatch;
						});
				});
			
			// Alternative forms section, up to the next "===".
			content = content.replace(
				/===Alternative forms===(?:\n.+\n*)+?(?====)/g,
				function(wholeMatch) {
					// {{l|...}} becomes {{alter|...}}.
					wholeMatch = wholeMatch.replace(
						/\{\{l\|/g,
						function() {
							++count;
							
							return "{{alter|";
						})
					// The terms of {{alter|grc|...}} (up to "||" or "}") are
					// wrapped as one chunk; their pipes are turned into "!"
					// before being placed inside the subst call.
					.replace(
						/(\{\{alter\|grc)((?:\|[^|}]+)+)(?=\|\||\})/g,
						function(wholeMatch, before, terms) {
							if ( terms.normalize("NFD").match(/[αιυ]/) ) {
								terms = terms.replace(/\|/g, "!");
								terms = "{{subst:chars|grc|2=" + terms + "}}";
								return before + terms;
							} else
								return wholeMatch;
						});
					
					return wholeMatch;
				});
			
			CleanupButtons.notifyReplacements(count);
			
			return content;
		}
},
{
	textBoxIncludes: /Category:(:?Vietnamese|Chinese|vi|zh)/,
	button: { text: "templatize categories" },
	minorEdit: true,
	func:
		function (content) {
			let count = 0;
			
			const getLangCode = function(langName) {
				return { Chinese: "zh", Vietnamese: "vi" }[langName];
			};
			
			content = content.replace(
				/\[\[Category:(Vietnamese|Chinese) ([^\]|]+)(?:\|[^\]]+)?\]\]/g,
				function(wholeMatch, langName, cat) {
					++count;
					
					const langCode = getLangCode(langName);
					
					return "{{cln|" + langCode + "|" + cat + "}}";
				})
			.replace(
				/\[\[Category:(vi|zh):([^|\]]+)(?:\|[^\]]+)?\]\]/g,
				function(wholeMatch, langCode, cat) {
					++count;
					
					return "{{C|" + langCode + "|" + cat + "}}";
				});
			
			const combineCats =
			function(wholeMatch, template, langCode, cat1, cat2) {
				++count;
				
				return "{{" + template + "|" + langCode + cat1 + cat2 + "}}";
			};
			
			while ( content.match(/\{\{(C|cln)\|(vi|zh)((?:\|[^}\n]+)+)\}\}\s+\{\{\1\|\2((?:\|[^|}\n]+)+)\}\}/) ) {
				const oldContent = content;
				content = content.replace(
					/\{\{(C|cln)\|(vi|zh)((?:\|[^}\n]+)+)\}\}\s+\{\{\1\|\2((?:\|[^}\n]+)+)\}\}/g,
					combineCats
				);
				if ( oldContent === content )
					break;
			}
			
			CleanupButtons.notifyReplacements(count);
			
			return content;
		}
},
{
	textBoxIncludes: "<nowiki>{{",
	button: { text: "add {{temp}}" },
	minorEdit: true,
	func:
		function (content) {
			let count = 0;
			
			content = content.replace(
				/<nowiki>\{\{((?:[^{}\n]+|\{\{[^}\n]+\}\})+\}\})<\/nowiki>/g,
				function(wholeMatch, match1) {
					++count;
					return "{{temp|" + match1;
				}
			);
			
			CleanupButtons.notifyReplacements(count);
			
			return content;
		}
},
{
	textBoxIncludes: /<\/?tt>/,
	button: { text: "replace &lt;tt&gt; with &lt;code&gt;" },
	minorEdit: true,
	func:
		function (content) {
			let count = 0;
			
			content = content.replace(
				/<(\/?)tt>/g,
				function(wholematch, closing) {
					++count;
					return `<${closing}code>`;
				}
			);
			
			CleanupButtons.notifyReplacements(count);
			
			return content;
		}
},

/*	Replaces, for example, <code>1=</code> with {{para|1}},
	and <tt> with <code>.									*/
{
	textBoxIncludes: /<(code|tt)\>\|?[^=\n<]+=[^<]*<\/(code|tt)\>/,
	button: {
		text: "add {{para}}"
	},
	minorEdit: true,
	func: function (content) {
		let count1 = 0, count2 = 0, count3 = 0, count4 = 0;
		
		content = content.replace(
			/<(?:code|tt)\>\|?([^=\n<]+)=<\/(?:code|tt)\>/g,
			function(wholematch, match1) {
				++count1;
				return `{{para|${match1}}}`;
			})
		.replace(
			/<(?:code|tt)\>\|?([^=\n<]+)=([^<|]+)<\/(?:code|tt)\>/g,
			function(wholematch, match1, match2) {
				++count1;
				return `{{para|${match1}|${match2}}}`;
			})
		.replace(
			/\* *(\{\{para[^}]+\}\}) *- *(.+)/g,
			function(wholematch, match1, match2) {
				++count2;
				return `; ${match1}\n: ${match2}`;
			})
		.replace(
			/<(\/?)tt\>/g,
			function(wholematch, match1) {
				++count3;
				match1 = match1 || "";
				return `<${match1}code>`;
			})
		.replace(
			/\{\{para[^}]+\}\}/g,
			function (wholematch) {
				if ( !wholematch.includes("'''") )
					return wholematch.replace(
						/''([^']+)''/g,
						function(wholematch, match1) {
							++count4;
							return `<var>${match1}</var>`;
						});
				else
					return wholematch;
			});
		
		const totalcount = count1 + count2 + count3 + count4;
		
		CleanupButtons.notifyReplacements(totalcount);
		
		return content;
	}
		
},
{
	condition: function(entry) {
		return isTemplate && !pageName.endsWith("/documentation") && !entry.includes("{{documentation}}")
	},
	button: { text: "add {{documentation}}" },
	minorEdit: true,
	func: function(content) {
		content = content.trim() + "<noinclude>{{documentation}}</noinclude>";
		return content;
	},
}, ];

// Register a button for every rule whose conditions currently hold.
const buttons = new CleanupButtons();

cleanupFunctions.forEach(function (buttonInfo) {
	const { condition, textBoxIncludes } = buttonInfo;
	
	if ( !CleanupButtons.evaluateConditions(condition, textBoxIncludes) )
		return;
	
	buttons.addButton(buttonInfo);
});

}); // $.when.done
} // if

// </nowiki>