# Jump to content
#
# User:Scsbot/wikised
#
# From Wiktionary, the free dictionary
# Configuration defaults.
#
# Settings start out empty so that the later stages (command-line options,
# login configuration, master script) can tell "not specified yet" from an
# explicit value; each stage only fills in what is still empty, and the
# final fallback values are applied during argument validation.

host=
baseurl=
login="?title=Special:Userlogin"	# appended to $baseurl to fetch the login form

driverscript=
reason=
minoredit=

okaytocreate=
mustcreate=

delay=			# sleep between edits (seconds)

checkscript=
editscript=
filter=
postcheckscript=

# Deliberately empty rather than "no": the master script only sets
# checkdiffs when it is still empty, so a pre-set "no" here would make the
# master script's checkdiffs setting impossible to honor.  The fallback to
# "no" happens in the validation stage.
checkdiffs=
expecteddelete=
expectedinsert=

rm="/bin/rm -f"		# command used to delete temporary/form files

username=
password=

loginconfig=

masterscript=

configs="wikised.configs"	# database file searched for login configurations

# argument parse
#
# Consumes leading arguments that start with "-" and stores their values in
# the corresponding configuration variables.  Parsing stops at the first
# argument that does not start with "-" (or when arguments run out); the
# remaining positional arguments are left in place for the code that follows.
# Asking for help prints the option summary and exits successfully instead
# of falling through into validation or an actual run.

while :
do	case $1 in
		-\?|-h|-help|--help)
			# $usage is honored if something defined it; otherwise
			# fall back to a generic synopsis.
			echo "${usage:-usage: $0 [options] [masterscript]}"
			echo "options:"
			echo "	-host h	set host of wiki"
			echo "	-url u	set full base URL of wiki"
			echo "	-why m	set edit summary message"
			echo "	-m	mark edits as minor"
			echo "	-d f	set driver/data file"
			echo "	-cre	okay to create new pages"
			echo "	-new	don't overwrite old pages (must create)"
			echo "	-s s	sleep for s seconds between edits"
			echo "	-pre s	set pre-check script"
			echo "	-post s	set post-check script"
			echo "	-edit s	set main edit script"
			echo "	-filter	edit script is a pure filter"
			echo "	-chkd	check diffs"
			echo "	-ins n	expected insert"
			echo "	-del n	expected delete"
			echo "	-user u	set username"
			echo "	-pass p	set password"
			echo "	-cfg c	login configuration c"
			echo "	-f s	master setup script s"
			echo "	-?,-h	print this help"
			exit 0;;

		# Options taking a value read it from $2 and shift once here;
		# the shift at the bottom of the loop removes the option itself.
		-host)	host=$2
			shift;;
		-u|-url|-baseurl)
			baseurl=$2
			shift;;
		-why|-reason|-message|-summary|-editsummary)
			reason=$2
			shift;;
		-m|-minor)
			minoredit=yes;;
		-d|-driver|-driverscript|-data)
			driverscript=$2
			shift;;
		-cre|-create|-okaytocreate)
			okaytocreate=yes;;
		-new|-mustcreate)
			mustcreate=yes;;
		-s|-sleep|-delay)
			delay=$2
			shift;;
		-pre|-precheck|-precheckscript|-checkscript)
			checkscript=$2
			shift;;
		-post|-postcheckscript)
			postcheckscript=$2
			shift;;
		-edit|-editscript)
			editscript=$2
			shift;;
		-filter)
			filter=yes;;
		-chkd|-checkdiff|-checkdiffs)
			checkdiffs=yes;;
		-ins|-nins|-ninsert|-expectedinsert)
			expectedinsert=$2
			shift;;
		-del|-ndel|-ndelete|-expecteddelete)
			expecteddelete=$2
			shift;;
		-user|-username)
			username=$2
			shift;;
		-pass|-password)
			password=$2
			shift;;
		-cfg|-config)
			loginconfig=$2
			shift;;
		-f|-masterscript)
			masterscript=$2
			shift;;
		# Unknown options are reported but (as before) not fatal.
		-*)	echo "$0: unrecognized option $1" 1>&2;;
		# First non-option argument (or no arguments left): stop parsing.
		*)	break;;
	esac
	shift
done

# A leftover positional argument names the master script.  It is an error
# to give the master script both this way and through the option that sets
# $masterscript.
if test $# -gt 0
then
	if test -z "$masterscript"
	then
		masterscript=$1
		shift
	else
		echo "master script specified twice (use -f or argument)" >&2
		exit 1
	fi
fi

# login config shortcut (but don't override cmd line)
#
# Looks the named configuration up with dbgrep in $configs and walks the
# returned entry one line at a time (via the external "line" tool).  Each
# line is split into a key (up to the first blank or tab) and a value (the
# rest, minus trailing spaces).  host and baseurl are only taken when still
# empty; defaultlogin and confirm are always taken from the entry.

if test -n "$loginconfig"
then
	ent=`dbgrep -i -k name $loginconfig $configs`
	if test -n "$ent"
	then
		nl=`echo "$ent" | wc -l`
		i=1
		until test $i -gt $nl
		do
			line=`echo "$ent" | line $i`
			k=`expr "$line" : '\([^ 	]*\).*'`
			v=`expr "$line" : '[^ 	]*[ 	]*\(.*\)'`
			v=`echo "$v" | sed 's/  *$//'`
			case $k in
				host)
					test -n "$host" || host=$v;;
				baseurl)
					test -n "$baseurl" || baseurl=$v;;
				defaultlogin)
					defaultlogin=$v;;
				confirm)
					confirm=$v;;
			esac
			i=`expr $i + 1`
		done
	else
		# Unknown configuration: report it and carry on.
		echo "no such configuration $loginconfig" >&2
		# exit?
	fi
fi

# "master script" (but don't override cmd line)

# The master script is a text file of "key value" lines.  Each line is split
# into a key (up to the first blank or tab) and a value (the rest, minus
# trailing spaces).  A recognized key only fills in its setting when that
# setting is still empty, so values given earlier take precedence.
# Boolean settings treat "0", "no" and "false" as no and anything else as yes.
if test -n "$masterscript"
then
	if test ! -r "$masterscript"
	then
		echo "master script $masterscript does not exist or is not readable" >&2
		# explicit failure status; a bare "exit" would report the
		# status of the echo above, i.e. success
		exit 1
	fi

	# would like to use "while read k v < $masterscript",
	# but that sets vars in subshell and so is useless

	# file name is quoted so paths containing blanks work
	nl=`wc -l < "$masterscript"`
	i=1
	while test $i -le $nl
	do
		# "line" is an external tool; here it is asked for line $i of the file
		line=`line $i "$masterscript"`
		k=`expr "$line" : '\([^ 	]*\).*'`
		v=`expr "$line" : '[^ 	]*[ 	]*\(.*\)'`
		v=`echo "$v" | sed 's/  *$//'`

		# yes/no reading of the value, used by the boolean keys below
		case $v in
			0|no|false)	boolv=no;;
			*)		boolv=yes;;
		esac

		case $k in
			host)		if test -z "$host"; then host=$v; fi;;
			baseurl)	if test -z "$baseurl"; then baseurl=$v; fi;;
			driverscript)	if test -z "$driverscript"; then driverscript=$v; fi;;
			reason)		if test -z "$reason"; then reason=$v; fi;;
			minoredit)	if test -z "$minoredit"; then minoredit=$boolv; fi;;
			okaytocreate)	if test -z "$okaytocreate"; then okaytocreate=$boolv; fi;;
			mustcreate)	if test -z "$mustcreate"; then mustcreate=$boolv; fi;;
			delay)		if test -z "$delay"; then delay=$v; fi;;
			checkscript)	if test -z "$checkscript"; then checkscript=$v; fi;;
			editscript)	if test -z "$editscript"; then editscript=$v; fi;;
			filter)		if test -z "$filter"; then filter=$boolv; fi;;
			postcheckscript) if test -z "$postcheckscript"; then postcheckscript=$v; fi;;
			checkdiffs)	if test -z "$checkdiffs"; then checkdiffs=$boolv; fi;;
			expecteddelete)	if test -z "$expecteddelete"; then expecteddelete=$v; fi;;
			expectedinsert)	if test -z "$expectedinsert"; then expectedinsert=$v; fi;;
			username)	if test -z "$username"; then username=$v; fi;;
			password)	if test -z "$password"; then password=$v; fi;;
		esac
		i=`expr $i + 1`
	done
fi

# Most args/flags are required.  Check them all.

errs=0

if test -z "$baseurl"
then
	if test -n "$host"
	then	baseurl="http://$host/w/index.php"
	fi
fi

if test -z "$baseurl"
then
	echo "base url not specified; use -url, or -h to specify host" >&2
	errs=`expr $errs + 1`
fi

if test -z "$driverscript"
then
	echo "driver script not specified; use -d to specify" >&2
	errs=`expr $errs + 1`
fi

if test -z "$minoredit"
then	minoredit=no
fi

if test -z "$okaytocreate"
then	okaytocreate=no
fi

if test -z "$mustcreate"
then	mustcreate=no
fi

if test -z "$delay"
then	delay=60
fi

if test -z "$editscript"
then
	echo "edit script not specified; use -edit to specify" >&2
	errs=`expr $errs + 1`
fi

if test -z "$filter"
then	filter=no
fi

if test -z "$checkdiffs"
then	checkdiffs=no
fi

if test -z "$username"
then	username=$defaultlogin
fi

if test -z "$username"
then
	echo "login user name not specified; use -user to specify" >&2
	errs=`expr $errs + 1`
fi

if test -z "$password"
then
	: attempt to look up
fi

if test $errs -gt 0
then	exit 1
fi

# Interactive step before touching the wiki.
# - No password yet: prompt for one.  When stdin is a terminal, echo is
#   switched off while the password is typed and restored afterwards (also
#   if the script is interrupted).  "read -r" keeps backslashes in the
#   password intact.
# - Password already known but the login configuration set confirm=yes:
#   require an answer starting with y/Y, otherwise exit with failure.
if test -z "$password"
then
	if test "$confirm" = yes
	then	echo -n "enter password for $username to edit live wiki: "
	else	echo -n "enter password for $username: "
	fi
	if test -t 0
	then
		sttysave=`stty -g`
		trap 'stty "$sttysave"; exit 1' 1 2 15
		stty -echo
		read -r password
		stty "$sttysave"
		trap - 1 2 15
		# the user's newline was not echoed; finish the prompt line
		echo
	else
		read -r password
	fi
elif test "$confirm" = yes
then	echo -n "Confirm editing live wiki: "
	read ans
	case $ans in
		[Yy]*)	;;
		*)	exit 1;;
	esac
fi

# Log files for this run are named after the driver script plus a
# YYYYMMDD.HHMM timestamp.  The date format uses %m and %d; writing them as
# $m and $d would expand (normally unset) shell variables and silently drop
# the month and day from the name.
logfilebase=$driverscript.`date +%Y%m%d.%H%M`
acceptlog=$logfilebase.accept
rejectlog=$logfilebase.reject

# Scratch files, made unique per run by the process id.  $tf.cook is
# additionally used as the cookie file by the fetch/submit steps.
# NOTE(review): these are predictable names in /tmp; consider mktemp.
tf=/tmp/tmpfile$$
tf4=/tmp/tmpfile$$.4

echo -n "Fetching login page..."
httpget -cookies -cookiefile $tf.cook "$baseurl$login" > $tf
echo " got it."

formsetup formtag$$ $tf || exit

formselect formtag$$ userlogin || exit

formcheckfields formtag$$ wpName wpPassword || exit

echo $username > `formgettmpfile formtag$$ wpName`
echo $password > `formgettmpfile formtag$$ wpPassword`

formsubmitbutton formtag$$ wpLoginattempt || exit

echo -n "Submitting login information..."
formsubmit formtag$$ $baseurl $tf.cook > $tf4
stat=$?
echo " got it."

if test $stat -ne 0; then exit 1; fi

if grep -q "Incorrect password entered" $tf4
then
	echo "bad password; login failed" >&2
	exit 1
fi


formfinish formtag$$

# ---

# Main edit loop.
#
# Each line of the driver script names one article, optionally followed by
# parameters that are passed on to the edit script.  For every article the
# loop: sleeps $delay seconds, fetches the edit form, applies the
# create/must-create, redirect and check-script gates, runs the edit script
# on the form's text area, optionally verifies the resulting diff against
# the expected insert/delete counts, and finally saves the page.  Articles
# that are skipped are recorded with a reason in $rejectlog, saved ones in
# $acceptlog.
#
# The loop is the tail of a pipeline, so an "exit" inside it may only end
# the loop (in shells that run pipeline stages in a subshell); the temporary
# files are removed after the loop.

cat "$driverscript" |
while read article editscriptparms
do

sleep $delay

echo -n "Fetching \"$article\" to edit..."
httpget -cookies -cookiefile $tf.cook "$baseurl?title=$article&action=edit" > $tf
echo " got it."

if grep -q "<title>User is blocked" $tf ||
		grep -q "Your user name or IP address has been blocked" $tf
then
	echo "bot user $username blocked!" >&2
	exit
fi

formsetup formtag$$ $tf || exit

formselect formtag$$ editform || exit

formcheckfields formtag$$ wpTextbox1 || exit

tftextarea=`formgettmpfile formtag$$ wpTextbox1`

# ---

# Imperfect test for missing page, since it depends on text that can
# be and typically is customized per wiki.  I'm using two different
# strings from each of generic mediawiki, Wikipedia, and Wiktionary.
# The failure mode if it fails to notice that the page doesn't exist
# isn't too bad, because the check and/or edit scripts will usually
# fail on a blank textarea.

missing=no

if grep -q "does not yet have an entry for $article" $tf ||
	grep -q "start .* entry.* type .* box .* click .*[Ss]ave.*changes .* visible immediately" $tf ||
	grep -q "followed a link to a page that doesn't exist yet" $tf ||
	grep -q "To create the page, start typing in the box below" $tf ||
	grep -q "does not have an article with this exact name" $tf ||
	grep -q "To start a page called $article, type .* box .* and .* [Ss]ave .* changes .* visible immediately" $tf
then
	missing=yes
fi

if test $mustcreate = yes -a $missing = no
then
	echo "$article: page already exists" >&2
	# XXX duplicated
	formfinish formtag$$
	echo "$article	$editscriptparms	already exists" >> $rejectlog
	continue
elif test $okaytocreate = no -a $missing = yes
then
	echo "$article: no such page yet" >&2
	# XXX duplicated
	formfinish formtag$$
	echo "$article	$editscriptparms	no such page" >> $rejectlog
	continue
fi

if grep -i -q '^#redirect' < $tftextarea
then	
	# XXX duplicated
	formfinish formtag$$
	echo "$article is a redirect" >&2
	echo "$article	$editscriptparms	is redirect" >> $rejectlog
	continue
fi

# NOTE(review): $checkscriptparms is not assigned anywhere in this script;
# confirm whether $editscriptparms was intended here.
if test -z "$checkscript" || sh $checkscript $checkscriptparms < $tftextarea
then	:
else
	# XXX duplicated
	formfinish formtag$$
	echo "$article	$editscriptparms	check script failure" >> $rejectlog
	continue
fi

# ---

# XXX questionable who/where should do this unescaping, and how aggressively
filter "sed -e 's/&lt;/</g' -e 's/&gt;/>/g' -e 's/&amp;/\&/g' -e 's/&quot;/\"/g'" $tftextarea

edit_script_output=""

# In filter mode the edit script is run through the external "filter" tool
# on the text area file; otherwise it is given the file name as its first
# argument and whatever it prints is captured.
if test "$filter" = "yes"
then	filter "sh $editscript $editscriptparms" $tftextarea
else	edit_script_output=`sh $editscript $tftextarea $editscriptparms`
fi

# $? here is the status of whichever branch ran above.
if test $? -ne 0
then
	echo "$edit_script_output" >&2
	# XXX duplicated
	formfinish formtag$$
	echo "$article	$editscriptparms	edit script failure" >> $rejectlog
	continue
fi

# An edit script may announce "expect N insertions" to set the insert count
# used by the diff check below.
# NOTE(review): the value persists into later iterations of the loop;
# confirm that is intended.
if test -n "$edit_script_output"
then
	if echo "$edit_script_output" | grep -q 'expect.*insertions'
	then
		expectedinsert=`expr "$edit_script_output" : 'expect \([0-9]*\) insertions'`
	fi
fi

# ---

if test "$checkdiffs" = "yes"
then

formsubmitbutton formtag$$ wpDiff || exit

echo -n "Fetching diffs..."
formsubmit formtag$$ $baseurl $tf.cook > $tf4
stat=$?
echo " done."

if test $stat -ne 0; then exit 1; fi

formfinish formtag$$

ndel=`grep -c "td class=['\\"]diff-deletedline['\\"]" $tf4`
nadd=`grep -c "td class=['\\"]diff-addedline['\\"]" $tf4`

# The check must fail safe.  If either expected count is missing or not a
# plain number, a numeric "test" comparison would report an error rather
# than a mismatch, the "if" would treat that as "no mismatch", and the edit
# would be saved unchecked.  So such values count as a failed check.
diffok=yes
case $expecteddelete in
	''|*[!0-9]*)	diffok=no;;
esac
case $expectedinsert in
	''|*[!0-9]*)	diffok=no;;
esac

if test $diffok = yes
then
	if test "$ndel" -ne "$expecteddelete" -o "$nadd" -ne "$expectedinsert"
	then	diffok=no
	fi
fi

if test $diffok = no
then
	echo "edit would cause $nadd/$ndel insertions/deletions, but expected $expectedinsert/$expecteddelete" >&2
	echo "Canceling." >&2
	echo "$article	$editscriptparms	diff check failure" >> $rejectlog
	continue
fi

# The diff response carries the edit form again; continue from that copy.
formsetup formtag$$ $tf4 || exit

formselect formtag$$ editform || exit

formcheckfields formtag$$ wpTextbox1 || exit

tftextarea=`formgettmpfile formtag$$ wpTextbox1`

# XXX questionable who/where should do this unescaping, and how aggressively
# XXX also this is in two places, though I missed it at first, leading
# to a bad edit *not* caught by the checkdiffs bad-edit check :-( # :-( :-(
filter "sed -e 's/&lt;/</g' -e 's/&gt;/>/g' -e 's/&amp;/\&/g' -e 's/&quot;/\"/g'" $tftextarea

fi

# ---

# Form fields are represented by files: the watch-this field's file is
# removed, the minor-edit field's file is written or removed according to
# $minoredit, and the summary is filled in.
tfwatch=`formgettmpfile formtag$$ wpWatchthis`
if test -n "$tfwatch"
then
	$rm $tfwatch
fi

tfminor=`formgettmpfile formtag$$ wpMinoredit`
if test -n "$tfminor"
then
	if test $minoredit = yes
	then	echo 1 > $tfminor
	else	$rm $tfminor
	fi
fi

tfsummary=`formgettmpfile formtag$$ wpSummary`
if test -n "$tfsummary"
then
	echo "edited by robot" > $tfsummary
	if test -n "$reason"
	then
		# append ":" to the first line, then add the reason on the next
		(echo 1s/$/:/; echo w) | ed - $tfsummary
		echo "$reason" >> $tfsummary
	fi
fi

formsubmitbutton formtag$$ wpSave || exit

echo -n "Submitting edits..."
formsubmit formtag$$ $baseurl $tf.cook > $tf4
stat=$?
echo " done."

if test $stat -ne 0; then exit 1; fi

formfinish formtag$$

if grep -q "<title>.*[Ee]dit [Cc]onflict" $tf4
then
	echo "edit conflict" >&2
	echo "$article	$editscriptparms	edit conflict" >> $rejectlog
	continue
fi

echo "$article	$editscriptparms" >> $acceptlog

done

# Remove the scratch files and the cookie file.
$rm $tf $tf4 $tf.cook