User:Scsbot/wikised
Appearance
# wikised: log in to a MediaWiki site and apply an edit script to every
# page listed in a driver file, logging accepted and rejected edits.
#
# Settings come from, in decreasing priority: command-line options, the
# login configuration (-cfg, looked up in $configs; host/baseurl only),
# and the master script (-f, or the first non-option argument).
#
# Relies on helper commands that are not defined in this file and must be
# on PATH: httpget, formsetup, formselect, formcheckfields, formgettmpfile,
# formsubmitbutton, formsubmit, formfinish, filter, line, dbgrep.

usage="usage: $0 [options] [masterscript]"

host=
baseurl=
login="?title=Special:Userlogin"
driverscript=
reason=
minoredit=
okaytocreate=
mustcreate=
delay=              # sleep between edits (seconds)
checkscript=
editscript=
filter=
postcheckscript=    # parsed, but not used anywhere in this file
# Deliberately empty rather than "no": the master script only supplies a
# value when this is still unset.  The "no" default is applied later.
checkdiffs=
expecteddelete=
expectedinsert=
rm="/bin/rm -f"
username=
password=
loginconfig=
masterscript=
configs="wikised.configs"
# Only ever set from the login configuration; initialized so that stray
# environment variables of the same name cannot leak in.
defaultlogin=
confirm=

# Command handed to the external `filter` helper to undo the HTML escaping
# of the edit textarea.  &amp; is handled last so that text such as
# "&amp;lt;" is unescaped exactly once (to "&lt;").
# XXX questionable who/where should do this unescaping, and how aggressively
unescape="sed -e 's/&lt;/</g' -e 's/&gt;/>/g' -e 's/&quot;/\"/g' -e 's/&amp;/\\&/g'"

# normalize_bool VALUE
# Prints "no" when VALUE is 0, no or false; prints "yes" for anything else.
normalize_bool() {
    case $1 in
    0|no|false) echo no;;
    *)          echo yes;;
    esac
}

# split_kv LINE
# Sets k to the first space-delimited word of LINE and v to the rest of the
# line with leading and trailing spaces removed.  The "x" prefix keeps expr
# from mistaking the text for one of its own operators.
split_kv() {
    k=$(expr "x$1" : 'x\([^ ]*\).*')
    v=$(expr "x$1" : 'x[^ ]*[ ]*\(.*\)')
    v=$(printf '%s\n' "$v" | sed 's/ *$//')
}

# reject ARTICLE PARMS WHY
# Appends one line describing a skipped article to the reject log.
reject() {
    printf '%s\n' "$1 $2 $3" >> "$rejectlog"
}

# argument parse

while test $# -gt 0
do
    # stop at the first non-option argument
    case $1 in
    -*) ;;
    *)  break;;
    esac

    case $1 in
    -\?|-h|-help|--help)
        echo "$usage"
        cat <<'EOF'
options:
 -host h set host of wiki
 -url u set full base URL of wiki
 -why m set edit summary message
 -m mark edits as minor
 -d f set driver/data file
 -cre okay to create new pages
 -new don't overwrite old pages (must create)
 -s s sleep for s seconds between edits
 -pre s set pre-check script
 -post s set post-check script
 -edit s set main edit script
 -filter edit script is a pure filter
 -chkd check diffs
 -ins n expected insert
 -del n expected delete
 -user u set username
 -pass p set password
 -cfg c login configuration c
 -f s master setup script s
 -?,-h print this help
EOF
        # Asking for help must never go on to log in and edit pages.
        exit 0;;

    -host)
        host=$2
        shift;;

    -u|-url|-baseurl)
        baseurl=$2
        shift;;

    -why|-reason|-message|-summary|-editsummary)
        reason=$2
        shift;;

    -m|-minor)
        minoredit=yes;;

    -d|-driver|-driverscript|-data)
        driverscript=$2
        shift;;

    -cre|-create|-okaytocreate)
        okaytocreate=yes;;

    -new|-mustcreate)
        mustcreate=yes;;

    -s|-sleep|-delay)
        delay=$2
        shift;;

    -pre|-precheck|-precheckscript|-checkscript)
        checkscript=$2
        shift;;

    -post|-postcheckscript)
        postcheckscript=$2
        shift;;

    -edit|-editscript)
        editscript=$2
        shift;;

    -filter)
        filter=yes;;

    -chkd|-checkdiff|-checkdiffs)
        checkdiffs=yes;;

    -ins|-nins|-ninsert|-expectedinsert)
        expectedinsert=$2
        shift;;

    -del|-ndel|-ndelete|-expecteddelete)
        expecteddelete=$2
        shift;;

    -user|-username)
        username=$2
        shift;;

    -pass|-password)
        password=$2
        shift;;

    -cfg|-config)
        loginconfig=$2
        shift;;

    -f|-masterscript)
        masterscript=$2
        shift;;

    *)
        echo "$0: unrecognized option $1" 1>&2;;
    esac

    shift
done

if test $# -ge 1
then
    if test -n "$masterscript"
    then
        echo "master script specified twice (use -f or argument)" >&2
        exit 1
    fi
    masterscript=$1
    shift
fi

# login config shortcut (but don't override cmd line)

if test -n "$loginconfig"
then
    ent=$(dbgrep -i -k name "$loginconfig" "$configs")
    if test -z "$ent"
    then
        echo "no such configuration $loginconfig" >&2
        # exit?
    else
        nl=$(printf '%s\n' "$ent" | wc -l)
        i=1
        # $nl is left unquoted: some wc implementations pad the count
        while test "$i" -le $nl
        do
            line=$(printf '%s\n' "$ent" | line "$i")
            split_kv "$line"

            case $k in
            host)         if test -z "$host"; then host=$v; fi;;
            baseurl)      if test -z "$baseurl"; then baseurl=$v; fi;;
            defaultlogin) defaultlogin=$v;;
            confirm)      confirm=$v;;
            esac

            i=$((i + 1))
        done
    fi
fi

# "master script" (but don't override cmd line)

if test -n "$masterscript"
then
    if test ! -r "$masterscript"
    then
        echo "master script $masterscript does not exist or is not readable" >&2
        exit 1
    fi

    # would like to use "while read k v < $masterscript",
    # but that sets vars in subshell and so is useless

    nl=$(wc -l < "$masterscript")
    i=1
    # $nl is left unquoted: some wc implementations pad the count
    while test "$i" -le $nl
    do
        line=$(line "$i" "$masterscript")
        split_kv "$line"

        case $k in
        host)            if test -z "$host"; then host=$v; fi;;
        baseurl)         if test -z "$baseurl"; then baseurl=$v; fi;;
        driverscript)    if test -z "$driverscript"; then driverscript=$v; fi;;
        reason)          if test -z "$reason"; then reason=$v; fi;;
        minoredit)       if test -z "$minoredit"; then minoredit=$(normalize_bool "$v"); fi;;
        okaytocreate)    if test -z "$okaytocreate"; then okaytocreate=$(normalize_bool "$v"); fi;;
        mustcreate)      if test -z "$mustcreate"; then mustcreate=$(normalize_bool "$v"); fi;;
        delay)           if test -z "$delay"; then delay=$v; fi;;
        checkscript)     if test -z "$checkscript"; then checkscript=$v; fi;;
        editscript)      if test -z "$editscript"; then editscript=$v; fi;;
        filter)          if test -z "$filter"; then filter=$(normalize_bool "$v"); fi;;
        postcheckscript) if test -z "$postcheckscript"; then postcheckscript=$v; fi;;
        checkdiffs)      if test -z "$checkdiffs"; then checkdiffs=$(normalize_bool "$v"); fi;;
        expecteddelete)  if test -z "$expecteddelete"; then expecteddelete=$v; fi;;
        expectedinsert)  if test -z "$expectedinsert"; then expectedinsert=$v; fi;;
        username)        if test -z "$username"; then username=$v; fi;;
        password)        if test -z "$password"; then password=$v; fi;;
        esac

        i=$((i + 1))
    done
fi

# Most args/flags are required.  Check them all.

errs=0

if test -z "$baseurl" && test -n "$host"
then
    baseurl="http://$host/w/index.php"
fi

if test -z "$baseurl"
then
    echo "base url not specified; use -url, or -host to specify host" >&2
    errs=$((errs + 1))
fi

if test -z "$driverscript"
then
    echo "driver script not specified; use -d to specify" >&2
    errs=$((errs + 1))
fi

if test -z "$minoredit"; then minoredit=no; fi
if test -z "$okaytocreate"; then okaytocreate=no; fi
if test -z "$mustcreate"; then mustcreate=no; fi
if test -z "$delay"; then delay=60; fi

if test -z "$editscript"
then
    echo "edit script not specified; use -edit to specify" >&2
    errs=$((errs + 1))
fi

if test -z "$filter"; then filter=no; fi
if test -z "$checkdiffs"; then checkdiffs=no; fi
if test -z "$username"; then username=$defaultlogin; fi

if test -z "$username"
then
    echo "login user name not specified; use -user to specify" >&2
    errs=$((errs + 1))
fi

# (placeholder in the original: attempt to look up a missing password here)

if test $errs -gt 0
then
    exit 1
fi

if test -z "$password"
then
    if test "$confirm" = yes
    then
        printf '%s' "enter password for $username to edit live wiki: "
    else
        printf '%s' "enter password for $username: "
    fi
    # -r and an empty IFS keep backslashes and surrounding blanks intact
    IFS= read -r password
elif test "$confirm" = yes
then
    printf '%s' "Confirm editing live wiki: "
    read -r ans
    case $ans in
    [Yy]*) ;;
    *)     exit 1;;
    esac
fi

# %m and %d are date(1) conversions, not shell variables
logfilebase=$driverscript.$(date +%Y%m%d.%H%M)
acceptlog=$logfilebase.accept
rejectlog=$logfilebase.reject

# NOTE(review): these names are predictable and live in a shared directory;
# consider creating them with mktemp if it is available on target systems.
tf=/tmp/tmpfile$$
tf4=/tmp/tmpfile$$.4

# Remove the scratch files (including the session cookie file) on every
# exit path, not only after a fully successful run.  $rm is left unquoted
# so that it splits into the command and its flag.
trap '$rm "$tf" "$tf4" "$tf.cook"' 0
trap 'exit 1' HUP INT TERM

printf '%s' "Fetching login page..."
httpget -cookies -cookiefile "$tf.cook" "$baseurl$login" > "$tf"
echo " got it."

formsetup formtag$$ "$tf" || exit
formselect formtag$$ userlogin || exit
formcheckfields formtag$$ wpName wpPassword || exit
printf '%s\n' "$username" > "$(formgettmpfile formtag$$ wpName)"
printf '%s\n' "$password" > "$(formgettmpfile formtag$$ wpPassword)"
formsubmitbutton formtag$$ wpLoginattempt || exit
printf '%s' "Submitting login information..."
formsubmit formtag$$ "$baseurl" "$tf.cook" > "$tf4"
stat=$?
echo " got it."
if test $stat -ne 0; then exit 1; fi

if grep -q "Incorrect password entered" "$tf4"
then
    echo "bad password; login failed" >&2
    exit 1
fi

formfinish formtag$$

# ---

# One driver line per page: the article title, then optional parameters
# for the edit script.  The loop reads the driver file by redirection
# rather than through a pipeline so that a fatal "exit" inside it ends the
# whole script with a failing status.
while read -r article editscriptparms
do
    sleep "$delay"

    printf '%s' "Fetching \"$article\" to edit..."
    # NOTE(review): $article is placed in the URL without URL-encoding.
    httpget -cookies -cookiefile "$tf.cook" "$baseurl?title=$article&action=edit" > "$tf"
    echo " got it."

    if grep -q "<title>User is blocked" "$tf" ||
       grep -q "Your user name or IP address has been blocked" "$tf"
    then
        echo "bot user $username blocked!" >&2
        exit 1
    fi

    formsetup formtag$$ "$tf" || exit
    formselect formtag$$ editform || exit
    formcheckfields formtag$$ wpTextbox1 || exit
    tftextarea=$(formgettmpfile formtag$$ wpTextbox1)

    # ---

    # Imperfect test for missing page, since it depends on text that can
    # be and typically is customized per wiki.  I'm using two different
    # strings from each of generic mediawiki, Wikipedia, and Wiktionary.
    # The failure mode if it fails to notice that the page doesn't exist
    # isn't too bad, because the check and/or edit scripts will usually
    # fail on a blank textarea.

    missing=no
    if grep -q "does not yet have an entry for $article" "$tf" ||
       grep -q "start .* entry.* type .* box .* click .*[Ss]ave.*changes .* visible immediately" "$tf" ||
       grep -q "followed a link to a page that doesn't exist yet" "$tf" ||
       grep -q "To create the page, start typing in the box below" "$tf" ||
       grep -q "does not have an article with this exact name" "$tf" ||
       grep -q "To start a page called $article, type .* box .* and .* [Ss]ave .* changes .* visible immediately" "$tf"
    then
        missing=yes
    fi

    if test "$mustcreate" = yes && test "$missing" = no
    then
        echo "$article: page already exists" >&2
        formfinish formtag$$
        reject "$article" "$editscriptparms" "already exists"
        continue
    elif test "$okaytocreate" = no && test "$missing" = yes
    then
        echo "$article: no such page yet" >&2
        formfinish formtag$$
        reject "$article" "$editscriptparms" "no such page"
        continue
    fi

    if grep -i -q '^#redirect' < "$tftextarea"
    then
        formfinish formtag$$
        echo "$article is a redirect" >&2
        reject "$article" "$editscriptparms" "is redirect"
        continue
    fi

    # $checkscript and $checkscriptparms are deliberately unquoted so they
    # split into separate words.  ($checkscriptparms is never assigned in
    # this file, so it normally expands to nothing.)
    if test -z "$checkscript" || sh $checkscript $checkscriptparms < "$tftextarea"
    then
        :
    else
        formfinish formtag$$
        reject "$article" "$editscriptparms" "check script failure"
        continue
    fi

    # ---

    filter "$unescape" "$tftextarea"

    # Run the edit script, either as a pure stdin/stdout filter over the
    # textarea file or with the file name as its first argument.
    # $editscript and $editscriptparms are deliberately unquoted.
    edit_script_output=""
    if test "$filter" = "yes"
    then
        filter "sh $editscript $editscriptparms" "$tftextarea"
        stat=$?
    else
        edit_script_output=$(sh $editscript "$tftextarea" $editscriptparms)
        stat=$?
    fi

    if test $stat -ne 0
    then
        printf '%s\n' "$edit_script_output" >&2
        formfinish formtag$$
        reject "$article" "$editscriptparms" "edit script failure"
        continue
    fi

    # The edit script may announce how many inserted lines to expect.
    if test -n "$edit_script_output"
    then
        if printf '%s\n' "$edit_script_output" | grep -q 'expect.*insertions'
        then
            expectedinsert=$(expr "$edit_script_output" : 'expect \([0-9]*\) insertions')
        fi
    fi

    # ---

    if test "$checkdiffs" = "yes"
    then
        formsubmitbutton formtag$$ wpDiff || exit
        printf '%s' "Fetching diffs..."
        formsubmit formtag$$ "$baseurl" "$tf.cook" > "$tf4"
        stat=$?
        echo " done."
        if test $stat -ne 0; then exit 1; fi
        formfinish formtag$$

        ndel=$(grep -c "td class=['\"]diff-deletedline['\"]" "$tf4")
        nadd=$(grep -c "td class=['\"]diff-addedline['\"]" "$tf4")

        # Fail safe: the edit goes ahead only when both counts positively
        # match.  A missing or non-numeric expectation makes test(1) fail,
        # which lands in the reject branch; its own diagnostic is silenced
        # because the message below already reports the mismatch.
        if test "$ndel" -eq "$expecteddelete" 2>/dev/null &&
           test "$nadd" -eq "$expectedinsert" 2>/dev/null
        then
            :
        else
            echo "edit would cause $nadd/$ndel insertions/deletions, but expected $expectedinsert/$expecteddelete" >&2
            echo "Canceling." >&2
            reject "$article" "$editscriptparms" "diff check failure"
            continue
        fi

        formsetup formtag$$ "$tf4" || exit
        formselect formtag$$ editform || exit
        formcheckfields formtag$$ wpTextbox1 || exit
        tftextarea=$(formgettmpfile formtag$$ wpTextbox1)

        # The textarea of the diff page is escaped again, so it needs the
        # same unescaping as the first fetch before it is saved.
        filter "$unescape" "$tftextarea"
    fi

    # ---

    tfwatch=$(formgettmpfile formtag$$ wpWatchthis)
    if test -n "$tfwatch"
    then
        $rm "$tfwatch"
    fi

    tfminor=$(formgettmpfile formtag$$ wpMinoredit)
    if test -n "$tfminor"
    then
        if test "$minoredit" = yes
        then
            echo 1 > "$tfminor"
        else
            $rm "$tfminor"
        fi
    fi

    tfsummary=$(formgettmpfile formtag$$ wpSummary)
    if test -n "$tfsummary"
    then
        echo "edited by robot" > "$tfsummary"
        if test -n "$reason"
        then
            # append ":" to the first line, then add the reason after it
            (echo '1s/$/:/'; echo w) | ed - "$tfsummary"
            printf '%s\n' "$reason" >> "$tfsummary"
        fi
    fi

    formsubmitbutton formtag$$ wpSave || exit
    printf '%s' "Submitting edits..."
    formsubmit formtag$$ "$baseurl" "$tf.cook" > "$tf4"
    stat=$?
    echo " done."
    if test $stat -ne 0; then exit 1; fi
    formfinish formtag$$

    if grep -q "<title>.*[Ee]dit [Cc]onflict" "$tf4"
    then
        echo "edit conflict" >&2
        reject "$article" "$editscriptparms" "edit conflict"
        continue
    fi

    printf '%s\n' "$article $editscriptparms" >> "$acceptlog"
done < "$driverscript"

# Shells that run a redirected loop in a subshell report a fatal exit from
# inside the loop here; pass it on.  Scratch files are removed by the trap.
exit $?