User:BuchmeierBot/code
Appearance
I use a combination of bash and gawk scripts to analyze the page and write the wiki-code and mvs to upload the pages:
Bash code
[edit]
#!/bin/bash
# Fetches the English Wiktionary page of a Spanish verb, extracts its
# conjugation table with gawk and writes the input file for pagefromfile.py.
#
# Usage:  <this script> INFINITIVE
#
# Files written to the current directory (removed first if they exist):
#   INFINITIVE-wiki.txt         text dump of the page produced by lynx
#   INFINITIVE-conjugation.txt  sorted output of es-conj-verb-readconj.awk
#   INFINITIVE-ERRORS.txt       only created by es-conj-verb-readconj.awk
#                               when it rejects the page
#   INFINITIVE-botinput.txt     output of es-conj-verb-genpages.awk
#
# Exit status: 0 on success, 1 if the page could not be fetched or the awk
# script reported an error, 2 on wrong usage.
verb=${1:-}
if [ -z "$verb" ]; then
  printf 'usage: %s INFINITIVE\n' "${0##*/}" >&2
  exit 2
fi

TEMP="${verb}-wiki.txt"
TEMPCONJ="${verb}-conjugation.txt"
ERROUT="${verb}-ERRORS.txt"
BOTINPUT="${verb}-botinput.txt"
rm -f -- "$TEMP" "$TEMPCONJ" "$ERROUT" "$BOTINPUT"

# export the page to the text file $TEMP with lynx
if ! lynx -width=1000 -nolist -underscore -dump \
    "http://en.wiktionary.org/wiki/${verb}" >"$TEMP"; then
  printf 'lynx could not fetch the page for %s\n' "$verb" >&2
  exit 1
fi
recode latin1..utf8 "$TEMP"

# analyse the text file $TEMP with gawk
export LC_ALL=C
gawk --assign "ERROUT=${ERROUT}" --assign "INF=${verb}" \
  -f es-conj-verb-readconj.awk "$TEMP" \
  | sort >"$TEMPCONJ"

# $ERROUT is written by the previous awk script when it cannot handle the
# page (for example when the conjugation table contains a line with an
# unexpected number of words); in that case further execution is stopped.
if [ -f "$ERROUT" ]; then
  cat "$ERROUT"
  exit 1
fi

# write the input file for pagefromfile.py using gawk
gawk --assign "INF=${verb}" -f es-conj-verb-genpages.awk "$TEMPCONJ" >>"$BOTINPUT"
Awk code
[edit]es-conj-verb-readconj.awk
[edit]The awk script es-conj-verb-readconj.awk looks like this:
# es-conj-verb-readconj.awk
#
# Reads the text dump of a Wiktionary verb page and prints one line per
# conjugated form:
#
#   FORM <TAB> {{es-verb form of|...}}              (most forms)
#   FORM <TAB> {{es-verb form of|...}} <TAB> STEM   (past participles)
#
# Variables expected on the command line (--assign / -v):
#   INF     the infinitive whose conjugation table is to be read
#   ERROUT  file that receives an error message; processing stops right
#           after the first message.  Falls back to /dev/stderr when the
#           variable is not supplied.
#
# "section" records how far into the page we are:
#   "0"  nothing relevant seen yet
#   "S"  after the "[edit] Spanish" heading
#   "C"  after the "[edit] Conjugation" heading of the Spanish section
#   "T"  inside the table whose "infinitive" row names INF

# Returns one output line.  attrs is the text that follows "mood=" up to the
# link to the infinitive; suffix (optional) is placed between that link and
# the closing braces.
function entry(form, attrs, suffix) {
    return form "\t{{es-verb form of|ending=" ending "|mood=" attrs "|[[" inf "]]" suffix "}}"
}

# Writes msg to the error file and terminates the program.
function fail(msg) {
    print msg > errfile
    exit
}

# Stops with an error unless the current line has exactly n fields.
# label (optional) is inserted into the message before the colon.
function need_fields(n, label) {
    if (NF != n)
        fail("unexpected number of words on line" label ":\n" $0)
}

# Prints the eight person/number entries of a six-column tense row
# (fields $2..$7) for the current MOOD and TENSE.  The third-person forms
# are printed twice because they double as the formal second person.
# sera (optional) is an extra "|sera=..." attribute placed after the tense.
function emit_persons(sera,    head) {
    head = MOOD "|tense=" TENSE sera
    print entry($2, head "|pers=1|number=singular")
    print entry($3, head "|pers=2|formal=no|number=singular")
    print entry($4, head "|pers=3|number=singular")
    print entry($4, head "|pers=2|formal=yes|number=singular")
    print entry($5, head "|pers=1|number=plural")
    print entry($6, head "|pers=2|formal=no|number=plural", "|region=Spain")
    print entry($7, head "|pers=3|number=plural")
    print entry($7, head "|pers=2|formal=yes|number=plural")
}

BEGIN {
    section = "0"
    MOOD = ""       # empty until a mood heading has been seen
    errfile = (ERROUT != "") ? ERROUT : "/dev/stderr"
    inf = INF

    # The ending is taken from the last two characters of the infinitive.
    # An infinitive containing "ír" is treated as an -ir verb.
    ending = substr(inf, length(inf) - 1, 2)
    if (index(inf, "ír") > 0)
        ending = "ir"
    if (ending != "ar" && ending != "er" && ending != "ir")
        fail("unknown or unsupported ending: " ending " of infinitive: " inf)
}

# --- locating the conjugation table -------------------------------------

/\[edit\] Spanish/      { section = "S"; next }
/\[edit\] Conjugation/  { if (section == "S") section = "C"; next }

# Any other heading after the conjugation section ends the run.
/\[edit\]/              { if (section == "C" || section == "T") exit }

/Defective verb/        { if (section == "C") fail("unsupported defective verb") }
/Rule/                  { next }
/Irregular in the past participle/ { next }

# The table starts at the "infinitive" row that names INF.
/infinitive/            { if (section == "C" && $2 == inf) section = "T" }

# --- non-finite forms ---------------------------------------------------

/gerund/ && section == "T" {
    need_fields(2)
    print entry($2, "gerund")
}

/present participle/ && section == "T" {
    need_fields(3)
    print entry($3, "gerund")
    next    # keep this row away from the "present " tense rule below
}

/past participle/ && section == "T" {
    need_fields(3)
    partstem = $3
    sub(/o$/, "", partstem)
    print entry(partstem "o",  "past participle|gender=m|number=s") "\t" partstem
    print entry(partstem "a",  "past participle|gender=f|number=s") "\t" partstem
    print entry(partstem "os", "past participle|gender=m|number=p") "\t" partstem
    print entry(partstem "as", "past participle|gender=f|number=p") "\t" partstem
}

# --- mood headings ------------------------------------------------------
# "conditional" is handled as a tense further down, not as a mood.

/indicative/  && section == "T" { MOOD = "indicative" }
/subjunctive/ && section == "T" { MOOD = "subjunctive" }
/imperative/  && section == "T" { MOOD = "imperative" }

# --- tense rows ---------------------------------------------------------

/present / && section == "T" {
    need_fields(7, " present")
    TENSE = "present"
    emit_persons()
}

/imperfect/ && section == "T" {
    TENSE = "imperfect"
    # the subjunctive imperfect is emitted by the (ra)/(se) rules instead
    if (MOOD == "subjunctive") next
    need_fields(7)
    emit_persons()
}

/\(ra\)/ && section == "T" {
    TENSE = "imperfect"
    need_fields(7)
    emit_persons("|sera=ra")
}

/\(se\)/ && section == "T" {
    TENSE = "imperfect"
    need_fields(7)
    emit_persons("|sera=se")
}

/preterite/ && section == "T" {
    TENSE = "preterite"
    need_fields(7)
    emit_persons()
}

/future/ && section == "T" {
    TENSE = "future"
    need_fields(7)
    emit_persons()
}

/conditional/ && section == "T" {
    TENSE = "conditional"
    need_fields(7)
    emit_persons()
}

# --- imperative rows ----------------------------------------------------

/affirmative/ && section == "T" {
    TENSE = "affirmative"
    need_fields(6)
    print entry($2, MOOD "|sense=" TENSE "|pers=2|formal=no|number=singular")
    print entry($3, MOOD "|pers=2|formal=yes|number=singular")
    print entry($4, MOOD "|pers=1|number=plural")
    print entry($5, MOOD "|sense=" TENSE "|pers=2|formal=no|number=plural", "|region=Spain")
    print entry($6, MOOD "|pers=2|formal=yes|number=plural")
}

# Only fields 3 and 9 of the negative row are emitted; this row is the last
# one processed, so the program stops here.
/negative/ && section == "T" {
    TENSE = "negative"
    need_fields(11)
    print entry($3, MOOD "|sense=" TENSE "|pers=2|formal=no|number=singular")
    print entry($9, MOOD "|sense=" TENSE "|pers=2|formal=no|number=plural", "|region=Spain")
    exit
}
es-conj-verb-genpages.awk
[edit]The awk script es-conj-verb-genpages.awk looks like this:
# es-conj-verb-genpages.awk
#
# Turns the sorted, tab-separated form list into page blocks delimited by
# {{-start-}} / {{-stop-}}.
#
# Input fields:  $1 page title, $2 definition line, $3 (optional) participle
# stem.  Consecutive records with the same title are merged into one page,
# each contributing one "# ..." definition line.
#
# Variable expected on the command line (--assign / -v):
#   INF  the infinitive, used in the headword template
BEGIN {
    inf = INF
    FS = "\t"
    oldpage = ""
}

{
    page = $1 ""    # force string comparison of titles

    # Same title as the previous record: add another definition line to the
    # page that is already open.  NR > 1 keeps the first record from ever
    # being mistaken for a continuation.
    if (NR > 1 && page == oldpage) {
        print "# " $2
        next
    }

    # Close the previous page (if any) and open a new one.
    if (NR > 1)
        print "{{-stop-}}"
    print "{{-start-}}"
    print "<<<" $1 ">>>"

    # Two fields: ordinary verb form; three fields: past participle.
    if (NF == 2)
        print "==Spanish==\n\n===Verb===\n{{es-verb-form|" inf "}}\n"
    if (NF == 3)
        print "==Spanish==\n\n===Verb===\n{{es-pp|" $3 "|" inf "}}\n"

    print "# " $2
    oldpage = page
}

# Close the last page; with no input at all there is nothing to close.
END {
    if (NR > 0)
        print "{{-stop-}}"
}
The file $BOTINPUT is then uploaded using SemperBlotto's pagefromfile.py, modified for Spanish.