Jump to content

User:ElisaVan/Spanish bot code

From Wiktionary, the free dictionary
#coding: utf-8
"""
This python script, based on pagefromfile.py, is to be used by bots loading batches of words into Wiktionary.
 
*** This version (esfromfile.py) is configured for Spanish words loaded to the English Wiktionary.
 
All arguments in the original have been hard-coded (see the original for details).
 
The input text file can contain data for multiple words.
Data for each word is contained between "starttext" and "endtext"
Immediately after the "starttext" is a line containing the name of the word to be added between <<< and >>> arrows.
All subsequent lines of text (up to the "endtext") are added to the wiki.
 
If the word to be added already exists, a check is made to see if there is a Spanish section.
  If there is already a Spanish section,
    no data is added, and the word is skipped.
  If there is no Spanish section,
    data is appended at the end of the existing word and a template is added to invoke the auto cleanup bot.
 
If processing of a file is interrupted, it may be re-executed from the start with no duplication.
 
Note. Several unused variables and logic remain from the original and could be removed (but are doing no harm).
 
To use with another language (e.g. Romanian) within the English wiktionary . . .
1) Change "filename" from es.txt to ro.txt (not essential, but reasonable if you are working with multiple languages)
2) Change ALL occurrences of "Spanish" to "Romanian" or whatever.
3) Save as rofromfile.py (or whatever) (not essential, but confusing otherwise)
"""
#
# (C) Andre Engels, 2004
#
# Distributed under the terms of the MIT license.
#
 
__version__='$Id: Exp $'
 
import wikipedia, config
import re, sys, codecs
 
# Edit summary text; handed to wikipedia.translate() further down to build
# the default save comment.
msg = {'en': u'creating Spanish noun form entry'}

# Input file read by main().
filename = "es.txt"

# Markers that delimit one entry inside the input file.
starttext = "{{-start-}}"
endtext = "{{-stop-}}"

# Markers that delimit the page title inside an entry.
titlestart = u"<<<"
titleend = u">>>"

# When True, findpage() keeps the start/end markers in the uploaded text.
include = False
# When True, findpage() removes the title marker from the uploaded text.
notitle = True

# Leftovers from the original pagefromfile.py: they can be set from the
# command line but nothing in this script reads them (findpage() builds its
# own local search_string).
search_string = u""
force = False
append = "False"
 
def findpage(t):
    """Create or extend a wiki page for every entry found in the text t.

    An entry is the text between the module-level ``starttext`` and
    ``endtext`` markers (the markers themselves are kept when ``include``
    is true).  The page title is read from between ``titlestart`` and
    ``titleend`` inside the entry; an entry without a title is reported
    and skipped, and processing continues with the next entry.

    For each titled entry:
      * the title marker is removed from the text when ``notitle`` is true;
      * surrounding newlines and spaces are stripped;
      * a {{count page}} line is appended if the text contains no "[[";
      * a page that does not exist yet is created with the text;
      * an existing page without a Spanish heading gets the text appended
        after a "----" separator, followed by {{rfc-auto}};
      * an existing page that already has a Spanish heading is left alone.

    If t contains no complete entry at all, a single
    "Start or end marker not found." message is written.

    Returns None.  Exceptions raised by the wikipedia calls are not caught.
    """
    # re.escape keeps the marker characters literal, and DOTALL lets "."
    # span the line breaks inside an entry.  The previous "[^\Z]" character
    # class was not an end-of-string anchor and could reject entries that
    # contain the letter Z.
    entry_re = re.compile(
        re.escape(starttext) + "(.*?)" + re.escape(endtext), re.DOTALL)
    title_re = re.compile(
        re.escape(titlestart) + "(.*?)" + re.escape(titleend))

    found_entry = False
    # Iterate instead of recursing once per entry: large batch files would
    # otherwise hit the recursion limit and re-slice the text every time.
    for location in entry_re.finditer(t):
        found_entry = True
        if include:
            contents = location.group()
        else:
            contents = location.group(1)

        title_match = title_re.search(contents)
        if title_match is None:
            wikipedia.output(u"No title found - skipping a page.")
            continue
        title = title_match.group(1)

        page = wikipedia.Page(mysite, title)
        wikipedia.output(page.title())
        if notitle:
            # Remove title (to allow creation of redirects)
            contents = title_re.sub("", contents)

        # Strip surrounding newlines and spaces (they cause trouble when
        # creating redirects).
        contents = contents.strip('\r\n ')
        # add "count page" if needed (if we append, AF will fix as needed)
        if "[[" not in contents:
            contents += "\n{{count page|[[Wiktionary:Page count]]}}"

        if not page.exists():
            page.put(contents, comment = commenttext, minorEdit = False)
            continue

        old_text = page.get()
        if re.search(r'==\s*Spanish\s*==', old_text):
            wikipedia.output(u"Page %s already exists with Spanish section, not adding!"%title)
            continue

        # Existing page without a Spanish section: append the new text and
        # tag the page with {{rfc-auto}}.
        contents = old_text + '\n\n----\n' + contents + '\n{{rfc-auto}}\n'
        commenttext_add = commenttext + " - appended"
        wikipedia.output(u"Page %s already exists, adding to entry!"%title)
        page.put(contents, comment = commenttext_add, minorEdit = False)

    if not found_entry:
        wikipedia.output(u'Start or end marker not found.')
    return
 
def main():
    """Read the input file and hand its whole text to findpage().

    The file named by the module-level ``filename`` is opened for reading
    and decoded with ``config.textfile_encoding``.  Errors from opening or
    reading the file propagate to the caller.
    """
    f = codecs.open(filename, 'r', encoding = config.textfile_encoding)
    try:
        text = f.read()
    finally:
        # Release the file handle even when reading or decoding fails.
        f.close()
    findpage(text)
 
# Site the bot works on, and the default edit summary for it.
mysite = wikipedia.getSite()
commenttext = wikipedia.translate(mysite, msg)

# Command-line arguments override the hard-coded settings above.
for arg in sys.argv[1:]:
    arg = wikipedia.argHandler(arg, 'pagefromfile')
    if not arg:
        # argHandler returned nothing for this script to handle.
        continue

    # Options that carry a value after the colon.
    if arg.startswith("-start:"):
        starttext = arg[len("-start:"):]
    elif arg.startswith("-end:"):
        endtext = arg[len("-end:"):]
    elif arg.startswith("-file:"):
        filename = arg[len("-file:"):]
    elif arg.startswith("-titlestart:"):
        titlestart = arg[len("-titlestart:"):]
    elif arg.startswith("-titleend:"):
        titleend = arg[len("-titleend:"):]
    elif arg.startswith("-summary:"):
        commenttext = arg[len("-summary:"):]
    # Plain flags ("-exclude" is not supported and is reported as unknown).
    elif arg == "-include":
        include = True
    elif arg == '-notitle':
        notitle = True
    elif arg == "-appendtop":
        append = "Top"
    elif arg == "-appendbottom":
        append = "Bottom"
    elif arg == "-force":
        force = True
    elif arg == "-safe":
        force = False
        append = "False"
    else:
        wikipedia.output(u"Disregarding unknown argument %s."%arg)

# Always call stopme(), whether main() finishes or raises; an exception
# from main() still propagates afterwards.
try:
    main()
finally:
    wikipedia.stopme()