User:FitBot/forms

lafromfile.py
#coding: utf-8
"""
This python script, based on pagefromfile.py, is to be used by bots loading batches of words into Wiktionary.

*** This version (lafromfile.py) is configured for Latin words loaded to the English Wiktionary.

All arguments in the original have been hard-coded (see the original for details).

The input text file can contain data for multiple words.
Data for each word is contained between "starttext" and "endtext"
Immediately after the "starttext" is a line containing the name of the word to be added between <<< and >>> arrows.
All subsequent lines of text (up to the "endtext") are added to the wiki.

If the word to be added already exists, a check is made to see if there is a Latin section.
  If there is already a Latin section,
    no data is added, and the word is skipped.
  If there is no Latin section,
    data is appended at the end of the existing word and a template is added to invoke the auto cleanup bot.

If processing of a file is interrupted, it may be re-executed from the start with no duplication.

Note. Several unused variables and logic remain from the original and could be removed (but are doing no harm).

To use with another language (e.g. Galician) within the English wiktionary . . .
1) Change "filename" from la.txt to gl.txt (not essential, but reasonable if you are working with multiple languages)
2) Change ALL occurrences of "Latin" to "Galician" or whatever.
3) Save as glfromfile.py (or whatever) (not essential, but confusing otherwise)
"""
#
# (C) Andre Engels, 2004
#
# Distributed under the terms of the MIT license.
# Adapted from code modified by SemperBlotto at the English Wiktionary
#

__version__='$Id: Exp $'

import wikipedia, config
import re, sys, codecs

# Edit summary used when saving pages, keyed by site language code.
msg = {'en': u'Batch loading of articles from a text file'}

# Input file holding the batch of entries.
filename = "la.txt"

# Markers that delimit one entry inside the input file.
starttext = "{{-start-}}"
endtext = "{{-stop-}}"

# Markers that surround the page title inside an entry.
titlestart = u"<<<"
titleend = u">>>"

# When True the start/end markers are kept as part of the uploaded text.
include = False
# When True the "<<<title>>>" line is stripped from the uploaded text.
notitle = True

# Left over from pagefromfile.py; assigned here (and by the command-line
# handling) but not read by findpage().
search_string = u""
force = False
append = "False"

def findpage(t):
    """Upload every entry found in the batch text *t* to the wiki.

    An entry is the text between the module-level ``starttext`` and
    ``endtext`` markers.  Its title is the text between ``titlestart`` and
    ``titleend`` inside the entry.  For each entry:

    * if the page does not exist, it is created with the entry text;
    * if the page exists without a ``==Latin==`` heading, the entry is
      appended after a ``----`` separator, followed by ``{{rfc-auto}}``;
    * if the page exists with a ``==Latin==`` heading, nothing is saved.

    An entry without a title is reported and skipped; the remaining
    entries are still processed.  If *t* contains no complete entry at all,
    a "marker not found" message is emitted.

    Reads the module globals ``starttext``, ``endtext``, ``titlestart``,
    ``titleend``, ``include``, ``notitle``, ``mysite`` and ``commenttext``.

    Returns None.
    """
    # The markers are plain text, so escape them before building patterns.
    # DOTALL lets "." span the newlines inside an entry; a character class
    # such as [^\Z] would instead exclude the literal letter "Z" and drop
    # every entry containing one.
    entry_re = re.compile(
        re.escape(starttext) + "(.*?)" + re.escape(endtext), re.DOTALL)
    title_re = re.compile(re.escape(titlestart) + "(.*?)" + re.escape(titleend))

    found_entry = False
    # Iterate instead of recursing once per entry: no recursion-depth limit
    # on large batch files and no repeated copying of the remaining text.
    for location in entry_re.finditer(t):
        found_entry = True
        if include:
            contents = location.group()
        else:
            contents = location.group(1)

        title_match = title_re.search(contents)
        if title_match is None:
            wikipedia.output(u"No title found - skipping a page.")
            continue
        title = title_match.group(1)

        page = wikipedia.Page(mysite, title)
        wikipedia.output(page.title())
        if notitle:
            # Remove title (to allow creation of redirects)
            contents = title_re.sub("", contents)
        # Remove leading newlines (cause troubles when creating redirects)
        contents = contents.lstrip('\r\n')

        if not page.exists():
            page.put(contents, comment = commenttext, minorEdit = True)
            continue

        old_text = page.get()
        if re.search(r'==\s*Latin\s*==', old_text):
            wikipedia.output(u"Page %s already exists with Latin section, not adding!"%title)
            continue

        # Existing page without a Latin section: append the new entry and
        # add the cleanup template.
        contents = old_text + '\n\n----\n'  + contents + '\n{{rfc-auto}}\n'
        commenttext_add = commenttext + " - appended"
        wikipedia.output(u"Page %s already exists, adding to entry!"%title)
        page.put(contents, comment = commenttext_add, minorEdit = False)

    if not found_entry:
        wikipedia.output(u'Start or end marker not found.')
    return

def main():
    """Read the batch file named by ``filename`` and upload its entries.

    The file is decoded with ``config.textfile_encoding``, read in full,
    and handed to findpage().  The file handle is closed before any upload
    starts, including when reading fails.
    """
    f = codecs.open(filename, 'r', encoding = config.textfile_encoding)
    try:
        text = f.read()
    finally:
        f.close()
    findpage(text)

# Target site and default edit summary (the summary may be replaced by
# the -summary: option below).
mysite = wikipedia.getSite()
commenttext = wikipedia.translate(mysite, msg)

# Command-line handling: each argument is first passed through
# wikipedia.argHandler; an argument for which it returns a false value is
# ignored here (presumably already consumed as a global option - confirm
# against the framework).
for arg in sys.argv[1:]:
    arg = wikipedia.argHandler(arg, 'pagefromfile')
    if not arg:
        continue

    # Options carrying a value after the colon.
    if arg.startswith("-start:"):
        starttext = arg[len("-start:"):]
    elif arg.startswith("-end:"):
        endtext = arg[len("-end:"):]
    elif arg.startswith("-file:"):
        filename = arg[len("-file:"):]
    elif arg.startswith("-titlestart:"):
        titlestart = arg[len("-titlestart:"):]
    elif arg.startswith("-titleend:"):
        titleend = arg[len("-titleend:"):]
    elif arg.startswith("-summary:"):
        commenttext = arg[len("-summary:"):]
    # Plain switches.
    elif arg == "-include":
        include = True
    elif arg == '-notitle':
        notitle = True
    elif arg == "-force":
        force = True
    elif arg == "-appendtop":
        append = "Top"
    elif arg == "-appendbottom":
        append = "Bottom"
    elif arg == "-safe":
        force = False
        append = "False"
    else:
        wikipedia.output(u"Disregarding unknown argument %s."%arg)

# Run the upload; wikipedia.stopme() is called afterwards whether main()
# returns normally or raises, and any exception still propagates.
try:
    main()
finally:
    wikipedia.stopme()