Jump to content

User:SemperBlottoBot/verbs

From Wiktionary, the free dictionary
  • itfromfile.py
#coding: utf-8
"""
This Python script, based on pagefromfile.py, is used by bots to load batches of words into Wiktionary.

*** This version (itfromfile.py) is configured for Italian words loaded to the English Wiktionary.

All arguments in the original have been hard-coded (see the original for details).

The input text file can contain data for multiple words.
Data for each word is contained between "starttext" and "endtext"
Immediately after the "starttext" is a line containing the name of the word to be added between <<< and >>> arrows.
All subsequent lines of text (up to the "endtext") are added to the wiki.

If the word to be added already exists, a check is made to see if there is an Italian section.
  If there is already an Italian section,
    no data is added, and the word is skipped.
  If there is no Italian section,
    data is appended at the end of the existing word and a template is added to invoke the auto cleanup bot.

If processing of a file is interrupted, it may be re-executed from the start with no duplication.

Note. Several unused variables and logic remain from the original and could be removed (but are doing no harm).

To use with another language (e.g. French) within the English wiktionary . . .
1) Change "filename" from it.txt to fr.txt (not essential, but reasonable if you are working with multiple languages)
2) Change ALL occurrences of "Italian" to "French" or whatever.
3) Save as frfromfile.py (or whatever) (not essential, but confusing otherwise)
"""
#
# (C) Andre Engels, 2004
#
# Distributed under the terms of the MIT license.
#

# Version identifier string; presumably a version-control keyword
# placeholder -- TODO confirm.
__version__='$Id: Exp $'

import wikipedia, config
import re, sys, codecs

# Edit summaries keyed by language code; handed to wikipedia.translate()
# to pick the default summary for the target site.
msg = {'en': u'Batch loading of articles from a text file'}

# Markers that open and close one entry in the input file.
starttext, endtext = "{{-start-}}", "{{-stop-}}"

# Markers that surround the page title inside an entry.
titlestart, titleend = u"<<<", u">>>"

# Default input file.
filename = "it.txt"

# When True the start/end markers themselves are kept in the uploaded text.
include = False

# When True the <<<title>>> marker is stripped from the uploaded text.
notitle = True

# Settings carried over from the original script.  They can still be set
# from the command line, but findpage() and main() do not read them
# (findpage() builds its own local search_string).
search_string = u""
force = False
append = "False"

def findpage(t):
    """Upload every entry found in the text ``t`` to the wiki.

    ``t`` is the complete text of the input file.  An entry is the text
    between ``starttext`` and ``endtext``; its page title is the text between
    ``titlestart`` and ``titleend`` inside the entry.  All four markers are
    used as regular-expression fragments, exactly as configured.

    For each entry:
      * no title found: a message is shown and the entry is skipped;
      * page does not exist: it is created from the entry text (minor edit);
      * page exists without an Italian section: the entry is appended after
        a ``----`` separator and followed by ``{{rfc-auto}}``;
      * page exists with an Italian section: nothing is saved.

    If ``t`` contains no complete entry at all, 'Start or end marker not
    found.' is printed.

    Relies on the module-level ``mysite`` and ``commenttext``.  Returns None.
    """
    # re.DOTALL lets "." span newlines.  The earlier "[^\Z]" class did not
    # mean "anything": inside a character class \Z is just the letter Z, so
    # every entry containing a capital Z was silently dropped.
    entry_pattern = re.compile(starttext + "(.*?)" + endtext, re.DOTALL)
    search_string = titlestart + "(.*?)" + titleend

    found_entry = False
    # Iterate instead of recursing once per entry, so a large batch file
    # cannot exhaust the interpreter's recursion limit.
    for location in entry_pattern.finditer(t):
        found_entry = True
        if include:
            contents = location.group()
        else:
            contents = location.group(1)

        title_match = re.search(search_string, contents)
        if title_match is None:
            # Skip only this entry; later entries are still processed.
            wikipedia.output(u"No title found - skipping a page.")
            continue
        title = title_match.group(1)

        page = wikipedia.Page(mysite, title)
        wikipedia.output(page.title())
        if notitle:
            # Remove title (to allow creation of redirects)
            contents = re.sub(search_string, "", contents)
        # Remove leading newlines (cause troubles when creating redirects)
        contents = re.sub('^[\r\n]*','',contents)

        if not page.exists():
            page.put(contents, comment = commenttext, minorEdit = True)
            continue

        old_text = page.get()
        if re.search(r'==\s*Italian\s*==', old_text):
            wikipedia.output(u"Page %s already exists with Italian section, not adding!"%title)
            continue

        # Existing page without an Italian section: append the new entry and
        # tag the page with {{rfc-auto}}.
        contents = old_text + '\n\n----\n'  + contents + '\n{{rfc-auto}}\n'
        commenttext_add = commenttext + " - appended"
        wikipedia.output(u"Page %s already exists, adding to entry!"%title)
        page.put(contents, comment = commenttext_add, minorEdit = False)

    if not found_entry:
        print('Start or end marker not found.')
    return

def main():
    """Read the batch file named by ``filename`` and upload its entries.

    The file is decoded with ``config.textfile_encoding`` and its entire
    text is passed to ``findpage``.  Returns None.
    """
    f = codecs.open(filename,'r', encoding = config.textfile_encoding)
    try:
        text = f.read()
    finally:
        # Release the file handle even if reading or decoding fails.
        f.close()
    findpage(text)

# Target site and default edit summary; both are read by findpage().
mysite = wikipedia.getSite()
commenttext = wikipedia.translate(mysite, msg)

# Command-line overrides for the module-level settings.  Each argument is
# first passed through wikipedia.argHandler(); whatever it hands back is
# matched against the options below.
for arg in sys.argv[1:]:
    arg = wikipedia.argHandler(arg, 'pagefromfile')
    if not arg:
        continue

    # Exact-match flags.  ("-exclude" is not handled and is reported as
    # unknown.)
    if arg == "-include":
        include = True
    elif arg == "-appendtop":
        append = "Top"
    elif arg == "-appendbottom":
        append = "Bottom"
    elif arg == "-force":
        force = True
    elif arg == "-safe":
        force, append = False, "False"
    elif arg == '-notitle':
        notitle = True
    # "-name:value" options: keep everything after the prefix.
    elif arg.startswith("-start:"):
        starttext = arg[len("-start:"):]
    elif arg.startswith("-end:"):
        endtext = arg[len("-end:"):]
    elif arg.startswith("-file:"):
        filename = arg[len("-file:"):]
    elif arg.startswith("-titlestart:"):
        titlestart = arg[len("-titlestart:"):]
    elif arg.startswith("-titleend:"):
        titleend = arg[len("-titleend:"):]
    elif arg.startswith("-summary:"):
        commenttext = arg[len("-summary:"):]
    else:
        wikipedia.output(u"Disregarding unknown argument %s."%arg)

# Run the upload.  wikipedia.stopme() is called exactly once whether main()
# returns normally or raises; any exception still propagates afterwards.
try:
    main()
finally:
    wikipedia.stopme()