User:Flubot/cedillaBot
Appearance
Run with:
- python cedillaBot.py dușmăni (to edit only this page) ... or
- python cedillaBot.py -cat:"Romanian nouns" (to edit pages in a category) ... or
- python cedillaBot.py -file:listofwords (to retrieve page names from a list)
cedillaBot.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
import wikipedia, pagegenerators, catlib
import re
class cedillabot:
    """Bot that replaces cedilla letters with comma-below letters in Romanian text.

    Within the ``==Romanian==`` section of each page, t-cedilla is always
    replaced by t-comma.  s-cedilla is replaced by s-comma too, except on
    lines of an etymology section that mention Turkish (Turkish legitimately
    uses s-cedilla).  ``{{term|...|lang=ro}}`` templates are always converted,
    wherever they appear.
    """

    # A line containing the Romanian level-2 heading.
    ROMANIAN_HEADER = re.compile(r'==Romanian==')
    # Any level-2 (language) heading; "===x===" does not match because the
    # heading text may not contain "=".
    LANGUAGE_HEADER = re.compile(r'^==([^=]+)==')
    # "===Etymology===" and the numbered form "===Etymology 1===".
    ETYMOLOGY_HEADER = re.compile(r'===Etymology(?: \d+)?===')
    # Any heading of level 3 or deeper.
    OTHER_HEADER = re.compile(r'^===(.*)===$')
    # Signs that a line talks about Turkish words.
    TURKISH_MENTION = re.compile(r'Turkish|lang=tr|\{\{etyl\|tr\|')
    # One single Romanian {{term}} template; "[^{}]" keeps the match from
    # running across a neighbouring template (e.g. a Turkish one).
    RO_TERM = re.compile(r'\{\{term\|[^{}]*\|lang=ro\}\}')

    # Code points are spelled as escapes on purpose: the cedilla and
    # comma-below letters look the same in most fonts and are easy to mix up.
    T_PAIRS = (
        (u'\u0163', u'\u021b'),  # t-cedilla -> t-comma
        (u'\u0162', u'\u021a'),  # T-cedilla -> T-comma
    )
    S_PAIRS = (
        (u'\u015f', u'\u0219'),  # s-cedilla -> s-comma
        (u'\u015e', u'\u0218'),  # S-cedilla -> S-comma
    )

    def __init__(self, generator, site, debug_bul = True):
        """Store the bot configuration.

        generator -- iterable of page objects to process
        site      -- site object used for interwiki link handling
        debug_bul -- when true, ask for confirmation before saving each page
        """
        self.generator = generator
        self.debug_bul = debug_bul
        self.site = site

    @staticmethod
    def _replace_pairs(text, pairs):
        """Return text with every (old, new) pair of pairs applied in order."""
        for old, new in pairs:
            text = text.replace(old, new)
        return text

    def _fix_ro_term(self, match):
        """Regex callback: convert all cedilla letters inside one Romanian term template."""
        return self._replace_pairs(match.group(0), self.T_PAIRS + self.S_PAIRS)

    def convert_text(self, text):
        """Return text with Romanian cedilla letters replaced by comma-below letters.

        The text is processed line by line while tracking whether the current
        line is inside the Romanian section and inside an etymology section.
        """
        in_romanian = False
        in_etymology = False
        converted = []
        for line in text.split("\n"):
            if self.ROMANIAN_HEADER.search(line):
                in_romanian = True
                in_etymology = False
            elif self.LANGUAGE_HEADER.search(line):
                # Any other language heading ends the Romanian section.  This
                # must be tested before the lower-level heading checks so that
                # later sections (which may legitimately use cedillas) are
                # left alone.
                in_romanian = False
                in_etymology = False
            elif self.ETYMOLOGY_HEADER.search(line):
                in_etymology = True
            elif self.OTHER_HEADER.search(line):
                in_etymology = False

            if in_romanian:
                line = self._replace_pairs(line, self.T_PAIRS)
                # Etymology lines that mention Turkish may quote Turkish
                # words, whose s-cedilla must be preserved.
                if not (in_etymology and self.TURKISH_MENTION.search(line)):
                    line = self._replace_pairs(line, self.S_PAIRS)

            # Romanian terms are fixed even on lines skipped above; only the
            # matched template itself is rewritten, not the rest of the line.
            line = self.RO_TERM.sub(self._fix_ro_term, line)
            converted.append(line)
        return "\n".join(converted)

    def run(self):
        """Process every page of the generator and save the changed ones.

        Returns 0 once the generator is exhausted.
        """
        site = self.site
        wikipedia.setAction(u'bot:changing cedillas with commas in Romanian entries')
        for page in self.generator:
            title = page.title()
            wikipedia.output(title)
            try:
                original = page.get(get_redirect=True)
            except wikipedia.NoPage:
                wikipedia.output(u'Skipping %s because it does not exist' % title)
                continue

            # Interwiki links are taken out before the conversion and put
            # back afterwards.
            interwiki = wikipedia.getLanguageLinks(original, insite = site)
            stripped = wikipedia.removeLanguageLinks(original.strip(), site = site) + site.family.category_text_separator

            converted = self.convert_text(stripped)
            if converted == stripped:
                continue
            new_text = wikipedia.replaceLanguageLinks(converted, interwiki, site = site)

            # Show the title of the page we're working on.
            # Highlight the title in purple.
            wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % title)
            # Diff against the text already fetched; fetching again without
            # get_redirect would fail on redirect pages.
            wikipedia.showDiff(original, new_text)

            if self.debug_bul:
                choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['yes', 'No', 'all'], ['y', 'N', 'a'], 'N')
                if choice == 'a':
                    # Accept this change and stop asking for the rest of the run.
                    self.debug_bul = False
                elif choice != 'y':
                    continue

            try:
                # Save the page
                page.put(new_text)
            except wikipedia.IsRedirectPage:
                wikipedia.output(u'Skipping %s because it is a redirect' % (page.title()))
            except wikipedia.EditConflict:
                wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
            except wikipedia.SpamfilterError as error:
                wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (page.title(), error.url))
        return 0
def main():
    """Build a page generator from the command line and run the bot on it.

    Recognised arguments:
      -file:NAME  read page titles from the text file NAME
      -cat:NAME   work on the pages of category NAME
    Any other argument is taken as part of a single page title; when a title
    is given it takes precedence over -file/-cat.
    """
    generator = None
    title_words = []
    for argument in wikipedia.handleArgs():
        if not argument:
            continue
        if argument.startswith('-file:'):
            generator = pagegenerators.TextfilePageGenerator(argument[len('-file:'):])
        elif argument.startswith('-cat:'):
            category = catlib.Category(wikipedia.getSite(), argument[len('-cat:'):])
            generator = pagegenerators.CategorizedPageGenerator(category)
        else:
            title_words.append(argument)

    if title_words:
        # The words of the title arrive as separate arguments; join them back.
        single_page = wikipedia.Page(wikipedia.getSite(), ' '.join(title_words))
        generator = iter([single_page])

    if generator:
        preloading = pagegenerators.PreloadingGenerator(generator)
        cedillabot(preloading, wikipedia.getSite()).run()
    else:
        wikipedia.showHelp('touch')
# Script entry point: run the bot only when executed directly.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always call the framework's stopme(), even when main() raises.
        wikipedia.stopme()