User:Interwicket/code/iwlinks
Appearance
< User:Interwicket | code
#!/usr/bin/python
# -*- coding: utf-8 -*-
# wikipath en wiktionary User:Interwicket/code/iwlinks
import wikipedia
import re
# Regions of wikitext in which link syntax must not be interpreted.  Text
# matched by this pattern is blanked out before interwiki links are searched.
# DOTALL lets a region span several lines; IGNORECASE accepts <NOWIKI> etc.
# NOTE(review): the pattern reached this file damaged (closing tags and two
# alternatives were eaten by wiki rendering, leaving empty alternatives).
# The nowiki / pre / HTML-comment set below is a reconstruction -- confirm
# against the bot's canonical source.
renotags = re.compile(
    r'<nowiki>.*?</nowiki>|<pre>.*?</pre>|<!--.*?-->',
    re.IGNORECASE | re.DOTALL)

# One interwiki link: [[code:title]].  The code is 2-10 characters drawn from
# a-z and '-'; the title is any non-empty run without brackets or newlines,
# so a piped part ("|...") stays inside the captured title.
reiwiki = re.compile(r'\[\[([a-z-]{2,10}):([^\[\]\n]+)\]\]')
# reiwiki matches a link whose code is a-z (or hyphen), with any non-empty title
# (if there is a |, it is included in the title, to be removed)
# Various other errors are ignored
# routine to get iwiki links from entry text
# returns a dict of code -> title
# ignores unknown codes; ignores duplicate codes (returns the last one found)
# explicit deletes are returned so we can remove them and reflect that in the edit summary
def getiwlinks(text, flws):
    """Return the interwiki links found in an entry's text.

    text -- the entry's wikitext.
    flws -- mapping keyed by language code; each value exposes the
            attributes ``lockedwikt`` and ``deletecode``.

    Returns a dict mapping language code to link title.  The title is
    exactly what the pattern captured, so a piped part is still included.

    Links are dropped when the code is not a key of ``flws``, or when the
    entry for the code is locked and is not an explicit delete.  Explicit
    deletes are kept so the caller can remove them.  If a code occurs more
    than once, the last occurrence wins.
    """
    # Blank out the regions matched by renotags so links inside them are
    # never seen by the link pattern.
    mt = renotags.sub('', text)

    links = {}
    for code, title in reiwiki.findall(mt):
        if code not in flws:
            continue
        if flws[code].lockedwikt and not flws[code].deletecode:
            continue
        links[code] = title

    return links
def replaceiwlinks(text, links, flw, flws):
    """Return ``text`` with its interwiki links replaced by ``links``.

    text  -- the entry's wikitext.
    links -- dict of language code -> title to write; it is copied, the
             caller's dict is not modified.
    flw   -- object for the wiki being edited; this routine reads
             ``flw.site.interwiki_putfirst()``, ``flw.oneline`` and
             ``flw.attop``.
    flws  -- mapping keyed by language code; each value exposes the
             attributes ``lockedwikt`` and ``deletecode``.

    Every existing link whose code is a key of ``flws`` is removed from the
    text (together with trailing whitespace).  The new link list is then
    written either above the text or, after a blank line, below it.
    """
    links = links.copy()  # private copy (shallow, okay)

    # proceed as in getiwlinks in finding old links, but different action
    # duplicate codes are silently elided (probably not best, but as before)
    mt = renotags.sub('', text)

    for code, title in reiwiki.findall(mt):
        if code not in flws:
            continue
        old_link = r'\[\[' + code + ':' + re.escape(title) + r'\]\]\s*'
        text = re.sub(old_link, '', text)

        # no add or remove links to locked wikts (mostly harmless, but not
        # worth it); do remove explicit deletes
        if (flws[code].lockedwikt and not flws[code].deletecode
                and code not in links):
            links[code] = title

    # strip WS at bottom (and top for pl.wikt)
    text = text.strip('\n ')

    # sort if needed: codes named by interwiki_putfirst() go first, in that
    # order, and are taken out of the dict so they are not emitted twice
    linklist = []
    pf = flw.site.interwiki_putfirst()
    if pf:
        for code in pf:
            if code in links:
                linklist.append("[[" + code + ':' + links[code] + "]]")
                del links[code]

    # remaining, or all, in code order
    for code in sorted(links):
        linklist.append("[[" + code + ':' + links[code] + "]]")

    if flw.oneline:
        ls = ' '.join(linklist)
    else:
        ls = '\n'.join(linklist)

    if flw.attop:
        newt = ls + '\n' + text
    else:
        newt = text + '\n\n' + ls

    return newt
# test code
# Manual test driver: fetches one live page, rewrites its interwiki links,
# shows the diff and saves the result.  Note that page.put() writes to the
# wiki.
if __name__ == "__main__":

    from reciprocal import flws

    # init all the flws:
    for code in flws['en'].site.family.langs:
        foo = flws[code]

    code = 'sw'
    title = 'cat'

    # Single %-formatted argument: same output as the original
    # comma-separated print statement, and valid on Python 2 and 3 alike.
    print("sh status %s locked %s" % (flws['sh'].status,
                                      flws['sh'].lockedwikt))

    # get some page, try a few things
    page = wikipedia.Page(flws[code].site, title)
    text = page.get()

    links = getiwlinks(text, flws)
    print("%s : %s" % (title, repr(links)))

    # if 'ta' in links: del links['ta']
    # links['sw'] = title

    # so following should be no-op
    # NOTE(review): the scraped source fused this comment with the next
    # statement; it is restored here as live code -- confirm.
    if 'sh' in links:
        del links['sh']

    newt = replaceiwlinks(text, links, flws[code], flws)

    wikipedia.showDiff(text, newt)
    page.put(newt)