# Source: https://en.wiktionary.org/wiki/User:OrphicBot/orphicbot.py
# (wiki page-chrome lines removed; original indentation was lost in extraction)
import datetime, threading, time, json, requests, sqlite3, itertools, math, random
import socketIO_client
from utils import flatten, flattenAll, selectT, select, hashListByKey, a2u, u2a, nfc, number, txtToFile, is_number, urlencode, progress, dictToList, histogram
from getpass import getpass
from threading import Lock
from matplotlib import pyplot as plt
# utils
def slices(N, xs) : return [xs[N*x:N*(1+x)] for x in list(range(0,math.ceil(len(xs)/N)))]
kill = False
def fmt(fs, xss, threads = 32, fKill = lambda : kill, rvs = None, fNItems = lambda x : 1, silent = False) :
    # Threaded map: evaluate fs[i](*xss[i]) on up to `threads` worker threads.
    # rvs memoises results (slots that are already non-None are skipped), so a
    # killed run can be resumed by passing the partial rvs back in.
    # tf is the shared mutable state: [items done, threads finished, threads
    # active, per-thread item counters, kill/error flag].
    tf, tLock, rvs = [0, 0, 0, [0 for x in range(0,threads)], False], threading.Lock(), [None for x in fs] if rvs == None else rvs
    def fT(n,f,tf) :
        # Atomically account a finishing worker: n items done, f workers finished,
        # one fewer active thread.
        tLock.acquire()
        tf[0], tf[1], tf[2], = tf[0]+n, tf[1]+f, tf[2] - 1
        tLock.release()
    def fi(n, i_fxs, fT, tfk) :
        # Worker n: run its slice of (index, (f, args)) pairs unless killed.
        for i_fx in i_fxs :
            if not fKill() and not tf[4]:
                i,f,x = i_fx[0], i_fx[1][0], i_fx[1][1]
                try :
                    rvs[i] = f(*x) if rvs[i] == None else rvs[i]  # skip already-computed slots
                except :
                    # Any worker error raises the shared kill flag; the failed slot stays None.
                    print("Error at i={}; quitting.".format(i))
                    tf[4] = True
                tf[3][n] = tf[3][n] + fNItems(x)
                if not silent:
                    progress(sum(tf[3]))
        if fKill() or tf[4] : print("Thread {}: killed".format(n))
        fT(len(i_fxs), 1, tfk)
    # One thread per contiguous slice of the zipped (f, args) work list.
    ts = [threading.Thread(target = fi, args=(n, i_fx, fT, tf)) for (n,i_fx) in number(slices(int(1+len(fs)/threads), number(list(zip(fs,xss)))))]
    try :
        for t in ts:
            # Throttle: wait for a free worker slot (or a kill signal).
            while tf[2] >= threads and not fKill() and not tf[4]:
                time.sleep(.01)
            tf[2] = tf[2]+ 1  # NOTE(review): incremented outside tLock — racy vs fT; confirm intended
            t.start()
        # Block until every worker has reported finished.
        while tf[1] < len(ts) and not fKill() and not tf[4]:
            time.sleep(.01)
    except :
        tf[4] = True
        print("fmt killed.")
    return rvs
def plot(xs, lang='en') :
    # Quick-look line plot of the sequence xs via matplotlib.
    # `lang` is accepted but currently unused.
    plt.plot(xs)
    plt.ylabel('xs')
    plt.show()
# mw api : login, save, load, loadtitles
db = sqlite3.connect("enwiktionary.db")
def strToDt(ts) : return datetime.datetime.strptime(ts, "%Y-%m-%dT%H:%M:%SZ")
def dtToStr(dt, sep=False) : return dt.strftime("%Y%m%d%H%M%S" if sep == False else "%Y-%m-%dT%H:%M:%SZ")
def login(u, p) :
    # Log in to en.wiktionary as user u with password p.
    # Returns the "CT" list used by every other API helper:
    # [session cookies, csrf edit token, username, password].
    url, params3 = 'https://en.wiktionary.org/w/api.php', '?format=json&action=query&meta=tokens&continue='
    # step 1: obtain a login token plus the cookies that scope it
    login_token, cookies = [(x.json()['query']['tokens']['logintoken'], x.cookies.copy()) for x in [requests.post(url, data={'action': 'query', 'format': 'json', 'utf8': '', 'meta': 'tokens', 'type': 'login'})]][0]
    # step 2: perform the login, keeping the authenticated cookie jar
    cookies = requests.post(url, data={'action': 'login', 'format': 'json', 'utf8': '', 'lgname': u, 'lgpassword': p, 'lgtoken': login_token}, cookies=cookies).cookies.copy()
    # step 3: fetch the csrf token required for edit/move actions
    edit_token = requests.get(url + params3, cookies=cookies).json()['query']['tokens']['csrftoken']
    return [cookies, edit_token, u, p]
def http(CT, params, postdata=None, token=True):
    """GET (no postdata) or POST (with postdata) against the en.wiktionary API.

    CT is [cookies, edit token, username?, password?]. On a notoken/badtoken
    error the function re-logins with the stored credentials and retries;
    returns the raw response text.
    """
    postdata = [] if postdata is None else postdata  # was a shared mutable default ([])
    uri = 'https://en.wiktionary.org/w/api.php' + params
    rv = requests.get(uri, cookies=CT[0]) if postdata == [] else requests.post(uri, dict(postdata, **({'token': CT[1]} if token else {})), cookies=CT[0])
    js = json.loads(rv.text)
    if 'error' in js and 'code' in js['error'] and (js['error']['code'] == 'notoken' or js['error']['code'] == 'badtoken'):
        # Token rejected: re-login (requires credentials at CT[2:4]) and retry.
        if len(CT) < 4:
            raise Exception("No login tokens or credentials.")
        try:
            CT = [*login(*(CT[2:4]))] + CT[2:]
        except Exception as e:
            # fixed typo: was "Difficultyf logging in."
            raise Exception("Difficulty logging in.") from e
        print("{}: Logged in as {}.".format(datetime.datetime.utcnow(), CT[2]))
        return http(CT, params, postdata)
    return rv.text
def loadMW5c(CT, ws, cols = ['content', 'timestamp'], qtype = 'titles', revisions = 1) :
    # One API query (ws should be <= 500 keys) for revision columns `cols` of the
    # pages/pageids/revids ws. Returns one tuple per requested key, aligned with
    # the order of ws: (resolved key, col values...), with '' for anything missing.
    # NOTE: mutable default `cols` is shared but never mutated here.
    fC = lambda c : '*' if c == 'content' else c  # the API serves page text under the key '*'
    # 'ids' expands into three output columns
    ocols = flattenAll([c if c != 'ids' else ['pageid', 'revid', 'parentid'] for c in cols])
    post = [('action','query'),('prop','revisions'),('rvprop','|'.join([c for c in cols if c != 'title'])),('format','json'),(qtype,'|'.join(ws))] + ([('rvlimit',str(revisions))] if revisions != 1 else [])
    tbl = json.loads(http(CT, "", post, token = False))
    # title normalisations the server applied (e.g. first-letter capitalisation)
    norm = dict([(x['from'], x['to']) for x in (tbl['query']['normalized'] if 'normalized' in tbl['query'] else {})])
    # interwiki results, keyed by synthetic fractional "page ids" in (0, 1)
    iw = dict([(str(1/(2+x[0])), x[1]) for x in number(tbl['query']['interwiki'] if 'interwiki' in tbl['query'] else [])])
    odict = dict([(w,n) for (n,w) in number(ws)])  # NOTE(review): computed but unused — confirm it can be dropped
    # column extractor: prefer the revision dict, else fall back to page title / page id / ''
    fCRX = lambda c,r,p : r[fC(c)] if 'revisions' in p[1] and fC(c) in r else (p[1]['title'] if (c == 'title' and 'title' in p[1]) else p[0] if c == 'pageid' and (float(p[0]) <= 0 or float(p[0]) >=1) else p[0] if float(p[0]) >= 0 else '')
    # rows keyed by title / pageid / revid depending on qtype
    wToRow = hashListByKey(flatten([[((p[1]['title'] if qtype == 'titles' else str(p[0]) if qtype == 'pageids' else str(r['revid']),) + tuple(fCRX(c,r,p) for c in ocols)) for r in (p[1]['revisions'] if 'revisions' in p[1] else ['']) ] for p in dictToList(dict(tbl['query']['pages'] if 'pages' in tbl['query'] else {}, **iw))]), lambda x: x[0], lambda x: x[1:])
    # re-align rows with the caller's order, resolving normalised titles
    rv = flatten([(wToRow[norm[w] if w in norm else w] if (norm[w] if w in norm else w) in wToRow else [tuple('' for x in ocols)]) for w in ws])
    return rv
def loadMW(CT, ws, cols = ['content', 'timestamp'], qtype = 'titles', revisions = 1, threads = 32, silent = False, startcount = 0, rvs = None) : return flatten([x for x in fmt([lambda x: loadMW5c(CT, x, cols = cols, qtype = qtype, revisions = revisions) for y in range(0,math.ceil(len(ws)/500))], [(s,) for s in slices(500, ws)], fNItems = lambda x : len(x[0])+startcount, silent = silent, threads = threads, rvs = rvs) if x != None])
def load(w, qtype = 'titles') : return loadMW5c(('',''), [w], qtype = qtype)[0][0]
def loadDB(x):
    """Reassemble the wikitext of entry x from the line-per-row `enwiktionary` table."""
    rows = selectT(db, "select line from enwiktionary where name == ?", (x,))
    return '\n'.join(row[0] for row in rows)
def saveMW(CT,w,x,c, sts = None, bts = None) : # add md5
    # Save wikitext x to page w with edit summary c (bot + minor flags set).
    # w may be a (title, timestamp) pair, in which case the timestamp is used for
    # BOTH starttimestamp and basetimestamp (edit-conflict detection).
    # NOTE(review): (w[0], w[1], w[1]) reuses w[1] twice — confirm this is intended
    # rather than a (title, start, base) triple.
    w, sts, bts = (w[0], w[1], w[1]) if type(w) is tuple else (w, sts, bts)
    return http(CT, "", [('action','edit'),('assert','user'),('format','json'),('utf8',''),('text',x),('summary',c),('title',w),('token',CT[1]),('bot',''),('minor',''),('starttimestamp',sts),('basetimestamp',bts)])
def moveMW(CT, s, d, c, redirect=False):
    """Move page s to d with reason c (the talk page moves along via 'movetalk').

    redirect=False appends 'noredirect' so no redirect is left behind;
    redirect=True truncates the parameter list before 'noredirect'.
    Returns the parsed JSON API response.
    """
    # removed unused local `url` — http() already hard-codes the endpoint
    post = [('action', 'move'), ('from', s), ('to', d), ('reason', c),
            ('format', 'json'), ('movetalk', ''), ('token', CT[1]), ('noredirect', '')]
    return json.loads(http(CT, "", post[0:(7 if redirect else 8)]))
def loadRevisions(db, CT, ws) :
    # Crawl each page's revision history backwards (following parentid) and
    # insert rows into the `revisions` table; returns the total rows seen.
    cols = ['title', 'timestamp', 'user', 'comment', 'size', 'ids']
    revs = loadMW(CT, ws, cols = cols, threads = 8)
    total, n, rids = 0 , 0, []
    # first pass always runs: all() over the empty rids is True
    while all(int(r) != 0 for r in rids) and len(revs) > 0 :
        # r[-1] is parentid; 0 or '' means the initial revision has been reached
        rids, ws, fO, total = [str(r[-1]) for r in revs if r[-1] != 0 and r[-1] != ''], [x[0] for x in revs if x[-1] != 0 and x[-1] != ''], print if len(revs) > 10000 else progress, total + len(revs)
        db.executemany("insert or ignore into revisions values (?,?,?,?,?,?,?)", [x for x in revs if x[-2] != ''])
        fO("{}: {} ({})".format(n,total, len(rids)))
        # NOTE(review): loadMW's signature (above) has no `order` parameter — this
        # call looks like it would raise TypeError; confirm which loadMW is intended.
        revs, n = loadMW(CT, rids, cols = cols, qtype = 'revids', order = ws, silent = True if fO == progress else False), n + 1
    return total
def loadTitles(CT = None, begin = '!', end = None) :
    # Enumerate all non-redirect page titles from `begin` (inclusive) up to `end`
    # via list=allpages; 5000 per request when logged in (CT given), else 500.
    # can run parallel adjacent requests using hardcoded spacing data
    words = []
    while begin and (end == None or begin < end):
        omnia = json.loads(http(CT, '?action=query&list=allpages&apfrom={}{}&aplimit={}&format=json&apfilterredir=nonredirects'.format(urlencode(begin), ("&apto="+end) if end != None else "", 500 if CT == None else 5000), []))
        # apcontinue is the next start title; None terminates the loop
        begin = omnia['continue']['apcontinue'] if 'continue' in omnia else None
        words.extend(q['title'] for q in omnia['query']['allpages'])
        progress(len(words))
    return words
def addRevsToDb1(revs) :
    # Bulk-insert revision rows, commit, then CLOSE the module-global db
    # connection — `db` is unusable afterwards. Returns the first row inserted.
    db.executemany("insert or ignore into revisions values (?,?,?,?,?,?,?)", revs)
    db.commit()
    db.close()
    return revs[0]
# mw rc client
def fOnChange(self, change) :
    # Default RC-stream handler: pretty-print the interesting fields of one
    # change event; 'length' and 'revision' are {old, new} dicts and expand to
    # two rows each, absent fields print as ''.
    cols = ['title', 'timestamp', 'user', 'comment', 'title', 'length', 'id', 'revision']
    info = flattenAll([[("{}-old".format(c), str(change[c]['old'])), ("{}-new".format(c), str(change[c]['new']))] if (c in change and (c == 'length' or c == 'revision')) else (c,str(change[c])) if c in change else (c,'') for c in cols])
    print(("change: ({} items)\n".format(len(info)) + '\n'.join([" {}: {}".format(c,i.encode('utf-8')) for (c,i) in info])))
class WikiNamespace(socketIO_client.BaseNamespace):
    # socket.io namespace for the Wikimedia recent-changes stream.
    # NOTE: doongxi1 monkey-patches on_change with a custom handler.
    def on_change(self, change):
        fOnChange(change)
    def on_connect(self):
        # narrow the firehose down to en.wiktionary events
        self.emit('subscribe', 'en.wiktionary.org')
        print('CONNECTED')
def subscribeRC(seconds) :
    # Listen to the recent-changes stream for `seconds` seconds, dispatching
    # events to WikiNamespace (blocking call).
    socketIO = socketIO_client.SocketIO('stream.wikimedia.org', 80)
    socketIO.define(WikiNamespace, '/rc')
    socketIO.wait(seconds)
myrunnings = [True]
def doongxi1(db, CT, lex, lix, f, runnings, fS = lambda a,b,c : saveMW(CT, a, b, c)) :
    # Daemon: subscribe to the RC stream and run rewrite f (via runAllM) on any
    # page whose watched language section was just created. runnings[0] is the
    # external kill switch; any edit to the bot's talk page also stops the run.
    # lix maps language codes to display names; lex maps codes to word lists.
    dlang = dict([(x,y) for (x,[y]) in hashListByKey(lix, lambda x: x[0], lambda x: x[2]).items()])
    talkpage = load('User talk:OrphicBot')  # dead-man's switch snapshot
    # NOTE(review): `unique` is not defined or imported in this file — confirm it
    # comes from the interactive session / utils.
    mylangs = unique([x[1] for x in dlang.items()])
    # lower-cased title -> list of watched language names for that title
    langs = dict([(x,unique(y)) for (x,y) in hashListByKey([(w,dlang[l]) for l in lex for w in lex[l]] , lambda x: x[0], lambda x: x[1]).items()])
    def fOnChangeL(self, change) :
        # RC handler installed onto WikiNamespace below.
        if runnings[0] == False :
            raise Exception("No longer running; quitting.")
        user = "" if not 'user' in change else change['user']
        ulangs, title = ([], None) if not 'title' in change or not change['title'].lower() in langs else (langs[change['title'].lower()], change['title'])
        if len(ulangs) > 0 :
            print('Checking {} for new {} section{}{}'.format(title, ', '.join(ulangs), 's' if len(ulangs) > 1 else '', '' if user == '' else ' (user: {})'.format(user) ).encode('utf-8') )
            # current and previous revision text, to detect a newly added section
            currOld = [x[0] for x in loadMW(CT, [title], cols = ['content'], qtype = 'titles', revisions = 2)]
            if any(x in currOld[0] and (len(currOld)==1 or not x in currOld[1]) for x in [ "=={}==\n".format(l) for l in ulangs]):
                if talkpage != load('User talk:OrphicBot') :
                    # talk page edited since start-up: abort without editing
                    print("User talk page changed. No action taken on {}. Quitting.".format(title))
                    runnings[0] = False
                    raise Exception("User talk page changed.")
                result = runAllM(CT, [title], f, fSave = fS)
                for (w,n,e0,e1,c,err,rv) in result :
                    if c != None and not 'No change.' in c :
                        print("Processed at {}: {}!".format(datetime.datetime.utcnow(), w).encode('utf-8'))
    WikiNamespace.on_change = fOnChangeL
    def curre () :
        # keep re-subscribing in 1-hour windows until killed
        while runnings[0] :
            subscribeRC(3600)
        #except : print('Error subscribing; trying again.')
        print('OrphicBot daemon process elapsed.')
    threading.Thread(target = curre).start()
#agenda :
# 1. factor server from function
# 2. exclude accented Old Norse words (correct lex as argument)
# doongxi1(db, CT, lex, lix, addRefs, myrunnings, mySave)
# myrunnings = [True]
# doongxi1(db, CT, lex, lix, addRefs, myrunnings)
def recentChanges(CT, start, end) :
    # Page through list=recentchanges from `start` back to `end` (datetimes,
    # newest first). Returns the raw change dicts.
    # NOTE(review): CT is accepted but unused — the request is unauthenticated.
    cols = ['title', 'user', 'ids', 'timestamp']
    payload = {'action': 'query', 'format': 'json', 'utf8': '', 'list' : 'recentchanges', 'rcprop' : '|'.join(cols), 'rclimit' : '500', 'rcstart' : dtToStr(start), 'rcend' : dtToStr(end)}
    # n also serves as the loop flag: -1 terminates; rccontinue local is unused
    n, rc, rccontinue = 0, [], ""
    while n >= 0 :
        result = json.loads(requests.post('https://en.wiktionary.org/w/api.php', data=payload).text)
        rc.extend(result['query']['recentchanges'])
        if 'continue' in result :
            progress("{}: {} ({})".format(n, len(rc), result['continue']['rccontinue']) )
            payload['rccontinue'] = result['continue']['rccontinue']
            n = n + 1
        else : n = -1
    return rc
def updateRC(db, quiet = False) :
    # Pull recent changes since the newest timestamp already stored and insert
    # them into the `recentchanges` table. Returns the inserted rows.
    # NOTE(review): relies on a module-global CT that this file never defines —
    # confirm it is created interactively (CT = login(...)) before calling.
    ts = select(db, "select max(timestamp) from recentchanges")[0][0]
    cs = recentChanges(CT, datetime.datetime.utcnow(), strToDt(ts))
    # final False = "not yet processed" flag
    rs = [(c['rcid'], c['revid'], c['title'], c['user'] if 'user' in c else None, c['timestamp'], False) for c in cs if 'user' in c]
    db.executemany("insert or ignore into recentchanges values (?,?,?,?,?,?)", rs)
    db.commit()
    if not quiet: print("{} items".format(len(rs)))
    return rs
def update(db, wset):
    """Titles touched by recent non-OrphicBot edits that are not already in wset.

    BUG FIX: the computed list was previously built and then discarded
    (the function returned None); it is now returned.
    """
    titles = [x[0] for x in select(db, 'select distinct(title) from recentchanges where user != "OrphicBot"') if not x[0] in wset]
    return titles
def dongxi():
    """Ad-hoc monitor: refresh recent changes, print an editor histogram,
    the covered time span, and the busiest editor's edits-per-second rate."""
    test = updateRC(db)
    histo = histogram([x[3] for x in test])  # edits per user
    print(str(sorted([(x, histo[x]) for x in histo], key=lambda x: x[1])).encode('utf-8'))
    # span between newest and oldest change in this batch
    ts = strToDt(test[0][4]) - strToDt(test[-1][4])
    print(ts)
    cheese = sorted(dictToList(histo), key=lambda x: x[1])[-1][1]  # top editor's count
    print(cheese / max(1, ts.seconds))
    # BUG FIX: removed a stray bare `re` expression that raised NameError
    # (`re` is never imported in this file).
# parsing/editing
def foliage(x,t) :
    # Parse entry wikitext x of page t into section tuples:
    # (depth, start line, line count, header text, ancestor headers (depths 2..depth-1), raw lines).
    # Designed to round-trip with wiki(): wiki(foliage(x, t)) == x for well-formed entries.
    def fH(x) : return x.strip('=')
    def p1(x) :
        # depth-1 "trailer" lines: '----' separators, [[Category:...]],
        # {{DEFAULTSORT:...}}, {{catlangname|...}}, {{C|...}}, interwiki links to
        # t, or lines from the pre-header preamble (ps)
        def ffq(w,bl,br,n) : return lambda x : (x[0:len(w)+n] == n*bl + w) and x[-n:] == n*br
        fCat, fC, fCLN, fDS = ffq("Category:","[","]",2), ffq("C|","{","}",2), ffq("catlangname|", "{", "}", 2), ffq("DEFAULTSORT:", "{", "}", 2)
        return (x == '----') or fCat(x) or fDS(x) or fCLN(x) or fC(x) or (':' in x and "{}]]".format(t)==x.split(':')[1]) or x in ps
    # depth of a line: k for a '==...==' header flanked by k '='s each side,
    # 1 for p1 trailer lines, 0 otherwise
    def fD(x) : return [l if d%2 == 0 and len(x) > d+1 and x[:l] == x[-l:] else (1 if p1(x) else 0) for (d,l) in [(d,int(d/2)) for d in [len(x) - len(fH(x))]]][0]
    # nearest header of depth g strictly above line n
    def fA(g,n) : return fH(xs[[l[1] for l in ls if l[0] == g and l[1] < n][-1]])
    xs, ps = x.split('\n'), set([y for y in x[0:str.index(x, '==')].split('\n') if len(y) > 0])
    hs, ls = [fD(x) for x in xs], []
    # ls collects (depth, line number) for every header/trailer line, in order
    [ls.extend([(hs[i],i)]) if hs[i] != 0 else ls.extend([]) for i in range(0,len(hs))]
    # each section runs from its header line to the next header line (or EOF)
    return [(b[0], b[1], p[1]-b[1], fH(xs[b[1]] if b[1] < len(xs) else ""), [fA(l,b[1]) for l in range(2,b[0])], xs[b[1]:p[1]] ) for (b,p) in zip(ls, ls[1:]+[(0,len(hs))])]
def wiki(f) : return '\n'.join(flatten([x[-1] for x in f])).strip('\n')
def fParsingError(txt,t) :
    # Sanity-check txt (page t) before editing; returns an error string, or None when OK.
    try : Q = foliage(txt, t)
    except : return "Invalid layout: hierarchy."
    # a non-trailer section sandwiched between depth-1 trailer sections that are
    # not plain '----' separators means categories/interwikis sit mid-entry
    if any([x > 0 and Q[x][0] != 1 and (Q[x-1][0]==1 and Q[x-1][-1][0] != '----') and (Q[x+1][0]==1 and Q[x+1][-1][0] != '----') for x in range(0,len(Q)-1)]) :
        return "Invalid layout: category or interwiki."
    return None
def mapSections(ls, t, L, bs, fB) :
    # Rewrite sections bs (of language L) inside parse ls of page t: each
    # section's lines are replaced by fB(current parse, its lines), re-parsing
    # after every replacement so later sections see earlier edits.
    #if len(bs) == 0 : print("error ({}): no blocks")
    lsNew, c = ls, ""
    for e in bs:
        # e[1] is the section's start line; the dummy tuple only carries the new lines
        lsNew = foliage(wiki([l if l[1] != e[1] else (0,0,0,0,0, fB(lsNew, e[-1])) for l in ls]), t)
    return foliage(wiki(lsNew),t)
ffSort = lambda k : lambda LS,B : sorted(B, key = k)
fAll, fFirst, fLast, fOnly = lambda xs : xs, lambda xs : [xs[0]] if len(xs) > 0 else xs, lambda xs : [xs[-1]] if len(xs) > 0 else xs, lambda xs : xs if len(xs) == 1 else []
L, G = 'Latin', 'Ancient Greek'  # frequently-used language names
# (section name, block-selector) pairs used when building run() agendas
E, R, lR, lEL, P = ('External links', fAll), ('References', fAll), ('References', fLast), ('External links', fLast), ('Pronunciation', fAll)
def fWithRefs(ls,t,Ls) :
    # Ensure each language in Ls has a ===References=== section, appending an
    # empty one to that language's last block when missing. ls is a foliage()
    # parse of page t; returns a re-parsed foliage list.
    def fWithRefI(ls, t, L) :
        # start line of the last block belonging to language L
        ixL = [l for l in ls if len(l[4])>0 and l[4][0] == L][-1][1] # no error checking #[x[1] for x in ls if x[0] > 1][-1]
        lsN = foliage(wiki([ls[n] if ls[n][1] != ixL else (ls[n][0], ls[n][1], ls[n][2], ls[n][3], ls[n][4], ls[n][5] + [('\n' if ls[n][5][-1] != '' else '') + '===References===', ''] ) for n in range(0,len(ls))]), t)
        # only adopt lsN when L genuinely lacks a References section
        return ls if 'References' in [l[-1][0].strip('=') for l in ls if L in l[4]] else foliage(wiki(lsN), t)
    ls0 = ls
    for L in Ls: ls0 = fWithRefI(ls0, t, L)
    return ls0
def fWithELs(ls, t, Ls):
    """Ensure each language in Ls has an ===External links=== section, appending
    an empty one to that language's last block when missing.

    ls is a foliage() parse of page t; returns a re-parsed foliage list.
    Raises when any References section exists (the two should not be mixed).
    """
    if any(l[3] == 'References' for l in ls):
        raise Exception('Will not create External links with extant References section.')
    def fWithELsI(ls, t, L):
        # start line of the last block belonging to language L; no error checking if L absent
        ixL = [l for l in ls if len(l[4]) > 0 and l[4][0] == L][-1][1]
        lsN = foliage(wiki([ls[n] if ls[n][1] != ixL else (ls[n][0], ls[n][1], ls[n][2], ls[n][3], ls[n][4], ls[n][5] + [('\n' if ls[n][5][-1] != '' else '') + '===External links===', ''] ) for n in range(0,len(ls))]), t)
        # only adopt lsN when L genuinely lacks an External links section
        return ls if 'External links' in [l[-1][0].strip('=') for l in ls if L in l[4]] else foliage(wiki(lsN), t)
    ls0 = ls
    for L in Ls:
        # BUG FIX: previously called fWithRefI, which is local to fWithRefs and
        # undefined here (NameError); fWithELsI is the intended helper.
        ls0 = fWithELsI(ls0, t, L)
    return ls0
def blocks(ls, L, f_ss):
    """Select section tuples of language L from parse ls.

    f_ss is an iterable of (section name, filter) pairs; for each pair the
    filter is applied to all of L's sections bearing that header name, and the
    surviving sections are concatenated in order.
    """
    def sections_named(name):
        # l[3] is the header text, l[4][0] the section's language ancestor
        return [entry for entry in ls if entry[3] == name and entry[4][0] == L]
    chosen = []
    for name, keep in f_ss:
        chosen.extend(keep(sections_named(name)))
    return chosen
def run(t, Ls, ls, ss_n_fs) :
    # Apply each (section-selectors, (summary name, rewrite fn)) agenda item to
    # every language in Ls over parse ls of page t.
    # Returns (new wikitext, edit summary) — "No change." when nothing changed.
    summaryG, summaryL, c = "", "", ""
    for L in Ls :
        for (ss,(n,f)) in ss_n_fs :
            old = wiki(ls)
            bs = blocks(ls, L, ss)
            ls = mapSections(ls, t, L, bs, f) if len(bs) > 0 else ls
            # record the item's name only when the text actually changed
            summaryL += "" if wiki(ls) == old else n + " "
        summaryG, summaryL = summaryG if summaryL == "" else "{} {}: {}".format(summaryG, L, summaryL), ""
    return (wiki(ls), "No change." if summaryG == "" else summaryG.strip())
# running
def diff(e0, e1) :
    # Extract the changed region between texts e0 and e1, widened outward to
    # '==' header / '----' boundaries; returns (old excerpt, new excerpt).
    ls0, ls1 = e0.split('\n'), e1.split('\n')
    # S: first differing line index; E: negated offset of the last differing line from the end
    S,E = ([x for x in range(0,min(len(ls0), len(ls1))) if ls0[x] != ls1[x]]+[len(ls0)])[0], -([x for x in range(0,min(len(ls0), len(ls1))) if ls0[-x-1] != ls1[-x-1]]+[len(ls1)])[0]
    # widen S back to the nearest preceding '==' header (or the top of the page)
    S = ([0]+[x for x in range(0,min(len(ls0),S)) if len(ls0[x])>1 and ls0[x][0:2]=='=='])[-1]
    # widen each end forward to the next header or '----' separator (or EOF)
    E0,E1 = ([x for x in range(E,0) if len(ls0[x])>1 and ls0[x][0:2]=='==' or ls0[x][0:4]=='----']+[len(ls0)])[0]+1, ([x for x in range(E,0) if len(ls1[x])>1 and ls1[x][0:2]=='==' or ls1[x][0:4]=='----']+[len(ls1)])[0]+1
    return ('\n'.join(ls0[S:E0]), '\n'.join(ls1[S:E1]))
# Render runAllM result tuples as a <source>-wrapped diff log. The tag is split
# ('<sou'+'rce>') so the wiki does not interpret it when this log is posted.
# NOTE(review): `appose` is not defined in this file — presumably a utils
# helper that lays two texts side by side; confirm.
def makeDiffs(rvs) :
    return '<sou'+'rce>\n\n\n\n' + '\n\n\n\n\n'.join(["{}: {}\n\n{}".format(x[0], x[4] if x[4] != None else x[5], appose(*diff(x[2], x[3] if x[3] != None else x[2]))) for x in rvs]) + '\n\n\n\n</sou'+'rce>'
def tryExcept(f, g):
    """Return (f(), "") on success; if f raises, return g() instead.

    A poor man's Maybe: g supplies the fallback (value, error-message) pair.
    """
    try:
        val = (f(), "")
    except Exception:  # was a bare except: no longer swallows KeyboardInterrupt/SystemExit
        val = g()
    return val
# runOne2 : word, index, wiktionaryUserTalkPage, wiktionaryUserTalkPageOld, fRewrite, fSave ->
def runOne2(w,n,rw,fS,fL,fPS1s) :
    # Process one page end-to-end: load, parse, round-trip check, rewrite via rw,
    # re-check, then save. Returns (title, index, old text, new text, summary, error, save result).
    # Error codes E1..E8 identify the failing stage; any failure short-circuits the rest.
    # throttle: sleep 1s while a coin weighted by fPS1s() (server-lag probability) says to wait
    while random.random() < fPS1s() :
        time.sleep(1)
    # w may be a preloaded (title, text, timestamp) triple; otherwise fetch via fL
    w,e0,t,err = (w[0],w[1],w[2],"") if type(w) is tuple else fL(w) + ("",)
    F, err = (None, err) if err != "" else tryExcept(lambda : foliage(e0,w), lambda : (None, "E1: Cannot parse"))
    err = err if err != "" else "" if e0 == wiki(F) else "E2: Round trip failed: original entry" # e1, c:
    # reversed(...) flips the pair so err receives the E3 message (or "") from tryExcept's (value, "") result
    _, err = reversed((err, None) if err != "" else tryExcept(lambda : "E3: Errant space in references." if '===References===' in e0 and any([y.strip()=='' for y in [x[5] for x in F if x[3] == 'References'][0][:-1]] ) else "", lambda : (None,"E4: Test for malformed References section failed")))
    ((e1, c), err) = ((None, None), err) if err != "" else tryExcept(lambda : rw(F,w), lambda : ((None, None), "E5: Rewrite failed"))
    err = err if err != "" else "E6: Round trip failed: rewritten entry" if e1 != wiki(foliage(e1,w)) else ""
    err = err if err != "" else "E7: Idempotency test failed." if e1 != wiki(foliage(wiki(foliage(e1,w)),w)) else ""
    # only save when the rewrite actually changed the text
    rv, err = (None,err) if err != "" else tryExcept(lambda : "" if e0 == e1 else fS((w,t) if type(w) is tuple else w,e1,c), lambda : (None, "E8: Error saving."))
    return w, n, e0, e1, c, err, rv
# return (logFull, logDiff, logSummary, logErr)
def persistLogs(logErr, logFull, logDiff, logSummary, wU, rwName, tS, fSave) :
    # Save edit logs as wiki subpages of wU under EditLogs/<run time>/<rewrite name>.
    # c is the number of blank lines joining entries. Only Diff/Summary/Err are
    # actually written; Full is counted but skipped by the final condition.
    # NOTE(review): "Full," looks like a typo for "Full" — harmless since Full is never saved.
    for (l,n,c) in [(logErr, "Err", 1), (logFull, "Full,", 4), (logDiff, "Diff", 4), (logSummary, "Summary", 1)] :
        # sort entries numerically by their leading item number
        l = sorted(l, key = lambda x: int(x.strip().split('.')[0].split('/')[0]))
        # the range of words covered, used in the log page title
        ws = sorted([x.strip().split(':')[0].split('.')[1].strip() for x in l if not '(no change)' in x and len(x) > 0 and x.strip()[0].isdigit() and '.' in x and ':' in x])
        print(n, len(ws))
        if len(ws)>0 :
            wS, wE = ws[0], ws[-1]
            if(len(l)) > 0 :
                path = '{}/EditLogs/{}/{}/{}_items_{}_to_{}_{}.txt'.format(wU,tS.strftime("%d%B%Y %Hh%Mm%Ss"),rwName, len(l), wS, wE, n)
                if n == "Diff" or n == "Summary" or n == "Err":
                    # split the <source> tag so the log page renders it literally
                    fSave(path, "<sou"+"rce>\n\n"+('\n'*c).join(l)+"\n\n</sou"+"rce>", "{} {}".format(rwName, n))
def fLag(CT) : return float(json.loads(requests.get('https://en.wiktionary.org/w/api.php?action=query&titles=MediaWiki&format=json&maxlag=-1', cookies = CT[0]).text)['error']['info'].split(' ')[3])
running = True
def fUserPageChanged(CT, fKill, fL=lambda x: None):
    """Poll the bot's talk and sandbox pages once a second until one changes
    or fKill() becomes true.

    fL receives the current server lag each iteration (used by runAllM for
    adaptive throttling). Returns True when a watched page changed; returns
    None when stopped via fKill.
    """
    pages = ['User talk:OrphicBot', 'User:OrphicBot/Sandbox/test']
    old = [loadMW(CT, [p])[0][0] for p in pages]
    same = True
    while not fKill() and same:
        new = [loadMW(CT, [p])[0][0] for p in pages]
        time.sleep(1)
        fL(fLag(CT))
        # BUG FIX: removed two leftover debug statements here — a maxlag GET whose
        # result was discarded, and a bare no-op `requests` expression.
        same = all(a == b for (a, b) in zip(old, new))
    if not same:
        print("User talk page changed.")
        return True
def fPDelay1s(lag, threads) : return 0 if lag <= 1 else 1 - 2**(-lag-math.log((1 if lag > 2 else (lag-1))*threads,2))
def runAllM(CT, ws, rw, fSave = lambda a,b,c: datetime.datetime.utcnow(), fLoad = loadMW, rwName = "update", rvs = None, threads = 32) :
    # Run rewrite rw over pages ws in parallel, guarded by a watchdog thread that
    # stops everything when the bot's talk page changes.
    # running = [keep-going flag, current server lag] (shared with the watchdog).
    running = [True, 0]
    def fRunningSet(x,n) : running[n] = x
    # watchdog: fUserPageChanged blocks until a watched page changes (or we stop);
    # its fL callback feeds the current lag into running[1] for throttling
    threading.Thread(target = lambda : fRunningSet(not fUserPageChanged(CT, lambda : not running[0], lambda x: fRunningSet(x,1)), 0)).start()
    # fan runOne2 out over fmt(); pages are pre-loaded as (title, text, timestamp)
    try : rvs = fmt([runOne2 for _ in ws], [((w,a,t), n, rw, fSave, lambda x : loadMW(CT,[x], cols=['title','content','timestamp'])[0], lambda : fPDelay1s(running[1], threads)) for (n,(w,a,t)) in number(list(fLoad(CT, ws, cols = ['title', 'content', 'timestamp'])))], rvs = rvs, fKill = lambda : not running[0], threads = threads )
    except : print("runAllM killed.")
    finally : running[0] = False
    return rvs
# user page lexica information
def slice(xs) : return [xs[2**21*x:2**21*(1+x)] for x in list(range(0,math.ceil(len(xs)/2**21)))]
def splice(xs) : return ''.join(xs)
def loadParts(w) :
    # Yield the wikitext of w.part1, w.part2, ... until an empty page is reached.
    for n in itertools.count():
        x = load('{}.part{}'.format(w,n+1))
        if x != '' :
            yield x
        else : break
def txtToLexica(txt) : return hashListByKey([(w,d) for (w,d) in [l.split('\t') for l in txt.split('\n')]], lambda x: x[1], lambda x: nfc(x[0]))
def lexicaToTxt(L) : return '\n'.join(sorted(flatten([["{}\t{}".format(nfc(l),k) for l in L[k]] for k in L]), key = lambda x: '\t'.join(reversed(x.split('\t')))))
def lexicaToPage(CT,L,w) : return [saveMW(CT,"{}.part{}".format(w,n+1), x, ', '.join(L)) for (n,x) in number(slice(u2a(lexicaToTxt(L)))) ]
def pageToLexica(w) : return txtToLexica(a2u(splice([x for x in loadParts(w)])))
def tableToTxt(t) : return u2a('\n'.join(['\t'.join([str(x) for x in l]) for l in ixHws]))
def txtToTable(t) : return [[x if not is_number(x) else float(x) for x in l.split('\t')] for l in a2u(t).split('\n')]