# User:OrphicBot/updateAlso.py
#
# From Wiktionary, the free dictionary
from utils import unaccented, hashListByKey, hashListByKeys, unique, flatten, reGroups, dictToList, readFile
from functools import reduce
from getpass import getpass
from unicodeblock.blocks import of as iBlock
from orphicbot import login, loadTitles, loadMW, foliage, wiki, runAllM, makeDiffs
from obpwd import obpwd
import unicodedata


def minmax(s) :
  """Return (lowest, highest) code point among the characters of s, or (0, 0) when s is empty."""
  codes = [ord(c) for c in s]
  if len(codes) == 0 :
    return (0, 0)
  return (min(codes), max(codes))

def block(x) :
  """Return the block name iBlock reports for x, or the string "NONE" when iBlock returns None."""
  # Look the block up once instead of twice, and test for None by identity.
  name = iBlock(x)
  return name if name is not None else "NONE"

def blocks(x) :
  """Return the list of distinct block names occurring in the string x.

  Shortcut: when the characters with the lowest and highest code points fall
  in the same block, only the block of the first character is reported (or an
  empty list for empty x). Otherwise every character is classified and the
  result is de-duplicated with unique().
  """
  lowest, highest = minmax(x)
  blockLow, blockHigh = block(chr(lowest)), block(chr(highest))
  if blockLow == blockHigh :
    if len(x) > 0 :
      return [block(x[0])]
    return []
  return unique(block(c) for c in x)

def makePTibetanSubjoinedConsonant() :
  """Build and return a predicate over single characters.

  The predicate is true exactly when the character's code point is one of the
  code points listed below.
  """
  codePoints = frozenset([3984, 3986, 3988, 3989, 3991, 3993, 3999, 4001, 4003, 4004, 4006, 4008, 4009, 4011, 4013, 4017, 4018, 4019, 4021, 4023])
  lowest, highest = min(codePoints), max(codePoints)
  def isListed(c) :
    cp = ord(c)
    # Cheap range test first, set membership only for code points inside the range.
    return lowest <= cp <= highest and cp in codePoints
  return isListed

# Predicate built once at import time by makePTibetanSubjoinedConsonant();
# lcUnaccented uses it to keep these characters when stripping marks.
pTibetanSubjoinedConsonant = makePTibetanSubjoinedConsonant()

def inRange(x,a,b) :
  """Return whether x lies in the closed interval [a, b]."""
  if not x >= a :
    return False
  return x <= b

def lcUnaccented(s) :
  """Return s decomposed to NFD, stripped of marks, punctuation and separators, and lower-cased.

  A character is dropped when its Unicode general category starts with M, P
  or Z, unless pTibetanSubjoinedConsonant accepts it, in which case it is kept.
  """
  def keep(c) :
    if pTibetanSubjoinedConsonant(c) :
      return True
    return unicodedata.category(c)[0] not in 'MPZ'
  decomposed = unicodedata.normalize('NFD', s)
  return ''.join(c for c in decomposed if keep(c)).lower()

def loadEquivalences(pages = ['User:OrphicBot/equivalences.txt']) :
  """Load every page in pages, join the texts with five newlines, and return the result NFC-normalized."""
  # NOTE(review): `load` is not among the names imported at the top of this
  # file; confirm where it is supposed to come from.
  texts = [load(p) for p in pages]
  joined = '\n\n\n\n\n'.join(texts)
  return unicodedata.normalize('NFC', joined)

def parseEquivalences(txt) :
  """Parse the equivalence table text into one 5-tuple per line.

  Each line is first cut at the first '//' into a body and a trailing comment.
  - Body starting with '<': the tuple is (n, body, [], [], comment).
  - Otherwise the body is cut at the first '<'; both halves are split on '|'
    and stripped, giving (n, "", leftItems, rightItems, comment).
  n is the index supplied by number() for that line.
  """
  def split2(text, sep) :
    # Always yield exactly two pieces, padding with '' when sep is absent.
    if sep in text :
      return text.split(sep, 1)
    return [text, '']
  def items(part) :
    return [piece.strip() for piece in part.split('|')]
  rows = []
  for (n, line) in number(txt.split('\n')) :
    lr, c = split2(line, '//')
    if len(lr) > 0 and lr[0] == '<' :
      rows.append((n, lr, [], [], c))
      continue
    left, right = split2(lr, '<')
    rows.append((n, "", items(left), items(right), c))
  return rows

def hashEquivalences(es) :
  """Index parsed equivalences by the blocks of their index-3 items.

  Each tuple is extended with two fields computed from the concatenation of
  its index-3 strings: index 5 is minmax() of that text and index 6 is its
  blocks(). The extended tuples are passed to hashListByKeys, keyed by the
  index-6 list (or [''] when that list is empty).
  """
  def extend(e) :
    text = ''.join(e[3])
    return e + (minmax(text), blocks(text))
  def keysOf(e) :
    return e[6] if len(e[6]) > 0 else ['']
  extended = [extend(e) for e in es]
  return hashListByKeys(extended, keysOf, lambda e: e)

def applyEquivalence(eq,xs) :
  """Repeatedly apply one equivalence to the words xs until no new word appears.

  For every replacement t in eq[2], each source string in eq[3] is replaced by
  t in turn. For a single-character source only the replaced words are kept;
  for a longer source the unreplaced words are kept alongside them. The loop
  stops once a pass yields only words already present before that pass.
  """
  def substitute(words, t) :
    # Left fold over the source strings, starting from the current words.
    for src in eq[3] :
      replaced = [w.replace(src, t) for w in words]
      words = replaced + ([] if len(src) == 1 else words)
    return words
  rv = xs
  while True :
    before = set(rv)
    rv = unique(flattenAll([substitute(rv, t) for t in eq[2]]))
    if all(r in before for r in rv) :
      return rv

def applyEquivalences(eqs,eqc,x) :
  """Return the de-duplicated lcUnaccented() forms of x after applying the equivalences eqs.

  eqc is the collection of characters that can trigger any equivalence; when x
  contains none of them the equivalences are skipped. An individual
  equivalence is applied only if some character of x has a code point inside
  that equivalence's eq[5] range.
  """
  candidates = [unicodedata.normalize('NFC',x)]
  if any(c in eqc for c in x) :
    for eq in eqs :
      if any(inRange(ord(c),*eq[5]) for c in x) :
        candidates = applyEquivalence(eq, candidates)
  return unique([lcUnaccented(c) for c in candidates])

def fAlsos(p) :
  """Return the distinct arguments of the see/also/see also templates found before the first '==' of page text p.

  Only the text preceding the first '==' is searched. Each template's argument
  string is split on '|', and the pieces from all templates are merged and
  de-duplicated with unique().
  """
  lead = p.split('==')[0]
  # Raw string: the pattern contains a backslash-pipe, which is an invalid
  # escape sequence in a normal string literal.
  groups = reGroups(r"(?:{{(?:see|also|see also)\|(.*?)}})", lead)
  return unique(flatten([g.split('|') for g in flatten(groups)]))

def isAlso(l) :
  """Return whether the line l consists of a single also/see also/see template.

  The line must start with one of the three template openers, end with '}}'
  (optionally followed by the cleanup category link), and contain no further
  '{{' after the opening one.
  """
  openers = ('{{also|', '{{see also|', '{{see|')
  closers = ('}}', '}}[[Category:Requests for cleanup/also]]')
  if not l.startswith(openers) :
    return False
  if not l.endswith(closers) :
    return False
  return '{{' not in l[2:]

def removeAlsos(page) :
  """Return page with every isAlso() line removed from the text before the first '=='.

  Everything from the first '==' onwards is left untouched.
  """
  head, *rest = page.split('==')
  keptLines = [l for l in head.split('\n') if not isAlso(l)]
  return '=='.join(['\n'.join(keptLines)] + rest)

def printDiffs(rvs) :
  """Format result rows as a <source> block, one entry per row whose x[3] is not None.

  The header states len(rvs), so it counts skipped rows too. Each entry shows
  x[0] and x[4], then the first line of x[2] and of x[3] when that text starts
  with '{{also|' (otherwise '(none)'). A non-empty x[-2] is printed first,
  followed by ' (no action taken).'.
  """
  def alsoLine(text) :
    return text.split('\n')[0] if text[0:7] == '{{also|' else "(none)"
  entries = []
  for x in rvs :
    if x[3] != None :
      note = '' if x[-2] == '' else (x[-2] + ' (no action taken).\n')
      entries.append("{}{}: {}\n  {}\n  {}".format(note, x[0], x[4], alsoLine(x[2]), alsoLine(x[3])))
  header = "<source>\n\n{} items\n\n".format(len(rvs))
  # The closing tag is assembled from two pieces, as in the original source.
  return header + '\n\n'.join(entries) + "\n\n</sou"+"rce>"

def ixWord(w0, w1) :
  """Return an integer sort key for candidate w1 relative to the entry title w0.

  The key is the sum of independent penalties; callers sort ascending, so a
  lower total sorts earlier. Penalties, largest first:
    10**8  w1 contains 'Appendix:'
    10**7  w1 matches the 'uni<0-2 digits>=' pattern
    10**6  the most frequent block of w1 differs from that of w0
    10**5  the non-diacritic characters of NFD(w1) span more than one block
    1000   w1 contains a character in BASIC_PUNCTUATION or SPACE
    100 * highest diacritic code point in NFD(w1), with 768 and 769 swapped
    0/100/200/300 for all-lowercase / title-case / all-uppercase / other
  """
  # Block occurring most often in x (entries sorted by descending group size).
  def f_bloc(x) : return sorted(dictToList(hashListByKey([block(y) for y in x], lambda x : x, lambda x: x)), key = lambda x: -len(x[1]))[0][0]
  def s_apdx(x) : return 10**8 if 'Appendix:' in x else 0
  # Raw string: a backslash-d in a normal string literal is an invalid escape sequence.
  def s_func(x) : return 10**7 if len(reGroups(r"uni\d{0,2}=.*?", x)) > 0 else 0
  def s_bloc(x) : return 0 if f_bloc(x) == f_bloc(w0) else 10**6
  def s_diak(x) : return 0 if len(hashListByKey([block(y) for y in unicodedata.normalize('NFD', x) if not 'DIACRITIC' in block(y)], lambda x: x, lambda x: x)) == 1 else 10**5
  def s_diac(x) :
    # Swapping 768 and 769 reverses the relative order of those two marks.
    def swapAcuteGrave(n) : return 769 if n == 768 else 768 if n == 769 else n
    return 100*max([0]+[swapAcuteGrave(ord(y)) for y in unicodedata.normalize('NFD', x) if 'DIACRITIC' in block(y)])
  def s_punc(x) : return 0 if not any(block(c) == 'BASIC_PUNCTUATION' or block(c) == 'SPACE' for c in x) else 1000
  def s_caps(x) : return 0 if all(c==c.lower() for c in x) else 100 if x==x.title() else 200 if all(c==c.upper() for c in x) else 300
  return sum([f(w1) for f in [s_func, s_bloc, s_diac, s_diak, s_punc, s_caps, s_apdx]])

def makeUpdateAlsoM(eqs) :
  """Build the title tables and the per-page updater for the {{also}} run.

  eqs is the mapping produced by hashEquivalences. Titles are loaded with the
  module-level CT, not a parameter. Returns
  (titles, families, members, agenda, updateAlsoM).
  """
  titles = loadTitles(CT)
  setTitles = set(titles)
  # Every character occurring in the index-3 strings of any equivalence.
  eqc = set(unique(''.join([''.join(x[3]) for x in flatten([eqs[x] for x in eqs])])))
  # title -> list of normalized forms from applyEquivalences, using only the
  # equivalences filed under the title's own blocks. progress(n,1000) is
  # called once per title and its result discarded.
  families = dict([((x,applyEquivalences(flatten([eqs[b] for b in blocks(x) if b in eqs]), eqc, x)),progress(n,1000))[0] for (n,x) in number(titles)])
  # Presumably normalized form -> titles sharing it; depends on
  # utils.hashListByKeys -- TODO confirm.
  members = hashListByKeys(families, lambda x: families[x], lambda x: x)
  # Titles with between 1 and 8 other titles across their normalized forms.
  agenda = [t for (t,n) in [(t, len([x for x in flatten([members[f] for f in families[t]]) if x != t]) ) for t in titles] if n > 0 and n < 9]
  def updateAlsoM(ls,t) :
    # ls is handed to wiki() to obtain the page text; t is the page title.
    # NOTE(review): what flatten() does with the generator of strings below is
    # not visible here, so the first clause of pNonAppendixM is unverified.
    pNonAppendixM = lambda f,fs : any(x in fs for x in flatten(m for m in f)) and len(flatten([members[c] for c in flatten([fs[x] for x in f if x in fs])])) < 9
    # pTitleAndNotSelf: x is an existing title other than t.
    # fNItemsX: "<n> item(s) <X>", or "" when n is 0.
    pTitleAndNotSelf, fNItemsX = lambda x: x in setTitles and x != t, lambda n,X : "{} item{} {}".format(n,"s" if n > 1 else "",X) if n > 0 else ""
    familyM, page = families[t], wiki(ls)
    # Skip pages with '{{also|' after the first '==' or with a File link
    # inside the template. NOTE(review): [1] raises IndexError when the page
    # has no '=='; pSkip is only called when there is something to add or remove.
    pSkip = lambda : '{{also|' in page.split('==',1)[1] or '{{also|[[File:' in page
    # Split the existing template arguments into those kept (other existing
    # titles, 'Appendix:' links, items starting with 'uni') and those removed.
    retinenda, delenda = [[x for x in l if x != None] for l in zip(*[(x,None) if pTitleAndNotSelf(x) or 'Appendix:' in x or x[0:3] == 'uni' else (None,x) for x in fAlsos(page) ])] if len(fAlsos(page)) > 0 else ([],[])
    # New items: family members that are other existing titles and not already listed.
    addenda = [x for x in (flatten([members[f] for f in familyM if f in members]) if pNonAppendixM(familyM, families) else []) if pTitleAndNotSelf(x) and not x in retinenda and not x in delenda]
    # Rebuilt template, arguments ordered by ixWord relative to this title.
    alsosNew = "{{{{also|{}}}}}".format("|".join(sorted(unique(retinenda+addenda), key = lambda x: ixWord(t,x))))
    # New page text: the template line (omitted when it has no arguments)
    # followed by the page with its old also/see lines removed.
    b = "{}{}".format((alsosNew + '\n') if alsosNew != '{{also|}}' else "", removeAlsos(page))
    # Summary text describing how many items were added and removed.
    c = "{{{{also|...}}}} template updated; {}{}{}.".format(fNItemsX(len(addenda),"added"), " and " if len(delenda) > 0 and len(addenda) > 0 else "", fNItemsX(len(delenda), "removed")) 
    # Rewrite only if something changes, the size expression is at most 8,
    # and the page is not skipped.
    rewrite = (any(len(x) > 0 for x in [addenda, delenda])) and (len(retinenda)-len(delenda)+len(addenda) <= 8) and not pSkip()
    return (b,c) if rewrite else (page, "No change.")
  return titles, families, members, agenda, updateAlsoM

def runEq(CT, pathEqs, pathDiffs, fS = lambda a,b,c: a) :
  """Run one equivalence-driven {{also}} update pass and build its edit logs.

  CT       -- session value passed to runAllM.
  pathEqs  -- page holding the equivalence table to load.
  pathDiffs -- accepted but not used by this function.
  fS       -- callable taking three arguments; passed to runAllM and also
              called as fS(title, text, summary) for each log chunk. The
              default returns its first argument.

  Returns (titles, families, members, agenda, f, rvs, result, diffs, dss),
  where result holds the rows of rvs whose x[4] is neither None nor
  'No change.', diffs is printDiffs(result), and dss collects the fS return
  values for the log chunks.
  """
  # datetime is not imported at file level, so import it here for the log date stamp.
  import datetime
  eqs = hashEquivalences(parseEquivalences(loadEquivalences([pathEqs])))
  titles, families, members, agenda, f = makeUpdateAlsoM(eqs)
  rvs = runAllM(CT, agenda, f, fS)
  result = [x for x in rvs if x[4] is not None and x[4] != 'No change.']
  diffs = printDiffs(result)
  # Compute the date stamp once so every chunk of this run shares one log title prefix.
  logBase = "User:OrphicBot/EditLogs/{}".format(datetime.datetime.today().strftime('%d%B%Y'))
  # Chunk size scales the row count by 1500000 / len(diffs).
  # NOTE(review): `number` and `slices` are not imported in the visible part of this file.
  chunkSize = int(len(result)/(len(diffs)/1500000))
  dss = [fS("{} - {}".format(logBase, n+1), printDiffs(r), 'equivalence edit log') for (n,r) in number(slices(chunkSize, result))]
  return titles, families, members, agenda, f, rvs, result, diffs, dss

# Log in when the module is loaded; the result is passed to loadTitles and
# runAllM as CT.
CT = login('OrphicBot', obpwd())

# Equivalence-table page names: the main table and its sandbox counterpart.
pathEqs, pathEqsSb = 'User:OrphicBot/equivalences.txt', 'User:OrphicBot/equivalences_sandbox.txt'


# add rewriteB to updateAlsoM 
def rewriteB(txt) :
  """Return txt with every '{{character info/new}}' line removed and one such line prepended."""
  marker = '{{character info/new}}\n'
  stripped = txt.replace(marker, '')
  return marker + stripped

# run only if diffs file with timestamp of previous does not exist
# The fS passed here returns its first argument; the commented-out fS at the
# end of the file calls saveMW instead. The third argument (pathDiffs) is not
# used by runEq.
titles, families, members, agenda, f, rvs, result, diffs, dss = runEq(CT, pathEqs, "User:OrphicBot/diffs equivalence (add timestamp of previous)", lambda a,b,c: a)

# Same run against the sandbox equivalence table; the full result tuple is kept in resultSB.
resultSB = runEq(CT, pathEqsSb, "User:OrphicBot/diffs-sb equivalence (add timestamp of previous)", lambda a,b,c: a)

#fS = lambda a,b,c : saveMW(CT, a, b, c)