# Jump to content
#
# User:Surjection/also-update.py
#
# From Wiktionary, the free dictionary

import wikitextlib
import pywikibot
from pywikibot import Site, Page, Category
import sys
import collections
import functools
import queue
import argparse


# Template names that count as an existing "also" template when a page is
# scanned or rewritten.
ALSO_TEMPLATES = {"also", "see also", "also see", "seealso", "xsee", "xalso", "See also"}
# Template name used when a page has no such template yet and one is inserted;
# update_also_template asserts that it is a member of ALSO_TEMPLATES.
ALSO_TEMPLATES_PREFERRED = "also"
# When true, a newly inserted template is placed below the leading lines that
# contain a "character info" template; otherwise it is prepended to the page.
PLACE_ALSO_BENEATH_CHARACTER_INFOS = False
# When true, pending edits are printed as diffs instead of being saved. The
# command-line entry point also switches this on unless -x/--do is passed.
DEBUG = False


# Site handle shared by every page lookup in this module.
# NOTE: the login call runs at import time, not only when executed as a script.
enwikt = Site("en", fam="wiktionary")
enwikt.login()


@functools.cache
def resolve_page(title):
    """Return the Page object for *title* on the shared site.

    Results are memoised per title string, so repeated lookups of the same
    title hand back the very same Page instance.
    """
    page = Page(enwikt, title)
    return page



def save_page(title, text, summary):
    """Overwrite the page called *title* with *text* and save it.

    The edit is saved with *summary* as its edit summary and is never
    flagged as minor.
    """
    target = resolve_page(title)
    target.text = text
    target.save(summary=summary, minor=False)


def should_visit(page):
    """Tell whether *page* is an existing page in namespace 0.

    The namespace is only consulted when the page exists.
    """
    if not page.exists():
        return False
    return page.namespace().id == 0


def get_also_template(page):
    """Look up the "also" template in the heading-less section of *page*.

    Only the first section whose heading has level 2 and no text is searched
    (presumably the lead section above the first language heading -- confirm
    against wikitextlib). If no such section exists, nothing is searched.

    Returns:
        * the template's positional arguments with the first entry dropped,
          when a template named in ALSO_TEMPLATES is found first;
        * ``Ellipsis`` when a template whose name starts with "also/" is
          found first;
        * ``None`` when neither kind of template is present.
    """
    lead_text = next(
        (
            section.text
            for section in wikitextlib.iterate_sections(page.text)
            if section.heading.level == 2 and section.heading.text is None
        ),
        "",
    )

    for candidate in wikitextlib.find_templates(lead_text):
        name = candidate.name
        if name in ALSO_TEMPLATES:
            return wikitextlib.get_positional_args(candidate)[1:]
        if name.startswith("also/"):
            # Sentinel that callers test for with ``is ...``.
            return Ellipsis

    return None


def make_also_template(name, variants, extra_args):
    """Build the wikitext for template *name* listing *variants*.

    Args:
        name: template name to emit.
        variants: values that become positional parameters 1, 2, 3, ...
        extra_args: optional mapping of further parameters (may be None);
            only entries whose key is not an ``int`` are copied over.

    Returns:
        Whatever ``wikitextlib.make_template`` produces for these arguments.
    """
    args = collections.OrderedDict()
    for position, variant in enumerate(variants, start=1):
        args[position] = variant

    # Integer keys would clash with the freshly numbered positional
    # parameters, so only the remaining keys are carried across.
    for key, value in (extra_args or {}).items():
        if type(key) is not int:
            args[key] = value

    return wikitextlib.make_template(name, args)


def update_also_template_parameters(template, page, variants, variation_pages):
    """Rebuild *template* so that it lists the given variants.

    The new positional parameters are, in order: *variants*, then any
    positional value already in the template that is neither the page's own
    title nor one of *variants* / *variation_pages*, then *variation_pages*.
    Parameters with non-integer keys are kept, and the template keeps its name.

    Returns:
        The wikitext produced by ``make_also_template``.
    """
    known = {*variants, *variation_pages, page.title()}

    # The first positional entry is skipped, as in get_also_template.
    existing = wikitextlib.get_positional_args(template)[1:]
    leftovers = [value for value in existing if value not in known]

    named = collections.OrderedDict()
    for key, value in template.args.items():
        if type(key) is not int:
            named[key] = value

    merged = variants + leftovers + variation_pages
    return make_also_template(template.name, merged, named)


_CHARACTER_INFO_TEMPLATES = frozenset({
    "character info",
    "character info/subpage",
    "character info/save memory",
    "character info/var",
})


def _insert_below_character_infos(text, also):
    """Insert *also* as a new line after the leading character-info lines of *text*."""
    lines = text.splitlines()

    insert_at = 0
    while insert_at < len(lines):
        has_character_info = any(
            template.name in _CHARACTER_INFO_TEMPLATES
            for template in wikitextlib.find_templates(lines[insert_at])
        )
        if not has_character_info:
            break
        insert_at += 1

    return "\n".join(lines[:insert_at] + [also] + lines[insert_at:])


def update_also_template(page, variants, variation_pages, edit_summary):
    """Make the "also" template on *page* list *variants* and save the result.

    If the page already has such a template (as reported by
    ``get_also_template``), the first template named in ALSO_TEMPLATES is
    rewritten in place; otherwise a new ALSO_TEMPLATES_PREFERRED template is
    inserted at the top of the page, or below the leading character-info
    lines when PLACE_ALSO_BENEATH_CHARACTER_INFOS is set.

    Nothing happens when the page does not exist or the text would not
    change. With DEBUG set the diff is printed instead of being saved.

    Args:
        page: page object to edit.
        variants: list of titles to list in the template.
        variation_pages: list of extra titles appended after the variants.
        edit_summary: edit summary used when saving.
    """
    # Checked first so that a missing page is neither fetched nor parsed.
    if not page.exists():
        # never create a new page
        return

    original_text = page.text

    if get_also_template(page) is not None:
        has_replaced = False

        def replacement(template):
            nonlocal has_replaced
            if has_replaced:
                # Only the first match is rewritten; Ellipsis is handed back
                # for later ones (presumably "leave as is" in wikitextlib --
                # TODO confirm).
                return ...
            has_replaced = True
            return update_also_template_parameters(template, page, variants, variation_pages)

        result = wikitextlib.replace_templates_if(
            original_text,
            lambda template: template.name in ALSO_TEMPLATES,
            replacement,
        )
    else:
        assert ALSO_TEMPLATES_PREFERRED in ALSO_TEMPLATES
        also = make_also_template(ALSO_TEMPLATES_PREFERRED, variants + variation_pages, None)

        if PLACE_ALSO_BENEATH_CHARACTER_INFOS:
            result = _insert_below_character_infos(original_text, also)
        else:
            result = also + "\n" + original_text

    if original_text == result:
        return

    if DEBUG:
        print("Saving", page.title(), edit_summary)
        pywikibot.showDiff(original_text, result)
    else:
        page.text = result
        page.save(summary=edit_summary, minor=False)


def process_tree(root, edit_summary, add_links):
    """Collect every page reachable through "also" templates and sync them.

    Starting at *root*, the pages named in each visited page's "also"
    template are followed breadth-first. Afterwards every collected page is
    updated so that its template lists all the other collected titles.

    Titles containing "Appendix:Variations " are not followed; they are
    gathered once each and passed along as variation pages. Pages for which
    ``get_also_template`` returns ``Ellipsis`` are visited but never edited.

    Args:
        root: page object to start from.
        edit_summary: edit summary handed to ``update_also_template``.
        add_links: extra titles to treat as if *root* already linked to
            them; may be empty.

    Returns:
        The set of titles of the collected pages that were not excluded from
        updating.
    """
    variants = {root.title(): root}
    visit_queue = collections.deque([root])
    edit_queue = [root]
    variation_pages = []
    do_not_update = set()

    def enqueue(title):
        target = resolve_page(title)
        variants[title] = target
        visit_queue.append(target)
        edit_queue.append(target)

    while visit_queue:
        page = visit_queue.popleft()
        if DEBUG:
            print("Visiting", page.title())

        if page is root and add_links:
            # A set has no stable iteration order across runs; sorting keeps
            # the generated parameter order (and thus the edits) reproducible.
            if isinstance(add_links, (set, frozenset)):
                extra_titles = sorted(add_links)
            else:
                extra_titles = list(add_links)
            for candidate_name in extra_titles:
                # Skip titles that are already collected so no page is
                # queued for visiting or editing twice.
                if candidate_name not in variants:
                    enqueue(candidate_name)
            add_links = set()

        also = get_also_template(page)
        if also is ...:
            do_not_update.add(page.title())
        elif also is not None:
            for candidate_name in also:
                if "Appendix:Variations " in candidate_name:
                    # Several pages usually list the same appendix; record it
                    # once so the rebuilt template has no duplicate argument.
                    if candidate_name not in variation_pages:
                        variation_pages.append(candidate_name)
                elif candidate_name not in variants:
                    enqueue(candidate_name)

    updated = set()

    for page in edit_queue:
        page_name = page.title()
        if page_name in do_not_update:
            continue
        variants_for_page = [name for name in variants if name != page_name]
        updated.add(page_name)
        if variants_for_page:
            update_also_template(page, variants_for_page, variation_pages, edit_summary)

    return updated


def update_also(page_name, add_links):
    """Run the "also" synchronisation starting from the page *page_name*.

    Args:
        page_name: title of the starting page.
        add_links: extra titles forwarded to ``process_tree``.

    Returns:
        The set returned by ``process_tree``, or an empty set when
        ``should_visit`` rejects the starting page.
    """
    root = resolve_page(page_name)
    if not should_visit(root):
        return set()

    summary = "(bot) update [[Template:also]] references from [[{}]]".format(page_name)
    return process_tree(root, summary, add_links)


if __name__ == "__main__":
    # Command line: zero or more page titles, -x/--do to really save edits,
    # -l/--link to link all given pages to each other.
    parser = argparse.ArgumentParser()
    parser.add_argument('page', nargs='*')
    parser.add_argument('-x', '--do', action='store_true', help='actually save edits')
    parser.add_argument('-l', '--link', action='store_true', help='link all page parameters to each other')
    args = parser.parse_args()

    # Without --do the run stays in diff-only mode.
    DEBUG = DEBUG or not args.do

    updated = set()
    all_pages = set(args.page)

    for page_name in args.page:
        # Pages already handled as part of an earlier tree are skipped.
        if page_name in updated:
            continue
        if args.link:
            add_links = all_pages - {page_name}
        else:
            add_links = set()
        updated.update(update_also(page_name, add_links))