Skip to content

Commit

Permalink
Unit test revert all edits + Update MockSite to support recentchanges
Browse files Browse the repository at this point in the history
Also moved the main revert methods into helpers for easier testing.
  • Loading branch information
cdrini committed May 1, 2024
1 parent 5cd0ff2 commit 605246a
Show file tree
Hide file tree
Showing 3 changed files with 309 additions and 77 deletions.
60 changes: 53 additions & 7 deletions openlibrary/mocks/mock_infobase.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@

import datetime
import glob
import itertools
import json
import pytest
import web

from infogami.infobase import client, common, account, config as infobase_config
from infogami import config
from openlibrary.plugins.upstream.models import Changeset
from openlibrary.plugins.upstream.utils import safeget


key_patterns = {
Expand All @@ -34,6 +37,7 @@ def reset(self):

self._cache = {}
self.docs = {}
self.docs_historical = {}
self.changesets = []
self.index = []
self.keys = {'work': 0, 'author': 0, 'edition': 0}
Expand Down Expand Up @@ -67,21 +71,28 @@ def _save_doc(self, query, timestamp):
doc['created'] = self.docs[key]['created']

self.docs[key] = doc
self.docs_historical[(key, rev)] = doc

return doc

def save(self, query, comment=None, action=None, data=None, timestamp=None):
def save(
self, query, comment=None, action=None, data=None, timestamp=None, author=None
):
timestamp = timestamp or datetime.datetime.utcnow()

if author:
author = {"key": author.key}

doc = self._save_doc(query, timestamp)

changes = [{"key": doc['key'], "revision": doc['revision']}]
changes = [web.storage({"key": doc['key'], "revision": doc['revision']})]
changeset = self._make_changeset(
timestamp=timestamp,
kind=action,
comment=comment,
data=data,
changes=changes,
author=author,
)
self.changesets.append(changeset)

Expand All @@ -96,7 +107,10 @@ def save_many(
if author:
author = {"key": author.key}

changes = [{"key": doc['key'], "revision": doc['revision']} for doc in docs]
changes = [
web.storage({"key": doc['key'], "revision": doc['revision']})
for doc in docs
]
changeset = self._make_changeset(
timestamp=timestamp,
kind=action,
Expand Down Expand Up @@ -129,16 +143,48 @@ def _make_changeset(self, timestamp, kind, comment, data, changes, author=None):
"id": id,
"kind": kind or "update",
"comment": comment,
"data": data,
"data": data or {},
"changes": changes,
"timestamp": timestamp.isoformat(),
"author": author,
"ip": "127.0.0.1",
"bot": False,
}

def get(self, key, revision=None):
data = self.docs.get(key)
def get_change(self, cid: int) -> Changeset:
    """
    Return the changeset stored at position ``cid`` wrapped in a ``Changeset``.

    :param cid: index into ``self.changesets``.
    :return: a ``Changeset`` built from this site and the stored changeset data.
    """
    raw_changeset = self.changesets[cid]
    return Changeset(self, raw_changeset)

def recentchanges(self, query):
    """
    Return the changesets made by one author, newest first.

    Only author-filtered queries are supported by this mock.

    :param query: dict with a required ``author`` key and optional
        ``limit`` (default 1000) and ``offset`` (default 0) keys.
        The dict is only read, never modified.
    :return: list of ``Changeset`` objects, sliced to
        ``[offset, offset + limit)`` of the author's changesets.
    :raises NotImplementedError: if ``query`` has no truthy ``author``.
    """
    # Use .get() rather than .pop(): a lookup should not strip keys from
    # the caller's query dict as a side effect.
    limit = query.get("limit", 1000)
    offset = query.get("offset", 0)
    author = query.get("author")

    if not author:
        raise NotImplementedError(
            "MockSite.recentchanges without author not implemented"
        )

    def is_by_author(changeset) -> bool:
        # A changeset's "author" may be None; safeget is presumably what
        # turns the failed lookup into a non-match -- TODO confirm.
        return safeget(lambda: changeset['author']['key']) == author

    # self.changesets is appended to on save, so reversed() gives newest first.
    matching = (
        Changeset(self, changeset)
        for changeset in reversed(self.changesets)
        if is_by_author(changeset)
    )
    return list(itertools.islice(matching, offset, offset + limit))

def get(self, key, revision=None, lazy=False):
    """
    Look up a document and return it as a thing.

    :param key: key of the document.
    :param revision: when truthy, fetch that exact revision from
        ``self.docs_historical``; otherwise fetch the current document
        from ``self.docs``.
    :param lazy: accepted but not used by this implementation.
    :return: the thing built by ``client.create_thing``, or the falsy
        lookup result (e.g. ``None``) when nothing is stored.
    """
    stored = (
        self.docs_historical.get((key, revision))
        if revision
        else self.docs.get(key)
    )
    if not stored:
        return stored

    parsed = web.storage(common.parse_query(stored))
    if not parsed:
        return parsed

    return client.create_thing(self, key, self._process_dict(parsed))

Expand Down Expand Up @@ -176,7 +222,7 @@ def things(self, query):
# this corrects any nested keys that have been included
# in values.
flat = common.flatten_dict(v)[0]
k += '.' + web.rstrips(flat[0], '.key')
k = web.rstrips(k + '.' + flat[0], '.key')
v = flat[1]
keys = {k for k in self.filter_index(self.index, k, v) if k in keys}

Expand Down
151 changes: 81 additions & 70 deletions openlibrary/plugins/admin/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""

import os
from collections.abc import Iterable
import requests
import sys
import web
Expand All @@ -28,7 +29,7 @@
import openlibrary

from openlibrary import accounts
from openlibrary.accounts.model import clear_cookies
from openlibrary.accounts.model import Account, clear_cookies
from openlibrary.accounts.model import OpenLibraryAccount
from openlibrary.core import admin as admin_stats, helpers as h, imports, cache
from openlibrary.core.waitinglist import Stats as WLStats
Expand Down Expand Up @@ -57,6 +58,80 @@ def register_admin_page(path, cls, label=None, visible=True, librarians=False):
admin_tasks.append(t)


def revert_all_user_edits(account: Account) -> tuple[int, int]:
    """
    Revert every edit made by ``account`` and delete the records it added.

    The account's recent changes are fetched one page at a time. For each page:

    * keys touched by ``add-book`` changesets are collected for deletion;
    * changesets touching a document that is currently ``/type/delete``
      are skipped;
    * the remaining changesets are reverted via ``revert_changesets``.

    After the last page, all collected keys are saved as ``/type/delete``.

    :param account: the account whose edits should be reverted.
    :return: tuple of (number of edits reverted, number of documents deleted)
    """
    page_size = 100
    page = 0
    edit_count = 0
    keys_to_delete: set[str] = set()

    while True:
        changes = account.get_recentchanges(
            limit=page_size, offset=page_size * page
        )
        if not changes:
            # Empty page (no edits at all, or the total is an exact multiple
            # of the page size): nothing to look up or revert.
            break

        # Flatten directly into the set instead of sum(lists, []), which
        # re-copies the accumulated list on every addition.
        keys_to_delete |= {
            record['key']
            for change in changes
            if change.kind == 'add-book'
            for record in change.changes
        }

        # Map each touched key to the ids of the changesets that touched it.
        keys_to_revert: dict[str, list[int]] = {}
        for change in changes:
            for item in change.changes:
                keys_to_revert.setdefault(item.key, []).append(change.id)

        # Only query when there are keys to ask about: how the site treats
        # an empty key list is not visible from here.
        deleted_keys = (
            web.ctx.site.things(
                {'key': list(keys_to_revert), 'type': {'key': '/type/delete'}}
            )
            if keys_to_revert
            else []
        )

        changesets_with_deleted_works = {
            change_id for key in deleted_keys for change_id in keys_to_revert[key]
        }

        changeset_ids = [
            change.id
            for change in changes
            if change.id not in changesets_with_deleted_works
        ]

        _, len_docs = revert_changesets(changeset_ids, "Reverted Spam")
        edit_count += len_docs

        if len(changes) < page_size:
            # A short page means there are no further pages.
            break
        page += 1

    delete_payload = [
        {'key': key, 'type': {'key': '/type/delete'}} for key in keys_to_delete
    ]
    web.ctx.site.save_many(delete_payload, 'Delete spam')
    return edit_count, len(delete_payload)


def revert_changesets(changeset_ids: Iterable[int], comment: str):
    """
    An aggressive revert function; it rolls back all the documents to
    the revision that existed before the changeset was applied.
    Note this means that any edits made _after_ the given changeset will
    also be lost.

    Documents whose previous revision is ``/type/delete`` are left out of
    the save. This includes documents with no previous revision
    (revision 0), which are represented as ``/type/delete`` placeholders.

    :param changeset_ids: ids of the changesets to revert. Any iterable is
        accepted, including a one-shot iterator.
    :param comment: comment recorded with the revert save.
    :return: tuple of (result of ``site.save_many``, number of documents saved)
    """

    def get_doc(key: str, revision: int) -> dict:
        # Revision 0 means the document did not exist before the changeset.
        if revision == 0:
            return {"key": key, "type": {"key": "/type/delete"}}
        return web.ctx.site.get(key, revision).dict()

    # The ids are walked twice below (to load the documents and to record
    # them in the changeset data). Materialise them once so that a
    # generator argument is not already exhausted on the second pass.
    ids = list(changeset_ids)

    site = web.ctx.site
    docs = [
        get_doc(change['key'], change['revision'] - 1)
        for cid in ids
        for change in site.get_change(cid).changes
    ]
    docs = [doc for doc in docs if doc.get('type', {}).get('key') != '/type/delete']
    data = {"reverted_changesets": [str(cid) for cid in ids]}
    manifest = site.save_many(docs, action="revert", data=data, comment=comment)
    return manifest, len(docs)


class admin(delegate.page):
path = "/admin(?:/.*)?"

Expand Down Expand Up @@ -384,54 +459,12 @@ def POST_block_account(self, account):
account.block()
raise web.seeother(web.ctx.path)

def POST_block_account_and_revert(self, account):
def POST_block_account_and_revert(self, account: Account):
account.block()
i = 0
edits = 0
stop = False
keys_to_delete = set()
while not stop:
changes = account.get_recentchanges(limit=100, offset=100 * i)
added_records: list[list[dict]] = [
c.changes for c in changes if c.kind == 'add-book'
]
flattened_records: list[dict] = [
subitem for sublist in added_records for subitem in sublist
]
keys_to_delete |= {r['key'] for r in flattened_records}

keys_to_revert = {
item.key: [] for change in changes for item in change.changes
}
for change in changes:
for item in change.changes:
keys_to_revert[item.key].append(change.id)

deleted_keys = web.ctx.site.things(
{'key': list(keys_to_revert), 'type': {'key': '/type/delete'}}
)

changesets_with_deleted_works = {
change_id for key in deleted_keys for change_id in keys_to_revert[key]
}

changeset_ids = [
c.id for c in changes if c.id not in changesets_with_deleted_works
]

_, len_docs = ipaddress_view().revert(changeset_ids, "Reverted Spam")
edits += len_docs
i += 1
if len(changes) < 100:
stop = True

delete_payload = [
{'key': key, 'type': {'key': '/type/delete'}} for key in keys_to_delete
]
web.ctx.site.save_many(delete_payload, 'Delete spam')
edit_count, deleted_count = revert_all_user_edits(account)
add_flash_message(
"info",
f"Blocked the account and reverted all {edits} edits. {len(delete_payload)} records deleted.",
f"Blocked the account and reverted all {edit_count} edits. {deleted_count} records deleted.",
)
raise web.seeother(web.ctx.path)

Expand Down Expand Up @@ -526,7 +559,7 @@ def GET(self, username):
def POST(self, username):
i = web.input(changesets=[], comment="Revert", action="revert")
if i.action == "revert" and i.changesets:
ipaddress_view().revert(i.changesets, i.comment)
revert_changesets(i.changesets, i.comment)
raise web.redirect(web.ctx.path)


Expand All @@ -544,7 +577,7 @@ def POST(self, ip):
if i.action == "block":
self.block(ip)
else:
self.revert(i.changesets, i.comment)
revert_changesets(i.changesets, i.comment)
raise web.redirect(web.ctx.path)

def block(self, ip):
Expand All @@ -553,28 +586,6 @@ def block(self, ip):
ips.append(ip)
block().block_ips(ips)

def get_doc(self, key, revision):
if revision == 0:
return {"key": key, "type": {"key": "/type/delete"}}
else:
return web.ctx.site.get(key, revision).dict()

def revert(self, changeset_ids, comment):
logger.debug("Reverting changesets %s", changeset_ids)
site = web.ctx.site
docs = [
self.get_doc(c['key'], c['revision'] - 1)
for cid in changeset_ids
for c in site.get_change(cid).changes
]
docs = [doc for doc in docs if doc.get('type', {}).get('key') != '/type/delete']
logger.debug("Reverting %d docs", len(docs))
data = {"reverted_changesets": [str(cid) for cid in changeset_ids]}
manifest = web.ctx.site.save_many(
docs, action="revert", data=data, comment=comment
)
return manifest, len(docs)


class stats:
def GET(self, today):
Expand Down
Loading

0 comments on commit 605246a

Please sign in to comment.