Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: deleted work prevents reverting a user's works #9013

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 53 additions & 7 deletions openlibrary/mocks/mock_infobase.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@

import datetime
import glob
import itertools
import json
import re
import pytest
import web

from infogami.infobase import client, common, account, config as infobase_config
from infogami import config
from openlibrary.plugins.upstream.models import Changeset
from openlibrary.plugins.upstream.utils import safeget


key_patterns = {
Expand All @@ -35,6 +38,7 @@ def reset(self):

self._cache = {}
self.docs = {}
self.docs_historical = {}
self.changesets = []
self.index = []
self.keys = {'work': 0, 'author': 0, 'edition': 0}
Expand Down Expand Up @@ -68,21 +72,28 @@ def _save_doc(self, query, timestamp):
doc['created'] = self.docs[key]['created']

self.docs[key] = doc
self.docs_historical[(key, rev)] = doc

return doc

def save(self, query, comment=None, action=None, data=None, timestamp=None):
def save(
self, query, comment=None, action=None, data=None, timestamp=None, author=None
):
timestamp = timestamp or datetime.datetime.utcnow()

if author:
author = {"key": author.key}

doc = self._save_doc(query, timestamp)

changes = [{"key": doc['key'], "revision": doc['revision']}]
changes = [web.storage({"key": doc['key'], "revision": doc['revision']})]
changeset = self._make_changeset(
timestamp=timestamp,
kind=action,
comment=comment,
data=data,
changes=changes,
author=author,
)
self.changesets.append(changeset)

Expand All @@ -97,7 +108,10 @@ def save_many(
if author:
author = {"key": author.key}

changes = [{"key": doc['key'], "revision": doc['revision']} for doc in docs]
changes = [
web.storage({"key": doc['key'], "revision": doc['revision']})
for doc in docs
]
changeset = self._make_changeset(
timestamp=timestamp,
kind=action,
Expand Down Expand Up @@ -130,16 +144,48 @@ def _make_changeset(self, timestamp, kind, comment, data, changes, author=None):
"id": id,
"kind": kind or "update",
"comment": comment,
"data": data,
"data": data or {},
"changes": changes,
"timestamp": timestamp.isoformat(),
"author": author,
"ip": "127.0.0.1",
"bot": False,
}

def get(self, key, revision=None):
data = self.docs.get(key)
def get_change(self, cid: int) -> Changeset:
    """
    Look up a recorded changeset and wrap it in a ``Changeset``.

    :param cid: position of the changeset in ``self.changesets``
    :return: ``Changeset`` built from this site and the stored changeset
    """
    raw_changeset = self.changesets[cid]
    return Changeset(self, raw_changeset)

def recentchanges(self, query):
    """
    Return the changesets made by one author, newest first.

    Only the ``limit`` (default 1000), ``offset`` (default 0) and ``author``
    entries of ``query`` are consulted; any other entries are ignored.

    :param query: dict-like query; it is not modified by this call
    :return: list of ``Changeset`` objects for the requested page
    :raises NotImplementedError: if ``query`` has no (truthy) ``author``
    """
    # Work on a shallow copy so popping the paging/filter keys does not
    # mutate the dict owned by the caller.
    params = dict(query)
    limit = params.pop("limit", 1000)
    offset = params.pop("offset", 0)
    author = params.pop("author", None)

    if not author:
        raise NotImplementedError(
            "MockSite.recentchanges without author not implemented"
        )

    # NOTE(review): safeget presumably yields None when the lookup fails
    # (e.g. a changeset whose "author" is None) -- confirm against its
    # definition in openlibrary.plugins.upstream.utils.
    matching = (
        Changeset(self, c)
        for c in reversed(self.changesets)
        if safeget(lambda: c['author']['key']) == author
    )
    return list(itertools.islice(matching, offset, offset + limit))

def get(self, key, revision=None, lazy=False):
    """
    Fetch a document as a thing, optionally at a specific revision.

    :param key: key of the document
    :param revision: when truthy, look up ``(key, revision)`` in
        ``self.docs_historical``; otherwise use the entry in ``self.docs``
    :param lazy: accepted but not used by this implementation
    :return: the created thing, or the falsy lookup result (e.g. ``None``)
        when no document is stored
    """
    if revision:
        stored = self.docs_historical.get((key, revision))
    else:
        stored = self.docs.get(key)

    # Falsy lookups are passed straight back to the caller.
    if not stored:
        return stored

    parsed = web.storage(common.parse_query(stored))
    if not parsed:
        return parsed
    return client.create_thing(self, key, self._process_dict(parsed))

Expand Down Expand Up @@ -177,7 +223,7 @@ def things(self, query):
# this corrects any nested keys that have been included
# in values.
flat = common.flatten_dict(v)[0]
k += '.' + web.rstrips(flat[0], '.key')
k = web.rstrips(k + '.' + flat[0], '.key')
v = flat[1]
keys = {k for k in self.filter_index(self.index, k, v) if k in keys}

Expand Down
134 changes: 83 additions & 51 deletions openlibrary/plugins/admin/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""

import os
from collections.abc import Iterable
import requests
import sys
import web
Expand All @@ -28,7 +29,7 @@
import openlibrary

from openlibrary import accounts
from openlibrary.accounts.model import clear_cookies
from openlibrary.accounts.model import Account, clear_cookies
from openlibrary.accounts.model import OpenLibraryAccount
from openlibrary.core import admin as admin_stats, helpers as h, imports, cache
from openlibrary.core.waitinglist import Stats as WLStats
Expand Down Expand Up @@ -57,6 +58,82 @@ def register_admin_page(path, cls, label=None, visible=True, librarians=False):
admin_tasks.append(t)


def revert_all_user_edits(account: Account) -> tuple[int, int]:
    """
    Revert the account's recent changes page by page, then delete every
    record that the account created through an 'add-book' changeset.

    Changesets touching a key that the site currently reports as
    ``/type/delete`` are left out of the revert.

    :return: tuple of (number of edits reverted, number of documents deleted)
    """
    page_size = 100
    page = 0
    reverted_total = 0
    spam_keys = set()

    while True:
        changes = account.get_recentchanges(limit=page_size, offset=page_size * page)

        # Remember every record created by an 'add-book' changeset; these
        # are deleted in one batch after all pages have been processed.
        for change in changes:
            if change.kind == 'add-book':
                spam_keys.update(record['key'] for record in change.changes)

        # Map each touched key to the ids of the changesets that touch it.
        changesets_by_key: dict[str, list[int]] = {}
        for change in changes:
            for item in change.changes:
                changesets_by_key.setdefault(item.key, []).append(change.id)

        already_deleted = web.ctx.site.things(
            {'key': list(changesets_by_key), 'type': {'key': '/type/delete'}}
        )

        # Any changeset involving an already-deleted key is skipped.
        skipped_ids = set()
        for key in already_deleted:
            skipped_ids.update(changesets_by_key[key])

        revertable_ids = [c.id for c in changes if c.id not in skipped_ids]

        _, reverted = revert_changesets(revertable_ids, "Reverted Spam")
        reverted_total += reverted

        page += 1
        # A short page means there are no further changes to fetch.
        if len(changes) < page_size:
            break

    delete_payload = [
        {'key': key, 'type': {'key': '/type/delete'}} for key in spam_keys
    ]
    web.ctx.site.save_many(delete_payload, 'Delete spam')
    return reverted_total, len(delete_payload)


def revert_changesets(changeset_ids: Iterable[int], comment: str):
    """
    An aggressive revert function; it rolls back all the documents to
    the revision that existed before the changeset was applied.
    Note this means that any edits made _after_ the given changeset will
    also be lost.

    :param changeset_ids: ids of the changesets to revert; any iterable,
        including a one-shot iterator
    :param comment: comment stored with the revert save
    :return: tuple of (result of ``save_many``, number of documents saved)
    """

    def get_doc(key: str, revision: int) -> dict:
        # Revision 0 means the document did not exist before the changeset,
        # so its prior state is represented as a delete placeholder.
        if revision == 0:
            return {"key": key, "type": {"key": "/type/delete"}}
        else:
            return web.ctx.site.get(key, revision).dict()

    # The ids are walked twice (to collect docs and to record what was
    # reverted); materialize them once so a generator is not exhausted
    # before the second pass.
    changeset_ids = list(changeset_ids)

    site = web.ctx.site
    docs = [
        get_doc(c['key'], c['revision'] - 1)
        for cid in changeset_ids
        for c in site.get_change(cid).changes
    ]
    # Documents whose prior state is a delete are not saved back.
    docs = [doc for doc in docs if doc.get('type', {}).get('key') != '/type/delete']
    data = {"reverted_changesets": [str(cid) for cid in changeset_ids]}
    manifest = site.save_many(docs, action="revert", data=data, comment=comment)
    return manifest, len(docs)


class admin(delegate.page):
path = "/admin(?:/.*)?"

Expand Down Expand Up @@ -382,35 +459,12 @@ def POST_block_account(self, account):
account.block()
raise web.seeother(web.ctx.path)

def POST_block_account_and_revert(self, account):
def POST_block_account_and_revert(self, account: Account):
account.block()
i = 0
edits = 0
stop = False
keys_to_delete = set()
while not stop:
changes = account.get_recentchanges(limit=100, offset=100 * i)
added_records: list[list[dict]] = [
c.changes for c in changes if c.kind == 'add-book'
]
flattened_records: list[dict] = [
subitem for sublist in added_records for subitem in sublist
]
keys_to_delete |= {r['key'] for r in flattened_records}
changeset_ids = [c.id for c in changes]
_, len_docs = ipaddress_view().revert(changeset_ids, "Reverted Spam")
edits += len_docs
i += 1
if len(changes) < 100:
stop = True

delete_payload = [
{'key': key, 'type': {'key': '/type/delete'}} for key in keys_to_delete
]
web.ctx.site.save_many(delete_payload, 'Delete spam')
edit_count, deleted_count = revert_all_user_edits(account)
add_flash_message(
"info",
f"Blocked the account and reverted all {edits} edits. {len(delete_payload)} records deleted.",
f"Blocked the account and reverted all {edit_count} edits. {deleted_count} records deleted.",
)
raise web.seeother(web.ctx.path)

Expand Down Expand Up @@ -505,7 +559,7 @@ def GET(self, username):
def POST(self, username):
i = web.input(changesets=[], comment="Revert", action="revert")
if i.action == "revert" and i.changesets:
ipaddress_view().revert(i.changesets, i.comment)
revert_changesets(i.changesets, i.comment)
raise web.redirect(web.ctx.path)


Expand All @@ -523,7 +577,7 @@ def POST(self, ip):
if i.action == "block":
self.block(ip)
else:
self.revert(i.changesets, i.comment)
revert_changesets(i.changesets, i.comment)
raise web.redirect(web.ctx.path)

def block(self, ip):
Expand All @@ -532,28 +586,6 @@ def block(self, ip):
ips.append(ip)
block().block_ips(ips)

def get_doc(self, key, revision):
    """
    Return the document for ``key`` at ``revision`` as a dict.

    Revision 0 yields a ``/type/delete`` placeholder instead of a site
    lookup.
    """
    if revision != 0:
        return web.ctx.site.get(key, revision).dict()
    return {"key": key, "type": {"key": "/type/delete"}}

def revert(self, changeset_ids, comment):
    """
    Roll every document touched by the given changesets back to the
    revision that preceded the changeset, saving them in one batch.

    :param changeset_ids: ids of the changesets to revert; any iterable,
        including a one-shot iterator
    :param comment: comment stored with the revert save
    :return: tuple of (result of ``save_many``, number of documents saved)
    """
    # The ids are logged and then walked twice; materialize them once so
    # a generator is not exhausted before the later passes.
    changeset_ids = list(changeset_ids)
    logger.debug("Reverting changesets %s", changeset_ids)
    site = web.ctx.site
    docs = [
        self.get_doc(c['key'], c['revision'] - 1)
        for cid in changeset_ids
        for c in site.get_change(cid).changes
    ]
    # Documents whose prior state is a delete are not saved back.
    docs = [doc for doc in docs if doc.get('type', {}).get('key') != '/type/delete']
    logger.debug("Reverting %d docs", len(docs))
    data = {"reverted_changesets": [str(cid) for cid in changeset_ids]}
    manifest = site.save_many(docs, action="revert", data=data, comment=comment)
    return manifest, len(docs)


class stats:
def GET(self, today):
Expand Down
Loading
Loading