This repository has been archived by the owner on Apr 26, 2024. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Fix background updates failing to add unique indexes on receipts #14453
Merged
squahtx
merged 7 commits into
develop
from
squah/fix_receipts_constraint_background_update
Nov 16, 2022
Merged
Changes from 6 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
b1d191d
Fix broken upserts to `receipts_linearized` and `receipts_graph`
580a416
Clean up duplicate receipts when creating unique indexes
dfe9946
Add newsfile
7bf42a9
Merge remote-tracking branch 'origin/develop' into squah/fix_receipts…
e654f8c
Fix background update on postgres and keep last linearized receipt
8cb1bcb
Merge remote-tracking branch 'origin/develop' into squah/fix_receipts…
6eb87c9
Merge remote-tracking branch 'origin/develop' into squah/fix_receipts…
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Fix a bug introduced in Synapse 1.70.0 where the background updates to add non-thread unique indexes on receipts could fail when upgrading from 1.67.0 or earlier. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -113,24 +113,6 @@ def __init__( | |
prefilled_cache=receipts_stream_prefill, | ||
) | ||
|
||
self.db_pool.updates.register_background_index_update( | ||
"receipts_linearized_unique_index", | ||
index_name="receipts_linearized_unique_index", | ||
table="receipts_linearized", | ||
columns=["room_id", "receipt_type", "user_id"], | ||
where_clause="thread_id IS NULL", | ||
unique=True, | ||
) | ||
|
||
self.db_pool.updates.register_background_index_update( | ||
"receipts_graph_unique_index", | ||
index_name="receipts_graph_unique_index", | ||
table="receipts_graph", | ||
columns=["room_id", "receipt_type", "user_id"], | ||
where_clause="thread_id IS NULL", | ||
unique=True, | ||
) | ||
|
||
def get_max_receipt_stream_id(self) -> int:
    """Return the current max stream ID for the receipts stream.

    The value is whatever the receipts ID generator reports as its current
    token.
    """
    current_token = self._receipts_id_gen.get_current_token()
    return current_token
|
@@ -702,9 +684,6 @@ def _insert_linearized_receipt_txn( | |
"data": json_encoder.encode(data), | ||
}, | ||
where_clause=where_clause, | ||
# receipts_linearized has a unique constraint on | ||
# (user_id, room_id, receipt_type), so no need to lock | ||
lock=False, | ||
Comment on lines
-705
to
-707
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. — To double check: is the table correctly deemed safe to upsert into when the relevant background updates have run? (Wasn't sure how the second commit would affect this, if at all.) — There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. — Yes, once the unique index has been added by the background update, we will be able to rely on native upserts again (and the value of … [remainder of this comment is truncated in the source] |
||
) | ||
|
||
return rx_ts | ||
|
@@ -862,14 +841,13 @@ def _insert_graph_receipt_txn( | |
"data": json_encoder.encode(data), | ||
}, | ||
where_clause=where_clause, | ||
# receipts_graph has a unique constraint on | ||
# (user_id, room_id, receipt_type), so no need to lock | ||
lock=False, | ||
) | ||
|
||
|
||
class ReceiptsBackgroundUpdateStore(SQLBaseStore): | ||
POPULATE_RECEIPT_EVENT_STREAM_ORDERING = "populate_event_stream_ordering" | ||
RECEIPTS_LINEARIZED_UNIQUE_INDEX_UPDATE_NAME = "receipts_linearized_unique_index" | ||
RECEIPTS_GRAPH_UNIQUE_INDEX_UPDATE_NAME = "receipts_graph_unique_index" | ||
|
||
def __init__( | ||
self, | ||
|
@@ -883,6 +861,14 @@ def __init__( | |
self.POPULATE_RECEIPT_EVENT_STREAM_ORDERING, | ||
self._populate_receipt_event_stream_ordering, | ||
) | ||
self.db_pool.updates.register_background_update_handler( | ||
self.RECEIPTS_LINEARIZED_UNIQUE_INDEX_UPDATE_NAME, | ||
self._background_receipts_linearized_unique_index, | ||
) | ||
self.db_pool.updates.register_background_update_handler( | ||
self.RECEIPTS_GRAPH_UNIQUE_INDEX_UPDATE_NAME, | ||
self._background_receipts_graph_unique_index, | ||
) | ||
|
||
async def _populate_receipt_event_stream_ordering( | ||
self, progress: JsonDict, batch_size: int | ||
|
@@ -938,6 +924,143 @@ def _populate_receipt_event_stream_ordering_txn( | |
|
||
return batch_size | ||
|
||
async def _create_receipts_index(self, index_name: str, table: str) -> None:
    """Adds a unique index on `(room_id, receipt_type, user_id)` to the given
    receipts table, for non-thread receipts (rows where `thread_id IS NULL`).

    Callers must remove duplicate non-thread receipts first, otherwise the
    unique index cannot be built.

    Args:
        index_name: The name of the index to create. Interpolated directly into
            the SQL, so it must be a trusted identifier.
        table: The name of the receipts table to index. Interpolated directly
            into the SQL, so it must be a trusted identifier.
    """

    is_postgres = isinstance(self.database_engine, PostgresEngine)

    def _create_index(conn: LoggingDatabaseConnection) -> None:
        conn.rollback()

        # we have to set autocommit, because postgres refuses to
        # CREATE INDEX CONCURRENTLY without it.
        if is_postgres:
            conn.set_session(autocommit=True)

        try:
            c = conn.cursor()

            if is_postgres:
                # A failed or interrupted `CREATE INDEX CONCURRENTLY` leaves an
                # invalid index behind under the same name, which would make
                # every retry fail with "already exists". Drop any leftover
                # before trying again.
                c.execute(f"DROP INDEX IF EXISTS {index_name}")

            concurrently = "CONCURRENTLY" if is_postgres else ""
            sql = f"""
                CREATE UNIQUE INDEX {concurrently} {index_name}
                ON {table}(room_id, receipt_type, user_id)
                WHERE thread_id IS NULL
            """
            c.execute(sql)
        finally:
            # Restore the connection's transactional behaviour, even if index
            # creation failed.
            if is_postgres:
                conn.set_session(autocommit=False)

    await self.db_pool.runWithConnection(_create_index)
|
||
async def _background_receipts_linearized_unique_index(
    self, progress: dict, batch_size: int
) -> int:
    """Removes duplicate receipts and adds a unique index on
    `(room_id, receipt_type, user_id)` to `receipts_linearized`, for non-thread
    receipts.

    The whole update runs in a single invocation and then marks itself as
    finished.

    Args:
        progress: Unused.
        batch_size: Unused.

    Returns:
        Always 1.
    """

    def _remove_duplicate_receipts_txn(txn: LoggingTransaction) -> None:
        # Physical row identifier, used to keep exactly one row out of each set
        # of duplicates even when several rows share the same `stream_id`.
        if isinstance(self.database_engine, PostgresEngine):
            row_id_column = "ctid"
        else:
            row_id_column = "rowid"

        # Identify any duplicate receipts arising from
        # https://github.com/matrix-org/synapse/issues/14406.
        # We expect the following query to use the per-thread receipt index and take
        # less than a minute.
        sql = """
            SELECT MAX(stream_id), room_id, receipt_type, user_id
            FROM receipts_linearized
            WHERE thread_id IS NULL
            GROUP BY room_id, receipt_type, user_id
            HAVING COUNT(*) > 1
        """
        txn.execute(sql)
        duplicate_keys = cast(List[Tuple[int, str, str, str]], list(txn))

        # Then remove duplicate receipts, keeping a single one with the highest
        # `stream_id`. Comparing on `stream_id` alone is not enough: if several
        # duplicates share the highest `stream_id`, they would all survive and
        # the unique index could not be created. So we pick one row to keep and
        # delete every other row by its row identifier.
        select_keeper_sql = f"""
            SELECT {row_id_column}
            FROM receipts_linearized
            WHERE
                room_id = ? AND
                receipt_type = ? AND
                user_id = ? AND
                thread_id IS NULL AND
                stream_id = ?
            LIMIT 1
        """
        delete_others_sql = f"""
            DELETE FROM receipts_linearized
            WHERE
                room_id = ? AND
                receipt_type = ? AND
                user_id = ? AND
                thread_id IS NULL AND
                {row_id_column} != ?
        """
        for max_stream_id, room_id, receipt_type, user_id in duplicate_keys:
            txn.execute(
                select_keeper_sql,
                (room_id, receipt_type, user_id, max_stream_id),
            )
            keeper_rows = list(txn)
            if not keeper_rows:
                # The row we meant to keep has vanished since the query above;
                # leave this key alone rather than delete every receipt for it.
                continue
            keeper_row_id = keeper_rows[0][0]

            txn.execute(
                delete_others_sql,
                (room_id, receipt_type, user_id, keeper_row_id),
            )

    await self.db_pool.runInteraction(
        self.RECEIPTS_LINEARIZED_UNIQUE_INDEX_UPDATE_NAME,
        _remove_duplicate_receipts_txn,
    )

    await self._create_receipts_index(
        "receipts_linearized_unique_index",
        "receipts_linearized",
    )

    await self.db_pool.updates._end_background_update(
        self.RECEIPTS_LINEARIZED_UNIQUE_INDEX_UPDATE_NAME
    )

    return 1
|
||
async def _background_receipts_graph_unique_index(
    self, progress: dict, batch_size: int
) -> int:
    """Purge duplicated non-thread receipts from `receipts_graph`, then build the
    unique index on `(room_id, receipt_type, user_id)` for non-thread receipts.

    Everything happens in one invocation, after which the background update is
    marked as finished. `progress` and `batch_size` are not used.

    Returns:
        Always 1.
    """

    update_name = self.RECEIPTS_GRAPH_UNIQUE_INDEX_UPDATE_NAME

    def _remove_duplicate_receipts_txn(txn: LoggingTransaction) -> None:
        # Find the keys that have more than one non-thread receipt, arising from
        # https://github.com/matrix-org/synapse/issues/14406.
        # This query is expected to use the per-thread receipt index and to take
        # less than a minute.
        find_duplicates_sql = """
            SELECT room_id, receipt_type, user_id FROM receipts_graph
            WHERE thread_id IS NULL
            GROUP BY room_id, receipt_type, user_id
            HAVING COUNT(*) > 1
        """
        # Keeping the latest receipt of each duplicate set would be possible,
        # but dropping every row for the affected keys is far simpler.
        purge_sql = """
            DELETE FROM receipts_graph
            WHERE
                room_id = ? AND
                receipt_type = ? AND
                user_id = ? AND
                thread_id IS NULL
        """

        txn.execute(find_duplicates_sql)
        affected_keys = cast(List[Tuple[str, str, str]], list(txn))

        for key in affected_keys:
            room_id, receipt_type, user_id = key
            txn.execute(purge_sql, (room_id, receipt_type, user_id))

    await self.db_pool.runInteraction(update_name, _remove_duplicate_receipts_txn)

    await self._create_receipts_index(
        "receipts_graph_unique_index", "receipts_graph"
    )

    await self.db_pool.updates._end_background_update(update_name)

    return 1
|
||
|
||
class ReceiptsStore(ReceiptsWorkerStore, ReceiptsBackgroundUpdateStore):
    """Receipts store combining `ReceiptsWorkerStore` with
    `ReceiptsBackgroundUpdateStore`; it defines no members of its own."""
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why move these to the non-worker store?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I moved them to the `*BackgroundUpdateStore`. I thought that's where we usually put the background updates? Is there a motivation for having these on the worker store that I've completely missed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's no motivation besides it being consistent with other examples I saw. 🤷