From fe4719a2683dcd0d9c9deb606a1895d222c1b001 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Fri, 12 Apr 2024 09:28:44 +0100 Subject: [PATCH] Use receipts `event_stream_ordering` instead of joins (#17032) Resurrecting https://github.com/matrix-org/synapse/pull/13918. This should reduce IOPs incurred by joining to the events table to lookup stream ordering, which happens in many receipt handling code paths. Like the previous PR I believe sufficient time has passed between the original migration in DB schema 72 and now to merge this as-is. It's highly unlikely that both the migration is still ongoing AND (active) users still have any receipts prior to that date. In the unlikely event there is a receipt without a populated `event_stream_ordering` synapse will behave just as it does now when receipts exist for events that don't (yet): for push action calculation the receipts are just ignored. I've removed the validation on event IDs as this is already covered here: https://github.com/element-hq/synapse/blob/59ceabcb9798793cd4312fdbcced4e612aeda84d/synapse/handlers/receipts.py#L189-L192 --- changelog.d/17032.misc | 1 + .../databases/main/event_push_actions.py | 22 +++++++------------ synapse/storage/databases/main/receipts.py | 8 +++---- 3 files changed, 12 insertions(+), 19 deletions(-) create mode 100644 changelog.d/17032.misc diff --git a/changelog.d/17032.misc b/changelog.d/17032.misc new file mode 100644 index 00000000000..b03f6f42e5f --- /dev/null +++ b/changelog.d/17032.misc @@ -0,0 +1 @@ +Use new receipts column to optimise receipt and push action SQL queries. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 40bf000e9cb..bdd0781c480 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -385,7 +385,6 @@ def _get_unread_counts_by_room_for_user_txn( WITH all_receipts AS ( SELECT room_id, thread_id, MAX(event_stream_ordering) AS max_receipt_stream_ordering FROM receipts_linearized - LEFT JOIN events USING (room_id, event_id) WHERE {receipt_types_clause} AND user_id = ? @@ -621,13 +620,12 @@ def _get_thread(thread_id: str) -> NotifCounts: SELECT notif_count, COALESCE(unread_count, 0), thread_id FROM event_push_summary LEFT JOIN ( - SELECT thread_id, MAX(stream_ordering) AS threaded_receipt_stream_ordering + SELECT thread_id, MAX(event_stream_ordering) AS threaded_receipt_stream_ordering FROM receipts_linearized - LEFT JOIN events USING (room_id, event_id) WHERE user_id = ? AND room_id = ? - AND stream_ordering > ? + AND event_stream_ordering > ? AND {receipt_types_clause} GROUP BY thread_id ) AS receipts USING (thread_id) @@ -659,13 +657,12 @@ def _get_thread(thread_id: str) -> NotifCounts: sql = f""" SELECT COUNT(*), thread_id FROM event_push_actions LEFT JOIN ( - SELECT thread_id, MAX(stream_ordering) AS threaded_receipt_stream_ordering + SELECT thread_id, MAX(event_stream_ordering) AS threaded_receipt_stream_ordering FROM receipts_linearized - LEFT JOIN events USING (room_id, event_id) WHERE user_id = ? AND room_id = ? - AND stream_ordering > ? + AND event_stream_ordering > ? AND {receipt_types_clause} GROUP BY thread_id ) AS receipts USING (thread_id) @@ -738,13 +735,12 @@ def _get_thread(thread_id: str) -> NotifCounts: thread_id FROM event_push_actions LEFT JOIN ( - SELECT thread_id, MAX(stream_ordering) AS threaded_receipt_stream_ordering + SELECT thread_id, MAX(event_stream_ordering) AS threaded_receipt_stream_ordering FROM receipts_linearized - LEFT JOIN events USING (room_id, event_id) WHERE user_id = ? AND room_id = ? - AND stream_ordering > ? + AND event_stream_ordering > ? AND {receipt_types_clause} GROUP BY thread_id ) AS receipts USING (thread_id) @@ -910,9 +906,8 @@ def _get_receipts_for_room_and_threads_txn( # given this function generally gets called with only one room and # thread ID. sql = f""" - SELECT room_id, thread_id, MAX(stream_ordering) + SELECT room_id, thread_id, MAX(event_stream_ordering) FROM receipts_linearized - INNER JOIN events USING (room_id, event_id) WHERE {receipt_types_clause} AND {thread_ids_clause} AND {room_ids_clause} @@ -1442,9 +1437,8 @@ def _handle_new_receipts_for_notifs_txn(self, txn: LoggingTransaction) -> bool: ) sql = """ - SELECT r.stream_id, r.room_id, r.user_id, r.thread_id, e.stream_ordering + SELECT r.stream_id, r.room_id, r.user_id, r.thread_id, r.event_stream_ordering FROM receipts_linearized AS r - INNER JOIN events AS e USING (event_id) WHERE ? < r.stream_id AND r.stream_id <= ? AND user_id LIKE ? ORDER BY r.stream_id ASC LIMIT ? diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index d513c425301..9660fc46997 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -178,14 +178,13 @@ def get_last_unthreaded_receipt_for_user_txn( ) sql = f""" - SELECT event_id, stream_ordering + SELECT event_id, event_stream_ordering FROM receipts_linearized - INNER JOIN events USING (room_id, event_id) WHERE {clause} AND user_id = ? AND room_id = ? AND thread_id IS NULL - ORDER BY stream_ordering DESC + ORDER BY event_stream_ordering DESC LIMIT 1 """ @@ -736,8 +735,7 @@ def _insert_linearized_receipt_txn( thread_args = (thread_id,) sql = f""" - SELECT stream_ordering, event_id FROM events - INNER JOIN receipts_linearized AS r USING (event_id, room_id) + SELECT r.event_stream_ordering, r.event_id FROM receipts_linearized AS r WHERE r.room_id = ? AND r.receipt_type = ? AND r.user_id = ? AND {thread_clause} """ txn.execute(