From 6481502d5e446fe5ad0128f68e31c51dc85d745f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 10 Aug 2021 01:23:19 -0500 Subject: [PATCH 01/22] Allow room creator to send MSC2716 related events in existing room versions Discussed at https://github.com/matrix-org/matrix-doc/pull/2716/#discussion_r682474869 Restoring `get_create_event_for_room_txn` from, https://github.com/matrix-org/synapse/pull/10245/commits/44bb3f0cf5cb365ef9281554daceeecfb17cc94d --- synapse/handlers/federation.py | 6 +- synapse/storage/databases/main/events.py | 14 ++-- .../storage/databases/main/events_worker.py | 70 ++++++++++++++++++- synapse/storage/databases/main/state.py | 57 +++++++++++---- 4 files changed, 127 insertions(+), 20 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 9a5e7265330c..e4c6aae56c54 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -934,9 +934,11 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase): return # Skip processing a marker event if the room version doesn't - # support it. + # support it or the event is not from the room creator. room_version = await self.store.get_room_version(marker_event.room_id) - if not room_version.msc2716_historical: + create_event = await self.store.get_create_event_for_room(marker_event.room_id) + room_creator = create_event.content.get("creator", None) + if not room_version.msc2716_historical or marker_event.sender != room_creator: return logger.debug("_handle_marker_event: received %s", marker_event) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 40b53274fb3d..f3e1632be472 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1770,10 +1770,12 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): # Not a insertion event return - # Skip processing a insertion event if the room version doesn't - # support it. + # Skip processing an insertion event if the room version doesn't + # support it or the event is not from the room creator. room_version = self.store.get_room_version_txn(txn, event.room_id) - if not room_version.msc2716_historical: + create_event = self.store.get_create_event_for_room_txn(txn, event.room_id) + room_creator = create_event.content.get("creator", None) + if not room_version.msc2716_historical or event.sender != room_creator: return next_chunk_id = event.content.get(EventContentFields.MSC2716_NEXT_CHUNK_ID) @@ -1822,9 +1824,11 @@ def _handle_chunk_event(self, txn: LoggingTransaction, event: EventBase): return # Skip processing a chunk event if the room version doesn't - # support it. + # support it or the event is not from the room creator. room_version = self.store.get_room_version_txn(txn, event.room_id) - if not room_version.msc2716_historical: + create_event = self.store.get_create_event_for_room_txn(txn, event.room_id) + room_creator = create_event.content.get("creator", None) + if not room_version.msc2716_historical or event.sender != room_creator: return chunk_id = event.content.get(EventContentFields.MSC2716_CHUNK_ID) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 375463e4e979..b8a51e17ab9e 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -55,7 +55,7 @@ from synapse.replication.tcp.streams import BackfillStream from synapse.replication.tcp.streams.events import EventsStream from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause -from synapse.storage.database import DatabasePool +from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.engines import PostgresEngine from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator from synapse.storage.util.sequence import build_sequence_generator @@ -223,6 +223,74 @@ async def get_received_ts(self, event_id: str) -> Optional[int]: desc="get_received_ts", ) + # Inform mypy that if allow_none is False (the default) then get_event + + # always returns an EventBase. + @overload + def get_event_txn( + self, + event_id: str, + allow_rejected: bool = False, + allow_none: Literal[False] = False, + ) -> EventBase: + ... + + @overload + def get_event_txn( + self, + event_id: str, + allow_rejected: bool = False, + allow_none: Literal[True] = False, + ) -> Optional[EventBase]: + ... + + def get_event_txn( + self, + txn: LoggingTransaction, + event_id: str, + allow_rejected: bool = False, + allow_none: bool = False, + ) -> Optional[EventBase]: + """Get an event from the database by event_id. + Args: + txn: Transaction object + event_id: The event_id of the event to fetch + get_prev_content: If True and event is a state event, + include the previous states content in the unsigned field. + allow_rejected: If True, return rejected events. Otherwise, + behave as per allow_none. + allow_none: If True, return None if no event found, if + False throw a NotFoundError + check_room_id: if not None, check the room of the found event. + If there is a mismatch, behave as per allow_none. + Returns: + The event, or None if the event was not found and allow_none=True + Raises: + NotFoundError: if the event_id was not found and allow_none=False + """ + event_map = self._fetch_event_rows(txn, [event_id]) + event_info = event_map[event_id] + if event_info is None and not allow_none: + raise NotFoundError("Could not find event %s" % (event_id,)) + + rejected_reason = event_info["rejected_reason"] + if not allow_rejected and rejected_reason: + return + + d = db_to_json(event_info["json"]) + internal_metadata = db_to_json(event_info["internal_metadata"]) + room_version_id = event_info["room_version_id"] + room_version = KNOWN_ROOM_VERSIONS.get(room_version_id) + + event = make_event_from_dict( + event_dict=d, + room_version=room_version, + internal_metadata_dict=internal_metadata, + rejected_reason=rejected_reason, + ) + + return event + # Inform mypy that if allow_none is False (the default) then get_event # always returns an EventBase. @overload diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 8e22da99ae60..ad4779a56cb5 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -178,7 +178,26 @@ async def get_create_event_for_room(self, room_id: str) -> EventBase: Raises: NotFoundError if the room is unknown """ - state_ids = await self.get_current_state_ids(room_id) + return await self.db_pool.runInteraction( + "get_create_event_for_room_txn", + self.get_create_event_for_room_txn, + room_id, + ) + + def get_create_event_for_room_txn( + self, txn: LoggingTransaction, room_id: str + ) -> EventBase: + """Get the create state event for a room. + Args: + txn: Transaction object + room_id: The room ID. + Returns: + The room creation event. + Raises: + NotFoundError if the room is unknown + """ + + state_ids = self.get_current_state_ids_txn(txn, room_id) create_id = state_ids.get((EventTypes.Create, "")) # If we can't find the create event, assume we've hit a dead end @@ -186,7 +205,7 @@ async def get_create_event_for_room(self, room_id: str) -> EventBase: raise NotFoundError("Unknown room %s" % (room_id,)) # Retrieve the room's create event and return - create_event = await self.get_event(create_id) + create_event = self.get_event_txn(txn, create_id) return create_event @cached(max_entries=100000, iterable=True) @@ -200,21 +219,35 @@ async def get_current_state_ids(self, room_id: str) -> StateMap[str]: Returns: The current state of the room. """ + return await self.db_pool.runInteraction( + "get_current_state_ids_txn", + self.get_current_state_ids_txn, + room_id, + ) - def _get_current_state_ids_txn(txn): - txn.execute( - """SELECT type, state_key, event_id FROM current_state_events - WHERE room_id = ? - """, - (room_id,), - ) + def get_current_state_ids_txn( + self, txn: LoggingTransaction, room_id: str + ) -> StateMap[str]: + """Get the current state event ids for a room based on the + current_state_events table. + + Args: + txn: Transaction object + room_id: The room to get the state IDs of. - return {(intern_string(r[0]), intern_string(r[1])): r[2] for r in txn} + Returns: + The current state of the room. + """ - return await self.db_pool.runInteraction( - "get_current_state_ids", _get_current_state_ids_txn + txn.execute( + """SELECT type, state_key, event_id FROM current_state_events + WHERE room_id = ? + """, + (room_id,), ) + return {(intern_string(r[0]), intern_string(r[1])): r[2] for r in txn} + # FIXME: how should this be cached? async def get_filtered_current_state_ids( self, room_id: str, state_filter: Optional[StateFilter] = None From 13d0929a2e3242179b7cec6a51a3c915d394b1de Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 10 Aug 2021 01:28:53 -0500 Subject: [PATCH 02/22] Add changelog --- changelog.d/10566.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/10566.feature diff --git a/changelog.d/10566.feature b/changelog.d/10566.feature new file mode 100644 index 000000000000..04575d76a9e3 --- /dev/null +++ b/changelog.d/10566.feature @@ -0,0 +1 @@ +Allow room creators to send historical events specified by [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) in existing room versions. From 259303a25819446d8c261db75cd2c6fc878bd4dd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 10 Aug 2021 01:53:24 -0500 Subject: [PATCH 03/22] Stop people from trying to redact MSC2716 events in unsupported room versions --- synapse/handlers/message.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 8a0024ce8485..e7f85ada552f 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1376,6 +1376,9 @@ async def persist_and_notify_client_event( allow_none=True, ) + room_version = await self.store.get_room_version_id(event.room_id) + room_version_obj = KNOWN_ROOM_VERSIONS[room_version] + # we can make some additional checks now if we have the original event. if original_event: if original_event.type == EventTypes.Create: @@ -1387,6 +1390,28 @@ async def persist_and_notify_client_event( if original_event.type == EventTypes.ServerACL: raise AuthError(403, "Redacting server ACL events is not permitted") + # Add a little safety stop-gap to prevent people from trying to + # redact MSC2716 related events when they're in a room version + # which does not support it yet. We allow people to use MSC2716 + # events in existing room versions but only from the room + # creator since it does not require any changes to the auth + # rules and in effect, the redaction algorithm . In the + # supported room version, we add the `historical` power level to + # auth the MSC2716 related events and adjust the redaction + # algorthim to keep the `historical` field around (redacting an + # event should only strip fields which don't affect the + # structural protocol level). + is_msc2716_event = ( + original_event.type == EventTypes.MSC2716_INSERTION + or original_event.type == EventTypes.MSC2716_CHUNK + or original_event.type == EventTypes.MSC2716_MARKER + ) + if not room_version_obj.msc2716_historical and is_msc2716_event: + raise AuthError( + 403, + "Redacting MSC2716 events is not supported in this room version", + ) + prev_state_ids = await context.get_prev_state_ids() auth_events_ids = self._event_auth_handler.compute_auth_events( event, prev_state_ids, for_verification=True @@ -1394,9 +1419,6 @@ async def persist_and_notify_client_event( auth_events_map = await self.store.get_events(auth_events_ids) auth_events = {(e.type, e.state_key): e for e in auth_events_map.values()} - room_version = await self.store.get_room_version_id(event.room_id) - room_version_obj = KNOWN_ROOM_VERSIONS[room_version] - if event_auth.check_redaction( room_version_obj, event, auth_events=auth_events ): From 16a41dd701e289b8fb45529b6b4854cad7f97346 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 19 Aug 2021 23:42:41 -0500 Subject: [PATCH 04/22] Populate rooms.creator column for easy lookup > From some [out of band discussion](https://matrix.to/#/!UytJQHLQYfvYWsGrGY:jki.re/$p2fKESoFst038x6pOOmsY0C49S2gLKMr0jhNMz_JJz0?via=jki.re&via=matrix.org), my plan is to use `rooms.creator`. But currently, we don't fill in `creator` for remote rooms when a user is invited to a room for example. So we need to add some code to fill in `creator` wherever we add to the `rooms` table. And also add a background update to fill in the rows missing `creator` (we can use the same logic that `get_create_event_for_room_txn` is doing by looking in the state events to get the `creator`). > > https://github.com/matrix-org/synapse/pull/10566#issuecomment-901616642 --- synapse/storage/databases/main/room.py | 70 ++++++++++++++++++- .../delta/63/01populate_rooms_creator.sql | 17 +++++ 2 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 synapse/storage/schema/main/delta/63/01populate_rooms_creator.sql diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 443e5f331545..7f0f95b019d7 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1026,6 +1026,7 @@ def get_rooms_for_retention_period_in_range_txn(txn): class _BackgroundUpdates: REMOVE_TOMESTONED_ROOMS_BG_UPDATE = "remove_tombstoned_rooms_from_directory" ADD_ROOMS_ROOM_VERSION_COLUMN = "add_rooms_room_version_column" + POPULATE_ROOMS_CREATOR_COLUMN = "populate_rooms_creator_column" POPULATE_ROOM_DEPTH_MIN_DEPTH2 = "populate_room_depth_min_depth2" REPLACE_ROOM_DEPTH_MIN_DEPTH = "replace_room_depth_min_depth" @@ -1059,6 +1060,11 @@ def __init__(self, database: DatabasePool, db_conn, hs): self._background_add_rooms_room_version_column, ) + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN, + self._background_populate_rooms_creator_column, + ) + # BG updates to change the type of room_depth.min_depth self.db_pool.updates.register_background_update_handler( _BackgroundUpdates.POPULATE_ROOM_DEPTH_MIN_DEPTH2, @@ -1206,6 +1212,60 @@ def _background_add_rooms_room_version_column_txn(txn: LoggingTransaction): return batch_size + async def _background_populate_rooms_creator_column( + self, progress: dict, batch_size: int + ): + """Background update to go and add creator information to `rooms` + table from `current_state_events` table. + """ + + last_room_id = progress.get("room_id", "") + + def _background_populate_rooms_creator_column_txn(txn: LoggingTransaction): + sql = """ + SELECT room_id, json FROM current_state_events + INNER JOIN event_json USING (room_id, event_id) + WHERE room_id > ? AND type = 'm.room.create' AND state_key = '' + ORDER BY room_id + LIMIT ? + """ + + txn.execute(sql, (last_room_id, batch_size)) + + new_last_room_id = "" + for room_id, event_json in txn: + event_dict = db_to_json(event_json) + + creator = event_dict.get("content").get("creator") + + self.db_pool.simple_update_txn( + txn, + table="rooms", + keyvalues={"room_id": room_id}, + updatevalues={"creator": creator}, + ) + new_last_room_id = room_id + + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN, + {"room_id": new_last_room_id}, + ) + + return False + + end = await self.db_pool.runInteraction( + "_background_populate_rooms_creator_column", + _background_populate_rooms_creator_column_txn, + ) + + if end: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN + ) + + return batch_size + async def _remove_tombstoned_rooms_from_directory( self, progress, batch_size ) -> int: @@ -1376,6 +1436,9 @@ async def upsert_room_on_join(self, room_id: str, room_version: RoomVersion): # mark the room as having an auth chain cover index. has_auth_chain_index = await self.has_auth_chain_index(room_id) + create_event = await self.store.get_create_event_for_room(room_id) + room_creator = create_event.content.get("creator", None) + await self.db_pool.simple_upsert( desc="upsert_room_on_join", table="rooms", @@ -1383,7 +1446,7 @@ async def upsert_room_on_join(self, room_id: str, room_version: RoomVersion): values={"room_version": room_version.identifier}, insertion_values={ "is_public": False, - "creator": "", + "creator": room_creator, "has_auth_chain_index": has_auth_chain_index, }, # rooms has a unique constraint on room_id, so no need to lock when doing an @@ -1454,6 +1517,9 @@ async def maybe_store_room_on_outlier_membership( # mark the room as having an auth chain cover index. has_auth_chain_index = await self.has_auth_chain_index(room_id) + create_event = await self.store.get_create_event_for_room(room_id) + room_creator = create_event.content.get("creator", None) + await self.db_pool.simple_upsert( desc="maybe_store_room_on_outlier_membership", table="rooms", @@ -1462,7 +1528,7 @@ async def maybe_store_room_on_outlier_membership( insertion_values={ "room_version": room_version.identifier, "is_public": False, - "creator": "", + "creator": room_creator, "has_auth_chain_index": has_auth_chain_index, }, # rooms has a unique constraint on room_id, so no need to lock when doing an diff --git a/synapse/storage/schema/main/delta/63/01populate_rooms_creator.sql b/synapse/storage/schema/main/delta/63/01populate_rooms_creator.sql new file mode 100644 index 000000000000..f3e24b1ee38a --- /dev/null +++ b/synapse/storage/schema/main/delta/63/01populate_rooms_creator.sql @@ -0,0 +1,17 @@ +/* Copyright 2020 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT into background_updates (update_name, progress_json) + VALUES ('populate_rooms_creator_column', '{}'); From aafa06981654b412d6d92b786f83129cab360303 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 19 Aug 2021 23:47:36 -0500 Subject: [PATCH 05/22] Remove and switch away from get_create_event_for_room_txn --- synapse/storage/databases/main/events.py | 18 +++-- .../storage/databases/main/events_worker.py | 70 +------------------ synapse/storage/databases/main/state.py | 57 ++++----------- 3 files changed, 27 insertions(+), 118 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index f3e1632be472..faf45493fbdd 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1773,8 +1773,13 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): # Skip processing an insertion event if the room version doesn't # support it or the event is not from the room creator. room_version = self.store.get_room_version_txn(txn, event.room_id) - create_event = self.store.get_create_event_for_room_txn(txn, event.room_id) - room_creator = create_event.content.get("creator", None) + room_creator = self.db_pool.simple_select_one_onecol_txn( + txn, + table="rooms", + keyvalues={"room_id": event.room_id}, + retcol="creator", + allow_none=True, + ) if not room_version.msc2716_historical or event.sender != room_creator: return @@ -1826,8 +1831,13 @@ def _handle_chunk_event(self, txn: LoggingTransaction, event: EventBase): # Skip processing a chunk event if the room version doesn't # support it or the event is not from the room creator. room_version = self.store.get_room_version_txn(txn, event.room_id) - create_event = self.store.get_create_event_for_room_txn(txn, event.room_id) - room_creator = create_event.content.get("creator", None) + room_creator = self.db_pool.simple_select_one_onecol_txn( + txn, + table="rooms", + keyvalues={"room_id": event.room_id}, + retcol="creator", + allow_none=True, + ) if not room_version.msc2716_historical or event.sender != room_creator: return diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index b8a51e17ab9e..375463e4e979 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -55,7 +55,7 @@ from synapse.replication.tcp.streams import BackfillStream from synapse.replication.tcp.streams.events import EventsStream from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause -from synapse.storage.database import DatabasePool, LoggingTransaction +from synapse.storage.database import DatabasePool from synapse.storage.engines import PostgresEngine from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator from synapse.storage.util.sequence import build_sequence_generator @@ -223,74 +223,6 @@ async def get_received_ts(self, event_id: str) -> Optional[int]: desc="get_received_ts", ) - # Inform mypy that if allow_none is False (the default) then get_event - - # always returns an EventBase. - @overload - def get_event_txn( - self, - event_id: str, - allow_rejected: bool = False, - allow_none: Literal[False] = False, - ) -> EventBase: - ... - - @overload - def get_event_txn( - self, - event_id: str, - allow_rejected: bool = False, - allow_none: Literal[True] = False, - ) -> Optional[EventBase]: - ... - - def get_event_txn( - self, - txn: LoggingTransaction, - event_id: str, - allow_rejected: bool = False, - allow_none: bool = False, - ) -> Optional[EventBase]: - """Get an event from the database by event_id. - Args: - txn: Transaction object - event_id: The event_id of the event to fetch - get_prev_content: If True and event is a state event, - include the previous states content in the unsigned field. - allow_rejected: If True, return rejected events. Otherwise, - behave as per allow_none. - allow_none: If True, return None if no event found, if - False throw a NotFoundError - check_room_id: if not None, check the room of the found event. - If there is a mismatch, behave as per allow_none. - Returns: - The event, or None if the event was not found and allow_none=True - Raises: - NotFoundError: if the event_id was not found and allow_none=False - """ - event_map = self._fetch_event_rows(txn, [event_id]) - event_info = event_map[event_id] - if event_info is None and not allow_none: - raise NotFoundError("Could not find event %s" % (event_id,)) - - rejected_reason = event_info["rejected_reason"] - if not allow_rejected and rejected_reason: - return - - d = db_to_json(event_info["json"]) - internal_metadata = db_to_json(event_info["internal_metadata"]) - room_version_id = event_info["room_version_id"] - room_version = KNOWN_ROOM_VERSIONS.get(room_version_id) - - event = make_event_from_dict( - event_dict=d, - room_version=room_version, - internal_metadata_dict=internal_metadata, - rejected_reason=rejected_reason, - ) - - return event - # Inform mypy that if allow_none is False (the default) then get_event # always returns an EventBase. @overload diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index ad4779a56cb5..8e22da99ae60 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -178,26 +178,7 @@ async def get_create_event_for_room(self, room_id: str) -> EventBase: Raises: NotFoundError if the room is unknown """ - return await self.db_pool.runInteraction( - "get_create_event_for_room_txn", - self.get_create_event_for_room_txn, - room_id, - ) - - def get_create_event_for_room_txn( - self, txn: LoggingTransaction, room_id: str - ) -> EventBase: - """Get the create state event for a room. - Args: - txn: Transaction object - room_id: The room ID. - Returns: - The room creation event. - Raises: - NotFoundError if the room is unknown - """ - - state_ids = self.get_current_state_ids_txn(txn, room_id) + state_ids = await self.get_current_state_ids(room_id) create_id = state_ids.get((EventTypes.Create, "")) # If we can't find the create event, assume we've hit a dead end @@ -205,7 +186,7 @@ def get_create_event_for_room_txn( raise NotFoundError("Unknown room %s" % (room_id,)) # Retrieve the room's create event and return - create_event = self.get_event_txn(txn, create_id) + create_event = await self.get_event(create_id) return create_event @cached(max_entries=100000, iterable=True) @@ -219,35 +200,21 @@ async def get_current_state_ids(self, room_id: str) -> StateMap[str]: Returns: The current state of the room. """ - return await self.db_pool.runInteraction( - "get_current_state_ids_txn", - self.get_current_state_ids_txn, - room_id, - ) - - def get_current_state_ids_txn( - self, txn: LoggingTransaction, room_id: str - ) -> StateMap[str]: - """Get the current state event ids for a room based on the - current_state_events table. - Args: - txn: Transaction object - room_id: The room to get the state IDs of. + def _get_current_state_ids_txn(txn): + txn.execute( + """SELECT type, state_key, event_id FROM current_state_events + WHERE room_id = ? + """, + (room_id,), + ) - Returns: - The current state of the room. - """ + return {(intern_string(r[0]), intern_string(r[1])): r[2] for r in txn} - txn.execute( - """SELECT type, state_key, event_id FROM current_state_events - WHERE room_id = ? - """, - (room_id,), + return await self.db_pool.runInteraction( + "get_current_state_ids", _get_current_state_ids_txn ) - return {(intern_string(r[0]), intern_string(r[1])): r[2] for r in txn} - # FIXME: how should this be cached? async def get_filtered_current_state_ids( self, room_id: str, state_filter: Optional[StateFilter] = None From fedd25017feb3b727f585b5502d836f643510fe2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 20 Aug 2021 19:05:08 -0500 Subject: [PATCH 06/22] Fix no create event being found because no state events persisted yet --- synapse/handlers/federation.py | 1 + synapse/storage/databases/main/room.py | 21 +++++++++++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3f0a2e0d4bab..b174053070da 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1692,6 +1692,7 @@ async def do_invite_join( await self.store.upsert_room_on_join( room_id=room_id, room_version=room_version_obj, + auth_events=auth_chain, ) max_stream_id = await self._persist_auth_tree( diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 0ad4abfd9c2d..1974da7da3de 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -22,6 +22,7 @@ from synapse.api.constants import EventTypes, JoinRules from synapse.api.errors import StoreError from synapse.api.room_versions import RoomVersion, RoomVersions +from synapse.events import EventBase from synapse.storage._base import SQLBaseStore, db_to_json from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.search import SearchStore @@ -1333,7 +1334,7 @@ async def has_auth_chain_index(self, room_id: str) -> bool: keyvalues={"room_id": room_id}, retcol="MAX(stream_ordering)", allow_none=True, - desc="upsert_room_on_join", + desc="has_auth_chain_index_fallback", ) return max_ordering is None @@ -1410,7 +1411,9 @@ def __init__(self, database: DatabasePool, db_conn, hs): self.config = hs.config - async def upsert_room_on_join(self, room_id: str, room_version: RoomVersion): + async def upsert_room_on_join( + self, room_id: str, room_version: RoomVersion, auth_events: List[EventBase] + ): """Ensure that the room is stored in the table Called when we join a room over federation, and overwrites any room version @@ -1421,7 +1424,17 @@ async def upsert_room_on_join(self, room_id: str, room_version: RoomVersion): # mark the room as having an auth chain cover index. has_auth_chain_index = await self.has_auth_chain_index(room_id) - create_event = await self.store.get_create_event_for_room(room_id) + create_event = None + for e in auth_events: + if (e.type, e.state_key) == (EventTypes.Create, ""): + create_event = e + break + + if create_event is None: + # If the state doesn't have a create event then the room is + # invalid, and it would fail auth checks anyway. + raise StoreError(400, "No create event in state") + room_creator = create_event.content.get("creator", None) await self.db_pool.simple_upsert( @@ -1451,7 +1464,7 @@ async def maybe_store_room_on_outlier_membership( # mark the room as having an auth chain cover index. has_auth_chain_index = await self.has_auth_chain_index(room_id) - create_event = await self.store.get_create_event_for_room(room_id) + create_event = await self.get_create_event_for_room(room_id) room_creator = create_event.content.get("creator", None) await self.db_pool.simple_upsert( From 8a2db202a30f79d7198afc01fd5196c81f711d98 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 20 Aug 2021 22:34:12 -0500 Subject: [PATCH 07/22] Fix and add tests for rooms creator bg update --- synapse/storage/databases/main/room.py | 3 + tests/storage/databases/main/test_room.py | 98 +++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 tests/storage/databases/main/test_room.py diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 1974da7da3de..78bc586fa770 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1232,6 +1232,9 @@ def _background_populate_rooms_creator_column_txn(txn: LoggingTransaction): ) new_last_room_id = room_id + if new_last_room_id == "": + return True + self.db_pool.updates._background_update_progress_txn( txn, _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN, diff --git a/tests/storage/databases/main/test_room.py b/tests/storage/databases/main/test_room.py new file mode 100644 index 000000000000..ffee70715342 --- /dev/null +++ b/tests/storage/databases/main/test_room.py @@ -0,0 +1,98 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.rest import admin +from synapse.rest.client import login, room +from synapse.storage.databases.main.room import _BackgroundUpdates + +from tests.unittest import HomeserverTestCase + + +class RoomBackgroundUpdateStoreTestCase(HomeserverTestCase): + + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self.store = hs.get_datastore() + self.user_id = self.register_user("foo", "pass") + self.token = self.login("foo", "pass") + + def _generate_room(self) -> str: + room_id = self.helper.create_room_as(self.user_id, tok=self.token) + + return room_id + + def test_background_populate_rooms_creator_column(self): + """Test that the background update to populate the rooms creator column + works properly. + """ + + # Insert a room without the creator + room_id = self._generate_room() + self.get_success( + self.store.db_pool.simple_update( + table="rooms", + keyvalues={"room_id": room_id}, + updatevalues={"creator": None}, + desc="test", + ) + ) + + # Make sure the test is starting out with a room without a creator + room_creator_before = self.get_success( + self.store.db_pool.simple_select_one_onecol( + table="rooms", + keyvalues={"room_id": room_id}, + retcol="creator", + allow_none=True, + ) + ) + self.assertEqual(room_creator_before, None) + + # Insert and run the background update. + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN, + "progress_json": "{}", + }, + ) + ) + + # ... and tell the DataStore that it hasn't finished all updates yet + self.store.db_pool.updates._all_done = False + + # Now let's actually drive the updates to completion + while not self.get_success( + self.store.db_pool.updates.has_completed_background_updates() + ): + self.get_success( + self.store.db_pool.updates.do_next_background_update(100), by=0.1 + ) + + # Make sure the background update filled in the room creator + room_creator_after = self.get_success( + self.store.db_pool.simple_select_one_onecol( + table="rooms", + keyvalues={"room_id": room_id}, + retcol="creator", + allow_none=True, + ) + ) + self.assertEqual(room_creator_after, self.user_id) From 2b177b72e8f5e91d9c3838c8ba6e957a01343dcd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 25 Aug 2021 11:45:25 -0500 Subject: [PATCH 08/22] Populate rooms.creator field for easy lookup Part of https://github.com/matrix-org/synapse/pull/10566 - Fill in creator whenever we insert into the rooms table - Add background update to backfill any missing creator values --- synapse/storage/databases/main/room.py | 90 ++++++++++++++++- .../delta/63/02populate-rooms-creator.sql | 17 ++++ tests/storage/databases/main/test_room.py | 98 +++++++++++++++++++ 3 files changed, 201 insertions(+), 4 deletions(-) create mode 100644 synapse/storage/schema/main/delta/63/02populate-rooms-creator.sql create mode 100644 tests/storage/databases/main/test_room.py diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index f98b89259892..78bc586fa770 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -22,6 +22,7 @@ from synapse.api.constants import EventTypes, JoinRules from synapse.api.errors import StoreError from synapse.api.room_versions import RoomVersion, RoomVersions +from synapse.events import EventBase from synapse.storage._base import SQLBaseStore, db_to_json from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.search import SearchStore @@ -1011,6 +1012,7 @@ def get_rooms_for_retention_period_in_range_txn(txn): class _BackgroundUpdates: REMOVE_TOMESTONED_ROOMS_BG_UPDATE = "remove_tombstoned_rooms_from_directory" ADD_ROOMS_ROOM_VERSION_COLUMN = "add_rooms_room_version_column" + POPULATE_ROOMS_CREATOR_COLUMN = "populate_rooms_creator_column" POPULATE_ROOM_DEPTH_MIN_DEPTH2 = "populate_room_depth_min_depth2" REPLACE_ROOM_DEPTH_MIN_DEPTH = "replace_room_depth_min_depth" @@ -1044,6 +1046,11 @@ def __init__(self, database: DatabasePool, db_conn, hs): self._background_add_rooms_room_version_column, ) + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN, + self._background_populate_rooms_creator_column, + ) + # BG updates to change the type of room_depth.min_depth self.db_pool.updates.register_background_update_handler( _BackgroundUpdates.POPULATE_ROOM_DEPTH_MIN_DEPTH2, @@ -1191,6 +1198,63 @@ def _background_add_rooms_room_version_column_txn(txn: LoggingTransaction): return batch_size + async def _background_populate_rooms_creator_column( + self, progress: dict, batch_size: int + ): + """Background update to go and add creator information to `rooms` + table from `current_state_events` table. + """ + + last_room_id = progress.get("room_id", "") + + def _background_populate_rooms_creator_column_txn(txn: LoggingTransaction): + sql = """ + SELECT room_id, json FROM current_state_events + INNER JOIN event_json USING (room_id, event_id) + WHERE room_id > ? AND type = 'm.room.create' AND state_key = '' + ORDER BY room_id + LIMIT ? + """ + + txn.execute(sql, (last_room_id, batch_size)) + + new_last_room_id = "" + for room_id, event_json in txn: + event_dict = db_to_json(event_json) + + creator = event_dict.get("content").get("creator") + + self.db_pool.simple_update_txn( + txn, + table="rooms", + keyvalues={"room_id": room_id}, + updatevalues={"creator": creator}, + ) + new_last_room_id = room_id + + if new_last_room_id == "": + return True + + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN, + {"room_id": new_last_room_id}, + ) + + return False + + end = await self.db_pool.runInteraction( + "_background_populate_rooms_creator_column", + _background_populate_rooms_creator_column_txn, + ) + + if end: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN + ) + + return batch_size + async def _remove_tombstoned_rooms_from_directory( self, progress, batch_size ) -> int: @@ -1273,7 +1337,7 @@ async def has_auth_chain_index(self, room_id: str) -> bool: keyvalues={"room_id": room_id}, retcol="MAX(stream_ordering)", allow_none=True, - desc="upsert_room_on_join", + desc="has_auth_chain_index_fallback", ) return max_ordering is None @@ -1350,7 +1414,9 @@ def __init__(self, database: DatabasePool, db_conn, hs): self.config = hs.config - async def upsert_room_on_join(self, room_id: str, room_version: RoomVersion): + async def upsert_room_on_join( + self, room_id: str, room_version: RoomVersion, auth_events: List[EventBase] + ): """Ensure that the room is stored in the table Called when we join a room over federation, and overwrites any room version @@ -1361,6 +1427,19 @@ async def upsert_room_on_join(self, room_id: str, room_version: RoomVersion): # mark the room as having an auth chain cover index. has_auth_chain_index = await self.has_auth_chain_index(room_id) + create_event = None + for e in auth_events: + if (e.type, e.state_key) == (EventTypes.Create, ""): + create_event = e + break + + if create_event is None: + # If the state doesn't have a create event then the room is + # invalid, and it would fail auth checks anyway. + raise StoreError(400, "No create event in state") + + room_creator = create_event.content.get("creator", None) + await self.db_pool.simple_upsert( desc="upsert_room_on_join", table="rooms", @@ -1368,7 +1447,7 @@ async def upsert_room_on_join(self, room_id: str, room_version: RoomVersion): values={"room_version": room_version.identifier}, insertion_values={ "is_public": False, - "creator": "", + "creator": room_creator, "has_auth_chain_index": has_auth_chain_index, }, # rooms has a unique constraint on room_id, so no need to lock when doing an @@ -1388,6 +1467,9 @@ async def maybe_store_room_on_outlier_membership( # mark the room as having an auth chain cover index. has_auth_chain_index = await self.has_auth_chain_index(room_id) + create_event = await self.get_create_event_for_room(room_id) + room_creator = create_event.content.get("creator", None) + await self.db_pool.simple_upsert( desc="maybe_store_room_on_outlier_membership", table="rooms", @@ -1396,7 +1478,7 @@ async def maybe_store_room_on_outlier_membership( insertion_values={ "room_version": room_version.identifier, "is_public": False, - "creator": "", + "creator": room_creator, "has_auth_chain_index": has_auth_chain_index, }, # rooms has a unique constraint on room_id, so no need to lock when doing an diff --git a/synapse/storage/schema/main/delta/63/02populate-rooms-creator.sql b/synapse/storage/schema/main/delta/63/02populate-rooms-creator.sql new file mode 100644 index 000000000000..d5975e9a4402 --- /dev/null +++ b/synapse/storage/schema/main/delta/63/02populate-rooms-creator.sql @@ -0,0 +1,17 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT into background_updates (update_name, progress_json) + VALUES ('populate_rooms_creator_column', '{}'); diff --git a/tests/storage/databases/main/test_room.py b/tests/storage/databases/main/test_room.py new file mode 100644 index 000000000000..ffee70715342 --- /dev/null +++ b/tests/storage/databases/main/test_room.py @@ -0,0 +1,98 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.rest import admin +from synapse.rest.client import login, room +from synapse.storage.databases.main.room import _BackgroundUpdates + +from tests.unittest import HomeserverTestCase + + +class RoomBackgroundUpdateStoreTestCase(HomeserverTestCase): + + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self.store = hs.get_datastore() + self.user_id = self.register_user("foo", "pass") + self.token = self.login("foo", "pass") + + def _generate_room(self) -> str: + room_id = self.helper.create_room_as(self.user_id, tok=self.token) + + return room_id + + def test_background_populate_rooms_creator_column(self): + """Test that the background update to populate the rooms creator column + works properly. + """ + + # Insert a room without the creator + room_id = self._generate_room() + self.get_success( + self.store.db_pool.simple_update( + table="rooms", + keyvalues={"room_id": room_id}, + updatevalues={"creator": None}, + desc="test", + ) + ) + + # Make sure the test is starting out with a room without a creator + room_creator_before = self.get_success( + self.store.db_pool.simple_select_one_onecol( + table="rooms", + keyvalues={"room_id": room_id}, + retcol="creator", + allow_none=True, + ) + ) + self.assertEqual(room_creator_before, None) + + # Insert and run the background update. + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN, + "progress_json": "{}", + }, + ) + ) + + # ... and tell the DataStore that it hasn't finished all updates yet + self.store.db_pool.updates._all_done = False + + # Now let's actually drive the updates to completion + while not self.get_success( + self.store.db_pool.updates.has_completed_background_updates() + ): + self.get_success( + self.store.db_pool.updates.do_next_background_update(100), by=0.1 + ) + + # Make sure the background update filled in the room creator + room_creator_after = self.get_success( + self.store.db_pool.simple_select_one_onecol( + table="rooms", + keyvalues={"room_id": room_id}, + retcol="creator", + allow_none=True, + ) + ) + self.assertEqual(room_creator_after, self.user_id) From ee406dfb902cadd200daf12c9c7803128682a732 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 25 Aug 2021 11:50:49 -0500 Subject: [PATCH 09/22] Add changelog --- changelog.d/10697.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/10697.misc diff --git a/changelog.d/10697.misc b/changelog.d/10697.misc new file mode 100644 index 000000000000..a9ad17faf26c --- /dev/null +++ b/changelog.d/10697.misc @@ -0,0 +1 @@ +Ensure `rooms.creator` field is always populated for easy lookup in [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) usage later. From 9f8e22bf76b50bb8ec635ffbb9f5ad245db70b09 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 25 Aug 2021 11:56:49 -0500 Subject: [PATCH 10/22] Fix usage --- synapse/handlers/federation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 246df43501bc..ec0e057eaa4a 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1690,6 +1690,7 @@ async def do_invite_join( await self.store.upsert_room_on_join( room_id=room_id, room_version=room_version_obj, + auth_events=auth_chain, ) max_stream_id = await self._persist_auth_tree( From 9b828ab031fec737d71e6739f6fa88cac5793fa2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 25 Aug 2021 12:06:40 -0500 Subject: [PATCH 11/22] Remove extra delta already included in #10697 --- .../main/delta/63/01populate_rooms_creator.sql | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 synapse/storage/schema/main/delta/63/01populate_rooms_creator.sql diff --git a/synapse/storage/schema/main/delta/63/01populate_rooms_creator.sql b/synapse/storage/schema/main/delta/63/01populate_rooms_creator.sql deleted file mode 100644 index f3e24b1ee38a..000000000000 --- a/synapse/storage/schema/main/delta/63/01populate_rooms_creator.sql +++ /dev/null @@ -1,17 +0,0 @@ -/* Copyright 2020 The Matrix.org Foundation C.I.C - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -INSERT into background_updates (update_name, progress_json) - VALUES ('populate_rooms_creator_column', '{}'); From 3c9b5a6643b2e0a1f3836ea693628ed66f7699f8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 25 Aug 2021 16:17:33 -0500 Subject: [PATCH 12/22] Don't worry about setting creator for invite --- synapse/storage/databases/main/room.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 78bc586fa770..04eb3e6e085d 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1440,6 +1440,11 @@ async def upsert_room_on_join( room_creator = create_event.content.get("creator", None) + if room_creator is None: + # If the create event does not have a creator then the room is + # invalid, and it would fail auth checks anyway. + raise StoreError(400, "No creator defined on the create event") + await self.db_pool.simple_upsert( desc="upsert_room_on_join", table="rooms", @@ -1467,9 +1472,6 @@ async def maybe_store_room_on_outlier_membership( # mark the room as having an auth chain cover index. has_auth_chain_index = await self.has_auth_chain_index(room_id) - create_event = await self.get_create_event_for_room(room_id) - room_creator = create_event.content.get("creator", None) - await self.db_pool.simple_upsert( desc="maybe_store_room_on_outlier_membership", table="rooms", @@ -1478,7 +1480,10 @@ async def maybe_store_room_on_outlier_membership( insertion_values={ "room_version": room_version.identifier, "is_public": False, - "creator": room_creator, + # We don't worry about setting the `creator` here because + # we don't process any messages in a room while a user is + # invited (only after the join). + "creator": "", "has_auth_chain_index": has_auth_chain_index, }, # rooms has a unique constraint on room_id, so no need to lock when doing an From 9a600ff334b2953abd13ec851ecb25019208532f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 30 Aug 2021 20:40:51 -0500 Subject: [PATCH 13/22] Only iterate over rows missing the creator See https://github.com/matrix-org/synapse/pull/10697#discussion_r695940898 --- synapse/storage/databases/main/room.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 04eb3e6e085d..a7b644be2b8e 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1209,9 +1209,10 @@ async def _background_populate_rooms_creator_column( def _background_populate_rooms_creator_column_txn(txn: LoggingTransaction): sql = """ - SELECT room_id, json FROM current_state_events - INNER JOIN event_json USING (room_id, event_id) - WHERE room_id > ? AND type = 'm.room.create' AND state_key = '' + SELECT room_id, json FROM event_json + INNER JOIN rooms AS room USING (room_id) + INNER JOIN current_state_events AS state_event USING (room_id, event_id) + WHERE room_id > ? AND (room.creator IS NULL OR room.creator = '') AND state_event.type = 'm.room.create' AND state_event.state_key = '' ORDER BY room_id LIMIT ? """ From 79b4991b16fe416334c94a79d5633c87791cc102 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 30 Aug 2021 21:02:35 -0500 Subject: [PATCH 14/22] Use constant to fetch room creator field See https://github.com/matrix-org/synapse/pull/10697#discussion_r696803029 --- synapse/api/constants.py | 2 ++ synapse/storage/databases/main/room.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 829061c870c4..a0e5fffbfca0 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -198,6 +198,8 @@ class EventContentFields: # cf https://github.com/matrix-org/matrix-doc/pull/1772 ROOM_TYPE = "type" + ROOM_CREATOR = "creator" + # Used on normal messages to indicate they were historically imported after the fact MSC2716_HISTORICAL = "org.matrix.msc2716.historical" # For "insertion" events to indicate what the next chunk ID should be in diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index a7b644be2b8e..903ed2bc03ec 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -19,7 +19,7 @@ from enum import Enum from typing import Any, Dict, List, Optional, Tuple -from synapse.api.constants import EventTypes, JoinRules +from synapse.api.constants import EventTypes, EventContentFields, JoinRules from synapse.api.errors import StoreError from synapse.api.room_versions import RoomVersion, RoomVersions from synapse.events import EventBase @@ -1223,7 +1223,7 @@ def _background_populate_rooms_creator_column_txn(txn: LoggingTransaction): for room_id, event_json in txn: event_dict = db_to_json(event_json) - creator = event_dict.get("content").get("creator") + creator = event_dict.get("content").get(EventContentFields.ROOM_CREATOR) self.db_pool.simple_update_txn( txn, @@ -1439,7 +1439,7 @@ async def upsert_room_on_join( # invalid, and it would fail auth checks anyway. raise StoreError(400, "No create event in state") - room_creator = create_event.content.get("creator", None) + room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR) if room_creator is None: # If the create event does not have a creator then the room is From 25db2895d4d01a06ccd65e47e53265e879e4ab33 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 30 Aug 2021 22:07:07 -0500 Subject: [PATCH 15/22] More protection from other random types See https://github.com/matrix-org/synapse/pull/10697#discussion_r696806853 --- synapse/storage/databases/main/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 903ed2bc03ec..6ab445eb0d2f 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1441,7 +1441,7 @@ async def upsert_room_on_join( room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR) - if room_creator is None: + if not isinstance(room_creator, str): # If the create event does not have a creator then the room is # invalid, and it would fail auth checks anyway. raise StoreError(400, "No creator defined on the create event") From 41e72c2f36f1580ae008f52e2d8d34ffe31f0e9f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 30 Aug 2021 22:11:05 -0500 Subject: [PATCH 16/22] Move new background update to end of list See https://github.com/matrix-org/synapse/pull/10697#discussion_r696814181 --- synapse/storage/databases/main/room.py | 128 ++++++++++++------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 6ab445eb0d2f..64df348c7b8a 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1012,9 +1012,9 @@ def get_rooms_for_retention_period_in_range_txn(txn): class _BackgroundUpdates: REMOVE_TOMESTONED_ROOMS_BG_UPDATE = "remove_tombstoned_rooms_from_directory" ADD_ROOMS_ROOM_VERSION_COLUMN = "add_rooms_room_version_column" - POPULATE_ROOMS_CREATOR_COLUMN = "populate_rooms_creator_column" POPULATE_ROOM_DEPTH_MIN_DEPTH2 = "populate_room_depth_min_depth2" REPLACE_ROOM_DEPTH_MIN_DEPTH = "replace_room_depth_min_depth" + POPULATE_ROOMS_CREATOR_COLUMN = "populate_rooms_creator_column" _REPLACE_ROOM_DEPTH_SQL_COMMANDS = ( @@ -1046,11 +1046,6 @@ def __init__(self, database: DatabasePool, db_conn, hs): self._background_add_rooms_room_version_column, ) - self.db_pool.updates.register_background_update_handler( - _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN, - self._background_populate_rooms_creator_column, - ) - # BG updates to change the type of room_depth.min_depth self.db_pool.updates.register_background_update_handler( _BackgroundUpdates.POPULATE_ROOM_DEPTH_MIN_DEPTH2, @@ -1061,6 +1056,11 @@ def __init__(self, database: DatabasePool, db_conn, hs): self._background_replace_room_depth_min_depth, ) + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN, + self._background_populate_rooms_creator_column, + ) + async def _background_insert_retention(self, progress, batch_size): """Retrieves a list of all rooms within a range and inserts an entry for each of them into the room_retention table. @@ -1198,64 +1198,6 @@ def _background_add_rooms_room_version_column_txn(txn: LoggingTransaction): return batch_size - async def _background_populate_rooms_creator_column( - self, progress: dict, batch_size: int - ): - """Background update to go and add creator information to `rooms` - table from `current_state_events` table. - """ - - last_room_id = progress.get("room_id", "") - - def _background_populate_rooms_creator_column_txn(txn: LoggingTransaction): - sql = """ - SELECT room_id, json FROM event_json - INNER JOIN rooms AS room USING (room_id) - INNER JOIN current_state_events AS state_event USING (room_id, event_id) - WHERE room_id > ? AND (room.creator IS NULL OR room.creator = '') AND state_event.type = 'm.room.create' AND state_event.state_key = '' - ORDER BY room_id - LIMIT ? - """ - - txn.execute(sql, (last_room_id, batch_size)) - - new_last_room_id = "" - for room_id, event_json in txn: - event_dict = db_to_json(event_json) - - creator = event_dict.get("content").get(EventContentFields.ROOM_CREATOR) - - self.db_pool.simple_update_txn( - txn, - table="rooms", - keyvalues={"room_id": room_id}, - updatevalues={"creator": creator}, - ) - new_last_room_id = room_id - - if new_last_room_id == "": - return True - - self.db_pool.updates._background_update_progress_txn( - txn, - _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN, - {"room_id": new_last_room_id}, - ) - - return False - - end = await self.db_pool.runInteraction( - "_background_populate_rooms_creator_column", - _background_populate_rooms_creator_column_txn, - ) - - if end: - await self.db_pool.updates._end_background_update( - _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN - ) - - return batch_size - async def _remove_tombstoned_rooms_from_directory( self, progress, batch_size ) -> int: @@ -1408,6 +1350,64 @@ def process(txn: Cursor) -> None: return 0 + async def _background_populate_rooms_creator_column( + self, progress: dict, batch_size: int + ): + """Background update to go and add creator information to `rooms` + table from `current_state_events` table. + """ + + last_room_id = progress.get("room_id", "") + + def _background_populate_rooms_creator_column_txn(txn: LoggingTransaction): + sql = """ + SELECT room_id, json FROM event_json + INNER JOIN rooms AS room USING (room_id) + INNER JOIN current_state_events AS state_event USING (room_id, event_id) + WHERE room_id > ? AND (room.creator IS NULL OR room.creator = '') AND state_event.type = 'm.room.create' AND state_event.state_key = '' + ORDER BY room_id + LIMIT ? + """ + + txn.execute(sql, (last_room_id, batch_size)) + + new_last_room_id = "" + for room_id, event_json in txn: + event_dict = db_to_json(event_json) + + creator = event_dict.get("content").get(EventContentFields.ROOM_CREATOR) + + self.db_pool.simple_update_txn( + txn, + table="rooms", + keyvalues={"room_id": room_id}, + updatevalues={"creator": creator}, + ) + new_last_room_id = room_id + + if new_last_room_id == "": + return True + + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN, + {"room_id": new_last_room_id}, + ) + + return False + + end = await self.db_pool.runInteraction( + "_background_populate_rooms_creator_column", + _background_populate_rooms_creator_column_txn, + ) + + if end: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN + ) + + return batch_size + class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore): def __init__(self, database: DatabasePool, db_conn, hs): From 9a3c01515702ddbacbd9d810e8209c40d6aa787b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 30 Aug 2021 22:27:56 -0500 Subject: [PATCH 17/22] Fix query casing --- synapse/storage/databases/main/room.py | 2 +- .../storage/schema/main/delta/63/02populate-rooms-creator.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 64df348c7b8a..10929eefcc44 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -19,7 +19,7 @@ from enum import Enum from typing import Any, Dict, List, Optional, Tuple -from synapse.api.constants import EventTypes, EventContentFields, JoinRules +from synapse.api.constants import EventContentFields, EventTypes, JoinRules from synapse.api.errors import StoreError from synapse.api.room_versions import RoomVersion, RoomVersions from synapse.events import EventBase diff --git a/synapse/storage/schema/main/delta/63/02populate-rooms-creator.sql b/synapse/storage/schema/main/delta/63/02populate-rooms-creator.sql index d5975e9a4402..282866cc0e31 100644 --- a/synapse/storage/schema/main/delta/63/02populate-rooms-creator.sql +++ b/synapse/storage/schema/main/delta/63/02populate-rooms-creator.sql @@ -13,5 +13,5 @@ * limitations under the License. */ -INSERT into background_updates (update_name, progress_json) +INSERT INTO background_updates (update_name, progress_json) VALUES ('populate_rooms_creator_column', '{}'); From 9a887a4b096e18220d5fa9b3452e5a84808ec277 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 30 Aug 2021 23:49:41 -0500 Subject: [PATCH 18/22] Fix ambiguity iterating over cursor instead of list Fix `psycopg2.ProgrammingError: no results to fetch` error when tests run with Postgres. ``` SYNAPSE_POSTGRES=1 SYNAPSE_TEST_LOG_LEVEL=INFO python -m twisted.trial tests.storage.databases.main.test_room ``` --- We use `txn.fetchall` because it will return the results as a list or an empty list when there are no results. Docs: > `cursor` objects are iterable, so, instead of calling explicitly fetchone() in a loop, the object itself can be used: > > https://www.psycopg.org/docs/cursor.html#cursor-iterable And I'm guessing iterating over a raw cursor does something weird when there are no results. --- Test CI failure: https://github.com/matrix-org/synapse/pull/10697/checks?check_run_id=3468916530 ``` tests.test_visibility.FilterEventsForServerTestCase.test_large_room =============================================================================== [FAIL] Traceback (most recent call last): File "/home/runner/work/synapse/synapse/tests/storage/databases/main/test_room.py", line 85, in test_background_populate_rooms_creator_column self.get_success( File "/home/runner/work/synapse/synapse/tests/unittest.py", line 500, in get_success return self.successResultOf(d) File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/trial/_synctest.py", line 700, in successResultOf self.fail( twisted.trial.unittest.FailTest: Success result expected on , found failure result instead: Traceback (most recent call last): File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/internet/defer.py", line 701, in errback self._startRunCallbacks(fail) File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/internet/defer.py", line 764, in _startRunCallbacks self._runCallbacks() File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/internet/defer.py", line 858, in _runCallbacks current.result = callback( # type: ignore[misc] File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/internet/defer.py", line 1751, in gotResult current_context.run(_inlineCallbacks, r, gen, status) --- --- File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/internet/defer.py", line 1657, in _inlineCallbacks result = current_context.run( File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/python/failure.py", line 500, in throwExceptionIntoGenerator return g.throw(self.type, self.value, self.tb) File "/home/runner/work/synapse/synapse/synapse/storage/background_updates.py", line 224, in do_next_background_update await self._do_background_update(desired_duration_ms) File "/home/runner/work/synapse/synapse/synapse/storage/background_updates.py", line 261, in _do_background_update items_updated = await update_handler(progress, batch_size) File "/home/runner/work/synapse/synapse/synapse/storage/databases/main/room.py", line 1399, in _background_populate_rooms_creator_column end = await self.db_pool.runInteraction( File "/home/runner/work/synapse/synapse/synapse/storage/database.py", line 686, in runInteraction result = await self.runWithConnection( File "/home/runner/work/synapse/synapse/synapse/storage/database.py", line 791, in runWithConnection return await make_deferred_yieldable( File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/internet/defer.py", line 858, in _runCallbacks current.result = callback( # type: ignore[misc] File "/home/runner/work/synapse/synapse/tests/server.py", line 425, in d.addCallback(lambda x: function(*args, **kwargs)) File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/enterprise/adbapi.py", line 293, in _runWithConnection compat.reraise(excValue, excTraceback) File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/python/deprecate.py", line 298, in deprecatedFunction return function(*args, **kwargs) File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/python/compat.py", line 404, in reraise raise exception.with_traceback(traceback) File "/home/runner/work/synapse/synapse/.tox/py/lib/python3.9/site-packages/twisted/enterprise/adbapi.py", line 284, in _runWithConnection result = func(conn, *args, **kw) File "/home/runner/work/synapse/synapse/synapse/storage/database.py", line 786, in inner_func return func(db_conn, *args, **kwargs) File "/home/runner/work/synapse/synapse/synapse/storage/database.py", line 554, in new_transaction r = func(cursor, *args, **kwargs) File "/home/runner/work/synapse/synapse/synapse/storage/databases/main/room.py", line 1375, in _background_populate_rooms_creator_column_txn for room_id, event_json in txn: psycopg2.ProgrammingError: no results to fetch ``` --- synapse/storage/databases/main/room.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 10929eefcc44..6e7312266d0b 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1370,9 +1370,10 @@ def _background_populate_rooms_creator_column_txn(txn: LoggingTransaction): """ txn.execute(sql, (last_room_id, batch_size)) + room_id_to_create_event_results = txn.fetchall() new_last_room_id = "" - for room_id, event_json in txn: + for room_id, event_json in room_id_to_create_event_results: event_dict = db_to_json(event_json) creator = event_dict.get("content").get(EventContentFields.ROOM_CREATOR) From 6f9cb41e0f45068e2612754cb8cc8235a1e7299c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 31 Aug 2021 00:16:05 -0500 Subject: [PATCH 19/22] Move code not under the MSC2716 room version underneath an experimental config option See https://github.com/matrix-org/synapse/pull/10566#issuecomment-906437909 --- synapse/handlers/federation.py | 6 +++++- synapse/storage/databases/main/events.py | 12 ++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index b174053070da..23cda13f2761 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -800,7 +800,11 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase): room_version = await self.store.get_room_version(marker_event.room_id) create_event = await self.store.get_create_event_for_room(marker_event.room_id) room_creator = create_event.content.get("creator", None) - if not room_version.msc2716_historical or marker_event.sender != room_creator: + if ( + not room_version.msc2716_historical + or not self.hs.config.experimental.msc2716_enabled + or marker_event.sender != room_creator + ): return logger.debug("_handle_marker_event: received %s", marker_event) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index faf45493fbdd..3b2ff401d024 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1780,7 +1780,11 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): retcol="creator", allow_none=True, ) - if not room_version.msc2716_historical or event.sender != room_creator: + if ( + not room_version.msc2716_historical + or not self.hs.config.experimental.msc2716_enabled + or event.sender != room_creator + ): return next_chunk_id = event.content.get(EventContentFields.MSC2716_NEXT_CHUNK_ID) @@ -1838,7 +1842,11 @@ def _handle_chunk_event(self, txn: LoggingTransaction, event: EventBase): retcol="creator", allow_none=True, ) - if not room_version.msc2716_historical or event.sender != room_creator: + if ( + not room_version.msc2716_historical + or not self.hs.config.experimental.msc2716_enabled + or event.sender != room_creator + ): return chunk_id = event.content.get(EventContentFields.MSC2716_CHUNK_ID) From ad29e9606931000ed5ca894e17a498fc41381924 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 31 Aug 2021 20:02:57 -0500 Subject: [PATCH 20/22] Add ordering to rooms creator background update See https://github.com/matrix-org/synapse/pull/10697#discussion_r696815277 --- .../storage/schema/main/delta/63/02populate-rooms-creator.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/schema/main/delta/63/02populate-rooms-creator.sql b/synapse/storage/schema/main/delta/63/02populate-rooms-creator.sql index 282866cc0e31..f7c0b31261f8 100644 --- a/synapse/storage/schema/main/delta/63/02populate-rooms-creator.sql +++ b/synapse/storage/schema/main/delta/63/02populate-rooms-creator.sql @@ -13,5 +13,5 @@ * limitations under the License. */ -INSERT INTO background_updates (update_name, progress_json) - VALUES ('populate_rooms_creator_column', '{}'); +INSERT INTO background_updates (ordering, update_name, progress_json) + VALUES (6302, 'populate_rooms_creator_column', '{}'); From 462ab259c8db0d9da7c7cd6ef4a8ac0067c7d450 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 31 Aug 2021 20:04:04 -0500 Subject: [PATCH 21/22] Add comment to better document constant See https://github.com/matrix-org/synapse/pull/10697#discussion_r699674458 --- synapse/api/constants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index a0e5fffbfca0..5e34eb7e13e9 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -198,6 +198,7 @@ class EventContentFields: # cf https://github.com/matrix-org/matrix-doc/pull/1772 ROOM_TYPE = "type" + # The creator of the room, as used in `m.room.create` events. ROOM_CREATOR = "creator" # Used on normal messages to indicate they were historically imported after the fact From a3581d3cd12aa2e0d7837cac8070b068b6c919cd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 1 Sep 2021 11:21:54 -0500 Subject: [PATCH 22/22] Use constant field --- synapse/handlers/federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 23cda13f2761..5237686eb32d 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -799,7 +799,7 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase): # support it or the event is not from the room creator. room_version = await self.store.get_room_version(marker_event.room_id) create_event = await self.store.get_create_event_for_room(marker_event.room_id) - room_creator = create_event.content.get("creator", None) + room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR) if ( not room_version.msc2716_historical or not self.hs.config.experimental.msc2716_enabled