diff --git a/changelog.d/7905.bugfix b/changelog.d/7905.bugfix new file mode 100644 index 000000000000..e60e62441210 --- /dev/null +++ b/changelog.d/7905.bugfix @@ -0,0 +1 @@ +Fix a longstanding bug when storing a media file with an empty `upload_name`. diff --git a/changelog.d/8415.doc b/changelog.d/8415.doc new file mode 100644 index 000000000000..28b579853364 --- /dev/null +++ b/changelog.d/8415.doc @@ -0,0 +1 @@ +Improve description of `server_name` config option in `homserver.yaml`. \ No newline at end of file diff --git a/changelog.d/8419.feature b/changelog.d/8419.feature new file mode 100644 index 000000000000..b363e929ea8c --- /dev/null +++ b/changelog.d/8419.feature @@ -0,0 +1 @@ +Add experimental support for sharding event persister. diff --git a/changelog.d/8422.misc b/changelog.d/8422.misc new file mode 100644 index 000000000000..03fba120c6d9 --- /dev/null +++ b/changelog.d/8422.misc @@ -0,0 +1 @@ +Typing fixes for `synapse.handlers.federation`. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 76f588fa9fc3..4831484ef2d4 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -33,10 +33,23 @@ ## Server ## -# The domain name of the server, with optional explicit port. -# This is used by remote servers to connect to this server, -# e.g. matrix.org, localhost:8080, etc. -# This is also the last part of your UserID. +# The public-facing domain of the server +# +# The server_name name will appear at the end of usernames and room addresses +# created on this server. For example if the server_name was example.com, +# usernames on this server would be in the format @user:example.com +# +# In most cases you should avoid using a matrix specific subdomain such as +# matrix.example.com or synapse.example.com as the server_name for the same +# reasons you wouldn't use user@email.example.com as your email address. +# See https://github.com/matrix-org/synapse/blob/master/docs/delegate.md +# for information on how to host Synapse on a subdomain while preserving +# a clean server_name. +# +# The server_name cannot be changed later so it is important to +# configure this correctly before you start Synapse. It should be all +# lowercase and may contain an explicit port. +# Examples: matrix.org, localhost:8080 # server_name: "SERVERNAME" diff --git a/synapse/config/server.py b/synapse/config/server.py index eff2a846a47d..9a5c7cb0ab4a 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -647,10 +647,23 @@ def generate_config_section( """\ ## Server ## - # The domain name of the server, with optional explicit port. - # This is used by remote servers to connect to this server, - # e.g. matrix.org, localhost:8080, etc. - # This is also the last part of your UserID. + # The public-facing domain of the server + # + # The server_name name will appear at the end of usernames and room addresses + # created on this server. For example if the server_name was example.com, + # usernames on this server would be in the format @user:example.com + # + # In most cases you should avoid using a matrix specific subdomain such as + # matrix.example.com or synapse.example.com as the server_name for the same + # reasons you wouldn't use user@email.example.com as your email address. + # See https://github.com/matrix-org/synapse/blob/master/docs/delegate.md + # for information on how to host Synapse on a subdomain while preserving + # a clean server_name. + # + # The server_name cannot be changed later so it is important to + # configure this correctly before you start Synapse. It should be all + # lowercase and may contain an explicit port. + # Examples: matrix.org, localhost:8080 # server_name: "%(server_name)s" diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 688d43fffb4c..302b2f69bcdd 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -24,10 +24,12 @@ Dict, Iterable, List, + Mapping, Optional, Sequence, Tuple, TypeVar, + Union, ) from prometheus_client import Counter @@ -501,7 +503,7 @@ async def make_membership_event( user_id: str, membership: str, content: dict, - params: Dict[str, str], + params: Optional[Mapping[str, Union[str, Iterable[str]]]], ) -> Tuple[str, EventBase, RoomVersion]: """ Creates an m.room.member event, with context, without participating in the room. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 33feb1b2ba91..eb91f2f7620d 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -155,8 +155,9 @@ def __init__(self, hs): self._device_list_updater = hs.get_device_handler().device_list_updater self._maybe_store_room_on_invite = self.store.maybe_store_room_on_invite - # When joining a room we need to queue any events for that room up - self.room_queues = {} + # When joining a room we need to queue any events for that room up. + # For each room, a list of (pdu, origin) tuples. + self.room_queues = {} # type: Dict[str, List[Tuple[EventBase, str]]] self._room_pdu_linearizer = Linearizer("fed_room_pdu") self.third_party_event_rules = hs.get_third_party_event_rules() @@ -817,6 +818,9 @@ async def backfill(self, dest, room_id, limit, extremities): dest, room_id, limit=limit, extremities=extremities ) + if not events: + return [] + # ideally we'd sanity check the events here for excess prev_events etc, # but it's hard to reject events at this point without completely # breaking backfill in the same way that it is currently broken by @@ -2174,10 +2178,10 @@ async def _check_for_soft_fail( # given state at the event. This should correctly handle cases # like bans, especially with state res v2. - state_sets = await self.state_store.get_state_groups( + state_sets_d = await self.state_store.get_state_groups( event.room_id, extrem_ids ) - state_sets = list(state_sets.values()) + state_sets = list(state_sets_d.values()) # type: List[Iterable[EventBase]] state_sets.append(state) current_states = await self.state_handler.resolve_events( room_version, state_sets, event @@ -2968,6 +2972,7 @@ async def persist_events_and_notify( ) return result["max_stream_id"] else: + assert self.storage.persistence max_stream_token = await self.storage.persistence.persist_events( event_and_contexts, backfilled=backfilled ) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 69f353d46f9e..ae6822d6e742 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -139,7 +139,7 @@ def mark_recently_accessed(self, server_name, media_id): async def create_content( self, media_type: str, - upload_name: str, + upload_name: Optional[str], content: IO, content_length: int, auth_user: str, @@ -147,8 +147,8 @@ async def create_content( """Store uploaded content for a local user and return the mxc URL Args: - media_type: The content type of the file - upload_name: The name of the file + media_type: The content type of the file. + upload_name: The name of the file, if provided. content: A file like object that is the content to store content_length: The length of the content auth_user: The user_id of the uploader @@ -156,6 +156,7 @@ async def create_content( Returns: The mxc url of the stored content """ + media_id = random_string(24) file_info = FileInfo(server_name=None, file_id=media_id) diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 3ebf7a68e673..d76f7389e101 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -63,6 +63,10 @@ async def _async_render_POST(self, request): msg="Invalid UTF-8 filename parameter: %r" % (upload_name), code=400 ) + # If the name is falsey (e.g. an empty byte string) ensure it is None. + else: + upload_name = None + headers = request.requestHeaders if headers.hasHeader(b"Content-Type"): diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index 989f0cbc9d3b..0e31cc811a38 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -24,7 +24,7 @@ from synapse.storage.state import StateFilter from synapse.storage.types import Cursor from synapse.storage.util.sequence import build_sequence_generator -from synapse.types import StateMap +from synapse.types import MutableStateMap, StateMap from synapse.util.caches.descriptors import cached from synapse.util.caches.dictionary_cache import DictionaryCache @@ -208,7 +208,7 @@ def _get_state_for_group_using_cache(self, cache, group, state_filter): async def _get_state_for_groups( self, groups: Iterable[int], state_filter: StateFilter = StateFilter.all() - ) -> Dict[int, StateMap[str]]: + ) -> Dict[int, MutableStateMap[str]]: """Gets the state at each of a list of state groups, optionally filtering by type/state_key diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index 603cd7d825e0..ded6cf965528 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -197,7 +197,7 @@ def __init__(self, hs, stores: Databases): async def persist_events( self, - events_and_contexts: List[Tuple[EventBase, EventContext]], + events_and_contexts: Iterable[Tuple[EventBase, EventContext]], backfilled: bool = False, ) -> RoomStreamToken: """ diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 8f68d968f0c8..08a69f2f9667 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -20,7 +20,7 @@ from synapse.api.constants import EventTypes from synapse.events import EventBase -from synapse.types import StateMap +from synapse.types import MutableStateMap, StateMap logger = logging.getLogger(__name__) @@ -349,7 +349,7 @@ async def get_state_group_delta(self, state_group: int): async def get_state_groups_ids( self, _room_id: str, event_ids: Iterable[str] - ) -> Dict[int, StateMap[str]]: + ) -> Dict[int, MutableStateMap[str]]: """Get the event IDs of all the state for the state groups for the given events Args: @@ -532,7 +532,7 @@ async def get_state_ids_for_event( def _get_state_for_groups( self, groups: Iterable[int], state_filter: StateFilter = StateFilter.all() - ) -> Awaitable[Dict[int, StateMap[str]]]: + ) -> Awaitable[Dict[int, MutableStateMap[str]]]: """Gets the state at each of a list of state groups, optionally filtering by type/state_key diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 4fd7573e260d..02fbb656e81c 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -273,6 +273,19 @@ def _load_current_ids( # Load the current positions of all writers for the stream. if self._writers: + # We delete any stale entries in the positions table. This is + # important if we add back a writer after a long time; we want to + # consider that a "new" writer, rather than using the old stale + # entry here. + sql = """ + DELETE FROM stream_positions + WHERE + stream_name = ? + AND instance_name != ALL(?) + """ + sql = self._db.engine.convert_param_style(sql) + cur.execute(sql, (self._stream_name, self._writers)) + sql = """ SELECT instance_name, stream_id FROM stream_positions WHERE stream_name = ? @@ -453,11 +466,22 @@ def get_current_token_for_writer(self, instance_name: str) -> int: """Returns the position of the given writer. """ + # If we don't have an entry for the given instance name, we assume it's a + # new writer. + # + # For new writers we assume their initial position to be the current + # persisted up to position. This stops Synapse from doing a full table + # scan when a new writer announces itself over replication. with self._lock: - return self._return_factor * self._current_positions.get(instance_name, 0) + return self._return_factor * self._current_positions.get( + instance_name, self._persisted_upto_position + ) def get_positions(self) -> Dict[str, int]: """Get a copy of the current positon map. + + Note that this won't necessarily include all configured writers if some + writers haven't written anything yet. """ with self._lock: diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py index 4558bee7be85..392b08832b05 100644 --- a/tests/storage/test_id_generators.py +++ b/tests/storage/test_id_generators.py @@ -390,17 +390,28 @@ def test_writer_config_change(self): # Initial config has two writers id_gen = self._create_id_generator("first", writers=["first", "second"]) self.assertEqual(id_gen.get_persisted_upto_position(), 3) + self.assertEqual(id_gen.get_current_token_for_writer("first"), 3) + self.assertEqual(id_gen.get_current_token_for_writer("second"), 5) # New config removes one of the configs. Note that if the writer is # removed from config we assume that it has been shut down and has # finished persisting, hence why the persisted upto position is 5. id_gen_2 = self._create_id_generator("second", writers=["second"]) self.assertEqual(id_gen_2.get_persisted_upto_position(), 5) + self.assertEqual(id_gen_2.get_current_token_for_writer("second"), 5) # This config points to a single, previously unused writer. id_gen_3 = self._create_id_generator("third", writers=["third"]) self.assertEqual(id_gen_3.get_persisted_upto_position(), 5) + # For new writers we assume their initial position to be the current + # persisted up to position. This stops Synapse from doing a full table + # scan when a new writer comes along. + self.assertEqual(id_gen_3.get_current_token_for_writer("third"), 5) + + id_gen_4 = self._create_id_generator("fourth", writers=["third"]) + self.assertEqual(id_gen_4.get_current_token_for_writer("third"), 5) + # Check that we get a sane next stream ID with this new config. async def _get_next_async(): @@ -410,6 +421,13 @@ async def _get_next_async(): self.get_success(_get_next_async()) self.assertEqual(id_gen_3.get_persisted_upto_position(), 6) + # If we add back the old "first" then we shouldn't see the persisted up + # to position revert back to 3. + id_gen_5 = self._create_id_generator("five", writers=["first", "third"]) + self.assertEqual(id_gen_5.get_persisted_upto_position(), 6) + self.assertEqual(id_gen_5.get_current_token_for_writer("first"), 6) + self.assertEqual(id_gen_5.get_current_token_for_writer("third"), 6) + def test_sequence_consistency(self): """Test that we error out if the table and sequence diverges. """