From 924ae2b0d4a28b568d2fd40e482d860605074650 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Sep 2022 17:15:03 -0500 Subject: [PATCH 1/7] Track when the pulled event signature fails Part of https://github.com/matrix-org/synapse/issues/13700 --- synapse/federation/federation_base.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index abe2c1971a19..23175847e19d 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -80,7 +80,13 @@ async def _check_sigs_and_hash( InvalidEventSignatureError if the signature check failed. Nothing will be logged in this case. """ - await _check_sigs_on_pdu(self.keyring, room_version, pdu) + try: + await _check_sigs_on_pdu(self.keyring, room_version, pdu) + except Exception as exc: + await self._store.record_event_failed_pull_attempt( + pdu.room_id, pdu.event_id, str(exc) + ) + raise exc if not check_event_content_hash(pdu): # let's try to distinguish between failures because the event was @@ -116,6 +122,9 @@ async def _check_sigs_and_hash( "event_id": pdu.event_id, } ) + await self._store.record_event_failed_pull_attempt( + pdu.room_id, pdu.event_id, "Event content has been tampered with" + ) return redacted_event spam_check = await self.spam_checker.check_event_for_spam(pdu) From d240aeb48e72226f3e7b575d5abe774eebebca91 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Sep 2022 18:02:08 -0500 Subject: [PATCH 2/7] Add changelog --- changelog.d/13815.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13815.feature diff --git a/changelog.d/13815.feature b/changelog.d/13815.feature new file mode 100644 index 000000000000..ba411f5067b6 --- /dev/null +++ b/changelog.d/13815.feature @@ -0,0 +1 @@ +Keep track when an event pulled over federation fails its signature check so we can intelligently back-off in the future. From cfb4e88d0e66f72ed77d2528fb56ccdd7af925bc Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Sep 2022 18:24:54 -0500 Subject: [PATCH 3/7] Fix reference --- synapse/federation/federation_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 23175847e19d..8d25e92a67fb 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -83,7 +83,7 @@ async def _check_sigs_and_hash( try: await _check_sigs_on_pdu(self.keyring, room_version, pdu) except Exception as exc: - await self._store.record_event_failed_pull_attempt( + await self.store.record_event_failed_pull_attempt( pdu.room_id, pdu.event_id, str(exc) ) raise exc @@ -122,7 +122,7 @@ async def _check_sigs_and_hash( "event_id": pdu.event_id, } ) - await self._store.record_event_failed_pull_attempt( + await self.store.record_event_failed_pull_attempt( pdu.room_id, pdu.event_id, "Event content has been tampered with" ) return redacted_event From 88a75cf53bdde623fd2d32524c78f8273cb6eb6b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 23 Sep 2022 18:04:30 -0500 Subject: [PATCH 4/7] Use callback pattern to record signature failures See https://github.com/matrix-org/synapse/pull/13815#discussion_r971432765 --- synapse/federation/federation_base.py | 21 +++++++++++------- synapse/federation/federation_client.py | 29 ++++++++++++++++++------- tests/test_federation.py | 4 ++-- 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 8d25e92a67fb..1388b9123788 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Awaitable, Callable, Optional from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership from synapse.api.errors import Codes, SynapseError @@ -58,7 +58,12 @@ def __init__(self, hs: "HomeServer"): @trace async def _check_sigs_and_hash( - self, room_version: RoomVersion, pdu: EventBase + self, + room_version: RoomVersion, + pdu: EventBase, + record_failure_callback: Optional[ + Callable[[EventBase, str], Awaitable[None]] + ] = None, ) -> EventBase: """Checks that event is correctly signed by the sending server. @@ -83,9 +88,8 @@ async def _check_sigs_and_hash( try: await _check_sigs_on_pdu(self.keyring, room_version, pdu) except Exception as exc: - await self.store.record_event_failed_pull_attempt( - pdu.room_id, pdu.event_id, str(exc) - ) + if record_failure_callback: + await record_failure_callback(pdu, str(exc)) raise exc if not check_event_content_hash(pdu): @@ -122,9 +126,10 @@ async def _check_sigs_and_hash( "event_id": pdu.event_id, } ) - await self.store.record_event_failed_pull_attempt( - pdu.room_id, pdu.event_id, "Event content has been tampered with" - ) + if record_failure_callback: + await record_failure_callback( + pdu, "Event content has been tampered with" + ) return redacted_event spam_check = await self.spam_checker.check_event_for_spam(pdu) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 464672a3da81..7bdbf7e27780 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -278,7 +278,7 @@ async def backfill( pdus = [event_from_pdu_json(p, room_version) for p in transaction_data_pdus] # Check signatures and hash of pdus, removing any from the list that fail checks - pdus[:] = await self._check_sigs_and_hash_and_fetch( + pdus[:] = await self._check_sigs_and_hash_for_pulled_events_and_fetch( dest, pdus, room_version=room_version ) @@ -547,24 +547,28 @@ async def get_room_state( len(auth_event_map), ) - valid_auth_events = await self._check_sigs_and_hash_and_fetch( + valid_auth_events = await self._check_sigs_and_hash_for_pulled_events_and_fetch( destination, auth_event_map.values(), room_version ) - valid_state_events = await self._check_sigs_and_hash_and_fetch( - destination, state_event_map.values(), room_version + valid_state_events = ( + await self._check_sigs_and_hash_for_pulled_events_and_fetch( + destination, state_event_map.values(), room_version + ) ) return valid_state_events, valid_auth_events @trace - async def _check_sigs_and_hash_and_fetch( + async def _check_sigs_and_hash_for_pulled_events_and_fetch( self, origin: str, pdus: Collection[EventBase], room_version: RoomVersion, ) -> List[EventBase]: - """Checks the signatures and hashes of a list of events. + """ + Checks the signatures and hashes of a list of pulled events we got from + federation and records any signature failures as failed pull attempts. If a PDU fails its signature check then we check if we have it in the database, and if not then request it from the sender's server (if that @@ -597,11 +601,17 @@ async def _check_sigs_and_hash_and_fetch( valid_pdus: List[EventBase] = [] + async def _record_failure_callback(event: EventBase, cause: str) -> None: + await self.store.record_event_failed_pull_attempt( + event.room_id, event.event_id, cause + ) + async def _execute(pdu: EventBase) -> None: valid_pdu = await self._check_sigs_and_hash_and_fetch_one( pdu=pdu, origin=origin, room_version=room_version, + record_failure_callback=_record_failure_callback, ) if valid_pdu: @@ -618,6 +628,9 @@ async def _check_sigs_and_hash_and_fetch_one( pdu: EventBase, origin: str, room_version: RoomVersion, + record_failure_callback: Optional[ + Callable[[EventBase, str], Awaitable[None]] + ] = None, ) -> Optional[EventBase]: """Takes a PDU and checks its signatures and hashes. @@ -694,7 +707,7 @@ async def get_event_auth( auth_chain = [event_from_pdu_json(p, room_version) for p in res["auth_chain"]] - signed_auth = await self._check_sigs_and_hash_and_fetch( + signed_auth = await self._check_sigs_and_hash_for_pulled_events_and_fetch( destination, auth_chain, room_version=room_version ) @@ -1401,7 +1414,7 @@ async def get_missing_events( event_from_pdu_json(e, room_version) for e in content.get("events", []) ] - signed_events = await self._check_sigs_and_hash_and_fetch( + signed_events = await self._check_sigs_and_hash_for_pulled_events_and_fetch( destination, events, room_version=room_version ) except HttpResponseException as e: diff --git a/tests/test_federation.py b/tests/test_federation.py index 779fad1f6398..80e5c590d836 100644 --- a/tests/test_federation.py +++ b/tests/test_federation.py @@ -86,8 +86,8 @@ async def _check_event_auth(origin, event, context): federation_event_handler._check_event_auth = _check_event_auth self.client = self.homeserver.get_federation_client() - self.client._check_sigs_and_hash_and_fetch = lambda dest, pdus, **k: succeed( - pdus + self.client._check_sigs_and_hash_for_pulled_events_and_fetch = ( + lambda dest, pdus, **k: succeed(pdus) ) # Send the join, it should return None (which is not an error) From d29ac0bd5fdec5bc232fb9e33756cf210d252495 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 23 Sep 2022 19:06:38 -0500 Subject: [PATCH 5/7] Add docstring --- synapse/federation/federation_base.py | 5 +++++ synapse/federation/federation_client.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 1388b9123788..c5505f19643c 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -75,6 +75,11 @@ async def _check_sigs_and_hash( Args: room_version: The room version of the PDU pdu: the event to be checked + record_failure_callback: A callback to run whenever the given event + fails signature or hash checks. This includes exceptions + that would be normally be thrown/raised but also things like + checking for event tampering where we just return the redacted + event. Returns: * the original event if the checks pass diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 7bdbf7e27780..6c22a540ebe3 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -647,6 +647,11 @@ async def _check_sigs_and_hash_and_fetch_one( origin pdu room_version + record_failure_callback: A callback to run whenever the given event + fails signature or hash checks. This includes exceptions + that would be normally be thrown/raised but also things like + checking for event tampering where we just return the redacted + event. Returns: The PDU (possibly redacted) if it has valid signatures and hashes. From 14e39ee5b92275b168b44d2485b4b13ddcf7ba65 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 23 Sep 2022 20:16:00 -0500 Subject: [PATCH 6/7] Record failure from get_pdu and add test --- synapse/federation/federation_client.py | 16 ++++- tests/federation/test_federation_client.py | 75 ++++++++++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 6c22a540ebe3..4dca711cd28d 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -328,7 +328,17 @@ async def get_pdu_from_destination_raw( # Check signatures are correct. try: - signed_pdu = await self._check_sigs_and_hash(room_version, pdu) + + async def _record_failure_callback( + event: EventBase, cause: str + ) -> None: + await self.store.record_event_failed_pull_attempt( + event.room_id, event.event_id, cause + ) + + signed_pdu = await self._check_sigs_and_hash( + room_version, pdu, _record_failure_callback + ) except InvalidEventSignatureError as e: errmsg = f"event id {pdu.event_id}: {e}" logger.warning("%s", errmsg) @@ -659,7 +669,9 @@ async def _check_sigs_and_hash_and_fetch_one( """ try: - return await self._check_sigs_and_hash(room_version, pdu) + return await self._check_sigs_and_hash( + room_version, pdu, record_failure_callback + ) except InvalidEventSignatureError as e: logger.warning( "Signature on retrieved event %s was invalid (%s). " diff --git a/tests/federation/test_federation_client.py b/tests/federation/test_federation_client.py index 50e376f69574..a538215931e3 100644 --- a/tests/federation/test_federation_client.py +++ b/tests/federation/test_federation_client.py @@ -23,14 +23,23 @@ from synapse.api.room_versions import RoomVersions from synapse.events import EventBase +from synapse.rest import admin +from synapse.rest.client import login, room from synapse.server import HomeServer from synapse.types import JsonDict from synapse.util import Clock +from tests.test_utils import event_injection from tests.unittest import FederatingHomeserverTestCase class FederationClientTest(FederatingHomeserverTestCase): + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + def prepare(self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer): super().prepare(reactor, clock, homeserver) @@ -231,6 +240,72 @@ def _get_pdu_once(self) -> EventBase: return remote_pdu + def test_backfill_invalid_signature_records_failed_pull_attempts( + self, + ) -> None: + """ + Test to make sure that events from /backfill with invalid signatures get + recorded as failed pull attempts. + """ + OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}" + main_store = self.hs.get_datastores().main + + # Create the room + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) + + # We purposely don't run `add_hashes_and_signatures_from_other_server` + # over this because we want the signature check to fail. + pulled_event, _ = self.get_success( + event_injection.create_event( + self.hs, + room_id=room_id, + sender=OTHER_USER, + type="test_event_type", + content={"body": "garply"}, + ) + ) + + # We expect an outbound request to /backfill, so stub that out + self._mock_agent.request.side_effect = lambda *args, **kwargs: defer.succeed( + _mock_response( + { + "origin": "yet.another.server", + "origin_server_ts": 900, + # Mimic the other server returning our new `pulled_event` + "pdus": [pulled_event.get_pdu_json()], + } + ) + ) + + self.get_success( + self.hs.get_federation_client().backfill( + # We use "yet.another.server" instead of + # `self.OTHER_SERVER_NAME` because we want to see the behavior + # from `_check_sigs_and_hash_and_fetch_one` where it tries to + # fetch the PDU again from the origin server if the signature + # fails. Just want to make sure that the failure is counted from + # both code paths. + dest="yet.another.server", + room_id=room_id, + limit=1, + extremities=[pulled_event.event_id], + ), + ) + + # Make sure our failed pull attempt was recorded + backfill_num_attempts = self.get_success( + main_store.db_pool.simple_select_one_onecol( + table="event_failed_pull_attempts", + keyvalues={"event_id": pulled_event.event_id}, + retcol="num_attempts", + ) + ) + # This is 2 because it failed once from `self.OTHER_SERVER_NAME` and the + # other from "yet.another.server" + self.assertEqual(backfill_num_attempts, 2) + def _mock_response(resp: JsonDict): body = json.dumps(resp).encode("utf-8") From 7898371c25872f98e5bfab5cce8b2fd8c91a995b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 29 Sep 2022 21:17:01 -0500 Subject: [PATCH 7/7] Be more selective about which errors to care about See https://github.com/matrix-org/synapse/pull/13815#discussion_r983384698 So wes can avoid things like `CancelledError` which is a valid error but probably should not count as an error --- synapse/federation/federation_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index c5505f19643c..6bd4742140c4 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -92,7 +92,7 @@ async def _check_sigs_and_hash( """ try: await _check_sigs_on_pdu(self.keyring, room_version, pdu) - except Exception as exc: + except InvalidEventSignatureError as exc: if record_failure_callback: await record_failure_callback(pdu, str(exc)) raise exc