From 32adcdeefe27844a1c331848df09640aeba270ef Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 11 Dec 2020 11:44:36 -0500 Subject: [PATCH 1/5] Add a maximum size for well-known lookups. --- changelog.d/8950.misc | 1 + synapse/http/client.py | 8 +++--- .../http/federation/well_known_resolver.py | 13 +++++++-- synapse/http/matrixfederationclient.py | 4 +-- .../test_matrix_federation_agent.py | 27 +++++++++++++++++++ 5 files changed, 45 insertions(+), 8 deletions(-) create mode 100644 changelog.d/8950.misc diff --git a/changelog.d/8950.misc b/changelog.d/8950.misc new file mode 100644 index 000000000000..42e0335afc5e --- /dev/null +++ b/changelog.d/8950.misc @@ -0,0 +1 @@ +Add a maximum size of 50 kilobytes to .well-known lookups. diff --git a/synapse/http/client.py b/synapse/http/client.py index df7730078ff1..f7f9a287050b 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -720,7 +720,7 @@ async def get_file( try: length = await make_deferred_yieldable( - readBodyToFile(response, output_stream, max_size) + readBodyWithMaxSize(response, output_stream, max_size) ) except SynapseError: # This can happen e.g. because the body is too large. @@ -748,7 +748,7 @@ def _timeout_to_request_timed_out_error(f: Failure): return f -class _ReadBodyToFileProtocol(protocol.Protocol): +class _ReadBodyWithMaxSizeProtocol(protocol.Protocol): def __init__( self, stream: BinaryIO, deferred: defer.Deferred, max_size: Optional[int] ): @@ -782,7 +782,7 @@ def connectionLost(self, reason: Failure) -> None: self.deferred.errback(reason) -def readBodyToFile( +def readBodyWithMaxSize( response: IResponse, stream: BinaryIO, max_size: Optional[int] ) -> defer.Deferred: """ @@ -798,7 +798,7 @@ def readBodyToFile( """ d = defer.Deferred() - response.deliverBody(_ReadBodyToFileProtocol(stream, d, max_size)) + response.deliverBody(_ReadBodyWithMaxSizeProtocol(stream, d, max_size)) return d diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py index 5e08ef1664f5..b2adb2401c62 100644 --- a/synapse/http/federation/well_known_resolver.py +++ b/synapse/http/federation/well_known_resolver.py @@ -15,17 +15,19 @@ import logging import random import time +from io import BytesIO from typing import Callable, Dict, Optional, Tuple import attr from twisted.internet import defer from twisted.internet.interfaces import IReactorTime -from twisted.web.client import RedirectAgent, readBody +from twisted.web.client import RedirectAgent from twisted.web.http import stringToDatetime from twisted.web.http_headers import Headers from twisted.web.iweb import IAgent, IResponse +from synapse.http.client import readBodyWithMaxSize from synapse.logging.context import make_deferred_yieldable from synapse.util import Clock, json_decoder from synapse.util.caches.ttlcache import TTLCache @@ -53,6 +55,9 @@ # lower bound for .well-known cache period WELL_KNOWN_MIN_CACHE_PERIOD = 5 * 60 +# The maximum size (in bytes) to allow a well-known file to be. +WELL_KNOW_MAX_SIZE = 50 * 1024 # 50 KiB + # Attempt to refetch a cached well-known N% of the TTL before it expires. # e.g. if set to 0.2 and we have a cached entry with a TTL of 5mins, then # we'll start trying to refetch 1 minute before it expires. @@ -250,7 +255,11 @@ async def _make_well_known_request( b"GET", uri, headers=Headers(headers) ) ) - body = await make_deferred_yieldable(readBody(response)) + body_stream = BytesIO() + await make_deferred_yieldable( + readBodyWithMaxSize(response, body_stream, WELL_KNOW_MAX_SIZE) + ) + body = body_stream.getvalue() if 500 <= response.code < 600: raise Exception("Non-200 response %s" % (response.code,)) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index c962994727f6..84c0a9f371d7 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -46,7 +46,7 @@ BlacklistingAgentWrapper, BlacklistingReactorWrapper, encode_query_args, - readBodyToFile, + readBodyWithMaxSize, ) from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent from synapse.logging.context import make_deferred_yieldable @@ -975,7 +975,7 @@ async def get_file( headers = dict(response.headers.getAllRawHeaders()) try: - d = readBodyToFile(response, output_stream, max_size) + d = readBodyWithMaxSize(response, output_stream, max_size) d.addTimeout(self.default_timeout, self.reactor) length = await make_deferred_yieldable(d) except Exception as e: diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 626acdcaa333..ad8d849c1fd0 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -36,6 +36,7 @@ from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent from synapse.http.federation.srv_resolver import Server from synapse.http.federation.well_known_resolver import ( + WELL_KNOW_MAX_SIZE, WellKnownResolver, _cache_period_from_headers, ) @@ -1107,6 +1108,32 @@ def test_well_known_cache_with_temp_failure(self): r = self.successResultOf(fetch_d) self.assertEqual(r.delegated_server, None) + def test_well_known_too_large(self): + """A well-known query that returns a result which is too large should be rejected.""" + self.reactor.lookups["testserv"] = "1.2.3.4" + + fetch_d = defer.ensureDeferred( + self.well_known_resolver.get_well_known(b"testserv") + ) + + # there should be an attempt to connect on port 443 for the .well-known + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 443) + + self._handle_well_known_connection( + client_factory, + expected_sni=b"testserv", + response_headers={b"Cache-Control": b"max-age=1000"}, + content=b'{ "m.server": "' + (b"a" * WELL_KNOW_MAX_SIZE) + b'" }', + ) + + # The result is sucessful, but disabled delegation. + r = self.successResultOf(fetch_d) + self.assertIsNone(r.delegated_server) + def test_srv_fallbacks(self): """Test that other SRV results are tried if the first one fails. """ From 9389a88816ddb1eeb70f927da804b921aba0facb Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 15 Dec 2020 14:56:55 -0500 Subject: [PATCH 2/5] Raise a custom exception. --- synapse/http/client.py | 13 ++++++++++--- synapse/http/federation/well_known_resolver.py | 14 +++++++++++++- synapse/http/matrixfederationclient.py | 9 +++++++++ 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index f7f9a287050b..bb679397d1d9 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -722,9 +722,12 @@ async def get_file( length = await make_deferred_yieldable( readBodyWithMaxSize(response, output_stream, max_size) ) - except SynapseError: - # This can happen e.g. because the body is too large. - raise + except BodyExceededMaxSize: + SynapseError( + 502, + "Requested file is too large > %r bytes" % (max_size,), + Codes.TOO_LARGE, + ) except Exception as e: raise SynapseError(502, ("Failed to download remote body: %s" % e)) from e @@ -748,6 +751,10 @@ def _timeout_to_request_timed_out_error(f: Failure): return f +class BodyExceededMaxSize(Exception): + """The maximum allowed size of the HTTP body was exceeded.""" + + class _ReadBodyWithMaxSizeProtocol(protocol.Protocol): def __init__( self, stream: BinaryIO, deferred: defer.Deferred, max_size: Optional[int] diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py index b2adb2401c62..a2879ae8d5f5 100644 --- a/synapse/http/federation/well_known_resolver.py +++ b/synapse/http/federation/well_known_resolver.py @@ -27,7 +27,7 @@ from twisted.web.http_headers import Headers from twisted.web.iweb import IAgent, IResponse -from synapse.http.client import readBodyWithMaxSize +from synapse.http.client import BodyExceededMaxSize, readBodyWithMaxSize from synapse.logging.context import make_deferred_yieldable from synapse.util import Clock, json_decoder from synapse.util.caches.ttlcache import TTLCache @@ -234,6 +234,9 @@ async def _make_well_known_request( server_name: name of the server, from the requested url retry: Whether to retry the request if it fails. + Raises: + _FetchWellKnownFailure if we fail to lookup a result + Returns: Returns the response object and body. Response may be a non-200 response. """ @@ -268,6 +271,15 @@ async def _make_well_known_request( except defer.CancelledError: # Bail if we've been cancelled raise + except BodyExceededMaxSize: + # If the well-known file was too large, do not keep attempting + # to download it, but consider it a temporary error. + logger.warning( + "Requested .well-known file for %s is too large > %r bytes", + server_name.decode("ascii"), + WELL_KNOW_MAX_SIZE, + ) + raise _FetchWellKnownFailure(temporary=True) except Exception as e: if not retry or i >= WELL_KNOWN_RETRY_ATTEMPTS: logger.info("Error fetching %s: %s", uri_str, e) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 84c0a9f371d7..db0da2bec9a8 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -37,14 +37,17 @@ import synapse.metrics import synapse.util.retryutils from synapse.api.errors import ( + Codes, FederationDeniedError, HttpResponseException, RequestSendFailed, + SynapseError, ) from synapse.http import QuieterFileBodyProducer from synapse.http.client import ( BlacklistingAgentWrapper, BlacklistingReactorWrapper, + BodyExceededMaxSize, encode_query_args, readBodyWithMaxSize, ) @@ -978,6 +981,12 @@ async def get_file( d = readBodyWithMaxSize(response, output_stream, max_size) d.addTimeout(self.default_timeout, self.reactor) length = await make_deferred_yieldable(d) + except BodyExceededMaxSize: + msg = "Requested file is too large > %r bytes" % (max_size,) + logger.warning( + "{%s} [%s] %s", request.txn_id, request.destination, msg, + ) + SynapseError(502, msg, Codes.TOO_LARGE) except Exception as e: logger.warning( "{%s} [%s] Error reading response: %s", From b8aeebd6e5546fc09847a69e33726b453f8b4e6f Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 16 Dec 2020 14:44:44 -0500 Subject: [PATCH 3/5] Actually raise the proper exception. --- docs/sample_log_config.yaml | 71 ------------------------------------- synapse/http/client.py | 8 +---- 2 files changed, 1 insertion(+), 78 deletions(-) diff --git a/docs/sample_log_config.yaml b/docs/sample_log_config.yaml index ff3c747180a5..e69de29bb2d1 100644 --- a/docs/sample_log_config.yaml +++ b/docs/sample_log_config.yaml @@ -1,71 +0,0 @@ -# Log configuration for Synapse. -# -# This is a YAML file containing a standard Python logging configuration -# dictionary. See [1] for details on the valid settings. -# -# Synapse also supports structured logging for machine readable logs which can -# be ingested by ELK stacks. See [2] for details. -# -# [1]: https://docs.python.org/3.7/library/logging.config.html#configuration-dictionary-schema -# [2]: https://github.com/matrix-org/synapse/blob/master/docs/structured_logging.md - -version: 1 - -formatters: - precise: - format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s' - -handlers: - file: - class: logging.handlers.TimedRotatingFileHandler - formatter: precise - filename: /var/log/matrix-synapse/homeserver.log - when: midnight - backupCount: 3 # Does not include the current log file. - encoding: utf8 - - # Default to buffering writes to log file for efficiency. This means that - # will be a delay for INFO/DEBUG logs to get written, but WARNING/ERROR - # logs will still be flushed immediately. - buffer: - class: logging.handlers.MemoryHandler - target: file - # The capacity is the number of log lines that are buffered before - # being written to disk. Increasing this will lead to better - # performance, at the expensive of it taking longer for log lines to - # be written to disk. - capacity: 10 - flushLevel: 30 # Flush for WARNING logs as well - - # A handler that writes logs to stderr. Unused by default, but can be used - # instead of "buffer" and "file" in the logger handlers. - console: - class: logging.StreamHandler - formatter: precise - -loggers: - synapse.storage.SQL: - # beware: increasing this to DEBUG will make synapse log sensitive - # information such as access tokens. - level: INFO - - twisted: - # We send the twisted logging directly to the file handler, - # to work around https://github.com/matrix-org/synapse/issues/3471 - # when using "buffer" logger. Use "console" to log to stderr instead. - handlers: [file] - propagate: false - -root: - level: INFO - - # Write logs to the `buffer` handler, which will buffer them together in memory, - # then write them to a file. - # - # Replace "buffer" with "console" to log to stderr instead. (Note that you'll - # also need to update the configuration for the `twisted` logger above, in - # this case.) - # - handlers: [buffer] - -disable_existing_loggers: false diff --git a/synapse/http/client.py b/synapse/http/client.py index bb679397d1d9..210743abd6e3 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -768,13 +768,7 @@ def dataReceived(self, data: bytes) -> None: self.stream.write(data) self.length += len(data) if self.max_size is not None and self.length >= self.max_size: - self.deferred.errback( - SynapseError( - 502, - "Requested file is too large > %r bytes" % (self.max_size,), - Codes.TOO_LARGE, - ) - ) + self.deferred.errback(BodyExceededMaxSize()) self.deferred = defer.Deferred() self.transport.loseConnection() From bcec5f30148181dd99d01fdb9d38f082909712cc Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 16 Dec 2020 14:48:25 -0500 Subject: [PATCH 4/5] Use a more Pythonic name and update the docstring. --- synapse/http/client.py | 7 +++++-- synapse/http/federation/well_known_resolver.py | 8 ++++---- synapse/http/matrixfederationclient.py | 4 ++-- tests/http/federation/test_matrix_federation_agent.py | 4 ++-- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index 210743abd6e3..29f40ddf5f25 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -720,7 +720,7 @@ async def get_file( try: length = await make_deferred_yieldable( - readBodyWithMaxSize(response, output_stream, max_size) + read_body_with_max_size(response, output_stream, max_size) ) except BodyExceededMaxSize: SynapseError( @@ -783,12 +783,15 @@ def connectionLost(self, reason: Failure) -> None: self.deferred.errback(reason) -def readBodyWithMaxSize( +def read_body_with_max_size( response: IResponse, stream: BinaryIO, max_size: Optional[int] ) -> defer.Deferred: """ Read a HTTP response body to a file-object. Optionally enforcing a maximum file size. + If the maximum file size is reached, the returned Deferred will resolve to a + Failure with a BodyExceededMaxSize exception. + Args: response: The HTTP response to read from. stream: The file-object to write to. diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py index a2879ae8d5f5..b3b6dbcab045 100644 --- a/synapse/http/federation/well_known_resolver.py +++ b/synapse/http/federation/well_known_resolver.py @@ -27,7 +27,7 @@ from twisted.web.http_headers import Headers from twisted.web.iweb import IAgent, IResponse -from synapse.http.client import BodyExceededMaxSize, readBodyWithMaxSize +from synapse.http.client import BodyExceededMaxSize, read_body_with_max_size from synapse.logging.context import make_deferred_yieldable from synapse.util import Clock, json_decoder from synapse.util.caches.ttlcache import TTLCache @@ -56,7 +56,7 @@ WELL_KNOWN_MIN_CACHE_PERIOD = 5 * 60 # The maximum size (in bytes) to allow a well-known file to be. -WELL_KNOW_MAX_SIZE = 50 * 1024 # 50 KiB +WELL_KNOWN_MAX_SIZE = 50 * 1024 # 50 KiB # Attempt to refetch a cached well-known N% of the TTL before it expires. # e.g. if set to 0.2 and we have a cached entry with a TTL of 5mins, then @@ -260,7 +260,7 @@ async def _make_well_known_request( ) body_stream = BytesIO() await make_deferred_yieldable( - readBodyWithMaxSize(response, body_stream, WELL_KNOW_MAX_SIZE) + read_body_with_max_size(response, body_stream, WELL_KNOWN_MAX_SIZE) ) body = body_stream.getvalue() @@ -277,7 +277,7 @@ async def _make_well_known_request( logger.warning( "Requested .well-known file for %s is too large > %r bytes", server_name.decode("ascii"), - WELL_KNOW_MAX_SIZE, + WELL_KNOWN_MAX_SIZE, ) raise _FetchWellKnownFailure(temporary=True) except Exception as e: diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index db0da2bec9a8..b261e078c469 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -49,7 +49,7 @@ BlacklistingReactorWrapper, BodyExceededMaxSize, encode_query_args, - readBodyWithMaxSize, + read_body_with_max_size, ) from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent from synapse.logging.context import make_deferred_yieldable @@ -978,7 +978,7 @@ async def get_file( headers = dict(response.headers.getAllRawHeaders()) try: - d = readBodyWithMaxSize(response, output_stream, max_size) + d = read_body_with_max_size(response, output_stream, max_size) d.addTimeout(self.default_timeout, self.reactor) length = await make_deferred_yieldable(d) except BodyExceededMaxSize: diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index ad8d849c1fd0..4e51839d0fb2 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -36,7 +36,7 @@ from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent from synapse.http.federation.srv_resolver import Server from synapse.http.federation.well_known_resolver import ( - WELL_KNOW_MAX_SIZE, + WELL_KNOWN_MAX_SIZE, WellKnownResolver, _cache_period_from_headers, ) @@ -1127,7 +1127,7 @@ def test_well_known_too_large(self): client_factory, expected_sni=b"testserv", response_headers={b"Cache-Control": b"max-age=1000"}, - content=b'{ "m.server": "' + (b"a" * WELL_KNOW_MAX_SIZE) + b'" }', + content=b'{ "m.server": "' + (b"a" * WELL_KNOWN_MAX_SIZE) + b'" }', ) # The result is sucessful, but disabled delegation. From 57391a486145310168325e4194075248ff3bb659 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 16 Dec 2020 14:51:40 -0500 Subject: [PATCH 5/5] Add back log config. --- docs/sample_log_config.yaml | 71 +++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/docs/sample_log_config.yaml b/docs/sample_log_config.yaml index e69de29bb2d1..ff3c747180a5 100644 --- a/docs/sample_log_config.yaml +++ b/docs/sample_log_config.yaml @@ -0,0 +1,71 @@ +# Log configuration for Synapse. +# +# This is a YAML file containing a standard Python logging configuration +# dictionary. See [1] for details on the valid settings. +# +# Synapse also supports structured logging for machine readable logs which can +# be ingested by ELK stacks. See [2] for details. +# +# [1]: https://docs.python.org/3.7/library/logging.config.html#configuration-dictionary-schema +# [2]: https://github.com/matrix-org/synapse/blob/master/docs/structured_logging.md + +version: 1 + +formatters: + precise: + format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s' + +handlers: + file: + class: logging.handlers.TimedRotatingFileHandler + formatter: precise + filename: /var/log/matrix-synapse/homeserver.log + when: midnight + backupCount: 3 # Does not include the current log file. + encoding: utf8 + + # Default to buffering writes to log file for efficiency. This means that + # will be a delay for INFO/DEBUG logs to get written, but WARNING/ERROR + # logs will still be flushed immediately. + buffer: + class: logging.handlers.MemoryHandler + target: file + # The capacity is the number of log lines that are buffered before + # being written to disk. Increasing this will lead to better + # performance, at the expensive of it taking longer for log lines to + # be written to disk. + capacity: 10 + flushLevel: 30 # Flush for WARNING logs as well + + # A handler that writes logs to stderr. Unused by default, but can be used + # instead of "buffer" and "file" in the logger handlers. + console: + class: logging.StreamHandler + formatter: precise + +loggers: + synapse.storage.SQL: + # beware: increasing this to DEBUG will make synapse log sensitive + # information such as access tokens. + level: INFO + + twisted: + # We send the twisted logging directly to the file handler, + # to work around https://github.com/matrix-org/synapse/issues/3471 + # when using "buffer" logger. Use "console" to log to stderr instead. + handlers: [file] + propagate: false + +root: + level: INFO + + # Write logs to the `buffer` handler, which will buffer them together in memory, + # then write them to a file. + # + # Replace "buffer" with "console" to log to stderr instead. (Note that you'll + # also need to update the configuration for the `twisted` logger above, in + # this case.) + # + handlers: [buffer] + +disable_existing_loggers: false