diff --git a/README.md b/README.md index de11dc7..16f78c6 100644 --- a/README.md +++ b/README.md @@ -56,9 +56,9 @@ deployment instructions) is the configuration format: * `acceptedMimeType` is renamed `scan.allowed_mimetypes` * `requestHeader` is renamed `download.additional_headers` and turned into a dictionary. -Note that the format of the cryptographic pickle file and key are compatible between this -project and the legacy Matrix Content Scanner. If no file exist at that path one will be -created automatically. +Note that the format of the cryptographic pickle file and key are compatible between +this project and the legacy Matrix Content Scanner. If no file exists at that path, one will +be created automatically. ## Development diff --git a/config.sample.yaml b/config.sample.yaml index f4a8834..f8d4f4e 100644 --- a/config.sample.yaml +++ b/config.sample.yaml @@ -100,10 +100,12 @@ download: # Configuration for decrypting Olm-encrypted request bodies. crypto: - # The path to the Olm pickle file. + # The path to the Olm pickle file. This file contains the key pair to use when + # encrypting and decrypting encrypted POST request bodies. + # A new key pair will be created at startup if the pickle file doesn't already exist. # Required. pickle_path: "./pickle" - # The key to the pickle. + # The key to use to decode the pickle file. # Required. pickle_key: "this_is_a_secret" diff --git a/matrix_content_scanner/crypto.py b/matrix_content_scanner/crypto.py new file mode 100644 index 0000000..fc52b9c --- /dev/null +++ b/matrix_content_scanner/crypto.py @@ -0,0 +1,117 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class CryptoHandler:
    """Handler for handling Olm-encrypted request bodies.

    On construction, the Olm key pair is loaded from the pickle file located at the
    configured `crypto.pickle_path`, using the configured `crypto.pickle_key` as the
    passphrase. If no file exists at that path, a new key pair is generated and
    written there.

    Attributes:
        public_key: The public key of the loaded (or newly generated) key pair, as
            exposed by the underlying `PkDecryption` object.
    """

    def __init__(self, mcs: "MatrixContentScanner") -> None:
        """Loads the Olm key pair from disk, or creates it if it doesn't exist yet.

        Args:
            mcs: The content scanner; its configuration provides the pickle file's
                path and key.

        Raises:
            ConfigError: The pickle file could not be read, written or decoded.
        """
        key = mcs.config.crypto.pickle_key
        path = mcs.config.crypto.pickle_path

        # Only the file read lives in the `try` body, so that errors raised while
        # decoding the pickle are not mistaken for I/O errors.
        try:
            # The pickle is written as ASCII text (see `_create_key_pair`), so read it
            # back the same way instead of relying on the locale's default encoding.
            with open(path, "r", encoding="ascii") as fp:
                pickle = fp.read()
        except FileNotFoundError:
            logger.info(
                "Pickle file not found, generating a new Olm key pair and storing it in"
                " pickle file %s",
                path,
            )
            self._decryptor: PkDecryption = self._create_key_pair(path, key)
        except UnicodeError as e:
            # A valid pickle only contains ASCII characters; anything else means the
            # file isn't one (or has been damaged).
            raise ConfigError(
                "The pickle file at the location configured for crypto.pickle_path"
                f" ({path}) is corrupted: {e}"
            ) from e
        except OSError as e:
            raise ConfigError(
                "Failed to read the pickle file at the location configured for"
                f" crypto.pickle_path ({path}): {e}"
            ) from e
        else:
            self._decryptor = self._load_key_pair(pickle, key)
            logger.info("Loaded Olm key pair from pickle file %s", path)

        self.public_key = self._decryptor.public_key

    @staticmethod
    def _load_key_pair(pickle: str, key: str) -> PkDecryption:
        """Creates a PkDecryption object from a pickle's content and its key.

        Args:
            pickle: The content of the pickle file.
            key: The passphrase to decode the pickle with.

        Returns:
            The PkDecryption object holding the key pair stored in the pickle.

        Raises:
            ConfigError: The key pair could not be extracted from the pickle.
        """
        try:
            return PkDecryption.from_pickle(
                pickle=pickle.encode("ascii"),
                passphrase=key,
            )
        except PkDecryptionError as e:
            # If we failed to extract the key pair from the pickle file, it's likely
            # because the key is incorrect, or there's an issue with the file's
            # content.
            raise ConfigError(
                "Configured value for crypto.pickle_key is incorrect or pickle file"
                f" is corrupted (Olm error code: {e})"
            ) from e

    @staticmethod
    def _create_key_pair(path: str, key: str) -> PkDecryption:
        """Generates a new key pair and stores it in a pickle file.

        Args:
            path: The location to write the pickle file to.
            key: The passphrase to encode the pickle with.

        Returns:
            The PkDecryption object holding the new key pair.

        Raises:
            ConfigError: The pickle file could not be written.
        """
        # Generate a new key pair and turn it into a pickle.
        decryptor = PkDecryption()
        pickle_bytes = decryptor.pickle(passphrase=key)

        # Try to write the pickle's content into a file.
        try:
            with open(path, "w", encoding="ascii") as fp:
                fp.write(pickle_bytes.decode("ascii"))
        except OSError as e:
            raise ConfigError(
                "Failed to write the pickle file at the location configured for"
                f" crypto.pickle_path ({path}): {e}"
            ) from e

        return decryptor

    def decrypt_body(self, ciphertext: str, mac: str, ephemeral: str) -> JsonDict:
        """Decrypts an Olm-encrypted body.

        Args:
            ciphertext: The encrypted body's ciphertext.
            mac: The encrypted body's MAC.
            ephemeral: The encrypted body's ephemeral key.

        Returns:
            The decrypted body, parsed as JSON.

        Raises:
            ContentScannerRestError: The body could not be decrypted, or the decrypted
                content is not a JSON object (HTTP 400 in both cases).
        """
        try:
            decrypted = self._decryptor.decrypt(
                message=PkMessage(
                    ephemeral_key=ephemeral,
                    mac=mac,
                    ciphertext=ciphertext,
                )
            )
        except PkDecryptionError as e:
            logger.error("Failed to decrypt encrypted body: %s", e)
            raise ContentScannerRestError(
                http_status=400,
                reason=ErrCode.FAILED_TO_DECRYPT,
                info=str(e),
            ) from e

        # The plaintext is chosen by whoever encrypted the body, so it can't be
        # trusted to be well-formed: reject it cleanly rather than letting a parsing
        # error bubble up as an internal error.
        try:
            body = json.loads(decrypted)
        except json.JSONDecodeError as e:
            logger.error("Decrypted body is not valid JSON: %s", e)
            raise ContentScannerRestError(
                http_status=400,
                reason=ErrCode.FAILED_TO_DECRYPT,
                info="Decrypted body is not valid JSON",
            ) from e

        if not isinstance(body, dict):
            logger.error("Decrypted body is not a JSON object")
            raise ContentScannerRestError(
                http_status=400,
                reason=ErrCode.FAILED_TO_DECRYPT,
                info="Decrypted body is not a JSON object",
            )

        return body
class CryptoHandlerTestCase(unittest.TestCase):
    """Tests for the crypto handler exposed by the content scanner."""

    def setUp(self) -> None:
        # Build a content scanner with the default test configuration and keep hold
        # of its crypto handler.
        scanner = get_content_scanner()
        self.crypto_handler = scanner.crypto_handler

    def test_decrypt(self) -> None:
        """Tests that an Olm-encrypted payload is successfully decrypted."""
        original = {"foo": "bar"}
        serialised = json.dumps(original)

        # Encrypt the serialised payload against the handler's public key.
        encryptor = PkEncryption(self.crypto_handler.public_key)
        message = encryptor.encrypt(serialised)

        # Hand the three parts of the encrypted message back to the crypto handler.
        result = self.crypto_handler.decrypt_body(
            ciphertext=message.ciphertext,
            mac=message.mac,
            ephemeral=message.ephemeral_key,
        )

        # The round trip must give back exactly what was encrypted.
        self.assertEqual(result, original)
 default_config.update(config) - return MatrixContentScanner(MatrixContentScannerConfig(default_config)) + parsed_config = MatrixContentScannerConfig(default_config) + + return MatrixContentScanner(parsed_config) diff --git a/tox.ini b/tox.ini index 08ce094..1ee6265 100644 --- a/tox.ini +++ b/tox.ini @@ -33,5 +33,13 @@ commands = extras = dev +# The current version of python-olm that's on PyPI does not include a type marker. +# Hopefully that's something we can fix at some point, but in the meantime let's not +# block things on this and instead use the wheels on gitlab.matrix.org's repository (which +# do have a type marker). We use --index-url (and not --extra-index-url) so that pip does +# not try to download the python-olm that's on pypi.org. This is fine because GitLab will +# redirect requests for packages it doesn't know about to pypi.org. +install_command = python -m pip install --index-url=https://gitlab.matrix.org/api/v4/projects/27/packages/pypi/simple {opts} {packages} + commands = mypy matrix_content_scanner tests