diff --git a/tests/test_trusted_metadata_set.py b/tests/test_trusted_metadata_set.py new file mode 100644 index 0000000000..b59e9de78b --- /dev/null +++ b/tests/test_trusted_metadata_set.py @@ -0,0 +1,124 @@ +import json +import logging +import os +import shutil +import sys +import tempfile +import unittest + +from tuf import exceptions +from tuf.api.metadata import Metadata +from tuf.ngclient._internal.trusted_metadata_set import TrustedMetadataSet + +from tests import utils + +logger = logging.getLogger(__name__) + +class TestTrustedMetadataSet(unittest.TestCase): + + def test_update(self): + repo_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') + + with open(os.path.join(repo_dir, "root.json"), "rb") as f: + trusted_set = TrustedMetadataSet(f.read()) + trusted_set.root_update_finished() + + with open(os.path.join(repo_dir, "timestamp.json"), "rb") as f: + trusted_set.update_timestamp(f.read()) + with open(os.path.join(repo_dir, "snapshot.json"), "rb") as f: + trusted_set.update_snapshot(f.read()) + with open(os.path.join(repo_dir, "targets.json"), "rb") as f: + trusted_set.update_targets(f.read()) + with open(os.path.join(repo_dir, "role1.json"), "rb") as f: + trusted_set.update_delegated_targets(f.read(), "role1", "targets") + with open(os.path.join(repo_dir, "role2.json"), "rb") as f: + trusted_set.update_delegated_targets(f.read(), "role2", "role1") + + def test_out_of_order_ops(self): + repo_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') + data={} + for md in ["root", "timestamp", "snapshot", "targets", "role1"]: + with open(os.path.join(repo_dir, f"{md}.json"), "rb") as f: + data[md] = f.read() + + trusted_set = TrustedMetadataSet(data["root"]) + + # Update timestamp before root is finished + with self.assertRaises(RuntimeError): + trusted_set.update_timestamp(data["timestamp"]) + + trusted_set.root_update_finished() + with self.assertRaises(RuntimeError): + trusted_set.root_update_finished() + + # Update snapshot before timestamp + with self.assertRaises(RuntimeError): + trusted_set.update_snapshot(data["snapshot"]) + + trusted_set.update_timestamp(data["timestamp"]) + + # Update targets before snapshot + with self.assertRaises(RuntimeError): + trusted_set.update_targets(data["targets"]) + + trusted_set.update_snapshot(data["snapshot"]) + + #update timestamp after snapshot + with self.assertRaises(RuntimeError): + trusted_set.update_timestamp(data["timestamp"]) + + # Update delegated targets before targets + with self.assertRaises(RuntimeError): + trusted_set.update_delegated_targets(data["role1"], "role1", "targets") + + trusted_set.update_targets(data["targets"]) + trusted_set.update_delegated_targets(data["role1"], "role1", "targets") + + def test_update_with_invalid_json(self): + repo_dir = os.path.join(os.getcwd(), 'repository_data', 'repository', 'metadata') + data={} + for md in ["root", "timestamp", "snapshot", "targets", "role1"]: + with open(os.path.join(repo_dir, f"{md}.json"), "rb") as f: + data[md] = f.read() + + # root.json not a json file at all + with self.assertRaises(exceptions.RepositoryError): + TrustedMetadataSet(b"") + # root.json is invalid + root = Metadata.from_bytes(data["root"]) + root.signed.version += 1 + with self.assertRaises(exceptions.RepositoryError): + TrustedMetadataSet(json.dumps(root.to_dict()).encode()) + + trusted_set = TrustedMetadataSet(data["root"]) + trusted_set.root_update_finished() + + top_level_md = [ + (data["timestamp"], trusted_set.update_timestamp), + (data["snapshot"], 
trusted_set.update_snapshot), + (data["targets"], trusted_set.update_targets), + ] + for metadata, update_func in top_level_md: + # metadata is not json + with self.assertRaises(exceptions.RepositoryError): + update_func(b"") + # metadata is invalid + md = Metadata.from_bytes(metadata) + md.signed.version += 1 + with self.assertRaises(exceptions.RepositoryError): + update_func(json.dumps(md.to_dict()).encode()) + + # metadata is of wrong type + with self.assertRaises(exceptions.RepositoryError): + update_func(data["root"]) + + update_func(metadata) + + + # TODO test updating over initial metadata (new keys, newer timestamp, etc) + # TODO test the actual specification checks + + +if __name__ == '__main__': + utils.configure_test_logging(sys.argv) + unittest.main() diff --git a/tests/test_updater_ng.py b/tests/test_updater_ng.py new file mode 100644 index 0000000000..eec10d73cb --- /dev/null +++ b/tests/test_updater_ng.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python + +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Test Updater class +""" + +import os +import shutil +import tempfile +import logging +import sys +import unittest +import tuf.unittest_toolbox as unittest_toolbox + +from tests import utils +from tuf import ngclient + +logger = logging.getLogger(__name__) + + +class TestUpdater(unittest_toolbox.Modified_TestCase): + + @classmethod + def setUpClass(cls): + # Create a temporary directory to store the repository, metadata, and target + # files. 'temporary_directory' must be deleted in TearDownModule() so that + # temporary files are always removed, even when exceptions occur. + cls.temporary_directory = tempfile.mkdtemp(dir=os.getcwd()) + + # Needed because in some tests simple_server.py cannot be found. + # The reason is that the current working directory + # has been changed when executing a subprocess. + cls.SIMPLE_SERVER_PATH = os.path.join(os.getcwd(), 'simple_server.py') + + # Launch a SimpleHTTPServer (serves files in the current directory). + # Test cases will request metadata and target files that have been + # pre-generated in 'tuf/tests/repository_data', which will be served + # by the SimpleHTTPServer launched here. The test cases of 'test_updater.py' + # assume the pre-generated metadata files have a specific structure, such + # as a delegated role 'targets/role1', three target files, five key files, + # etc. + cls.server_process_handler = utils.TestServerProcess(log=logger, + server=cls.SIMPLE_SERVER_PATH) + + + + @classmethod + def tearDownClass(cls): + # Cleans the resources and flush the logged lines (if any). + cls.server_process_handler.clean() + + # Remove the temporary repository directory, which should contain all the + # metadata, targets, and key files generated for the test cases + shutil.rmtree(cls.temporary_directory) + + + + def setUp(self): + # We are inheriting from custom class. + unittest_toolbox.Modified_TestCase.setUp(self) + + # Copy the original repository files provided in the test folder so that + # any modifications made to repository files are restricted to the copies. + # The 'repository_data' directory is expected to exist in 'tuf.tests/'. + original_repository_files = os.path.join(os.getcwd(), 'repository_data') + temporary_repository_root = \ + self.make_temp_directory(directory=self.temporary_directory) + + # The original repository, keystore, and client directories will be copied + # for each test case. 
+ original_repository = os.path.join(original_repository_files, 'repository') + original_keystore = os.path.join(original_repository_files, 'keystore') + original_client = os.path.join(original_repository_files, 'client', 'test_repository1', 'metadata', 'current') + + # Save references to the often-needed client repository directories. + # Test cases need these references to access metadata and target files. + self.repository_directory = \ + os.path.join(temporary_repository_root, 'repository') + self.keystore_directory = \ + os.path.join(temporary_repository_root, 'keystore') + + self.client_directory = os.path.join(temporary_repository_root, 'client') + + # Copy the original 'repository', 'client', and 'keystore' directories + # to the temporary repository the test cases can use. + shutil.copytree(original_repository, self.repository_directory) + shutil.copytree(original_client, self.client_directory) + shutil.copytree(original_keystore, self.keystore_directory) + + # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. + repository_basepath = self.repository_directory[len(os.getcwd()):] + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + + str(self.server_process_handler.port) + repository_basepath + + metadata_url = f"{url_prefix}/metadata/" + targets_url = f"{url_prefix}/targets/" + # Creating a repository instance. The test cases will use this client + # updater to refresh metadata, fetch target files, etc. + self.repository_updater = ngclient.Updater(self.client_directory, + metadata_url, + targets_url) + + def tearDown(self): + # We are inheriting from custom class. + unittest_toolbox.Modified_TestCase.tearDown(self) + + # Logs stdout and stderr from the sever subprocess. + self.server_process_handler.flush_log() + + def test_refresh(self): + # All metadata is in local directory already + self.repository_updater.refresh() + + # Get targetinfo for 'file1.txt' listed in targets + targetinfo1 = self.repository_updater.get_one_valid_targetinfo('file1.txt') + # Get targetinfo for 'file3.txt' listed in the delegated role1 + targetinfo3= self.repository_updater.get_one_valid_targetinfo('file3.txt') + + destination_directory = self.make_temp_directory() + updated_targets = self.repository_updater.updated_targets([targetinfo1, targetinfo3], + destination_directory) + + self.assertListEqual(updated_targets, [targetinfo1, targetinfo3]) + + self.repository_updater.download_target(targetinfo1, destination_directory) + updated_targets = self.repository_updater.updated_targets(updated_targets, + destination_directory) + + self.assertListEqual(updated_targets, [targetinfo3]) + + + self.repository_updater.download_target(targetinfo3, destination_directory) + updated_targets = self.repository_updater.updated_targets(updated_targets, + destination_directory) + + self.assertListEqual(updated_targets, []) + + def test_refresh_with_only_local_root(self): + os.remove(os.path.join(self.client_directory, "timestamp.json")) + os.remove(os.path.join(self.client_directory, "snapshot.json")) + os.remove(os.path.join(self.client_directory, "targets.json")) + os.remove(os.path.join(self.client_directory, "role1.json")) + + self.repository_updater.refresh() + + # Get targetinfo for 'file3.txt' listed in the delegated role1 + targetinfo3= self.repository_updater.get_one_valid_targetinfo('file3.txt') + +if __name__ == '__main__': + utils.configure_test_logging(sys.argv) + unittest.main() diff --git a/tox.ini b/tox.ini index 94d0d0683f..f460f69c5a 100644 --- a/tox.ini +++ b/tox.ini @@ -16,7 +16,7 @@ 
changedir = tests commands = python --version python -m coverage run aggregate_tests.py - python -m coverage report -m --fail-under 97 + python -m coverage report -m --fail-under 97 --omit "{toxinidir}/tuf/ngclient/*" deps = -r{toxinidir}/requirements-test.txt @@ -43,13 +43,13 @@ changedir = {toxinidir} commands = # Use different configs for new (tuf/api/*) and legacy code # TODO: configure black and isort args in pyproject.toml (see #1161) - black --check --diff --line-length 80 tuf/api - isort --check --diff --line-length 80 --profile black -p tuf tuf/api - pylint -j 0 tuf/api --rcfile=tuf/api/pylintrc + black --check --diff --line-length 80 tuf/api tuf/ngclient + isort --check --diff --line-length 80 --profile black -p tuf tuf/api tuf/ngclient + pylint -j 0 tuf/api tuf/ngclient --rcfile=tuf/api/pylintrc # NOTE: Contrary to what the pylint docs suggest, ignoring full paths does # work, unfortunately each subdirectory has to be ignored explicitly. - pylint -j 0 tuf --ignore=tuf/api,tuf/api/serialization + pylint -j 0 tuf --ignore=tuf/api,tuf/api/serialization,tuf/ngclient,tuf/ngclient/_internal mypy diff --git a/tuf/exceptions.py b/tuf/exceptions.py index 04eed8bc5e..2a24a0429e 100755 --- a/tuf/exceptions.py +++ b/tuf/exceptions.py @@ -70,7 +70,10 @@ class UnsupportedAlgorithmError(Error): class LengthOrHashMismatchError(Error): """Indicate an error while checking the length and hash values of an object""" -class BadHashError(Error): +class RepositoryError(Error): + """Indicate an error with a repository's state, such as a missing file.""" + +class BadHashError(RepositoryError): """Indicate an error while checking the value of a hash object.""" def __init__(self, expected_hash: str, observed_hash: str): @@ -92,9 +95,6 @@ def __repr__(self) -> str: # self.__class__.__name__ + '(' + repr(self.expected_hash) + ', ' + # repr(self.observed_hash) + ')') -class BadVersionNumberError(Error): - """Indicate an error for metadata that contains an invalid version number.""" - class BadPasswordError(Error): """Indicate an error after encountering an invalid password.""" @@ -104,8 +104,8 @@ class UnknownKeyError(Error): """Indicate an error while verifying key-like objects (e.g., keyids).""" -class RepositoryError(Error): - """Indicate an error with a repository's state, such as a missing file.""" +class BadVersionNumberError(RepositoryError): + """Indicate an error for metadata that contains an invalid version number.""" class MissingLocalRepositoryError(RepositoryError): @@ -120,35 +120,29 @@ class ForbiddenTargetError(RepositoryError): """Indicate that a role signed for a target that it was not delegated to.""" -class ExpiredMetadataError(Error): +class ExpiredMetadataError(RepositoryError): """Indicate that a TUF Metadata file has expired.""" class ReplayedMetadataError(RepositoryError): """Indicate that some metadata has been replayed to the client.""" - def __init__(self, metadata_role: str, previous_version: int, current_version: int): + def __init__(self, metadata_role: str, downloaded_version: int, current_version: int): super(ReplayedMetadataError, self).__init__() self.metadata_role = metadata_role - self.previous_version = previous_version + self.downloaded_version = downloaded_version self.current_version = current_version def __str__(self) -> str: return ( 'Downloaded ' + repr(self.metadata_role) + ' is older (' + - repr(self.previous_version) + ') than the version currently ' + repr(self.downloaded_version) + ') than the version currently ' 'installed (' + repr(self.current_version) + 
').') def __repr__(self) -> str: return self.__class__.__name__ + ' : ' + str(self) - # # Directly instance-reproducing: - # return ( - # self.__class__.__name__ + '(' + repr(self.metadata_role) + ', ' + - # repr(self.previous_version) + ', ' + repr(self.current_version) + ')') - - class CryptoError(Error): """Indicate any cryptography-related errors.""" @@ -250,7 +244,7 @@ class InvalidNameError(Error): """Indicate an error while trying to validate any type of named object.""" -class UnsignedMetadataError(Error): +class UnsignedMetadataError(RepositoryError): """Indicate metadata object with insufficient threshold of signatures.""" # signable is not used but kept in method signature for backwards compat diff --git a/tuf/ngclient/README.md b/tuf/ngclient/README.md new file mode 100644 index 0000000000..ad1b396c0d --- /dev/null +++ b/tuf/ngclient/README.md @@ -0,0 +1,22 @@ +## Next-gen TUF client for Python + +This package provides modules for TUF client implementers. + +**tuf.ngclient.Updater** is a class that implements the client workflow +described in the TUF specification (see +https://theupdateframework.github.io/specification/latest/#detailed-client-workflow) + +**tuf.ngclient.FetcherInterface** is an abstract class that client +implementers can implement a concrete class of in order to reuse their +own networking/download libraries -- a Requests-based implementation is +used by default. + +This package: +* Aims to be a clean, easy-to-validate reference client implementation + written in modern Python +* At the same time aims to be the library of choice for anyone + implementing a TUF client in Python: light-weight, easy to integrate + and with minimal required dependencies +* Is still under development but is planned to become the default client + in this implementation (i.e., the older `tuf.client` will be deprecated + in the future) diff --git a/tuf/ngclient/__init__.py b/tuf/ngclient/__init__.py new file mode 100644 index 0000000000..0a572962ba --- /dev/null +++ b/tuf/ngclient/__init__.py @@ -0,0 +1,8 @@ +# Copyright New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""TUF client public API +""" + +from tuf.ngclient.fetcher import FetcherInterface +from tuf.ngclient.updater import Updater diff --git a/tuf/ngclient/_internal/__init__.py b/tuf/ngclient/_internal/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tuf/ngclient/_internal/download.py b/tuf/ngclient/_internal/download.py new file mode 100644 index 0000000000..31b59f6630 --- /dev/null +++ b/tuf/ngclient/_internal/download.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python + +# Copyright 2012 - 2017, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +""" + + download.py + + + February 21, 2012. Based on previous version by Geremy Condra. + + + Konstantin Andrianov + Vladimir Diaz + + + See LICENSE-MIT OR LICENSE for licensing information. + + + Download metadata and target files and check their validity. The hash and + length of a downloaded file has to match the hash and length supplied by the + metadata of that file. +""" + +import logging +import tempfile +import timeit +from urllib import parse + +from securesystemslib import formats as sslib_formats + +import tuf +from tuf import exceptions, formats + +# See 'log.py' to learn how logging is handled in TUF. 
+logger = logging.getLogger(__name__) + + +def download_file(url, required_length, fetcher, strict_required_length=True): + """ + + Given the url and length of the desired file, this function opens a + connection to 'url' and downloads the file while ensuring its length + matches 'required_length' if 'STRICT_REQUIRED_LENGH' is True (If False, + the file's length is not checked and a slow retrieval exception is raised + if the downloaded rate falls below the acceptable rate). + + + url: + A URL string that represents the location of the file. + + required_length: + An integer value representing the length of the file. + + strict_required_length: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. + + + A file object is created on disk to store the contents of 'url'. + + + exceptions.DownloadLengthMismatchError, if there was a + mismatch of observed vs expected lengths while downloading the file. + + securesystemslib.exceptions.FormatError, if any of the arguments are + improperly formatted. + + Any other unforeseen runtime exception. + + + A file object that points to the contents of 'url'. + """ + # Do all of the arguments have the appropriate format? + # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. + sslib_formats.URL_SCHEMA.check_match(url) + formats.LENGTH_SCHEMA.check_match(required_length) + + # 'url.replace('\\', '/')' is needed for compatibility with Windows-based + # systems, because they might use back-slashes in place of forward-slashes. + # This converts it to the common format. unquote() replaces %xx escapes in + # a url with their single-character equivalent. A back-slash may be + # encoded as %5c in the url, which should also be replaced with a forward + # slash. + url = parse.unquote(url).replace("\\", "/") + logger.info("Downloading: %s", url) + + # This is the temporary file that we will return to contain the contents of + # the downloaded file. + temp_file = tempfile.TemporaryFile() # pylint: disable=consider-using-with + + average_download_speed = 0 + number_of_bytes_received = 0 + + try: + chunks = fetcher.fetch(url, required_length) + start_time = timeit.default_timer() + for chunk in chunks: + + stop_time = timeit.default_timer() + temp_file.write(chunk) + + # Measure the average download speed. + number_of_bytes_received += len(chunk) + seconds_spent_receiving = stop_time - start_time + average_download_speed = ( + number_of_bytes_received / seconds_spent_receiving + ) + + if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + logger.debug( + "The average download speed dropped below the minimum" + " average download speed set in tuf.settings.py." + " Stopping the download!" + ) + break + + logger.debug( + "The average download speed has not dipped below the" + " minimum average download speed set in tuf.settings.py." + ) + + # Does the total number of downloaded bytes match the required length? + _check_downloaded_length( + number_of_bytes_received, + required_length, + strict_required_length=strict_required_length, + average_download_speed=average_download_speed, + ) + + except Exception: + # Close 'temp_file'. Any written data is lost. 
+ temp_file.close() + logger.debug("Could not download URL: %s", url) + raise + + else: + temp_file.seek(0) + return temp_file + + +def download_bytes(url, required_length, fetcher, strict_required_length=True): + """Download bytes from given url + + Returns the downloaded bytes, otherwise like download_file() + """ + with download_file( + url, required_length, fetcher, strict_required_length + ) as dl_file: + return dl_file.read() + + +def _check_downloaded_length( + total_downloaded, + required_length, + strict_required_length=True, + average_download_speed=None, +): + """ + + A helper function which checks whether the total number of downloaded + bytes matches our expectation. + + + total_downloaded: + The total number of bytes supposedly downloaded for the file in + question. + + required_length: + The total number of bytes expected of the file as seen from its metadata + The Timestamp role is always downloaded without a known file length, and + the Root role when the client cannot download any of the required + top-level roles. In both cases, 'required_length' is actually an upper + limit on the length of the downloaded file. + + strict_required_length: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. + + average_download_speed: + The average download speed for the downloaded file. + + + None. + + + securesystemslib.exceptions.DownloadLengthMismatchError, if + strict_required_length is True and total_downloaded is not equal + required_length. + + exceptions.SlowRetrievalError, if the total downloaded was + done in less than the acceptable download speed (as set in + tuf.settings.py). + + + None. + """ + + if total_downloaded == required_length: + logger.info("Downloaded %d bytes as expected.", total_downloaded) + + else: + # What we downloaded is not equal to the required length, but did we ask + # for strict checking of required length? + if strict_required_length: + logger.info( + "Downloaded %d bytes, but expected %d bytes", + total_downloaded, + required_length, + ) + + # If the average download speed is below a certain threshold, we + # flag this as a possible slow-retrieval attack. + if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + raise exceptions.SlowRetrievalError(average_download_speed) + + raise exceptions.DownloadLengthMismatchError( + required_length, total_downloaded + ) + + # We specifically disabled strict checking of required length, but + # we will log a warning anyway. This is useful when we wish to + # download the Timestamp or Root metadata, for which we have no + # signed metadata; so, we must guess a reasonable required_length + # for it. 
+ if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + raise exceptions.SlowRetrievalError(average_download_speed) + + logger.debug( + "Good average download speed: %f bytes per second", + average_download_speed, + ) + + logger.info( + "Downloaded %d bytes out of upper limit of %d bytes.", + total_downloaded, + required_length, + ) diff --git a/tuf/ngclient/_internal/requests_fetcher.py b/tuf/ngclient/_internal/requests_fetcher.py new file mode 100644 index 0000000000..6913b27edd --- /dev/null +++ b/tuf/ngclient/_internal/requests_fetcher.py @@ -0,0 +1,187 @@ +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Provides an implementation of FetcherInterface using the Requests HTTP + library. +""" + +import logging +import time +from urllib import parse + +# Imports +import requests +import urllib3.exceptions + +import tuf +from tuf import exceptions, settings +from tuf.ngclient.fetcher import FetcherInterface + +# Globals +logger = logging.getLogger(__name__) + +# Classess +class RequestsFetcher(FetcherInterface): + """A concrete implementation of FetcherInterface based on the Requests + library. + + Attributes: + _sessions: A dictionary of Requests.Session objects storing a separate + session per scheme+hostname combination. + """ + + def __init__(self): + # http://docs.python-requests.org/en/master/user/advanced/#session-objects: + # + # "The Session object allows you to persist certain parameters across + # requests. It also persists cookies across all requests made from the + # Session instance, and will use urllib3's connection pooling. So if + # you're making several requests to the same host, the underlying TCP + # connection will be reused, which can result in a significant + # performance increase (see HTTP persistent connection)." + # + # NOTE: We use a separate requests.Session per scheme+hostname + # combination, in order to reuse connections to the same hostname to + # improve efficiency, but avoiding sharing state between different + # hosts-scheme combinations to minimize subtle security issues. + # Some cookies may not be HTTP-safe. + self._sessions = {} + + def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. + + Ensures the length of the downloaded data is up to 'required_length'. + + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in + bytes. + + Raises: + exceptions.SlowRetrievalError: A timeout occurs while receiving + data. + exceptions.FetcherHTTPError: An HTTP error code is received. + + Returns: + A bytes iterator + """ + # Get a customized session for each new schema+hostname combination. + session = self._get_session(url) + + # Get the requests.Response object for this URL. + # + # Defer downloading the response body with stream=True. + # Always set the timeout. This timeout value is interpreted by + # requests as: + # - connect timeout (max delay before first byte is received) + # - read (gap) timeout (max delay between bytes received) + response = session.get( + url, stream=True, timeout=settings.SOCKET_TIMEOUT + ) + # Check response status. + try: + response.raise_for_status() + except requests.HTTPError as e: + response.close() + status = e.response.status_code + raise exceptions.FetcherHTTPError(str(e), status) + + # Define a generator function to be returned by fetch. 
This way the + # caller of fetch can differentiate between connection and actual data + # download and measure download times accordingly. + def chunks(): + try: + bytes_received = 0 + while True: + # We download a fixed chunk of data in every round. This is + # so that we can defend against slow retrieval attacks. + # Furthermore, we do not wish to download an extremely + # large file in one shot. Before beginning the round, sleep + # (if set) for a short amount of time so that the CPU is not + # hogged in the while loop. + if settings.SLEEP_BEFORE_ROUND: + time.sleep(settings.SLEEP_BEFORE_ROUND) + + read_amount = min( + settings.CHUNK_SIZE, + required_length - bytes_received, + ) + + # NOTE: This may not handle some servers adding a + # Content-Encoding header, which may cause urllib3 to + # misbehave: + # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582 + data = response.raw.read(read_amount) + bytes_received += len(data) + + # We might have no more data to read. Check number of bytes + # downloaded. + if not data: + logger.debug( + "Downloaded %d out of %d bytes", + bytes_received, + required_length, + ) + + # Finally, we signal that the download is complete. + break + + yield data + + if bytes_received >= required_length: + break + + except urllib3.exceptions.ReadTimeoutError as e: + raise exceptions.SlowRetrievalError(str(e)) + + finally: + response.close() + + return chunks() + + def _get_session(self, url): + """Returns a different customized requests.Session per schema+hostname + combination. + """ + # Use a different requests.Session per schema+hostname combination, to + # reuse connections while minimizing subtle security issues. + parsed_url = parse.urlparse(url) + + if not parsed_url.scheme or not parsed_url.hostname: + raise exceptions.URLParsingError( + "Could not get scheme and hostname from URL: " + url + ) + + session_index = parsed_url.scheme + "+" + parsed_url.hostname + + logger.debug("url: %s", url) + logger.debug("session index: %s", session_index) + + session = self._sessions.get(session_index) + + if not session: + session = requests.Session() + self._sessions[session_index] = session + + # Attach some default headers to every Session. + requests_user_agent = session.headers["User-Agent"] + # Follows the RFC: https://tools.ietf.org/html/rfc7231#section-5.5.3 + tuf_user_agent = ( + "tuf/" + tuf.__version__ + " " + requests_user_agent + ) + session.headers.update( + { + # Tell the server not to compress or modify anything. + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#Directives + "Accept-Encoding": "identity", + # The TUF user agent. 
+ "User-Agent": tuf_user_agent, + } + ) + + logger.debug("Made new session %s", session_index) + + else: + logger.debug("Reusing session %s", session_index) + + return session diff --git a/tuf/ngclient/_internal/trusted_metadata_set.py b/tuf/ngclient/_internal/trusted_metadata_set.py new file mode 100644 index 0000000000..ff629eff8c --- /dev/null +++ b/tuf/ngclient/_internal/trusted_metadata_set.py @@ -0,0 +1,463 @@ +# Copyright the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Trusted collection of client-side TUF Metadata + +TrustedMetadataSet keeps track of the current valid set of metadata for the +client, and handles almost every step of the "Detailed client workflow" ( +https://theupdateframework.github.io/specification/latest#detailed-client-workflow) +in the TUF specification: the remaining steps are related to filesystem and +network IO, which are not handled here. + +Loaded metadata can be accessed via index access with rolename as key +(trusted_set["root"]) or, in the case of top-level metadata, using the helper +properties (trusted_set.root). + +The rules for top-level metadata are + * Metadata is updatable only if metadata it depends on is loaded + * Metadata is not updatable if any metadata depending on it has been loaded + * Metadata must be updated in order: + root -> timestamp -> snapshot -> targets -> (delegated targets) + +Exceptions are raised if metadata fails to load in any way. + +Example of loading root, timestamp and snapshot: + +>>> # Load local root (RepositoryErrors here stop the update) +>>> with open(root_path, "rb") as f: +>>> trusted_set = TrustedMetadataSet(f.read()) +>>> +>>> # update root from remote until no more are available +>>> with download("root", trusted_set.root.signed.version + 1) as f: +>>> trusted_set.update_root(f.read()) +>>> # ... +>>> trusted_set.root_update_finished() +>>> +>>> # load local timestamp, then update from remote +>>> try: +>>> with open(timestamp_path, "rb") as f: +>>> trusted_set.update_timestamp(f.read()) +>>> except (RepositoryError, OSError): +>>> pass # failure to load a local file is ok +>>> +>>> with download("timestamp") as f: +>>> trusted_set.update_timestamp(f.read()) +>>> +>>> # load local snapshot, then update from remote if needed +>>> try: +>>> with open(snapshot_path, "rb") as f: +>>> trusted_set.update_snapshot(f.read()) +>>> except (RepositoryError, OSError): +>>> # local snapshot is not valid, load from remote +>>> # (RepositoryErrors here stop the update) +>>> with download("snapshot", version) as f: +>>> trusted_set.update_snapshot(f.read()) + +TODO: + * exceptions are not final: the idea is that client could just handle + a generic RepositoryError that covers every issue that server provided + metadata could inflict (other errors would be user errors), but this is not + yet the case + * usefulness of root_update_finished() can be debated: it could be done + in the beginning of load_timestamp()... + * some metadata interactions might work better in Metadata itself + * Progress through Specification update process should be documented + (not sure yet how: maybe a spec_logger that logs specification events?) 
+""" + +import logging +from collections import abc +from datetime import datetime +from typing import Dict, Iterator, Optional + +from securesystemslib import hash as sslib_hash + +from tuf import exceptions +from tuf.api.metadata import Metadata, Root, Targets +from tuf.api.serialization import DeserializationError + +logger = logging.getLogger(__name__) + +# This is a placeholder until ... +# TODO issue 1306: implement this in Metadata API +def verify_with_threshold( + delegator: Metadata, role_name: str, unverified: Metadata +) -> bool: + """Verify 'unverified' with keys and threshold defined in delegator""" + role = None + keys = {} + if isinstance(delegator.signed, Root): + keys = delegator.signed.keys + role = delegator.signed.roles.get(role_name) + elif isinstance(delegator.signed, Targets): + if delegator.signed.delegations: + keys = delegator.signed.delegations.keys + # role names are unique: first match is enough + roles = delegator.signed.delegations.roles + role = next((r for r in roles if r.name == role_name), None) + else: + raise ValueError("Call is valid only on delegator metadata") + + if role is None: + raise ValueError(f"Delegated role {role_name} not found") + + # verify that delegate is signed by correct threshold of unique keys + unique_keys = set() + for keyid in role.keyids: + key = keys[keyid] + try: + key.verify_signature(unverified) + unique_keys.add(key.keyval["public"]) + except Exception as e: # pylint: disable=broad-except + # TODO specify the Exceptions (see issue #1351) + logger.info("verify failed: %s", e) + + return len(unique_keys) >= role.threshold + + +class TrustedMetadataSet(abc.Mapping): + """Internal class to keep track of trusted metadata in Updater + + TrustedMetadataSet ensures that the collection of metadata in it is valid + and trusted through the whole client update workflow. It provides easy ways + to update the metadata with the caller making decisions on what is updated. + """ + + def __init__(self, root_data: bytes): + """Initialize TrustedMetadataSet by loading trusted root metadata + + Args: + root_data: Trusted root metadata as bytes. Note that this metadata + will only be verified by itself: it is the source of trust for + all metadata in the TrustedMetadataSet + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. + """ + self._trusted_set = {} # type: Dict[str: Metadata] + self.reference_time = datetime.utcnow() + self._root_update_finished = False + + # Load and validate the local root metadata. 
Valid initial trusted root + # metadata is required + logger.debug("Updating initial trusted root") + self.update_root(root_data) + + def __getitem__(self, role: str) -> Metadata: + """Returns current Metadata for 'role'""" + return self._trusted_set[role] + + def __len__(self) -> int: + """Returns number of Metadata objects in TrustedMetadataSet""" + return len(self._trusted_set) + + def __iter__(self) -> Iterator[Metadata]: + """Returns iterator over all Metadata objects in TrustedMetadataSet""" + return iter(self._trusted_set) + + # Helper properties for top level metadata + @property + def root(self) -> Optional[Metadata]: + """Current root Metadata or None""" + return self._trusted_set.get("root") + + @property + def timestamp(self) -> Optional[Metadata]: + """Current timestamp Metadata or None""" + return self._trusted_set.get("timestamp") + + @property + def snapshot(self) -> Optional[Metadata]: + """Current snapshot Metadata or None""" + return self._trusted_set.get("snapshot") + + @property + def targets(self) -> Optional[Metadata]: + """Current targets Metadata or None""" + return self._trusted_set.get("targets") + + # Methods for updating metadata + def update_root(self, data: bytes): + """Verifies and loads 'data' as new root metadata. + + Note that an expired intermediate root is considered valid: expiry is + only checked for the final root in root_update_finished(). + + Args: + data: unverified new root metadata as bytes + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. + """ + if self._root_update_finished: + raise RuntimeError( + "Cannot update root after root update is finished" + ) + logger.debug("Updating root") + + try: + new_root = Metadata.from_bytes(data) + except DeserializationError as e: + raise exceptions.RepositoryError("Failed to load root") from e + + if new_root.signed.type != "root": + raise exceptions.RepositoryError( + f"Expected 'root', got '{new_root.signed.type}'" + ) + + if self.root is not None: + # We are not loading initial trusted root: verify the new one + if not verify_with_threshold(self.root, "root", new_root): + raise exceptions.UnsignedMetadataError( + "New root is not signed by root", new_root.signed + ) + + if new_root.signed.version != self.root.signed.version + 1: + raise exceptions.ReplayedMetadataError( + "root", new_root.signed.version, self.root.signed.version + ) + + if not verify_with_threshold(new_root, "root", new_root): + raise exceptions.UnsignedMetadataError( + "New root is not signed by itself", new_root.signed + ) + + self._trusted_set["root"] = new_root + logger.debug("Updated root") + + def root_update_finished(self): + """Marks root metadata as final and verifies it is not expired + + Raises: + ExpiredMetadataError: The final root metadata is expired. + """ + if self._root_update_finished: + raise RuntimeError("Root update is already finished") + + if self.root.signed.is_expired(self.reference_time): + raise exceptions.ExpiredMetadataError("New root.json is expired") + + # No need to delete timestamp/snapshot here as specification instructs + # for fast-forward attack recovery: timestamp/snapshot can not be + # loaded at this point and when loaded later they will be verified + # with current root keys. + + self._root_update_finished = True + logger.debug("Verified final root.json") + + def update_timestamp(self, data: bytes): + """Verifies and loads 'data' as new timestamp metadata. 
+ + Args: + data: unverified new timestamp metadata as bytes + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. + """ + if not self._root_update_finished: + raise RuntimeError("Cannot update timestamp before root") + if self.snapshot is not None: + raise RuntimeError("Cannot update timestamp after snapshot") + + try: + new_timestamp = Metadata.from_bytes(data) + except DeserializationError as e: + raise exceptions.RepositoryError("Failed to load timestamp") from e + + if new_timestamp.signed.type != "timestamp": + raise exceptions.RepositoryError( + f"Expected 'timestamp', got '{new_timestamp.signed.type}'" + ) + + if not verify_with_threshold(self.root, "timestamp", new_timestamp): + raise exceptions.UnsignedMetadataError( + "New timestamp is not signed by root", new_timestamp.signed + ) + + # If an existing trusted timestamp is updated, + # check for a rollback attack + if self.timestamp is not None: + # Prevent rolling back timestamp version + if new_timestamp.signed.version < self.timestamp.signed.version: + raise exceptions.ReplayedMetadataError( + "timestamp", + new_timestamp.signed.version, + self.timestamp.signed.version, + ) + # Prevent rolling back snapshot version + if ( + new_timestamp.signed.meta["snapshot.json"].version + < self.timestamp.signed.meta["snapshot.json"].version + ): + raise exceptions.ReplayedMetadataError( + "snapshot", + new_timestamp.signed.meta["snapshot.json"].version, + self.timestamp.signed.meta["snapshot.json"].version, + ) + + if new_timestamp.signed.is_expired(self.reference_time): + raise exceptions.ExpiredMetadataError("New timestamp is expired") + + self._trusted_set["timestamp"] = new_timestamp + logger.debug("Updated timestamp") + + # TODO: remove pylint disable once the hash verification is in metadata.py + def update_snapshot(self, data: bytes): # pylint: disable=too-many-branches + """Verifies and loads 'data' as new snapshot metadata. + + Args: + data: unverified new snapshot metadata as bytes + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. 
+ """ + + if self.timestamp is None: + raise RuntimeError("Cannot update snapshot before timestamp") + if self.targets is not None: + raise RuntimeError("Cannot update snapshot after targets") + logger.debug("Updating snapshot") + + meta = self.timestamp.signed.meta["snapshot.json"] + + # Verify against the hashes in timestamp, if any + hashes = meta.hashes or {} + for algo, stored_hash in hashes.items(): + digest_object = sslib_hash.digest(algo) + digest_object.update(data) + observed_hash = digest_object.hexdigest() + if observed_hash != stored_hash: + raise exceptions.BadHashError(stored_hash, observed_hash) + + try: + new_snapshot = Metadata.from_bytes(data) + except DeserializationError as e: + raise exceptions.RepositoryError("Failed to load snapshot") from e + + if new_snapshot.signed.type != "snapshot": + raise exceptions.RepositoryError( + f"Expected 'snapshot', got '{new_snapshot.signed.type}'" + ) + + if not verify_with_threshold(self.root, "snapshot", new_snapshot): + raise exceptions.UnsignedMetadataError( + "New snapshot is not signed by root", new_snapshot.signed + ) + + if ( + new_snapshot.signed.version + != self.timestamp.signed.meta["snapshot.json"].version + ): + raise exceptions.BadVersionNumberError( + f"Expected snapshot version " + f"{self.timestamp.signed.meta['snapshot.json'].version}, " + f"got {new_snapshot.signed.version}" + ) + + # If an existing trusted snapshot is updated, + # check for a rollback attack + if self.snapshot is not None: + for filename, fileinfo in self.snapshot.signed.meta.items(): + new_fileinfo = new_snapshot.signed.meta.get(filename) + + # Prevent removal of any metadata in meta + if new_fileinfo is None: + raise exceptions.RepositoryError( + f"New snapshot is missing info for '{filename}'" + ) + + # Prevent rollback of any metadata versions + if new_fileinfo.version < fileinfo.version: + raise exceptions.BadVersionNumberError( + f"Expected {filename} version " + f"{new_fileinfo.version}, got {fileinfo.version}." + ) + + if new_snapshot.signed.is_expired(self.reference_time): + raise exceptions.ExpiredMetadataError("New snapshot is expired") + + self._trusted_set["snapshot"] = new_snapshot + logger.debug("Updated snapshot") + + def update_targets(self, data: bytes): + """Verifies and loads 'data' as new top-level targets metadata. + + Args: + data: unverified new targets metadata as bytes + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. + """ + self.update_delegated_targets(data, "targets", "root") + + def update_delegated_targets( + self, data: bytes, role_name: str, delegator_name: str + ): + """Verifies and loads 'data' as new metadata for target 'role_name'. + + Args: + data: unverified new metadata as bytes + role_name: The role name of the new metadata + delegator_name: The name of the role delegating to the new metadata + + Raises: + RepositoryError: Metadata failed to load or verify. The actual + error type and content will contain more details. 
+ """ + if self.snapshot is None: + raise RuntimeError("Cannot load targets before snapshot") + + delegator = self.get(delegator_name) + if delegator is None: + raise RuntimeError("Cannot load targets before delegator") + + logger.debug("Updating %s delegated by %s", role_name, delegator_name) + + # Verify against the hashes in snapshot, if any + meta = self.snapshot.signed.meta.get(f"{role_name}.json") + if meta is None: + raise exceptions.RepositoryError( + f"Snapshot does not contain information for '{role_name}'" + ) + + hashes = meta.hashes or {} + for algo, stored_hash in hashes.items(): + digest_object = sslib_hash.digest(algo) + digest_object.update(data) + observed_hash = digest_object.hexdigest() + if observed_hash != stored_hash: + # TODO: Error should derive from RepositoryError + raise exceptions.BadHashError(stored_hash, observed_hash) + + try: + new_delegate = Metadata.from_bytes(data) + except DeserializationError as e: + raise exceptions.RepositoryError("Failed to load snapshot") from e + + if new_delegate.signed.type != "targets": + raise exceptions.RepositoryError( + f"Expected 'targets', got '{new_delegate.signed.type}'" + ) + + if not verify_with_threshold(delegator, role_name, new_delegate): + raise exceptions.UnsignedMetadataError( + f"New {role_name} is not signed by {delegator_name}", + new_delegate, + ) + + if new_delegate.signed.version != meta.version: + raise exceptions.BadVersionNumberError( + f"Expected {role_name} version " + f"{meta.version}, got {new_delegate.signed.version}." + ) + + if new_delegate.signed.is_expired(self.reference_time): + raise exceptions.ExpiredMetadataError(f"New {role_name} is expired") + + self._trusted_set[role_name] = new_delegate + logger.debug("Updated %s delegated by %s", role_name, delegator_name) diff --git a/tuf/ngclient/fetcher.py b/tuf/ngclient/fetcher.py new file mode 100644 index 0000000000..8a6cae34d7 --- /dev/null +++ b/tuf/ngclient/fetcher.py @@ -0,0 +1,41 @@ +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Provides an interface for network IO abstraction. +""" + +# Imports +import abc + + +# Classes +class FetcherInterface: + """Defines an interface for abstract network download. + + By providing a concrete implementation of the abstract interface, + users of the framework can plug-in their preferred/customized + network stack. + """ + + __metaclass__ = abc.ABCMeta + + @abc.abstractmethod + def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. + + Ensures the length of the downloaded data is up to 'required_length'. + + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in + bytes. + + Raises: + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving + data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. + + Returns: + A bytes iterator + """ + raise NotImplementedError # pragma: no cover diff --git a/tuf/ngclient/updater.py b/tuf/ngclient/updater.py new file mode 100644 index 0000000000..75cf229ec0 --- /dev/null +++ b/tuf/ngclient/updater.py @@ -0,0 +1,582 @@ +# Copyright 2020, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""TUF client workflow implementation. 
+""" + +import fnmatch +import logging +import os +from typing import Any, Dict, List, Optional +from urllib import parse + +from securesystemslib import exceptions as sslib_exceptions +from securesystemslib import hash as sslib_hash +from securesystemslib import util as sslib_util + +from tuf import exceptions +from tuf.ngclient._internal import ( + download, + requests_fetcher, + trusted_metadata_set, +) +from tuf.ngclient.fetcher import FetcherInterface + +# Globals +MAX_ROOT_ROTATIONS = 32 +MAX_DELEGATIONS = 32 +DEFAULT_ROOT_MAX_LENGTH = 512000 # bytes +DEFAULT_TIMESTAMP_MAX_LENGTH = 16384 # bytes +DEFAULT_SNAPSHOT_MAX_LENGTH = 2000000 # bytes +DEFAULT_TARGETS_MAX_LENGTH = 5000000 # bytes + +logger = logging.getLogger(__name__) + + +class Updater: + """ + An implemetation of the TUF client workflow. + Provides a public API for integration in client applications. + """ + + def __init__( + self, + repository_dir: str, + metadata_base_url: str, + target_base_url: Optional[str] = None, + fetcher: Optional[FetcherInterface] = None, + ): + """ + Args: + repository_dir: Local metadata directory. Directory must be + writable and it must contain at least a root.json file. + metadata_base_url: Base URL for all remote metadata downloads + target_base_url: Optional; Default base URL for all remote target + downloads. Can be individually set in download_target() + fetcher: Optional; FetcherInterface implementation used to download + both metadata and targets. Default is RequestsFetcher + + Raises: + OSError: Local root.json cannot be read + RepositoryError: Local root.json is invalid + """ + self._dir = repository_dir + self._metadata_base_url = _ensure_trailing_slash(metadata_base_url) + if target_base_url is None: + self._target_base_url = None + else: + self._target_base_url = _ensure_trailing_slash(target_base_url) + + # Read trusted local root metadata + data = self._load_local_metadata("root") + self._trusted_set = trusted_metadata_set.TrustedMetadataSet(data) + + if fetcher is None: + self._fetcher = requests_fetcher.RequestsFetcher() + else: + self._fetcher = fetcher + + def refresh(self) -> None: + """ + This method downloads, verifies, and loads metadata for the top-level + roles in the specified order (root -> timestamp -> snapshot -> targets) + The expiration time for downloaded metadata is also verified. + + The metadata for delegated roles are not refreshed by this method, but + by the method that returns targetinfo (i.e., + get_one_valid_targetinfo()). + + The refresh() method should be called by the client before any target + requests. + + Raises: + OSError: New metadata could not be written to disk + RepositoryError: Metadata failed to verify in some way + TODO: download-related errors + """ + + self._load_root() + self._load_timestamp() + self._load_snapshot() + self._load_targets("targets", "root") + + def get_one_valid_targetinfo(self, target_path: str) -> Dict: + """ + Returns the target information for a target identified by target_path. + + As a side-effect this method downloads all the additional (delegated + targets) metadata required to return the target information. + + Args: + target_path: A target identifier that is a path-relative-URL string + (https://url.spec.whatwg.org/#path-relative-url-string). + Typically this is also the unix file path of the eventually + downloaded file. 
+ + Raises: + OSError: New metadata could not be written to disk + RepositoryError: Metadata failed to verify in some way + TODO: download-related errors + """ + return self._preorder_depth_first_walk(target_path) + + @staticmethod + def updated_targets( + targets: List[Dict[str, Any]], destination_directory: str + ) -> List[Dict[str, Any]]: + """ + After the client has retrieved the target information for those targets + they are interested in updating, they would call this method to + determine which targets have changed from those saved locally on disk. + All the targets that have changed are returned in a list. From this + list, they can request a download by calling 'download_target()'. + """ + # Keep track of the target objects and filepaths of updated targets. + # Return 'updated_targets' and use 'updated_targetpaths' to avoid + # duplicates. + updated_targets = [] + updated_targetpaths = [] + + for target in targets: + # Prepend 'destination_directory' to the target's relative filepath + # (as stored in metadata.) Verify the hash of 'target_filepath' + # against each hash listed for its fileinfo. Note: join() discards + # 'destination_directory' if 'filepath' contains a leading path + # separator (i.e., is treated as an absolute path). + filepath = target["filepath"] + target_filepath = os.path.join(destination_directory, filepath) + + if target_filepath in updated_targetpaths: + continue + + # Try one of the algorithm/digest combos for a mismatch. We break + # as soon as we find a mismatch. + for algorithm, digest in target["fileinfo"].hashes.items(): + digest_object = None + try: + digest_object = sslib_hash.digest_filename( + target_filepath, algorithm=algorithm + ) + + # This exception will occur if the target does not exist + # locally. + except sslib_exceptions.StorageError: + updated_targets.append(target) + updated_targetpaths.append(target_filepath) + break + + # The file does exist locally, check if its hash differs. + if digest_object.hexdigest() != digest: + updated_targets.append(target) + updated_targetpaths.append(target_filepath) + break + + return updated_targets + + def download_target( + self, + targetinfo: Dict, + destination_directory: str, + target_base_url: Optional[str] = None, + ): + """ + Download target specified by 'targetinfo' into 'destination_directory'. + + Args: + targetinfo: data received from get_one_valid_targetinfo() or + updated_targets(). + destination_directory: existing local directory to download into. + Note that new directories may be created inside + destination_directory as required. + target_base_url: Optional; Base URL used to form the final target + download URL. 
Default is the value provided in Updater() + + Raises: + TODO: download-related errors + TODO: file write errors + """ + if target_base_url is None and self._target_base_url is None: + raise ValueError( + "target_base_url must be set in either download_target() or " + "constructor" + ) + if target_base_url is None: + target_base_url = self._target_base_url + else: + target_base_url = _ensure_trailing_slash(target_base_url) + + full_url = parse.urljoin(target_base_url, targetinfo["filepath"]) + + with download.download_file( + full_url, targetinfo["fileinfo"].length, self._fetcher + ) as target_file: + _check_file_length(target_file, targetinfo["fileinfo"].length) + _check_hashes_obj(target_file, targetinfo["fileinfo"].hashes) + + filepath = os.path.join( + destination_directory, targetinfo["filepath"] + ) + sslib_util.persist_temp_file(target_file, filepath) + + def _download_metadata( + self, rolename: str, length: int, version: Optional[int] = None + ) -> bytes: + """Download a metadata file and return it as bytes""" + if version is None: + filename = f"{rolename}.json" + else: + filename = f"{version}.{rolename}.json" + url = parse.urljoin(self._metadata_base_url, filename) + return download.download_bytes( + url, + length, + self._fetcher, + strict_required_length=False, + ) + + def _load_local_metadata(self, rolename: str) -> bytes: + with open(os.path.join(self._dir, f"{rolename}.json"), "rb") as f: + return f.read() + + def _persist_metadata(self, rolename: str, data: bytes): + with open(os.path.join(self._dir, f"{rolename}.json"), "wb") as f: + f.write(data) + + def _load_root(self) -> None: + """Load remote root metadata. + + Sequentially load and persist on local disk every newer root metadata + version available on the remote. + """ + + # Update the root role + lower_bound = self._trusted_set.root.signed.version + 1 + upper_bound = lower_bound + MAX_ROOT_ROTATIONS + + for next_version in range(lower_bound, upper_bound): + try: + data = self._download_metadata( + "root", DEFAULT_ROOT_MAX_LENGTH, next_version + ) + self._trusted_set.update_root(data) + self._persist_metadata("root", data) + + except exceptions.FetcherHTTPError as exception: + if exception.status_code not in {403, 404}: + raise + # 404/403 means current root is newest available + break + + # Verify final root + self._trusted_set.root_update_finished() + + def _load_timestamp(self) -> None: + """Load local and remote timestamp metadata""" + try: + data = self._load_local_metadata("timestamp") + self._trusted_set.update_timestamp(data) + except (OSError, exceptions.RepositoryError) as e: + # Local timestamp does not exist or is invalid + logger.debug("Failed to load local timestamp %s", e) + + # Load from remote (whether local load succeeded or not) + data = self._download_metadata( + "timestamp", DEFAULT_TIMESTAMP_MAX_LENGTH + ) + self._trusted_set.update_timestamp(data) + self._persist_metadata("timestamp", data) + + def _load_snapshot(self) -> None: + """Load local (and if needed remote) snapshot metadata""" + try: + data = self._load_local_metadata("snapshot") + self._trusted_set.update_snapshot(data) + logger.debug("Local snapshot is valid: not downloading new one") + except (OSError, exceptions.RepositoryError) as e: + # Local snapshot does not exist or is invalid: update from remote + logger.debug("Failed to load local snapshot %s", e) + + metainfo = self._trusted_set.timestamp.signed.meta["snapshot.json"] + length = metainfo.length or DEFAULT_SNAPSHOT_MAX_LENGTH + version = None + if 
self._trusted_set.root.signed.consistent_snapshot:
+                version = metainfo.version
+
+            data = self._download_metadata("snapshot", length, version)
+            self._trusted_set.update_snapshot(data)
+            self._persist_metadata("snapshot", data)
+
+    def _load_targets(self, role: str, parent_role: str) -> None:
+        """Load local (and if needed remote) metadata for 'role'."""
+        try:
+            data = self._load_local_metadata(role)
+            self._trusted_set.update_delegated_targets(data, role, parent_role)
+            logger.debug("Local %s is valid: not downloading new one", role)
+        except (OSError, exceptions.RepositoryError) as e:
+            # Local 'role' does not exist or is invalid: update from remote
+            logger.debug("Failed to load local %s: %s", role, e)
+
+            metainfo = self._trusted_set.snapshot.signed.meta[f"{role}.json"]
+            length = metainfo.length or DEFAULT_TARGETS_MAX_LENGTH
+            version = None
+            if self._trusted_set.root.signed.consistent_snapshot:
+                version = metainfo.version
+
+            data = self._download_metadata(role, length, version)
+            self._trusted_set.update_delegated_targets(data, role, parent_role)
+            self._persist_metadata(role, data)
+
+    def _preorder_depth_first_walk(self, target_filepath: str) -> Dict:
+        """
+        Interrogate the tree of target delegations in order of appearance
+        (which implicitly orders them by trustworthiness), and return the
+        matching target found in the most trusted role.
+        """
+
+        target = None
+        role_names = [("targets", "root")]
+        visited_role_names = set()
+        number_of_delegations = MAX_DELEGATIONS
+
+        # Preorder depth-first traversal of the graph of target delegations.
+        while (
+            target is None and number_of_delegations > 0 and len(role_names) > 0
+        ):
+
+            # Pop the role name from the top of the stack.
+            role_name, parent_role = role_names.pop(-1)
+            self._load_targets(role_name, parent_role)
+            # Skip any visited current role to prevent cycles.
+            if (role_name, parent_role) in visited_role_names:
+                logger.debug("Skipping visited current role %s", role_name)
+                continue
+
+            # The metadata for 'role_name' must be downloaded/updated before
+            # its targets, delegations, and child roles can be inspected.
+
+            role_metadata = self._trusted_set[role_name].signed
+            target = role_metadata.targets.get(target_filepath)
+
+            # After preorder check, add current role to set of visited roles.
+            visited_role_names.add((role_name, parent_role))
+
+            # And decrement the number of delegations left to visit.
+            number_of_delegations -= 1
+            child_roles = []
+            if role_metadata.delegations is not None:
+                child_roles = role_metadata.delegations.roles
+
+            if target is None:
+
+                child_roles_to_visit = []
+                # NOTE: This may be a slow operation if there are many
+                # delegated roles.
+                for child_role in child_roles:
+                    child_role_name = _visit_child_role(
+                        child_role, target_filepath
+                    )
+
+                    if child_role.terminating and child_role_name is not None:
+                        logger.debug(
+                            "Adding child role %s. "
+                            "Not backtracking to other roles.",
+                            child_role_name,
+                        )
+                        role_names = []
+                        child_roles_to_visit.append(
+                            (child_role_name, role_name)
+                        )
+                        break
+
+                    if child_role_name is None:
+                        logger.debug("Skipping child role %s", child_role.name)
+
+                    else:
+                        logger.debug("Adding child role %s", child_role_name)
+                        child_roles_to_visit.append(
+                            (child_role_name, role_name)
+                        )
+
+                # Push 'child_roles_to_visit' in reverse order of appearance
+                # onto 'role_names'. Roles are popped from the end of
+                # the 'role_names' list.
+                child_roles_to_visit.reverse()
+                role_names.extend(child_roles_to_visit)
+
+            else:
+                logger.debug("Found target in current role %s", role_name)
+
+        if (
+            target is None
+            and number_of_delegations == 0
+            and len(role_names) > 0
+        ):
+            logger.debug(
+                "%d roles left to visit, but allowed to visit at most %d "
+                "delegations.",
+                len(role_names),
+                MAX_DELEGATIONS,
+            )
+
+        return {"filepath": target_filepath, "fileinfo": target}
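The loop above is a standard stack-based preorder walk: delegations are pushed in reverse order of appearance so that the first-listed (most trusted) child is popped and explored first, and a terminating delegation clears the stack to stop backtracking. A minimal, self-contained sketch of the same traversal shape, using a made-up delegation graph purely for illustration (not part of this patch):

# Illustrative sketch only; not part of this patch.
def preorder(delegations, start="targets"):
    """Return role names in the order the walk above would visit them."""
    stack, visited, order = [start], set(), []
    while stack:
        role = stack.pop()          # pop from the end, like role_names.pop(-1)
        if role in visited:         # cycle guard, like visited_role_names
            continue
        visited.add(role)
        order.append(role)
        # Reverse so the first-listed delegation is explored first.
        stack.extend(reversed(delegations.get(role, [])))
    return order

# Hypothetical graph: "targets" delegates to "a" and "b", "a" delegates to "a1".
assert preorder({"targets": ["a", "b"], "a": ["a1"]}) == ["targets", "a", "a1", "b"]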
+
+
+def _visit_child_role(child_role: Dict, target_filepath: str) -> Optional[str]:
+    """
+    Determine whether the given 'target_filepath' is an allowed path of
+    'child_role'.
+
+    Ensure that we explore only delegated roles trusted with the target. The
+    metadata for 'child_role' should have been refreshed prior to this point,
+    however, the paths/targets that 'child_role' signs for have not been
+    verified (as intended). The paths/targets that 'child_role' is allowed
+    to specify in its metadata depend on the delegating role, and thus are
+    left to the caller to verify. We verify here that 'target_filepath'
+    is an allowed path according to the delegated 'child_role'.
+
+    TODO: Should the TUF spec restrict the repository to one particular
+    algorithm? Should we allow the repository to specify in the role
+    dictionary the algorithm used for these generated hashed paths?
+
+    Args:
+        child_role: The delegated targets role object of 'child_role',
+            containing its paths, path_hash_prefixes, keys, and so on.
+        target_filepath: The path to the target file on the repository. This
+            will be relative to the 'targets' (or equivalent) directory on a
+            given mirror.
+
+    Raises:
+        FormatError: 'child_role' has neither 'paths' nor
+            'path_hash_prefixes' set.
+
+    Returns:
+        The role name of 'child_role' if it has been delegated the target
+        with the name 'target_filepath', otherwise None.
+    """
+
+    child_role_name = child_role.name
+    child_role_paths = child_role.paths
+    child_role_path_hash_prefixes = child_role.path_hash_prefixes
+
+    if child_role_path_hash_prefixes is not None:
+        target_filepath_hash = _get_filepath_hash(target_filepath)
+        for child_role_path_hash_prefix in child_role_path_hash_prefixes:
+            if not target_filepath_hash.startswith(child_role_path_hash_prefix):
+                continue
+
+            return child_role_name
+
+    elif child_role_paths is not None:
+        # Is 'child_role_name' allowed to sign for 'target_filepath'?
+        for child_role_path in child_role_paths:
+            # A child role path may be an explicit path or glob pattern (Unix
+            # shell-style wildcards). The child role 'child_role_name' is
+            # returned if 'target_filepath' is equal to or matches
+            # 'child_role_path'. Explicit filepaths are also considered
+            # matches. A repo maintainer might delegate a glob pattern with a
+            # leading path separator, while the client requests a matching
+            # target without a leading path separator - make sure to strip any
+            # leading path separators so that a match is made.
+            # Example: "foo.tgz" should match with "/*.tgz".
+            if fnmatch.fnmatch(
+                target_filepath.lstrip(os.sep), child_role_path.lstrip(os.sep)
+            ):
+                logger.debug(
+                    "Child role %r is allowed to sign for %r",
+                    child_role_name,
+                    target_filepath,
+                )
+
+                return child_role_name
+
+            logger.debug(
+                "The given target path %r does not match the trusted path "
+                "or glob pattern: %r",
+                target_filepath,
+                child_role_path,
+            )
+            continue
+
+    else:
+        # 'role_name' should have been validated when it was downloaded.
+        # The 'paths' or 'path_hash_prefixes' fields should not be missing,
+        # so we raise a format error here in case they are both missing.
+        raise exceptions.FormatError(
+            f'{child_role_name!r} has neither a "paths" nor '
+            '"path_hash_prefixes". At least one of these attributes '
+            "must be present."
+        )
+
+    return None
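The path_hash_prefixes branch above implements hashed-bin delegations: the target path is hashed (see _get_filepath_hash() below) and the target counts as delegated to the child role if the hex digest starts with one of the delegated prefixes. A minimal sketch of that check, using only standard-library hashlib; the prefix values are made up for illustration:

# Illustrative sketch only; not part of this patch.
import hashlib

def is_in_hashed_bin(target_path: str, path_hash_prefixes) -> bool:
    # Hash the UTF-8 encoded target path, mirroring _get_filepath_hash().
    digest = hashlib.sha256(target_path.encode("utf-8")).hexdigest()
    # Delegated if the digest falls into one of the role's bins.
    return any(digest.startswith(prefix) for prefix in path_hash_prefixes)

# Hypothetical bin role delegated the two prefixes "0f" and "8e":
print(is_in_hashed_bin("targets/file1.txt", ["0f", "8e"]))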
+
+
+def _check_file_length(file_object, trusted_file_length):
+    """
+    Check whether the length of 'file_object' matches 'trusted_file_length'.
+
+    Raises:
+        DownloadLengthMismatchError: File length does not match
+            expected length.
+    """
+    # Determine the file's length by seeking to its end.
+    file_object.seek(0, 2)
+    observed_length = file_object.tell()
+    file_object.seek(0)
+
+    if observed_length != trusted_file_length:
+        raise exceptions.DownloadLengthMismatchError(
+            trusted_file_length, observed_length
+        )
+
+
+def _check_hashes_obj(file_object, trusted_hashes):
+    """
+    Check whether the hashes of 'file_object' match 'trusted_hashes'.
+
+    Raises:
+        BadHashError: Hashes do not match
+    """
+    for algorithm, trusted_hash in trusted_hashes.items():
+        digest_object = sslib_hash.digest_fileobject(file_object, algorithm)
+        computed_hash = digest_object.hexdigest()
+
+        # Raise an exception if any of the hashes are incorrect.
+        if trusted_hash != computed_hash:
+            raise exceptions.BadHashError(trusted_hash, computed_hash)
+
+        logger.info(
+            "The file's %s hash is correct: %s", algorithm, trusted_hash
+        )
+
+
+def _get_filepath_hash(target_filepath, hash_function="sha256"):
+    """
+    Calculate the hash of the filepath to determine which hashed bin
+    the target is delegated to.
+    """
+    # The client currently assumes the repository (i.e., repository
+    # tool) uses 'hash_function' and UTF-8 encoded filepaths to
+    # generate the hashed bins.
+    digest_object = sslib_hash.digest(hash_function)
+    encoded_target_filepath = target_filepath.encode("utf-8")
+    digest_object.update(encoded_target_filepath)
+    target_filepath_hash = digest_object.hexdigest()
+
+    return target_filepath_hash
+
+
+def _ensure_trailing_slash(url: str) -> str:
+    """Return url guaranteed to end in a slash"""
+    return url if url.endswith("/") else f"{url}/"
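Taken together, the methods above give the client a complete target-update flow: resolve a target through the delegation graph, compare it against the local copy, and download it if needed. A rough usage sketch; the constructor arguments, directories and URLs are placeholders, and refresh() is assumed to be the top-level metadata update entry point defined earlier in this patch:

# Illustrative sketch only; argument names/values are assumptions.
from tuf import ngclient

updater = ngclient.Updater(
    "./metadata",                     # local metadata cache (assumed)
    "https://example.com/metadata/",  # metadata base URL (placeholder)
    "https://example.com/targets/",   # target base URL (placeholder)
)
updater.refresh()  # assumed: loads root, timestamp, snapshot and targets

# Walk the delegation graph (the preorder DFS above) for one target...
info = updater.get_one_valid_targetinfo("file1.txt")

# ...check whether the local copy is missing or has a hash mismatch...
outdated = updater.updated_targets([info], "./downloads")

# ...and fetch whatever actually changed.
for target in outdated:
    updater.download_target(target, "./downloads")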