From bc7d6a411e6ce6864ab8b7628069adc2010748d6 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Wed, 7 Dec 2022 12:56:21 -0500 Subject: [PATCH] sigstore: stream input into signing (#329) * sigstore: stream input into signing Closes #158. Signed-off-by: William Woodruff * _utils: ignore some mypy errors See: https://github.com/python/typing/issues/659 Signed-off-by: William Woodruff * test_sign: fix signing test Signed-off-by: William Woodruff * test_utils: test correctness of our digest streaming Signed-off-by: William Woodruff * sigstore, test: stream verification as well Signed-off-by: William Woodruff * _utils: document the security properties of sha256_streaming Signed-off-by: William Woodruff Signed-off-by: William Woodruff --- sigstore/_cli.py | 22 ++++++++++++---------- sigstore/_sign.py | 14 +++++++++----- sigstore/_utils.py | 35 ++++++++++++++++++++++++++++++++++- sigstore/_verify/models.py | 24 ++++++++++-------------- sigstore/_verify/verifier.py | 8 ++++++-- test/unit/conftest.py | 13 +++++++------ test/unit/test_sign.py | 3 ++- test/unit/test_utils.py | 14 ++++++++++++++ 8 files changed, 94 insertions(+), 39 deletions(-) diff --git a/sigstore/_cli.py b/sigstore/_cli.py index c5ca84d2..095ed25b 100644 --- a/sigstore/_cli.py +++ b/sigstore/_cli.py @@ -447,10 +447,11 @@ def _sign(args: argparse.Namespace) -> None: for file, outputs in output_map.items(): logger.debug(f"signing for {file.name}") - result = signer.sign( - input_=file.read_bytes(), - identity_token=args.identity_token, - ) + with file.open(mode="rb", buffering=0) as io: + result = signer.sign( + input_=io, + identity_token=args.identity_token, + ) print("Using ephemeral certificate:") print(result.cert_pem) @@ -586,12 +587,13 @@ def _verify(args: argparse.Namespace) -> None: logger.debug(f"Verifying contents from: {file}") - materials = VerificationMaterials( - input_=file.read_bytes(), - cert_pem=cert_pem, - signature=base64.b64decode(b64_signature), - offline_rekor_entry=entry, - ) + with file.open(mode="rb", buffering=0) as io: + materials = VerificationMaterials( + input_=io, + cert_pem=cert_pem, + signature=base64.b64decode(b64_signature), + offline_rekor_entry=entry, + ) policy_ = policy.Identity( identity=args.cert_identity, diff --git a/sigstore/_sign.py b/sigstore/_sign.py index 696db118..c142d7bd 100644 --- a/sigstore/_sign.py +++ b/sigstore/_sign.py @@ -15,12 +15,13 @@ from __future__ import annotations import base64 -import hashlib import logging +from typing import IO import cryptography.x509 as x509 from cryptography.hazmat.primitives import hashes, serialization from cryptography.hazmat.primitives.asymmetric import ec +from cryptography.hazmat.primitives.asymmetric.utils import Prehashed from cryptography.x509.oid import NameOID from pydantic import BaseModel @@ -28,6 +29,7 @@ from sigstore._internal.oidc import Identity from sigstore._internal.rekor import RekorClient, RekorEntry from sigstore._internal.sct import verify_sct +from sigstore._utils import sha256_streaming logger = logging.getLogger(__name__) @@ -56,11 +58,11 @@ def staging(cls) -> Signer: def sign( self, - input_: bytes, + input_: IO[bytes], identity_token: str, ) -> SigningResult: """Public API for signing blobs""" - sha256_artifact_hash = hashlib.sha256(input_).hexdigest() + input_digest = sha256_streaming(input_) logger.debug("Generating ephemeral keys...") private_key = ec.generate_private_key(ec.SECP384R1()) @@ -102,7 +104,9 @@ def sign( logger.debug("Successfully verified SCT...") # Sign artifact - artifact_signature = private_key.sign(input_, ec.ECDSA(hashes.SHA256())) + artifact_signature = private_key.sign( + input_digest, ec.ECDSA(Prehashed(hashes.SHA256())) + ) b64_artifact_signature = base64.b64encode(artifact_signature).decode() # Prepare inputs @@ -113,7 +117,7 @@ def sign( # Create the transparency log entry entry = self._rekor.log.entries.post( b64_artifact_signature=b64_artifact_signature, - sha256_artifact_hash=sha256_artifact_hash, + sha256_artifact_hash=input_digest.hex(), b64_cert=b64_cert.decode(), ) diff --git a/sigstore/_utils.py b/sigstore/_utils.py index 086a7ebc..506fd3ba 100644 --- a/sigstore/_utils.py +++ b/sigstore/_utils.py @@ -20,7 +20,7 @@ import base64 import hashlib -from typing import Union +from typing import IO, Union from cryptography.hazmat.primitives import serialization from cryptography.hazmat.primitives.asymmetric import ec, rsa @@ -104,3 +104,36 @@ def split_certificate_chain(chain_pem: str) -> list[bytes]: certificate_chain = [(pem_header + c).encode() for c in certificate_chain] return certificate_chain + + +def sha256_streaming(io: IO[bytes]) -> bytes: + """ + Compute the SHA256 of a stream. + + This function does its own internal buffering, so an unbuffered stream + should be supplied for optimal performance. + """ + + # NOTE: This function performs a SHA256 digest over a stream. + # The stream's size is not checked, meaning that the stream's source + # is implicitly trusted: if an attacker is able to truncate the stream's + # source prematurely, then they could conceivably produce a digest + # for a partial stream. This in turn could conceivably result + # in a valid signature for an unintended (truncated) input. + # + # This is currently outside of sigstore-python's threat model: we + # assume that the stream is trusted. + # + # See: https://github.com/sigstore/sigstore-python/pull/329#discussion_r1041215972 + + sha256 = hashlib.sha256() + # Per coreutils' ioblksize.h: 128KB performs optimally across a range + # of systems in terms of minimizing syscall overhead. + view = memoryview(bytearray(128 * 1024)) + + nbytes = io.readinto(view) # type: ignore + while nbytes: + sha256.update(view[:nbytes]) + nbytes = io.readinto(view) # type: ignore + + return sha256.digest() diff --git a/sigstore/_verify/models.py b/sigstore/_verify/models.py index 46f391e8..159792f1 100644 --- a/sigstore/_verify/models.py +++ b/sigstore/_verify/models.py @@ -19,16 +19,16 @@ from __future__ import annotations import base64 -import hashlib import json import logging from dataclasses import dataclass +from typing import IO from cryptography.x509 import Certificate, load_pem_x509_certificate from pydantic import BaseModel from sigstore._internal.rekor import RekorClient, RekorEntry -from sigstore._utils import base64_encode_pem_cert +from sigstore._utils import base64_encode_pem_cert, sha256_streaming logger = logging.getLogger(__name__) @@ -95,14 +95,9 @@ class VerificationMaterials: Represents the materials needed to perform a Sigstore verification. """ - input_: bytes + input_digest: bytes """ - The input that was signed for. - """ - - artifact_hash: str - """ - The hex-encoded SHA256 hash of `input_`. + The SHA256 hash of the verification input, as raw bytes. """ certificate: Certificate @@ -139,13 +134,12 @@ class VerificationMaterials: def __init__( self, *, - input_: bytes, + input_: IO[bytes], cert_pem: str, signature: bytes, offline_rekor_entry: RekorEntry | None, ): - self.input_ = input_ - self.artifact_hash = hashlib.sha256(self.input_).hexdigest() + self.input_digest = sha256_streaming(input_) self.certificate = load_pem_x509_certificate(cert_pem.encode()) self.signature = signature self._offline_rekor_entry = offline_rekor_entry @@ -172,7 +166,7 @@ def rekor_entry(self, client: RekorClient) -> RekorEntry: logger.debug("retrieving rekor entry") entry = client.log.entries.retrieve.post( self.signature, - self.artifact_hash, + self.input_digest.hex(), self.certificate, ) @@ -203,7 +197,9 @@ def rekor_entry(self, client: RekorClient) -> RekorEntry: "content": base64.b64encode(self.signature).decode(), "publicKey": {"content": base64_encode_pem_cert(self.certificate)}, }, - "data": {"hash": {"algorithm": "sha256", "value": self.artifact_hash}}, + "data": { + "hash": {"algorithm": "sha256", "value": self.input_digest.hex()} + }, }, } diff --git a/sigstore/_verify/verifier.py b/sigstore/_verify/verifier.py index c1de54e9..e733c186 100644 --- a/sigstore/_verify/verifier.py +++ b/sigstore/_verify/verifier.py @@ -26,6 +26,7 @@ from cryptography.exceptions import InvalidSignature from cryptography.hazmat.primitives import hashes from cryptography.hazmat.primitives.asymmetric import ec +from cryptography.hazmat.primitives.asymmetric.utils import Prehashed from cryptography.x509 import ( ExtendedKeyUsage, KeyUsage, @@ -217,7 +218,9 @@ def verify( signing_key = materials.certificate.public_key() signing_key = cast(ec.EllipticCurvePublicKey, signing_key) signing_key.verify( - materials.signature, materials.input_, ec.ECDSA(hashes.SHA256()) + materials.signature, + materials.input_digest, + ec.ECDSA(Prehashed(hashes.SHA256())), ) except InvalidSignature: return VerificationFailure(reason="Signature is invalid for input") @@ -231,7 +234,8 @@ def verify( entry = materials.rekor_entry(self._rekor) except RekorEntryMissingError: return RekorEntryMissing( - signature=materials.signature, artifact_hash=materials.artifact_hash + signature=materials.signature, + artifact_hash=materials.input_digest.hex(), ) except InvalidRekorEntryError: return VerificationFailure( diff --git a/test/unit/conftest.py b/test/unit/conftest.py index a0f89175..525e0ba1 100644 --- a/test/unit/conftest.py +++ b/test/unit/conftest.py @@ -101,12 +101,13 @@ def _signing_materials(name: str) -> Tuple[bytes, bytes, bytes]: bundle = RekorBundle.parse_file(bundle) entry = bundle.to_entry() - materials = VerificationMaterials( - input_=file.read_bytes(), - cert_pem=cert.read_text(), - signature=base64.b64decode(sig.read_text()), - offline_rekor_entry=entry, - ) + with file.open(mode="rb", buffering=0) as io: + materials = VerificationMaterials( + input_=io, + cert_pem=cert.read_text(), + signature=base64.b64decode(sig.read_text()), + offline_rekor_entry=entry, + ) return materials diff --git a/test/unit/test_sign.py b/test/unit/test_sign.py index efc33f04..50d1660a 100644 --- a/test/unit/test_sign.py +++ b/test/unit/test_sign.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import io import secrets import pytest @@ -37,7 +38,7 @@ def test_sign_rekor_entry_consistent(signer): token = detect_credential() assert token is not None - payload = secrets.token_bytes(32) + payload = io.BytesIO(secrets.token_bytes(32)) expected_entry = signer.sign(payload, token).log_entry actual_entry = signer._rekor.log.entries.get(log_index=expected_entry.log_index) diff --git a/test/unit/test_utils.py b/test/unit/test_utils.py index 6cc7d412..856cb545 100644 --- a/test/unit/test_utils.py +++ b/test/unit/test_utils.py @@ -14,7 +14,9 @@ import hashlib +import io +import pytest from cryptography import x509 from cryptography.hazmat.primitives import serialization @@ -56,3 +58,15 @@ def test_key_id(): hashlib.sha256(public_key).hexdigest() == "086c0ea25b60e3c44a994d0d5f40b81a0d44f21d63df19315e6ddfbe47373817" ) + + +@pytest.mark.parametrize( + "size", [0, 1, 2, 4, 8, 32, 128, 1024, 128 * 1024, 1024 * 1024, 128 * 1024 * 1024] +) +def test_sha256_streaming(size): + buf = b"x" * size + + expected_digest = hashlib.sha256(buf).digest() + actual_digest = utils.sha256_streaming(io.BytesIO(buf)) + + assert expected_digest == actual_digest