From d4fb3a76110fe3bf393c0c7550259a22e86ade3f Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Sat, 2 Nov 2024 05:19:22 +0000 Subject: [PATCH 1/2] CBMC: Add spec + proof for `poly_add()` This commit adds a CBMC spec and proof for `poly_add()`. Previously, poly_add() had the signature (r[out], a[in], b[in]), allowing the output polynomial to be separate from the two input polynomials. Yet, uses of `poly_add()` always have the form `poly_add(r,r,b)`, aliasing the first and second parameters, and thereby accumulating `b` onto `r`. Since aliasing poses additional verification challenges, and the non-aliased version of `poly_add()` is never used, this commit firstly modifies the signature of `poly_add()` to only take two inputs, accumulating the second onto the first. With that done, the spec and proof in CBMC are straightforward, taking care to use `OLD(...)` and `LOOP_ENTY(...)` in spec and loop invariants to refer to the original values of memory. Signed-off-by: Hanno Becker --- cbmc/proofs/poly_add/Makefile | 54 +++++++++++++++++++++++++ cbmc/proofs/poly_add/cbmc-proof.txt | 3 ++ cbmc/proofs/poly_add/poly_add_harness.c | 27 +++++++++++++ mlkem/indcpa.c | 8 ++-- mlkem/poly.c | 27 ++++++------- mlkem/poly.h | 26 +++++++++++- mlkem/polyvec.c | 6 +-- mlkem/polyvec.h | 2 +- 8 files changed, 130 insertions(+), 23 deletions(-) create mode 100644 cbmc/proofs/poly_add/Makefile create mode 100644 cbmc/proofs/poly_add/cbmc-proof.txt create mode 100644 cbmc/proofs/poly_add/poly_add_harness.c diff --git a/cbmc/proofs/poly_add/Makefile b/cbmc/proofs/poly_add/Makefile new file mode 100644 index 000000000..d4a3516b0 --- /dev/null +++ b/cbmc/proofs/poly_add/Makefile @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: Apache-2.0 + +include ../Makefile_params.common + +HARNESS_ENTRY = harness +HARNESS_FILE = poly_add_harness + +# This should be a unique identifier for this proof, and will appear on the +# Litani dashboard. It can be human-readable and contain spaces if you wish. +PROOF_UID = poly_add + +DEFINES += +INCLUDES += + +REMOVE_FUNCTION_BODY += +UNWINDSET += + +PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c +PROJECT_SOURCES += $(SRCDIR)/mlkem/poly.c + +CHECK_FUNCTION_CONTRACTS=$(MLKEM_NAMESPACE)poly_add +USE_FUNCTION_CONTRACTS= +APPLY_LOOP_CONTRACTS=on +USE_DYNAMIC_FRAMES=1 + +# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead +EXTERNAL_SAT_SOLVER= +CBMCFLAGS=--smt2 + +FUNCTION_NAME = poly_add + +# If this proof is found to consume huge amounts of RAM, you can set the +# EXPENSIVE variable. With new enough versions of the proof tools, this will +# restrict the number of EXPENSIVE CBMC jobs running at once. See the +# documentation in Makefile.common under the "Job Pools" heading for details. +# EXPENSIVE = true + +# This function is large enough to need... +CBMC_OBJECT_BITS = 8 + +# If you require access to a file-local ("static") function or object to conduct +# your proof, set the following (and do not include the original source file +# ("mlkem/poly.c") in PROJECT_SOURCES). +# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i +# include ../Makefile.common +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mlkem/poly.c +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz +# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must +# be set before including Makefile.common, but any use of variables on the +# left-hand side requires those variables to be defined. Hence, _SOURCE, +# _FUNCTIONS, _OBJECTS is set after including Makefile.common. + +include ../Makefile.common diff --git a/cbmc/proofs/poly_add/cbmc-proof.txt b/cbmc/proofs/poly_add/cbmc-proof.txt new file mode 100644 index 000000000..3d1913ebf --- /dev/null +++ b/cbmc/proofs/poly_add/cbmc-proof.txt @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: Apache-2.0 + +# This file marks this directory as containing a CBMC proof. diff --git a/cbmc/proofs/poly_add/poly_add_harness.c b/cbmc/proofs/poly_add/poly_add_harness.c new file mode 100644 index 000000000..0be911f8c --- /dev/null +++ b/cbmc/proofs/poly_add/poly_add_harness.c @@ -0,0 +1,27 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT-0 AND Apache-2.0 + +/* + * Insert copyright notice + */ + +/** + * @file poly_add_harness.c + * @brief Implements the proof harness for basemul_cached function. + */ +#include "poly.h" + +/* + * Insert project header files that + * - include the declaration of the function + * - include the types needed to declare function arguments + */ + +/** + * @brief Starting point for formal analysis + * + */ +void harness(void) { + poly *r, *b; + poly_add(r, b); +} diff --git a/mlkem/indcpa.c b/mlkem/indcpa.c index 21cbf9c21..0d9190c33 100644 --- a/mlkem/indcpa.c +++ b/mlkem/indcpa.c @@ -321,7 +321,7 @@ void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES], } // Arithmetic cannot overflow, see static assertion at the top - polyvec_add(&pkpv, &pkpv, &e); + polyvec_add(&pkpv, &e); polyvec_reduce(&pkpv); polyvec_reduce(&skpv); @@ -395,9 +395,9 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES], poly_invntt_tomont(&v); // Arithmetic cannot overflow, see static assertion at the top - polyvec_add(&b, &b, &ep); - poly_add(&v, &v, &epp); - poly_add(&v, &v, &k); + polyvec_add(&b, &ep); + poly_add(&v, &epp); + poly_add(&v, &k); polyvec_reduce(&b); poly_reduce(&v); diff --git a/mlkem/poly.c b/mlkem/poly.c index 054fa1a3a..ad57789d0 100644 --- a/mlkem/poly.c +++ b/mlkem/poly.c @@ -496,20 +496,19 @@ void poly_reduce(poly *r) { } #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */ -/************************************************* - * Name: poly_add - * - * Description: Add two polynomials; no modular reduction is performed - * - * Arguments: - poly *r: pointer to output polynomial - * - const poly *a: pointer to first input polynomial - * - const poly *b: pointer to second input polynomial - **************************************************/ -void poly_add(poly *r, const poly *a, const poly *b) { - unsigned int i; - for (i = 0; i < MLKEM_N; i++) { - r->coeffs[i] = a->coeffs[i] + b->coeffs[i]; - } +void poly_add(poly *r, const poly *b) { + int i; + for (i = 0; i < MLKEM_N; i++) + // clang-format off + ASSIGNS(i, OBJECT_WHOLE(r)) + INVARIANT(i >= 0 && i <= MLKEM_N) + INVARIANT(FORALL(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == LOOP_ENTRY(*r).coeffs[k0])) + INVARIANT(FORALL(int, k1, 0, i - 1, r->coeffs[k1] == LOOP_ENTRY(*r).coeffs[k1] + b->coeffs[k1])) + DECREASES(MLKEM_N - i) + // clang-format on + { + r->coeffs[i] = r->coeffs[i] + b->coeffs[i]; + } } /************************************************* diff --git a/mlkem/poly.h b/mlkem/poly.h index bcc8dd463..fc6de4bd3 100644 --- a/mlkem/poly.h +++ b/mlkem/poly.h @@ -432,7 +432,31 @@ ENSURES(ARRAY_IN_BOUNDS(int, k, 0, MLKEM_N - 1, r->coeffs, 0, MLKEM_Q - 1)); // clang-format on #define poly_add MLKEM_NAMESPACE(poly_add) -void poly_add(poly *r, const poly *a, const poly *b); +/************************************************************ + * Name: poly_add + * + * Description: Adds two polynomials in place + * + * Arguments: - r: Pointer to input-output polynomial to be added to. + * - b: Pointer to input polynomial that should be added + * to r. Must be disjoint from r. + * + * The coefficients of r and b must be so that the addition does + * not overflow. Otherwise, the behaviour of this function is undefined. + * + ************************************************************/ +// REF-CHANGE: +// The reference implementation uses a 3-argument poly_add. +// We specialize to the accumulator form to avoid reasoning about aliasing. +void poly_add(poly *r, const poly *b) // clang-format off +REQUIRES(IS_FRESH(r, sizeof(poly))) +REQUIRES(IS_FRESH(b, sizeof(poly))) +REQUIRES(FORALL(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX)) +REQUIRES(FORALL(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN)) +ENSURES(FORALL(int, k, 0, MLKEM_N - 1, r->coeffs[k] == OLD(*r).coeffs[k] + b->coeffs[k])) +ASSIGNS(OBJECT_WHOLE(r)); +// clang-format on + #define poly_sub MLKEM_NAMESPACE(poly_sub) void poly_sub(poly *r, const poly *a, const poly *b); diff --git a/mlkem/polyvec.c b/mlkem/polyvec.c index dd2eaa0b6..d0df6fc0d 100644 --- a/mlkem/polyvec.c +++ b/mlkem/polyvec.c @@ -233,7 +233,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, for (i = 1; i < MLKEM_K; i++) { poly_basemul_montgomery_cached(&t, &a->vec[i], &b->vec[i], &b_cache->vec[i]); - poly_add(r, r, &t); + poly_add(r, &t); // abs bounds: < (i+1) * 3/2 * q } @@ -310,9 +310,9 @@ void polyvec_reduce(polyvec *r) { * - const polyvec *a: pointer to first input vector of polynomials * - const polyvec *b: pointer to second input vector of polynomials **************************************************/ -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) { +void polyvec_add(polyvec *r, const polyvec *b) { unsigned int i; for (i = 0; i < MLKEM_K; i++) { - poly_add(&r->vec[i], &a->vec[i], &b->vec[i]); + poly_add(&r->vec[i], &b->vec[i]); } } diff --git a/mlkem/polyvec.h b/mlkem/polyvec.h index ed0d4e219..4bd67b4dc 100644 --- a/mlkem/polyvec.h +++ b/mlkem/polyvec.h @@ -80,6 +80,6 @@ ASSIGNS(OBJECT_WHOLE(x)); void polyvec_reduce(polyvec *r); #define polyvec_add MLKEM_NAMESPACE(polyvec_add) -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); +void polyvec_add(polyvec *r, const polyvec *b); #endif From af5daa911a28aa8efc02d82b614f0f43dc360c59 Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Sat, 2 Nov 2024 05:20:40 +0000 Subject: [PATCH 2/2] CBMC: Add spec + proof for polyvec_basemul_acc_montgomery_cached The proof is less trivial than the ones before since one has to use the output bounds of the lower level functions to show that the accumulation loop does not overflow. Signed-off-by: Hanno Becker --- .../Makefile | 54 +++++++++++++++++++ .../cbmc-proof.txt | 3 ++ ...ec_basemul_acc_montgomery_cached_harness.c | 30 +++++++++++ mlkem/polyvec.c | 33 +++++++++--- mlkem/polyvec.h | 30 ++++++++++- 5 files changed, 141 insertions(+), 9 deletions(-) create mode 100644 cbmc/proofs/polyvec_basemul_acc_montgomery_cached/Makefile create mode 100644 cbmc/proofs/polyvec_basemul_acc_montgomery_cached/cbmc-proof.txt create mode 100644 cbmc/proofs/polyvec_basemul_acc_montgomery_cached/polyvec_basemul_acc_montgomery_cached_harness.c diff --git a/cbmc/proofs/polyvec_basemul_acc_montgomery_cached/Makefile b/cbmc/proofs/polyvec_basemul_acc_montgomery_cached/Makefile new file mode 100644 index 000000000..a0e9205a1 --- /dev/null +++ b/cbmc/proofs/polyvec_basemul_acc_montgomery_cached/Makefile @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: Apache-2.0 + +include ../Makefile_params.common + +HARNESS_ENTRY = harness +HARNESS_FILE = polyvec_basemul_acc_montgomery_cached_harness + +# This should be a unique identifier for this proof, and will appear on the +# Litani dashboard. It can be human-readable and contain spaces if you wish. +PROOF_UID = polyvec_basemul_acc_montgomery_cached + +DEFINES += +INCLUDES += + +REMOVE_FUNCTION_BODY += +UNWINDSET += $(MLKEM_NAMESPACE)polyvec_basemul_acc_montgomery_cached.0:4 # Largest value of MLKEM_K + +PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c +PROJECT_SOURCES += $(SRCDIR)/mlkem/polyvec.c + +CHECK_FUNCTION_CONTRACTS=$(MLKEM_NAMESPACE)polyvec_basemul_acc_montgomery_cached +USE_FUNCTION_CONTRACTS=$(MLKEM_NAMESPACE)poly_basemul_montgomery_cached $(MLKEM_NAMESPACE)poly_add +APPLY_LOOP_CONTRACTS=on +USE_DYNAMIC_FRAMES=1 + +# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead +EXTERNAL_SAT_SOLVER= +CBMCFLAGS=--smt2 + +FUNCTION_NAME = polyvec_basemul_acc_montgomery_cached + +# If this proof is found to consume huge amounts of RAM, you can set the +# EXPENSIVE variable. With new enough versions of the proof tools, this will +# restrict the number of EXPENSIVE CBMC jobs running at once. See the +# documentation in Makefile.common under the "Job Pools" heading for details. +# EXPENSIVE = true + +# This function is large enough to need... +CBMC_OBJECT_BITS = 8 + +# If you require access to a file-local ("static") function or object to conduct +# your proof, set the following (and do not include the original source file +# ("mlkem/poly.c") in PROJECT_SOURCES). +# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i +# include ../Makefile.common +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mlkem/poly.c +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz +# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must +# be set before including Makefile.common, but any use of variables on the +# left-hand side requires those variables to be defined. Hence, _SOURCE, +# _FUNCTIONS, _OBJECTS is set after including Makefile.common. + +include ../Makefile.common diff --git a/cbmc/proofs/polyvec_basemul_acc_montgomery_cached/cbmc-proof.txt b/cbmc/proofs/polyvec_basemul_acc_montgomery_cached/cbmc-proof.txt new file mode 100644 index 000000000..3d1913ebf --- /dev/null +++ b/cbmc/proofs/polyvec_basemul_acc_montgomery_cached/cbmc-proof.txt @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: Apache-2.0 + +# This file marks this directory as containing a CBMC proof. diff --git a/cbmc/proofs/polyvec_basemul_acc_montgomery_cached/polyvec_basemul_acc_montgomery_cached_harness.c b/cbmc/proofs/polyvec_basemul_acc_montgomery_cached/polyvec_basemul_acc_montgomery_cached_harness.c new file mode 100644 index 000000000..bb0d3e0b8 --- /dev/null +++ b/cbmc/proofs/polyvec_basemul_acc_montgomery_cached/polyvec_basemul_acc_montgomery_cached_harness.c @@ -0,0 +1,30 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT-0 AND Apache-2.0 + +/* + * Insert copyright notice + */ + +/** + * @file polyvec_basemul_acc_montgomery_cached_harness.c + * @brief Implements the proof harness for basemul_cached function. + */ +#include "polyvec.h" + +/* + * Insert project header files that + * - include the declaration of the function + * - include the types needed to declare function arguments + */ + +/** + * @brief Starting point for formal analysis + * + */ +void harness(void) { + poly *r; + polyvec *a, *b; + polyvec_mulcache *b_cached; + + polyvec_basemul_acc_montgomery_cached(r, a, b, b_cached); +} diff --git a/mlkem/polyvec.c b/mlkem/polyvec.c index d0df6fc0d..10df385e0 100644 --- a/mlkem/polyvec.c +++ b/mlkem/polyvec.c @@ -226,18 +226,35 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, POLYVEC_BOUND(b, NTT_BOUND); POLYVEC_BOUND(b_cache, MLKEM_Q); - unsigned int i; + int i; poly t; poly_basemul_montgomery_cached(r, &a->vec[0], &b->vec[0], &b_cache->vec[0]); - for (i = 1; i < MLKEM_K; i++) { - poly_basemul_montgomery_cached(&t, &a->vec[i], &b->vec[i], - &b_cache->vec[i]); - poly_add(r, &t); - // abs bounds: < (i+1) * 3/2 * q - } - // abs bounds: < MLKEM_K * 3/2 * q <= 4 * 3/2 * q = 19974 + for (i = 1; i < MLKEM_K; i++) + // clang-format off + ASSIGNS(i, t, OBJECT_WHOLE(r)) + INVARIANT(i >= 1 && i <= MLKEM_K) + INVARIANT(ARRAY_IN_BOUNDS(int, k, 0, MLKEM_N - 1, r->coeffs, + i * (-3 * HALF_Q + 1), i * (3 * HALF_Q - 1))) + DECREASES(MLKEM_K - i) + // clang-format on + { + poly_basemul_montgomery_cached(&t, &a->vec[i], &b->vec[i], + &b_cache->vec[i]); + poly_add(r, &t); + // abs bounds: < (i+1) * 3/2 * q + } + + // Those bounds are true for the C implementation, but not needed + // in the higher level bounds reasoning. It is thus best to omit + // them from the spec to not unnecessarily constraint native implementations. + ASSERT( + ARRAY_IN_BOUNDS(int, k, 0, MLKEM_N - 1, r->coeffs, + MLKEM_K * (-3 * HALF_Q + 1), MLKEM_K * (3 * HALF_Q - 1)), + "polyvec_basemul_acc_montgomery_cached output bounds"); + // TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set + POLY_BOUND(r, MLKEM_K * 3 * HALF_Q); } #else /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, diff --git a/mlkem/polyvec.h b/mlkem/polyvec.h index 4bd67b4dc..61aaa0a37 100644 --- a/mlkem/polyvec.h +++ b/mlkem/polyvec.h @@ -40,9 +40,37 @@ void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, // REF-CHANGE: This function does not exist in the reference implementation #define polyvec_basemul_acc_montgomery_cached \ MLKEM_NAMESPACE(polyvec_basemul_acc_montgomery_cached) +/************************************************* + * Name: polyvec_basemul_acc_montgomery_cached + * + * Description: Scalar product of two vectors of polynomials in NTT domain, + * using mulcache for second operand. + * + * Bounds: + * - a is assumed to be coefficient-wise < q in absolute value. + * - No bounds guarantees for the coefficients in the result. + * + * Arguments: - poly *r: pointer to output polynomial + * - const polyvec *a: pointer to first input polynomial vector + * - const polyvec *b: pointer to second input polynomial vector + * - const polyvec_mulcache *b_cache: pointer to mulcache + * for second input polynomial vector. Can be computed + * via polyvec_mulcache_compute(). + **************************************************/ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a, const polyvec *b, - const polyvec_mulcache *b_cache); + const polyvec_mulcache *b_cache) + // clang-format off +REQUIRES(IS_FRESH(r, sizeof(poly))) +REQUIRES(IS_FRESH(a, sizeof(polyvec))) +REQUIRES(IS_FRESH(b, sizeof(polyvec))) +REQUIRES(IS_FRESH(b_cache, sizeof(polyvec_mulcache))) +// Input is coefficient-wise < q in absolute value +REQUIRES(FORALL(int, k1, 0, MLKEM_K - 1, + ARRAY_IN_BOUNDS(int, k2, 0, MLKEM_N - 1, + a->vec[k1].coeffs, -(MLKEM_Q - 1), (MLKEM_Q - 1)))) +ASSIGNS(OBJECT_WHOLE(r)); +// clang-format on // REF-CHANGE: This function does not exist in the reference implementation #define polyvec_mulcache_compute MLKEM_NAMESPACE(polyvec_mulcache_compute)