diff --git a/library/rmc/stubs/C/hashset/hashset.c b/library/rmc/stubs/C/hashset/hashset.c new file mode 100644 index 0000000000000..1f5d96e37ec85 --- /dev/null +++ b/library/rmc/stubs/C/hashset/hashset.c @@ -0,0 +1,153 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT + +#include +#include +#include + +// This HashSet stub implementation is supposed to work with c_hashset.rs. +// Please refer to that file for an introduction to the idea of a HashSet and +// some other implemenntation details. Public methods defined in c_hashset.rs +// act as wrappers around methods implemented here. + +// As noted before, this HashSet implementation is specifically for inputs which +// are u16. The details below can be extended to larger sets if necessary. The +// domain of the output is i16. +// +// The hash function that we choose is the identity function. +// For all input x, hasher(x) = x. For our case, this satisfies all the +// requirements of an ideal hash function, it is 1:1 and there exists only one +// element in the input domain for each value in the output domain. +// +// An important thing to note here is that the hash function can be +// appropriately modified depending on the type of the input value which is +// stored in the hashset. As an example, if the HashSet needs to store a tuple +// of integers, say , the hash function can be modified to be: +// +// hash((x, y)) = prime * x + y; +// +// Although this value can be greater than the chosen output domain, the +// function is still sound if the value wraps around because it guarantees a +// unique output for a given pair of x and y. +// +// Another way to think about this problem could be through the lens of +// uninterpreted functions where : if x == y => f(x) == f(y). Exploring this can +// be future work. The idea would be to implement a HashSet similar to that seen +// in functional programming languages. 
+// +// For the purpose of a HashSet, we dont necessarily need a SENTINEL outside the +// range of the hashing function because of the way we design the HashSet +// operations. +const uint16_t SENTINEL = 1; + +uint16_t hasher(uint16_t value) { + return value; +} + +// We initialize all values of the domain to be 0 by initializing it with +// calloc. This lets us get around the problem of looping through all elements +// to initialize them individually with a special value. +// +// The domain array is to be interpreted such that +// if domain[index] != 0, value such that hash(value) = index is present. +// +// However, this logic does not work for the value 0. For this, we choose the +// SENTINEL value to initialize that element. +typedef struct { + uint16_t* domain; +} hashset; + +// Ideally, this approach is much more suitable if we can work with arrays of +// arbitrary size, specifically infinity. This would allow us to define hash +// functions for any type because the output domain can be considered to be +// infinite. However, CBMC currently does not handle unbounded arrays correctly. +// Please see: https://github.com/diffblue/cbmc/issues/6261. Even in that case, +// we might run into theoretical limitations of how solvers handle uninterpreted +// symbols such as unbounded arrays. For the case of this API, the consumer can +// request for an arbitrary number of HashSets which can be dynamically chosen. +// As a result, the solver cannot know apriori how many unbounded arrays it +// needs to initialize which might lead to errors. +// +// Firecracker uses HashSet (src/devices/src/virtio/vsock/unix/muxer.rs). +// But for working with u32s, we run into the problem that the entire domain +// cannot be allocated through malloc. We run into the error "array too large +// for flattening". For that reason, we choose to work with u16 to demonstrate +// the feasability of this approach. However, it should be extensible to other +// integer types. 
+// +// Returns: pointer to a hashset instance which tracks the domain memory. This +// pointer is used in later callbacks such as insert() and remove(). +hashset* hashset_new() { + hashset* set = (hashset *) malloc(sizeof(hashset)); + // Initializes value all indexes to be 0, indicating that those elements are + // not present in the HashSet. + set->domain = calloc(UINT16_MAX, sizeof(uint16_t)); + // For 0, choose another value to achieve the same. + set->domain[0] = SENTINEL; + return set; +} + +// For insert, we need to first check if the value exists in the HashSet. If it +// does, we immediately return a 0 (false) value back. +// +// If it doesnt, then we mark that element of the domain array with the value to +// indicate that this element has been inserted. For element 0, we mark it with +// the SENTINEL. +// +// To check if a value exists, we simply check if domain[hash] != 0 and +// in the case of 0 if domain[0] != SENTINEL. +// +// Returns: an integer value 1 or 0. If the value is already present in the +// hashset, this function returns a 0. If the value is sucessfully inserted, we +// return a 1. +uint32_t hashset_insert(hashset* s, uint16_t value) { + uint16_t hash = hasher(value); + + if ((hash == 0 && s->domain[hash] != SENTINEL) || + (hash !=0 && s->domain[hash] != 0)) { + return 0; + } + + s->domain[hash] = value; + return 1; +} + +// We perform a similar check here as described in hashset_insert(). We do not +// duplicate code so as to not compute the hash twice. This can be improved. +// +// Returns: an integer value 1 or 0. If the value is present in the hashset, +// this function returns a 1, otherwise 0. +uint32_t hashset_contains(hashset* s, uint16_t value) { + uint16_t hash = hasher(value); + + if ((hash == 0 && s->domain[hash] != SENTINEL) || + (hash != 0 && s->domain[hash] != 0)) { + return 1; + } + + return 0; +} + +// We check if the element exists in the array. If it does not, we return a 0 +// (false) value back. 
If it does, we mark it with 0 and in the case of 0, we +// mark it with the SENTINEL and return 1. +// +// Returns: an integer value 1 or 0. If the value is not present in the hashset, +// this function returns a 0. If the value is sucessfully removed from the +// hashset, it returns a 1. +uint32_t hashset_remove(hashset* s, uint16_t value) { + uint16_t hash = hasher(value); + + if ((hash == 0 && s->domain[hash] == SENTINEL) || + (hash !=0 && s->domain[hash] == 0)) { + return 0; + } + + if (hash == 0) { + s->domain[hash] = SENTINEL; + } else { + s->domain[hash] = 0; + } + + return 1; +} diff --git a/library/rmc/stubs/C/vec/vec.c b/library/rmc/stubs/C/vec/vec.c new file mode 100644 index 0000000000000..9a30898a1c2c2 --- /dev/null +++ b/library/rmc/stubs/C/vec/vec.c @@ -0,0 +1,182 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT + +#include +#include +#include +#include + +// This Vector stub implementation is supposed to work with c_vec.rs. Please +// refer to that file for a detailed explanation about the workings of this +// abstraction. Public methods implemented in c_vec.rs act as wrappers around +// methods implemented here. + +// __CPROVER_max_malloc_size is dependent on the number of offset bits used to +// represent a pointer variable. By default, this is chosen to be 56, in which +// case the max_malloc_size is 2 ** (offset_bits - 1). We could go as far as to +// assign the default capacity to be the max_malloc_size but that would be overkill. +// Instead, we choose a high-enough value 2 ** 10. Another reason to do +// this is that it would be easier for the solver to reason about memory if multiple +// Vectors are initialized by the abstraction consumer. +// +// For larger array sizes such as 2 ** (31 - 1) we encounter "array size too large +// for flattening" error. 
+#define DEFAULT_CAPACITY 1024 +#define MAX_MALLOC_SIZE 18014398509481984 + +// A Vector is a dynamically growing array type with contiguous memory. We track +// allocated memory, the length of the Vector and the capacity of the +// allocation. + +// As can be seen from the pointer to mem (unint32_t*), we track memory in terms +// of words. The current implementation works only if the containing type is +// u32. This was specifically chosen due to a use case seen in the Firecracker +// codebase. This structure is used to communicate over the FFI boundary. +// Future work: +// Ideally, the pointer to memory would be uint8_t* - representing that we treat +// memory as an array of bytes. This would allow us to be generic over the type +// of the element contained in the Vector. In that case, we would have to treat +// every sizeof(T) bytes as an indivdual element and cast memory accordingly. +typedef struct { + uint32_t* mem; + size_t len; + size_t capacity; +} vec; + +// The grow operation resizes the vector and copies its original contents into a +// new allocation. This is one of the more expensive operations for the solver +// to reason about and one way to get around this problem is to use a large +// allocation size. We also implement sized_grow which takes a argument +// definining the minimum number of additional elements that need to be fit into +// the Vector memory. This aims to replicate behavior as seen in the Rust +// standard library where the size of the vector is decided based on the +// following equation: +// new_capacity = max(capacity * 2, capacity + additional). +// Please refer to method amortized_grow in raw_vec.rs in the Standard Library +// for additional information. +// The current implementation performance depends on CBMCs performance about +// reasoning about realloc. If CBMC does better, do would we in the case of +// this abstraction. 
+// +// One important callout to make here is that because we allocate a large enough +// buffer, we cant reason about buffer overflow bugs. This is because the +// allocated memory will (most-likely) always have enough space allocated after +// the required vec capacity. +// +// Future work: +// Ideally, we would like to get around the issue of resizing altogether since +// CBMC supports unbounded arrays. In that case, we would allocate memory of +// size infinity and work with that. For program verification, this would +// optimize a lot of operations since the solver does not really have to worry +// about the bounds of memory. The appropriate constant for capacity would be +// __CPROVER_constant_infinity_uint but this is currently blocked due to +// incorrect translation of the constant: https://github.com/diffblue/cbmc/issues/6261. +// +// Another way to approach this problem would be to implement optimizations in +// the realloc operation of CBMC. Rather than allocating a new memory block and +// copying over elements, we can track only the end pointer of the memory and +// shift it to track the new length. Since this behavior is that of the +// allocator, the consumer of the API is blind to it. +void vec_grow_exact(vec* v, size_t new_cap) { + uint32_t* new_mem = (uint32_t* ) realloc(v->mem, new_cap * sizeof(*v->mem)); + + v->mem = new_mem; + v->capacity = new_cap; +} + +void vec_grow(vec* v) { + size_t new_cap = v->capacity * 2; + if (new_cap > MAX_MALLOC_SIZE) { + // Panic if the new size requirement is greater than max size that can + // be allocated through malloc. 
+ assert(0); + } + + vec_grow_exact(v, new_cap); +} + +void vec_sized_grow(vec* v, size_t additional) { + size_t min_cap = v->capacity + additional; + size_t grow_cap = v->capacity * 2; + + // This resembles the Rust Standard Library behavior - amortized_grow in + // alloc/raw_vec.rs + // + // Reference: https://doc.rust-lang.org/src/alloc/raw_vec.rs.html#421 + size_t new_cap = min_cap > grow_cap ? min_cap : grow_cap; + if (new_cap > MAX_MALLOC_SIZE) { + // Panic if the new size requirement is greater than max size that can + // be allocated through malloc. + assert(0); + } + + vec_grow_exact(v, new_cap); +} + +vec* vec_new() { + vec *v = (vec *) malloc(sizeof(vec)); + // Default size is DEFAULT_CAPACITY. We compute the maximum number of + // elements to ensure that allocation size is aligned. + size_t max_elements = DEFAULT_CAPACITY / sizeof(*v->mem); + v->mem = (uint32_t *) malloc(max_elements * sizeof(*v->mem)); + v->len = 0; + v->capacity = max_elements; + // Return a pointer to the allocated vec structure, which is used in future + // callbacks. + return v; +} + +vec* vec_with_capacity(size_t capacity) { + vec *v = (vec *) malloc(sizeof(vec)); + if (capacity > MAX_MALLOC_SIZE) { + // Panic if the new size requirement is greater than max size that can + // be allocated through malloc. + assert(0); + } + + v->mem = (uint32_t *) malloc(capacity * sizeof(*v->mem)); + v->len = 0; + v->capacity = capacity; + return v; +} + +void vec_push(vec* v, uint32_t elem) { + // If we have already reached capacity, resize the Vector before + // pushing in new elements. + if (v->len == v->capacity) { + // Ensure that we have capacity to hold atleast one more element + vec_sized_grow(v, 1); + } + + v->mem[v->len] = elem; + v->len += 1; +} + +uint32_t vec_pop(vec* v) { + assert(v->len > 0); + v->len -= 1; + + return v->mem[v->len]; +} + +void vec_append(vec* v1, vec* v2) { + // Reserve enough space before adding in new elements. 
+ vec_sized_grow(v1, v2->len); + // Perform a memcpy of elements which is cheaper than pushing each element + // at once. + memcpy(v1->mem + v1->len, v2->mem, v2->len * sizeof(*v2->mem)); + v1->len = v1->len + v2->len; +} + +size_t vec_len(vec* v) { + return v->len; +} + +size_t vec_cap(vec* v) { + return v->capacity; +} + +void vec_free(vec* v) { + free(v->mem); + free(v); +} diff --git a/library/rmc/stubs/README.md b/library/rmc/stubs/README.md new file mode 100644 index 0000000000000..c2972a8c9d8e0 --- /dev/null +++ b/library/rmc/stubs/README.md @@ -0,0 +1,4 @@ +Verification-friendly Vector stubs +---------- + + diff --git a/library/rmc/stubs/Rust/hashset/c_hashset.rs b/library/rmc/stubs/Rust/hashset/c_hashset.rs new file mode 100644 index 0000000000000..8f8fbec8b4c4d --- /dev/null +++ b/library/rmc/stubs/Rust/hashset/c_hashset.rs @@ -0,0 +1,98 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// NOTE: Code in this file and hashset.c is experimental and is meant to be a +// proof-of-concept implementation of the idea. It is unsound and might not work +// with all test cases. More details below. + +// CHashSet is an abstraction of the HashSet library. This is also implemented as +// a Rust frontend and a C backend (similar to CVec). HashSets are hard to reason +// about for verification tools due to the nature of hash functions. To that end, +// this program tries to implement a HashSet for u16 values. +// +// A typical hashset is defined using two main components: +// +// 1. Hash function which maps the values in the input domain to a set of values +// in an output domain. Ideally, the output domain is larger than the input domain +// to ensure that there are special values such as the SENTINEL value which cannot +// be generated through the hashing function. 
Hash functions are 1:1 injections - +// for a certain input, they will deterministically generate the same output value +// and that no other input will generate that hash value. +// For our case, we have implemented this idea for u16. This implies that +// the input domain is <0 .. u16::MAX>. The output domain is chosen to be +// i16 <-i16::MAX .. i16::MAX>. We can theoretically choose any output domain +// which can provide us with a special value such that it does not lie in the range +// of the hash function. +// +// 2. HashSets also have a map which allow us to check the existence of an element +// in amortized constant O(1) time. They use the hashed value as the key into the +// map to check if a truth value is present. For implementing HashMaps however, we +// need to store the mapped value at the hashed location. +// +// We implement this idea in hashset.c, please refer to that file for implementation +// specifics. + +// c_hashset consists of a pointer to the memory which tracks the hashset allocation +// in the C backend. This is used to exchange information over the FFI boundary. +// +// In theory, this can also be implemented purely in Rust. We chose to implement +// using the C-FFI to leverage CBMC constructs. +// +// But it important to note here that RMC currently does not support unbounded +// structures and arrays; +// Tracking issue: https://github.com/model-checking/rmc/issues/311 +#[repr(C)] +pub struct c_hashset { + domain: *mut int16_t, +} + +// All of the functions below call into implementations defined in vec.c. +// +// For other related future work on how this interface can be automatically +// generated and made cleaner, please refer c_vec.rs. +extern "C" { + // Returns a pointer to a new c_hashset structure. + fn hashset_new() -> *mut c_hashset; + + // Inserts a new value in the hashset. If the value is already present, + // this function returns 0 else, returns 1. 
+ fn hashset_insert(ptr: *mut c_hashset, value: uint16_t) -> uint32_t; + + // Checks if the value is contained in the hashset. Returns 1 if present, 0 + // otherwise. + fn hashset_contains(ptr: *mut c_hashset, value: uint16_t) -> uint32_t; + + // Removes a value from the hashset. If the value is not present, it returns 0 + // else 1. + fn hashset_remove(ptr: *mut c_hashset, value: uint16_t) -> uint32_t; +} + +// The HashSet interface exposed to the user only tracks the pointer to the +// low-level c_hashset structure. All methods defined on this structure act as +// wrappers and call into the C implementation. +// +// The implementation is currently not generic over the contained type. +pub struct HashSet { + ptr: *mut c_hashset, + _marker: PhantomData, +} + +// Wrapper methods which ensure that consumer code does not have to make calls +// to unsafe C-FFI functions. +impl HashSet { + pub fn new() -> Self { + unsafe { HashSet { ptr: hashset_new(), _marker: Default::default() } } + } + + pub fn insert(&mut self, value: uint16_t) -> bool { + unsafe { hashset_insert(self.ptr, value) != 0 } + } + + pub fn contains(&self, value: &uint16_t) -> bool { + unsafe { hashset_contains(self.ptr, *value) != 0 } + } + + pub fn remove(&mut self, value: uint16_t) -> bool { + unsafe { hashset_remove(self.ptr, value) != 0 } + } +} diff --git a/library/rmc/stubs/Rust/vec/c_vec.rs b/library/rmc/stubs/Rust/vec/c_vec.rs new file mode 100644 index 0000000000000..269e49da29f32 --- /dev/null +++ b/library/rmc/stubs/Rust/vec/c_vec.rs @@ -0,0 +1,201 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT + +mod utils; +use utils::libc::{int16_t, size_t, uint16_t, uint32_t}; + +use std::marker::PhantomData; + +// CVec is an abstraction of the Vector library which is implemented as a Rust-based +// frontend and a C based backend. 
All public facing methods here are implemented +// as wrappers around FFI functions which call into methods implemented in C. There +// were multiple reasons as to why this abstractions was conceived: +// +// 1. Reduce the cost of translation: RMC translates Rust code into equivalent +// gotoC representation which is used for verification with CBMC. However, this +// might introduce additional overhead due to nesting of calls, monomorphization, +// handling generics, etc. CVec gets around that issue by making direct calls +// to the C implementation. This is usually hard to reason about for most other +// frameworks since they are unable to handle unsafe code. But because of the +// way RMC works, it is almost zero cost to achieve this. +// +// 2. Leverage CBMC primitives: Some CBMC primitives cannot yet be correctly +// translated/handled by RMC, for instance: __CPROVER_constant_infinity_uint. If +// there are improvements in CBMC, a quick way to test their applicability in +// RMC would be to work with this abstraction and perform experiments. + +// A c_vec consists of a pointer to allocated memory, the capacity of the allocation and +// the length of the vector to be used in verification. This structure is also +// defined in c_vec and is used across the FFI boundary. +// +// An important callout to make here is that this structure is currently only defined +// to work with a Vec. This code was meant to experiment with and demonstrate +// the ability to work with CBMCs C interface. +#[repr(C)] +pub struct c_vec { + mem: *mut uint32_t, + len: size_t, + capacity: size_t, +} + +// All of the functions below call into implementations defined in vec.c +// +// We could also move to a more polished definition which is defined in a .h header +// file which is what this interface would need to care about. For now, the +// definition and the implementation reside in vec.c. 
+// +// Although these are defined manually, it might be worthwhile to look at projects +// such as cbindgen which can generate automatically generate headers for Rust +// code which exposes a public interface. For instance, we could define generic +// Vector representations and have the framework generate headers for us which we +// can then implement. In that case, we would have to implement the C backend in +// such a way that it can handle types of arbitrary sizes by casting memory blocks +// and Vector elements and treating them as such. +// Reference: https://github.com/eqrion/cbindgen +extern "C" { + // Returns pointer to a new c_vec structure. The default capacity of the allocated + // vec is (1073741824 / sizeof(u32)) at the maximum. + fn vec_new() -> *mut c_vec; + + // Returns pointer to a new c_vec structure. The capacity is provided as an + // argument. + fn vec_with_capacity(cap: size_t) -> *mut c_vec; + + // Pushes a new elements to the Vector. If there is not enough space to allocate + // the element, the Vector will resize itself. + fn vec_push(ptr: *mut c_vec, elem: uint32_t); + + // Pop an element out of the Vector. The wrapper function contains a check + // to ensure that we are not popping a value off of an empty Vector. + fn vec_pop(ptr: *mut c_vec) -> uint32_t; + + // Returns the current capacity of allocation. + fn vec_cap(ptr: *mut c_vec) -> size_t; + + // Returns the length of the Vector + fn vec_len(ptr: *mut c_vec) -> size_t; + + // Append Vector represented by ptr2 to ptr1. + fn vec_append(ptr1: *mut c_vec, ptr2: *mut c_vec); + + // Grow the allocated vector in size such that it accomodates atleast + // additional elements. This is similar in behavior to the implementation of + // the Rust Standard Library. Please refer to vec.c for more details. 
+ fn vec_sized_grow(ptr: *mut c_vec, additional: size_t); + + // Free allocated memory for the Vec + fn vec_free(ptr: *mut c_vec); +} + +// The Vec interface which is exposed to the user only tracks the pointer to the +// low-level c_vec structure. All methods defined on this structure act as wrappers +// and call into the C implementation. +// +// The implementation is currently not generic over the contained type. +pub struct Vec { + ptr: *mut c_vec, + _marker: PhantomData, +} + +// Wrapper methods which ensure that consumer code does not have to make calls +// to unsafe C-FFI functions. +impl Vec { + pub fn ptr(&mut self) -> *mut c_vec { + return self.ptr; + } + + pub fn new() -> Self { + unsafe { Vec { ptr: vec_new(), _marker: Default::default() } } + } + + pub fn with_capacity(cap: usize) -> Self { + unsafe { Vec { ptr: vec_with_capacity(cap), _marker: Default::default() } } + } + + pub fn push(&mut self, elem: uint32_t) { + unsafe { + vec_push(self.ptr, elem); + } + } + + // Check if the length of the Vector is 0, in which case we return a None. + // Otherwise, we make a call to the vec_pop() function and wrap the result around + // a Some. + pub fn pop(&mut self) -> Option { + if self.len() == 0 { None } else { unsafe { Some(vec_pop(self.ptr)) } } + } + + pub fn append(&mut self, other: &mut Self) { + unsafe { + vec_append(self.ptr, other.ptr()); + } + } + + pub fn capacity(&self) -> usize { + unsafe { vec_cap(self.ptr) as usize } + } + + pub fn len(&self) -> usize { + unsafe { vec_len(self.ptr) as usize } + } + + pub fn reserve(&mut self, additional: usize) { + unsafe { + vec_sized_grow(self.ptr, additional); + } + } +} + +impl Drop for Vec { + // We have implemented Vec for u32 which does not have any drop semantics + // associated with it. We are only responsible for deallocating the space + // allocated on the C backend for the Vec and the c_vec structure. 
+ // + // For elements of the Vector which need a custom drop, the ideal behavior + // here would be to pop each element from the Vector and call drop_in_place(). + // Refer: https://doc.rust-lang.org/std/ptr/fn.drop_in_place.html + fn drop(&mut self) { + unsafe { + vec_free(self.ptr); + } + } +} + +// Here, we define the rmc_vec! macro which behaves similar to the vec! macro +// found in the std prelude. If we try to override the vec! macro, we get error: +// +// = note: `vec` could refer to a macro from prelude +// note: `vec` could also refer to the macro defined here +// +// Relevant Zulip stream: +// https://rust-lang.zulipchat.com/#narrow/stream/122651-general/topic/Override.20prelude.20macro +// +// The workaround for now is to define a new macro. rmc_vec! will initialize a new +// Vec based on its definition in this file. We support two types of initialization +// expressions: +// +// [ elem; count] - initialize a Vector with element value `elem` occurring count times. +// [ elem1, elem2, ...] - initialize a Vector with elements elem1, elem2... +#[cfg(abs_type = "c-ffi")] +#[macro_export] +macro_rules! rmc_vec { + ( $val:expr ; $count:expr ) => + ({ + let mut result = Vec::with_capacity($count); + let mut i: usize = 0; + while i < $count { + result.push($val); + i += 1; + } + result + }); + ( $( $xs:expr ),* ) => { + { + let mut result = Vec::new(); + $( + result.push($xs); + )* + result + } + }; +} diff --git a/library/rmc/stubs/Rust/vec/mod.rs b/library/rmc/stubs/Rust/vec/mod.rs new file mode 100644 index 0000000000000..4a55687978038 --- /dev/null +++ b/library/rmc/stubs/Rust/vec/mod.rs @@ -0,0 +1,7 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR MIT + +pub mod c_vec; +pub mod noback_vec; +pub mod rmc_vec; +pub mod utils; diff --git a/library/rmc/stubs/Rust/vec/noback_vec.rs b/library/rmc/stubs/Rust/vec/noback_vec.rs new file mode 100644 index 0000000000000..9bbd109da04c9 --- /dev/null +++ b/library/rmc/stubs/Rust/vec/noback_vec.rs @@ -0,0 +1,246 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use std::marker::PhantomData; +use std::mem; + +// NoBackVec implements an abstraction of the Vector library which tracks only +// the length of the vector. It does not contain a backing store which implies +// that writes only increment the length and all reads return a non-deterministic +// value. +// +// This abstraction is particularly effective for use cases where the customer +// code only cares about the length of the vector. All length queries are +// fast because the solver does not have to reason about the memory model at all. +// +// This abstraction has several limitations however. Since it does not model any +// memory, defining general methods which operate on the values of the vector is +// hard and in some cases, unsound. Please see the README.md for a more in-depth +// discussion of potential improvements to this abstraction. +// +// NOTE: It would also be difficult to soundly model a Vector where the contained +// type has a non-trivial drop method defined for it. + +// __CPROVER_max_malloc_size is dependent on the number of offset bits used to +// represent a pointer variable. By default, this is chosen to be 56, in which +// case the max_malloc_size is 2 ** (offset_bits - 1). We could go as far as to +// assign the default capacity to be the max_malloc_size but that would be overkill. +// Instead, we choose a high-enough value 2 ** (31 - 1). 
Another reason to do +// this is that it would be easier for the solver to reason about memory if multiple +// Vectors are initialized by the abstraction consumer. +const DEFAULT_CAPACITY: usize = 1073741824; +const MAX_MALLOC_SIZE: usize = 18014398509481984; + +// The Vec structure here models the length and the capacity. +pub struct Vec { + len: usize, + capacity: usize, + // We use a _marker variable since we want the Vector to be generic over type + // T. It is a zero-sized type which is used to mark things such that they act + // like they own a T. + _marker: PhantomData, +} + +impl Vec { + // The standard library Vec implementation calls reserve() to reserve + // space for an additional element -> self.reserve(1). However, the + // semantics of reserve() are ambiguous. reserve(num) allocates space for + // "atleast num more elements of the containing type". The operation can + // be found in function `grow_amortized()` in raw_vec.rs in the standard + // library. The logic for choosing a new value is: + // self.cap = max(self.cap * 2, self.len + additional) + // We try to implement similar semantics here. + fn grow(&mut self, additional: usize) { + let new_len = self.len + additional; + let grow_cap = self.capacity * 2; + let new_capacity = if new_len > grow_cap { new_len } else { grow_cap }; + + if new_capacity > MAX_MALLOC_SIZE { + panic!("Malloc failed to allocate enough memory"); + } + + self.capacity = new_capacity; + } +} + +impl Vec { + pub fn new() -> Vec { + // By default, we create a vector with a high default capacity. An + // important callout to make here is that it prevents us from discovering + // buffer-overflow bugs since we will (most-likely) always have enough + // space allocated additional to the required vec capacity. + // NOTE: This is however not a concern for this abstaction. 
+ Vec { len: 0, capacity: DEFAULT_CAPACITY, _marker: Default::default() } + } + + // Even though we dont model any memory, we can soundly model the capacity + // of the allocation. + pub fn with_capacity(capacity: usize) -> Self { + Vec { len: 0, capacity: capacity, _marker: Default::default() } + } + + pub fn push(&mut self, elem: T) { + // Please refer to grow() for better understanding the semantics of reserve(). + if self.capacity == self.len { + self.reserve(1); + } + + assert!(self.capacity >= self.len); + // We only increment the length of the vector disregarding the actual + // element added to the Vector. + self.len += 1; + } + + // We check if there are any elements in the Vector. If not, we return a None + // otherwise we return a nondeterministic value since we dont track any concrete + // values in the Vector. + pub fn pop(&mut self) -> Option { + if self.len == 0 { + None + } else { + self.len -= 1; + Some(__nondet::()) + } + } + + pub fn append(&mut self, other: &mut Vec) { + let new_len = self.len + other.len; + // Please refer to grow() for better understanding the semantics of grow(). + if self.capacity < new_len { + self.reserve(other.len); + } + + assert!(self.capacity >= new_len); + // Drop all writes, increment the length of the Vector with the size + // of the Vector which is appended. + self.len = new_len; + } + + // At whichever position we insert the new element into, the overall effect on + // the abstraction is that the length increases by 1. + pub fn insert(&mut self, index: usize, elem: T) { + assert!(index <= self.len); + + self.len += 1; + } + + // We only care that the index we are removing from lies somewhere as part of + // the length of the Vector. The only effect on the abstraction is that the + // length decreases by 1. In the case that it is a valid removal, we return a + // nondeterministic value. 
+ pub fn remove(&mut self, index: usize) -> T { + assert!(index < self.len); + + self.len -= 1; + __nondet::() + } + + pub fn extend(&mut self, iter: I) + where + I: Iterator, + { + // We first compute the length of the iterator. + let mut iter_len = 0; + for value in iter { + iter_len += 1; + } + + // Please refer to grow() for better understanding the semantics of grow(). + self.reserve(iter_len); + self.len += iter_len; + } + + pub fn len(&self) -> usize { + self.len + } + + pub fn capacity(&self) -> usize { + self.capacity + } + + // Please refer to grow() for better understanding the semantics of reserve(). + pub fn reserve(&mut self, additional: usize) { + self.grow(additional); + } +} + +// NoBackIter is a structure which implements Iterator suitable for NoBackVec. We +// only track the index values to the start and end of the iterator. +pub struct NoBackIter { + start: usize, + end: usize, + // Please refer to the NoBackvec definition to understand why PhantomData is used + // here. + _marker: PhantomData, +} + +impl NoBackIter { + pub fn new(len: usize) -> Self { + // By default, initialize the start to index 0 and end to the last index + // of the Vector. + NoBackIter { start: 0, end: len, _marker: Default::default() } + } +} + +impl Iterator for NoBackIter { + type Item = T; + + // Unless we are at the end of the array, return a nondeterministic value + // wrapped around a Some. + fn next(&mut self) -> Option { + if self.start == self.end { None } else { Some(__nondet::()) } + } + + fn size_hint(&self) -> (usize, Option) { + let len = self.end - self.start; + (len, Some(len)) + } +} + +impl IntoIterator for Vec { + type Item = T; + type IntoIter = NoBackIter; + + fn into_iter(self) -> NoBackIter { + NoBackIter::new(self.len()) + } +} + +// Here, we define the rmc_vec! macro which behaves similar to the vec! macro +// found in the std prelude. If we try to override the vec! 
macro, we get error: +// +// = note: `vec` could refer to a macro from prelude +// note: `vec` could also refer to the macro defined here +// +// Relevant Zulip stream: +// https://rust-lang.zulipchat.com/#narrow/stream/122651-general/topic/Override.20prelude.20macro +// +// The workaround for now is to define a new macro. rmc_vec! will initialize a new +// Vec based on its definition in this file. We support two types of initialization +// expressions: +// +// [ elem; count] - initialize a Vector with element value `elem` occurring count times. +// [ elem1, elem2, ...] - initialize a Vector with elements elem1, elem2... +#[cfg(abs_type = "no-back")] +#[macro_export] +macro_rules! rmc_vec { + ( $val:expr ; $count:expr ) => + ({ + let mut result = Vec::new(); + let mut i: usize = 0; + while i < $count { + result.push($val); + i += 1; + } + result + }); + ( $( $xs:expr ),* ) => { + { + let mut result = Vec::new(); + $( + result.push($xs); + )* + result + } + }; +} diff --git a/library/rmc/stubs/Rust/vec/rmc_vec.rs b/library/rmc/stubs/Rust/vec/rmc_vec.rs new file mode 100644 index 0000000000000..02d54122ee461 --- /dev/null +++ b/library/rmc/stubs/Rust/vec/rmc_vec.rs @@ -0,0 +1,1100 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT + +mod utils; +use utils::libc; + +use std::cmp; +use std::convert::TryFrom; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::iter::FromIterator; +use std::mem; +use std::ops::{Deref, DerefMut, FnMut, Index, IndexMut}; +use std::ptr::{drop_in_place, read}; +use std::slice; + +// __CPROVER_max_malloc_size is dependent on the number of offset bits used to +// represent a pointer variable. By default, this is chosen to be 56, in which +// case the max_malloc_size is 2 ** (offset_bits - 1). We could go as far as to +// assign the default capacity to be the max_malloc_size but that would be overkill. +// Instead, we choose a high-enough value 2 ** 10. 
Another reason to do +// this is that it would be easier for the solver to reason about memory if multiple +// Vectors are initialized by the abstraction consumer. +// +// For larger array sizes such as 2 ** (31 - 1) we encounter "array size too large +// for flattening" error. +const DEFAULT_CAPACITY: usize = 1024; +const CBMC_MAX_MALLOC_SIZE: usize = 18014398509481984; + +// We choose a constant which will ensure that we dont allocate small vectors. +// Small vectors will lead to more resizing operations and hence slowdown in +// verification performance. It is possible for the consumer of this abstraction +// allocate small buffers, specifically using with_capacity() functions. But there +// are no guarantees made about the allocation once it is full. Even then, the +// user can then choose to shrink_to_fit() if they want to play around with +// tight bounds on the Vec capacity. +const MIN_NON_ZERO_CAP: usize = 1024; + +// RmcVec implements a fine-grained abstraction of the Vector library for Rust. +// It is aimed to provide a lot more functionality than the other two available +// abstractions - NoBackVec and CVec. RmcVec aims to implement close-to-complete +// compatibility with the Rust Standard Library (RSL) implementation. +// +// The goal of RMCVec is to implement basic operations of the Vec such as push(), +// pop(), append(), insert() in a much simpler way than it is done in the RSL. The +// intuition behind this idea is that with a simple trace, it would be much easier +// for verification techniques such as bounded model checking to reason about +// that piece of code. For that reason, we choose to directly drop down to libc +// functions for low-level operations so that they can be directly translated +// to CBMC primitives. That way, if CBMC performs better through some optimizations +// RMC would too. +// +// We first implement RMCRawVec, an auxiliary data structure which holds a pointer +// to allocated memory and the capacity of the allocation. 
This abstracts away +// all low-level memory resizing operations from the actual Vec data structure. +// It is also used later to implement RMCIter, an iterator for the RMCVec +// data structure. +// +// We then use RMCRawVec as a member of Vec (RMCVec) which is the interface exposed +// to the public. RMCVec aims to main close-to-complete compatibility with the +// RSL Vec implementation. +// +// An important future work direction here is to abstract other relevant +// data-structures such as Strings and HashMaps but implementing optimization +// for slices seems super crucial. Most customer code deals with slices since +// operations such as sort(), split(), get() which traditionally deal with linear +// data structures are implemented on the slice. The advantage of doing it so +// is that other data structures have to implement coercion to a slice and +// can then get all these methods for free. For instance, this is done for Vec, +// String, etc. Currently, we implement coercion to std::slice primitive type allowing +// us to take benefits of that implementation directly but we could get much better +// performance for real world code if we could develop abstractions for that +// as well. Initial intuitions are that it might be harder since those operations +// are typically not verification-friendly. +// +// Please note that this implementation has not been tested against ZSTs - Zero +// Sized Types and might show unsound behavior. + +// RMCRawVec consists of a pointer to allocated memory and another variable tracking +// the capacity of the allocation. +struct RmcRawVec { + ptr: *const T, + cap: usize, +} + +impl RmcRawVec { + fn new() -> Self { + let elem_size = mem::size_of::(); + // NOTE: Currently, this abstraction is not tested against code which has + // zero-sized types. + assert!(elem_size != 0); + // We choose to allocate a Vector of DEFAULT_CAPACITY which is chosen + // to be a very high value. 
This way, for most tests, the trace will not + // generate any resizing operations. + // + // An important callout to make here is that this however prevents us + // from finding buffer overflow bugs. As we always allocate large enough + // memory, there will always be enough space for writing data after the + // index crosses the length of the array. + let cap = DEFAULT_CAPACITY; + let ptr = unsafe { libc::malloc(cap * elem_size) as *mut T }; + RmcRawVec { ptr, cap } + } + + fn new_with_capacity(cap: usize) -> Self { + let elem_size = mem::size_of::(); + // In this case, allocate space for capacity elements as requested. + let ptr = unsafe { libc::malloc(cap * elem_size) as *mut T }; + RmcRawVec { ptr, cap } + } + + // Checks if the Vector needs to be resized to allocate additional more elements. + fn needs_to_grow(&self, len: usize, additional: usize) -> bool { + additional > self.cap - len + } + + // grow() and grow_exact() are functions which reallocate the memory to a larger + // allocation if we run out of space. These are typically called from wrappers + // such as reserve() and reserve_exact() from Vec. The semantics of both of these + // functions is similar to that implemented in the RSL. It is important to call + // out what they are. + // + // According to the RSL, the reserve() function is defined as: + // + // "Reserves capacity for at least additional more elements to be inserted in + // the given Vec. The collection may reserve more space to avoid frequent + // reallocations. After calling reserve, capacity will be greater than or + // equal to self.len() + additional. + // Does nothing if capacity is already sufficient." + // + // The important point to note here is that it is expected to reserve space + // for "atleast" additional more elements. Because of which, there cannot be + // any guarantees made about how much the exact capacity would be after the + // grow()/reserve() operation is performed. 
+ // + // For the purpose of this implementation, we follow the specifics implemented + // in raw_vec.rs -> grow_amortized(). We choose: + // + // Reference: https://doc.rust-lang.org/src/alloc/raw_vec.rs.html#421 + // + // max ( current_capacity * 2 , current_length + additional ). + // This ensures exponential growth of the allocated memory and also reduces + // the number of resizing operations required. + // + // We also ensure that the new allocation is greater than a certain minimum + // that we want to deal with for verification. + fn grow(&mut self, len: usize, additional: usize) { + let elem_size = mem::size_of::(); + let req_cap = len + additional; + let grow_cap = self.cap * 2; + + let new_cap = if req_cap > grow_cap { req_cap } else { grow_cap }; + let new_cap = if MIN_NON_ZERO_CAP > new_cap { MIN_NON_ZERO_CAP } else { new_cap }; + // As per the definition of reserve() + assert!(new_cap * elem_size <= isize::MAX as usize); + unsafe { + self.ptr = libc::realloc(self.ptr as *mut libc::c_void, new_cap * elem_size) as *mut T; + } + self.cap = new_cap; + } + + fn reserve(&mut self, len: usize, additional: usize) { + if self.needs_to_grow(len, additional) { + self.grow(len, additional); + } + } + + // grow_exact() also poses interesting semantics for the case of our abstraction. + // According to the RSL: + // + // "Reserves the minimum capacity for exactly additional more elements to be inserted in the + // given Vec. After calling reserve_exact, capacity will be greater than or equal to + // self.len() + additional. Does nothing if the capacity is already sufficient. + // Note that the allocator may give the collection more space than it requests. Therefore, + // capacity can not be relied upon to be precisely minimal. Prefer reserve if future insertions + // are expected." + // + // As can be observed, the capacity cannot be relied upon to be precisely minimal. + // However, we try to model the RSL behavior as much as we can. 
Please refer to + // grow_exact() from rmc_vec.rs for more details. + fn grow_exact(&mut self, len: usize, additional: usize) { + let elem_size = mem::size_of::(); + let req_cap = len + additional; + // The RSL implementation checks if we are growing beyond usize::MAX + // for ZSTs and panics. The idea is that if we need to grow for a ZST, + // that effectively means that something has gone wrong. + assert!(elem_size != 0); + unsafe { + self.ptr = libc::realloc(self.ptr as *mut libc::c_void, req_cap * elem_size) as *mut T; + } + self.cap = req_cap; + } + + fn reserve_exact(&mut self, len: usize, additional: usize) { + if self.needs_to_grow(len, additional) { + self.grow_exact(len, additional); + } + } + + // Reallocate memory such that the allocation size is equal to the exact + // requirement of the Vector. We try to model RSL behavior (refer raw_vec.rs + // shrink()) but according to the RSL: + // + // "It will drop down as close as possible to the length but the allocator + // may still inform the vector that there is space for a few more elements." + // + // Even in this case, no guarantees can be made to ensure that the capacity + // of the allocationa after shrinking would be exactly equal to the length. + fn shrink_to_fit(&mut self, len: usize) { + assert!(len <= self.cap); + let elem_size = mem::size_of::(); + unsafe { + self.ptr = libc::realloc(self.ptr as *mut libc::c_void, len * elem_size) as *mut T; + } + self.cap = len; + } + + fn capacity(&self) -> usize { + self.cap + } +} + +// Since we allocate memory manually, the Drop for RMCVec should ensure that we +// free that allocation. We drop to libc::free since we have a pointer to the memory +// that was allocated by libc::malloc / libc::realloc. +impl Drop for RmcRawVec { + fn drop(&mut self) { + unsafe { + libc::free(self.ptr as *mut _); + } + } +} + +// In theory, there is no need to track the Allocator here. 
However, the RSL +// implementation of the Vector is generic over the type of the Allocator that +// it takes. Also, many functions are part of impl Blocks which require that the +// Vec be generic over the type of the Allocator that it takes. +// +// We define an empty trait Allocator which shadows std::alloc::Allocator. +// +// We also define an empty RmcAllocator structure here which serves as the default type +// for the Vec data structure. The Vec implemented as part of the Rust Standard +// Library has the Global allocator as its default. +pub trait Allocator {} + +#[derive(Clone, Copy)] +pub struct RmcAllocator {} + +impl RmcAllocator { + pub fn new() -> Self { + RmcAllocator {} + } +} + +// Implement the Allocator trait +impl Allocator for RmcAllocator {} + +// This is the primary Vec abstraction that is exposed to the user. It has a +// RmcRawVec which tracks the underlying memory and values stored in the Vec. We +// also track the length and an allocator instance. +pub struct Vec { + buf: RmcRawVec, + len: usize, + allocator: A, +} + +// Impl block for helper functions. +impl Vec { + fn ptr(&self) -> *mut T { + self.buf.ptr as *mut T + } + + fn with_capacity_in(capacity: usize, allocator: A) -> Self { + Vec { buf: RmcRawVec::new_with_capacity(capacity), len: 0, allocator: allocator } + } +} + +impl Vec { + pub fn new() -> Self { + Vec { buf: RmcRawVec::new(), len: 0, allocator: RmcAllocator::new() } + } + + pub fn with_capacity(cap: usize) -> Self { + Self::with_capacity_in(cap, RmcAllocator::new()) + } + + // A lot of invariants here are not checked: + // * If the pointer was not allocated via a String/Vec, it is highly likely to be + // incorrect. + // * T needs to have the same and alignment as what ptr was allocated with. + // * length needs to be less than or equal to the capacity. + // * capacity needs to be capacity that the pointer was allocated with. 
+ pub unsafe fn from_raw_parts(ptr: *mut T, length: usize, capacity: usize) -> Self { + // Assert that the alignment of T and the allocated pointer are the same. + assert_eq!(mem::align_of::(), mem::align_of_val(&ptr)); + // Assert that the length is less than or equal to the capacity + assert!(length <= capacity); + // We cannot check if the capacity of the memory pointer to by ptr is + // atleast "capacity", this is to be assumed. + let mut v = Vec { + buf: RmcRawVec::new_with_capacity(capacity), + len: 0, + allocator: RmcAllocator::new(), + }; + unsafe { + let mut curr_idx: isize = 0; + while curr_idx < length as isize { + // The push performed here is cheap as we have already allocated + // enough capacity to hold the data. + v.push_unsafe(read(ptr.offset(curr_idx))); + curr_idx += 1; + } + } + v + } +} + +impl Vec { + pub fn allocator(&self) -> &A { + &self.allocator + } + + pub fn push(&mut self, elem: T) { + // Check if the buffer needs to grow in size, call grow() in that case. + if self.len == self.capacity() { + self.buf.grow(self.len, 1); + } + + unsafe { + *self.ptr().offset(self.len as isize) = elem; + } + self.len += 1; + } + + pub fn push_unsafe(&mut self, elem: T) { + unsafe { + *self.ptr().offset(self.len as isize) = elem; + } + self.len += 1; + } + + // It is important to note that pop() does not trigger any changes in the + // underlying allocation capacity. + pub fn pop(&mut self) -> Option { + if self.len == 0 { + None + } else { + self.len -= 1; + unsafe { Some(read(self.ptr().offset(self.len as isize))) } + } + } + + pub fn insert(&mut self, index: usize, elem: T) { + assert!(index <= self.len); + + // Check if the buffer needs to grow in size, call grow() in that case. 
+ if self.capacity() < (self.len + 1) { + self.buf.grow(self.len, 1); + } + + unsafe { + if index < self.len { + // Perform a memmove of all data from the index starting at idx + // to idx+1 to make space for the element to be inserted + libc::memmove( + self.ptr().offset(index as isize + 1) as *mut libc::c_void, + self.ptr().offset(index as isize) as *mut libc::c_void, + (self.len - index) * mem::size_of::(), + ); + } + *self.ptr().offset(index as isize) = elem; + self.len += 1; + } + } + + pub fn remove(&mut self, index: usize) -> T { + assert!(index < self.len); + + unsafe { + self.len -= 1; + let result = read(self.ptr().offset(index as isize)); + if self.len - index > 0 { + // Perform a memmove of all data from the index starting at idx + 1 + // to idx to occupy space created by the element which was removed. + libc::memmove( + self.ptr().offset(index as isize) as *mut libc::c_void, + self.ptr().offset(index as isize + 1) as *mut libc::c_void, + (self.len - index) * mem::size_of::(), + ); + } + result + } + } + + pub fn len(&self) -> usize { + self.len + } + + // Please refer to grow() and grow_exact() for more details() + pub fn reserve(&mut self, additional: usize) { + self.buf.reserve(self.len, additional); + } + + pub fn reserve_exact(&mut self, additional: usize) { + self.buf.reserve(self.len, additional); + } + + // The following safety guarantees must be satisfied: + // + // * new_len must be less than or equal to capacity(). + // * The elements at old_len..new_len must be initialized. 
+ pub unsafe fn set_len(&mut self, new_len: usize) { + assert!(new_len <= self.capacity()); + + self.len = new_len; + } + + pub fn as_mut_ptr(&mut self) -> *mut T { + self.ptr() + } + + // This is possible as we implement the Deref coercion for Vec + pub fn as_slice(&self) -> &[T] { + self + } + + // This is possible as we implement the DerefMut coercion for Vec + pub fn as_mut_slice(&mut self) -> &mut [T] { + self + } + + pub fn as_ptr(&self) -> *const T { + self.buf.ptr + } + + // According to the RSL: + // + // "Shortens the vector, keeping the first len elements and dropping the rest. + // If len is greater than the vector’s current length, this has no effect. + // Note that this method has no effect on the allocated capacity of the vector." + pub fn truncate(&mut self, len: usize) { + unsafe { + if len > self.len { + return; + } + + // Call drop for elements which are truncated + let remaining_len = self.len - len; + while self.len != len { + self.len -= 1; + drop_in_place(self.as_mut_ptr().offset(self.len as isize)); + } + } + } + + // Clears the vector, removing all values. + // This method has no effect on the allocated capacity of the vector + pub fn clear(&mut self) { + self.truncate(0); + } + + // Removes an element from the Vector and returns it. The removed element is + // replaced by the last element of the Vector. This does not preserve ordering, + // but is O(1) - because we dont perform memory resizing operations. + pub fn swap_remove(&mut self, index: usize) -> T { + let len = self.len; + assert!(index < len); + + unsafe { + let last = read(self.as_ptr().add(len - 1)); + let hole = self.as_mut_ptr().add(index); + self.set_len(len - 1); + let prev_hole = read(hole); + *hole = last; + prev_hole + } + } + + // According to the RSL: + // "Returns the number of elements the vector can hold without reallocating." + // The API consumer cannot rely on the precision of this function. 
+ pub fn capacity(&self) -> usize { + self.buf.capacity() + } + + // Splits the collection into two at the given index. + // + // Returns a newly allocated vector containing the elements in the range [at, len). After the + // call, the original vector will be left containing the elements [0, at) with its previous + // capacity unchanged. + pub fn split_off(&mut self, at: usize) -> Self + where + A: Clone, + { + assert!(at <= self.len); + + let other_len = self.len - at; + let mut other = Vec::with_capacity_in(other_len, self.allocator().clone()); + + unsafe { + // Copy all the elements from "at" till the end of the Vector through + // a memcpy which is much cheaper than remove() and push() + libc::memcpy( + other.as_mut_ptr() as *mut libc::c_void, + self.as_ptr().offset(at as isize) as *mut libc::c_void, + other_len * mem::size_of::(), + ); + + // Set length to point to end of array. + self.set_len(at); + other.set_len(other_len); + } + + other + } + + pub fn append(&mut self, other: &mut Vec) { + // Reserve enough space to reduce the number of resizing operations + self.reserve(other.len()); + unsafe { + libc::memmove( + self.as_ptr().offset(self.len as isize) as *mut libc::c_void, + other.as_ptr() as *mut libc::c_void, + other.len() * mem::size_of::(), + ); + self.len += other.len(); + other.set_len(0); + } + } + + // Resizes the Vec in-place so that len is equal to new_len. + // + // If new_len is greater than len, the Vec is extended by the difference, with each additional + // slot filled with the result of calling the closure f. The return values from f will end up + // in the Vec in the order they have been generated. + // + // If new_len is less than len, the Vec is simply truncated. 
+ pub fn resize_with(&mut self, new_len: usize, f: F) + where + F: FnMut() -> T, + { + let len = self.len; + + if new_len > len { + let additional = new_len - len; + self.reserve(additional); + let mut closure = f; + for _ in 0..additional { + // This push is cheap as we have already reserved enough space. + self.push_unsafe(closure()); + } + } else { + self.truncate(new_len); + } + } + + // The semantics of shrink() and shrink_to_fit() are similar to that of reserve(). + // According to the RSL: + // + // "Shrinks the capacity of the vector as much as possible. + // It will drop down as close as possible to the length but the allocator may still inform the + // vector that there is space for a few more elements." + // + // There cannot be any guarantees made that the capacity will be changed + // to fit the length of the Vector exactly. + pub fn shrink_to_fit(&mut self) { + if self.capacity() > self.len { + self.buf.shrink_to_fit(self.len); + } + } + + // This is an experimental API. According to the RSL: + // + // "Shrinks the capacity of the vector with a lower bound. + // The capacity will remain at least as large as both the length and the supplied value. + // If the current capacity is less than the lower limit, this is a no-op." + pub fn shrink_to(&mut self, min_capacity: usize) { + if self.capacity() > min_capacity { + let max = if self.len > min_capacity { self.len } else { min_capacity }; + self.buf.shrink_to_fit(max); + } + } + + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + pub fn new_in(alloc: A) -> Self { + Vec { buf: RmcRawVec::new(), len: 0, allocator: alloc } + } +} + +impl Vec { + // Resizes the Vec in-place so that len is equal to new_len. + // + // If new_len is greater than len, the Vec is extended by the difference, with each additional + // slot filled with value. If new_len is less than len, the Vec is simply truncated. + // + // This method requires T to implement Clone, in order to be able to clone the passed value. 
+ pub fn resize(&mut self, new_len: usize, value: T) { + let len = self.len; + + if new_len > len { + let additional = new_len - len; + self.reserve(additional); + for _ in 0..additional { + // This push is cheap as we have already reserved enough space. + self.push_unsafe(value.clone()); + } + } else { + self.truncate(new_len); + } + } + + // Clones and appends all elements in a slice to the Vec. + // + // Iterates over the slice other, clones each element, and then appends it to this Vec. The + // other vector is traversed in-order. + pub fn extend_from_slice(&mut self, other: &[T]) { + let other_len = other.len(); + self.reserve(other_len); + for i in 0..other_len { + // This push is cheap as we have already reserved enough space. + self.push_unsafe(other[i].clone()); + } + } +} + +// Drop is codegen for most types, no need to perform any action here. +impl Drop for Vec { + fn drop(&mut self) {} +} + +// Trait implementations for Vec +// We try to implement all major traits for Vec which might be priority for +// our customers. +impl Default for Vec { + fn default() -> Self { + Vec::new() + } +} + +impl PartialEq for Vec { + fn eq(&self, other: &Self) -> bool { + if self.len != other.len() { + return false; + } + + for idx in 0..self.len { + if self[idx] != other[idx] { + return false; + } + } + + return true; + } +} + +// We implement the PartialEq trait for Vec with other slices by using a generic +// macro. As we implement the Deref coercion, we can perform self[index] and compare +// it with the RHS. +macro_rules! __impl_slice_eq1 { + ([$($vars:tt)*] $lhs:ty, $rhs:ty) => { + impl PartialEq<$rhs> for $lhs + where + T: PartialEq, A: Allocator + { + #[inline] + fn eq(&self, other: &$rhs) -> bool { self[..] == other[..] } + #[inline] + fn ne(&self, other: &$rhs) -> bool { self[..] != other[..] } + } + } +} + +__impl_slice_eq1! { [A] Vec, &[U] } +__impl_slice_eq1! { [A] Vec, &mut [U] } +__impl_slice_eq1! { [A] &[T], Vec } +__impl_slice_eq1! 
{ [A] &mut [T], Vec } +__impl_slice_eq1! { [A, const N: usize] Vec, [U; N] } +__impl_slice_eq1! { [A, const N: usize] Vec, &[U; N] } + +// Coercion support into Deref allows us to benefit from operations on slice +// implemented in the standard library. Quoting the RSL: +// +// "Deref coercion is a convenience that Rust performs on arguments to functions +// and methods. Deref coercion works only on types that implement the Deref trait. +// Deref coercion converts such a type into a reference to another type. Deref coercion +// happens automatically when we pass a reference to a particular type’s value +// as an argument to a function or method that doesn’t match the parameter type +// in the function or method definition. A sequence of calls to the deref method +// converts the type we provided into the type the parameter needs." +// +// For our case, the deref coercion implemented here can convert a Vec into a +// primitive slice type. This allows us to benefit from methods implemented +// on the slice type such as sort(), split(), etc. 
+impl Deref for Vec { + type Target = [T]; + + fn deref(&self) -> &[T] { + unsafe { ::std::slice::from_raw_parts(self.ptr(), self.len) } + } +} + +impl DerefMut for Vec { + fn deref_mut(&mut self) -> &mut [T] { + unsafe { ::std::slice::from_raw_parts_mut(self.ptr() as *mut T, self.len) } + } +} + +// Clone +impl Clone for Vec { + fn clone(&self) -> Self { + let mut v = Self::with_capacity_in(self.len, self.allocator.clone()); + for idx in 0..self.len { + v.push_unsafe(self[idx].clone()); + } + v + } + + fn clone_from(&mut self, other: &Self) { + *self = other.clone(); + } +} + +// Hash +impl Hash for Vec { + fn hash(&self, state: &mut H) { + Hash::hash(&**self, state) + } +} + +// Index +impl, A: Allocator> Index for Vec { + type Output = I::Output; + + fn index(&self, index: I) -> &Self::Output { + Index::index(&**self, index) + } +} + +// IndexMut +impl, A: Allocator> IndexMut for Vec { + fn index_mut(&mut self, index: I) -> &mut Self::Output { + IndexMut::index_mut(&mut **self, index) + } +} + +// From the RSL: +// +// "Extend a collection with the contents of an iterator. +// Iterators produce a series of values, and collections can also be thought of +// as a series of values. The Extend trait bridges this gap, allowing you to +// extend a collection by including the contents of that iterator. When extending +// a collection with an already existing key, that entry is updated or, in the +// case of collections that permit multiple entries with equal keys, that +// entry is inserted." +// +// We cannot reserve space for the elements which are added as we dont know +// the size of the iterator. In this case, we perform sequential push operations. +// However because our underlying Vector grows exponential in size, we can be +// sure that we won't perform too many resizing operations. 
+impl Extend for Vec { + fn extend>(&mut self, iter: I) { + for elem in iter.into_iter() { + self.push(elem); + } + } +} + +impl<'a, T: Copy + 'a, A: Allocator + 'a> Extend<&'a T> for Vec { + fn extend>(&mut self, iter: I) { + for elem in iter.into_iter() { + self.push(*elem); + } + } +} + +impl PartialOrd for Vec { + fn partial_cmp(&self, other: &Self) -> Option { + PartialOrd::partial_cmp(&**self, &**other) + } +} + +impl Eq for Vec {} + +impl Ord for Vec { + fn cmp(&self, other: &Self) -> cmp::Ordering { + Ord::cmp(&**self, &**other) + } +} + +impl AsRef> for Vec { + fn as_ref(&self) -> &Vec { + self + } +} + +impl AsMut> for Vec { + fn as_mut(&mut self) -> &mut Vec { + self + } +} + +// AsRef to a slice is possible because we implement the Deref coercion. +impl AsRef<[T]> for Vec { + fn as_ref(&self) -> &[T] { + self + } +} + +// AsMut to a slice is possible because we implement the Deref coercion +impl AsMut<[T]> for Vec { + fn as_mut(&mut self) -> &mut [T] { + self + } +} + +// Debug +impl fmt::Debug for Vec { + // fmt implementation left empty since we dont care about debug messages + // and such in the verification case + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + Ok(()) + } +} + +// Create a new Vec from a slice reference +impl From<&[T]> for Vec { + fn from(s: &[T]) -> Vec { + let s_len = s.len(); + // Reserve space for atleast s.len() elements to avoid resizing + let mut v = Vec::with_capacity(s_len); + for i in 0..s_len { + // This push is cheap as we reserve enough space earlier. + v.push_unsafe(s[i].clone()); + } + v + } +} + +// Create a new Vec from a slice mut reference +impl From<&mut [T]> for Vec { + fn from(s: &mut [T]) -> Vec { + let s_len = s.len(); + // Reserve space for atleast s.len() elements to avoid resizing + let mut v = Vec::with_capacity(s_len); + for i in 0..s_len { + // This push is cheap as we reserve enough space earlier. 
+ v.push_unsafe(s[i].clone()); + } + v + } +} + +// Create a new Vec from an array +impl From<[T; N]> for Vec { + fn from(s: [T; N]) -> Vec { + // Reserve space for atleast s.len() elements to avoid resizing + let mut v = Vec::with_capacity(s.len()); + for elem in s { + // This push is cheap as we reserve enough space earlier. + v.push_unsafe(elem); + } + v + } +} + +impl From<&str> for Vec { + fn from(s: &str) -> Vec { + From::from(s.as_bytes()) + } +} + +// Gets the entire contents of the `Vec` as an array, +// if its size exactly matches that of the requested array. +impl TryFrom> for [T; N] { + type Error = Vec; + + fn try_from(mut vec: Vec) -> Result<[T; N], Vec> { + if vec.len() != N { + return Err(vec); + } + + unsafe { + vec.set_len(0); + } + + let array = unsafe { read(vec.as_ptr() as *const [T; N]) }; + Ok(array) + } +} + +// We implement an IntoIterator for (Rmc)Vec using a custom structure - +// RmcIter. For RmcIter, we implement RmcRawValIter as a member which stores +// raw pointers to the start and end of memory of the sequence. +struct RmcRawValIter { + start: *const T, + end: *const T, +} + +impl RmcRawValIter { + unsafe fn new(slice: &[T]) -> Self { + RmcRawValIter { + // The pointer to the slice marks its beginning + start: slice.as_ptr(), + end: if mem::size_of::() == 0 { + // Handle ZST (Zero-sized types) + ((slice.as_ptr() as usize) + slice.len()) as *const _ + } else if slice.len() == 0 { + // If the length of the slice is 0, the pointer to the slice also + // marks its end + slice.as_ptr() + } else { + // For the general case, compute offset from the start by counting + // slice.len() elements. + slice.as_ptr().offset(slice.len() as isize) + }, + } + } +} + +// An interface for dealing with iterators. +impl Iterator for RmcRawValIter { + type Item = T; + + // Yield the next element of the sequence. This method changes the internal + // state of the iterator. 
+ fn next(&mut self) -> Option { + // If we have already reached the end, yield a None value. According to + // the documentation, individual implementations may or may not choose + // to return a Some() again at some point. In our case, we dont. + if self.start == self.end { + None + } else { + unsafe { + let result = read(self.start); + self.start = if mem::size_of::() == 0 { + // Handle ZSTs correctly + (self.start as usize + 1) as *const _ + } else { + // For the general case, offset increment the start by 1. + self.start.offset(1) + }; + Some(result) + } + } + } +} + +// An iterator able to yield elements from both ends. +// +// Something that implements DoubleEndedIterator has one extra capability over +// something that implements Iterator: the ability to also take Items from the back, +// as well as the front. +// +// once a DoubleEndedIterator returns None from a next_back(), calling it again +// may or may not ever return Some again +impl DoubleEndedIterator for RmcRawValIter { + fn next_back(&mut self) -> Option { + // If we have already consumed the iterator, return a None. According to + // the documentation, individual implementations may or may not choose + // to return a Some() again at some point. In our case, we dont. + if self.start == self.end { + None + } else { + unsafe { + self.end = if mem::size_of::() == 0 { + // Handle ZSTs + (self.end as usize - 1) as *const _ + } else { + // Offset decrement the end by 1 + self.end.offset(-1) + }; + // Read from end and wrap around a Some() + Some(read(self.end)) + } + } + } +} + +// RmcIntoIter contains a RmcRawVec and RmcRawValIter to track the Vector and +// the Iterator. This exposes a public interface which can be used with Vec. 
+pub struct RmcIntoIter { + _buf: RmcRawVec, + iter: RmcRawValIter, +} + +impl Iterator for RmcIntoIter { + type Item = T; + + fn next(&mut self) -> Option { + self.iter.next() + } +} + +impl DoubleEndedIterator for RmcIntoIter { + fn next_back(&mut self) -> Option { + self.iter.next_back() + } +} + +// Implement IntoIterator for Vec +// +// By implementing IntoIterator for a type, you define how it will be converted +// to an iterator. +impl IntoIterator for Vec { + type Item = T; + type IntoIter = RmcIntoIter; + + fn into_iter(self) -> RmcIntoIter { + unsafe { + let iter = RmcRawValIter::new(&self); + let buf = read(&self.buf); + // into_iter() takes self by value, and it consumes that collection. + // For that reason, we need to ensure that the destructor for the Vec + // is not called since that will free the underlying buffer. In that + // case, we need to take ownership of the data while making sure + // that the destructor is not called. mem::forget allows us to do + // that. We implement a Drop for RmcIntoIter to ensure that elements + // which were not yielded are dropped appropriately. + // + // For reference: https://doc.rust-lang.org/nomicon/vec-into-iter.html + mem::forget(self); + + RmcIntoIter { iter, _buf: buf } + } + } +} + +// FromIterator defines how a Vec will be created from an Iterator. +impl FromIterator for Vec { + fn from_iter>(iter: I) -> Vec { + let mut v = Vec::new(); + for elem in iter.into_iter() { + v.push_unsafe(elem); + } + v + } +} + +// IntoIterator defines how we can convert a Vec into a struct which implements +// Iterator. For our case, we choose std::Iter. 
+impl<'a, T, A: Allocator> IntoIterator for &'a Vec { + type Item = &'a T; + type IntoIter = slice::Iter<'a, T>; + + fn into_iter(self) -> slice::Iter<'a, T> { + self.iter() + } +} + +impl<'a, T, A: Allocator> IntoIterator for &'a mut Vec { + type Item = &'a mut T; + type IntoIter = slice::IterMut<'a, T>; + + fn into_iter(self) -> slice::IterMut<'a, T> { + self.iter_mut() + } +} + +// Here, we define the rmc_vec! macro which behaves similar to the vec! macro +// found in the std prelude. If we try to override the vec! macro, we get error: +// +// = note: `vec` could refer to a macro from prelude +// note: `vec` could also refer to the macro defined here +// +// Relevant Zulip stream: +// https://rust-lang.zulipchat.com/#narrow/stream/122651-general/topic/Override.20prelude.20macro +// +// The workaround for now is to define a new macro. rmc_vec! will initialize a new +// Vec based on its definition in this file. We support two types of initialization +// expressions: +// +// [ elem; count] - initialize a Vector with element value `elem` occurring count times. +// [ elem1, elem2, ...] - initialize a Vector with elements elem1, elem2... +#[cfg(abs_type = "rmc")] +#[macro_export] +macro_rules! rmc_vec { + ( $val:expr ; $count:expr ) => + ({ + // Reserve space for atleast $count elements to avoid resizing operations + let mut result = Vec::with_capacity($count); + let mut i: usize = 0; + while i < $count { + result.push($val); + i += 1; + } + result + }); + ( $( $xs:expr ),* ) => { + { + let mut result = Vec::new(); + $( + result.push($xs); + )* + result + } + }; +} diff --git a/library/rmc/stubs/Rust/vec/utils.rs b/library/rmc/stubs/Rust/vec/utils.rs new file mode 100644 index 0000000000000..1a211add2213e --- /dev/null +++ b/library/rmc/stubs/Rust/vec/utils.rs @@ -0,0 +1,17 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR MIT + +// This file should contain imports and methods which can be used across the +// different abstractions. + +// We use methods from libc as they are directly translated into CBMC primitives. +// In which case, if CBMC does better by implementing any optimizations on these +// operations, RMC would do better too. +pub extern crate libc; + +// Currently, the way we handle non-determinism is to implement a __nondet::::() +// function which is stubbed to be `unimplemented!()`. However, at a later time +// it could be possible to implement a Nondet trait per type. This would work with +// enum types such as Option where we could decide whether we want to return +// a None or a Some(Nondet). That method would likely end up in this file so +// that it can be used throughout. diff --git a/scripts/rmc b/scripts/rmc index 6e1ec91b7c861..fc33a50134fae 100755 --- a/scripts/rmc +++ b/scripts/rmc @@ -11,6 +11,7 @@ import pathlib MY_PATH = pathlib.Path(__file__).parent.parent.absolute() RMC_C_LIB = MY_PATH / "library" / "rmc" / "rmc_lib.c" +RMC_C_STUB = MY_PATH / "library" / "rmc" / "stubs" / "C" EXIT_CODE_SUCCESS = 0 CBMC_VERIFICATION_FAILURE_EXIT_CODE = 10 @@ -45,7 +46,7 @@ def main(): c_filename = base + ".c" symbols_filename = base + ".symbols" - if EXIT_CODE_SUCCESS != rmc.compile_single_rust_file(args.input, json_filename, args.verbose, args.debug, args.keep_temps, args.mangler, args.dry_run, []): + if EXIT_CODE_SUCCESS != rmc.compile_single_rust_file(args.input, json_filename, args.verbose, args.debug, args.keep_temps, args.mangler, args.dry_run, args.use_abs, args.abs_type, []): return 1 if EXIT_CODE_SUCCESS != rmc.symbol_table_to_gotoc(json_filename, goto_filename, args.verbose, args.keep_temps, args.dry_run): @@ -53,6 +54,15 @@ def main(): args.c_lib.append(str(RMC_C_LIB)) + # Check if the program wants to use the C-FFI based abstraction + if args.use_abs and args.abs_type == "c-ffi": + RMC_C_VEC = RMC_C_STUB / "vec" / "vec.c" 
+ args.c_lib.append(str(RMC_C_VEC)) + + # Currently experimental + RMC_C_HASHSET = RMC_C_STUB / "hashset" / "hashset.c" + args.c_lib.append(str(RMC_C_HASHSET)) + if EXIT_CODE_SUCCESS != rmc.link_c_lib(goto_filename, goto_filename, args.c_lib, args.verbose, args.quiet, args.function, args.dry_run): return 1 diff --git a/scripts/rmc.py b/scripts/rmc.py index faebc5fac0a54..bc69ce7b1aab4 100644 --- a/scripts/rmc.py +++ b/scripts/rmc.py @@ -138,16 +138,24 @@ def run_cmd(cmd, label=None, cwd=None, env=None, output_to=None, quiet=False, ve return process.returncode # Generates a symbol table from a rust file -def compile_single_rust_file(input_filename, output_filename, verbose=False, debug=False, keep_temps=False, mangler="v0", dry_run=False, symbol_table_passes=[]): +def compile_single_rust_file(input_filename, output_filename, verbose=False, debug=False, keep_temps=False, mangler="v0", dry_run=False, use_abs=False, abs_type="std", symbol_table_passes=[]): if not keep_temps: atexit.register(delete_file, output_filename) - + build_cmd = [RMC_RUSTC_EXE, - "-Z", "codegen-backend=gotoc", - "-Z", "trim-diagnostic-paths=no", - "-Z", f"symbol-mangling-version={mangler}", - "-Z", f"symbol_table_passes={' '.join(symbol_table_passes)}", - f"--cfg={RMC_CFG}", "-o", output_filename, input_filename] + "-Z", "codegen-backend=gotoc", + "-Z", "trim-diagnostic-paths=no", + "-Z", f"symbol-mangling-version={mangler}", + "-Z", f"symbol_table_passes={' '.join(symbol_table_passes)}", + f"--cfg={RMC_CFG}"] + + if use_abs: + build_cmd += ["-Z", "force-unstable-if-unmarked=yes", + "--cfg=use_abs", + "--cfg", f'abs_type="{abs_type}"'] + + build_cmd += ["-o", output_filename, input_filename] + if "RUSTFLAGS" in os.environ: build_cmd += os.environ["RUSTFLAGS"].split(" ") build_env = os.environ diff --git a/scripts/rmc_flags.py b/scripts/rmc_flags.py index 23364127ad7e9..d9806763a5e59 100644 --- a/scripts/rmc_flags.py +++ b/scripts/rmc_flags.py @@ -165,6 +165,10 @@ def 
add_developer_flags(make_group, add_flag, config): help="Pass through directly to CBMC; must be the last flag") add_flag(group, "--mangler", default="v0", choices=["v0", "legacy"], help="Change what mangler is used by the Rust compiler") + add_flag(group, "--use-abs", default=False, action=BooleanOptionalAction, + help="Use abstractions for the standard library") + add_flag(group, "--abs-type", default="std", choices=["std", "rmc", "c-ffi", "no-back"], + help="Choose abstraction for modules of standard library if available") # Adds the flags common to both rmc and cargo-rmc. # Allows you to specify flags/groups of flags to not add. diff --git a/src/test/rmc-prelude.rs b/src/test/rmc-prelude.rs index 32949166a1f90..0cbec8f7cbb09 100644 --- a/src/test/rmc-prelude.rs +++ b/src/test/rmc-prelude.rs @@ -1,6 +1,39 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 OR MIT +// Passing a --use-abs flag to rmc allows rmc users to replace parts of the +// standard library with simpler, verification-friendly stubs. The prelude chooses +// a specific abstraction depending on the --abs-type flag given to rmc. +// This is currently only implemented for the Vec abstraction, but PoC code for +// HashSet is given too. +// +// Eventually we would want to move to a more stable method of performing +// stubbing. +// Tracking issue: https://github.com/model-checking/rmc/issues/455 +// +// The default option "std" uses the standard library implementation. +// rmc uses the fine-grained, std compatible but verification-friendly implementation +// C-FFI and NoBackVec are more experimental abstractions. + +#[cfg(not(use_abs))] +use std::vec::Vec; + +#[cfg(use_abs)] +#[cfg(abs_type = "rmc")] +include! {"../../library/rmc/stubs/Rust/vec/rmc_vec.rs"} + +#[cfg(use_abs)] +#[cfg(abs_type = "no-back")] +include! {"../../library/rmc/stubs/Rust/vec/noback_vec.rs"} + +#[cfg(use_abs)] +#[cfg(abs_type = "c-ffi")] +include! 
{"../../library/rmc/stubs/Rust/vec/c_vec.rs"} + +#[cfg(use_abs)] +#[cfg(abs_type = "c-ffi")] +include! {"../../library/rmc/stubs/Rust/hashset/c_hashset.rs"} + fn __VERIFIER_assume(cond: bool) { unimplemented!() }