Skip to content

Commit

Permalink
Use MAX_PREALLOCATION consistently
Browse files Browse the repository at this point in the history
Use `MAX_PREALLOCATION` both when reading a vec from bytes and when
decoding each element.
  • Loading branch information
serban300 committed Jun 19, 2024
1 parent 51106c8 commit 744c41c
Showing 1 changed file with 101 additions and 93 deletions.
194 changes: 101 additions & 93 deletions src/codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ use crate::{
DecodeFinished, Error,
};

pub(crate) const MAX_PREALLOCATION: usize = 4 * 1024;
const A_BILLION: u32 = 1_000_000_000;

/// Trait that allows reading of data into a slice.
Expand Down Expand Up @@ -834,52 +833,6 @@ pub(crate) fn encode_slice_no_len<T: Encode, W: Output + ?Sized>(slice: &[T], de
}
}

/// Decodes `len` items of type `T` from `input` into a `Vec<T>`.
///
/// No length prefix is read here: the caller supplies the number of items.
///
/// The result is the same as decoding the items one at a time. When `with_type_info!` selects
/// the `decode` arm and the target is little-endian (or the item is a single byte wide), the
/// items are read in bulk through `read_vec_from_u8s` instead.
pub fn decode_vec_with_len<T: Decode, I: Input>(
    input: &mut I,
    len: usize,
) -> Result<Vec<T>, Error> {
    /// Generic path: decodes every item with `T::decode`.
    fn decode_unoptimized<I: Input, T: Decode>(
        input: &mut I,
        items_len: usize,
    ) -> Result<Vec<T>, Error> {
        // Bytes we are willing to size the initial allocation from: what the input reports as
        // remaining, or `MAX_PREALLOCATION` when the input does not report a length.
        let available_bytes = input.remaining_len()?.unwrap_or(MAX_PREALLOCATION);
        // Zero-sized items need no capacity; the match also avoids dividing by zero.
        let capacity_hint = match mem::size_of::<T>() {
            0 => 0,
            item_size => available_bytes / item_size,
        };

        let mut items = Vec::with_capacity(capacity_hint.min(items_len));
        input.descend_ref()?;
        for _ in 0..items_len {
            let item = T::decode(input)?;
            items.push(item);
        }
        input.ascend_ref();

        Ok(items)
    }

    // Bulk path, expanded by `with_type_info!` with the concrete type `$ty`.
    macro_rules! decode {
        ( $ty:ty, $input:ident, $len:ident ) => {{
            if !cfg!(target_endian = "little") && mem::size_of::<T>() != 1 {
                // Multi-byte items on a non-little-endian target: the raw bytes cannot be
                // reinterpreted directly, so decode item by item.
                decode_unoptimized($input, $len)
            } else {
                let raw_items = read_vec_from_u8s::<_, $ty>($input, $len)?;
                // SAFETY: sound only if `$ty` is the same type as `T`.
                // NOTE(review): presumably guaranteed by `with_type_info!` dispatching on
                // `T::TYPE_INFO` — confirm against the macro definition.
                Ok(unsafe { mem::transmute::<Vec<$ty>, Vec<T>>(raw_items) })
            }
        }};
    }

    with_type_info! {
        <T as Decode>::TYPE_INFO,
        decode(input, len),
        {
            decode_unoptimized(input, len)
        },
    }
}

impl_for_non_zero! {
NonZeroI8,
NonZeroI16,
Expand Down Expand Up @@ -1113,71 +1066,126 @@ impl<T: Encode> Encode for [T] {
}
}

/// Builds a `Vec<T>` of a known length chunk by chunk.
///
/// Memory is reserved for one chunk at a time rather than for the full declared length up
/// front, so the amount reserved ahead of the data actually decoded stays bounded.
struct VecDecoder<T> {
    /// Items decoded so far.
    decoded_vec: Vec<T>,
    /// Number of items that still have to be decoded.
    num_undecoded_items: usize,
    /// Maximum number of items reserved and decoded per chunk; always at least 1.
    chunk_len: usize,
}

impl<T> VecDecoder<T> {
    /// Target upper bound, in bytes, on the memory reserved ahead of decoding one chunk.
    ///
    /// An item larger than this bound is still decoded, one item per chunk.
    const MAX_PREALLOCATION: usize = 4 * 1024;

    /// Creates a decoder for a vector of `len` items.
    fn new(len: usize) -> Self {
        // `size_of::<T>()` is 0 for zero-sized types such as `()`. A plain division would
        // panic; such items occupy no memory, so the chunk length can be unbounded.
        // An item larger than `MAX_PREALLOCATION` would give a chunk length of 0 and
        // `decode_chunked` would never make progress, so clamp to one item per chunk.
        let chunk_len = Self::MAX_PREALLOCATION
            .checked_div(mem::size_of::<T>())
            .unwrap_or(usize::MAX)
            .max(1);

        Self { decoded_vec: Vec::new(), num_undecoded_items: len, chunk_len }
    }

    /// Decodes all remaining items and returns the finished vector.
    ///
    /// `decode_chunk` is called repeatedly with the vector being built and the number of items
    /// it must append. Capacity for exactly that many additional items has been reserved
    /// before each call.
    ///
    /// # Errors
    ///
    /// Returns the first error produced by `decode_chunk`; the partially decoded vector is
    /// dropped.
    fn decode_chunked<F>(mut self, mut decode_chunk: F) -> Result<Vec<T>, Error>
    where
        F: FnMut(&mut Vec<T>, usize) -> Result<(), Error>,
    {
        while self.num_undecoded_items > 0 {
            let chunk_len = self.chunk_len.min(self.num_undecoded_items);
            self.decoded_vec.reserve_exact(chunk_len);

            decode_chunk(&mut self.decoded_vec, chunk_len)?;

            // `chunk_len` is at least 1 and never exceeds the remaining count, so this cannot
            // underflow and the loop always terminates.
            self.num_undecoded_items -= chunk_len;
        }

        Ok(self.decoded_vec)
    }
}

/// Create a `Vec<T>` by casting directly from a buffer of read `u8`s
///
/// The encoding of `T` must be equal to its binary representation, and size of `T` must be less or
/// equal to [`MAX_PREALLOCATION`].
pub(crate) fn read_vec_from_u8s<I, T>(input: &mut I, items_len: usize) -> Result<Vec<T>, Error>
/// The encoding of `T` must be equal to its binary representation, and the size of `T` must be
/// less than or equal to [`VecDecoder::MAX_PREALLOCATION`].
fn read_vec_from_bytes<T, I>(input: &mut I, len: usize) -> Result<Vec<T>, Error>
where
I: Input,
T: ToMutByteSlice + Default + Clone,
I: Input,
{
debug_assert!(MAX_PREALLOCATION >= mem::size_of::<T>(), "Invalid precondition");
let decoder = VecDecoder::<T>::new(len);

let byte_len = items_len
let byte_len = len
.checked_mul(mem::size_of::<T>())
.ok_or("Item is too big and cannot be allocated")?;

let input_len = input.remaining_len()?;

// If there is input len and it cannot be pre-allocated then return directly.
if input_len.map(|l| l < byte_len).unwrap_or(false) {
return Err("Not enough data to decode vector".into());
// Check if there is enough data in the input buffer.
if let Some(input_len) = input.remaining_len()? {
if input_len < byte_len {
return Err("Not enough data to decode vector".into());
}
}

// In both these branches we're going to be creating and resizing a Vec<T>,
// but casting it to a &mut [u8] for reading.
decoder.decode_chunked(|decoded_vec, chunk_len| {
let decoded_vec_len = decoded_vec.len();
let decoded_vec_size = decoded_vec_len * mem::size_of::<T>();
unsafe {
decoded_vec.set_len(decoded_vec_len + chunk_len);
}

// Note: we checked that if input_len is some then it can be preallocated.
let r = if input_len.is_some() || byte_len < MAX_PREALLOCATION {
// Here we pre-allocate the whole buffer.
let mut items: Vec<T> = vec![Default::default(); items_len];
let bytes_slice = items.as_mut_byte_slice();
input.read(bytes_slice)?;
let bytes_slice = decoded_vec.as_mut_byte_slice();
input.read(&mut bytes_slice[decoded_vec_size..])
})
}

items
} else {
// An allowed number of preallocated item.
// Note: `MAX_PREALLOCATION` is expected to be more or equal to size of `T`, precondition.
let max_preallocated_items = MAX_PREALLOCATION / mem::size_of::<T>();
fn decode_vec_from_items<T, I>(input: &mut I, len: usize) -> Result<Vec<T>, Error>
where
T: Decode,
I: Input,
{
let decoder = VecDecoder::<T>::new(len);

// Here we pre-allocate only the maximum pre-allocation
let mut items: Vec<T> = vec![];
// Check if there is enough data in the input buffer.
if let Some(input_len) = input.remaining_len()? {
if input_len < len {
return Err("Not enough data to decode vector".into());
}
}

let mut items_remains = items_len;
input.descend_ref()?;
let vec = decoder.decode_chunked(|decoded_vec, chunk_len| {
for _ in 0..chunk_len {
decoded_vec.push(T::decode(input)?);
}

while items_remains > 0 {
let items_len_read = max_preallocated_items.min(items_remains);
Ok(())
})?;
input.ascend_ref();

let items_len_filled = items.len();
let items_new_size = items_len_filled + items_len_read;
Ok(vec)
}

items.reserve_exact(items_len_read);
unsafe {
items.set_len(items_new_size);
/// Decode the vec (without a prepended len).
///
/// This is equivalent to decoding all elements one by one, but it is optimized in some
/// situations.
pub fn decode_vec_with_len<T: Decode, I: Input>(
input: &mut I,
len: usize,
) -> Result<Vec<T>, Error> {
macro_rules! decode {
( $ty:ty, $input:ident, $len:ident ) => {{
if cfg!(target_endian = "little") || mem::size_of::<T>() == 1 {
let vec = read_vec_from_bytes::<$ty, _>($input, $len)?;
Ok(unsafe { mem::transmute::<Vec<$ty>, Vec<T>>(vec) })
} else {
decode_vec_from_items::<T, _>($input, $len)
}
}};
}

let bytes_slice = items.as_mut_byte_slice();
let bytes_len_filled = items_len_filled * mem::size_of::<T>();
input.read(&mut bytes_slice[bytes_len_filled..])?;

items_remains = items_remains.saturating_sub(items_len_read);
}

items
};

Ok(r)
with_type_info! {
<T as Decode>::TYPE_INFO,
decode(input, len),
{
decode_vec_from_items::<T, _>(input, len)
},
}
}

impl<T> WrapperTypeEncode for Vec<T> {}
Expand Down Expand Up @@ -1898,7 +1906,7 @@ mod tests {
}
}

let len = MAX_PREALLOCATION * 2 + 1;
let len = VecDecoder::<u8>::MAX_PREALLOCATION * 2 + 1;
let mut i = Compact(len as u32).encode();
i.resize(i.len() + len, 0);
assert_eq!(<Vec<u8>>::decode(&mut NoLimit(&i[..])).unwrap(), vec![0u8; len]);
Expand Down

0 comments on commit 744c41c

Please sign in to comment.