From 31a0356ee344e437a0113ebea577afa1e7698050 Mon Sep 17 00:00:00 2001 From: Martin Habovstiak Date: Wed, 26 Jan 2022 18:35:14 +0100 Subject: [PATCH] Mega refactor: PoC/preparation for 0.3 This refactors lot of stuff based on knowledge learned over past years and lot of thinking. Apart from obvious details like using `Infallible` or the `alloc` crate, this introduces the concept of maybe uninitialized buffers. Their actual initializedness is tracked as typestate or dynamically as needed. This should enable efficient bridging with `std` among other things. The code contains quite a bit of `unsafe` that needs better auditing and ther's still shitload of work to do but looks like this direction is good. Closes #23 --- Cargo.toml | 9 +- src/alloc_impls.rs | 277 +++++++++++ src/bufio.rs | 243 ++-------- src/error.rs | 39 +- src/ext.rs | 24 +- src/lib.rs | 386 ++++++--------- src/std_impls.rs | 127 ++--- src/util/bytes.rs | 11 +- src/util/chain.rs | 12 +- src/util/empty.rs | 18 +- src/util/mod.rs | 8 +- src/util/repeat.rs | 22 +- src/util/repeat_bytes.rs | 40 +- src/util/restarting.rs | 12 +- src/util/sink.rs | 7 +- src/util/write_trunc.rs | 20 +- uninit_buffer/Cargo.toml | 14 + uninit_buffer/src/buf_init.rs | 205 ++++++++ uninit_buffer/src/lib.rs | 858 ++++++++++++++++++++++++++++++++++ 19 files changed, 1691 insertions(+), 641 deletions(-) create mode 100644 src/alloc_impls.rs create mode 100644 uninit_buffer/Cargo.toml create mode 100644 uninit_buffer/src/buf_init.rs create mode 100755 uninit_buffer/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 0cfc418..6170013 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,13 +22,10 @@ be better at expressing what kinds of error can happen. edition = "2018" [dependencies] -void = { version = "1", default-features = false } byteorder = { version = "1", optional = true } -possibly_uninit = "0.1" +uninit_buffer = { path = "uninit_buffer", default-features = false } [features] default = ["std"] -std = ["void/std"] - -# Deprecated, kept for backward compatibility -use_std = ["std"] +std = ["alloc", "uninit_buffer/std"] +alloc = ["uninit_buffer/alloc"] diff --git a/src/alloc_impls.rs b/src/alloc_impls.rs new file mode 100644 index 0000000..58b1358 --- /dev/null +++ b/src/alloc_impls.rs @@ -0,0 +1,277 @@ +use crate::{Read, Write, ExtendFromReader, ExtendError, OutBuf, OutBytes}; +use crate::error::BufError; +use crate::bufio::{BufWrite, BufRead, BufReadRequire, BufReadProgress}; +use core::convert::Infallible; +use core::mem::MaybeUninit; +use core::fmt; +use alloc::vec::Vec; +use uninit_buffer::possibly_uninit::slice::BorrowOutSlice; + +const DEFAULT_BUF_SIZE: usize = 8 * 1024; + +#[cfg(not(feature = "vec_try_reserve"))] +#[derive(Debug, Clone)] +pub struct AllocError(core::convert::Infallible); + +#[cfg(feature = "vec_try_reserve")] +#[derive(Debug, Clone)] +pub struct AllocError(alloc::collections::TryReserveError); + +impl fmt::Display for AllocError { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.0, f) + } +} + +#[cfg(feature = "std")] +impl std::error::Error for AllocError { + #[inline] + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + self.0.source() + } +} + +/// Use try_reserve or reserve depending on enabled feature. 
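+///
+/// Without the `vec_try_reserve` feature this falls back to the infallible `reserve`, which
+/// aborts (or panics) on allocation failure, so the `Infallible`-backed `AllocError` can never
+/// actually be constructed in that configuration.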
+fn try_reserve(vec: &mut Vec, required: usize) -> Result<(), AllocError> { + #[cfg(not(feature = "vec_try_reserve"))] + vec.reserve(required); + + #[cfg(feature = "vec_try_reserve")] + vec.try_reserve(required)?; + Ok(()) +} + +fn try_reserve_exact(vec: &mut Vec, required: usize) -> Result<(), AllocError> { + #[cfg(not(feature = "vec_try_reserve"))] + vec.reserve_exact(required); + + #[cfg(feature = "vec_try_reserve")] + vec.try_reserve_exact(required)?; + Ok(()) +} + +#[cfg(feature = "alloc")] +impl ExtendFromReader for Vec { + // We could return OOM, but there is no `try_alloc`, so we have to panic. + // That means `Vec` can never fail. + type ExtendError = AllocError; + + fn extend_from_reader( + &mut self, + reader: &mut R, + ) -> Result> { + if self.len() == self.capacity() { + try_reserve(&mut *self, 1024).map_err(ExtendError::ExtendErr)?; + } + + buffer::with_vec_as_out_buf(&mut *self, |out_buf| { + reader.read(out_buf) + }).map_err(ExtendError::ReadErr) + } +} + +impl Write for Vec { + type WriteError = Infallible; + type FlushError = Infallible; + + #[inline] + fn write(&mut self, buf: &[u8]) -> Result { + self.extend_from_slice(buf); + Ok(buf.len()) + } + + #[inline] + fn flush(&mut self) -> Result<(), Self::FlushError> { + Ok(()) + } + + #[inline] + fn size_hint(&mut self, min_bytes: usize, max_bytes: Option) { + match max_bytes { + Some(max_bytes) => { + // This is a refactored way of checking (len + min) * 2 >= len + max + // The idea is that vec usually grows by factor of two to acheive amortized + // constant complexity. So we allow the vec to grow to the size (len + min) * 2 - + // len which conveniently equals to len + min*2 which also can be checked against + // max_bytes avoiding some math operations. + let max_len = min_bytes + .saturating_mul(2) + .saturating_add(self.len()); + if max_bytes <= max_len { + // We use actual maximum so we don't need spare capacity + let _ = try_reserve_exact(self, max_bytes); + } else { + // The actual maximum is higher so allow the allocator giving us more if + // beneficial. + let _ = try_reserve(self, max_len); + } + }, + None => { + let _ = try_reserve(self, min_bytes); + }, + } + } + + #[inline] + fn uses_size_hint(&self) -> bool { + true + } +} + +unsafe impl BufWrite for Vec { + #[inline] + fn request_buffer(&mut self) -> Result<&mut OutBytes, Self::WriteError> { + use core::slice; + + // Ensure there is a space for data + self.reserve(1); + // SAFETY: + // * len can never be so high `add` would overflow + // * there is `capacity` bytes of valid memory at `ptr` + // * capacity >= len + // * casting `&mut [u8]` to `&mut [MaybeUninit]` is sound as long as the latter is not + // exposed to safe code. `OutBytes` prevents exposing it. + unsafe { + let len = self.len(); + let remaining = self.capacity() - len; + let ptr = self.as_mut_ptr() + .add(len) as *mut MaybeUninit; + + Ok(slice::from_raw_parts_mut(ptr, remaining).borrow_out_slice()) + } + } + + #[inline] + unsafe fn submit_buffer(&mut self, size: usize) { + let new_len = self.len() + size; + self.set_len(new_len) + } +} + +/// Wrapper that provides buffering for a reader. +#[cfg(feature = "alloc")] +pub struct BufReaderRequire { + reader: R, + buffer: ::std::vec::Vec, + start: usize, +} + +#[cfg(feature = "alloc")] +impl BufReaderRequire { + /// Creates buffered reader. + #[inline] + pub fn new(reader: R) -> Self { + BufReaderRequire { + reader, + buffer: Vec::with_capacity(DEFAULT_BUF_SIZE), + start: 0, + } + } + + /// Unwraps inner reader. 
+ /// + /// Any data in the internal buffer is lost. + #[inline] + pub fn into_inner(self) -> R { + self.reader + } + + /// Gets the number of bytes in the buffer. + /// + /// This is the amount of data that can be returned immediately, without reading from the + /// wrapped reader. + #[inline] + fn available(&self) -> usize { + self.len() - self.start + } + + #[inline] + fn len(&self) -> usize { + self.buffer.len() + } + + #[inline] + fn capacity(&self) -> usize { + self.buffer.capacity() + } + + #[inline] + fn erase_beginning(&mut self) { + let start = self.start; + // set to 0 first to avoid pointing out of bounds if drain panics + self.start = 0; + self.buffer.drain(..start); + } +} + +#[cfg(feature = "alloc")] +impl Read for BufReaderRequire { + type ReadError = R::ReadError; + type BufInit = buffer::init::Uninit; + + fn read(&mut self, mut buf: OutBuf<'_, Self::BufInit>) -> Result { + let n = { + let data = self.fill_buf()?; + buf.write_slice_min(data) + }; + self.consume(n); + Ok(n) + } +} + +impl BufRead for BufReaderRequire { + fn fill_buf(&mut self) -> Result<&[u8], Self::ReadError> { + if self.available() == 0 { + self.erase_beginning(); + let reader = &mut self.reader; + buffer::with_vec_as_out_buf(&mut self.buffer, |out_buf| reader.read(out_buf))?; + } + Ok(&self.buffer[self.start..]) + } + + #[inline] + fn consume(&mut self, amount: usize) { + self.start = self.start.saturating_add(amount).max(self.len()); + } +} + +impl BufReadProgress for BufReaderRequire { + type BufReadError = crate::alloc_impls::AllocError; + + fn fill_progress(&mut self) -> Result<&[u8], BufError> { + let amount = self.available() + 1; + self.require_bytes(amount) + } +} + +impl BufReadRequire for BufReaderRequire { + type BufReadError = crate::alloc_impls::AllocError; + + fn require_bytes(&mut self, amount: usize) -> Result<&[u8], BufError> { + // if there's enough bytes available, return + // if we don't have enough capacity reallocate inexact + // if tail is too short or beginning is too far, memmove the beginning + if amount <= self.available() { + return Ok(&self.buffer[self.start..]); + } + if amount > self.buffer.capacity() { + self.buffer.reserve(amount - self.len()); + } + if amount > self.capacity() - self.len() || self.start > self.capacity() / 2 || self.available() == 0 { + self.erase_beginning(); + } + while self.available() < amount { + let reader = &mut self.reader; + let result = buffer::with_vec_as_out_buf(&mut self.buffer, |out_buf| { + reader.read(out_buf) + }); + match result { + Ok(0) => return Err(BufError::End), + Ok(_) => (), + Err(error) => return Err(BufError::OtherErr(error)), + } + } + Ok(&self.buffer[self.start..]) + } +} diff --git a/src/bufio.rs b/src/bufio.rs index 29faab0..7f67164 100644 --- a/src/bufio.rs +++ b/src/bufio.rs @@ -1,11 +1,10 @@ //! Contains traits and impls for buffering. use crate::error::BufError; -use crate::Read; -use crate::Write; -use void::Void; - -const DEFAULT_BUF_SIZE: usize = 8 * 1024; +use crate::{Read, Write, OutBytes, BorrowOutSlice}; +use core::convert::Infallible; +#[cfg(feature = "alloc")] +pub use crate::alloc_impls::BufReaderRequire; /// A `BufRead` is a type of `Read`er which has an internal buffer, allowing it to perform extra ways /// of reading. 
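+///
+/// A typical consuming loop looks roughly like this (a sketch using the `fill_buf`/`consume`
+/// pair; `process` is a hypothetical function returning how many bytes it actually used):
+///
+/// ```ignore
+/// loop {
+///     let data = reader.fill_buf()?;
+///     if data.is_empty() {
+///         break;
+///     }
+///     let used = process(data);
+///     reader.consume(used);
+/// }
+/// ```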
@@ -72,7 +71,7 @@ pub trait BufReadRequire: BufRead { } impl<'a> BufReadRequire for &'a [u8] { - type BufReadError = Void; + type BufReadError = Infallible; fn require_bytes( &mut self, @@ -89,15 +88,13 @@ impl<'a> BufReadRequire for &'a [u8] { /// When writing, it might be better to serialize directly into a buffer. This trait allows such /// situation. /// -/// This triat is `unsafe` because it optimizes buffers to not require zeroing. +/// This trait is `unsafe` because it optimizes buffers to not require zeroing. pub unsafe trait BufWrite: Write { /// Requests buffer for writing. - /// The buffer is represented as a pointer because it may contain uninitialized memory - only - /// writing is allowed. The pointer must not outlive Self! /// /// The returned slice must always be non-empty. If non-emty slice can't be returned, `Err` must /// be returned instead. If the underlying writer is full, it has to flush the buffer. - fn request_buffer(&mut self) -> Result<*mut [u8], Self::WriteError>; + fn request_buffer(&mut self) -> Result<&mut OutBytes, Self::WriteError>; /// Tells the buf writer that `size` bytes were written into buffer. /// @@ -106,11 +103,7 @@ pub unsafe trait BufWrite: Write { /// Writes single byte. Since this is buffered, the operation will be efficient. fn write_byte(&mut self, byte: u8) -> Result<(), Self::WriteError> { - unsafe { - (*self.request_buffer()?)[0] = byte; - self.submit_buffer(1); - } - Ok(()) + self.write(&[byte]).map(drop) } } @@ -122,162 +115,32 @@ pub unsafe trait BufWriteRequire: BufWrite { /// Require buffer with minimum `size` bytes. It is an error to return smaller buffer but /// `unsafe` code can't rely on it. - fn require_buffer(&mut self, size: usize) -> Result<*mut [u8], Self::BufWriteError>; -} - -/// Represents type that can serve as (possibly uninitialized) buffer -pub trait AsRawBuf { - /// Returns a pointer to the buffer. It may point to uninitialized data. The pointer must not - /// outlive the buffer. - fn as_raw_buf(&mut self) -> *mut [u8]; - - /// Returns the length of the buffer. - fn len(&self) -> usize; -} - -impl + AsMut<[u8]>> AsRawBuf for T { - fn as_raw_buf(&mut self) -> *mut [u8] { - self.as_mut() - } - - fn len(&self) -> usize { - self.as_ref().len() - } -} - -/// Wrapper that provides buffering for a reader. -#[cfg(feature = "std")] -pub struct BufReaderRequire { - reader: R, - buffer: ::std::vec::Vec, - start: usize, - end: usize, -} - -#[cfg(feature = "std")] -impl BufReaderRequire { - /// Creates buffered reader. - pub fn new(reader: R) -> Self { - let mut buffer = ::std::vec::Vec::new(); - buffer.resize(DEFAULT_BUF_SIZE, 0); - BufReaderRequire { - reader, - buffer, - start: 0, - end: 0, - } - } - - /// Unwraps inner reader. - /// - /// Any data in the internal buffer is lost. - pub fn into_inner(self) -> R { - self.reader - } - - /// Gets the number of bytes in the buffer. - /// - /// This is the amount of data that can be returned immediately, without reading from the - /// wrapped reader. 
- fn available(&self) -> usize { - self.end - self.start - } -} - -#[cfg(feature = "std")] -impl Read for BufReaderRequire { - type ReadError = R::ReadError; - - fn read(&mut self, buf: &mut [u8]) -> Result { - let n = { - let data = self.fill_buf()?; - let n = data.len().max(buf.len()); - buf[..n].copy_from_slice(&data[..n]); - n - }; - self.consume(n); - Ok(n) - } -} - -#[cfg(feature = "std")] -impl BufRead for BufReaderRequire { - fn fill_buf(&mut self) -> Result<&[u8], Self::ReadError> { - if self.available() == 0 { - self.start = 0; - self.end = self.reader.read(&mut self.buffer[..])?; - } - Ok(&self.buffer[self.start..self.end]) - } - - fn consume(&mut self, amount: usize) { - self.start = self.start.saturating_add(amount).max(self.buffer.len()); - } -} - -#[cfg(feature = "std")] -impl BufReadProgress for BufReaderRequire { - type BufReadError = Void; - - fn fill_progress(&mut self) -> Result<&[u8], BufError> { - let amount = self.available() + 1; - self.require_bytes(amount) - } -} - -#[cfg(feature = "std")] -impl BufReadRequire for BufReaderRequire { - type BufReadError = Void; - - fn require_bytes(&mut self, amount: usize) -> Result<&[u8], BufError> { - if self.available() >= amount { - return Ok(&self.buffer[self.start..self.end]); - } - if amount > self.buffer.len() { - let len = self.buffer.len(); - self.buffer.reserve(amount - len); - let new_capacity = self.buffer.capacity(); - self.buffer.resize(new_capacity, 0); - } - if amount > self.buffer.len() - self.start { - self.buffer.drain(..self.start); - self.end -= self.start; - self.start = 0; - let capacity = self.buffer.capacity(); - self.buffer.resize(capacity, 0); - } - while self.available() < amount { - match self.reader.read(&mut self.buffer[self.end..]) { - Ok(0) => return Err(BufError::End), - Ok(read_len) => self.end += read_len, - Err(error) => return Err(BufError::OtherErr(error)), - } - } - Ok(&self.buffer[self.start..self.end]) - } + fn require_buffer(&mut self, size: usize) -> Result<&mut OutBytes, Self::BufWriteError>; } /// Wrapper that provides buffering for a writer. -pub struct BufWriter { +pub struct BufWriter> { writer: W, - buffer: B, - cursor: usize, + buffer: crate::Buffer, + write_pos: usize, } -impl BufWriter { +impl> BufWriter { /// Creates buffered writer. /// /// Warning: buffer must be non-zero! Otherwise the program may panic! - pub fn new(writer: W, buffer: B) -> Self { + pub fn new(writer: W, storage: B) -> Self { + assert!(!storage.borrow_uninit_slice().is_empty()); + BufWriter { writer, - buffer, - cursor: 0, + buffer: crate::Buffer::new(storage), + write_pos: 0, } } fn flush_if_full(&mut self) -> Result<(), ::WriteError> { - if self.cursor == self.buffer.len() { + if self.buffer.as_out().is_full() { self.flush() } else { Ok(()) @@ -285,48 +148,36 @@ impl BufWriter { } } -impl Write for BufWriter { +impl> Write for BufWriter { type WriteError = W::WriteError; type FlushError = W::WriteError; fn write(&mut self, data: &[u8]) -> Result { - let buf_len = self.buffer.len(); - if self.cursor == buf_len { + if self.buffer.as_out().is_full() { self.flush()?; - if data.len() >= buf_len { + if data.len() >= self.buffer.capacity() { return self.writer.write(&data); } } - // This is correct because it only writes to the buffer - unsafe { - // Get the ref to uninitialized buffer - let buf = &mut (*self.buffer.as_raw_buf())[self.cursor..]; - - // Calculate how much bytes to copy (doesn't read uninitialized). - let to_copy = ::core::cmp::min(buf_len, data.len()); - - // Copy data. Overwrites uninitialized. 
- buf[0..to_copy].copy_from_slice(&data[0..to_copy]); - - // Updates cursor by exactly the amount of bytes overwritten. - self.cursor += to_copy; - - Ok(to_copy) - } + Ok(self.buffer.as_out().write_slice_min(data)) } fn flush(&mut self) -> Result<(), Self::FlushError> { - // This is correct because it gets slice to initialized data - let buf = unsafe { &mut (*self.buffer.as_raw_buf())[0..self.cursor] }; - - self.cursor = 0; - - self.writer.write_all(buf) + let mut to_flush = &self.buffer.written()[self.write_pos..]; + while !to_flush.is_empty() { + self.write_pos += self.writer.write(to_flush)?; + to_flush = &self.buffer.written()[self.write_pos..]; + } + self.write_pos = 0; + self.buffer.reset(); + Ok(()) } - fn size_hint(&mut self, bytes: usize) { - self.writer.size_hint(bytes) + fn size_hint(&mut self, min_bytes: usize, max_bytes: Option) { + let min = min_bytes.saturating_add(self.buffer.written().len() - self.write_pos); + let max = max_bytes.map(|max| max.saturating_add(self.buffer.written().len() - self.write_pos)); + self.writer.size_hint(min, max); } fn uses_size_hint(&self) -> bool { @@ -334,25 +185,21 @@ impl Write for BufWriter { } } -unsafe impl BufWrite for BufWriter { - fn request_buffer(&mut self) -> Result<*mut [u8], Self::WriteError> { +unsafe impl> BufWrite for BufWriter { + fn request_buffer(&mut self) -> Result<&mut OutBytes, Self::WriteError> { self.flush_if_full()?; // This simply returns pointer to the uninitialized buffer - unsafe { - let slice = &mut (*self.buffer.as_raw_buf())[self.cursor..]; - assert!(slice.len() > 0); - Ok(slice) - } + Ok(self.buffer.out_bytes()) } unsafe fn submit_buffer(&mut self, size: usize) { - self.cursor += size; + self.buffer.as_out().advance_unchecked(size) } } unsafe impl<'a, W: BufWrite> BufWrite for &'a mut W { - fn request_buffer(&mut self) -> Result<*mut [u8], Self::WriteError> { + fn request_buffer(&mut self) -> Result<&mut OutBytes, Self::WriteError> { (*self).request_buffer() } @@ -362,16 +209,16 @@ unsafe impl<'a, W: BufWrite> BufWrite for &'a mut W { } unsafe impl<'a> BufWrite for &'a mut [u8] { - fn request_buffer(&mut self) -> Result<*mut [u8], Self::WriteError> { - if self.len() > 0 { - Ok(*self) - } else { + fn request_buffer(&mut self) -> Result<&mut OutBytes, Self::WriteError> { + if self.is_empty() { Err(crate::error::BufferOverflow) + } else { + Ok(self.borrow_out_slice()) } } unsafe fn submit_buffer(&mut self, size: usize) { - let tmp = ::core::mem::replace(self, &mut []); + let tmp = core::mem::replace(self, &mut []); *self = &mut tmp[size..]; } } diff --git a/src/error.rs b/src/error.rs index 8684cc9..a3daa35 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,8 +1,7 @@ //! Error types and various operations on them. -use ::core::fmt; -use void; -use void::Void; +use core::fmt; +use core::convert::Infallible; /// Specifies an error that happened during I/O operation. This enables one to compose read and /// write errors into single type. @@ -134,9 +133,9 @@ impl IntoIntrError for IntrError { } } -impl From for IntrError { - fn from(e: Void) -> Self { - void::unreachable(e) +impl From for IntrError { + fn from(e: Infallible) -> Self { + match e {} } } @@ -180,3 +179,31 @@ impl fmt::Display for BufferOverflow { write!(f, "provided buffer was too small") } } + +/// Error returned from `write_all` when writing fails. +/// +/// This error contains the information about the state of writing and the underlying cause. This +/// way writing can be restarted and the error message is more detailed. 
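+///
+/// For example, a caller that wants to resume after a transient failure could do something
+/// roughly like this (a sketch; `retry_possible` is a hypothetical predicate):
+///
+/// ```ignore
+/// let mut data: &[u8] = payload;
+/// while let Err(e) = writer.write_all(data) {
+///     if !retry_possible(&e.error) {
+///         return Err(e.error);
+///     }
+///     // skip the bytes that were already written and retry the remainder
+///     data = &data[e.bytes_written..];
+/// }
+/// ```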
+#[derive(Debug, Clone)] +#[non_exhaustive] +pub struct WriteAllError { + /// Number of bytes that were written to the writer before it errored. + pub bytes_written: usize, + /// How many more bytes should have been written. + pub bytes_missing: usize, + /// The cause of error. + pub error: T, +} + +impl WriteAllError { + /// Discards the information about the state of writing and converts the type. + pub fn into_inner(self) -> T { + self.error + } +} + +impl fmt::Display for WriteAllError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "couldn't write {} bytes, writing failed after {} bytes", self.bytes_written + self.bytes_missing, self.bytes_written) + } +} diff --git a/src/ext.rs b/src/ext.rs index f06a1ae..9b74c44 100644 --- a/src/ext.rs +++ b/src/ext.rs @@ -1,7 +1,7 @@ //! This module contains various extension traits. use crate::error::ReadExactError; -use crate::Read; +use crate::{Read, OutBuf}; /// Result of successful read operation. pub enum ReadResult<'a> { @@ -56,20 +56,22 @@ impl<'a> ReadResult<'a> { /// } pub trait ReadExt: Read { /// Reads from the reader and converts the result. - fn read_ext<'a, 'b>(&'a mut self, buf: &'b mut [u8]) - -> Result, Self::ReadError>; + fn read_ext<'a>(&mut self, buf: OutBuf<'a, Self::BufInit>) + -> Result, Self::ReadError>; } impl ReadExt for R { - fn read_ext<'a, 'b>( - &'a mut self, - buf: &'b mut [u8], - ) -> Result, Self::ReadError> { - let len = self.read(buf)?; - if len > 0 { - Ok(ReadResult::Bytes(&mut buf[..len])) - } else { + fn read_ext<'a>( + &mut self, + buf: OutBuf<'a, Self::BufInit>, + ) -> Result, Self::ReadError> { + let (slice, res) = buf.scoped(|buf| self.read(buf)); + let amount = res?; + debug_assert_eq!(amount, slice.len()); + if slice.is_empty() { Ok(ReadResult::End) + } else { + Ok(ReadResult::Bytes(slice)) } } } diff --git a/src/lib.rs b/src/lib.rs index 233e599..6e7139a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,11 +13,14 @@ #[cfg(feature = "std")] extern crate std; +#[cfg(feature = "alloc")] +extern crate alloc; + #[cfg(feature = "byteorder")] extern crate byteorder; -extern crate void; - +#[cfg(feature = "alloc")] +mod alloc_impls; #[cfg(feature = "std")] pub mod std_impls; @@ -26,200 +29,21 @@ pub mod error; pub mod ext; pub mod util; -use core::ops::{Deref, DerefMut}; -use core::ptr::NonNull; -use core::mem::MaybeUninit; +pub extern crate uninit_buffer as buffer; +/// Re-exported for convenience. +pub use buffer::{Buffer, OutBuf, OutBytes}; + +use buffer::possibly_uninit::slice::BorrowOutSlice; use crate::{ error::{ExtendError, ReadExactError}, util::Chain, - void::Void, }; -/// Abbreviation for OutSlice -pub type OutBytes = possibly_uninit::slice::OutSlice; - #[cfg(feature = "byteorder")] use byteorder::ByteOrder; -/// Buffer of bytes used in readers -pub struct ReadBuffer<'a> { - buffer: &'a mut OutBytes, - // modifying this field is unsafe - // invariant: self.position <= self.buffer.len() - position: usize, -} - -impl<'a> ReadBuffer<'a> { - /// Creates a new buffer using provided storage and position starting at 0 - #[inline] - pub fn new>(buffer: T) -> Self { - ReadBuffer { - buffer: buffer.into(), - position: 0, - } - } - - /// Writes a single byte into the buffer and advances the position by one - /// - /// ## Panics - /// - /// This method panicks if the buffer is full. - #[inline] - //#[track_caller] ? 
- pub fn write_byte(&mut self, byte: u8) { - if self.position >= self.buffer.len() { - panic!("Attempt to write into a full buffer"); - } - self.buffer.at_mut(self.position).write(byte); - self.position += 1; - } - - /// Writes a byte slice into the buffer and advance the position by slice length - /// - /// ## Panics - /// - /// This method panicks if the length of the slice is greater than what buffer can hold. - #[inline] - //#[track_caller] ? - pub fn write_slice(&mut self, bytes: &[u8]) { - self.buffer.copy_from_slice(bytes); - self.position += bytes.len(); - } - - /// Writes as many bytes from slice as fit into the buffer. - /// - /// This method is similar to `write_slice` but it truncates the slice being written - /// instead of panicking. - #[inline] - pub fn write_slice_min(&mut self, bytes: &[u8]) { - let to_write = self.remaining().min(bytes.len()); - self.buffer.copy_from_slice(&bytes[0..to_write]); - self.position += to_write; - } - - /// Returns how many bytes can be written into the buffer - pub fn remaining(&self) -> usize { - self.buffer.len() - self.position - } - - /// Returns true if no more bytes can be written to the buffer - pub fn is_full(&self) -> bool { - self.remaining() == 0 - } - - /// Returns uninitialized portion of the buffer. - /// - /// Readers need to store the bytes into this slice. - /// This method should be only used in low-level Read implementations. - /// If you already have a slice or a byte see `write_slice` and `write_byte` methods. - pub fn uninit_mut(&mut self) -> &mut OutBytes { - &mut self.buffer[self.position..] - } - - /// Moves position by `amount` bytes - /// - /// This method marks the `amount` bytes after current position as initialized. - /// - /// ## Safety - /// - /// The caller may only call this method if and only if it has written `amount` consecutive bytes to the slice - /// returned by `uninit_mut()` starting at position 0. - /// - /// The behavior is undefined if amount is greater than the length of the slice returned by - /// `uninit_mut()` or if the amount is greater than the number of bytes written to the slice. - /// - /// Calling this method with zero `amount` is NO-OP and always sound. 
- pub unsafe fn advance(&mut self, amount: usize) { - self.position += amount; - } - - /// Returns initialized part of the buffer - pub fn init(&self) -> &[u8] { - unsafe { - self.buffer[0..self.position].assume_init() - } - } - - /// Returns initialized part of the buffer - pub fn init_mut(&mut self) -> &mut [u8] { - unsafe { - self.buffer[0..self.position].assume_init_mut() - } - } - - /// Splits the buffer between initialized and uninitialized part and returns them - pub fn split_at_pos_mut(&mut self) -> (&mut [u8], &mut OutBytes) { - unsafe { - let (init, uninit) = self.buffer.split_at_mut(self.position); - (init.assume_init_mut(), uninit) - } - } -} - -pub trait ByteSliceExt: Deref + DerefMut where for<'a> &'a mut ::Target: Into<&'a OutBytes> { - fn as_read_buffer(&mut self) -> ReadBuffer<'_> { - ReadBuffer::new(&mut **self) - } -} - -impl ByteSliceExt for &'_ mut [u8] {} -impl ByteSliceExt for &'_ mut [MaybeUninit] {} - -pub unsafe trait BufferLike { - fn whole_as_out_bytes(&mut self) -> &mut OutBytes; - fn position(&mut self) -> usize; - unsafe fn set_position(&mut self, position: usize); -} - -pub trait BufferLikeExt: BufferLike { - fn uninit_as_read_buffer(&mut self) -> BufferUpdater { - let position = self.position(); - let storage = NonNull::from(self); - let buffer = self.whole_as_out_bytes(); - - BufferUpdater { - storage, - buffer: ReadBuffer { - buffer, - position, - } - } - } -} - -/// Ensures position of the underlying storage is updated on drop -pub struct BufferUpdater<'a, T: BufferLike> { - storage: NonNull, - buffer: ReadBuffer<'a>, -} - -impl<'a, T: BufferLike> Deref for BufferUpdater<'a, T> { - type Target = ReadBuffer<'a>; - - fn deref(&self) -> &Self::Target { - &self.buffer - } -} - -impl<'a, T: BufferLike> DerefMut for BufferUpdater<'a, T> { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.buffer - } -} - -impl<'a, T: BufferLike> Drop for BufferUpdater<'a, T> { - fn drop(&mut self) { - unsafe { - // This is the last time buffer is accessed - let position = self.buffer.position; - // This is sound according to stacked borrows because self.buffer is no longr accessed - // position <= len is enforced by ReadBuffer - // initializedness is enforced by ReadBuffer - self.storage.as_mut().set_position(position); - } - } -} +pub use buffer::{init, BufInit}; /// The Read trait allows for reading bytes from a source. /// @@ -238,9 +62,19 @@ impl<'a, T: BufferLike> Drop for BufferUpdater<'a, T> { pub trait Read { /// Value of this type is returned when `read()` fails. /// - /// It's highly recommended to use `Void` from `void` crate if `read()` can never fail. + /// It's highly recommended to use [`core::convert::Infallible`] if `read()` can never fail. type ReadError; + /// Marker for buffers this reader accepts. + /// + /// This should be almost always [`buffer::init::Uninit`]. + /// Exceptions: + /// + /// * Bridges to old APIs such as `std::io::Read` need to use [`buffer::init::Init`] + /// * [`TrackBuffer`] has to use [`buffer::init::Dynamic`] to track initializedness and unify + /// the types. + type BufInit: buffer::BufInit; + /// Pull some bytes from this source into the specified buffer, returning how many bytes were /// read. /// @@ -258,28 +92,20 @@ pub trait Read { /// /// 2. The buffer specified was 0 bytes in length. /// - /// No guarantees are provided about the contents of buf when this function is called, - /// implementations cannot rely on any property of the contents of buf being true. 
It is - /// recommended that implementations only write data to buf instead of reading its contents. - /// /// # Errors /// /// If this function encounters any form of I/O or other error, an error /// variant will be returned. If an error is returned then it must be /// guaranteed that no bytes were read. - fn read(&mut self, buf: &mut ReadBuffer<'_>) -> Result; + fn read(&mut self, buf: OutBuf<'_, Self::BufInit>) -> Result; /// Read the exact number of bytes required to fill `buf`. /// /// This function reads as many bytes as necessary to completely fill the specified buffer `buf`. - /// - /// No guarantees are provided about the contents of `buf` when this function is called, - /// implementations cannot rely on any property of the contents of `buf` being true. It is - /// recommended that implementations only write data to `buf` instead of reading its contents. - fn read_exact(&mut self, buf: &mut ReadBuffer<'_>) -> Result<(), ReadExactError> { + fn read_exact(&mut self, mut buf: OutBuf<'_, Self::BufInit>) -> Result<(), ReadExactError> { if self.available_bytes(buf.remaining()) { while !buf.is_full() { - let read_bytes = self.read(buf)?; + let read_bytes = self.read(buf.reborrow())?; if read_bytes == 0 { return Err(ReadExactError::UnexpectedEnd); } @@ -292,15 +118,31 @@ pub trait Read { /// Hints whether there are at least `at_least` bytes available. /// - /// This function should return true if it can't determine exact amount. That is also the default. + /// This function should return true if it can't determine the exact amount. + /// That is also the default. + /// The method should be cheap to call. It's mainly intended for buffered readers. /// /// # Errors /// /// It is an error to return false even if there are more bytes available. + #[inline] fn available_bytes(&self, _at_least: usize) -> bool { true } + /// An expensive way to get the number of available bytes. + /// + /// The method returns `Some(num_available_bytes)` if it succeeded in finding out how many + /// bytes are available, `None` otherwise. This computation can be as expensive as a `read` + /// call. (If `read` involves syscall, this is allowed to perform syscall too.) + /// + /// This can be used in optimizations to allocate the whole required buffer upfront. + /// If the number of bytes is larger than `usize::MAX` this should return `usize::MAX`. + #[inline] + fn retrieve_available_bytes(&self) -> Option { + None + } + /// Chains another reader after `self`. When self ends (returns Ok(0)), the other reader will /// provide bytes to read. fn chain(self, other: R) -> Chain @@ -332,14 +174,18 @@ pub trait Read { } } - /* - fn map_read_err E>(self, f: F) -> MapErr where Self: Sized { - MapErr { + /// Creates a reader that converts all its errors to some other type. + /// + /// This is useful mainly when you have multiple readers of different error type and you need to + /// unify their error types e.g. to store them in a collection of trait objects. You can use a + /// conversion function that converts the errors to a common type so that all the error types + /// become the same. + fn map_read_err E>(self, f: F) -> MapReadErr where Self: Sized { + MapReadErr { reader: self, map_fn: f, } } - */ /// Creates a "by reference" adaptor for this instance of `Read`. /// @@ -354,69 +200,82 @@ pub trait Read { /// Reads an unsigned 16 bit integer from the underlying reader. 
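+    ///
+    /// A minimal sketch of decoding a fixed header (assuming the `byteorder` feature is enabled
+    /// and `reader` implements this trait):
+    ///
+    /// ```ignore
+    /// use byteorder::BigEndian;
+    ///
+    /// let magic = reader.read_u32::<BigEndian>()?;
+    /// let len = reader.read_u16::<BigEndian>()?;
+    /// ```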
#[cfg(feature = "byteorder")] fn read_u16(&mut self) -> Result> { - let mut buf = [0; 2]; - self.read_exact(&mut buf)?; - Ok(BO::read_u16(&buf)) + let mut buf = buffer::new_maybe_init::<[MaybeUninit; 2], Self::BufInit>(); + self.read_exact(buf.as_out())?; + Ok(BO::read_u16(buf.written())) } /// Reads an unsigned 32 bit integer from the underlying reader. #[cfg(feature = "byteorder")] fn read_u32(&mut self) -> Result> { - let mut buf = [0; 4]; - self.read_exact(&mut buf)?; - Ok(BO::read_u32(&buf)) + let mut buf = buffer::new_maybe_init::<[MaybeUninit; 4], Self::BufInit>(); + self.read_exact(buf.as_out())?; + Ok(BO::read_u32(buf.written())) } /// Reads an unsigned 64 bit integer from the underlying reader. #[cfg(feature = "byteorder")] fn read_u64(&mut self) -> Result> { - let mut buf = [0; 8]; - self.read_exact(&mut buf)?; - Ok(BO::read_u64(&buf)) + let mut buf = buffer::new_maybe_init::<[MaybeUninit; 8], Self::BufInit>(); + self.read_exact(buf.as_out())?; + Ok(BO::read_u64(buf.written())) } /// Reads an signed 16 bit integer from the underlying reader. #[cfg(feature = "byteorder")] fn read_i16(&mut self) -> Result> { - let mut buf = [0; 2]; - self.read_exact(&mut buf)?; - Ok(BO::read_i16(&buf)) + let mut buf = buffer::new_maybe_init::<[MaybeUninit; 2], Self::BufInit>(); + self.read_exact(buf.as_out())?; + Ok(BO::read_i16(buf.written())) } /// Reads an signed 32 bit integer from the underlying reader. #[cfg(feature = "byteorder")] fn read_i32(&mut self) -> Result> { - let mut buf = [0; 4]; - self.read_exact(&mut buf)?; - Ok(BO::read_i32(&buf)) + let mut buf = buffer::new_maybe_init::<[MaybeUninit; 4], Self::BufInit>(); + self.read_exact(buf.as_out())?; + Ok(BO::read_i32(buf.written())) } /// Reads an signed 64 bit integer from the underlying reader. #[cfg(feature = "byteorder")] fn read_i64(&mut self) -> Result> { - let mut buf = [0; 8]; - self.read_exact(&mut buf)?; - Ok(BO::read_i64(&buf)) + let mut buf = buffer::new_maybe_init::<[MaybeUninit; 8], Self::BufInit>(); + self.read_exact(buf.as_out())?; + Ok(BO::read_i64(buf.written())) } /// Reads a IEEE754 single-precision (4 bytes) floating point number from the underlying /// reader. #[cfg(feature = "byteorder")] fn read_f32(&mut self) -> Result> { - let mut buf = [0; 4]; - self.read_exact(&mut buf)?; - Ok(BO::read_f32(&buf)) + let mut buf = buffer::new_maybe_init::<[MaybeUninit; 4], Self::BufInit>(); + self.read_exact(buf.as_out())?; + Ok(BO::read_f32(buf.written())) } /// Reads a IEEE754 double-precision (8 bytes) floating point number from the underlying /// reader. #[cfg(feature = "byteorder")] fn read_f64(&mut self) -> Result> { - let mut buf = [0; 8]; - self.read_exact(&mut buf)?; - Ok(BO::read_f64(&buf)) + let mut buf = buffer::new_maybe_init::<[MaybeUninit; 8], Self::BufInit>(); + self.read_exact(buf.as_out())?; + Ok(BO::read_f64(buf.written())) + } +} + +/* +pub struct TrackBuffer(R); + +impl Read for TrackBuffer { + type Error = R::Error; + type BufInit = buffer::init::Dynamic; + + fn read(&mut self, buf: &mut Buffer<'_, Self::BufInit>) -> Result { + self.0.read(buf.maybe_zeroed()) } } +*/ /// Some types can be extended by reading from reader. The most well-known is probably `Vec`. It /// is possible to implement it manually, but it may be more efficient if the type implements this @@ -433,6 +292,11 @@ pub trait ExtendFromReader { reader: &mut R, ) -> Result>; + /// Extends `self` with the contents of whole reader. + /// + /// This calls `extend_from_reader` until it returns 0. + /// The method returns the number of bytes read. 
+ /// If the amount exceeds `usize::MAX` the return value is `usize::MAX`. fn extend_from_reader_to_end( &mut self, reader: &mut R, @@ -444,7 +308,7 @@ pub trait ExtendFromReader { if bytes == 0 { return Ok(total_bytes); } - total_bytes += bytes; + total_bytes = total_bytes.saturating_add(bytes); } } } @@ -505,9 +369,16 @@ pub trait Write { fn flush(&mut self) -> Result<(), Self::FlushError>; /// Attempts to write an entire buffer into this `Write`. - fn write_all(&mut self, mut buf: &[u8]) -> Result<(), Self::WriteError> { + fn write_all(&mut self, mut buf: &[u8]) -> Result<(), error::WriteAllError> { + let total_len = buf.len(); while !buf.is_empty() { - let len = self.write(buf)?; + let len = self + .write(buf) + .map_err(|error| error::WriteAllError { + bytes_written: total_len - buf.len(), + bytes_missing: buf.len(), + error, + })?; buf = &buf[len..]; } Ok(()) @@ -526,7 +397,7 @@ pub trait Write { /// The function is mandatory, as a lint to incentivize implementors to implement it, if /// applicable. Note that if you implement this function, you must also implement /// `uses_size_hint`. - fn size_hint(&mut self, bytes: usize); + fn size_hint(&mut self, min_bytes: usize, max_bytes: Option); /// Reports to the caller whether size hint is actually used. This can prevent costly /// computation of size hint that would be thrown away. @@ -627,8 +498,9 @@ pub trait Write { impl<'a, R: Read + ?Sized> Read for &'a mut R { type ReadError = R::ReadError; + type BufInit = R::BufInit; - fn read(&mut self, buf: &mut ReadBuffer<'_>) -> Result { + fn read(&mut self, buf: OutBuf<'_, Self::BufInit>) -> Result { (*self).read(buf) } } @@ -645,8 +517,8 @@ impl<'a, W: Write + ?Sized> Write for &'a mut W { (*self).flush() } - fn size_hint(&mut self, bytes: usize) { - (*self).size_hint(bytes) + fn size_hint(&mut self, min_bytes: usize, max_bytes: Option) { + (*self).size_hint(min_bytes, max_bytes) } /// Reports to the caller whether size hint is actually used. This can prevent costly @@ -657,28 +529,17 @@ impl<'a, W: Write + ?Sized> Write for &'a mut W { } impl<'a> Read for &'a [u8] { - type ReadError = Void; - - fn read(&mut self, buf: &mut ReadBuffer<'_>) -> Result { - use core::cmp::min; + type ReadError = core::convert::Infallible; + type BufInit = init::Uninit; + fn read(&mut self, mut buf: OutBuf<'_, Self::BufInit>) -> Result { if self.is_empty() { return Ok(0); } - let amt = min(buf.remaining(), self.len()); - let (a, b) = self.split_at(amt); + let amt = buf.write_slice_min(self); - // First check if the amount of bytes we want to read is small: - // `copy_from_slice` will generally expand to a call to `memcpy`, and - // for a single byte the overhead is significant. - if amt == 1 { - buf.write_byte(a[0]) - } else { - buf.write_slice(a); - } - - *self = b; + *self = &self[amt..]; Ok(amt) } @@ -689,7 +550,7 @@ impl<'a> Read for &'a [u8] { impl<'a> Write for &'a mut [u8] { type WriteError = error::BufferOverflow; - type FlushError = Void; + type FlushError = core::convert::Infallible; fn write(&mut self, buf: &[u8]) -> Result { if buf.len() <= self.len() { @@ -706,5 +567,24 @@ impl<'a> Write for &'a mut [u8] { Ok(()) } - fn size_hint(&mut self, _bytes: usize) {} + fn size_hint(&mut self, _min_bytes: usize, _max_bytes: Option) {} +} + +/// Reader that maps all its errors using the provided function. +/// +/// This is returned from [`Read::map_read_err`] method. Check its documentation for more details. 
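+///
+/// For instance, two readers with different error types can be unified roughly like this
+/// (a sketch; `MyError` is a hypothetical enum with tuple variants wrapping each reader's
+/// error type):
+///
+/// ```ignore
+/// let a = tcp_reader.map_read_err(MyError::Tcp);
+/// let b = file_reader.map_read_err(MyError::File);
+/// // both readers now have `ReadError = MyError`,
+/// // so they can be stored together in one collection of trait objects
+/// ```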
+pub struct MapReadErr { + reader: R, + map_fn: F, +} + +impl Read for MapReadErr where R: Read, F: FnMut(R::ReadError) -> E { + type ReadError = E; + type BufInit = R::BufInit; + + fn read(&mut self, buf: OutBuf<'_, Self::BufInit>) -> Result { + self.reader.read(buf).map_err(&mut self.map_fn) + } + + // TODO: forward other methods } diff --git a/src/std_impls.rs b/src/std_impls.rs index 5c518bf..56fb957 100644 --- a/src/std_impls.rs +++ b/src/std_impls.rs @@ -1,87 +1,17 @@ //! This module contains glue `for std::io` and other `std` types. -use crate::bufio::BufWrite; -use crate::error::ExtendError; -use crate::ExtendFromReader; use crate::Read; use crate::Write; +use crate::OutBuf; use std::io; use std::io::{Empty, Sink}; -use std::vec::Vec; -use void::Void; - -impl Write for Vec { - type WriteError = Void; - type FlushError = Void; - - fn write(&mut self, buf: &[u8]) -> Result { - self.extend_from_slice(buf); - Ok(buf.len()) - } - - fn flush(&mut self) -> Result<(), Self::FlushError> { - Ok(()) - } - - fn size_hint(&mut self, bytes: usize) { - self.reserve(bytes) - } - - fn uses_size_hint(&self) -> bool { - true - } -} - -unsafe impl BufWrite for Vec { - fn request_buffer(&mut self) -> Result<*mut [u8], Self::WriteError> { - use std::slice; - - // Ensure there is a space for data - self.reserve(1); - unsafe { - Ok(&mut slice::from_raw_parts_mut(self.as_mut_ptr(), self.capacity())[self.len()..]) - } - } - - unsafe fn submit_buffer(&mut self, size: usize) { - let new_len = self.len() + size; - self.set_len(new_len) - } -} - -unsafe impl BufferLike for Vec { - fn whole_as_out_bytes(&mut self) -> &mut OutBytes { - possibly_uninit::slice::from_raw_parts(self.as_mut_ptr(), self.capacity()) - } - - fn position(&mut self) -> usize { - self.len() - } - - unsafe fn set_position(&mut self, position: usize) { - self.set_len(position); - } -} - -impl ExtendFromReader for Vec { - // We could return OOM, but there is no `try_alloc`, so we have to panic. - // That means `Vec` can never fail. - type ExtendError = Void; - - fn extend_from_reader( - &mut self, - reader: &mut R, - ) -> Result> { - // Prepare space - self.reserve(1024); - reader.read(self.uninit_as_read_buffer()).map_err(ExtendError::ReadErr) - } -} +use core::convert::Infallible; +use buffer::Buffer; // Same as our Sink. impl Write for Sink { - type WriteError = Void; - type FlushError = Void; + type WriteError = Infallible; + type FlushError = Infallible; fn write(&mut self, buf: &[u8]) -> Result { Ok(buf.len()) @@ -91,14 +21,15 @@ impl Write for Sink { Ok(()) } - fn size_hint(&mut self, _bytes: usize) {} + fn size_hint(&mut self, _min_bytes: usize, _max_bytes: Option) {} } // Same as our Empty. 
impl Read for Empty { - type ReadError = Void; + type ReadError = Infallible; + type BufInit = buffer::init::Uninit; - fn read(&mut self, _buf: &mut [u8]) -> Result { + fn read(&mut self, _buf: OutBuf<'_, Self::BufInit>) -> Result { Ok(0) } } @@ -119,15 +50,9 @@ impl StdRead { } impl, R: Read> io::Read for StdRead { - fn read(&mut self, buf: &ReadBuffer<'_>) -> Result { - // Safety: we initialize whole buffer - // and we also check whether returned amount is not greater than remaining() - unsafe { - let amount = self.0.read(buf.uninit().write_zeroes())?; - assert!(amount <= buf.uninit().remaining()); - self.advance(amount); - Ok(amount) - } + fn read(&mut self, buf: &mut [u8]) -> Result { + let mut buffer: Buffer<&mut [u8], R::BufInit> = Buffer::new_from_init(buf); + self.0.read(buffer.as_out()).map_err(Into::into) } } @@ -181,14 +106,8 @@ impl< impl, T: Read> io::Read for StdIo { fn read(&mut self, buf: &mut [u8]) -> Result { - // Safety: we initialize whole buffer - // and we also check whether returned amount is not greater than remaining() - unsafe { - let amount = self.0.read(buf.uninit().write_zeroes())?; - assert!(amount <= buf.uninit().remaining()); - self.advance(amount); - Ok(amount) - } + let mut buffer: Buffer<&mut [u8], T::BufInit> = Buffer::new_from_init(buf); + self.0.read(buffer.as_out()).map_err(Into::into) } } @@ -221,9 +140,12 @@ impl GenioRead { impl Read for GenioRead { type ReadError = io::Error; + type BufInit = buffer::init::Init; - fn read(&mut self, buf: &mut [u8]) -> Result { - self.0.read(&mut buf.as_read_buffer()) + fn read(&mut self, mut buf: OutBuf<'_, Self::BufInit>) -> Result { + let amount = self.0.read(&mut buf.bytes_mut())?; + buf.advance(amount); + Ok(amount) } } @@ -254,7 +176,7 @@ impl Write for GenioWrite { self.0.flush() } - fn size_hint(&mut self, _bytes: usize) {} + fn size_hint(&mut self, _min_bytes: usize, _max_bytes: Option) {} } /// Wrapper providing `genio::Read + genio::Write` traits for `std::io::Read + std::io::Write` types. @@ -274,9 +196,12 @@ impl GenioIo { impl Read for GenioIo { type ReadError = io::Error; + type BufInit = buffer::init::Init; - fn read(&mut self, buf: &mut [u8]) -> Result { - self.0.read(&mut buf.as_read_buffer()) + fn read(&mut self, mut buf: OutBuf<'_, Self::BufInit>) -> Result { + let amount = self.0.read(&mut buf.bytes_mut())?; + buf.advance(amount); + Ok(amount) } } @@ -292,5 +217,5 @@ impl Write for GenioIo { self.0.flush() } - fn size_hint(&mut self, _bytes: usize) {} + fn size_hint(&mut self, _min_bytes: usize, _max_bytes: Option) {} } diff --git a/src/util/bytes.rs b/src/util/bytes.rs index 76d01fa..936e76d 100644 --- a/src/util/bytes.rs +++ b/src/util/bytes.rs @@ -1,4 +1,5 @@ use crate::Read; +use core::mem::MaybeUninit; /// Represents reader as iterator over bytes. /// @@ -18,11 +19,11 @@ impl Iterator for Bytes { type Item = Result; fn next(&mut self) -> Option { - let mut buf = [0]; - match self.reader.read(&mut buf) { - Ok(0) => None, - Ok(_) => Some(Ok(buf[0])), - Err(e) => Some(Err(e)), + let mut buf = buffer::new_maybe_init::<[MaybeUninit; 1], R::BufInit>(); + let result = self.reader.read(buf.as_out()); + match (result, buf.written().first()) { + (Err(e), _) => Some(Err(e)), + (Ok(_), byte) => byte.copied().map(Ok), } } } diff --git a/src/util/chain.rs b/src/util/chain.rs index 18bfc3d..80abe83 100644 --- a/src/util/chain.rs +++ b/src/util/chain.rs @@ -1,5 +1,6 @@ use crate::error::ChainError; -use crate::Read; +use crate::{Read, OutBuf}; +use buffer::Combine; /// Chains two readers. 
/// @@ -22,14 +23,15 @@ impl Chain { } } -impl Read for Chain { +impl Read for Chain where F::BufInit: Combine { type ReadError = ChainError; + type BufInit = >::Combined; - fn read(&mut self, buf: &mut [u8]) -> Result { + fn read(&mut self, buf: OutBuf<'_, Self::BufInit>) -> Result { if self.first_finished { - self.second.read(buf).map_err(ChainError::Second) + self.second.read(buf.uncombine_right::()).map_err(ChainError::Second) } else { - self.first.read(buf).map_err(ChainError::First).map(|l| { + self.first.read(buf.uncombine_left::()).map_err(ChainError::First).map(|l| { if l == 0 { self.first_finished = true; } diff --git a/src/util/empty.rs b/src/util/empty.rs index 22454c7..b3ae40e 100644 --- a/src/util/empty.rs +++ b/src/util/empty.rs @@ -1,13 +1,23 @@ -use crate::Read; -use void::Void; +use crate::{Read, OutBuf}; /// This reader is empty - always returns 0 from read method. pub struct Empty; impl Read for Empty { - type ReadError = Void; + type ReadError = core::convert::Infallible; + type BufInit = buffer::init::Uninit; - fn read(&mut self, _buf: &mut [u8]) -> Result { + fn read(&mut self, _buf: OutBuf<'_, Self::BufInit>) -> Result { Ok(0) } + + #[inline] + fn available_bytes(&self, at_least: usize) -> bool { + at_least == 0 + } + + #[inline] + fn retrieve_available_bytes(&self) -> Option { + Some(0) + } } diff --git a/src/util/mod.rs b/src/util/mod.rs index 5f5ca27..4aa3a48 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -22,6 +22,7 @@ const DEFAULT_BUF_SIZE: usize = 8 * 1024; use crate::error::IOError; use crate::{Read, Write}; +use core::mem::MaybeUninit; /// Copies the entire contents of a reader into a writer. /// @@ -41,12 +42,13 @@ pub fn copy( ) -> Result> { use crate::ext::{ReadExt, ReadResult}; - let mut buf = [0; DEFAULT_BUF_SIZE]; + let mut buf = buffer::new_maybe_init::<[MaybeUninit; DEFAULT_BUF_SIZE], R::BufInit>(); let mut written = 0; - while let ReadResult::Bytes(b) = reader.read_ext(&mut buf).map_err(IOError::Read)? { - writer.write_all(b).map_err(IOError::Write)?; + while let ReadResult::Bytes(b) = reader.read_ext(buf.as_out()).map_err(IOError::Read)? { + writer.write_all(b).map_err(|error| IOError::Write(error.into_inner()))?; written += b.len() as u64; + buf.reset(); } Ok(written) } diff --git a/src/util/repeat.rs b/src/util/repeat.rs index 120a6df..32177db 100644 --- a/src/util/repeat.rs +++ b/src/util/repeat.rs @@ -1,7 +1,6 @@ -use crate::Read; -use void::Void; +use crate::{Read, OutBuf}; -/// Reader that infinitely repeats single byte. +/// Reader that infinitely repeats a single byte. /// /// It will never fail and never return 0. pub struct Repeat { @@ -9,14 +8,19 @@ pub struct Repeat { } impl Read for Repeat { - type ReadError = Void; + type ReadError = core::convert::Infallible; + type BufInit = buffer::init::Uninit; - fn read(&mut self, buf: &mut [u8]) -> Result { - // TODO: use memset? - let len = buf.len(); - for b in buf { - *b = self.byte; + fn read(&mut self, mut buf: OutBuf<'_, Self::BufInit>) -> Result { + let len = buf.remaining(); + while !buf.is_full() { + buf.write_byte(self.byte); } Ok(len) } + + #[inline] + fn retrieve_available_bytes(&self) -> Option { + Some(usize::MAX) + } } diff --git a/src/util/repeat_bytes.rs b/src/util/repeat_bytes.rs index 52a41fc..6ec63a8 100644 --- a/src/util/repeat_bytes.rs +++ b/src/util/repeat_bytes.rs @@ -1,5 +1,4 @@ -use crate::Read; -use void::Void; +use crate::{Read, OutBuf}; /// Reader repeating a sequence of bytes infinitely. 
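+///
+/// For example, with `bytes = *b"ab"` successive reads yield `abababab…`, wrapping around at the
+/// end of the slice, and reading never fails.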
pub struct RepeatBytes { @@ -8,33 +7,26 @@ pub struct RepeatBytes { } impl> Read for RepeatBytes { - type ReadError = Void; + type ReadError = core::convert::Infallible; + type BufInit = buffer::init::Uninit; - fn read(&mut self, buf: &mut [u8]) -> Result { - use core::cmp::min; - let len = buf.len(); + fn read(&mut self, mut buf: OutBuf<'_, Self::BufInit>) -> Result { + let len = buf.remaining(); let bytes = self.bytes.as_ref(); - let amt = min(buf.len(), bytes.len() - self.offset); - buf[..amt].copy_from_slice(&bytes[self.offset..(self.offset)]); - self.offset += amt; - if self.offset == bytes.len() { - self.offset = 0 - } - - let buf = { - let tmp = &mut buf[amt..]; - tmp - }; - - for chunk in buf.chunks_mut(bytes.len()) { - if chunk.len() != bytes.len() { - chunk.copy_from_slice(&bytes[..bytes.len()]); - self.offset = chunk.len(); - } else { - chunk.copy_from_slice(bytes); + while !buf.is_full() { + let copied = buf.write_slice_min(&bytes[self.offset..]); + self.offset += copied; + if self.offset == bytes.len() { + self.offset = 0 } } + Ok(len) } + + #[inline] + fn retrieve_available_bytes(&self) -> Option { + Some(usize::MAX) + } } diff --git a/src/util/restarting.rs b/src/util/restarting.rs index 9fa723c..71f9b1d 100644 --- a/src/util/restarting.rs +++ b/src/util/restarting.rs @@ -1,6 +1,5 @@ use crate::error::{IntoIntrError, IntrError}; -use crate::Read; -use crate::Write; +use crate::{Read, Write, OutBuf}; /// Restarts all interrupted operations. /// @@ -40,9 +39,10 @@ where R::ReadError: IntoIntrError, { type ReadError = <::ReadError as IntoIntrError>::NonIntr; + type BufInit = R::BufInit; - fn read(&mut self, buf: &mut [u8]) -> Result { - Self::restart_call(|| self.0.read(buf)) + fn read(&mut self, mut buf: OutBuf<'_, Self::BufInit>) -> Result { + Self::restart_call(|| self.0.read(buf.reborrow())) } } @@ -63,8 +63,8 @@ where Self::restart_call(|| self.0.flush()) } - fn size_hint(&mut self, bytes: usize) { - self.0.size_hint(bytes); + fn size_hint(&mut self, min_bytes: usize, max_bytes: Option) { + self.0.size_hint(min_bytes, max_bytes); } fn uses_size_hint(&self) -> bool { diff --git a/src/util/sink.rs b/src/util/sink.rs index 8232b36..cfcd687 100644 --- a/src/util/sink.rs +++ b/src/util/sink.rs @@ -1,12 +1,11 @@ use crate::Write; -use void::Void; /// Silently drops everything that is written to it. pub struct Sink; impl Write for Sink { - type WriteError = Void; - type FlushError = Void; + type WriteError = core::convert::Infallible; + type FlushError = core::convert::Infallible; fn write(&mut self, buf: &[u8]) -> Result { Ok(buf.len()) @@ -16,5 +15,5 @@ impl Write for Sink { Ok(()) } - fn size_hint(&mut self, _bytes: usize) {} + fn size_hint(&mut self, _min_bytes: usize, _max_bytes: Option) {} } diff --git a/src/util/write_trunc.rs b/src/util/write_trunc.rs index 2b48be7..184c454 100644 --- a/src/util/write_trunc.rs +++ b/src/util/write_trunc.rs @@ -1,4 +1,5 @@ use crate::Write; +use crate::error::WriteAllError; /// Truncates writing so that at most `n` bytes in total are written into the writer. 
/// @@ -46,7 +47,7 @@ impl Write for WriteTrunc { } } - fn write_all(&mut self, mut buf: &[u8]) -> Result<(), Self::WriteError> { + fn write_all(&mut self, mut buf: &[u8]) -> Result<(), WriteAllError> { let len = buf.len(); if len as u64 > self.remaining { let tmp = &buf[..(self.remaining as usize)]; @@ -66,12 +67,19 @@ impl Write for WriteTrunc { self.writer.flush() } - fn size_hint(&mut self, bytes: usize) { - if bytes as u64 > self.remaining { - self.writer.size_hint(self.remaining as usize) + fn size_hint(&mut self, min_bytes: usize, max_bytes: Option) { + let (min_bytes, max_bytes) = if min_bytes as u64 > self.remaining { + (self.remaining as usize, Some(self.remaining as usize)) + } else if self.remaining < usize::MAX as u64 { + let max_bytes = max_bytes + .unwrap_or(self.remaining as usize) + .min(self.remaining as usize); + (min_bytes, Some(max_bytes)) } else { - self.writer.size_hint(bytes) - } + (min_bytes, max_bytes) + }; + + self.writer.size_hint(min_bytes, max_bytes) } fn uses_size_hint(&self) -> bool { diff --git a/uninit_buffer/Cargo.toml b/uninit_buffer/Cargo.toml new file mode 100644 index 0000000..3181c95 --- /dev/null +++ b/uninit_buffer/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "uninit_buffer" +version = "0.1.0" +authors = ["Martin Habovstiak "] +edition = "2018" + +[features] +gat_unstable = [] +const_generics = [] +std = ["alloc"] +alloc = ["possibly_uninit/alloc"] + +[dependencies] +possibly_uninit = { version = "0.1", path = "../../possibly_uninit" } diff --git a/uninit_buffer/src/buf_init.rs b/uninit_buffer/src/buf_init.rs new file mode 100644 index 0000000..e8476fd --- /dev/null +++ b/uninit_buffer/src/buf_init.rs @@ -0,0 +1,205 @@ +/// Trait representing marker types usable for tracking of initializedness. +/// +/// This trait is sealed so only three types can actually be used. +pub trait BufInit: sealed::BufInit + sealed::Combine + sealed::Combine + sealed::Combine { +} + +impl + sealed::Combine + sealed::Combine> BufInit for T {} + +/// Markers for types that can be constructed from uninitialized data. +/// +/// This trait is sealed so only two types can actually be used: `Uninit` and `Dynamic`. +pub trait FromUninit: BufInit + sealed::FromUninit {} + +impl FromUninit for T {} + +/// A type operator that produces the most optimal initializedness marker to be consumed by two +/// generic byte producers. +/// +/// This can be used in generic code working with two generic types to pick the most performant +/// combined type. The implementation produces same type for equal types and `Dynamic` for unequal +/// types. +pub trait Combine: sealed::Combine { + /// The output of the type operator. + type Combined: BufInit; +} + +impl Combine for T where T: sealed::Combine, U: BufInit { + type Combined = >::Combined; +} + +pub(crate) mod sealed { + use super::init; + + pub unsafe trait BufInit: Sized { + type WithInit: super::BufInit; + type WithDynamic: super::BufInit; + type WithUninit: super::BufInit; + + // None means whole buffer! 
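+        // (`Uninit` reports `Some(0)`, `Init` reports `None` and `Dynamic` reports the tracked
+        // count as `Some(n)`.)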
+ fn init_len(&self) -> Option; + unsafe fn set_init(&mut self, len: usize); + unsafe fn update_min(&mut self, len: usize); + fn needs_init() -> bool; + unsafe fn new_unchecked(init: usize) -> Self; + fn is_dynamic() -> bool; + } + + pub unsafe trait FromUninit: BufInit { + fn new(init: usize) -> Self; + } + + // The trait is unsafe because if any operand is Dynamic then Combined MUST be Dynamic + pub unsafe trait Combine: Sized { + type Combined: super::BufInit; + } + + unsafe impl Combine for init::Init { + type Combined = T::WithInit; + } + + unsafe impl Combine for init::Dynamic { + type Combined = T::WithDynamic; + } + + unsafe impl Combine for init::Uninit { + type Combined = T::WithUninit; + } +} + +/// Contains markers for initializedness. +pub mod init { + use super::sealed::{BufInit, FromUninit}; + + /// Marker not giving any guarantees about the initializedness of storage. + // SAFETY: this must remain zero-sized! + pub struct Uninit(()); + + /// Marker dynamically tracking initializedness of storage. + /// + /// This provides flexibility at the cost of overhead. + pub struct Dynamic(usize); + + /// Marker guaranteeing that the storage is actually initialized. + pub struct Init(()); + + impl Init { + #[inline] + pub(crate) unsafe fn new() -> Self { + Init(()) + } + } + + unsafe impl FromUninit for Uninit { + #[inline] + fn new(_init: usize) -> Self { + Uninit(()) + } + } + + unsafe impl BufInit for Uninit { + type WithInit = Dynamic; + type WithDynamic = Dynamic; + type WithUninit = Self; + + #[inline] + fn init_len(&self) -> Option { + Some(0) + } + + #[inline] + unsafe fn set_init(&mut self, _len: usize) {} + + #[inline] + unsafe fn update_min(&mut self, _len: usize) {} + + #[inline] + fn needs_init() -> bool { + false + } + + #[inline] + unsafe fn new_unchecked(_init: usize) -> Self { + Uninit(()) + } + + fn is_dynamic() -> bool { + false + } + } + + unsafe impl BufInit for Init { + type WithInit = Self; + type WithDynamic = Dynamic; + type WithUninit = Dynamic; + + #[inline] + fn init_len(&self) -> Option { + None + } + + #[inline] + unsafe fn set_init(&mut self, _len: usize) {} + + #[inline] + unsafe fn update_min(&mut self, _len: usize) {} + + #[inline] + fn needs_init() -> bool { + true + } + + #[inline] + unsafe fn new_unchecked(_init: usize) -> Self { + Init(()) + } + + fn is_dynamic() -> bool { + false + } + } + + unsafe impl FromUninit for Dynamic { + #[inline] + fn new(init: usize) -> Self { + Dynamic(init) + } + } + + unsafe impl BufInit for Dynamic { + type WithInit = Self; + type WithDynamic = Self; + type WithUninit = Self; + + #[inline] + fn init_len(&self) -> Option { + Some(self.0) + } + + #[inline] + unsafe fn set_init(&mut self, len: usize) { + self.0 = len; + } + + #[inline] + unsafe fn update_min(&mut self, len: usize) { + if len > self.0 { + self.0 = len; + } + } + + #[inline] + fn needs_init() -> bool { + false + } + + #[inline] + unsafe fn new_unchecked(init: usize) -> Self { + Dynamic(init) + } + + fn is_dynamic() -> bool { + true + } + } +} diff --git a/uninit_buffer/src/lib.rs b/uninit_buffer/src/lib.rs new file mode 100755 index 0000000..e831855 --- /dev/null +++ b/uninit_buffer/src/lib.rs @@ -0,0 +1,858 @@ +//! Primitives for working with maybe uninitialized byte buffers safely. +//! +//! This crate contains basic safe encapsulations of `unsafe` byte buffer-related APIs. +//! It was motivated by the `genio` crate but may be reused by other crates. +//! +//! The crate contains two important types: [`Buffer`] and [`OutBuf`]. +//! +//! 
`Buffer` represents and underlying, maybe uninitialized, storage with additional information +//! used for tracking initializedness. Various underlying storages can be used: arrays, slices, +//! boxed slices. Both initialized and uninitialized. +//! +//! `OutBuf` is a custom mutable (actually write-only) reference to `Buffer` that can be handed out +//! to sources of bytes to be filled. Such is the case of sound, performant readers. `OutBuf` +//! erases irrelevant details like the type of backing storage by design, so it can be used in +//! trait objects. It still has a type parameter signalling whether the buffer is actually +//! initialized but this should not be a huge obstacle. The initializedness can be parametrised +//! and, if needed, dynamic tracking of initializedness can be used. This is mainly useful in case +//! of mixed byte sources. +//! +//! As mentioned, initializedness is tracked in a type parameter. The parameter is restricted to +//! implement a sealed trait, so there can truly be only three types: +//! +//! * [`init::Init`] - guarantees the buffer is actually initialized. +//! * [`init::Uninit`] - no guarantee about initializedness of the buffer. +//! * [`init::Dynamic`] - initializedness is tracked dynamically as a separate `usize` value. +//! +//! This can look a bit confusing however there's a simple way to choose the parameter: +//! +//! 0. Try using `init::Uninit`. +//! 1. If the above fails because some code needs `&mut [u8]` for writing (e.g. `std::io::Read` +//! trait) use `init::Init` +//! 2. If the code that requires `&mut [u8]` is not guaranteed to be executed use `init::Dynamic`. +//! +//! In general, `init::Uninit` is the most performant but occasionally can not be used due to +//! old/imperfect APIs. + +#![no_std] +#![deny(missing_docs)] + +/// Re-export of the `possibly_uninit` crate which provides us important primitives for working +/// with uninitialized memory. +pub extern crate possibly_uninit; + +#[cfg(feature = "alloc")] +extern crate alloc; + +#[cfg(feature = "alloc")] +use alloc::vec::Vec; +#[cfg(feature = "alloc")] +use alloc::boxed::Box; + +mod buf_init; + +pub use buf_init::*; +use core::mem::MaybeUninit; +use possibly_uninit::slice::BorrowOutSlice; +#[cfg(feature = "alloc")] +use core::borrow::BorrowMut; + +pub use out_buf::OutBuf; +pub use buffer::Buffer; + +/// Abbreviation for OutSlice +pub type OutBytes = possibly_uninit::slice::OutSlice; + +/// Stores the position and the number of initialized bytes (if dynamic) +struct Meta { + position: usize, + init: Init, +} + +/// Implements the core of `OutBuf` type, a poor man implementation of `unsafe` fields. +/// +/// This is in a separate module to ensure all accesses that could violate invariants use `unsafe` +/// block. It also abstracts away the core operations making it easy to use custom DST behind a +/// feature flag. +mod out_buf { + use crate::{BufInit, Meta, OutBytes}; + + /// Type-erased out reference for [`Buffer`](crate::Buffer) + /// + /// This reference can be used to write into the buffer safely and passed to sources of data. + /// It is used instead of `&mut Buffer` reference to allow casting it to non-dynamic types + /// without unsoundness. + /// + /// It is also possible to use this to abstract over multiple buffers with different backing + /// storages. + pub struct OutBuf<'a, Init: BufInit> { + bytes: &'a mut OutBytes, + meta: &'a mut Meta, + } + + impl<'a, Init: BufInit> OutBuf<'a, Init> { + /// Creates `OutBuf` assuming correct inputs. 
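+        ///
+        /// Note that only the position invariant below is `debug_assert`ed; the
+        /// initializedness requirement is never checked, not even in debug builds.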
+ /// + /// ## Safety + /// + /// The requirements for calling this being sound are: + /// + /// * `meta.position <= bytes.len()` + /// * if `meta.init.init_len().unwrap_or(bytes.len()) > meta.position` + /// then all bytes in `[meta.position..meta.init.init_len().unwrap_or(bytes.len())]` + /// are initialized. + #[inline] + pub(crate) unsafe fn new(bytes: &'a mut OutBytes, meta: &'a mut Meta) -> Self { + debug_assert!(meta.position <= bytes.len()); + + OutBuf { + bytes, + meta, + } + } + + /// Shortens the lifetime of `OutBuf`. + /// + /// This does the same thing `&mut *x` would do on native reference. + #[inline] + pub fn reborrow(&mut self) -> OutBuf<'_, Init> { + OutBuf { + bytes: &mut self.bytes, + meta: &mut self.meta, + } + } + + #[inline] + pub(crate) fn out_bytes_ref(&self) -> &OutBytes { + self.bytes + } + + #[inline] + pub(crate) fn meta(&self) -> &Meta { + &self.meta + } + + #[inline] + pub(crate) unsafe fn into_raw_parts(self) -> (&'a mut OutBytes, &'a mut Meta) { + (self.bytes, self.meta) + } + + #[inline] + pub(crate) unsafe fn meta_mut(&mut self) -> &mut Meta { + self.reborrow().into_raw_parts().1 + } + } +} + +mod buffer { + use crate::{BufInit, Meta, init, OutBuf, OutBytes}; + use possibly_uninit::slice::BorrowOutSlice; + + /// Buffer of bytes that may be uninitialized. + pub struct Buffer, Init: BufInit> { + bytes: Bytes, + meta: Meta, + } + + impl, Init: BufInit> Buffer { + pub(crate) unsafe fn new_unchecked(bytes: Bytes, meta: Meta) -> Self { + let slice_len = bytes.borrow_uninit_slice().len(); + debug_assert!(meta.position <= slice_len); + debug_assert!(meta.init.init_len().unwrap_or(slice_len) <= slice_len); + + Buffer { + bytes, + meta, + } + } + + /// Returns the maximum number of bytes the buffer can store. + pub fn capacity(&self) -> usize { + self.bytes.borrow_uninit_slice().len() + } + + pub(crate) fn position(&self) -> usize { + self.meta.position + } + + /// Returns abstracted write-only reference to the buffer. + /// + /// This is the primary interface for writing into the buffer that is intended to be passed + /// to functions that fill the buffer with bytes. It is designed to allow casting it to + /// uninit version soundly. + /// + /// It also prevents the consumers from touching written portion of the buffer. + #[inline] + pub fn as_out(&mut self) -> OutBuf<'_, Init> { + // SAFETY: our type enforces the safety requirements of this operation + unsafe { + OutBuf::new(self.bytes.borrow_out_slice(), &mut self.meta) + } + } + + /// Returns the slice of uninitialized bytes. + pub fn out_bytes(&mut self) -> &mut OutBytes { + &mut self.bytes.borrow_out_slice()[self.meta.position..] + } + + /// Returns the filled part of the buffer. + #[inline] + pub fn written(&self) -> &[u8] { + // SAFETY: the invariant of this type is that all bytes up to position are initialized + unsafe { + let written_uninit = &self.bytes.borrow_uninit_slice()[..self.meta.position]; + core::slice::from_raw_parts(written_uninit.as_ptr() as *const u8, written_uninit.len()) + } + } + + /// Returns the filled part of the buffer as mutable slice. + #[inline] + pub fn written_mut(&mut self) -> &mut [u8] { + // SAFETY: the invariant of this type is that all bytes up to position are initialized + unsafe { + self.bytes.borrow_out_slice()[..self.meta.position].assume_init_mut() + } + } + + /// Decomposes the buffer into inner storage and position. + #[inline] + pub fn into_parts(self) -> (Bytes, usize) { + (self.bytes, self.meta.position) + } + + /// Sets the position to the beginning (0). 
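+        ///
+        /// Bytes written so far stay initialized (for `Dynamic` buffers this is recorded
+        /// via `update_min`); they simply stop being reported by `written()`.
+        ///
+        /// A minimal sketch of the intended use, assuming a small uninit array as storage
+        /// (not compiled as a doctest):
+        ///
+        /// ```ignore
+        /// use core::mem::MaybeUninit;
+        /// use uninit_buffer::new_uninit;
+        ///
+        /// let mut buffer = new_uninit::<[MaybeUninit<u8>; 8]>();
+        /// buffer.as_out().write_slice(&[1, 2, 3]);
+        /// assert_eq!(buffer.written(), &[1, 2, 3]);
+        /// buffer.reset();
+        /// assert!(buffer.written().is_empty());
+        /// ```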
+ #[inline] + pub fn reset(&mut self) { + unsafe { + self.meta.init.update_min(self.meta.position); + self.meta.position = 0; + } + } + + /// Rolls back the position by `count` bytes. + #[inline] + pub fn rewind(&mut self, count: usize) { + if count > self.meta.position { + panic!("attempt to update past beginning"); + } + + unsafe { + self.meta.init.update_min(self.meta.position); + self.meta.position -= count; + } + } + + /// Zeroes the buffer **if required** and converts it into initialized buffer. + pub fn into_init(mut self) -> Buffer { + unsafe { + self.as_out().perform_zeroing(); + let meta = Meta { + position: self.position(), + init: init::Init::new(), + }; + + // SAFETY: we've zeroed the buffer above as needed + Buffer::new_unchecked(self.bytes, meta) + } + } + + /// Creates a buffer that tracks its initializedness. + pub fn into_dynamic(self) -> Buffer { + use crate::sealed::FromUninit; + unsafe { + let init_len = self.meta.init.init_len().unwrap_or(self.bytes.borrow_uninit_slice().len()); + let meta = Meta { + position: self.position(), + init: init::Dynamic::new(init_len), + }; + + Buffer::new_unchecked(self.bytes, meta) + } + } + } + + impl<'a, Bytes: BorrowOutSlice + ?Sized, Init: BufInit> Buffer<&'a mut Bytes, Init> { + /// Converts the initialized part of the buffer into primitive slice. + /// + /// This preserves the lifetime of the underlying reference so it can be used e.g. when + /// returning from a function. + pub fn into_init_slice(self) -> &'a mut [u8] { + unsafe { + // SAFETY: we track that the buffer is initialized up to `position` + self.bytes.borrow_out_slice()[..self.meta.position].assume_init_mut() + } + } + } +} + +impl<'a, Init: BufInit> OutBuf<'a, Init> { + /// Returns whole `bytes`, not just subslice. + #[inline] + pub(crate) fn out_bytes_whole(&mut self) -> &mut OutBytes { + unsafe { + self.reborrow().into_raw_parts().0 + } + } + + /// Returns the remaining buffer that can be written into. + /// + /// Readers need to store the bytes into this slice. + /// This method should be only used in low-level Read implementations. + /// If you already have a slice or a byte see `write_slice` and `write_byte` methods. + #[inline] + pub fn out_bytes(&mut self) -> &mut OutBytes { + let position = self.meta().position; + &mut self.out_bytes_whole()[position..] + } + + + /// Marks `amount` of bytes having been written to the buffer. + /// + /// ## Safety + /// + /// The requirements for calling this being sound are: + /// + /// * `amount <= self.remaining()` + /// * `amount` of consecutive bytes were written to `self.out-bytes` starting from 0th byte. + #[inline] + pub unsafe fn advance_unchecked(&mut self, amount: usize) { + debug_assert!(amount <= self.remaining()); + self.meta_mut().position += amount; + } + + /// Returns the number of bytes available for writing. + // CR note: this is implemented here to make access not require `mut` + #[inline] + pub fn remaining(&self) -> usize { + self.out_bytes_ref().len() - self.meta().position + } + + /// Zeros **the uninitialized part** of the buffer + pub(crate) fn perform_zeroing(&mut self) { + if let Some(initialized_len) = self.meta().init.init_len() { + let initialized_len = initialized_len + .max(self.meta().position) + // This is important: because of with_limit() init_len() <= self.bytes.len() + // is NOT guaranteed even though it is for `Buffer`. 
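+                // (`with_limit()` shortens the slice handed out but leaves the `Dynamic`
+                // counter untouched, hence the clamp to the current slice length.)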
+ .min(self.out_bytes_whole().len()); + let truly_uninit = &mut self.out_bytes_whole()[initialized_len..]; + // Also, because initialized_len could've been past this slice lenght we should not + // overwrite it. + if !truly_uninit.is_empty() { + truly_uninit.write_zeroes(); + // SAFETY: we've just zeroed all uninit bytes + unsafe { + let whole_buf_len = self.out_bytes_whole().len(); + self.meta_mut().init.set_init(whole_buf_len); + } + } + } + } + + /// Reborrows `OutBuf` and casts to `Uninit` version. + /// + /// This operation is always cheap. + pub fn as_uninit(&mut self) -> OutBuf<'_, init::Uninit> { + self.reborrow().into_uninit() + } + + /// Converts `OutBuf` to `Uninit` version. + /// + /// This operation is always cheap. + pub fn into_uninit(self) -> OutBuf<'a, init::Uninit> { + unsafe { + let (bytes, meta) = self.into_raw_parts(); + // SAFETY: + // * this type already enforces requirements for itself + // * Meta has the same layout as `usize` because `init::Uninit` has zero + // size + OutBuf::new(bytes, &mut *(&mut meta.position as *mut _ as *mut Meta)) + } + } + + /// Zeroes the uninitialized part of the buffer, reborrows and casts it to `Init` version. + pub fn zeroing_as_init(&mut self) -> OutBuf<'_, init::Init> { + self.reborrow().zeroing_into_init() + } + + /// Zeroes the uninitialized part of the buffer and converts the reference to `Init` version. + pub fn zeroing_into_init(mut self) -> OutBuf<'a, init::Init> { + unsafe { + self.perform_zeroing(); + let (bytes, meta) = self.into_raw_parts(); + OutBuf::new(bytes, &mut *(&mut meta.position as *mut _ as *mut Meta)) + } + } + + /// Calls the closure with reborrowed `OutBuf` and returns the slice that the closure + /// have written into. + #[inline] + pub fn scoped) -> R>(mut self, f: F) -> (&'a mut [u8], R) { + let old_pos = self.meta().position; + let result = f(self.reborrow()); + let new_pos = self.meta().position; + unsafe { + let (bytes, _) = self.into_raw_parts(); + let written = &mut bytes[old_pos..new_pos]; + + (written.assume_init_mut(), result) + } + } + + /// Creates a new `OutBuf` with remaining length at most `limt` + #[inline] + pub fn with_limit(&mut self, limit: usize) -> OutBuf<'_, Init> { + unsafe { + let limit = self.remaining().min(limit); + let max_len = self.meta().position + limit; + let (bytes, meta) = self.reborrow().into_raw_parts(); + + OutBuf::new(&mut bytes[..max_len], meta) + } + } + + /// Writes a byte slice into the buffer and advance the position by slice length + /// + /// ## Panics + /// + /// This method panicks if the length of the slice is greater than what buffer can hold. + #[inline] + pub fn write_slice(&mut self, bytes: &[u8]) { + unsafe { + if bytes.len() > self.out_bytes().len() { + panic!("Attempt to write past the end of the buffer (buffer len: {}, write len: {})", self.out_bytes().len(), bytes.len()); + } + self.out_bytes()[..bytes.len()].copy_from_slice(bytes); + self.advance_unchecked(bytes.len()); + } + } + + /// Writes as many bytes from slice as fit into the buffer. + /// + /// This method is similar to `write_slice` but it truncates the slice being written + /// instead of panicking. + /// + /// Returns the number of bytes written + #[inline] + pub fn write_slice_min(&mut self, bytes: &[u8]) -> usize { + unsafe { + let to_write = bytes.len().min(self.out_bytes().len()); + // First check if the amount of bytes we want to read is small: + // `copy_from_slice` will generally expand to a call to `memcpy`, and + // for a single byte the overhead is significant. 
+ if to_write == 1 { + self.write_byte(bytes[0]); + } else { + self.out_bytes()[..to_write].copy_from_slice(&bytes[..to_write]); + self.advance_unchecked(to_write); + } + + to_write + } + } + + /// Returns true if no more bytes can be written to the buffer + #[inline] + pub fn is_full(&self) -> bool { + self.remaining() == 0 + } + + /// Writes a single byte into the buffer and advances the position by one + /// + /// ## Panics + /// + /// This method panicks if the buffer is full. + #[inline] + pub fn write_byte(&mut self, byte: u8) { + self.write_slice(&[byte]); + } + + /// Uncombines specified operand of `Combine`. + /// + /// ## Safety + /// + /// `T` MUST be an operand of `Combine<_, Combined=Self>` + unsafe fn uncombine(mut self) -> OutBuf<'a, T> { + // only Init needs init so if Init is Init it's already init and doesn't need init + // if it's not init it needs init :) + if T::needs_init() && !Init::needs_init() { + self.perform_zeroing(); + } + + let (bytes, meta) = self.into_raw_parts(); + // SAFETY: + // * We did initialize the buffer above if it was needed + // * see each branch for additional requirements. + if T::is_dynamic() { + // Combine requires that Combined is Dynamic if an operand is Dynamic + // and this fn requires that Combined is Self + // So Self is Dynamic and T is Dynamic, thus this is a no-op but Rust doesn't understand + // that. + OutBuf::new(bytes, &mut *(meta as *mut _ as *mut Meta)) + } else { + // If T is not Dynamic, it's one of the other two and thus zero-sized. + // Because T is zero-sized the layout of Meta is the same as the layout of usize. + OutBuf::new(bytes, &mut *(&mut meta.position as *mut _ as *mut Meta)) + } + } + + /// Casts this buffer initailizedness into the left operand of the `Combine` trait. + /// + /// This performs initialization if required but there was not way to avoid it anyway. + #[inline] + pub fn uncombine_left(self) -> OutBuf<'a, L> where L: BufInit + Combine, R: BufInit { + unsafe { + // The type signature proves L is an operand of Combine + self.uncombine::() + } + } + + /// Casts this buffer initailizedness into the right operand of the `Combine` trait. + /// + /// This performs initialization if required but there was not way to avoid it anyway. + #[inline] + pub fn uncombine_right(self) -> OutBuf<'a, R> where L: BufInit + Combine, R: BufInit { + unsafe { + // The type signature proves R is an operand of Combine + self.uncombine::() + } + } +} + +impl<'a> OutBuf<'a, init::Init> { + /// Returns the underlying buffer as "initialized" mutable reference. + /// + /// **Important:** reading from this is still a bad idea even if not memory-unsafe! + /// This is provided for legacy code that doesn't use [`OutBytes`]. + #[inline] + pub fn bytes_mut(&mut self) -> &mut [u8] { + unsafe { + // SAFETY: we have type proof this is actually initialized. + self.out_bytes().assume_init_mut() + } + } + + /// Advances the buffer position by `amount` bytes. + /// + /// This does the same thing as `advance_unchecked()` but it checks the bounds and is only + /// available when the buffer is type-proven to be initialized. + /// + /// ## Panics + /// + /// This method panicks if the amount would move the position past the end of the buffer. + #[inline] + pub fn advance(&mut self, amount: usize) { + unsafe { + if amount > self.remaining() { + panic!("Attempt to advance past the buffer"); + } + // SAFETY: we have type proof this is actually initialized and we've just checked + // the amount. 
+ self.advance_unchecked(amount); + } + } +} + +/// Marker trait guaranteeing that the bytes in storage are initialized. +/// +/// This is used to prove safety of some operations. +/// +/// ## Safety +/// +/// This trait MUST NOT be implemented on types that return uninitialized slices from +/// `borrow_uninit_slice()`. +pub unsafe trait BorrowOutBytesInit: BorrowOutSlice {} + +macro_rules! impl_borrow_out_bytes_init_array { + ($($n:expr),* $(,)?) => { + $( + unsafe impl BorrowOutBytesInit for [u8; $n] {} + )* + } +} + +impl_borrow_out_bytes_init_array! { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, +} + +unsafe impl<'a> BorrowOutBytesInit for &'a mut [u8] {} +#[cfg(feature = "alloc")] +unsafe impl BorrowOutBytesInit for Box<[u8]> {} + +impl Buffer { + /// Creates a new buffer using the provided storage and position starting at 0 + /// + /// This version of buffer is guaranteed to be initialized. + #[inline] + pub fn new_from_init(bytes: S) -> Self { + unsafe { + let meta = Meta { + position: 0, + // SAFETY: trait bound on S guarantees that the argument is initialized. + init: I::new_unchecked(bytes.borrow_uninit_slice().len()), + }; + Buffer::new_unchecked(bytes, meta) + } + } +} + +impl, I: FromUninit> Buffer { + /// Creates a new buffer using provided storage and position starting at 0 + /// + /// This can be used to create either `Dynamic` or `Uninit` buffer. + #[inline] + pub fn new(bytes: S) -> Self { + unsafe { + let len = bytes.borrow_uninit_slice().len(); + let meta = Meta { + position: 0, + init: I::new(if bytes.is_init() { len } else { 0 }) + }; + + Buffer::new_unchecked(bytes, meta) + } + } +} + +impl, I: BufInit> Buffer { + /// Creates a new buffer using provided storage and position starting at 0 initializing the + /// buffer if required. + /// + /// This can be used to create any buffer generically but the cost of initialization will not + /// be caught by type errors. This may be an issue in high-performance applications as + /// refactoring the code could cause performance regressions. Such regressions would cause type + /// errors if non-generic methods were used. + #[inline] + pub fn new_maybe_init(mut bytes: S) -> Self { + unsafe { + if I::needs_init() { + bytes.zero_if_needed(); + } + let len = bytes.borrow_uninit_slice().len(); + let meta = Meta { + position: 0, + init: I::new_unchecked(if bytes.is_init() { len } else { 0 }) + }; + + Buffer::new_unchecked(bytes, meta) + } + } +} + +#[cfg(feature = "alloc")] +impl From, Init>> for Vec { + fn from(value: Buffer, Init>) -> Self { + unsafe { + let (bytes, position) = value.into_parts(); + let capacity = bytes.len(); + let ptr = Box::into_raw(bytes) as *mut u8; + Vec::from_raw_parts(ptr, position, capacity) + } + } +} + +#[cfg(feature = "alloc")] +impl From]>, Init>> for Vec { + fn from(value: Buffer]>, Init>) -> Self { + unsafe { + let (bytes, position) = value.into_parts(); + let capacity = bytes.len(); + let ptr = Box::into_raw(bytes) as *mut u8; + Vec::from_raw_parts(ptr, position, capacity) + } + } +} + +/// Extension trait for byte slices implementing helper method(s) +pub trait ByteSliceExt: BorrowOutSlice { + /// The version of buffer created from this slice. 
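+    ///
+    /// This is `init::Init` for `[u8]` and `init::Uninit` for `[MaybeUninit<u8>]`,
+    /// matching the two impls below.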
+ type Init: BufInit; + + /// Treats this slice as a buffer of bytes + fn as_buffer(&mut self) -> Buffer<&mut Self, Self::Init>; +} + +impl ByteSliceExt for [u8] { + type Init = init::Init; + + fn as_buffer(&mut self) -> Buffer<&mut Self, Self::Init> { + Buffer::new_from_init(self) + } +} + +impl ByteSliceExt for [MaybeUninit] { + type Init = init::Uninit; + + fn as_buffer(&mut self) -> Buffer<&mut Self, Self::Init> { + Buffer::new(self) + } +} + +/// Allows generically creating uninitialized byte storages. +/// +/// This is mainly used as implementation detail of [`new_uninit`] and the related helper +/// functions. It is implemented for uninit arrays and boxed arrays (with `alloc` feature) and you +/// can implement it for your own types too. +pub trait NewUninit: BorrowOutSlice + Sized { + /// Creates byte storage with all bytes being uninitialized. + fn new_uninit() -> Self; +} + +/// Helper making it easier to construct uninitialized buffers. +/// +/// You can use this to quickly construct a buffer holding an array on stack (probably the most +/// frequent use case) or other types. It is specifically **not** a method of [`Buffer`] to reduce +/// the boilerplate related to generics. +pub fn new_uninit() -> Buffer { + Buffer::new(T::new_uninit()) +} + +/// Helper making it easier to construct buffers in generic code. +/// +/// This helper can be very useful when writing code that works with buffers generic over their +/// initializedness. It creates uninitialized buffer if possible but will zero it out if required +/// by the `Init` type parameter. +/// +/// You can use this to quickly construct a buffer holding an array on stack (probably the most +/// frequent use case) or other types. It is specifically **not** a method of [`Buffer`] to reduce +/// the boilerplate related to generics. +pub fn new_maybe_init() -> Buffer { + Buffer::new_maybe_init(T::new_uninit()) +} + +/// Helper for creating uninitalized boxes in older versions of Rust. +/// +/// This is used in the following two functions. +#[cfg(feature = "alloc")] +fn uninit_boxed_slice(capacity: usize) -> Box<[MaybeUninit]> { + unsafe { + let mut vec = Vec::>::with_capacity(capacity); + // SAFETY: + // * with_capacity is guaranteed to allocate `capacity` items (bytes in this case) + // * `set_len` means essentially `assume_init` for the part of the buffer up to len + // and `assume_init` is valid on `MaybeUninit>`. + vec.set_len(capacity); + // Since len == capacity this will not reallocate + vec.into_boxed_slice() + } +} + +/// Creates a boxed slice holding uninitialized bytes. +/// +/// This function heap-allocates an uninitialized slice and uses it as a backing storage for a +/// buffer. While allocating on heap is generally slower than on stack, this has the benefit of +/// being faster to move around. +/// +/// If possible it's still better to use `new_uninit::>()` because it doesn't +/// need to store the capacity. Some situations when this isn't possible: +/// +/// * `capacity` is dynamic - not known at compile time +/// * You need collection of buffers with different sizes +/// * You can't use const generics (due to MSRV) and you need unusual capacity. +#[cfg(feature = "alloc")] +pub fn new_uninit_boxed_slice(capacity: usize) -> Buffer]>, init::Uninit> { + Buffer::new(uninit_boxed_slice(capacity)) +} + +/// Creates a boxed slice holding bytes initializing them if required. +/// +/// This function is very similar to [`new_uninit_boxed_slice`] so that documentation applies here +/// too. 
The main difference is this one can create a buffer of arbitrary initializedness and +/// zeroes out the bytes if required. Thus it may be silently slower than the uninit one but works +/// in generic code. +#[cfg(feature = "alloc")] +pub fn new_maybe_init_boxed_slice(capacity: usize) -> Buffer]>, init::Uninit> { + Buffer::new_maybe_init(uninit_boxed_slice(capacity)) +} + +/// Creates an `OutBuf` from given `Vec` (can be a reference) and calls a closure with it +/// updating the length of `Vec` after the closure returns. +/// +/// This is very useful for safely reading into uninitialized portion of `Vec` without leaking the +/// fact that the underlying storage is actually a `Vec`. +#[cfg(feature = "alloc")] +pub fn with_vec_as_out_buf(mut vec: V, fun: F) -> R where + I: BufInit, + V: BorrowMut>, + F: FnOnce(OutBuf<'_, I>) -> R { + + unsafe { + let vec = vec.borrow_mut(); + let len = vec.len(); + let capacity = vec.capacity(); + let uninit_ptr = vec.as_mut_ptr().add(len).cast::>(); + let vec_uninit: &mut [MaybeUninit] = core::slice::from_raw_parts_mut(uninit_ptr, capacity - len); + let mut buffer = Buffer::<_, I>::new_maybe_init(vec_uninit); + let result = fun(buffer.as_out()); + let new_len = len + buffer.written().len(); + vec.set_len(new_len); + result + } +} + +mod new_uninit_arr_impl { + use super::NewUninit; + use core::mem::MaybeUninit; + #[cfg(feature = "alloc")] + use alloc::boxed::Box; + + macro_rules! impl_new_uninit_array { + ($($n:expr),* $(,)*) => { + $( + impl NewUninit for [MaybeUninit; $n] { + fn new_uninit() -> Self { + unsafe { + MaybeUninit::<[MaybeUninit; $n]>::uninit() + // SAFETY: assume_init on an array of uninitialized items is sound. + .assume_init() + } + } + } + + #[cfg(feature = "alloc")] + impl NewUninit for Box<[MaybeUninit; $n]> { + fn new_uninit() -> Self { + Box::new(NewUninit::new_uninit()) + } + } + )* + } + } + + impl_new_uninit_array! { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, + 65536, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn new_uninit_basic_ops() { + let mut buffer = new_uninit::<[MaybeUninit; 32]>(); + assert_eq!(buffer.written(), &[]); + assert_eq!(buffer.as_out().remaining(), 32); + buffer.as_out().write_slice(&[]); + assert_eq!(buffer.written(), &[]); + assert_eq!(buffer.as_out().remaining(), 32); + buffer.as_out().write_slice(&[42]); + assert_eq!(buffer.as_out().remaining(), 31); + assert_eq!(buffer.written(), &[42]); + buffer.as_out().write_slice(&[1, 2, 3, 4, 5]); + assert_eq!(buffer.as_out().remaining(), 26); + assert_eq!(buffer.written(), &[42, 1, 2, 3, 4, 5]); + buffer.as_out().write_byte(47); + assert_eq!(buffer.as_out().remaining(), 25); + assert_eq!(buffer.written(), &[42, 1, 2, 3, 4, 5, 47]); + for i in 0..25 { + buffer.as_out().write_byte(255 - i); + assert_eq!(buffer.as_out().remaining(), usize::from(24 - i)); + assert_eq!(*buffer.written().last().unwrap(), 255 - i); + } + assert!(buffer.as_out().is_full()); + } +}
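+
+// A rough sketch (kept as a comment rather than a doctest) of driving
+// `with_vec_as_out_buf` by hand with the `alloc` feature enabled. The closure's
+// parameter annotation is what pins the `Init` parameter to `init::Uninit`:
+//
+//     let mut vec: Vec<u8> = Vec::with_capacity(16);
+//     with_vec_as_out_buf(&mut vec, |mut out: OutBuf<'_, init::Uninit>| {
+//         out.write_slice(b"hello");
+//     });
+//     assert_eq!(vec, b"hello");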