new api (#7)
* conversion to [u8; 32]

* rename to MerklizedChunks

* work on raw bytes

* saner reconstruct API

* fix clippy
ordian authored Jan 11, 2024
1 parent e579bfe commit 9ab9c74
Showing 9 changed files with 90 additions and 102 deletions.
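
Taken together, the changes move the public API from SCALE-encoded generics to raw bytes: construct_chunks now takes a &[u8], the merklization iterator is renamed from ErasureRootAndProofs to MerklizedChunks, and reconstruct takes (ChunkIndex, Vec<u8>) pairs plus the original data length. A minimal sketch of the resulting round trip, assuming the crate-root re-exports shown in the src/lib.rs diff below (ChunkIndex itself is not defined in this diff):

use erasure_coding::{
    construct_chunks, reconstruct, recovery_threshold, ChunkIndex, ErasureRoot, MerklizedChunks,
};

fn round_trip(data: &[u8], n_chunks: u16) -> Result<ErasureRoot, erasure_coding::Error> {
    // Encode the raw payload into `n_chunks` erasure-coded chunks.
    let chunks: Vec<Vec<u8>> = construct_chunks(n_chunks, data)?;

    // Merklize the chunks to obtain the erasure root (iterating would yield per-chunk proofs).
    let root = MerklizedChunks::from(chunks.clone()).root();

    // Any `recovery_threshold` chunks, addressed by ChunkIndex, are enough to recover.
    let threshold = recovery_threshold(n_chunks)? as usize;
    let subset = chunks
        .into_iter()
        .enumerate()
        .take(threshold)
        .map(|(i, c)| (ChunkIndex::from(i as u16), c));

    // The API now returns raw bytes; `data.len()` trims the zero padding added by the coder.
    let recovered: Vec<u8> = reconstruct(n_chunks, subset, data.len())?;
    assert_eq!(recovered, data);
    Ok(root)
}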
21 changes: 5 additions & 16 deletions Cargo.lock


5 changes: 2 additions & 3 deletions Cargo.toml
@@ -8,12 +8,11 @@ license = "Apache-2.0"
scale-info = { git = "https://github.com/arkpar/scale-info.git" }

[dependencies]
blake3 = "1.5.0"
# blake2 = "0.10.6"
blake2b_simd = { version = "1", default-features = false }
bounded-collections = { version = "0.1.9", default-features = false }
novelpoly = { package = "reed-solomon-novelpoly", git = "https://github.com/paritytech/reed-solomon-novelpoly.git", default-features = false }
scale = { package = "parity-scale-codec", version = "3.6.9", default-features = false, features = ["derive"] }
thiserror = "1.0.56"
thiserror = { version = "1.0.56", default-features = false }

[profile.dev]
panic = "abort"
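
The hashing dependency also moves from blake3 to blake2b_simd with default features disabled, keeping the crate no_std-friendly. Together with the "conversion to [u8; 32]" bullet above, this suggests the Merkle leaves are fixed 32-byte BLAKE2b digests. A minimal sketch of producing such a digest with blake2b_simd; how src/merklize.rs (not rendered in this view) actually builds the tree is an assumption here:

// Assumption: 32-byte BLAKE2b digests as tree nodes; blake2b_simd defaults to 64-byte
// output, so the length has to be set explicitly.
fn hash_32(bytes: &[u8]) -> [u8; 32] {
    let hash = blake2b_simd::Params::new().hash_length(32).hash(bytes);
    let mut out = [0u8; 32];
    out.copy_from_slice(hash.as_bytes());
    out
}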
20 changes: 10 additions & 10 deletions benches/all.rs
@@ -2,13 +2,13 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Through
use erasure_coding::*;
use std::time::Duration;

fn chunks(n_chunks: u16, pov: &Vec<u8>) -> Vec<Vec<u8>> {
fn chunks(n_chunks: u16, pov: &[u8]) -> Vec<Vec<u8>> {
construct_chunks(n_chunks, pov).unwrap()
}

fn erasure_root(n_chunks: u16, pov: &Vec<u8>) -> ErasureRoot {
fn erasure_root(n_chunks: u16, pov: &[u8]) -> ErasureRoot {
let chunks = chunks(n_chunks, pov);
ErasureRootAndProofs::from(chunks).root()
MerklizedChunks::from(chunks).root()
}

fn bench_all(c: &mut Criterion) {
@@ -38,17 +38,17 @@ fn bench_all(c: &mut Criterion) {
let all_chunks = chunks(N_CHUNKS, &pov);

let chunks: Vec<_> = all_chunks
.iter()
.into_iter()
.enumerate()
.rev()
.take(recovery_threshold(N_CHUNKS).unwrap() as _)
.map(|(i, c)| (&c[..], i))
.map(|(i, c)| (ChunkIndex::from(i as u16), c))
.collect();

group.throughput(Throughput::Bytes(pov.len() as u64));
group.bench_with_input(BenchmarkId::from_parameter(pov_size), &N_CHUNKS, |b, &n| {
b.iter(|| {
let _pov: Vec<u8> = reconstruct(n, chunks.clone()).unwrap();
let _pov: Vec<u8> = reconstruct(n, chunks.clone(), pov.len()).unwrap();
});
});
}
@@ -60,15 +60,15 @@ fn bench_all(c: &mut Criterion) {
let all_chunks = chunks(N_CHUNKS, &pov);

let chunks = all_chunks
.iter()
.into_iter()
.take(systematic_recovery_threshold(N_CHUNKS).unwrap() as _)
.map(|c| &c[..])
.collect::<Vec<_>>();

group.throughput(Throughput::Bytes(pov.len() as u64));
group.bench_with_input(BenchmarkId::from_parameter(pov_size), &N_CHUNKS, |b, &n| {
b.iter(|| {
let _pov: Vec<u8> = reconstruct_from_systematic(n, chunks.clone()).unwrap();
let _pov: Vec<u8> =
reconstruct_from_systematic(n, chunks.clone(), pov.len()).unwrap();
});
});
}
@@ -82,7 +82,7 @@ fn bench_all(c: &mut Criterion) {
group.throughput(Throughput::Bytes(pov.len() as u64));
group.bench_with_input(BenchmarkId::from_parameter(pov_size), &N_CHUNKS, |b, _| {
b.iter(|| {
let iter = ErasureRootAndProofs::from(all_chunks.clone());
let iter = MerklizedChunks::from(all_chunks.clone());
let n = iter.collect::<Vec<_>>().len();
assert_eq!(n, all_chunks.len());
});
10 changes: 4 additions & 6 deletions fuzz/Cargo.lock


2 changes: 1 addition & 1 deletion fuzz/fuzz_targets/merklize.rs
@@ -12,7 +12,7 @@ fuzz_target!(|data: (Vec<u8>, u16)| {
let chunks = construct_chunks(n_chunks, &data).unwrap();
assert_eq!(chunks.len() as u16, n_chunks);

let iter = ErasureRootAndProofs::from(chunks.clone());
let iter = MerklizedChunks::from(chunks.clone());
let root = iter.root();
let erasure_chunks: Vec<_> = iter.collect();

11 changes: 6 additions & 5 deletions fuzz/fuzz_targets/round_trip.rs
@@ -15,19 +15,20 @@ fuzz_target!(|data: (Vec<u8>, u16)| {
let threshold = systematic_recovery_threshold(n_chunks).unwrap();
let reconstructed_systematic: Vec<u8> = reconstruct_from_systematic(
n_chunks,
chunks.iter().take(threshold as usize).map(|v| &v[..]).collect(),
chunks.iter().cloned().take(threshold as usize).collect(),
data.len(),
)
.unwrap();

let threshold = recovery_threshold(n_chunks).unwrap();
let last_chunks: Vec<(&[u8], usize)> = chunks
.iter()
let last_chunks: Vec<(ChunkIndex, Vec<u8>)> = chunks
.into_iter()
.enumerate()
.rev()
.take(threshold as usize)
.map(|(i, v)| (&v[..], i))
.map(|(i, v)| (ChunkIndex::from(i as u16), v))
.collect();
let reconstructed: Vec<u8> = reconstruct(n_chunks, last_chunks).unwrap();
let reconstructed: Vec<u8> = reconstruct(n_chunks, last_chunks, data.len()).unwrap();

assert_eq!(reconstructed, data);
assert_eq!(reconstructed_systematic, data);
5 changes: 1 addition & 4 deletions src/error.rs
@@ -1,4 +1,3 @@
use scale::Error as CodecError;
use thiserror::Error;

/// Errors in erasure coding.
@@ -16,11 +15,9 @@ pub enum Error {
#[error("Uneven length is not valid for field GF(2^16)")]
UnevenLength,
#[error("Chunk is out of bounds: {chunk_index} not included in 0..{n_chunks}")]
ChunkIndexOutOfBounds { chunk_index: usize, n_chunks: usize },
ChunkIndexOutOfBounds { chunk_index: u16, n_chunks: u16 },
#[error("Reconstructed payload invalid")]
BadPayload,
#[error("Unable to decode reconstructed payload: {0}")]
Decode(CodecError),
#[error("Invalid chunk proof")]
InvalidChunkProof,
#[error("The proof is too large")]
78 changes: 44 additions & 34 deletions src/lib.rs
@@ -7,7 +7,7 @@ mod merklize;

pub use self::{
error::Error,
merklize::{ErasureRoot, ErasureRootAndProofs, Proof},
merklize::{ErasureRoot, MerklizedChunks, Proof},
};

use novelpoly::{CodeParams, WrappedShard};
@@ -87,13 +87,18 @@ fn code_params(n_chunks: u16) -> Result<CodeParams, Error> {
///
/// Provide a vector containing the first k chunks in order. If too few chunks are provided,
/// recovery is not possible.
pub fn reconstruct_from_systematic<T: Decode>(
///
/// Due to the internals of the erasure coding algorithm, the output might be
/// larger than the original data and padded with zeroes; passing `data_len`
/// allows truncating the output to the original data size.
pub fn reconstruct_from_systematic(
n_chunks: u16,
systematic_chunks: Vec<&[u8]>,
) -> Result<T, Error> {
systematic_chunks: Vec<Vec<u8>>,
data_len: usize,
) -> Result<Vec<u8>, Error> {
if n_chunks == 1 {
let chunk_data = systematic_chunks.into_iter().next().ok_or(Error::NotEnoughChunks)?;
return Decode::decode(&mut &chunk_data[..]).map_err(Error::Decode);
return Ok(chunk_data.to_vec());
}
let code_params = code_params(n_chunks)?;
let k = code_params.k();
@@ -112,36 +117,32 @@ pub fn reconstruct_from_systematic<T: Decode>(
return Err(Error::UnevenLength);
}

let bytes = code_params.make_encoder().reconstruct_from_systematic(
systematic_chunks
.into_iter()
.take(k)
.map(|data| WrappedShard::new(data.to_vec()))
.collect(),
let mut bytes = code_params.make_encoder().reconstruct_from_systematic(
systematic_chunks.into_iter().take(k).map(WrappedShard::new).collect(),
)?;

Decode::decode(&mut &bytes[..]).map_err(Error::Decode)
bytes.truncate(data_len);

Ok(bytes)
}

/// Construct erasure-coded chunks.
///
/// Works only for 1..65536 chunks.
/// The data must be non-empty.
pub fn construct_chunks<T: Encode>(n_chunks: u16, data: &T) -> Result<Vec<Vec<u8>>, Error> {
pub fn construct_chunks(n_chunks: u16, data: &[u8]) -> Result<Vec<Vec<u8>>, Error> {
if n_chunks == 1 {
let encoded = data.encode();
return Ok(vec![encoded]);
return Ok(vec![data.to_vec()]);
}
let params = code_params(n_chunks)?;
let encoded = data.encode();

if encoded.is_empty() {
if data.is_empty() {
return Err(Error::BadPayload);
}

let shards = params
.make_encoder()
.encode::<WrappedShard>(&encoded[..])
.encode::<WrappedShard>(data)
.expect("Payload non-empty, shard sizes are uniform, and validator numbers checked; qed");

Ok(shards.into_iter().map(|w: WrappedShard| w.into_inner()).collect())
@@ -154,32 +155,38 @@ pub fn construct_chunks<T: Encode>(n_chunks: u16, data: &T) -> Result<Vec<Vec<u8
/// are provided, recovery is not possible.
///
/// Works only for 1..65536 chunks.
pub fn reconstruct<'a, I: 'a, T: Decode>(n_chunks: u16, chunks: I) -> Result<T, Error>
///
/// Due to the internals of the erasure coding algorithm, the output might be
/// larger than the original data and padded with zeroes; passing `data_len`
/// allows truncating the output to the original data size.
pub fn reconstruct<'a, I: 'a>(n_chunks: u16, chunks: I, data_len: usize) -> Result<Vec<u8>, Error>
where
I: IntoIterator<Item = (&'a [u8], usize)>,
I: IntoIterator<Item = (ChunkIndex, Vec<u8>)>,
{
if n_chunks == 1 {
let (chunk_data, _) = chunks.into_iter().next().ok_or(Error::NotEnoughChunks)?;
return Decode::decode(&mut &chunk_data[..]).map_err(Error::Decode);
let (_, chunk_data) = chunks.into_iter().next().ok_or(Error::NotEnoughChunks)?;
return Ok(chunk_data);
}
let params = code_params(n_chunks)?;
let n = n_chunks as usize;
let mut received_shards: Vec<Option<WrappedShard>> = vec![None; n];
for (chunk_data, chunk_idx) in chunks.into_iter().take(n) {
for (chunk_idx, chunk_data) in chunks.into_iter().take(n) {
if chunk_data.len() % 2 != 0 {
return Err(Error::UnevenLength);
}

if chunk_idx >= n {
return Err(Error::ChunkIndexOutOfBounds { chunk_index: chunk_idx, n_chunks: n });
if chunk_idx.0 >= n_chunks {
return Err(Error::ChunkIndexOutOfBounds { chunk_index: chunk_idx.0, n_chunks });
}

received_shards[chunk_idx] = Some(WrappedShard::new(chunk_data.to_vec()));
received_shards[chunk_idx.0 as usize] = Some(WrappedShard::new(chunk_data));
}

let payload_bytes = params.make_encoder().reconstruct(received_shards)?;
let mut payload_bytes = params.make_encoder().reconstruct(received_shards)?;

payload_bytes.truncate(data_len);

Decode::decode(&mut &payload_bytes[..]).map_err(Error::Decode)
Ok(payload_bytes)
}

#[cfg(test)]
@@ -219,10 +226,12 @@ mod tests {
fn property(available_data: ArbitraryAvailableData, n_chunks: u16) {
let n_chunks = n_chunks.max(1);
let threshold = systematic_recovery_threshold(n_chunks).unwrap();
let data_len = available_data.0.len();
let chunks = construct_chunks(n_chunks, &available_data.0).unwrap();
let reconstructed: Vec<u8> = reconstruct_from_systematic(
n_chunks,
chunks.iter().take(threshold as usize).map(|v| &v[..]).collect(),
chunks.into_iter().take(threshold as usize).collect(),
data_len,
)
.unwrap();
assert_eq!(reconstructed, available_data.0);
@@ -235,17 +244,18 @@
fn round_trip_works() {
fn property(available_data: ArbitraryAvailableData, n_chunks: u16) {
let n_chunks = n_chunks.max(1);
let data_len = available_data.0.len();
let threshold = recovery_threshold(n_chunks).unwrap();
let chunks = construct_chunks(n_chunks, &available_data.0).unwrap();
// take the last `threshold` chunks
let last_chunks: Vec<(&[u8], usize)> = chunks
.iter()
let last_chunks: Vec<(ChunkIndex, Vec<u8>)> = chunks
.into_iter()
.enumerate()
.rev()
.take(threshold as usize)
.map(|(i, v)| (&v[..], i))
.map(|(i, v)| (ChunkIndex::from(i as u16), v))
.collect();
let reconstructed: Vec<u8> = reconstruct(n_chunks, last_chunks).unwrap();
let reconstructed: Vec<u8> = reconstruct(n_chunks, last_chunks, data_len).unwrap();
assert_eq!(reconstructed, available_data.0);
}

@@ -259,7 +269,7 @@
let chunks = construct_chunks(n_chunks, &data.0).unwrap();
assert_eq!(chunks.len() as u16, n_chunks);

let iter = ErasureRootAndProofs::from(chunks.clone());
let iter = MerklizedChunks::from(chunks.clone());
let root = iter.root();
let erasure_chunks: Vec<_> = iter.collect();
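
As the new doc comments explain, the GF(2^16) coder pads the payload to an even shard length, so both reconstruct functions now hand back raw bytes truncated to data_len instead of SCALE-decoding them. A minimal sketch of systematic recovery mirroring the test above (the wrapper function is illustrative):

use erasure_coding::{construct_chunks, reconstruct_from_systematic, systematic_recovery_threshold};

fn recover_systematic(data: &[u8], n_chunks: u16) -> Result<Vec<u8>, erasure_coding::Error> {
    let chunks = construct_chunks(n_chunks, data)?;
    // Systematic recovery needs the first `systematic_recovery_threshold` chunks, in order.
    let threshold = systematic_recovery_threshold(n_chunks)? as usize;
    // Passing data.len() trims the zero padding back off the reconstructed payload.
    reconstruct_from_systematic(
        n_chunks,
        chunks.into_iter().take(threshold).collect(),
        data.len(),
    )
}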
