new api (#7)
* conversion to [u8; 32]

* rename to MerklizedChunks

* work on raw bytes

* saner reconstruct API

* fix clippy
ordian authored Jan 11, 2024
1 parent e579bfe commit 9ab9c74
Showing 9 changed files with 90 additions and 102 deletions.
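
Taken together, the changes move the public API from SCALE-encoded generics to raw bytes: construct_chunks now takes a &[u8], the merklization iterator is renamed from ErasureRootAndProofs to MerklizedChunks, and reconstruct takes (ChunkIndex, Vec<u8>) pairs plus the original data length. A minimal sketch of the resulting round trip, assuming the crate-root re-exports shown in the src/lib.rs diff below (ChunkIndex itself is not defined in this diff):

use erasure_coding::{
    construct_chunks, reconstruct, recovery_threshold, ChunkIndex, ErasureRoot, MerklizedChunks,
};

fn round_trip(data: &[u8], n_chunks: u16) -> Result<ErasureRoot, erasure_coding::Error> {
    // Encode the raw payload into `n_chunks` erasure-coded chunks.
    let chunks: Vec<Vec<u8>> = construct_chunks(n_chunks, data)?;

    // Merklize the chunks to obtain the erasure root (iterating would yield per-chunk proofs).
    let root = MerklizedChunks::from(chunks.clone()).root();

    // Any `recovery_threshold` chunks, addressed by ChunkIndex, are enough to recover.
    let threshold = recovery_threshold(n_chunks)? as usize;
    let subset = chunks
        .into_iter()
        .enumerate()
        .take(threshold)
        .map(|(i, c)| (ChunkIndex::from(i as u16), c));

    // The API now returns raw bytes; `data.len()` trims the zero padding added by the coder.
    let recovered: Vec<u8> = reconstruct(n_chunks, subset, data.len())?;
    assert_eq!(recovered, data);
    Ok(root)
}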
21 changes: 5 additions & 16 deletions Cargo.lock


5 changes: 2 additions & 3 deletions Cargo.toml
@@ -8,12 +8,11 @@ license = "Apache-2.0"
scale-info = { git = "https://github.com/arkpar/scale-info.git" }

[dependencies]
blake3 = "1.5.0"
# blake2 = "0.10.6"
blake2b_simd = { version = "1", default-features = false }
bounded-collections = { version = "0.1.9", default-features = false }
novelpoly = { package = "reed-solomon-novelpoly", git = "https://github.com/paritytech/reed-solomon-novelpoly.git", default-features = false }
scale = { package = "parity-scale-codec", version = "3.6.9", default-features = false, features = ["derive"] }
thiserror = "1.0.56"
thiserror = { version = "1.0.56", default-features = false }

[profile.dev]
panic = "abort"
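
The hashing dependency also moves from blake3 to blake2b_simd with default features disabled, keeping the crate no_std-friendly. Together with the "conversion to [u8; 32]" bullet above, this suggests the Merkle leaves are fixed 32-byte BLAKE2b digests. A minimal sketch of producing such a digest with blake2b_simd; how src/merklize.rs (not rendered in this view) actually builds the tree is an assumption here:

// Assumption: 32-byte BLAKE2b digests as tree nodes; blake2b_simd defaults to 64-byte
// output, so the length has to be set explicitly.
fn hash_32(bytes: &[u8]) -> [u8; 32] {
    let hash = blake2b_simd::Params::new().hash_length(32).hash(bytes);
    let mut out = [0u8; 32];
    out.copy_from_slice(hash.as_bytes());
    out
}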
20 changes: 10 additions & 10 deletions benches/all.rs
@@ -2,13 +2,13 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Through
use erasure_coding::*;
use std::time::Duration;

fn chunks(n_chunks: u16, pov: &Vec<u8>) -> Vec<Vec<u8>> {
fn chunks(n_chunks: u16, pov: &[u8]) -> Vec<Vec<u8>> {
construct_chunks(n_chunks, pov).unwrap()
}

fn erasure_root(n_chunks: u16, pov: &Vec<u8>) -> ErasureRoot {
fn erasure_root(n_chunks: u16, pov: &[u8]) -> ErasureRoot {
let chunks = chunks(n_chunks, pov);
ErasureRootAndProofs::from(chunks).root()
MerklizedChunks::from(chunks).root()
}

fn bench_all(c: &mut Criterion) {
@@ -38,17 +38,17 @@ fn bench_all(c: &mut Criterion) {
let all_chunks = chunks(N_CHUNKS, &pov);

let chunks: Vec<_> = all_chunks
.iter()
.into_iter()
.enumerate()
.rev()
.take(recovery_threshold(N_CHUNKS).unwrap() as _)
.map(|(i, c)| (&c[..], i))
.map(|(i, c)| (ChunkIndex::from(i as u16), c))
.collect();

group.throughput(Throughput::Bytes(pov.len() as u64));
group.bench_with_input(BenchmarkId::from_parameter(pov_size), &N_CHUNKS, |b, &n| {
b.iter(|| {
let _pov: Vec<u8> = reconstruct(n, chunks.clone()).unwrap();
let _pov: Vec<u8> = reconstruct(n, chunks.clone(), pov.len()).unwrap();
});
});
}
@@ -60,15 +60,15 @@ fn bench_all(c: &mut Criterion) {
let all_chunks = chunks(N_CHUNKS, &pov);

let chunks = all_chunks
.iter()
.into_iter()
.take(systematic_recovery_threshold(N_CHUNKS).unwrap() as _)
.map(|c| &c[..])
.collect::<Vec<_>>();

group.throughput(Throughput::Bytes(pov.len() as u64));
group.bench_with_input(BenchmarkId::from_parameter(pov_size), &N_CHUNKS, |b, &n| {
b.iter(|| {
let _pov: Vec<u8> = reconstruct_from_systematic(n, chunks.clone()).unwrap();
let _pov: Vec<u8> =
reconstruct_from_systematic(n, chunks.clone(), pov.len()).unwrap();
});
});
}
@@ -82,7 +82,7 @@ fn bench_all(c: &mut Criterion) {
group.throughput(Throughput::Bytes(pov.len() as u64));
group.bench_with_input(BenchmarkId::from_parameter(pov_size), &N_CHUNKS, |b, _| {
b.iter(|| {
let iter = ErasureRootAndProofs::from(all_chunks.clone());
let iter = MerklizedChunks::from(all_chunks.clone());
let n = iter.collect::<Vec<_>>().len();
assert_eq!(n, all_chunks.len());
});
10 changes: 4 additions & 6 deletions fuzz/Cargo.lock


2 changes: 1 addition & 1 deletion fuzz/fuzz_targets/merklize.rs
@@ -12,7 +12,7 @@ fuzz_target!(|data: (Vec<u8>, u16)| {
let chunks = construct_chunks(n_chunks, &data).unwrap();
assert_eq!(chunks.len() as u16, n_chunks);

let iter = ErasureRootAndProofs::from(chunks.clone());
let iter = MerklizedChunks::from(chunks.clone());
let root = iter.root();
let erasure_chunks: Vec<_> = iter.collect();

11 changes: 6 additions & 5 deletions fuzz/fuzz_targets/round_trip.rs
@@ -15,19 +15,20 @@ fuzz_target!(|data: (Vec<u8>, u16)| {
let threshold = systematic_recovery_threshold(n_chunks).unwrap();
let reconstructed_systematic: Vec<u8> = reconstruct_from_systematic(
n_chunks,
chunks.iter().take(threshold as usize).map(|v| &v[..]).collect(),
chunks.iter().cloned().take(threshold as usize).collect(),
data.len(),
)
.unwrap();

let threshold = recovery_threshold(n_chunks).unwrap();
let last_chunks: Vec<(&[u8], usize)> = chunks
.iter()
let last_chunks: Vec<(ChunkIndex, Vec<u8>)> = chunks
.into_iter()
.enumerate()
.rev()
.take(threshold as usize)
.map(|(i, v)| (&v[..], i))
.map(|(i, v)| (ChunkIndex::from(i as u16), v))
.collect();
let reconstructed: Vec<u8> = reconstruct(n_chunks, last_chunks).unwrap();
let reconstructed: Vec<u8> = reconstruct(n_chunks, last_chunks, data.len()).unwrap();

assert_eq!(reconstructed, data);
assert_eq!(reconstructed_systematic, data);
5 changes: 1 addition & 4 deletions src/error.rs
@@ -1,4 +1,3 @@
use scale::Error as CodecError;
use thiserror::Error;

/// Errors in erasure coding.
@@ -16,11 +15,9 @@ pub enum Error {
#[error("Uneven length is not valid for field GF(2^16)")]
UnevenLength,
#[error("Chunk is out of bounds: {chunk_index} not included in 0..{n_chunks}")]
ChunkIndexOutOfBounds { chunk_index: usize, n_chunks: usize },
ChunkIndexOutOfBounds { chunk_index: u16, n_chunks: u16 },
#[error("Reconstructed payload invalid")]
BadPayload,
#[error("Unable to decode reconstructed payload: {0}")]
Decode(CodecError),
#[error("Invalid chunk proof")]
InvalidChunkProof,
#[error("The proof is too large")]
78 changes: 44 additions & 34 deletions src/lib.rs
@@ -7,7 +7,7 @@ mod merklize;

pub use self::{
error::Error,
merklize::{ErasureRoot, ErasureRootAndProofs, Proof},
merklize::{ErasureRoot, MerklizedChunks, Proof},
};

use novelpoly::{CodeParams, WrappedShard};
@@ -87,13 +87,18 @@ fn code_params(n_chunks: u16) -> Result<CodeParams, Error> {
///
/// Provide a vector containing the first k chunks in order. If too few chunks are provided,
/// recovery is not possible.
pub fn reconstruct_from_systematic<T: Decode>(
///
/// Due to the internals of the erasure coding algorithm, the output might be
/// larger than the original data and padded with zeroes; passing `data_len`
/// allows truncating the output to the original data size.
pub fn reconstruct_from_systematic(
n_chunks: u16,
systematic_chunks: Vec<&[u8]>,
) -> Result<T, Error> {
systematic_chunks: Vec<Vec<u8>>,
data_len: usize,
) -> Result<Vec<u8>, Error> {
if n_chunks == 1 {
let chunk_data = systematic_chunks.into_iter().next().ok_or(Error::NotEnoughChunks)?;
return Decode::decode(&mut &chunk_data[..]).map_err(Error::Decode);
return Ok(chunk_data.to_vec());
}
let code_params = code_params(n_chunks)?;
let k = code_params.k();
@@ -112,36 +117,32 @@ pub fn reconstruct_from_systematic<T: Decode>(
return Err(Error::UnevenLength);
}

let bytes = code_params.make_encoder().reconstruct_from_systematic(
systematic_chunks
.into_iter()
.take(k)
.map(|data| WrappedShard::new(data.to_vec()))
.collect(),
let mut bytes = code_params.make_encoder().reconstruct_from_systematic(
systematic_chunks.into_iter().take(k).map(WrappedShard::new).collect(),
)?;

Decode::decode(&mut &bytes[..]).map_err(Error::Decode)
bytes.truncate(data_len);

Ok(bytes)
}

/// Construct erasure-coded chunks.
///
/// Works only for 1..65536 chunks.
/// The data must be non-empty.
pub fn construct_chunks<T: Encode>(n_chunks: u16, data: &T) -> Result<Vec<Vec<u8>>, Error> {
pub fn construct_chunks(n_chunks: u16, data: &[u8]) -> Result<Vec<Vec<u8>>, Error> {
if n_chunks == 1 {
let encoded = data.encode();
return Ok(vec![encoded]);
return Ok(vec![data.to_vec()]);
}
let params = code_params(n_chunks)?;
let encoded = data.encode();

if encoded.is_empty() {
if data.is_empty() {
return Err(Error::BadPayload);
}

let shards = params
.make_encoder()
.encode::<WrappedShard>(&encoded[..])
.encode::<WrappedShard>(data)
.expect("Payload non-empty, shard sizes are uniform, and validator numbers checked; qed");

Ok(shards.into_iter().map(|w: WrappedShard| w.into_inner()).collect())
@@ -154,32 +155,38 @@ pub fn construct_chunks<T: Encode>(n_chunks: u16, data: &T) -> Result<Vec<Vec<u8
/// are provided, recovery is not possible.
///
/// Works only for 1..65536 chunks.
pub fn reconstruct<'a, I: 'a, T: Decode>(n_chunks: u16, chunks: I) -> Result<T, Error>
///
/// Due to the internals of the erasure coding algorithm, the output might be
/// larger than the original data and padded with zeroes; passing `data_len`
/// allows truncating the output to the original data size.
pub fn reconstruct<'a, I: 'a>(n_chunks: u16, chunks: I, data_len: usize) -> Result<Vec<u8>, Error>
where
I: IntoIterator<Item = (&'a [u8], usize)>,
I: IntoIterator<Item = (ChunkIndex, Vec<u8>)>,
{
if n_chunks == 1 {
let (chunk_data, _) = chunks.into_iter().next().ok_or(Error::NotEnoughChunks)?;
return Decode::decode(&mut &chunk_data[..]).map_err(Error::Decode);
let (_, chunk_data) = chunks.into_iter().next().ok_or(Error::NotEnoughChunks)?;
return Ok(chunk_data);
}
let params = code_params(n_chunks)?;
let n = n_chunks as usize;
let mut received_shards: Vec<Option<WrappedShard>> = vec![None; n];
for (chunk_data, chunk_idx) in chunks.into_iter().take(n) {
for (chunk_idx, chunk_data) in chunks.into_iter().take(n) {
if chunk_data.len() % 2 != 0 {
return Err(Error::UnevenLength);
}

if chunk_idx >= n {
return Err(Error::ChunkIndexOutOfBounds { chunk_index: chunk_idx, n_chunks: n });
if chunk_idx.0 >= n_chunks {
return Err(Error::ChunkIndexOutOfBounds { chunk_index: chunk_idx.0, n_chunks });
}

received_shards[chunk_idx] = Some(WrappedShard::new(chunk_data.to_vec()));
received_shards[chunk_idx.0 as usize] = Some(WrappedShard::new(chunk_data));
}

let payload_bytes = params.make_encoder().reconstruct(received_shards)?;
let mut payload_bytes = params.make_encoder().reconstruct(received_shards)?;

payload_bytes.truncate(data_len);

Decode::decode(&mut &payload_bytes[..]).map_err(Error::Decode)
Ok(payload_bytes)
}

#[cfg(test)]
@@ -219,10 +226,12 @@ mod tests {
fn property(available_data: ArbitraryAvailableData, n_chunks: u16) {
let n_chunks = n_chunks.max(1);
let threshold = systematic_recovery_threshold(n_chunks).unwrap();
let data_len = available_data.0.len();
let chunks = construct_chunks(n_chunks, &available_data.0).unwrap();
let reconstructed: Vec<u8> = reconstruct_from_systematic(
n_chunks,
chunks.iter().take(threshold as usize).map(|v| &v[..]).collect(),
chunks.into_iter().take(threshold as usize).collect(),
data_len,
)
.unwrap();
assert_eq!(reconstructed, available_data.0);
@@ -235,17 +244,18 @@
fn round_trip_works() {
fn property(available_data: ArbitraryAvailableData, n_chunks: u16) {
let n_chunks = n_chunks.max(1);
let data_len = available_data.0.len();
let threshold = recovery_threshold(n_chunks).unwrap();
let chunks = construct_chunks(n_chunks, &available_data.0).unwrap();
// take the last `threshold` chunks
let last_chunks: Vec<(&[u8], usize)> = chunks
.iter()
let last_chunks: Vec<(ChunkIndex, Vec<u8>)> = chunks
.into_iter()
.enumerate()
.rev()
.take(threshold as usize)
.map(|(i, v)| (&v[..], i))
.map(|(i, v)| (ChunkIndex::from(i as u16), v))
.collect();
let reconstructed: Vec<u8> = reconstruct(n_chunks, last_chunks).unwrap();
let reconstructed: Vec<u8> = reconstruct(n_chunks, last_chunks, data_len).unwrap();
assert_eq!(reconstructed, available_data.0);
}

@@ -259,7 +269,7 @@
let chunks = construct_chunks(n_chunks, &data.0).unwrap();
assert_eq!(chunks.len() as u16, n_chunks);

let iter = ErasureRootAndProofs::from(chunks.clone());
let iter = MerklizedChunks::from(chunks.clone());
let root = iter.root();
let erasure_chunks: Vec<_> = iter.collect();
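
As the new doc comments explain, the GF(2^16) coder pads the payload to an even shard length, so both reconstruct functions now hand back raw bytes truncated to data_len instead of SCALE-decoding them. A minimal sketch of systematic recovery mirroring the test above (the wrapper function is illustrative):

use erasure_coding::{construct_chunks, reconstruct_from_systematic, systematic_recovery_threshold};

fn recover_systematic(data: &[u8], n_chunks: u16) -> Result<Vec<u8>, erasure_coding::Error> {
    let chunks = construct_chunks(n_chunks, data)?;
    // Systematic recovery needs the first `systematic_recovery_threshold` chunks, in order.
    let threshold = systematic_recovery_threshold(n_chunks)? as usize;
    // Passing data.len() trims the zero padding back off the reconstructed payload.
    reconstruct_from_systematic(
        n_chunks,
        chunks.into_iter().take(threshold).collect(),
        data.len(),
    )
}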
