From 896e8bd76e76a54e156d1f9fddb1a736e48ca535 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Thu, 11 Jan 2024 16:29:02 -0500 Subject: [PATCH 01/59] WIP --- firewood/src/merkle/stream.rs | 185 ++++++++++++++++++++++++---------- 1 file changed, 131 insertions(+), 54 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 8eea2f680..0c0675f16 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -6,19 +6,24 @@ use crate::{ shale::{DiskAddress, ShaleStore}, v2::api, }; +use core::slice::Iter; use futures::{stream::FusedStream, Stream}; use helper_types::{Either, MustUse}; use std::task::Poll; - type Key = Box<[u8]>; type Value = Vec; +struct NodeIterator<'a> { + node: NodeObjRef<'a>, + children_iter: Box>, +} + enum IteratorState<'a> { /// Start iterating at the specified key StartAtKey(Key), /// Continue iterating after the last node in the `visited_node_path` Iterating { - visited_node_path: Vec<(NodeObjRef<'a>, u8)>, + node_iter_stack: Vec>, }, } @@ -41,7 +46,7 @@ pub struct MerkleKeyValueStream<'a, S, T> { impl<'a, S: ShaleStore + Send + Sync, T> FusedStream for MerkleKeyValueStream<'a, S, T> { fn is_terminated(&self) -> bool { - matches!(&self.key_state, IteratorState::Iterating { visited_node_path } if visited_node_path.is_empty()) + matches!(&self.key_state, IteratorState::Iterating { node_iter_stack } if node_iter_stack.is_empty()) } } @@ -88,72 +93,144 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .get_node(*merkle_root) .map_err(|e| api::Error::InternalError(Box::new(e)))?; - // traverse the trie along each nibble until we find a node with a value - // TODO: merkle.iter_by_key(key) will simplify this entire code-block. - let (found_node, mut visited_node_path) = { - let mut visited_node_path = vec![]; - - let found_node = merkle - .get_node_by_key_with_callbacks( - root_node, - &key, - |node_addr, i| visited_node_path.push((node_addr, i)), - |_, _| {}, - ) - .map_err(|e| api::Error::InternalError(Box::new(e)))?; + todo!(); - let mut visited_node_path = visited_node_path - .into_iter() - .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) - .collect::, _>>() - .map_err(|e| api::Error::InternalError(Box::new(e)))?; + // TODO remove + // // traverse the trie along each nibble until we find a node with a value + // // TODO: merkle.iter_by_key(key) will simplify this entire code-block. + // let (found_node, mut visited_node_path) = { + // let mut visited_node_path = vec![]; - let last_visited_node_not_branch = visited_node_path - .last() - .map(|(node, _)| { - matches!(node.inner(), NodeType::Leaf(_) | NodeType::Extension(_)) - }) - .unwrap_or_default(); + // let found_node = merkle + // .get_node_by_key_with_callbacks( + // root_node, + // &key, + // |node_addr, i| visited_node_path.push((node_addr, i)), + // |_, _| {}, + // ) + // .map_err(|e| api::Error::InternalError(Box::new(e)))?; - // we only want branch in the visited node-path to start - if last_visited_node_not_branch { - visited_node_path.pop(); - } + // let mut visited_node_path = visited_node_path + // .into_iter() + // .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) + // .collect::, _>>() + // .map_err(|e| api::Error::InternalError(Box::new(e)))?; - (found_node, visited_node_path) - }; + // let last_visited_node_not_branch = visited_node_path + // .last() + // .map(|(node, _)| { + // matches!(node.inner(), NodeType::Leaf(_) | NodeType::Extension(_)) + // }) + // .unwrap_or_default(); - if let Some(found_node) = found_node { - let value = match found_node.inner() { - NodeType::Branch(branch) => branch.value.as_ref(), - NodeType::Leaf(leaf) => Some(&leaf.data), - NodeType::Extension(_) => None, - }; + // // we only want branch in the visited node-path to start + // if last_visited_node_not_branch { + // visited_node_path.pop(); + // } - let next_result = value.map(|value| { - let value = value.to_vec(); + // (found_node, visited_node_path) + // }; - Ok((std::mem::take(key), value)) - }); + // if let Some(found_node) = found_node { + // let value = match found_node.inner() { + // NodeType::Branch(branch) => branch.value.as_ref(), + // NodeType::Leaf(leaf) => Some(&leaf.data), + // NodeType::Extension(_) => None, + // }; - visited_node_path.push((found_node, 0)); + // let next_result = value.map(|value| { + // let value = value.to_vec(); - self.key_state = IteratorState::Iterating { visited_node_path }; + // Ok((std::mem::take(key), value)) + // }); - return Poll::Ready(next_result); - } + // visited_node_path.push((found_node, 0)); - self.key_state = IteratorState::Iterating { visited_node_path }; + // self.key_state = IteratorState::Iterating { visited_node_path }; - self.poll_next(_cx) + // return Poll::Ready(next_result); + // } + + // self.key_state = IteratorState::Iterating { visited_node_path }; + + // self.poll_next(_cx) } + IteratorState::Iterating { node_iter_stack } => { + loop { + let Some(mut node_iter) = node_iter_stack.pop() else { + return Poll::Ready(None); + }; + + let Some(next) = node_iter.children_iter.next() else { + // This node iterator is exhausted. + continue; + }; - IteratorState::Iterating { visited_node_path } => { - let next = find_next_result(merkle, visited_node_path) - .map_err(|e| api::Error::InternalError(Box::new(e))) - .transpose(); + // Handle the next child. + // Grab the next node along the traversal path. + let node = merkle + .get_node(next.clone()) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + match node.inner() { + NodeType::Branch(branch) => { + // Make a NodeIterator for this branch. + let children_iter = Box::new(branch.children.into_iter().flatten()); + + let key: Box<[u8]> = todo!(); + let value = branch.value.clone(); + + let node_iter = NodeIterator { + node, + children_iter, + }; + + node_iter_stack.push(node_iter); + + if let Some(value) = value { + // This branch has a value, so return it. + return Poll::Ready(Some(Ok((key, value.to_vec())))); + } + } + NodeType::Leaf(leaf) => return Poll::Ready(Some(Ok((todo!(), todo!())))), + NodeType::Extension(extension) => { + // Make a NodeIterator for this extension. + let children_iter = Box::new(std::iter::once(extension.chd())); + + let node_iter = NodeIterator { + node, + children_iter, + }; + + node_iter_stack.push(node_iter); + } + } + } - Poll::Ready(next) + // TODO remove + // let foo2: Iter<'a, DiskAddress> = node_iter + // .node + // .inner() + // .as_branch() + // .unwrap() + // .children + // .iter() + // .filter_map(|addr| if addr.is_some() { *addr } else { None }) + // .collect::>() + // .iter(); + + // let foo = NodeIterator { + // node: node_iter.node, + // children_iter: foo2, + // }; + + // TODO remove + // let next = find_next_result(merkle, visited_node_path) + // .map_err(|e| api::Error::InternalError(Box::new(e))) + // .transpose(); + + // Poll::Ready(next) + // } } } } From 0e968838f57c8a844e1db4b01f44e6b4eb70aede Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Thu, 11 Jan 2024 17:15:13 -0500 Subject: [PATCH 02/59] WIP --- firewood/src/merkle/stream.rs | 93 ++++++++++++----------------------- 1 file changed, 31 insertions(+), 62 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 0c0675f16..86980ff86 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -6,16 +6,16 @@ use crate::{ shale::{DiskAddress, ShaleStore}, v2::api, }; -use core::slice::Iter; use futures::{stream::FusedStream, Stream}; use helper_types::{Either, MustUse}; +use std::slice::Iter; use std::task::Poll; type Key = Box<[u8]>; type Value = Vec; struct NodeIterator<'a> { node: NodeObjRef<'a>, - children_iter: Box>, + children_iter: Option>, } enum IteratorState<'a> { @@ -161,76 +161,45 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' return Poll::Ready(None); }; - let Some(next) = node_iter.children_iter.next() else { - // This node iterator is exhausted. - continue; + // Check whether the children iterator is set up. + let mut children_iter = match node_iter.children_iter { + Some(iter) => iter, + None => { + // Make a new children iterator. + match node_iter.node.inner() { + NodeType::Branch(branch) => branch + .children + .into_iter() + .flatten() + .collect::>() + .iter(), + NodeType::Leaf(_) => todo!(), // TODO is this unreachable? + NodeType::Extension(extension) => [extension.chd()].iter(), + } + } }; // Handle the next child. - // Grab the next node along the traversal path. + let next_child = match children_iter.next() { + Some(child) => child, + None => { + // We've finished iterating over this node's children. + // Go back to the parent node. + continue; + } + }; + + // Get the node for the next child. let node = merkle - .get_node(next.clone()) + .get_node(*next_child) .map_err(|e| api::Error::InternalError(Box::new(e)))?; match node.inner() { - NodeType::Branch(branch) => { - // Make a NodeIterator for this branch. - let children_iter = Box::new(branch.children.into_iter().flatten()); - - let key: Box<[u8]> = todo!(); - let value = branch.value.clone(); - - let node_iter = NodeIterator { - node, - children_iter, - }; - - node_iter_stack.push(node_iter); - - if let Some(value) = value { - // This branch has a value, so return it. - return Poll::Ready(Some(Ok((key, value.to_vec())))); - } - } + NodeType::Branch(branch) => {} NodeType::Leaf(leaf) => return Poll::Ready(Some(Ok((todo!(), todo!())))), - NodeType::Extension(extension) => { - // Make a NodeIterator for this extension. - let children_iter = Box::new(std::iter::once(extension.chd())); - - let node_iter = NodeIterator { - node, - children_iter, - }; - - node_iter_stack.push(node_iter); - } + NodeType::Extension(extension) => {} } } - - // TODO remove - // let foo2: Iter<'a, DiskAddress> = node_iter - // .node - // .inner() - // .as_branch() - // .unwrap() - // .children - // .iter() - // .filter_map(|addr| if addr.is_some() { *addr } else { None }) - // .collect::>() - // .iter(); - - // let foo = NodeIterator { - // node: node_iter.node, - // children_iter: foo2, - // }; - - // TODO remove - // let next = find_next_result(merkle, visited_node_path) - // .map_err(|e| api::Error::InternalError(Box::new(e))) - // .transpose(); - - // Poll::Ready(next) - // } } } } From 6c6581ca46669298108a165d3b83be2b2b0632d9 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 12 Jan 2024 12:04:38 -0500 Subject: [PATCH 03/59] WIP --- firewood/src/merkle/stream.rs | 120 ++++++++++++++++++++-------------- 1 file changed, 72 insertions(+), 48 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 86980ff86..ee3f42319 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -1,21 +1,44 @@ // Copyright (C) 2023, Ava Labs, Inc. All rights reserved. // See the file LICENSE.md for licensing terms. -use super::{node::Node, BranchNode, Merkle, NodeObjRef, NodeType}; +use super::{node::Node, BranchNode, Merkle, NodeObjRef, NodeType, PartialPath}; use crate::{ + merkle::node, shale::{DiskAddress, ShaleStore}, v2::api, }; use futures::{stream::FusedStream, Stream}; use helper_types::{Either, MustUse}; -use std::slice::Iter; use std::task::Poll; type Key = Box<[u8]>; type Value = Vec; +struct ChildIter<'a> { + children: &'a [Option], + pos: u8, +} + +impl<'a> Iterator for ChildIter<'a> { + type Item = DiskAddress; + + fn next(&mut self) -> Option { + while self.pos < BranchNode::MAX_CHILDREN as u8 { + let child = self.children[self.pos as usize]; + + self.pos += 1; + + if let Some(child) = child { + return Some(child); + } + } + + None + } +} + struct NodeIterator<'a> { - node: NodeObjRef<'a>, - children_iter: Option>, + partial_path: PartialPath, + children_iter: ChildIter<'a>, } enum IteratorState<'a> { @@ -155,52 +178,53 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // self.poll_next(_cx) } - IteratorState::Iterating { node_iter_stack } => { - loop { - let Some(mut node_iter) = node_iter_stack.pop() else { - return Poll::Ready(None); - }; - - // Check whether the children iterator is set up. - let mut children_iter = match node_iter.children_iter { - Some(iter) => iter, - None => { - // Make a new children iterator. - match node_iter.node.inner() { - NodeType::Branch(branch) => branch - .children - .into_iter() - .flatten() - .collect::>() - .iter(), - NodeType::Leaf(_) => todo!(), // TODO is this unreachable? - NodeType::Extension(extension) => [extension.chd()].iter(), - } - } - }; - - // Handle the next child. - let next_child = match children_iter.next() { - Some(child) => child, - None => { - // We've finished iterating over this node's children. - // Go back to the parent node. - continue; - } - }; - - // Get the node for the next child. - let node = merkle - .get_node(*next_child) - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - match node.inner() { - NodeType::Branch(branch) => {} - NodeType::Leaf(leaf) => return Poll::Ready(Some(Ok((todo!(), todo!())))), - NodeType::Extension(extension) => {} + IteratorState::Iterating { node_iter_stack } => loop { + let Some(mut node_iter) = node_iter_stack.pop() else { + return Poll::Ready(None); + }; + + let next_node_addr = node_iter.children_iter.next(); + node_iter_stack.push(node_iter); + + let Some(next_node_addr) = next_node_addr else { + // We visited all this node's descendants. + // Go back to its parent. + continue; + }; + + let next_node = merkle + .get_node(next_node_addr) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + match next_node.inner() { + NodeType::Branch(_) => todo!(), + NodeType::Leaf(leaf) => { + let key: Box<[u8]> = Box::new([]); // TODO return actual key + let value = leaf.data.to_vec(); + + return Poll::Ready(Some(Ok((key, value)))); + } + NodeType::Extension(extension) => { + // Follow the extension node to its child. + let child = merkle + .get_node(extension.chd()) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + // TODO confirm that an extension node's child is always a branch node + let child = child.inner().as_branch().unwrap(); + + let child_iter = NodeIterator { + partial_path: extension.path.clone(), // TODO is this correct? + children_iter: ChildIter { + children: &child.children.clone(), + pos: 0, + }, + }; + + node_iter_stack.push(child_iter); } } - } + }, } } } From 9669108b6787c4ab63713ddbb295308ccba8eefa Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 12 Jan 2024 17:09:02 -0500 Subject: [PATCH 04/59] WIP --- firewood/src/merkle/stream.rs | 53 +++++++++++------------------------ 1 file changed, 17 insertions(+), 36 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index ee3f42319..40e936e6b 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -13,44 +13,22 @@ use std::task::Poll; type Key = Box<[u8]>; type Value = Vec; -struct ChildIter<'a> { - children: &'a [Option], - pos: u8, -} - -impl<'a> Iterator for ChildIter<'a> { - type Item = DiskAddress; - - fn next(&mut self) -> Option { - while self.pos < BranchNode::MAX_CHILDREN as u8 { - let child = self.children[self.pos as usize]; - - self.pos += 1; - - if let Some(child) = child { - return Some(child); - } - } - - None - } -} - -struct NodeIterator<'a> { +struct NodeIterator +where + F: FnMut() -> Option, +{ partial_path: PartialPath, - children_iter: ChildIter<'a>, + children_iter: F, } -enum IteratorState<'a> { +enum IteratorState { /// Start iterating at the specified key StartAtKey(Key), /// Continue iterating after the last node in the `visited_node_path` - Iterating { - node_iter_stack: Vec>, - }, + Iterating { node_iter_stack: Vec }, } -impl IteratorState<'_> { +impl IteratorState { fn new() -> Self { Self::StartAtKey(vec![].into_boxed_slice()) } @@ -62,7 +40,7 @@ impl IteratorState<'_> { /// A MerkleKeyValueStream iterates over keys/values for a merkle trie. pub struct MerkleKeyValueStream<'a, S, T> { - key_state: IteratorState<'a>, + key_state: IteratorState, merkle_root: DiskAddress, merkle: &'a Merkle, } @@ -211,14 +189,17 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .map_err(|e| api::Error::InternalError(Box::new(e)))?; // TODO confirm that an extension node's child is always a branch node - let child = child.inner().as_branch().unwrap(); + let child_iter = child + .inner() + .as_branch() + .unwrap() + .children + .into_iter() + .filter_map(|addr| addr); let child_iter = NodeIterator { partial_path: extension.path.clone(), // TODO is this correct? - children_iter: ChildIter { - children: &child.children.clone(), - pos: 0, - }, + children_iter: child_iter, }; node_iter_stack.push(child_iter); From 53a3f41cb3533c21f0cfe76f9b6fc61580b3103c Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 12 Jan 2024 17:22:32 -0500 Subject: [PATCH 05/59] WIP --- firewood/src/merkle/stream.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 40e936e6b..a8b1da17a 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -13,12 +13,24 @@ use std::task::Poll; type Key = Box<[u8]>; type Value = Vec; -struct NodeIterator +struct NodeIterator where - F: FnMut() -> Option, + I: Iterator, { partial_path: PartialPath, - children_iter: F, + children_iter: I, +} + +impl NodeIterator +where + I: Iterator, +{ + fn new(partial_path: PartialPath, children_iter: I) -> Self { + NodeIterator { + partial_path, + children_iter, + } + } } enum IteratorState { From 1ea329e7fbd0fa97994fc105a4eb824c2d893671 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 12 Jan 2024 18:21:11 -0500 Subject: [PATCH 06/59] WIP --- firewood/src/merkle/stream.rs | 59 +++++++++++++++-------------------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index a8b1da17a..e048c8923 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -3,34 +3,19 @@ use super::{node::Node, BranchNode, Merkle, NodeObjRef, NodeType, PartialPath}; use crate::{ - merkle::node, + nibbles::Nibbles, shale::{DiskAddress, ShaleStore}, v2::api, }; use futures::{stream::FusedStream, Stream}; use helper_types::{Either, MustUse}; -use std::task::Poll; +use std::{borrow::BorrowMut, task::Poll}; type Key = Box<[u8]>; type Value = Vec; -struct NodeIterator -where - I: Iterator, -{ - partial_path: PartialPath, - children_iter: I, -} - -impl NodeIterator -where - I: Iterator, -{ - fn new(partial_path: PartialPath, children_iter: I) -> Self { - NodeIterator { - partial_path, - children_iter, - } - } +struct NodeIterator { + key: Box<[u8]>, + children_iter: Box>, } enum IteratorState { @@ -173,10 +158,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' return Poll::Ready(None); }; - let next_node_addr = node_iter.children_iter.next(); - node_iter_stack.push(node_iter); - - let Some(next_node_addr) = next_node_addr else { + let Some(next_node_addr) = node_iter.children_iter.next() else { // We visited all this node's descendants. // Go back to its parent. continue; @@ -187,11 +169,18 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .map_err(|e| api::Error::InternalError(Box::new(e)))?; match next_node.inner() { - NodeType::Branch(_) => todo!(), + NodeType::Branch(branch) => { + let children_iter = branch.children.into_iter().filter_map(|addr| addr); + + node_iter_stack.push(NodeIterator { + key: Box::new([]), // TODO get actual key + children_iter: Box::new(children_iter), + }); + } NodeType::Leaf(leaf) => { - let key: Box<[u8]> = Box::new([]); // TODO return actual key + let key = node_iter.key.clone(); let value = leaf.data.to_vec(); - + node_iter_stack.push(node_iter); return Poll::Ready(Some(Ok((key, value)))); } NodeType::Extension(extension) => { @@ -201,7 +190,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .map_err(|e| api::Error::InternalError(Box::new(e)))?; // TODO confirm that an extension node's child is always a branch node - let child_iter = child + let children_iter = child .inner() .as_branch() .unwrap() @@ -209,14 +198,16 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .into_iter() .filter_map(|addr| addr); - let child_iter = NodeIterator { - partial_path: extension.path.clone(), // TODO is this correct? - children_iter: child_iter, - }; + // TODO I feel like there's a better way to do this + let mut child_key = node_iter.key.to_vec(); + child_key.extend(extension.path.iter()); - node_iter_stack.push(child_iter); + node_iter_stack.push(NodeIterator { + key: child_key.into_boxed_slice(), + children_iter: Box::new(children_iter), + }); } - } + }; }, } } From c721e64135a2cabdc0138d9f27da3d44d7ca4ad9 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 12 Jan 2024 18:35:30 -0500 Subject: [PATCH 07/59] compiles --- firewood/src/merkle/stream.rs | 350 ++++++++++++++++++---------------- 1 file changed, 181 insertions(+), 169 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index e048c8923..6178e5b4d 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -1,21 +1,20 @@ // Copyright (C) 2023, Ava Labs, Inc. All rights reserved. // See the file LICENSE.md for licensing terms. -use super::{node::Node, BranchNode, Merkle, NodeObjRef, NodeType, PartialPath}; +use super::{node::Node, BranchNode, Merkle, NodeObjRef, NodeType}; use crate::{ - nibbles::Nibbles, shale::{DiskAddress, ShaleStore}, v2::api, }; use futures::{stream::FusedStream, Stream}; use helper_types::{Either, MustUse}; -use std::{borrow::BorrowMut, task::Poll}; +use std::task::Poll; type Key = Box<[u8]>; type Value = Vec; struct NodeIterator { key: Box<[u8]>, - children_iter: Box>, + children_iter: Box + Send>, } enum IteratorState { @@ -158,17 +157,21 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' return Poll::Ready(None); }; - let Some(next_node_addr) = node_iter.children_iter.next() else { + let Some(node_addr) = node_iter.children_iter.next() else { // We visited all this node's descendants. // Go back to its parent. continue; }; - let next_node = merkle - .get_node(next_node_addr) + let node = merkle + .get_node(node_addr) .map_err(|e| api::Error::InternalError(Box::new(e)))?; - match next_node.inner() { + let node_key = node_iter.key.clone(); + + node_iter_stack.push(node_iter); + + match node.inner() { NodeType::Branch(branch) => { let children_iter = branch.children.into_iter().filter_map(|addr| addr); @@ -176,11 +179,17 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' key: Box::new([]), // TODO get actual key children_iter: Box::new(children_iter), }); + + // If there's a value, return it. + if let Some(value) = branch.value.as_ref() { + let key = node_key; // TODO extend with child index + let value = value.to_vec(); + return Poll::Ready(Some(Ok((key, value)))); + } } NodeType::Leaf(leaf) => { - let key = node_iter.key.clone(); + let key = node_key; // TODO extend with child index let value = leaf.data.to_vec(); - node_iter_stack.push(node_iter); return Poll::Ready(Some(Ok((key, value)))); } NodeType::Extension(extension) => { @@ -199,7 +208,8 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .filter_map(|addr| addr); // TODO I feel like there's a better way to do this - let mut child_key = node_iter.key.to_vec(); + let mut child_key = node_key.to_vec(); + // TODO extend with child index child_key.extend(extension.path.iter()); node_iter_stack.push(NodeIterator { @@ -213,133 +223,134 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' } } -enum NodeRef<'a> { - New(NodeObjRef<'a>), - Visited(NodeObjRef<'a>), -} - -#[derive(Debug)] -enum InnerNode<'a> { - New(&'a NodeType), - Visited(&'a NodeType), -} - -impl<'a> NodeRef<'a> { - fn inner(&self) -> InnerNode<'_> { - match self { - Self::New(node) => InnerNode::New(node.inner()), - Self::Visited(node) => InnerNode::Visited(node.inner()), - } - } - - fn into_node(self) -> NodeObjRef<'a> { - match self { - Self::New(node) => node, - Self::Visited(node) => node, - } - } -} - -fn find_next_result<'a, S: ShaleStore, T>( - merkle: &'a Merkle, - visited_path: &mut Vec<(NodeObjRef<'a>, u8)>, -) -> Result, super::MerkleError> { - let next = find_next_node_with_data(merkle, visited_path)?.map(|(next_node, value)| { - let partial_path = match next_node.inner() { - NodeType::Leaf(leaf) => leaf.path.iter().copied(), - NodeType::Extension(extension) => extension.path.iter().copied(), - _ => [].iter().copied(), - }; - - let key = key_from_nibble_iter(nibble_iter_from_parents(visited_path).chain(partial_path)); - - visited_path.push((next_node, 0)); - - (key, value) - }); - - Ok(next) -} - -fn find_next_node_with_data<'a, S: ShaleStore, T>( - merkle: &'a Merkle, - visited_path: &mut Vec<(NodeObjRef<'a>, u8)>, -) -> Result, Vec)>, super::MerkleError> { - use InnerNode::*; - - let Some((visited_parent, visited_pos)) = visited_path.pop() else { - return Ok(None); - }; - - let mut node = NodeRef::Visited(visited_parent); - let mut pos = visited_pos; - let mut first_loop = true; - - loop { - match node.inner() { - New(NodeType::Leaf(leaf)) => { - let value = leaf.data.to_vec(); - return Ok(Some((node.into_node(), value))); - } - - Visited(NodeType::Leaf(_)) | Visited(NodeType::Extension(_)) => { - let Some((next_parent, next_pos)) = visited_path.pop() else { - return Ok(None); - }; - - node = NodeRef::Visited(next_parent); - pos = next_pos; - } - - New(NodeType::Extension(extension)) => { - let child = merkle.get_node(extension.chd())?; - - pos = 0; - visited_path.push((node.into_node(), pos)); - - node = NodeRef::New(child); - } - - Visited(NodeType::Branch(branch)) => { - // if the first node that we check is a visited branch, that means that the branch had a value - // and we need to visit the first child, for all other cases, we need to visit the next child - let compare_op = if first_loop { - ::ge // >= - } else { - ::gt - }; - - let children = get_children_iter(branch) - .filter(move |(_, child_pos)| compare_op(child_pos, &pos)); - - let found_next_node = - next_node(merkle, children, visited_path, &mut node, &mut pos)?; - - if !found_next_node { - return Ok(None); - } - } - - New(NodeType::Branch(branch)) => { - if let Some(value) = branch.value.as_ref() { - let value = value.to_vec(); - return Ok(Some((node.into_node(), value))); - } - - let children = get_children_iter(branch); - - let found_next_node = - next_node(merkle, children, visited_path, &mut node, &mut pos)?; - - if !found_next_node { - return Ok(None); - } - } - } - - first_loop = false; - } -} +// TODO remove +// enum NodeRef<'a> { +// New(NodeObjRef<'a>), +// Visited(NodeObjRef<'a>), +// } + +// #[derive(Debug)] +// enum InnerNode<'a> { +// New(&'a NodeType), +// Visited(&'a NodeType), +// } + +// impl<'a> NodeRef<'a> { +// fn inner(&self) -> InnerNode<'_> { +// match self { +// Self::New(node) => InnerNode::New(node.inner()), +// Self::Visited(node) => InnerNode::Visited(node.inner()), +// } +// } + +// fn into_node(self) -> NodeObjRef<'a> { +// match self { +// Self::New(node) => node, +// Self::Visited(node) => node, +// } +// } +// } + +// fn find_next_result<'a, S: ShaleStore, T>( +// merkle: &'a Merkle, +// visited_path: &mut Vec<(NodeObjRef<'a>, u8)>, +// ) -> Result, super::MerkleError> { +// let next = find_next_node_with_data(merkle, visited_path)?.map(|(next_node, value)| { +// let partial_path = match next_node.inner() { +// NodeType::Leaf(leaf) => leaf.path.iter().copied(), +// NodeType::Extension(extension) => extension.path.iter().copied(), +// _ => [].iter().copied(), +// }; + +// let key = key_from_nibble_iter(nibble_iter_from_parents(visited_path).chain(partial_path)); + +// visited_path.push((next_node, 0)); + +// (key, value) +// }); + +// Ok(next) +// } + +// fn find_next_node_with_data<'a, S: ShaleStore, T>( +// merkle: &'a Merkle, +// visited_path: &mut Vec<(NodeObjRef<'a>, u8)>, +// ) -> Result, Vec)>, super::MerkleError> { +// use InnerNode::*; + +// let Some((visited_parent, visited_pos)) = visited_path.pop() else { +// return Ok(None); +// }; + +// let mut node = NodeRef::Visited(visited_parent); +// let mut pos = visited_pos; +// let mut first_loop = true; + +// loop { +// match node.inner() { +// New(NodeType::Leaf(leaf)) => { +// let value = leaf.data.to_vec(); +// return Ok(Some((node.into_node(), value))); +// } + +// Visited(NodeType::Leaf(_)) | Visited(NodeType::Extension(_)) => { +// let Some((next_parent, next_pos)) = visited_path.pop() else { +// return Ok(None); +// }; + +// node = NodeRef::Visited(next_parent); +// pos = next_pos; +// } + +// New(NodeType::Extension(extension)) => { +// let child = merkle.get_node(extension.chd())?; + +// pos = 0; +// visited_path.push((node.into_node(), pos)); + +// node = NodeRef::New(child); +// } + +// Visited(NodeType::Branch(branch)) => { +// // if the first node that we check is a visited branch, that means that the branch had a value +// // and we need to visit the first child, for all other cases, we need to visit the next child +// let compare_op = if first_loop { +// ::ge // >= +// } else { +// ::gt +// }; + +// let children = get_children_iter(branch) +// .filter(move |(_, child_pos)| compare_op(child_pos, &pos)); + +// let found_next_node = +// next_node(merkle, children, visited_path, &mut node, &mut pos)?; + +// if !found_next_node { +// return Ok(None); +// } +// } + +// New(NodeType::Branch(branch)) => { +// if let Some(value) = branch.value.as_ref() { +// let value = value.to_vec(); +// return Ok(Some((node.into_node(), value))); +// } + +// let children = get_children_iter(branch); + +// let found_next_node = +// next_node(merkle, children, visited_path, &mut node, &mut pos)?; + +// if !found_next_node { +// return Ok(None); +// } +// } +// } + +// first_loop = false; +// } +// } fn get_children_iter(branch: &BranchNode) -> impl Iterator { branch @@ -349,37 +360,38 @@ fn get_children_iter(branch: &BranchNode) -> impl Iterator( - merkle: &'a Merkle, - mut children: Iter, - parents: &mut Vec<(NodeObjRef<'a>, u8)>, - node: &mut NodeRef<'a>, - pos: &mut u8, -) -> Result, super::MerkleError> -where - Iter: Iterator, - S: ShaleStore, -{ - if let Some((child_addr, child_pos)) = children.next() { - let child = merkle.get_node(child_addr)?; - - *pos = child_pos; - let node = std::mem::replace(node, NodeRef::New(child)); - parents.push((node.into_node(), *pos)); - } else { - let Some((next_parent, next_pos)) = parents.pop() else { - return Ok(false.into()); - }; - - *node = NodeRef::Visited(next_parent); - *pos = next_pos; - } - - Ok(true.into()) -} +// TODO remove +// /// This function is a little complicated because we need to be able to early return from the parent +// /// when we return `false`. `MustUse` forces the caller to check the inner value of `Result::Ok`. +// /// It also replaces `node` +// fn next_node<'a, S, T, Iter>( +// merkle: &'a Merkle, +// mut children: Iter, +// parents: &mut Vec<(NodeObjRef<'a>, u8)>, +// node: &mut NodeRef<'a>, +// pos: &mut u8, +// ) -> Result, super::MerkleError> +// where +// Iter: Iterator, +// S: ShaleStore, +// { +// if let Some((child_addr, child_pos)) = children.next() { +// let child = merkle.get_node(child_addr)?; + +// *pos = child_pos; +// let node = std::mem::replace(node, NodeRef::New(child)); +// parents.push((node.into_node(), *pos)); +// } else { +// let Some((next_parent, next_pos)) = parents.pop() else { +// return Ok(false.into()); +// }; + +// *node = NodeRef::Visited(next_parent); +// *pos = next_pos; +// } + +// Ok(true.into()) +// } /// create an iterator over the key-nibbles from all parents _excluding_ the sentinal node. fn nibble_iter_from_parents<'a>(parents: &'a [(NodeObjRef, u8)]) -> impl Iterator + 'a { From 84163c8fca411b3ebf669980d71e05a65fc6cbc0 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 12 Jan 2024 18:42:49 -0500 Subject: [PATCH 08/59] put child index in key --- firewood/src/merkle/stream.rs | 152 +++++++++++++++++++--------------- 1 file changed, 84 insertions(+), 68 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 6178e5b4d..6137e916e 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -7,14 +7,14 @@ use crate::{ v2::api, }; use futures::{stream::FusedStream, Stream}; -use helper_types::{Either, MustUse}; +use helper_types::Either; use std::task::Poll; type Key = Box<[u8]>; type Value = Vec; struct NodeIterator { key: Box<[u8]>, - children_iter: Box + Send>, + children_iter: Box + Send>, } enum IteratorState { @@ -152,73 +152,89 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // self.poll_next(_cx) } - IteratorState::Iterating { node_iter_stack } => loop { - let Some(mut node_iter) = node_iter_stack.pop() else { - return Poll::Ready(None); - }; - - let Some(node_addr) = node_iter.children_iter.next() else { - // We visited all this node's descendants. - // Go back to its parent. - continue; - }; - - let node = merkle - .get_node(node_addr) - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - let node_key = node_iter.key.clone(); - - node_iter_stack.push(node_iter); - - match node.inner() { - NodeType::Branch(branch) => { - let children_iter = branch.children.into_iter().filter_map(|addr| addr); - - node_iter_stack.push(NodeIterator { - key: Box::new([]), // TODO get actual key - children_iter: Box::new(children_iter), - }); - - // If there's a value, return it. - if let Some(value) = branch.value.as_ref() { - let key = node_key; // TODO extend with child index - let value = value.to_vec(); - return Poll::Ready(Some(Ok((key, value)))); + IteratorState::Iterating { node_iter_stack } => { + loop { + let Some(mut node_iter) = node_iter_stack.pop() else { + return Poll::Ready(None); + }; + + let Some((node_addr, pos)) = node_iter.children_iter.next() else { + // We visited all this node's descendants. + // Go back to its parent. + continue; + }; + + let node = merkle + .get_node(node_addr) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + let node_key = node_iter.key.clone(); + + node_iter_stack.push(node_iter); + + match node.inner() { + NodeType::Branch(branch) => { + let children_iter = branch.children.into_iter().enumerate().filter_map( + |(pos, addr)| { + if addr.is_some() { + Some((addr.unwrap(), pos as u8)) + } else { + None + } + }, + ); + + node_iter_stack.push(NodeIterator { + key: Box::new([]), // TODO get actual key + children_iter: Box::new(children_iter), + }); + + // If there's a value, return it. + if let Some(value) = branch.value.as_ref() { + let mut key = node_key.to_vec(); + key.push(pos); + + let value = value.to_vec(); + return Poll::Ready(Some(Ok((key.into_boxed_slice(), value)))); + } + } + NodeType::Leaf(leaf) => { + let mut key = node_key.to_vec(); + key.push(pos); + let value = leaf.data.to_vec(); + return Poll::Ready(Some(Ok((key.into_boxed_slice(), value)))); } - } - NodeType::Leaf(leaf) => { - let key = node_key; // TODO extend with child index - let value = leaf.data.to_vec(); - return Poll::Ready(Some(Ok((key, value)))); - } - NodeType::Extension(extension) => { - // Follow the extension node to its child. - let child = merkle - .get_node(extension.chd()) - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - // TODO confirm that an extension node's child is always a branch node - let children_iter = child - .inner() - .as_branch() - .unwrap() - .children - .into_iter() - .filter_map(|addr| addr); - - // TODO I feel like there's a better way to do this - let mut child_key = node_key.to_vec(); - // TODO extend with child index - child_key.extend(extension.path.iter()); - - node_iter_stack.push(NodeIterator { - key: child_key.into_boxed_slice(), - children_iter: Box::new(children_iter), - }); - } - }; - }, + NodeType::Extension(extension) => { + // Follow the extension node to its child. + let child = merkle + .get_node(extension.chd()) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + // TODO confirm that an extension node's child is always a branch node + let children_iter = child + .inner() + .as_branch() + .unwrap() + .children + .into_iter() + .enumerate() + .filter_map(|(pos, child_addr)| { + child_addr.map(|child_addr| (child_addr, pos as u8)) + }); + + // TODO I feel like there's a better way to do this + let mut child_key = node_key.to_vec(); + child_key.push(pos); + child_key.extend(extension.path.iter()); + + node_iter_stack.push(NodeIterator { + key: child_key.into_boxed_slice(), + children_iter: Box::new(children_iter), + }); + } + }; + } + } } } } From dd050e7af888b201392d3a32327f2fee56e19167 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 12 Jan 2024 19:30:46 -0500 Subject: [PATCH 09/59] make key vector in node iterator --- firewood/src/merkle/stream.rs | 46 ++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 6137e916e..e3024e60c 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -13,7 +13,7 @@ type Key = Box<[u8]>; type Value = Vec; struct NodeIterator { - key: Box<[u8]>, + key: Vec, children_iter: Box + Send>, } @@ -90,8 +90,26 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .get_node(*merkle_root) .map_err(|e| api::Error::InternalError(Box::new(e)))?; - todo!(); + let mut path_to_key = vec![]; + let node_at_key = merkle + .get_node_by_key_with_callbacks( + root_node, + &key, + |node_addr, i| path_to_key.push((node_addr, i)), + |_, _| {}, + ) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + let mut path_to_key = path_to_key + .into_iter() + .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) + .collect::, _>>() + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + todo!() + + // // TODO remove // TODO remove // // traverse the trie along each nibble until we find a node with a value // // TODO: merkle.iter_by_key(key) will simplify this entire code-block. @@ -158,6 +176,8 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' return Poll::Ready(None); }; + // [node_addr] is the next node to visit. + // It's the child at index [pos] of [node_iter]. let Some((node_addr, pos)) = node_iter.children_iter.next() else { // We visited all this node's descendants. // Go back to its parent. @@ -168,7 +188,8 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .get_node(node_addr) .map_err(|e| api::Error::InternalError(Box::new(e)))?; - let node_key = node_iter.key.clone(); + let mut child_key = node_iter.key.clone(); + child_key.push(pos); node_iter_stack.push(node_iter); @@ -185,24 +206,22 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' ); node_iter_stack.push(NodeIterator { - key: Box::new([]), // TODO get actual key + key: child_key.clone(), // TODO remove cloning children_iter: Box::new(children_iter), }); // If there's a value, return it. if let Some(value) = branch.value.as_ref() { - let mut key = node_key.to_vec(); - key.push(pos); - let value = value.to_vec(); - return Poll::Ready(Some(Ok((key.into_boxed_slice(), value)))); + return Poll::Ready(Some(Ok(( + child_key.into_boxed_slice(), + value, + )))); } } NodeType::Leaf(leaf) => { - let mut key = node_key.to_vec(); - key.push(pos); let value = leaf.data.to_vec(); - return Poll::Ready(Some(Ok((key.into_boxed_slice(), value)))); + return Poll::Ready(Some(Ok((child_key.into_boxed_slice(), value)))); } NodeType::Extension(extension) => { // Follow the extension node to its child. @@ -223,12 +242,11 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' }); // TODO I feel like there's a better way to do this - let mut child_key = node_key.to_vec(); - child_key.push(pos); + let mut child_key = child_key; child_key.extend(extension.path.iter()); node_iter_stack.push(NodeIterator { - key: child_key.into_boxed_slice(), + key: child_key, children_iter: Box::new(children_iter), }); } From 56ab548fb7b64a887ea5551dbe973896478a8af4 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 12 Jan 2024 21:30:54 -0500 Subject: [PATCH 10/59] no-op implementation for iterator initialization --- firewood/src/merkle/stream.rs | 57 ++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index e3024e60c..e2c157a0e 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -90,24 +90,45 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .get_node(*merkle_root) .map_err(|e| api::Error::InternalError(Box::new(e)))?; - let mut path_to_key = vec![]; - - let node_at_key = merkle - .get_node_by_key_with_callbacks( - root_node, - &key, - |node_addr, i| path_to_key.push((node_addr, i)), - |_, _| {}, - ) - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - let mut path_to_key = path_to_key - .into_iter() - .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) - .collect::, _>>() - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - todo!() + let mut node_iter_stack: Vec = vec![]; + node_iter_stack.push(NodeIterator { + key: vec![], + children_iter: Box::new(get_children_iter( + root_node.inner().as_branch().unwrap(), + )), + }); + + self.key_state = IteratorState::Iterating { node_iter_stack }; + + self.poll_next(_cx) + + // let mut path_to_key = vec![]; + + // let node_at_key = merkle + // .get_node_by_key_with_callbacks( + // root_node, + // &key, + // |node_addr, i| path_to_key.push((node_addr, i)), + // |_, _| {}, + // ) + // .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + // let mut path_to_key = path_to_key + // .into_iter() + // .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) + // .collect::, _>>() + // .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + // let remaining_key = key.iter(); + + // let mut node_iter_stack: Vec = vec![]; + + // loop { + // let Some((node, pos)) = path_to_key.first() else { + // break; + // }; + // path_to_key.remove(0); + // } // // TODO remove // TODO remove From 8c3af4a4db08d037a4e1dbcb31f5986e754a634c Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 12 Jan 2024 21:58:32 -0500 Subject: [PATCH 11/59] cleanup; add test no_start_key --- firewood/src/merkle/stream.rs | 86 ++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index e2c157a0e..c7eeccf3c 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -13,7 +13,7 @@ type Key = Box<[u8]>; type Value = Vec; struct NodeIterator { - key: Vec, + key_nibbles: Vec, children_iter: Box + Send>, } @@ -92,7 +92,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let mut node_iter_stack: Vec = vec![]; node_iter_stack.push(NodeIterator { - key: vec![], + key_nibbles: vec![], children_iter: Box::new(get_children_iter( root_node.inner().as_branch().unwrap(), )), @@ -209,13 +209,16 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .get_node(node_addr) .map_err(|e| api::Error::InternalError(Box::new(e)))?; - let mut child_key = node_iter.key.clone(); - child_key.push(pos); + let mut child_key_nibbles = node_iter.key_nibbles.clone(); // TODO reduce¸cloning + child_key_nibbles.push(pos); node_iter_stack.push(node_iter); match node.inner() { NodeType::Branch(branch) => { + // [children_iter] returns (child_addr, pos) + // for every non-empty child in [node] where + // [pos] is the child's index in [node.children]. let children_iter = branch.children.into_iter().enumerate().filter_map( |(pos, addr)| { if addr.is_some() { @@ -227,7 +230,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' ); node_iter_stack.push(NodeIterator { - key: child_key.clone(), // TODO remove cloning + key_nibbles: child_key_nibbles.clone(), // TODO reduce¸cloning children_iter: Box::new(children_iter), }); @@ -235,14 +238,17 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' if let Some(value) = branch.value.as_ref() { let value = value.to_vec(); return Poll::Ready(Some(Ok(( - child_key.into_boxed_slice(), + key_from_nibble_iter(child_key_nibbles.into_iter().skip(1)), value, )))); } } NodeType::Leaf(leaf) => { let value = leaf.data.to_vec(); - return Poll::Ready(Some(Ok((child_key.into_boxed_slice(), value)))); + return Poll::Ready(Some(Ok(( + key_from_nibble_iter(child_key_nibbles.into_iter().skip(1)), + value, + )))); } NodeType::Extension(extension) => { // Follow the extension node to its child. @@ -263,11 +269,11 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' }); // TODO I feel like there's a better way to do this - let mut child_key = child_key; + let mut child_key = child_key_nibbles; child_key.extend(extension.path.iter()); node_iter_stack.push(NodeIterator { - key: child_key, + key_nibbles: child_key, children_iter: Box::new(children_iter), }); } @@ -415,39 +421,6 @@ fn get_children_iter(branch: &BranchNode) -> impl Iterator( -// merkle: &'a Merkle, -// mut children: Iter, -// parents: &mut Vec<(NodeObjRef<'a>, u8)>, -// node: &mut NodeRef<'a>, -// pos: &mut u8, -// ) -> Result, super::MerkleError> -// where -// Iter: Iterator, -// S: ShaleStore, -// { -// if let Some((child_addr, child_pos)) = children.next() { -// let child = merkle.get_node(child_addr)?; - -// *pos = child_pos; -// let node = std::mem::replace(node, NodeRef::New(child)); -// parents.push((node.into_node(), *pos)); -// } else { -// let Some((next_parent, next_pos)) = parents.pop() else { -// return Ok(false.into()); -// }; - -// *node = NodeRef::Visited(next_parent); -// *pos = next_pos; -// } - -// Ok(true.into()) -// } - /// create an iterator over the key-nibbles from all parents _excluding_ the sentinal node. fn nibble_iter_from_parents<'a>(parents: &'a [(NodeObjRef, u8)]) -> impl Iterator + 'a { parents @@ -591,6 +564,35 @@ mod tests { check_stream_is_done(merkle.iter(root)).await; } + #[tokio::test] + async fn no_start_key() { + let mut merkle = create_test_merkle(); + let root = merkle.init_root().unwrap(); + + for i in 0..256 { + let key = vec![i as u8]; + let value = vec![0x00]; + + merkle + .insert(key.clone(), value.clone(), root.clone()) + .unwrap(); + } + + let mut stream = merkle.iter(root); + + for i in 0..256 { + let expected_key = vec![i as u8]; + let expected_value = vec![0x00]; + + assert_eq!( + stream.next().await.unwrap().unwrap(), + (expected_key.into_boxed_slice(), expected_value), + ); + } + + check_stream_is_done(stream).await; + } + #[tokio::test] async fn fused_full() { let mut merkle = create_test_merkle(); From 49a93c657736e98a3848b3b1d897a41db13bd833 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 12 Jan 2024 22:01:34 -0500 Subject: [PATCH 12/59] improve no_start_key test --- firewood/src/merkle/stream.rs | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index c7eeccf3c..e2bc765c1 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -569,25 +569,29 @@ mod tests { let mut merkle = create_test_merkle(); let root = merkle.init_root().unwrap(); - for i in 0..256 { - let key = vec![i as u8]; - let value = vec![0x00]; - - merkle - .insert(key.clone(), value.clone(), root.clone()) - .unwrap(); + for i in (0..256).rev() { + for j in (0..256).rev() { + let key = vec![i as u8, j as u8]; + let value = vec![0x00]; + + merkle + .insert(key.clone(), value.clone(), root.clone()) + .unwrap(); + } } let mut stream = merkle.iter(root); for i in 0..256 { - let expected_key = vec![i as u8]; - let expected_value = vec![0x00]; - - assert_eq!( - stream.next().await.unwrap().unwrap(), - (expected_key.into_boxed_slice(), expected_value), - ); + for j in 0..256 { + let expected_key = vec![i as u8, j as u8]; + let expected_value = vec![0x00]; + + assert_eq!( + stream.next().await.unwrap().unwrap(), + (expected_key.into_boxed_slice(), expected_value), + ); + } } check_stream_is_done(stream).await; From 4a682829b42bf1e14fc38c6686b33c92726d7eaf Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 12 Jan 2024 22:05:53 -0500 Subject: [PATCH 13/59] use get_children_iter helper --- firewood/src/merkle/stream.rs | 170 ++-------------------------------- 1 file changed, 6 insertions(+), 164 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index e2bc765c1..4907f871b 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -1,13 +1,12 @@ // Copyright (C) 2023, Ava Labs, Inc. All rights reserved. // See the file LICENSE.md for licensing terms. -use super::{node::Node, BranchNode, Merkle, NodeObjRef, NodeType}; +use super::{node::Node, BranchNode, Merkle, NodeType}; use crate::{ shale::{DiskAddress, ShaleStore}, v2::api, }; use futures::{stream::FusedStream, Stream}; -use helper_types::Either; use std::task::Poll; type Key = Box<[u8]>; type Value = Vec; @@ -85,7 +84,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' } = &mut *self; match key_state { - IteratorState::StartAtKey(key) => { + IteratorState::StartAtKey(_) => { let root_node = merkle .get_node(*merkle_root) .map_err(|e| api::Error::InternalError(Box::new(e)))?; @@ -219,15 +218,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // [children_iter] returns (child_addr, pos) // for every non-empty child in [node] where // [pos] is the child's index in [node.children]. - let children_iter = branch.children.into_iter().enumerate().filter_map( - |(pos, addr)| { - if addr.is_some() { - Some((addr.unwrap(), pos as u8)) - } else { - None - } - }, - ); + let children_iter = get_children_iter(branch); node_iter_stack.push(NodeIterator { key_nibbles: child_key_nibbles.clone(), // TODO reduce¸cloning @@ -257,16 +248,8 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .map_err(|e| api::Error::InternalError(Box::new(e)))?; // TODO confirm that an extension node's child is always a branch node - let children_iter = child - .inner() - .as_branch() - .unwrap() - .children - .into_iter() - .enumerate() - .filter_map(|(pos, child_addr)| { - child_addr.map(|child_addr| (child_addr, pos as u8)) - }); + let branch = child.inner().as_branch().unwrap(); + let branch_iter = get_children_iter(branch); // TODO I feel like there's a better way to do this let mut child_key = child_key_nibbles; @@ -274,7 +257,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' node_iter_stack.push(NodeIterator { key_nibbles: child_key, - children_iter: Box::new(children_iter), + children_iter: Box::new(branch_iter), }); } }; @@ -284,135 +267,6 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' } } -// TODO remove -// enum NodeRef<'a> { -// New(NodeObjRef<'a>), -// Visited(NodeObjRef<'a>), -// } - -// #[derive(Debug)] -// enum InnerNode<'a> { -// New(&'a NodeType), -// Visited(&'a NodeType), -// } - -// impl<'a> NodeRef<'a> { -// fn inner(&self) -> InnerNode<'_> { -// match self { -// Self::New(node) => InnerNode::New(node.inner()), -// Self::Visited(node) => InnerNode::Visited(node.inner()), -// } -// } - -// fn into_node(self) -> NodeObjRef<'a> { -// match self { -// Self::New(node) => node, -// Self::Visited(node) => node, -// } -// } -// } - -// fn find_next_result<'a, S: ShaleStore, T>( -// merkle: &'a Merkle, -// visited_path: &mut Vec<(NodeObjRef<'a>, u8)>, -// ) -> Result, super::MerkleError> { -// let next = find_next_node_with_data(merkle, visited_path)?.map(|(next_node, value)| { -// let partial_path = match next_node.inner() { -// NodeType::Leaf(leaf) => leaf.path.iter().copied(), -// NodeType::Extension(extension) => extension.path.iter().copied(), -// _ => [].iter().copied(), -// }; - -// let key = key_from_nibble_iter(nibble_iter_from_parents(visited_path).chain(partial_path)); - -// visited_path.push((next_node, 0)); - -// (key, value) -// }); - -// Ok(next) -// } - -// fn find_next_node_with_data<'a, S: ShaleStore, T>( -// merkle: &'a Merkle, -// visited_path: &mut Vec<(NodeObjRef<'a>, u8)>, -// ) -> Result, Vec)>, super::MerkleError> { -// use InnerNode::*; - -// let Some((visited_parent, visited_pos)) = visited_path.pop() else { -// return Ok(None); -// }; - -// let mut node = NodeRef::Visited(visited_parent); -// let mut pos = visited_pos; -// let mut first_loop = true; - -// loop { -// match node.inner() { -// New(NodeType::Leaf(leaf)) => { -// let value = leaf.data.to_vec(); -// return Ok(Some((node.into_node(), value))); -// } - -// Visited(NodeType::Leaf(_)) | Visited(NodeType::Extension(_)) => { -// let Some((next_parent, next_pos)) = visited_path.pop() else { -// return Ok(None); -// }; - -// node = NodeRef::Visited(next_parent); -// pos = next_pos; -// } - -// New(NodeType::Extension(extension)) => { -// let child = merkle.get_node(extension.chd())?; - -// pos = 0; -// visited_path.push((node.into_node(), pos)); - -// node = NodeRef::New(child); -// } - -// Visited(NodeType::Branch(branch)) => { -// // if the first node that we check is a visited branch, that means that the branch had a value -// // and we need to visit the first child, for all other cases, we need to visit the next child -// let compare_op = if first_loop { -// ::ge // >= -// } else { -// ::gt -// }; - -// let children = get_children_iter(branch) -// .filter(move |(_, child_pos)| compare_op(child_pos, &pos)); - -// let found_next_node = -// next_node(merkle, children, visited_path, &mut node, &mut pos)?; - -// if !found_next_node { -// return Ok(None); -// } -// } - -// New(NodeType::Branch(branch)) => { -// if let Some(value) = branch.value.as_ref() { -// let value = value.to_vec(); -// return Ok(Some((node.into_node(), value))); -// } - -// let children = get_children_iter(branch); - -// let found_next_node = -// next_node(merkle, children, visited_path, &mut node, &mut pos)?; - -// if !found_next_node { -// return Ok(None); -// } -// } -// } - -// first_loop = false; -// } -// } - fn get_children_iter(branch: &BranchNode) -> impl Iterator { branch .children @@ -421,18 +275,6 @@ fn get_children_iter(branch: &BranchNode) -> impl Iterator(parents: &'a [(NodeObjRef, u8)]) -> impl Iterator + 'a { - parents - .iter() - .skip(1) // always skip the sentinal node - .flat_map(|(parent, child_nibble)| match parent.inner() { - NodeType::Branch(_) => Either::Left(std::iter::once(*child_nibble)), - NodeType::Extension(extension) => Either::Right(extension.path.iter().copied()), - NodeType::Leaf(leaf) => Either::Right(leaf.path.iter().copied()), - }) -} - fn key_from_nibble_iter>(mut nibbles: Iter) -> Key { let mut data = Vec::with_capacity(nibbles.size_hint().0 / 2); From 009bfe90af6388b0decc6c51f1a358ae5fe6ea79 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 12 Jan 2024 22:07:15 -0500 Subject: [PATCH 14/59] add comment --- firewood/src/merkle/stream.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 4907f871b..c4aa75d0a 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -89,6 +89,9 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .get_node(*merkle_root) .map_err(|e| api::Error::InternalError(Box::new(e)))?; + // TODO implement this code path. + // Right now we always start iterating from the root. + // We need to populate the initial state of [node_iter_stack]. let mut node_iter_stack: Vec = vec![]; node_iter_stack.push(NodeIterator { key_nibbles: vec![], From b8dddb4d39771258f3de6317edab74895893ba9f Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 12 Jan 2024 22:14:36 -0500 Subject: [PATCH 15/59] comments and cleanup --- firewood/src/merkle/stream.rs | 52 ++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index c4aa75d0a..4e7564797 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -11,8 +11,11 @@ use std::task::Poll; type Key = Box<[u8]>; type Value = Vec; -struct NodeIterator { +struct BranchIterator { + // The nibbles of the key at this node. key_nibbles: Vec, + // Returns the non-empty children of this node + // and their positions in the node's children array. children_iter: Box + Send>, } @@ -20,7 +23,14 @@ enum IteratorState { /// Start iterating at the specified key StartAtKey(Key), /// Continue iterating after the last node in the `visited_node_path` - Iterating { node_iter_stack: Vec }, + Iterating { + // Each element is an iterator over a branch node we've visited + // along our traversal of the key-value pairs in the trie. + // We pop an iterator off the stack and call next on it to + // get the next child node to visit. When an iterator is empty, + // we pop it off the stack and go back up to its parent. + traversal_stack: Vec, + }, } impl IteratorState { @@ -42,7 +52,7 @@ pub struct MerkleKeyValueStream<'a, S, T> { impl<'a, S: ShaleStore + Send + Sync, T> FusedStream for MerkleKeyValueStream<'a, S, T> { fn is_terminated(&self) -> bool { - matches!(&self.key_state, IteratorState::Iterating { node_iter_stack } if node_iter_stack.is_empty()) + matches!(&self.key_state, IteratorState::Iterating { traversal_stack } if traversal_stack.is_empty()) } } @@ -89,21 +99,25 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .get_node(*merkle_root) .map_err(|e| api::Error::InternalError(Box::new(e)))?; - // TODO implement this code path. + // TODO implement this branch. // Right now we always start iterating from the root. // We need to populate the initial state of [node_iter_stack]. - let mut node_iter_stack: Vec = vec![]; - node_iter_stack.push(NodeIterator { + let mut node_iter_stack: Vec = vec![]; + node_iter_stack.push(BranchIterator { key_nibbles: vec![], children_iter: Box::new(get_children_iter( root_node.inner().as_branch().unwrap(), )), }); - self.key_state = IteratorState::Iterating { node_iter_stack }; + self.key_state = IteratorState::Iterating { + traversal_stack: node_iter_stack, + }; self.poll_next(_cx) + // TODO remove this code from the old implementation + // // let mut path_to_key = vec![]; // let node_at_key = merkle @@ -132,8 +146,6 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // path_to_key.remove(0); // } - // // TODO remove - // TODO remove // // traverse the trie along each nibble until we find a node with a value // // TODO: merkle.iter_by_key(key) will simplify this entire code-block. // let (found_node, mut visited_node_path) = { @@ -193,7 +205,9 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // self.poll_next(_cx) } - IteratorState::Iterating { node_iter_stack } => { + IteratorState::Iterating { + traversal_stack: node_iter_stack, + } => { loop { let Some(mut node_iter) = node_iter_stack.pop() else { return Poll::Ready(None); @@ -223,7 +237,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // [pos] is the child's index in [node.children]. let children_iter = get_children_iter(branch); - node_iter_stack.push(NodeIterator { + node_iter_stack.push(BranchIterator { key_nibbles: child_key_nibbles.clone(), // TODO reduce¸cloning children_iter: Box::new(children_iter), }); @@ -232,7 +246,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' if let Some(value) = branch.value.as_ref() { let value = value.to_vec(); return Poll::Ready(Some(Ok(( - key_from_nibble_iter(child_key_nibbles.into_iter().skip(1)), + key_from_nibble_iter(child_key_nibbles.into_iter().skip(1)), // skip the sentinel node leading 0 value, )))); } @@ -240,27 +254,27 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' NodeType::Leaf(leaf) => { let value = leaf.data.to_vec(); return Poll::Ready(Some(Ok(( - key_from_nibble_iter(child_key_nibbles.into_iter().skip(1)), + key_from_nibble_iter(child_key_nibbles.into_iter().skip(1)), // skip the sentinel node leading 0 value, )))); } NodeType::Extension(extension) => { - // Follow the extension node to its child. + // Follow the extension node to its child, which is a branch. + // TODO confirm that an extension node's child is always a branch node. let child = merkle .get_node(extension.chd()) .map_err(|e| api::Error::InternalError(Box::new(e)))?; - // TODO confirm that an extension node's child is always a branch node let branch = child.inner().as_branch().unwrap(); - let branch_iter = get_children_iter(branch); + let children_iter = get_children_iter(branch); - // TODO I feel like there's a better way to do this + // TODO reduce cloning let mut child_key = child_key_nibbles; child_key.extend(extension.path.iter()); - node_iter_stack.push(NodeIterator { + node_iter_stack.push(BranchIterator { key_nibbles: child_key, - children_iter: Box::new(branch_iter), + children_iter: Box::new(children_iter), }); } }; From 54fa0432abfb5a30685c00bcfd252bb8f54ebf1b Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 12 Jan 2024 22:16:06 -0500 Subject: [PATCH 16/59] comments and cleanup --- firewood/src/merkle/stream.rs | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 4e7564797..8c24d542a 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -29,7 +29,7 @@ enum IteratorState { // We pop an iterator off the stack and call next on it to // get the next child node to visit. When an iterator is empty, // we pop it off the stack and go back up to its parent. - traversal_stack: Vec, + branch_iter_stack: Vec, }, } @@ -52,7 +52,7 @@ pub struct MerkleKeyValueStream<'a, S, T> { impl<'a, S: ShaleStore + Send + Sync, T> FusedStream for MerkleKeyValueStream<'a, S, T> { fn is_terminated(&self) -> bool { - matches!(&self.key_state, IteratorState::Iterating { traversal_stack } if traversal_stack.is_empty()) + matches!(&self.key_state, IteratorState::Iterating { branch_iter_stack } if branch_iter_stack.is_empty()) } } @@ -101,9 +101,9 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // TODO implement this branch. // Right now we always start iterating from the root. - // We need to populate the initial state of [node_iter_stack]. - let mut node_iter_stack: Vec = vec![]; - node_iter_stack.push(BranchIterator { + // We need to populate the initial state of [branch_iter_stack]. + let mut branch_iter_stack: Vec = vec![]; + branch_iter_stack.push(BranchIterator { key_nibbles: vec![], children_iter: Box::new(get_children_iter( root_node.inner().as_branch().unwrap(), @@ -111,7 +111,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' }); self.key_state = IteratorState::Iterating { - traversal_stack: node_iter_stack, + branch_iter_stack: branch_iter_stack, }; self.poll_next(_cx) @@ -205,17 +205,15 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // self.poll_next(_cx) } - IteratorState::Iterating { - traversal_stack: node_iter_stack, - } => { + IteratorState::Iterating { branch_iter_stack } => { loop { - let Some(mut node_iter) = node_iter_stack.pop() else { + let Some(mut branch_iter) = branch_iter_stack.pop() else { return Poll::Ready(None); }; // [node_addr] is the next node to visit. // It's the child at index [pos] of [node_iter]. - let Some((node_addr, pos)) = node_iter.children_iter.next() else { + let Some((node_addr, pos)) = branch_iter.children_iter.next() else { // We visited all this node's descendants. // Go back to its parent. continue; @@ -225,10 +223,10 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .get_node(node_addr) .map_err(|e| api::Error::InternalError(Box::new(e)))?; - let mut child_key_nibbles = node_iter.key_nibbles.clone(); // TODO reduce¸cloning + let mut child_key_nibbles = branch_iter.key_nibbles.clone(); // TODO reduce¸cloning child_key_nibbles.push(pos); - node_iter_stack.push(node_iter); + branch_iter_stack.push(branch_iter); match node.inner() { NodeType::Branch(branch) => { @@ -237,7 +235,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // [pos] is the child's index in [node.children]. let children_iter = get_children_iter(branch); - node_iter_stack.push(BranchIterator { + branch_iter_stack.push(BranchIterator { key_nibbles: child_key_nibbles.clone(), // TODO reduce¸cloning children_iter: Box::new(children_iter), }); @@ -272,7 +270,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let mut child_key = child_key_nibbles; child_key.extend(extension.path.iter()); - node_iter_stack.push(BranchIterator { + branch_iter_stack.push(BranchIterator { key_nibbles: child_key, children_iter: Box::new(children_iter), }); From 8dfd728686be840bea2229b9a205b719f98fb1a2 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Sat, 13 Jan 2024 11:40:36 -0500 Subject: [PATCH 17/59] WIP --- firewood/src/merkle/stream.rs | 204 ++++++++++++++++------------------ 1 file changed, 97 insertions(+), 107 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 8c24d542a..57f695f7e 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -1,13 +1,17 @@ // Copyright (C) 2023, Ava Labs, Inc. All rights reserved. // See the file LICENSE.md for licensing terms. -use super::{node::Node, BranchNode, Merkle, NodeType}; +use super::{ + node::{self, Node}, + BranchNode, Merkle, NodeType, +}; use crate::{ + merkle::NodeObjRef, shale::{DiskAddress, ShaleStore}, v2::api, }; use futures::{stream::FusedStream, Stream}; -use std::task::Poll; +use std::{collections::VecDeque, task::Poll}; type Key = Box<[u8]>; type Value = Vec; @@ -94,116 +98,97 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' } = &mut *self; match key_state { - IteratorState::StartAtKey(_) => { + IteratorState::StartAtKey(key) => { let root_node = merkle .get_node(*merkle_root) .map_err(|e| api::Error::InternalError(Box::new(e)))?; - // TODO implement this branch. - // Right now we always start iterating from the root. - // We need to populate the initial state of [branch_iter_stack]. let mut branch_iter_stack: Vec = vec![]; - branch_iter_stack.push(BranchIterator { - key_nibbles: vec![], - children_iter: Box::new(get_children_iter( - root_node.inner().as_branch().unwrap(), - )), - }); - - self.key_state = IteratorState::Iterating { - branch_iter_stack: branch_iter_stack, - }; - - self.poll_next(_cx) - - // TODO remove this code from the old implementation - // - // let mut path_to_key = vec![]; - - // let node_at_key = merkle - // .get_node_by_key_with_callbacks( - // root_node, - // &key, - // |node_addr, i| path_to_key.push((node_addr, i)), - // |_, _| {}, - // ) - // .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - // let mut path_to_key = path_to_key - // .into_iter() - // .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) - // .collect::, _>>() - // .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - // let remaining_key = key.iter(); - - // let mut node_iter_stack: Vec = vec![]; - - // loop { - // let Some((node, pos)) = path_to_key.first() else { - // break; - // }; - // path_to_key.remove(0); - // } - - // // traverse the trie along each nibble until we find a node with a value - // // TODO: merkle.iter_by_key(key) will simplify this entire code-block. - // let (found_node, mut visited_node_path) = { - // let mut visited_node_path = vec![]; - - // let found_node = merkle - // .get_node_by_key_with_callbacks( - // root_node, - // &key, - // |node_addr, i| visited_node_path.push((node_addr, i)), - // |_, _| {}, - // ) - // .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - // let mut visited_node_path = visited_node_path - // .into_iter() - // .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) - // .collect::, _>>() - // .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - // let last_visited_node_not_branch = visited_node_path - // .last() - // .map(|(node, _)| { - // matches!(node.inner(), NodeType::Leaf(_) | NodeType::Extension(_)) - // }) - // .unwrap_or_default(); - - // // we only want branch in the visited node-path to start - // if last_visited_node_not_branch { - // visited_node_path.pop(); - // } - - // (found_node, visited_node_path) - // }; - - // if let Some(found_node) = found_node { - // let value = match found_node.inner() { - // NodeType::Branch(branch) => branch.value.as_ref(), - // NodeType::Leaf(leaf) => Some(&leaf.data), - // NodeType::Extension(_) => None, - // }; - - // let next_result = value.map(|value| { - // let value = value.to_vec(); - - // Ok((std::mem::take(key), value)) - // }); - - // visited_node_path.push((found_node, 0)); - - // self.key_state = IteratorState::Iterating { visited_node_path }; - - // return Poll::Ready(next_result); - // } - - // self.key_state = IteratorState::Iterating { visited_node_path }; - - // self.poll_next(_cx) + + // TODO remove + // branch_iter_stack.push(BranchIterator { + // key_nibbles: vec![], + // children_iter: Box::new(get_children_iter( + // root_node.inner().as_branch().unwrap(), + // )), + // }); + + // (disk address, index) for each node we visit along the path to the node + // with [key], where [index] is the next nibble in the key. + let mut path_to_key = vec![]; + + let node_at_key = merkle + .get_node_by_key_with_callbacks( + root_node, + &key, + |node_addr, i| path_to_key.push((node_addr, i)), + |_, _| {}, + ) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + let mut path_to_key: VecDeque<(NodeObjRef<'a>, u8)> = path_to_key + .into_iter() + .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) + .collect::, _>>() + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + let mut key_nibbles_so_far: Vec = vec![]; + + loop { + let Some((node, pos)) = path_to_key.pop_front() else { + break; + }; + + match node.inner() { + NodeType::Branch(branch) => { + if path_to_key.len() == 0 { + // TODO is this right? + // This is the last node so there are 2 possibilities: + // 1. This node's child is at the [key] + // 2. There is no node with [key]. + + if node_at_key.is_none() { + let children_iter = get_children_iter(branch) + .filter(move |(_, child_pos)| child_pos > &pos); + + branch_iter_stack.push(BranchIterator { + key_nibbles: key_nibbles_so_far.clone(), + children_iter: Box::new(children_iter), + }); + } + } + + key_nibbles_so_far.push(pos); + } + NodeType::Leaf(_) => (), + NodeType::Extension(extension) => { + // Add the extension node's path to the key nibbles. + key_nibbles_so_far.extend(extension.path.iter()); + + if path_to_key.len() == 0 { + // This is the last node in the path to the key. + if node_at_key.is_some() { + // Get the branch node child + let child = merkle + .get_node(extension.chd()) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + let children_iter = + get_children_iter(child.inner().as_branch().unwrap()); + + branch_iter_stack.push(BranchIterator { + key_nibbles: key_nibbles_so_far.clone(), + children_iter: Box::new(children_iter), + }); + } + } + } + } + } + + self.key_state = IteratorState::Iterating { branch_iter_stack }; + + return self.poll_next(_cx); } IteratorState::Iterating { branch_iter_stack } => { loop { @@ -444,9 +429,14 @@ mod tests { let expected_key = vec![i as u8, j as u8]; let expected_value = vec![0x00]; + println!("i: {}, j: {}", i, j); + assert_eq!( stream.next().await.unwrap().unwrap(), (expected_key.into_boxed_slice(), expected_value), + "i: {}, j: {}", + i, + j, ); } } From 082a797e0a923cfa22191f576eb70095aa1aeafc Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Sat, 13 Jan 2024 22:14:49 -0500 Subject: [PATCH 18/59] comment WIP code --- firewood/src/merkle/stream.rs | 172 +++++++++++++++++----------------- 1 file changed, 87 insertions(+), 85 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 57f695f7e..11526b8b8 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -5,6 +5,7 @@ use super::{ node::{self, Node}, BranchNode, Merkle, NodeType, }; +use crate::nibbles::Nibbles; use crate::{ merkle::NodeObjRef, shale::{DiskAddress, ShaleStore}, @@ -105,86 +106,91 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let mut branch_iter_stack: Vec = vec![]; - // TODO remove - // branch_iter_stack.push(BranchIterator { - // key_nibbles: vec![], - // children_iter: Box::new(get_children_iter( - // root_node.inner().as_branch().unwrap(), - // )), - // }); - - // (disk address, index) for each node we visit along the path to the node - // with [key], where [index] is the next nibble in the key. - let mut path_to_key = vec![]; - - let node_at_key = merkle - .get_node_by_key_with_callbacks( - root_node, - &key, - |node_addr, i| path_to_key.push((node_addr, i)), - |_, _| {}, - ) - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - let mut path_to_key: VecDeque<(NodeObjRef<'a>, u8)> = path_to_key - .into_iter() - .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) - .collect::, _>>() - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - let mut key_nibbles_so_far: Vec = vec![]; - - loop { - let Some((node, pos)) = path_to_key.pop_front() else { - break; - }; - - match node.inner() { - NodeType::Branch(branch) => { - if path_to_key.len() == 0 { - // TODO is this right? - // This is the last node so there are 2 possibilities: - // 1. This node's child is at the [key] - // 2. There is no node with [key]. - - if node_at_key.is_none() { - let children_iter = get_children_iter(branch) - .filter(move |(_, child_pos)| child_pos > &pos); - - branch_iter_stack.push(BranchIterator { - key_nibbles: key_nibbles_so_far.clone(), - children_iter: Box::new(children_iter), - }); - } - } - - key_nibbles_so_far.push(pos); - } - NodeType::Leaf(_) => (), - NodeType::Extension(extension) => { - // Add the extension node's path to the key nibbles. - key_nibbles_so_far.extend(extension.path.iter()); - - if path_to_key.len() == 0 { - // This is the last node in the path to the key. - if node_at_key.is_some() { - // Get the branch node child - let child = merkle - .get_node(extension.chd()) - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - let children_iter = - get_children_iter(child.inner().as_branch().unwrap()); - - branch_iter_stack.push(BranchIterator { - key_nibbles: key_nibbles_so_far.clone(), - children_iter: Box::new(children_iter), - }); - } - } - } - } - } + branch_iter_stack.push(BranchIterator { + key_nibbles: vec![], + children_iter: Box::new(get_children_iter( + root_node.inner().as_branch().unwrap(), + )), + }); + + // let key_nibbles_len = Nibbles::<1>::new(key.as_ref()).len(); + // // TODO remove + + // // (disk address, index) for each node we visit along the path to the node + // // with [key], where [index] is the next nibble in the key. + // let mut path_to_key = vec![]; + + // let node_at_key = merkle + // .get_node_by_key_with_callbacks( + // root_node, + // &key, + // |node_addr, i| path_to_key.push((node_addr, i)), + // |_, _| {}, + // ) + // .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + // let mut path_to_key: VecDeque<(NodeObjRef<'a>, u8)> = path_to_key + // .into_iter() + // .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) + // .collect::, _>>() + // .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + // let mut key_nibbles_so_far: Vec = vec![]; + + // loop { + // let Some((node, pos)) = path_to_key.pop_front() else { + // break; + // }; + + // match node.inner() { + // NodeType::Branch(branch) => { + // if path_to_key.len() == 0 { + // // TODO is this right? + // // This is the last node so there are 2 possibilities: + // // 1. This node's child is at the [key] + // // 2. There is no node with [key]. + + // if key_nibbles_so_far.len() == key_nibbles_len { + // // This node is the node with [key]. + // } + + // // Start from [pos] + 1 since [key] isn't in tree. + // let children_iter = get_children_iter(branch) + // .filter(move |(_, child_pos)| child_pos > &next_child_index); + + // branch_iter_stack.push(BranchIterator { + // key_nibbles: key_nibbles_so_far.clone(), + // children_iter: Box::new(children_iter), + // }); + // } + + // key_nibbles_so_far.push(pos); + // } + // NodeType::Leaf(_) => (), + // NodeType::Extension(extension) => { + // // Add the extension node's path to the key nibbles. + // key_nibbles_so_far.extend(extension.path.iter()); + + // if path_to_key.len() == 0 { + // // This is the last node in the path to the key. + // if node_at_key.is_some() { + // // Get the branch node child + // let child = merkle + // .get_node(extension.chd()) + // .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + // let children_iter = + // get_children_iter(child.inner().as_branch().unwrap()); + + // branch_iter_stack.push(BranchIterator { + // key_nibbles: key_nibbles_so_far.clone(), + // children_iter: Box::new(children_iter), + // }); + // } + // } + // } + // } + // } self.key_state = IteratorState::Iterating { branch_iter_stack }; @@ -416,9 +422,7 @@ mod tests { let key = vec![i as u8, j as u8]; let value = vec![0x00]; - merkle - .insert(key.clone(), value.clone(), root.clone()) - .unwrap(); + merkle.insert(key, value, root).unwrap(); } } @@ -429,8 +433,6 @@ mod tests { let expected_key = vec![i as u8, j as u8]; let expected_value = vec![0x00]; - println!("i: {}, j: {}", i, j); - assert_eq!( stream.next().await.unwrap().unwrap(), (expected_key.into_boxed_slice(), expected_value), From fe2de9b5135b0cff6f7f417cd517fdb8f8cc64d9 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Sun, 14 Jan 2024 07:35:37 -0500 Subject: [PATCH 19/59] uncomment WIP code --- firewood/src/merkle/stream.rs | 178 ++++++++++++++++++---------------- 1 file changed, 93 insertions(+), 85 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 11526b8b8..5d8296b33 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -106,91 +106,99 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let mut branch_iter_stack: Vec = vec![]; - branch_iter_stack.push(BranchIterator { - key_nibbles: vec![], - children_iter: Box::new(get_children_iter( - root_node.inner().as_branch().unwrap(), - )), - }); - - // let key_nibbles_len = Nibbles::<1>::new(key.as_ref()).len(); - // // TODO remove - - // // (disk address, index) for each node we visit along the path to the node - // // with [key], where [index] is the next nibble in the key. - // let mut path_to_key = vec![]; - - // let node_at_key = merkle - // .get_node_by_key_with_callbacks( - // root_node, - // &key, - // |node_addr, i| path_to_key.push((node_addr, i)), - // |_, _| {}, - // ) - // .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - // let mut path_to_key: VecDeque<(NodeObjRef<'a>, u8)> = path_to_key - // .into_iter() - // .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) - // .collect::, _>>() - // .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - // let mut key_nibbles_so_far: Vec = vec![]; - - // loop { - // let Some((node, pos)) = path_to_key.pop_front() else { - // break; - // }; - - // match node.inner() { - // NodeType::Branch(branch) => { - // if path_to_key.len() == 0 { - // // TODO is this right? - // // This is the last node so there are 2 possibilities: - // // 1. This node's child is at the [key] - // // 2. There is no node with [key]. - - // if key_nibbles_so_far.len() == key_nibbles_len { - // // This node is the node with [key]. - // } - - // // Start from [pos] + 1 since [key] isn't in tree. - // let children_iter = get_children_iter(branch) - // .filter(move |(_, child_pos)| child_pos > &next_child_index); - - // branch_iter_stack.push(BranchIterator { - // key_nibbles: key_nibbles_so_far.clone(), - // children_iter: Box::new(children_iter), - // }); - // } - - // key_nibbles_so_far.push(pos); - // } - // NodeType::Leaf(_) => (), - // NodeType::Extension(extension) => { - // // Add the extension node's path to the key nibbles. - // key_nibbles_so_far.extend(extension.path.iter()); - - // if path_to_key.len() == 0 { - // // This is the last node in the path to the key. - // if node_at_key.is_some() { - // // Get the branch node child - // let child = merkle - // .get_node(extension.chd()) - // .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - // let children_iter = - // get_children_iter(child.inner().as_branch().unwrap()); - - // branch_iter_stack.push(BranchIterator { - // key_nibbles: key_nibbles_so_far.clone(), - // children_iter: Box::new(children_iter), - // }); - // } - // } - // } - // } - // } + // branch_iter_stack.push(BranchIterator { + // key_nibbles: vec![], + // children_iter: Box::new(get_children_iter( + // root_node.inner().as_branch().unwrap(), + // )), + // }); + + // (disk address, index) for each node we visit along the path to the node + // with [key], where [index] is the next nibble in the key. + let mut path_to_key = vec![]; + + let node_at_key = merkle + .get_node_by_key_with_callbacks( + root_node, + &key, + |node_addr, i| path_to_key.push((node_addr, i)), + |_, _| {}, + ) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + let mut path_to_key: VecDeque<(NodeObjRef<'a>, u8)> = path_to_key + .into_iter() + .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) + .collect::, _>>() + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + let mut key_nibbles_so_far: Vec = vec![]; + + loop { + let Some((node, pos)) = path_to_key.pop_front() else { + break; + }; + + match node.inner() { + NodeType::Branch(branch) => { + if path_to_key.len() != 0 { + let children_iter = get_children_iter(branch) + .filter(move |(_, child_pos)| child_pos > &pos); + + branch_iter_stack.push(BranchIterator { + key_nibbles: key_nibbles_so_far.clone(), + children_iter: Box::new(children_iter), + }); + } else { + // TODO: + // This is the last node in the path to the key. + // That means that either this node's child is at the [key] + // or the [key] is not in the trie. + // We need to figure out whether the first iteration of [children_iter] + // should be the child at [pos] or the child at [pos + 1]. + + let children_iter = get_children_iter(branch) + .filter(move |(_, child_pos)| child_pos >= &pos); + + branch_iter_stack.push(BranchIterator { + key_nibbles: key_nibbles_so_far.clone(), + children_iter: Box::new(children_iter), + }); + } + + key_nibbles_so_far.push(pos); + } + NodeType::Leaf(_) => (), + NodeType::Extension(extension) => { + // Add the extension node's path to the key nibbles. + key_nibbles_so_far.extend(extension.path.iter()); + + if path_to_key.len() == 0 { + // TODO: + // This is the last node in the path to the key. + // That means that either this node's child is at the [key] + // or the [key] is not in the trie. + // We need to figure out whether the first iteration of [children_iter] + // should be the child at [pos] or the child at [pos + 1]. + + let child = merkle + .get_node(extension.chd()) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + let children_iter = + get_children_iter(child.inner().as_branch().unwrap()); + + let children_iter = + children_iter.filter(move |(_, child_pos)| child_pos > &pos); + + branch_iter_stack.push(BranchIterator { + key_nibbles: key_nibbles_so_far.clone(), + children_iter: Box::new(children_iter), + }); + } + } + } + } self.key_state = IteratorState::Iterating { branch_iter_stack }; From e3e5adc91d353cc196f459008043ca181a1f84ef Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Mon, 29 Jan 2024 12:47:20 -0500 Subject: [PATCH 20/59] WIP fixing initialization code --- firewood/src/merkle.rs | 3 ++- firewood/src/merkle/stream.rs | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/firewood/src/merkle.rs b/firewood/src/merkle.rs index 7c9414ab7..00da6404e 100644 --- a/firewood/src/merkle.rs +++ b/firewood/src/merkle.rs @@ -1181,7 +1181,8 @@ impl + Send + Sync, T> Merkle { node_ref = self.get_node(next_ptr)?; } - // when we're done iterating over nibbles, check if the node we're at has a value + // We're done iterating over nibbles. + // Check if the node we're at has a value. let node_ref = match &node_ref.inner { NodeType::Branch(n) if n.value.as_ref().is_some() => Some(node_ref), NodeType::Leaf(n) if n.path.len() == 0 => Some(node_ref), diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 6154d710a..7cde35ba2 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -153,6 +153,23 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // We need to figure out whether the first iteration of [children_iter] // should be the child at [pos] or the child at [pos + 1]. + // Get the child at [pos], if any. + let child = branch.children[pos as usize] + .map(|child_addr| merkle.get_node(child_addr)) + .transpose() + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + // If the child doesn't exist, the key isn't in the trie. + if child.is_none() { + let children_iter = get_children_iter(branch) + .filter(move |(_, child_pos)| child_pos > &pos); + + branch_iter_stack.push(BranchIterator { + key_nibbles: key_nibbles_so_far.clone(), + children_iter: Box::new(children_iter), + }); + break; + } let children_iter = get_children_iter(branch) .filter(move |(_, child_pos)| child_pos >= &pos); From c1fd9062e84a535472c01de1239e686c1fbbc218 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Mon, 29 Jan 2024 12:58:09 -0500 Subject: [PATCH 21/59] WIP fixing initialization code --- firewood/src/merkle/stream.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 7cde35ba2..2de2d18c3 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -168,15 +168,15 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' key_nibbles: key_nibbles_so_far.clone(), children_iter: Box::new(children_iter), }); - break; - } - let children_iter = get_children_iter(branch) - .filter(move |(_, child_pos)| child_pos >= &pos); + } else { + let children_iter = get_children_iter(branch) + .filter(move |(_, child_pos)| child_pos >= &pos); - branch_iter_stack.push(BranchIterator { - key_nibbles: key_nibbles_so_far.clone(), - children_iter: Box::new(children_iter), - }); + branch_iter_stack.push(BranchIterator { + key_nibbles: key_nibbles_so_far.clone(), + children_iter: Box::new(children_iter), + }); + } } key_nibbles_so_far.push(pos); From 66a715e226a5946bc66aa70009dd2c7296237271 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Mon, 29 Jan 2024 16:41:42 -0500 Subject: [PATCH 22/59] 18 of 20 UT pass --- firewood/src/merkle/stream.rs | 130 +++++++++++++++++++++++++++++----- 1 file changed, 111 insertions(+), 19 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 2de2d18c3..7a78d8bba 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -4,6 +4,7 @@ use super::{node::Node, BranchNode, Merkle, NodeType}; use crate::{ merkle::NodeObjRef, + nibbles::Nibbles, shale::{DiskAddress, ShaleStore}, v2::api, }; @@ -183,9 +184,6 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' } NodeType::Leaf(_) => (), NodeType::Extension(extension) => { - // Add the extension node's path to the key nibbles. - key_nibbles_so_far.extend(extension.path.iter()); - if path_to_key.len() == 0 { // TODO: // This is the last node in the path to the key. @@ -194,20 +192,68 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // We need to figure out whether the first iteration of [children_iter] // should be the child at [pos] or the child at [pos + 1]. - let child = merkle - .get_node(extension.chd()) - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - let children_iter = - get_children_iter(child.inner().as_branch().unwrap()); - - let children_iter = - children_iter.filter(move |(_, child_pos)| child_pos > &pos); - - branch_iter_stack.push(BranchIterator { - key_nibbles: key_nibbles_so_far.clone(), - children_iter: Box::new(children_iter), - }); + // Figure out if this extension node's child is befpre, at or after [key]. + let key_nibbles = Nibbles::<1>::new(key.as_ref()).into_iter(); + + let mut remaining_key = key_nibbles.skip(key_nibbles_so_far.len()); + println!("{:?}", key.iter().skip(key_nibbles_so_far.len()).len()); + println!("key len {:?}", key.len()); + println!("key {:?}", key); + + let mut extension_iter = extension.path.iter(); + + while let Some(next_extension_nibble) = extension_iter.next() { + let next_key_nibble = remaining_key.next(); + + if next_key_nibble.is_none() { + // We ran out of nibbles of [key] so + // the extension node is after [key]. + let child = merkle + .get_node(extension.chd()) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + let mut key_nibbles = key_nibbles_so_far.clone(); + key_nibbles.push(*next_extension_nibble); + key_nibbles.extend(extension_iter); + let prev_index = key_nibbles.pop().unwrap(); + + let iter = std::iter::once((extension.chd(), prev_index)); + + branch_iter_stack.push(BranchIterator { + key_nibbles: key_nibbles, + children_iter: Box::new(iter), + }); + break; + } + + let next_key_nibble = next_key_nibble.unwrap(); + + if next_extension_nibble < &next_key_nibble { + // The extension node's child is before [key]. + // Don't visit it. + break; + } else if next_extension_nibble > &next_key_nibble { + // The extension node's child is after [key]. + // Visit it. + let child = merkle + .get_node(extension.chd()) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + let branch = child.inner().as_branch().unwrap(); + + let children_iter = get_children_iter(branch); + + branch_iter_stack.push(BranchIterator { + key_nibbles: key_nibbles_so_far.clone(), + children_iter: Box::new(children_iter), + }); + break; + } + key_nibbles_so_far.push(next_key_nibble); + } + } else { + // Add the extension node's path to the key nibbles. + key_nibbles_so_far.extend(extension.path.iter()); } } } @@ -262,6 +308,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' } } NodeType::Leaf(leaf) => { + child_key_nibbles.extend(leaf.path.iter()); let value = leaf.data.to_vec(); return Poll::Ready(Some(Ok(( key_from_nibble_iter(child_key_nibbles.into_iter().skip(1)), // skip the sentinel node leading 0 @@ -378,6 +425,8 @@ use super::tests::create_test_merkle; #[cfg(test)] #[allow(clippy::indexing_slicing, clippy::unwrap_used)] mod tests { + use std::vec; + use crate::nibbles::Nibbles; use super::*; @@ -441,7 +490,7 @@ mod tests { for i in (0..256).rev() { for j in (0..256).rev() { let key = vec![i as u8, j as u8]; - let value = vec![0x00]; + let value = vec![i as u8, j as u8]; merkle.insert(key, value, root).unwrap(); } @@ -452,7 +501,7 @@ mod tests { for i in 0..256 { for j in 0..256 { let expected_key = vec![i as u8, j as u8]; - let expected_value = vec![0x00]; + let expected_value = vec![i as u8, j as u8]; assert_eq!( stream.next().await.unwrap().unwrap(), @@ -467,6 +516,49 @@ mod tests { check_stream_is_done(stream).await; } + #[tokio::test] + async fn with_start_key() { + let mut merkle = create_test_merkle(); + let root = merkle.init_root().unwrap(); + + for i in (0..256).rev() { + for j in (0..256).rev() { + let key = vec![i as u8, j as u8]; + let value = vec![i as u8, j as u8]; + + merkle.insert(key, value, root).unwrap(); + } + } + + for i in 0..256 { + let mut stream = merkle.iter_from(root, vec![i as u8].into_boxed_slice()); + for j in 0..256 { + let expected_key = vec![i as u8, j as u8]; + let expected_value = vec![i as u8, j as u8]; + assert_eq!( + stream.next().await.unwrap().unwrap(), + (expected_key.into_boxed_slice(), expected_value), + "i: {}, j: {}", + i, + j, + ); + } + if i == 255 { + check_stream_is_done(stream).await; + } else { + assert_eq!( + stream.next().await.unwrap().unwrap(), + ( + vec![i as u8 + 1, 0].into_boxed_slice(), + vec![i as u8 + 1, 0] + ), + "i: {}", + i, + ); + } + } + } + #[tokio::test] async fn fused_full() { let mut merkle = create_test_merkle(); From 9b82d907d1286da13f1668c37a6a782999fc9e52 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Mon, 29 Jan 2024 17:43:35 -0500 Subject: [PATCH 23/59] readability nit --- firewood/src/merkle/stream.rs | 129 ++++++++++++++++------------------ 1 file changed, 61 insertions(+), 68 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 7a78d8bba..4bd7d112b 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -184,76 +184,69 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' } NodeType::Leaf(_) => (), NodeType::Extension(extension) => { - if path_to_key.len() == 0 { - // TODO: - // This is the last node in the path to the key. - // That means that either this node's child is at the [key] - // or the [key] is not in the trie. - // We need to figure out whether the first iteration of [children_iter] - // should be the child at [pos] or the child at [pos + 1]. - - // Figure out if this extension node's child is befpre, at or after [key]. - let key_nibbles = Nibbles::<1>::new(key.as_ref()).into_iter(); - - let mut remaining_key = key_nibbles.skip(key_nibbles_so_far.len()); - println!("{:?}", key.iter().skip(key_nibbles_so_far.len()).len()); - println!("key len {:?}", key.len()); - println!("key {:?}", key); - - let mut extension_iter = extension.path.iter(); - - while let Some(next_extension_nibble) = extension_iter.next() { - let next_key_nibble = remaining_key.next(); - - if next_key_nibble.is_none() { - // We ran out of nibbles of [key] so - // the extension node is after [key]. - let child = merkle - .get_node(extension.chd()) - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - let mut key_nibbles = key_nibbles_so_far.clone(); - key_nibbles.push(*next_extension_nibble); - key_nibbles.extend(extension_iter); - let prev_index = key_nibbles.pop().unwrap(); - - let iter = std::iter::once((extension.chd(), prev_index)); - - branch_iter_stack.push(BranchIterator { - key_nibbles: key_nibbles, - children_iter: Box::new(iter), - }); - break; - } - - let next_key_nibble = next_key_nibble.unwrap(); - - if next_extension_nibble < &next_key_nibble { - // The extension node's child is before [key]. - // Don't visit it. - break; - } else if next_extension_nibble > &next_key_nibble { - // The extension node's child is after [key]. - // Visit it. - let child = merkle - .get_node(extension.chd()) - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - let branch = child.inner().as_branch().unwrap(); - - let children_iter = get_children_iter(branch); - - branch_iter_stack.push(BranchIterator { - key_nibbles: key_nibbles_so_far.clone(), - children_iter: Box::new(children_iter), - }); - break; - } - key_nibbles_so_far.push(next_key_nibble); - } - } else { + if path_to_key.len() != 0 { // Add the extension node's path to the key nibbles. key_nibbles_so_far.extend(extension.path.iter()); + continue; + } + // TODO: + // This is the last node in the path to the key. + // That means that either this node's child is at the [key] + // or the [key] is not in the trie. + // We need to figure out whether the first iteration of [children_iter] + // should be the child at [pos] or the child at [pos + 1]. + + // Figure out if this extension node's child is befpre, at or after [key]. + let key_nibbles = Nibbles::<1>::new(key.as_ref()).into_iter(); + + let mut remaining_key = key_nibbles.skip(key_nibbles_so_far.len()); + + let mut extension_iter = extension.path.iter(); + + while let Some(next_extension_nibble) = extension_iter.next() { + let next_key_nibble = remaining_key.next(); + + if next_key_nibble.is_none() { + // We ran out of nibbles of [key] so + // the extension node is after [key]. + let mut key_nibbles = key_nibbles_so_far.clone(); + key_nibbles.push(*next_extension_nibble); + key_nibbles.extend(extension_iter); + let prev_index = key_nibbles.pop().unwrap(); + + let iter = std::iter::once((extension.chd(), prev_index)); + + branch_iter_stack.push(BranchIterator { + key_nibbles: key_nibbles, + children_iter: Box::new(iter), + }); + break; + } + + let next_key_nibble = next_key_nibble.unwrap(); + + if next_extension_nibble < &next_key_nibble { + // The extension node's child is before [key]. + // Don't visit it. + break; + } else if next_extension_nibble > &next_key_nibble { + // The extension node's child is after [key]. + // Visit it. + let child = merkle + .get_node(extension.chd()) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + let branch = child.inner().as_branch().unwrap(); + + let children_iter = get_children_iter(branch); + + branch_iter_stack.push(BranchIterator { + key_nibbles: key_nibbles_so_far.clone(), + children_iter: Box::new(children_iter), + }); + break; + } + key_nibbles_so_far.push(next_key_nibble); } } } From 902afe0a6e24f0e6d314d8fcd00d4a6863852832 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Mon, 29 Jan 2024 17:43:57 -0500 Subject: [PATCH 24/59] nit remove useless assignment --- firewood/src/merkle/stream.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 4bd7d112b..f03bb6246 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -114,7 +114,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // with [key], where [index] is the next nibble in the key. let mut path_to_key = vec![]; - let node_at_key = merkle + merkle .get_node_by_key_with_callbacks( root_node, &key, From 580762ac29d9ce6a5fd03e9d09841bd12e03457a Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 10:31:37 -0500 Subject: [PATCH 25/59] comments and nits --- firewood/src/merkle/stream.rs | 42 +++++++++++++++-------------------- 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index f03bb6246..ef206ab93 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -103,17 +103,11 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let mut branch_iter_stack: Vec = vec![]; - // branch_iter_stack.push(BranchIterator { - // key_nibbles: vec![], - // children_iter: Box::new(get_children_iter( - // root_node.inner().as_branch().unwrap(), - // )), - // }); - // (disk address, index) for each node we visit along the path to the node // with [key], where [index] is the next nibble in the key. let mut path_to_key = vec![]; + // Populate [path_to_key]. merkle .get_node_by_key_with_callbacks( root_node, @@ -123,12 +117,14 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' ) .map_err(|e| api::Error::InternalError(Box::new(e)))?; + // Convert (address, index) paiors to (node, index) pairs. let mut path_to_key: VecDeque<(NodeObjRef<'a>, u8)> = path_to_key .into_iter() .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) .collect::, _>>() .map_err(|e| api::Error::InternalError(Box::new(e)))?; + // Tracks how much of the key has been traversed so far. let mut key_nibbles_so_far: Vec = vec![]; loop { @@ -147,11 +143,10 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' children_iter: Box::new(children_iter), }); } else { - // TODO: // This is the last node in the path to the key. - // That means that either this node's child is at the [key] - // or the [key] is not in the trie. - // We need to figure out whether the first iteration of [children_iter] + // That means that either this node's child is at [key] + // or [key] isn't in the trie. + // Figure out whether the first iteration of [children_iter] // should be the child at [pos] or the child at [pos + 1]. // Get the child at [pos], if any. @@ -189,31 +184,34 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' key_nibbles_so_far.extend(extension.path.iter()); continue; } - // TODO: // This is the last node in the path to the key. - // That means that either this node's child is at the [key] - // or the [key] is not in the trie. - // We need to figure out whether the first iteration of [children_iter] + // That means that either this node's child is at [key] + // or [key] is not in the trie. + // Figure out whether the first iteration of [children_iter] // should be the child at [pos] or the child at [pos + 1]. - // Figure out if this extension node's child is befpre, at or after [key]. + // Figure out if [extension]'s child is before, at or after [key]. let key_nibbles = Nibbles::<1>::new(key.as_ref()).into_iter(); + // Unmatched portion of [key]. let mut remaining_key = key_nibbles.skip(key_nibbles_so_far.len()); let mut extension_iter = extension.path.iter(); while let Some(next_extension_nibble) = extension_iter.next() { + // Check whether the next nibble of [extension]'s path + // matches the next nibble of [key]. let next_key_nibble = remaining_key.next(); if next_key_nibble.is_none() { // We ran out of nibbles of [key] so // the extension node is after [key]. + // We want to iterate over the extension node's child. let mut key_nibbles = key_nibbles_so_far.clone(); key_nibbles.push(*next_extension_nibble); key_nibbles.extend(extension_iter); - let prev_index = key_nibbles.pop().unwrap(); + let prev_index = key_nibbles.pop().unwrap(); let iter = std::iter::once((extension.chd(), prev_index)); branch_iter_stack.push(BranchIterator { @@ -226,23 +224,19 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let next_key_nibble = next_key_nibble.unwrap(); if next_extension_nibble < &next_key_nibble { - // The extension node's child is before [key]. - // Don't visit it. + // [extension]'s child is before [key]. Visit it. break; } else if next_extension_nibble > &next_key_nibble { - // The extension node's child is after [key]. - // Visit it. + // [extension]'s child is after [key]. Visit it. let child = merkle .get_node(extension.chd()) .map_err(|e| api::Error::InternalError(Box::new(e)))?; let branch = child.inner().as_branch().unwrap(); - let children_iter = get_children_iter(branch); - branch_iter_stack.push(BranchIterator { key_nibbles: key_nibbles_so_far.clone(), - children_iter: Box::new(children_iter), + children_iter: Box::new(get_children_iter(branch)), }); break; } From bdfd326520bb206d73aa789152476993e9361989 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 11:23:56 -0500 Subject: [PATCH 26/59] all UT pass --- firewood/src/merkle/stream.rs | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index ef206ab93..37b0bff41 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -103,6 +103,22 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let mut branch_iter_stack: Vec = vec![]; + // if key.len() == 0 { + // let root_branch = root_node.inner().as_branch().unwrap(); + // let child: DiskAddress = root_branch.children[0].unwrap(); + + // let children_iter = std::iter::once((child, 0)); + + // branch_iter_stack.push(BranchIterator { + // key_nibbles: vec![], + // children_iter: Box::new(children_iter), + // }); + + // self.key_state = IteratorState::Iterating { branch_iter_stack }; + + // return self.poll_next(_cx); + // } + // (disk address, index) for each node we visit along the path to the node // with [key], where [index] is the next nibble in the key. let mut path_to_key = vec![]; @@ -317,9 +333,18 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' child_key.extend(extension.path.iter()); branch_iter_stack.push(BranchIterator { - key_nibbles: child_key, + key_nibbles: child_key.clone(), children_iter: Box::new(children_iter), }); + + // If there's a value, return it. + if let Some(value) = branch.value.as_ref() { + let value = value.to_vec(); + return Poll::Ready(Some(Ok(( + key_from_nibble_iter(child_key.into_iter().skip(1)), // skip the sentinel node leading 0 + value, + )))); + } } }; } @@ -349,6 +374,7 @@ fn key_from_nibble_iter>(mut nibbles: Iter) -> Key { mod helper_types { use std::ops::Not; + /// TODO how can we use enums instead of Box? /// Enums enable stack-based dynamic-dispatch as opposed to heap-based `Box`. /// This helps us with match arms that return different types that implement the same trait. /// It's possible that [rust-lang/rust#63065](https://github.com/rust-lang/rust/issues/63065) will make this unnecessary. From a8a5cf8ee3450afe43a750f06e530219837d983a Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 11:41:09 -0500 Subject: [PATCH 27/59] appease linter --- firewood/src/merkle/stream.rs | 42 ++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 37b0bff41..8a023c9f4 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -150,7 +150,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' match node.inner() { NodeType::Branch(branch) => { - if path_to_key.len() != 0 { + if !path_to_key.is_empty() { let children_iter = get_children_iter(branch) .filter(move |(_, child_pos)| child_pos > &pos); @@ -195,7 +195,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' } NodeType::Leaf(_) => (), NodeType::Extension(extension) => { - if path_to_key.len() != 0 { + if !path_to_key.is_empty() { // Add the extension node's path to the key nibbles. key_nibbles_so_far.extend(extension.path.iter()); continue; @@ -231,7 +231,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let iter = std::iter::once((extension.chd(), prev_index)); branch_iter_stack.push(BranchIterator { - key_nibbles: key_nibbles, + key_nibbles, children_iter: Box::new(iter), }); break; @@ -239,23 +239,25 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let next_key_nibble = next_key_nibble.unwrap(); - if next_extension_nibble < &next_key_nibble { - // [extension]'s child is before [key]. Visit it. - break; - } else if next_extension_nibble > &next_key_nibble { - // [extension]'s child is after [key]. Visit it. - let child = merkle - .get_node(extension.chd()) - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - let branch = child.inner().as_branch().unwrap(); - - branch_iter_stack.push(BranchIterator { - key_nibbles: key_nibbles_so_far.clone(), - children_iter: Box::new(get_children_iter(branch)), - }); - break; + match next_extension_nibble.cmp(&next_key_nibble) { + std::cmp::Ordering::Less => break, // [extension]'s child is before [key]. Skip it. + std::cmp::Ordering::Equal => (), + std::cmp::Ordering::Greater => { + // [extension]'s child is after [key]. Visit it. + let child = merkle + .get_node(extension.chd()) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + let branch = child.inner().as_branch().unwrap(); + + branch_iter_stack.push(BranchIterator { + key_nibbles: key_nibbles_so_far.clone(), + children_iter: Box::new(get_children_iter(branch)), + }); + break; + } } + key_nibbles_so_far.push(next_key_nibble); } } @@ -264,7 +266,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' self.key_state = IteratorState::Iterating { branch_iter_stack }; - return self.poll_next(_cx); + self.poll_next(_cx) } IteratorState::Iterating { branch_iter_stack } => { loop { From 4d06a4989284fc021e98b61772d161740cea5acc Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 12:16:35 -0500 Subject: [PATCH 28/59] check for index OOB (which should never happen) --- firewood/src/merkle/stream.rs | 27 ++++++++++----------------- firewood/src/v2/api.rs | 3 +++ 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 8a023c9f4..befb40186 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -11,6 +11,7 @@ use crate::{ use futures::{stream::FusedStream, Stream}; use std::{collections::VecDeque, task::Poll}; type Key = Box<[u8]>; +use crate::v2::api::Error::ChildNotFound; type Value = Vec; struct BranchIterator { @@ -103,22 +104,6 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let mut branch_iter_stack: Vec = vec![]; - // if key.len() == 0 { - // let root_branch = root_node.inner().as_branch().unwrap(); - // let child: DiskAddress = root_branch.children[0].unwrap(); - - // let children_iter = std::iter::once((child, 0)); - - // branch_iter_stack.push(BranchIterator { - // key_nibbles: vec![], - // children_iter: Box::new(children_iter), - // }); - - // self.key_state = IteratorState::Iterating { branch_iter_stack }; - - // return self.poll_next(_cx); - // } - // (disk address, index) for each node we visit along the path to the node // with [key], where [index] is the next nibble in the key. let mut path_to_key = vec![]; @@ -166,7 +151,15 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // should be the child at [pos] or the child at [pos + 1]. // Get the child at [pos], if any. - let child = branch.children[pos as usize] + let Some(child) = branch.children.get(pos as usize) else { + // This should never happen -- [pos] should never be OOB. + return Poll::Ready(Some(Err(api::Error::InternalError( + Box::new(ChildNotFound), + )))); + }; + + // Convert child from address --> node. + let child = child .map(|child_addr| merkle.get_node(child_addr)) .transpose() .map_err(|e| api::Error::InternalError(Box::new(e)))?; diff --git a/firewood/src/v2/api.rs b/firewood/src/v2/api.rs index 866ccadd6..22b146f65 100644 --- a/firewood/src/v2/api.rs +++ b/firewood/src/v2/api.rs @@ -82,6 +82,9 @@ pub enum Error { #[error("Range too small")] RangeTooSmall, + + #[error("Child not found")] + ChildNotFound, } /// A range proof, consisting of a proof of the first key and the last key, From 3a282085049031a929bd06b3644226f6f802f90e Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 13:26:36 -0500 Subject: [PATCH 29/59] clean up branch node logic in setup --- firewood/src/merkle/stream.rs | 53 +++++++++++++++-------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index befb40186..8b05b2cff 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -126,7 +126,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .map_err(|e| api::Error::InternalError(Box::new(e)))?; // Tracks how much of the key has been traversed so far. - let mut key_nibbles_so_far: Vec = vec![]; + let mut matched_key_nibbles: Vec = vec![]; loop { let Some((node, pos)) = path_to_key.pop_front() else { @@ -140,17 +140,13 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .filter(move |(_, child_pos)| child_pos > &pos); branch_iter_stack.push(BranchIterator { - key_nibbles: key_nibbles_so_far.clone(), + key_nibbles: matched_key_nibbles.clone(), children_iter: Box::new(children_iter), }); } else { - // This is the last node in the path to the key. - // That means that either this node's child is at [key] - // or [key] isn't in the trie. - // Figure out whether the first iteration of [children_iter] - // should be the child at [pos] or the child at [pos + 1]. + // Figure out whether we want to start iterating at [pos] or [pos + 1]. - // Get the child at [pos], if any. + // Get the address of the child at [pos], if any. let Some(child) = branch.children.get(pos as usize) else { // This should never happen -- [pos] should never be OOB. return Poll::Ready(Some(Err(api::Error::InternalError( @@ -164,33 +160,30 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .transpose() .map_err(|e| api::Error::InternalError(Box::new(e)))?; - // If the child doesn't exist, the key isn't in the trie. - if child.is_none() { - let children_iter = get_children_iter(branch) - .filter(move |(_, child_pos)| child_pos > &pos); - - branch_iter_stack.push(BranchIterator { - key_nibbles: key_nibbles_so_far.clone(), - children_iter: Box::new(children_iter), - }); + let comparer = if child.is_none() { + // The child doesn't exist; we don't need to iterator over this index. + |a: &u8, b: &u8| a > b } else { - let children_iter = get_children_iter(branch) - .filter(move |(_, child_pos)| child_pos >= &pos); + // The child does exist; the first key to iterate over must be at/under [pos]. + |a: &u8, b: &u8| a >= b + }; - branch_iter_stack.push(BranchIterator { - key_nibbles: key_nibbles_so_far.clone(), - children_iter: Box::new(children_iter), - }); - } + let children_iter = get_children_iter(branch) + .filter(move |(_, child_pos)| comparer(child_pos, &pos)); + + branch_iter_stack.push(BranchIterator { + key_nibbles: matched_key_nibbles.clone(), + children_iter: Box::new(children_iter), + }); } - key_nibbles_so_far.push(pos); + matched_key_nibbles.push(pos); } NodeType::Leaf(_) => (), NodeType::Extension(extension) => { if !path_to_key.is_empty() { // Add the extension node's path to the key nibbles. - key_nibbles_so_far.extend(extension.path.iter()); + matched_key_nibbles.extend(extension.path.iter()); continue; } // This is the last node in the path to the key. @@ -203,7 +196,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let key_nibbles = Nibbles::<1>::new(key.as_ref()).into_iter(); // Unmatched portion of [key]. - let mut remaining_key = key_nibbles.skip(key_nibbles_so_far.len()); + let mut remaining_key = key_nibbles.skip(matched_key_nibbles.len()); let mut extension_iter = extension.path.iter(); @@ -216,7 +209,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // We ran out of nibbles of [key] so // the extension node is after [key]. // We want to iterate over the extension node's child. - let mut key_nibbles = key_nibbles_so_far.clone(); + let mut key_nibbles = matched_key_nibbles.clone(); key_nibbles.push(*next_extension_nibble); key_nibbles.extend(extension_iter); @@ -244,14 +237,14 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let branch = child.inner().as_branch().unwrap(); branch_iter_stack.push(BranchIterator { - key_nibbles: key_nibbles_so_far.clone(), + key_nibbles: matched_key_nibbles.clone(), children_iter: Box::new(get_children_iter(branch)), }); break; } } - key_nibbles_so_far.push(next_key_nibble); + matched_key_nibbles.push(next_key_nibble); } } } From 0fe8baa615ad584b6f56f052d85896170e3bcb5d Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 13:32:13 -0500 Subject: [PATCH 30/59] comment nit --- firewood/src/merkle/stream.rs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 8b05b2cff..761b08cdb 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -186,13 +186,9 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' matched_key_nibbles.extend(extension.path.iter()); continue; } - // This is the last node in the path to the key. - // That means that either this node's child is at [key] - // or [key] is not in the trie. - // Figure out whether the first iteration of [children_iter] - // should be the child at [pos] or the child at [pos + 1]. - // Figure out if [extension]'s child is before, at or after [key]. + // Figure out whether we want to start iterating at [pos] or [pos + 1]. + // See if [extension]'s child is before, at or after [key]. let key_nibbles = Nibbles::<1>::new(key.as_ref()).into_iter(); // Unmatched portion of [key]. @@ -206,9 +202,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let next_key_nibble = remaining_key.next(); if next_key_nibble.is_none() { - // We ran out of nibbles of [key] so - // the extension node is after [key]. - // We want to iterate over the extension node's child. + // We ran out of nibbles of [key] so [extension]'s child is after [key]. let mut key_nibbles = matched_key_nibbles.clone(); key_nibbles.push(*next_extension_nibble); key_nibbles.extend(extension_iter); From 56019317f3cbd94d1a6aefc22cc84445dbee8b0f Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 13:47:21 -0500 Subject: [PATCH 31/59] cleanup --- firewood/src/merkle/stream.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 761b08cdb..57ea4b4a9 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -118,7 +118,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' ) .map_err(|e| api::Error::InternalError(Box::new(e)))?; - // Convert (address, index) paiors to (node, index) pairs. + // Convert (address, index) pairs to (node, index) pairs. let mut path_to_key: VecDeque<(NodeObjRef<'a>, u8)> = path_to_key .into_iter() .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) @@ -135,7 +135,11 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' match node.inner() { NodeType::Branch(branch) => { + // Figure out whether we want to start iterating over this + // node's children at [pos] or [pos + 1]. if !path_to_key.is_empty() { + // The next element in [path_to_key] will handle the child at [pos] + // so we can start iterating at [pos + 1]. let children_iter = get_children_iter(branch) .filter(move |(_, child_pos)| child_pos > &pos); @@ -144,9 +148,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' children_iter: Box::new(children_iter), }); } else { - // Figure out whether we want to start iterating at [pos] or [pos + 1]. - - // Get the address of the child at [pos], if any. + // Get the child at [pos], if any. let Some(child) = branch.children.get(pos as usize) else { // This should never happen -- [pos] should never be OOB. return Poll::Ready(Some(Err(api::Error::InternalError( @@ -154,7 +156,6 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' )))); }; - // Convert child from address --> node. let child = child .map(|child_addr| merkle.get_node(child_addr)) .transpose() @@ -164,7 +165,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // The child doesn't exist; we don't need to iterator over this index. |a: &u8, b: &u8| a > b } else { - // The child does exist; the first key to iterate over must be at/under [pos]. + // The child does exist; the first key to iterate over must be at [pos]. |a: &u8, b: &u8| a >= b }; @@ -220,8 +221,8 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let next_key_nibble = next_key_nibble.unwrap(); match next_extension_nibble.cmp(&next_key_nibble) { + std::cmp::Ordering::Equal => (), // The nibbles match; check the next one. std::cmp::Ordering::Less => break, // [extension]'s child is before [key]. Skip it. - std::cmp::Ordering::Equal => (), std::cmp::Ordering::Greater => { // [extension]'s child is after [key]. Visit it. let child = merkle @@ -294,10 +295,9 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' } NodeType::Leaf(leaf) => { child_key_nibbles.extend(leaf.path.iter()); - let value = leaf.data.to_vec(); return Poll::Ready(Some(Ok(( key_from_nibble_iter(child_key_nibbles.into_iter().skip(1)), // skip the sentinel node leading 0 - value, + leaf.data.to_vec(), )))); } NodeType::Extension(extension) => { From a67f5333b76ee2a763a5ec0e3f145793cd417ab9 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 13:59:40 -0500 Subject: [PATCH 32/59] remove unwrap calls --- firewood/src/merkle/stream.rs | 17 ++++++++--------- firewood/src/v2/api.rs | 3 +++ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 57ea4b4a9..5664ff338 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -11,7 +11,6 @@ use crate::{ use futures::{stream::FusedStream, Stream}; use std::{collections::VecDeque, task::Poll}; type Key = Box<[u8]>; -use crate::v2::api::Error::ChildNotFound; type Value = Vec; struct BranchIterator { @@ -152,7 +151,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let Some(child) = branch.children.get(pos as usize) else { // This should never happen -- [pos] should never be OOB. return Poll::Ready(Some(Err(api::Error::InternalError( - Box::new(ChildNotFound), + Box::new(api::Error::ChildNotFound), )))); }; @@ -200,15 +199,17 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' while let Some(next_extension_nibble) = extension_iter.next() { // Check whether the next nibble of [extension]'s path // matches the next nibble of [key]. - let next_key_nibble = remaining_key.next(); - - if next_key_nibble.is_none() { + let Some(next_key_nibble) = remaining_key.next() else { // We ran out of nibbles of [key] so [extension]'s child is after [key]. let mut key_nibbles = matched_key_nibbles.clone(); key_nibbles.push(*next_extension_nibble); key_nibbles.extend(extension_iter); - let prev_index = key_nibbles.pop().unwrap(); + let Some(prev_index) = key_nibbles.pop() else { + return Poll::Ready(Some(Err(api::Error::InternalError( + Box::new(api::Error::InvalidPathToExtension), + )))); + }; let iter = std::iter::once((extension.chd(), prev_index)); branch_iter_stack.push(BranchIterator { @@ -216,9 +217,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' children_iter: Box::new(iter), }); break; - } - - let next_key_nibble = next_key_nibble.unwrap(); + }; match next_extension_nibble.cmp(&next_key_nibble) { std::cmp::Ordering::Equal => (), // The nibbles match; check the next one. diff --git a/firewood/src/v2/api.rs b/firewood/src/v2/api.rs index 22b146f65..fdf9cc3fe 100644 --- a/firewood/src/v2/api.rs +++ b/firewood/src/v2/api.rs @@ -85,6 +85,9 @@ pub enum Error { #[error("Child not found")] ChildNotFound, + + #[error("Extension node is at the root; should be branch node")] + InvalidPathToExtension, } /// A range proof, consisting of a proof of the first key and the last key, From 00cc302b38b4df782fa1f1023c3c9387de774326 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 14:01:26 -0500 Subject: [PATCH 33/59] remove unwrap call --- firewood/src/merkle/stream.rs | 11 ++++++++--- firewood/src/v2/api.rs | 5 ++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 5664ff338..2cff4a627 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -207,7 +207,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let Some(prev_index) = key_nibbles.pop() else { return Poll::Ready(Some(Err(api::Error::InternalError( - Box::new(api::Error::InvalidPathToExtension), + Box::new(api::Error::ExtensionNodeAtRoot), )))); }; let iter = std::iter::once((extension.chd(), prev_index)); @@ -228,7 +228,13 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .get_node(extension.chd()) .map_err(|e| api::Error::InternalError(Box::new(e)))?; - let branch = child.inner().as_branch().unwrap(); + let Some(branch) = child.inner().as_branch() else { + return Poll::Ready(Some(Err( + api::Error::InternalError(Box::new( + api::Error::InvalidExtensionChild, + )), + ))); + }; branch_iter_stack.push(BranchIterator { key_nibbles: matched_key_nibbles.clone(), @@ -301,7 +307,6 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' } NodeType::Extension(extension) => { // Follow the extension node to its child, which is a branch. - // TODO confirm that an extension node's child is always a branch node. let child = merkle .get_node(extension.chd()) .map_err(|e| api::Error::InternalError(Box::new(e)))?; diff --git a/firewood/src/v2/api.rs b/firewood/src/v2/api.rs index fdf9cc3fe..5ea6da9b0 100644 --- a/firewood/src/v2/api.rs +++ b/firewood/src/v2/api.rs @@ -87,7 +87,10 @@ pub enum Error { ChildNotFound, #[error("Extension node is at the root; should be branch node")] - InvalidPathToExtension, + ExtensionNodeAtRoot, + + #[error("Extension node has non-branch child")] + InvalidExtensionChild, } /// A range proof, consisting of a proof of the first key and the last key, From d1fffc852dfee0737af4af26b60c0764d0d97489 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 14:02:01 -0500 Subject: [PATCH 34/59] remove unwrap call --- firewood/src/merkle/stream.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 2cff4a627..d4c6a62cb 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -311,7 +311,11 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .get_node(extension.chd()) .map_err(|e| api::Error::InternalError(Box::new(e)))?; - let branch = child.inner().as_branch().unwrap(); + let Some(branch) = child.inner().as_branch() else { + return Poll::Ready(Some(Err(api::Error::InternalError( + Box::new(api::Error::InvalidExtensionChild), + )))); + }; let children_iter = get_children_iter(branch); // TODO reduce cloning From 039200a4c12757ed1e9ce91a16c19b103bcb6a58 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 14:03:05 -0500 Subject: [PATCH 35/59] appease linter --- firewood/src/merkle/stream.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index d4c6a62cb..56ef1509a 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -364,7 +364,7 @@ fn key_from_nibble_iter>(mut nibbles: Iter) -> Key { mod helper_types { use std::ops::Not; - /// TODO how can we use enums instead of Box? + /// TODO how can we use enums instead of `Box`? /// Enums enable stack-based dynamic-dispatch as opposed to heap-based `Box`. /// This helps us with match arms that return different types that implement the same trait. /// It's possible that [rust-lang/rust#63065](https://github.com/rust-lang/rust/issues/63065) will make this unnecessary. From 8dc0e1ebeac55dadea561429ccf3829e50fdbe38 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 14:06:56 -0500 Subject: [PATCH 36/59] appease linter --- firewood/src/merkle/stream.rs | 94 ++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 46 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 56ef1509a..0cdffad8c 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -361,52 +361,54 @@ fn key_from_nibble_iter>(mut nibbles: Iter) -> Key { data.into_boxed_slice() } -mod helper_types { - use std::ops::Not; - - /// TODO how can we use enums instead of `Box`? - /// Enums enable stack-based dynamic-dispatch as opposed to heap-based `Box`. - /// This helps us with match arms that return different types that implement the same trait. - /// It's possible that [rust-lang/rust#63065](https://github.com/rust-lang/rust/issues/63065) will make this unnecessary. - /// - /// And this can be replaced by the `either` crate from crates.io if we ever need more functionality. - pub(super) enum Either { - Left(T), - Right(U), - } - - impl Iterator for Either - where - T: Iterator, - U: Iterator, - { - type Item = T::Item; - - fn next(&mut self) -> Option { - match self { - Self::Left(left) => left.next(), - Self::Right(right) => right.next(), - } - } - } - - #[must_use] - pub(super) struct MustUse(T); - - impl From for MustUse { - fn from(t: T) -> Self { - Self(t) - } - } - - impl Not for MustUse { - type Output = T::Output; - - fn not(self) -> Self::Output { - self.0.not() - } - } -} +// This code should be either used or removed. +// TODO how can we use Either instead of `Box`? +// mod helper_types { +// use std::ops::Not; + +// +// /// Enums enable stack-based dynamic-dispatch as opposed to heap-based `Box`. +// /// This helps us with match arms that return different types that implement the same trait. +// /// It's possible that [rust-lang/rust#63065](https://github.com/rust-lang/rust/issues/63065) will make this unnecessary. +// /// +// /// And this can be replaced by the `either` crate from crates.io if we ever need more functionality. +// pub(super) enum Either { +// Left(T), +// Right(U), +// } + +// impl Iterator for Either +// where +// T: Iterator, +// U: Iterator, +// { +// type Item = T::Item; + +// fn next(&mut self) -> Option { +// match self { +// Self::Left(left) => left.next(), +// Self::Right(right) => right.next(), +// } +// } +// } + +// #[must_use] +// pub(super) struct MustUse(T); + +// impl From for MustUse { +// fn from(t: T) -> Self { +// Self(t) +// } +// } + +// impl Not for MustUse { +// type Output = T::Output; + +// fn not(self) -> Self::Output { +// self.0.not() +// } +// } +// } // CAUTION: only use with nibble iterators trait IntoBytes: Iterator { From 3206ee1eecfd9ad1219b047343d2e73883b60018 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 14:08:45 -0500 Subject: [PATCH 37/59] comment nit --- firewood/src/merkle/stream.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 0cdffad8c..07c1d6e8c 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -103,8 +103,9 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let mut branch_iter_stack: Vec = vec![]; - // (disk address, index) for each node we visit along the path to the node - // with [key], where [index] is the next nibble in the key. + // (disk address, index) for each node we visit along the path to + // [key], where [index] is the next nibble in the key after the node + // at [disk address]. let mut path_to_key = vec![]; // Populate [path_to_key]. From 3644181c6253926d6123a499aaf99ceb93d12b14 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 14:26:15 -0500 Subject: [PATCH 38/59] /// for doc comments --- firewood/src/merkle/stream.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 07c1d6e8c..32bd6a863 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -14,10 +14,10 @@ type Key = Box<[u8]>; type Value = Vec; struct BranchIterator { - // The nibbles of the key at this node. + /// The nibbles of the key at this node. key_nibbles: Vec, - // Returns the non-empty children of this node - // and their positions in the node's children array. + /// Returns the non-empty children of this node + /// and their positions in the node's children array. children_iter: Box + Send>, } @@ -26,11 +26,11 @@ enum IteratorState { StartAtKey(Key), /// Continue iterating after the last node in the `visited_node_path` Iterating { - // Each element is an iterator over a branch node we've visited - // along our traversal of the key-value pairs in the trie. - // We pop an iterator off the stack and call next on it to - // get the next child node to visit. When an iterator is empty, - // we pop it off the stack and go back up to its parent. + /// Each element is an iterator over a branch node we've visited + /// along our traversal of the key-value pairs in the trie. + /// We pop an iterator off the stack and call next on it to + /// get the next child node to visit. When an iterator is empty, + /// we pop it off the stack and go back up to its parent. branch_iter_stack: Vec, }, } @@ -162,7 +162,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .map_err(|e| api::Error::InternalError(Box::new(e)))?; let comparer = if child.is_none() { - // The child doesn't exist; we don't need to iterator over this index. + // The child doesn't exist; we don't need to iterate over [pos]. |a: &u8, b: &u8| a > b } else { // The child does exist; the first key to iterate over must be at [pos]. From f4f40ecb85ea960117121dc6a2068244f54c7d35 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 14:28:12 -0500 Subject: [PATCH 39/59] use type inference instead of declaring --- firewood/src/merkle/stream.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 32bd6a863..fffad0807 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -3,7 +3,6 @@ use super::{node::Node, BranchNode, Merkle, NodeType}; use crate::{ - merkle::NodeObjRef, nibbles::Nibbles, shale::{DiskAddress, ShaleStore}, v2::api, @@ -119,7 +118,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .map_err(|e| api::Error::InternalError(Box::new(e)))?; // Convert (address, index) pairs to (node, index) pairs. - let mut path_to_key: VecDeque<(NodeObjRef<'a>, u8)> = path_to_key + let mut path_to_key = path_to_key .into_iter() .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) .collect::, _>>() From 99814b9cb103797388106e031ccb3e0cf9cb1978 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 14:33:00 -0500 Subject: [PATCH 40/59] style nits from PR comments --- firewood/src/merkle/stream.rs | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index fffad0807..ef32235f6 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -133,6 +133,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' }; match node.inner() { + NodeType::Leaf(_) => (), NodeType::Branch(branch) => { // Figure out whether we want to start iterating over this // node's children at [pos] or [pos + 1]. @@ -179,7 +180,6 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' matched_key_nibbles.push(pos); } - NodeType::Leaf(_) => (), NodeType::Extension(extension) => { if !path_to_key.is_empty() { // Add the extension node's path to the key nibbles. @@ -255,11 +255,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' self.poll_next(_cx) } IteratorState::Iterating { branch_iter_stack } => { - loop { - let Some(mut branch_iter) = branch_iter_stack.pop() else { - return Poll::Ready(None); - }; - + while let Some(mut branch_iter) = branch_iter_stack.pop() { // [node_addr] is the next node to visit. // It's the child at index [pos] of [node_iter]. let Some((node_addr, pos)) = branch_iter.children_iter.next() else { @@ -338,6 +334,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' } }; } + return Poll::Ready(None); } } } @@ -492,8 +489,10 @@ mod tests { let mut merkle = create_test_merkle(); let root = merkle.init_root().unwrap(); - for i in (0..256).rev() { - for j in (0..256).rev() { + // Insert key-values in reverse order to ensure iterator + // doesn't just return the keys in insertion order. + for i in (0..u8::MAX).rev() { + for j in (0..u8::MAX).rev() { let key = vec![i as u8, j as u8]; let value = vec![i as u8, j as u8]; @@ -503,8 +502,8 @@ mod tests { let mut stream = merkle.iter(root); - for i in 0..256 { - for j in 0..256 { + for i in 0..u8::MAX { + for j in 0..u8::MAX { let expected_key = vec![i as u8, j as u8]; let expected_value = vec![i as u8, j as u8]; @@ -526,8 +525,10 @@ mod tests { let mut merkle = create_test_merkle(); let root = merkle.init_root().unwrap(); - for i in (0..256).rev() { - for j in (0..256).rev() { + // Insert key-values in reverse order to ensure iterator + // doesn't just return the keys in insertion order. + for i in (0..=u8::MAX).rev() { + for j in (0..=u8::MAX).rev() { let key = vec![i as u8, j as u8]; let value = vec![i as u8, j as u8]; @@ -535,9 +536,9 @@ mod tests { } } - for i in 0..256 { + for i in 0..=u8::MAX { let mut stream = merkle.iter_from(root, vec![i as u8].into_boxed_slice()); - for j in 0..256 { + for j in 0..=u8::MAX { let expected_key = vec![i as u8, j as u8]; let expected_value = vec![i as u8, j as u8]; assert_eq!( @@ -548,7 +549,7 @@ mod tests { j, ); } - if i == 255 { + if i == u8::MAX { check_stream_is_done(stream).await; } else { assert_eq!( From 4889c7a78c0f1981c416d04710f1d8164ec308d9 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 14:49:39 -0500 Subject: [PATCH 41/59] appease linter --- firewood/src/merkle/stream.rs | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index ef32235f6..2136cfcea 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -334,7 +334,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' } }; } - return Poll::Ready(None); + Poll::Ready(None) } } } @@ -493,8 +493,8 @@ mod tests { // doesn't just return the keys in insertion order. for i in (0..u8::MAX).rev() { for j in (0..u8::MAX).rev() { - let key = vec![i as u8, j as u8]; - let value = vec![i as u8, j as u8]; + let key = vec![i, j]; + let value = vec![i, j]; merkle.insert(key, value, root).unwrap(); } @@ -504,8 +504,8 @@ mod tests { for i in 0..u8::MAX { for j in 0..u8::MAX { - let expected_key = vec![i as u8, j as u8]; - let expected_value = vec![i as u8, j as u8]; + let expected_key = vec![i, j]; + let expected_value = vec![i, j]; assert_eq!( stream.next().await.unwrap().unwrap(), @@ -529,18 +529,18 @@ mod tests { // doesn't just return the keys in insertion order. for i in (0..=u8::MAX).rev() { for j in (0..=u8::MAX).rev() { - let key = vec![i as u8, j as u8]; - let value = vec![i as u8, j as u8]; + let key = vec![i, j]; + let value = vec![i, j]; merkle.insert(key, value, root).unwrap(); } } for i in 0..=u8::MAX { - let mut stream = merkle.iter_from(root, vec![i as u8].into_boxed_slice()); + let mut stream = merkle.iter_from(root, vec![i].into_boxed_slice()); for j in 0..=u8::MAX { - let expected_key = vec![i as u8, j as u8]; - let expected_value = vec![i as u8, j as u8]; + let expected_key = vec![i, j]; + let expected_value = vec![i, j]; assert_eq!( stream.next().await.unwrap().unwrap(), (expected_key.into_boxed_slice(), expected_value), @@ -554,10 +554,7 @@ mod tests { } else { assert_eq!( stream.next().await.unwrap().unwrap(), - ( - vec![i as u8 + 1, 0].into_boxed_slice(), - vec![i as u8 + 1, 0] - ), + (vec![i + 1, 0].into_boxed_slice(), vec![i + 1, 0]), "i: {}", i, ); From d50db3779da10182ac048dca35949cd9c8d23575 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Tue, 30 Jan 2024 14:58:59 -0500 Subject: [PATCH 42/59] convert loop to while let Some --- firewood/src/merkle/stream.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 2136cfcea..657f2c9e9 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -127,11 +127,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // Tracks how much of the key has been traversed so far. let mut matched_key_nibbles: Vec = vec![]; - loop { - let Some((node, pos)) = path_to_key.pop_front() else { - break; - }; - + while let Some((node, pos)) = path_to_key.pop_front() { match node.inner() { NodeType::Leaf(_) => (), NodeType::Branch(branch) => { From c24c665ed675842ac37218a43a3905d5f9dd2e8e Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Wed, 31 Jan 2024 11:37:15 -0500 Subject: [PATCH 43/59] remove commented code; change match arm with if statement into 2 match arms --- firewood/src/merkle/stream.rs | 130 +++++++++++----------------------- 1 file changed, 41 insertions(+), 89 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 657f2c9e9..aa50ed8b5 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -130,50 +130,51 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' while let Some((node, pos)) = path_to_key.pop_front() { match node.inner() { NodeType::Leaf(_) => (), - NodeType::Branch(branch) => { - // Figure out whether we want to start iterating over this - // node's children at [pos] or [pos + 1]. - if !path_to_key.is_empty() { - // The next element in [path_to_key] will handle the child at [pos] - // so we can start iterating at [pos + 1]. - let children_iter = get_children_iter(branch) - .filter(move |(_, child_pos)| child_pos > &pos); - - branch_iter_stack.push(BranchIterator { - key_nibbles: matched_key_nibbles.clone(), - children_iter: Box::new(children_iter), - }); - } else { - // Get the child at [pos], if any. - let Some(child) = branch.children.get(pos as usize) else { - // This should never happen -- [pos] should never be OOB. - return Poll::Ready(Some(Err(api::Error::InternalError( - Box::new(api::Error::ChildNotFound), - )))); - }; + NodeType::Branch(branch) if path_to_key.is_empty() => { + // This is the last node in the path to [key]. + // Figure out whether to start iterating over this node's + // children at [pos] or [pos + 1]. + + // Get the child at [pos], if any. + let Some(child) = branch.children.get(pos as usize) else { + // This should never happen -- [pos] should never be OOB. + return Poll::Ready(Some(Err(api::Error::InternalError( + Box::new(api::Error::ChildNotFound), + )))); + }; - let child = child - .map(|child_addr| merkle.get_node(child_addr)) - .transpose() - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - let comparer = if child.is_none() { - // The child doesn't exist; we don't need to iterate over [pos]. - |a: &u8, b: &u8| a > b - } else { - // The child does exist; the first key to iterate over must be at [pos]. - |a: &u8, b: &u8| a >= b - }; + let child = child + .map(|child_addr| merkle.get_node(child_addr)) + .transpose() + .map_err(|e| api::Error::InternalError(Box::new(e)))?; - let children_iter = get_children_iter(branch) - .filter(move |(_, child_pos)| comparer(child_pos, &pos)); + let comparer = if child.is_none() { + // The child doesn't exist; we don't need to iterate over [pos]. + |a: &u8, b: &u8| a > b + } else { + // The child does exist; the first key to iterate over must be at [pos]. + |a: &u8, b: &u8| a >= b + }; - branch_iter_stack.push(BranchIterator { - key_nibbles: matched_key_nibbles.clone(), - children_iter: Box::new(children_iter), - }); - } + let children_iter = get_children_iter(branch) + .filter(move |(_, child_pos)| comparer(child_pos, &pos)); + branch_iter_stack.push(BranchIterator { + key_nibbles: matched_key_nibbles.clone(), + children_iter: Box::new(children_iter), + }); + matched_key_nibbles.push(pos); + } + NodeType::Branch(branch) => { + // The next element in [path_to_key] will handle the child at [pos] + // so we can start iterating at [pos + 1]. + let children_iter = get_children_iter(branch) + .filter(move |(_, child_pos)| child_pos > &pos); + + branch_iter_stack.push(BranchIterator { + key_nibbles: matched_key_nibbles.clone(), + children_iter: Box::new(children_iter), + }); matched_key_nibbles.push(pos); } NodeType::Extension(extension) => { @@ -354,55 +355,6 @@ fn key_from_nibble_iter>(mut nibbles: Iter) -> Key { data.into_boxed_slice() } -// This code should be either used or removed. -// TODO how can we use Either instead of `Box`? -// mod helper_types { -// use std::ops::Not; - -// -// /// Enums enable stack-based dynamic-dispatch as opposed to heap-based `Box`. -// /// This helps us with match arms that return different types that implement the same trait. -// /// It's possible that [rust-lang/rust#63065](https://github.com/rust-lang/rust/issues/63065) will make this unnecessary. -// /// -// /// And this can be replaced by the `either` crate from crates.io if we ever need more functionality. -// pub(super) enum Either { -// Left(T), -// Right(U), -// } - -// impl Iterator for Either -// where -// T: Iterator, -// U: Iterator, -// { -// type Item = T::Item; - -// fn next(&mut self) -> Option { -// match self { -// Self::Left(left) => left.next(), -// Self::Right(right) => right.next(), -// } -// } -// } - -// #[must_use] -// pub(super) struct MustUse(T); - -// impl From for MustUse { -// fn from(t: T) -> Self { -// Self(t) -// } -// } - -// impl Not for MustUse { -// type Output = T::Output; - -// fn not(self) -> Self::Output { -// self.0.not() -// } -// } -// } - // CAUTION: only use with nibble iterators trait IntoBytes: Iterator { fn nibbles_into_bytes(&mut self) -> Vec { From d6c078135cecad7f14b2c05a49e65fdc0c930f27 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Wed, 31 Jan 2024 11:39:35 -0500 Subject: [PATCH 44/59] remove unused import --- firewood/src/merkle/stream.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index aa50ed8b5..239826d80 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -375,8 +375,6 @@ use super::tests::create_test_merkle; #[cfg(test)] #[allow(clippy::indexing_slicing, clippy::unwrap_used)] mod tests { - use std::vec; - use crate::nibbles::Nibbles; use super::*; From f47d748218c927d7a23a38a5d705e8823fb899ef Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Wed, 31 Jan 2024 11:42:03 -0500 Subject: [PATCH 45/59] nit move comments --- firewood/src/merkle/stream.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 239826d80..736ec6033 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -217,10 +217,12 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' }; match next_extension_nibble.cmp(&next_key_nibble) { - std::cmp::Ordering::Equal => (), // The nibbles match; check the next one. - std::cmp::Ordering::Less => break, // [extension]'s child is before [key]. Skip it. + // The nibbles match; check the next one. + std::cmp::Ordering::Equal => (), + // [extension]'s child is before [key]. Skip it. + std::cmp::Ordering::Less => break, + // [extension]'s child is after [key]. Visit it. std::cmp::Ordering::Greater => { - // [extension]'s child is after [key]. Visit it. let child = merkle .get_node(extension.chd()) .map_err(|e| api::Error::InternalError(Box::new(e)))?; From cf0936cb240137b84fb42c98287e3c270db16712 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Wed, 31 Jan 2024 11:54:31 -0500 Subject: [PATCH 46/59] change match arm with if statement into 2 match arms --- firewood/src/merkle/stream.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 736ec6033..ac74dafcf 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -177,15 +177,11 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' }); matched_key_nibbles.push(pos); } - NodeType::Extension(extension) => { - if !path_to_key.is_empty() { - // Add the extension node's path to the key nibbles. - matched_key_nibbles.extend(extension.path.iter()); - continue; - } - - // Figure out whether we want to start iterating at [pos] or [pos + 1]. - // See if [extension]'s child is before, at or after [key]. + NodeType::Extension(extension) if path_to_key.is_empty() => { + // Figure out whether we want to start iterating over the children + // of [extension.child] at [pos] or [pos + 1]. + // Note that an extension node's child is always a branch node. + // See if [extension]'s child is before, at, or after [key]. let key_nibbles = Nibbles::<1>::new(key.as_ref()).into_iter(); // Unmatched portion of [key]. @@ -198,6 +194,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // matches the next nibble of [key]. let Some(next_key_nibble) = remaining_key.next() else { // We ran out of nibbles of [key] so [extension]'s child is after [key]. + // We want to visit all children of [extension]'s child. let mut key_nibbles = matched_key_nibbles.clone(); key_nibbles.push(*next_extension_nibble); key_nibbles.extend(extension_iter); @@ -246,6 +243,10 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' matched_key_nibbles.push(next_key_nibble); } } + NodeType::Extension(extension) => { + // Add the extension node's path to the matched key nibbles. + matched_key_nibbles.extend(extension.path.iter()); + } } } From 9b4eb4beb98db28bc1e61ef7a89695db2ef47183 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Wed, 31 Jan 2024 12:42:45 -0500 Subject: [PATCH 47/59] use PartialOrd instead of custom closure --- firewood/src/merkle/stream.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index ac74dafcf..e2362a836 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -150,10 +150,10 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let comparer = if child.is_none() { // The child doesn't exist; we don't need to iterate over [pos]. - |a: &u8, b: &u8| a > b + ::gt } else { // The child does exist; the first key to iterate over must be at [pos]. - |a: &u8, b: &u8| a >= b + ::ge }; let children_iter = get_children_iter(branch) From 620fd21ed28e2d61013e3f03cea227de8a6ff281 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Wed, 31 Jan 2024 12:45:43 -0500 Subject: [PATCH 48/59] let compiler infer type --- firewood/src/merkle/stream.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index e2362a836..cc12acfc9 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -125,7 +125,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' .map_err(|e| api::Error::InternalError(Box::new(e)))?; // Tracks how much of the key has been traversed so far. - let mut matched_key_nibbles: Vec = vec![]; + let mut matched_key_nibbles = vec![]; while let Some((node, pos)) = path_to_key.pop_front() { match node.inner() { From ec4786276aa36dac9a3e076cffa83e36ec5973c7 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Wed, 31 Jan 2024 13:12:52 -0500 Subject: [PATCH 49/59] remove use of vecdeque --- firewood/src/merkle/stream.rs | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index cc12acfc9..13457d58d 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -8,7 +8,7 @@ use crate::{ v2::api, }; use futures::{stream::FusedStream, Stream}; -use std::{collections::VecDeque, task::Poll}; +use std::task::Poll; type Key = Box<[u8]>; type Value = Vec; @@ -117,20 +117,31 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' ) .map_err(|e| api::Error::InternalError(Box::new(e)))?; - // Convert (address, index) pairs to (node, index) pairs. - let mut path_to_key = path_to_key - .into_iter() - .map(|(node, pos)| merkle.get_node(node).map(|node| (node, pos))) - .collect::, _>>() - .map_err(|e| api::Error::InternalError(Box::new(e)))?; + // Get each node in [path_to_key]. Mark the last node as being the last + // so we can use that information in the while loop below. + let mut path_to_key = + path_to_key + .into_iter() + .rev() + .enumerate() + .rev() + .map(|(i, (node, pos))| { + merkle.get_node(node).map(|node| (i == 0, (node, pos))) + }); - // Tracks how much of the key has been traversed so far. let mut matched_key_nibbles = vec![]; - while let Some((node, pos)) = path_to_key.pop_front() { + while let Some(result) = path_to_key.next() { + let (is_last, (node, pos)) = match result { + Ok(result) => result, + Err(e) => { + return Poll::Ready(Some(Err(api::Error::InternalError(Box::new(e))))) + } + }; + match node.inner() { NodeType::Leaf(_) => (), - NodeType::Branch(branch) if path_to_key.is_empty() => { + NodeType::Branch(branch) if is_last => { // This is the last node in the path to [key]. // Figure out whether to start iterating over this node's // children at [pos] or [pos + 1]. @@ -177,7 +188,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' }); matched_key_nibbles.push(pos); } - NodeType::Extension(extension) if path_to_key.is_empty() => { + NodeType::Extension(extension) if is_last => { // Figure out whether we want to start iterating over the children // of [extension.child] at [pos] or [pos + 1]. // Note that an extension node's child is always a branch node. From 001874bc69253a0ae4d76140eb48c7328d892899 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Wed, 31 Jan 2024 13:25:34 -0500 Subject: [PATCH 50/59] appease linter --- firewood/src/merkle/stream.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 13457d58d..ded7ebf95 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -119,7 +119,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' // Get each node in [path_to_key]. Mark the last node as being the last // so we can use that information in the while loop below. - let mut path_to_key = + let path_to_key = path_to_key .into_iter() .rev() @@ -131,7 +131,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' let mut matched_key_nibbles = vec![]; - while let Some(result) = path_to_key.next() { + for result in path_to_key { let (is_last, (node, pos)) = match result { Ok(result) => result, Err(e) => { From 6f2de3cb6d101d45b69ee5b196087fbb97d565e7 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 2 Feb 2024 09:14:06 -0500 Subject: [PATCH 51/59] separate out initialization of key state into function --- firewood/src/merkle/stream.rs | 334 +++++++++++++++++----------------- 1 file changed, 168 insertions(+), 166 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index ded7ebf95..bcb44b3de 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -51,6 +51,173 @@ pub struct MerkleKeyValueStream<'a, S, T> { merkle: &'a Merkle, } +// Returns the initial state for +fn get_iterator_intial_state<'a, S: ShaleStore + Send + Sync, T>( + merkle: &'a Merkle, + root_node: DiskAddress, + key: &mut Box<[u8]>, +) -> Result { + let root_node = merkle + .get_node(root_node) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + let mut branch_iter_stack: Vec = vec![]; + + // (disk address, index) for each node we visit along the path to + // [key], where [index] is the next nibble in the key after the node + // at [disk address]. + let mut path_to_key = vec![]; + + // Populate [path_to_key]. + merkle + .get_node_by_key_with_callbacks( + root_node, + &key, + |node_addr, i| path_to_key.push((node_addr, i)), + |_, _| {}, + ) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + // Get each node in [path_to_key]. Mark the last node as being the last + // so we can use that information in the while loop below. + let path_to_key = path_to_key + .into_iter() + .rev() + .enumerate() + .rev() + .map(|(i, (node, pos))| merkle.get_node(node).map(|node| (i == 0, (node, pos)))); + + let mut matched_key_nibbles = vec![]; + + for result in path_to_key { + let (is_last, (node, pos)) = match result { + Ok(result) => result, + Err(e) => return Err(api::Error::InternalError(Box::new(e))), + }; + + match node.inner() { + NodeType::Leaf(_) => (), + NodeType::Branch(branch) if is_last => { + // This is the last node in the path to [key]. + // Figure out whether to start iterating over this node's + // children at [pos] or [pos + 1]. + + // Get the child at [pos], if any. + let Some(child) = branch.children.get(pos as usize) else { + // This should never happen -- [pos] should never be OOB. + return Err(api::Error::InternalError(Box::new( + api::Error::ChildNotFound, + ))); + }; + + let child = child + .map(|child_addr| merkle.get_node(child_addr)) + .transpose() + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + let comparer = if child.is_none() { + // The child doesn't exist; we don't need to iterate over [pos]. + ::gt + } else { + // The child does exist; the first key to iterate over must be at [pos]. + ::ge + }; + + let children_iter = get_children_iter(branch) + .filter(move |(_, child_pos)| comparer(child_pos, &pos)); + + branch_iter_stack.push(BranchIterator { + key_nibbles: matched_key_nibbles.clone(), + children_iter: Box::new(children_iter), + }); + matched_key_nibbles.push(pos); + } + NodeType::Branch(branch) => { + // The next element in [path_to_key] will handle the child at [pos] + // so we can start iterating at [pos + 1]. + let children_iter = + get_children_iter(branch).filter(move |(_, child_pos)| child_pos > &pos); + + branch_iter_stack.push(BranchIterator { + key_nibbles: matched_key_nibbles.clone(), + children_iter: Box::new(children_iter), + }); + matched_key_nibbles.push(pos); + } + NodeType::Extension(extension) if is_last => { + // Figure out whether we want to start iterating over the children + // of [extension.child] at [pos] or [pos + 1]. + // Note that an extension node's child is always a branch node. + // See if [extension]'s child is before, at, or after [key]. + let key_nibbles = Nibbles::<1>::new(key.as_ref()).into_iter(); + + // Unmatched portion of [key]. + let mut remaining_key = key_nibbles.skip(matched_key_nibbles.len()); + + let mut extension_iter = extension.path.iter(); + + while let Some(next_extension_nibble) = extension_iter.next() { + // Check whether the next nibble of [extension]'s path + // matches the next nibble of [key]. + let Some(next_key_nibble) = remaining_key.next() else { + // We ran out of nibbles of [key] so [extension]'s child is after [key]. + // We want to visit all children of [extension]'s child. + let mut key_nibbles = matched_key_nibbles.clone(); + key_nibbles.push(*next_extension_nibble); + key_nibbles.extend(extension_iter); + + let Some(prev_index) = key_nibbles.pop() else { + return Err(api::Error::InternalError(Box::new( + api::Error::ExtensionNodeAtRoot, + ))); + }; + let iter = std::iter::once((extension.chd(), prev_index)); + + branch_iter_stack.push(BranchIterator { + key_nibbles, + children_iter: Box::new(iter), + }); + break; + }; + + match next_extension_nibble.cmp(&next_key_nibble) { + // The nibbles match; check the next one. + std::cmp::Ordering::Equal => (), + // [extension]'s child is before [key]. Skip it. + std::cmp::Ordering::Less => break, + // [extension]'s child is after [key]. Visit it. + std::cmp::Ordering::Greater => { + let child = merkle + .get_node(extension.chd()) + .map_err(|e| api::Error::InternalError(Box::new(e)))?; + + let Some(branch) = child.inner().as_branch() else { + return Err(api::Error::InternalError(Box::new( + api::Error::InvalidExtensionChild, + ))); + }; + + branch_iter_stack.push(BranchIterator { + key_nibbles: matched_key_nibbles.clone(), + children_iter: Box::new(get_children_iter(branch)), + }); + break; + } + } + + matched_key_nibbles.push(next_key_nibble); + } + } + NodeType::Extension(extension) => { + // Add the extension node's path to the matched key nibbles. + matched_key_nibbles.extend(extension.path.iter()); + } + } + } + + Ok(IteratorState::Iterating { branch_iter_stack }) +} + impl<'a, S: ShaleStore + Send + Sync, T> FusedStream for MerkleKeyValueStream<'a, S, T> { fn is_terminated(&self) -> bool { matches!(&self.key_state, IteratorState::Iterating { branch_iter_stack } if branch_iter_stack.is_empty()) @@ -96,172 +263,7 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' match key_state { IteratorState::StartAtKey(key) => { - let root_node = merkle - .get_node(*merkle_root) - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - let mut branch_iter_stack: Vec = vec![]; - - // (disk address, index) for each node we visit along the path to - // [key], where [index] is the next nibble in the key after the node - // at [disk address]. - let mut path_to_key = vec![]; - - // Populate [path_to_key]. - merkle - .get_node_by_key_with_callbacks( - root_node, - &key, - |node_addr, i| path_to_key.push((node_addr, i)), - |_, _| {}, - ) - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - // Get each node in [path_to_key]. Mark the last node as being the last - // so we can use that information in the while loop below. - let path_to_key = - path_to_key - .into_iter() - .rev() - .enumerate() - .rev() - .map(|(i, (node, pos))| { - merkle.get_node(node).map(|node| (i == 0, (node, pos))) - }); - - let mut matched_key_nibbles = vec![]; - - for result in path_to_key { - let (is_last, (node, pos)) = match result { - Ok(result) => result, - Err(e) => { - return Poll::Ready(Some(Err(api::Error::InternalError(Box::new(e))))) - } - }; - - match node.inner() { - NodeType::Leaf(_) => (), - NodeType::Branch(branch) if is_last => { - // This is the last node in the path to [key]. - // Figure out whether to start iterating over this node's - // children at [pos] or [pos + 1]. - - // Get the child at [pos], if any. - let Some(child) = branch.children.get(pos as usize) else { - // This should never happen -- [pos] should never be OOB. - return Poll::Ready(Some(Err(api::Error::InternalError( - Box::new(api::Error::ChildNotFound), - )))); - }; - - let child = child - .map(|child_addr| merkle.get_node(child_addr)) - .transpose() - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - let comparer = if child.is_none() { - // The child doesn't exist; we don't need to iterate over [pos]. - ::gt - } else { - // The child does exist; the first key to iterate over must be at [pos]. - ::ge - }; - - let children_iter = get_children_iter(branch) - .filter(move |(_, child_pos)| comparer(child_pos, &pos)); - - branch_iter_stack.push(BranchIterator { - key_nibbles: matched_key_nibbles.clone(), - children_iter: Box::new(children_iter), - }); - matched_key_nibbles.push(pos); - } - NodeType::Branch(branch) => { - // The next element in [path_to_key] will handle the child at [pos] - // so we can start iterating at [pos + 1]. - let children_iter = get_children_iter(branch) - .filter(move |(_, child_pos)| child_pos > &pos); - - branch_iter_stack.push(BranchIterator { - key_nibbles: matched_key_nibbles.clone(), - children_iter: Box::new(children_iter), - }); - matched_key_nibbles.push(pos); - } - NodeType::Extension(extension) if is_last => { - // Figure out whether we want to start iterating over the children - // of [extension.child] at [pos] or [pos + 1]. - // Note that an extension node's child is always a branch node. - // See if [extension]'s child is before, at, or after [key]. - let key_nibbles = Nibbles::<1>::new(key.as_ref()).into_iter(); - - // Unmatched portion of [key]. - let mut remaining_key = key_nibbles.skip(matched_key_nibbles.len()); - - let mut extension_iter = extension.path.iter(); - - while let Some(next_extension_nibble) = extension_iter.next() { - // Check whether the next nibble of [extension]'s path - // matches the next nibble of [key]. - let Some(next_key_nibble) = remaining_key.next() else { - // We ran out of nibbles of [key] so [extension]'s child is after [key]. - // We want to visit all children of [extension]'s child. - let mut key_nibbles = matched_key_nibbles.clone(); - key_nibbles.push(*next_extension_nibble); - key_nibbles.extend(extension_iter); - - let Some(prev_index) = key_nibbles.pop() else { - return Poll::Ready(Some(Err(api::Error::InternalError( - Box::new(api::Error::ExtensionNodeAtRoot), - )))); - }; - let iter = std::iter::once((extension.chd(), prev_index)); - - branch_iter_stack.push(BranchIterator { - key_nibbles, - children_iter: Box::new(iter), - }); - break; - }; - - match next_extension_nibble.cmp(&next_key_nibble) { - // The nibbles match; check the next one. - std::cmp::Ordering::Equal => (), - // [extension]'s child is before [key]. Skip it. - std::cmp::Ordering::Less => break, - // [extension]'s child is after [key]. Visit it. - std::cmp::Ordering::Greater => { - let child = merkle - .get_node(extension.chd()) - .map_err(|e| api::Error::InternalError(Box::new(e)))?; - - let Some(branch) = child.inner().as_branch() else { - return Poll::Ready(Some(Err( - api::Error::InternalError(Box::new( - api::Error::InvalidExtensionChild, - )), - ))); - }; - - branch_iter_stack.push(BranchIterator { - key_nibbles: matched_key_nibbles.clone(), - children_iter: Box::new(get_children_iter(branch)), - }); - break; - } - } - - matched_key_nibbles.push(next_key_nibble); - } - } - NodeType::Extension(extension) => { - // Add the extension node's path to the matched key nibbles. - matched_key_nibbles.extend(extension.path.iter()); - } - } - } - - self.key_state = IteratorState::Iterating { branch_iter_stack }; + self.key_state = get_iterator_intial_state(merkle, *merkle_root, key)?; self.poll_next(_cx) } From a32e770070dc3cd9198c1874d9cb488072b822d4 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 2 Feb 2024 09:22:02 -0500 Subject: [PATCH 52/59] naming nits --- firewood/src/merkle/stream.rs | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index bcb44b3de..ee54e6254 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -12,19 +12,20 @@ use std::task::Poll; type Key = Box<[u8]>; type Value = Vec; +/// Represents an ongoing iteration over a branch node's children. struct BranchIterator { - /// The nibbles of the key at this node. + /// The nibbles of the key at this branch node. key_nibbles: Vec, - /// Returns the non-empty children of this node - /// and their positions in the node's children array. + /// Returns the non-empty children of this node and their positions + /// in the node's children array . children_iter: Box + Send>, } enum IteratorState { - /// Start iterating at the specified key - StartAtKey(Key), - /// Continue iterating after the last node in the `visited_node_path` - Iterating { + /// The iterator state is lazily initialized when poll_next is called + /// for the first time. The iteration start key is stored here. + Uninitialized(Key), + Initialized { /// Each element is an iterator over a branch node we've visited /// along our traversal of the key-value pairs in the trie. /// We pop an iterator off the stack and call next on it to @@ -36,11 +37,11 @@ enum IteratorState { impl IteratorState { fn new() -> Self { - Self::StartAtKey(vec![].into_boxed_slice()) + Self::Uninitialized(vec![].into_boxed_slice()) } fn with_key(key: Key) -> Self { - Self::StartAtKey(key) + Self::Uninitialized(key) } } @@ -215,12 +216,12 @@ fn get_iterator_intial_state<'a, S: ShaleStore + Send + Sync, T>( } } - Ok(IteratorState::Iterating { branch_iter_stack }) + Ok(IteratorState::Initialized { branch_iter_stack }) } impl<'a, S: ShaleStore + Send + Sync, T> FusedStream for MerkleKeyValueStream<'a, S, T> { fn is_terminated(&self) -> bool { - matches!(&self.key_state, IteratorState::Iterating { branch_iter_stack } if branch_iter_stack.is_empty()) + matches!(&self.key_state, IteratorState::Initialized { branch_iter_stack } if branch_iter_stack.is_empty()) } } @@ -262,12 +263,12 @@ impl<'a, S: ShaleStore + Send + Sync, T> Stream for MerkleKeyValueStream<' } = &mut *self; match key_state { - IteratorState::StartAtKey(key) => { + IteratorState::Uninitialized(key) => { self.key_state = get_iterator_intial_state(merkle, *merkle_root, key)?; self.poll_next(_cx) } - IteratorState::Iterating { branch_iter_stack } => { + IteratorState::Initialized { branch_iter_stack } => { while let Some(mut branch_iter) = branch_iter_stack.pop() { // [node_addr] is the next node to visit. // It's the child at index [pos] of [node_iter]. From 7799284547d69bb669aaab3bdaa66d1bbde97a6f Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 2 Feb 2024 10:06:05 -0500 Subject: [PATCH 53/59] &mut --> & --- firewood/src/merkle/stream.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index ee54e6254..dfb5a728b 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -56,7 +56,7 @@ pub struct MerkleKeyValueStream<'a, S, T> { fn get_iterator_intial_state<'a, S: ShaleStore + Send + Sync, T>( merkle: &'a Merkle, root_node: DiskAddress, - key: &mut Box<[u8]>, + key: &Box<[u8]>, ) -> Result { let root_node = merkle .get_node(root_node) From 15eaa7790bab7fc5369a3e758fdbef10e18f5a17 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 2 Feb 2024 10:32:39 -0500 Subject: [PATCH 54/59] appease linter --- firewood/src/merkle/stream.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index dfb5a728b..e223478dd 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -52,11 +52,12 @@ pub struct MerkleKeyValueStream<'a, S, T> { merkle: &'a Merkle, } -// Returns the initial state for -fn get_iterator_intial_state<'a, S: ShaleStore + Send + Sync, T>( - merkle: &'a Merkle, +/// Returns the state of an iterator over [merkle], which has the given +/// [root_node], after it's been initialized. Iteration starts at [key]. +fn get_iterator_intial_state + Send + Sync, T>( + merkle: &Merkle, root_node: DiskAddress, - key: &Box<[u8]>, + key: &[u8], ) -> Result { let root_node = merkle .get_node(root_node) @@ -73,7 +74,7 @@ fn get_iterator_intial_state<'a, S: ShaleStore + Send + Sync, T>( merkle .get_node_by_key_with_callbacks( root_node, - &key, + key, |node_addr, i| path_to_key.push((node_addr, i)), |_, _| {}, ) @@ -90,8 +91,8 @@ fn get_iterator_intial_state<'a, S: ShaleStore + Send + Sync, T>( let mut matched_key_nibbles = vec![]; - for result in path_to_key { - let (is_last, (node, pos)) = match result { + for node in path_to_key { + let (is_last, (node, pos)) = match node { Ok(result) => result, Err(e) => return Err(api::Error::InternalError(Box::new(e))), }; @@ -150,7 +151,7 @@ fn get_iterator_intial_state<'a, S: ShaleStore + Send + Sync, T>( // of [extension.child] at [pos] or [pos + 1]. // Note that an extension node's child is always a branch node. // See if [extension]'s child is before, at, or after [key]. - let key_nibbles = Nibbles::<1>::new(key.as_ref()).into_iter(); + let key_nibbles = Nibbles::<1>::new(key).into_iter(); // Unmatched portion of [key]. let mut remaining_key = key_nibbles.skip(matched_key_nibbles.len()); From ba65fda712d671628ff226471cbd23a4368de66f Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 2 Feb 2024 10:33:47 -0500 Subject: [PATCH 55/59] rename error --- firewood/src/merkle/stream.rs | 2 +- firewood/src/v2/api.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index e223478dd..4c6d985d2 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -170,7 +170,7 @@ fn get_iterator_intial_state + Send + Sync, T>( let Some(prev_index) = key_nibbles.pop() else { return Err(api::Error::InternalError(Box::new( - api::Error::ExtensionNodeAtRoot, + api::Error::SentinelNodeIsExtension, ))); }; let iter = std::iter::once((extension.chd(), prev_index)); diff --git a/firewood/src/v2/api.rs b/firewood/src/v2/api.rs index 5ea6da9b0..326d59044 100644 --- a/firewood/src/v2/api.rs +++ b/firewood/src/v2/api.rs @@ -86,8 +86,8 @@ pub enum Error { #[error("Child not found")] ChildNotFound, - #[error("Extension node is at the root; should be branch node")] - ExtensionNodeAtRoot, + #[error("Sentinel node is an extension node; should be a branch node")] + SentinelNodeIsExtension, #[error("Extension node has non-branch child")] InvalidExtensionChild, From d3f519f15a879a91608918c29d68afaa3434972d Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 2 Feb 2024 10:48:21 -0500 Subject: [PATCH 56/59] appease linter --- firewood/src/merkle/stream.rs | 47 ++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 4c6d985d2..8703c256d 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -52,8 +52,8 @@ pub struct MerkleKeyValueStream<'a, S, T> { merkle: &'a Merkle, } -/// Returns the state of an iterator over [merkle], which has the given -/// [root_node], after it's been initialized. Iteration starts at [key]. +/// Returns the state of an iterator over merkle, which has the given +/// root_node, after it's been initialized. Iteration starts at the given key. fn get_iterator_intial_state + Send + Sync, T>( merkle: &Merkle, root_node: DiskAddress, @@ -464,8 +464,22 @@ mod tests { } } - let mut stream = merkle.iter(root); + // let mut merkle = create_test_merkle(); + // let root = merkle.init_root().unwrap(); + + // // Insert key-values in reverse order to ensure iterator + // // doesn't just return the keys in insertion order. + // for i in (0..=u8::MAX).rev() { + // for j in (0..=u8::MAX).rev() { + // let key = vec![i, j]; + // let value = vec![i, j]; + // merkle.insert(key, value, root).unwrap(); + // } + // } + + // Test with no start key + let mut stream = merkle.iter(root); for i in 0..u8::MAX { for j in 0..u8::MAX { let expected_key = vec![i, j]; @@ -480,8 +494,33 @@ mod tests { ); } } - check_stream_is_done(stream).await; + + // Test with start key + for i in 0..=u8::MAX { + let mut stream = merkle.iter_from(root, vec![i].into_boxed_slice()); + for j in 0..=u8::MAX { + let expected_key = vec![i, j]; + let expected_value = vec![i, j]; + assert_eq!( + stream.next().await.unwrap().unwrap(), + (expected_key.into_boxed_slice(), expected_value), + "i: {}, j: {}", + i, + j, + ); + } + if i == u8::MAX { + check_stream_is_done(stream).await; + } else { + assert_eq!( + stream.next().await.unwrap().unwrap(), + (vec![i + 1, 0].into_boxed_slice(), vec![i + 1, 0]), + "i: {}", + i, + ); + } + } } #[tokio::test] From 9f8281261b953a890295fe9a1577849a43aabab0 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 2 Feb 2024 10:49:20 -0500 Subject: [PATCH 57/59] remove errantly added WIP code --- firewood/src/merkle/stream.rs | 40 ----------------------------------- 1 file changed, 40 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 8703c256d..2a1e5c7e9 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -464,20 +464,6 @@ mod tests { } } - // let mut merkle = create_test_merkle(); - // let root = merkle.init_root().unwrap(); - - // // Insert key-values in reverse order to ensure iterator - // // doesn't just return the keys in insertion order. - // for i in (0..=u8::MAX).rev() { - // for j in (0..=u8::MAX).rev() { - // let key = vec![i, j]; - // let value = vec![i, j]; - - // merkle.insert(key, value, root).unwrap(); - // } - // } - // Test with no start key let mut stream = merkle.iter(root); for i in 0..u8::MAX { @@ -495,32 +481,6 @@ mod tests { } } check_stream_is_done(stream).await; - - // Test with start key - for i in 0..=u8::MAX { - let mut stream = merkle.iter_from(root, vec![i].into_boxed_slice()); - for j in 0..=u8::MAX { - let expected_key = vec![i, j]; - let expected_value = vec![i, j]; - assert_eq!( - stream.next().await.unwrap().unwrap(), - (expected_key.into_boxed_slice(), expected_value), - "i: {}, j: {}", - i, - j, - ); - } - if i == u8::MAX { - check_stream_is_done(stream).await; - } else { - assert_eq!( - stream.next().await.unwrap().unwrap(), - (vec![i + 1, 0].into_boxed_slice(), vec![i + 1, 0]), - "i: {}", - i, - ); - } - } } #[tokio::test] From 60c6c39e7209ec0df4fe439f9bc4e8fea6ff083a Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 2 Feb 2024 10:58:51 -0500 Subject: [PATCH 58/59] test cleanup --- firewood/src/merkle/stream.rs | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 2a1e5c7e9..80a38e23b 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -449,14 +449,14 @@ mod tests { } #[tokio::test] - async fn no_start_key() { + async fn table_test() { let mut merkle = create_test_merkle(); let root = merkle.init_root().unwrap(); // Insert key-values in reverse order to ensure iterator // doesn't just return the keys in insertion order. - for i in (0..u8::MAX).rev() { - for j in (0..u8::MAX).rev() { + for i in (0..=u8::MAX).rev() { + for j in (0..=u8::MAX).rev() { let key = vec![i, j]; let value = vec![i, j]; @@ -481,24 +481,8 @@ mod tests { } } check_stream_is_done(stream).await; - } - - #[tokio::test] - async fn with_start_key() { - let mut merkle = create_test_merkle(); - let root = merkle.init_root().unwrap(); - - // Insert key-values in reverse order to ensure iterator - // doesn't just return the keys in insertion order. - for i in (0..=u8::MAX).rev() { - for j in (0..=u8::MAX).rev() { - let key = vec![i, j]; - let value = vec![i, j]; - - merkle.insert(key, value, root).unwrap(); - } - } + // Test with start key for i in 0..=u8::MAX { let mut stream = merkle.iter_from(root, vec![i].into_boxed_slice()); for j in 0..=u8::MAX { From 25cbdbb17b302e05e6d4685bd577e2a2284dc7e3 Mon Sep 17 00:00:00 2001 From: Dan Laine Date: Fri, 2 Feb 2024 11:19:22 -0500 Subject: [PATCH 59/59] fix upper loop bound in test --- firewood/src/merkle/stream.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/firewood/src/merkle/stream.rs b/firewood/src/merkle/stream.rs index 80a38e23b..f80276e04 100644 --- a/firewood/src/merkle/stream.rs +++ b/firewood/src/merkle/stream.rs @@ -466,8 +466,8 @@ mod tests { // Test with no start key let mut stream = merkle.iter(root); - for i in 0..u8::MAX { - for j in 0..u8::MAX { + for i in 0..=u8::MAX { + for j in 0..=u8::MAX { let expected_key = vec![i, j]; let expected_value = vec![i, j];