Merge pull request #537 from Chia-Network/improve-fuzzer

address incremental-serializer fuzzer failures

arvidn authored Jan 21, 2025
2 parents 4eccd03 + dd8aea5 commit 99e2c44
Showing 7 changed files with 156 additions and 78 deletions.
2 changes: 1 addition & 1 deletion fuzz/fuzz_targets/deserialize_br_rand_tree.rs
@@ -11,7 +11,7 @@ fuzz_target!(|data: &[u8]| {
let mut allocator = Allocator::new();
let mut unstructured = arbitrary::Unstructured::new(data);

let program = make_tree::make_tree(&mut allocator, &mut unstructured);
let (program, _) = make_tree::make_tree(&mut allocator, &mut unstructured);

let b1 = node_to_bytes_backrefs(&allocator, program).unwrap();

41 changes: 28 additions & 13 deletions fuzz/fuzz_targets/fuzzing_utils.rs
@@ -1,5 +1,7 @@
use chia_sha2::Sha256;
use clvmr::allocator::{Allocator, NodePtr, SExp};
use std::collections::hash_map::Entry;
use std::collections::HashMap;

#[allow(dead_code)]
fn hash_atom(buf: &[u8]) -> [u8; 32] {
@@ -21,30 +23,43 @@ fn hash_pair(left: &[u8; 32], right: &[u8; 32]) -> [u8; 32] {
#[allow(dead_code)]
enum TreeOp {
SExp(NodePtr),
Cons,
Cons(NodePtr),
}

#[allow(dead_code)]
pub fn tree_hash(a: &Allocator, node: NodePtr) -> [u8; 32] {
let mut hashes = Vec::new();
let mut hashes = Vec::<[u8; 32]>::new();
let mut ops = vec![TreeOp::SExp(node)];
let mut cache = HashMap::<NodePtr, [u8; 32]>::new();

while let Some(op) = ops.pop() {
match op {
TreeOp::SExp(node) => match a.sexp(node) {
SExp::Atom => {
hashes.push(hash_atom(a.atom(node).as_ref()));
}
SExp::Pair(left, right) => {
ops.push(TreeOp::Cons);
ops.push(TreeOp::SExp(left));
ops.push(TreeOp::SExp(right));
}
TreeOp::SExp(node) => match cache.entry(node) {
Entry::Occupied(e) => hashes.push(*e.get()),
Entry::Vacant(e) => match a.sexp(node) {
SExp::Atom => {
let hash = hash_atom(a.atom(node).as_ref());
e.insert(hash);
hashes.push(hash);
}
SExp::Pair(left, right) => {
ops.push(TreeOp::Cons(node));
ops.push(TreeOp::SExp(left));
ops.push(TreeOp::SExp(right));
}
},
},
TreeOp::Cons => {
TreeOp::Cons(node) => {
let first = hashes.pop().unwrap();
let rest = hashes.pop().unwrap();
hashes.push(hash_pair(&first, &rest));
match cache.entry(node) {
Entry::Occupied(e) => hashes.push(*e.get()),
Entry::Vacant(e) => {
let hash = hash_pair(&first, &rest);
e.insert(hash);
hashes.push(hash);
}
}
}
}
}
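tree_hash now memoizes the hash of every node it visits, so a NodePtr that is reachable more than once (which make_tree can produce when it reuses previous nodes) is only hashed the first time. Below is a minimal sketch of what the cache preserves, assuming it is compiled next to fuzzing_utils.rs inside the fuzz crate; the main function and the specific tree shape are illustrative only.

// Hypothetical companion snippet (not part of this commit): it builds a DAG
// that shares one subtree and checks that the memoized tree_hash produces the
// same digest as hashing an unshared copy of the same structure.
mod fuzzing_utils;

use clvmr::{Allocator, NodePtr};

fn main() {
    let mut a = Allocator::new();

    // shared subtree (42 . NIL), referenced twice from dag_root
    let n = a.new_number(42u32.into()).expect("out of memory (atom)");
    let shared = a.new_pair(n, NodePtr::NIL).expect("out of memory (pair)");
    let dag_root = a.new_pair(shared, shared).expect("out of memory (pair)");

    // the same shape spelled out without sharing
    let n2 = a.new_number(42u32.into()).expect("out of memory (atom)");
    let copy = a.new_pair(n2, NodePtr::NIL).expect("out of memory (pair)");
    let tree_root = a.new_pair(shared, copy).expect("out of memory (pair)");

    // the cache only avoids re-hashing `shared`; the resulting digest is the
    // same as for the fully expanded tree
    assert_eq!(
        fuzzing_utils::tree_hash(&a, dag_root),
        fuzzing_utils::tree_hash(&a, tree_root)
    );
}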
44 changes: 25 additions & 19 deletions fuzz/fuzz_targets/incremental_serializer.rs
@@ -1,16 +1,18 @@
#![no_main]

mod make_tree;
mod node_eq;

use clvmr::serde::{node_from_bytes_backrefs, node_to_bytes, Serializer};
use clvmr::serde::{node_from_bytes_backrefs, Serializer};
use clvmr::{Allocator, NodePtr, SExp};
use make_tree::make_tree_limits;
use std::collections::HashMap;

use libfuzzer_sys::fuzz_target;

enum TreeOp {
SExp(NodePtr),
Cons,
Cons(NodePtr),
}

// returns the new root (with a sentinel) as well as the sub-tree under the
@@ -30,6 +32,7 @@ fn insert_sentinel(
let mut copy = Vec::new();
let mut ops = vec![TreeOp::SExp(root)];
let mut subtree: Option<NodePtr> = None;
let mut copied_nodes = HashMap::<NodePtr, NodePtr>::new();

while let Some(op) = ops.pop() {
match op {
@@ -44,22 +47,29 @@ fn insert_sentinel(
node_idx -= 1;
continue;
}
node_idx -= 1;
match a.sexp(node) {
SExp::Atom => {
node_idx -= 1;
copy.push(node);
}
SExp::Pair(left, right) => {
ops.push(TreeOp::Cons);
ops.push(TreeOp::SExp(left));
ops.push(TreeOp::SExp(right));
if let Some(copied_node) = copied_nodes.get(&node) {
copy.push(*copied_node);
} else {
node_idx -= 1;
ops.push(TreeOp::Cons(node));
ops.push(TreeOp::SExp(left));
ops.push(TreeOp::SExp(right));
}
}
}
}
TreeOp::Cons => {
TreeOp::Cons(node) => {
let left = copy.pop().unwrap();
let right = copy.pop().unwrap();
copy.push(a.new_pair(left, right).unwrap());
let new_node = a.new_pair(left, right).unwrap();
copy.push(new_node);
copied_nodes.insert(node, new_node);
}
}
}
@@ -81,22 +91,22 @@ fuzz_target!(|data: &[u8]| {
let mut allocator = Allocator::new();

// since we copy the tree, we must limit the number of pairs created, to not
// exceed the limit of the Allocator
let program = make_tree_limits(&mut allocator, &mut unstructured, 10_000_000, 10_000_000);
// exceed the limit of the Allocator. Since we run this test for every node
// in the resulting tree, a tree being too large causes the fuzzer to
// time-out.
let (program, node_count) = make_tree_limits(&mut allocator, &mut unstructured, 600_000, false);

// this just needs to be a unique NodePtr, that won't appear in the tree
let sentinel = allocator.new_pair(NodePtr::NIL, NodePtr::NIL).unwrap();

let checkpoint = allocator.checkpoint();
// count up until we've used every node as the sentinel/cut-point
let mut node_idx = 0;
let node_idx = unstructured.int_in_range(0..=node_count).unwrap_or(5) as i32;

// try to put the sentinel in all positions, to get full coverage
while let Some((first_step, second_step)) =
if let Some((first_step, second_step)) =
insert_sentinel(&mut allocator, program, node_idx, sentinel)
{
node_idx += 1;

let mut ser = Serializer::new(Some(sentinel));
let (done, _) = ser.add(&allocator, first_step).unwrap();
assert!(!done);
@@ -106,11 +116,7 @@ fuzz_target!(|data: &[u8]| {
// now, make sure that we deserialize to the exact same structure, by
// comparing the uncompressed form
let roundtrip = node_from_bytes_backrefs(&mut allocator, ser.get_ref()).unwrap();
let b1 = node_to_bytes(&allocator, roundtrip).unwrap();

let b2 = node_to_bytes(&allocator, program).unwrap();

assert_eq!(&hex::encode(&b1), &hex::encode(&b2));
assert!(node_eq::node_eq(&allocator, program, roundtrip));

// free the memory used by the last iteration from the allocator,
// otherwise we'll exceed the Allocator limits eventually
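The pattern this target exercises is two-step serialization around a sentinel: first add the tree with the sentinel standing in for a subtree, then add the subtree itself. Below is a minimal sketch of that flow, using only the Serializer calls that appear above (Serializer::new(Some(sentinel)), add, get_ref); the assumption that the second add reports completion is inferred, not taken from this diff.

// Minimal sketch (not part of this commit) of the sentinel-based, two-step
// serialization that the fuzz target exercises for every cut-point.
use clvmr::serde::{node_from_bytes_backrefs, Serializer};
use clvmr::{Allocator, NodePtr};

fn main() {
    let mut a = Allocator::new();

    // the subtree that will be streamed in as the second step: (2 . 3)
    let two = a.new_number(2u32.into()).unwrap();
    let three = a.new_number(3u32.into()).unwrap();
    let subtree = a.new_pair(two, three).unwrap();

    // a NodePtr that does not occur inside the tree, used as the cut-point
    let sentinel = a.new_pair(NodePtr::NIL, NodePtr::NIL).unwrap();

    // first step: (1 . <sentinel>), i.e. the tree with the subtree cut out
    let one = a.new_number(1u32.into()).unwrap();
    let first_step = a.new_pair(one, sentinel).unwrap();

    let mut ser = Serializer::new(Some(sentinel));
    let (done, _) = ser.add(&a, first_step).unwrap();
    assert!(!done); // serialization paused at the sentinel

    // assumed: with no further sentinel in the subtree, this add completes
    let (done, _) = ser.add(&a, subtree).unwrap();
    assert!(done);

    // round-trip the bytes and compare against the stitched tree (1 . (2 . 3))
    let roundtrip = node_from_bytes_backrefs(&mut a, ser.get_ref()).unwrap();
    let expected = a.new_pair(one, subtree).unwrap();
    let _ = (expected, roundtrip); // node_eq::node_eq(&a, expected, roundtrip) in the fuzz crate
}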
47 changes: 29 additions & 18 deletions fuzz/fuzz_targets/make_tree.rs
@@ -17,21 +17,24 @@ enum NodeType {
}

#[allow(dead_code)]
pub fn make_tree(a: &mut Allocator, unstructured: &mut Unstructured) -> NodePtr {
make_tree_limits(a, unstructured, 60_000_000, 60_000_000)
pub fn make_tree(a: &mut Allocator, unstructured: &mut Unstructured) -> (NodePtr, u32) {
make_tree_limits(a, unstructured, 600_000, true)
}

/// returns an arbitrary CLVM tree structure and the number of (unique) nodes
/// it's made up of. That's both pairs and atoms.
pub fn make_tree_limits(
a: &mut Allocator,
unstructured: &mut Unstructured,
mut max_pairs: i64,
mut max_atoms: i64,
) -> NodePtr {
mut max_nodes: i64,
reuse_nodes: bool,
) -> (NodePtr, u32) {
let mut previous_nodes = Vec::<NodePtr>::new();
let mut value_stack = Vec::<NodePtr>::new();
let mut op_stack = vec![Op::SubTree];
// the number of Op::SubTree items on the op_stack
let mut sub_trees: i64 = 1;
let mut counter = 0;

while let Some(op) = op_stack.pop() {
match op {
@@ -43,6 +46,7 @@ pub fn make_tree_limits(
} else {
a.new_pair(right, left).expect("out of memory (pair)")
};
counter += 1;
value_stack.push(pair);
previous_nodes.push(pair);
}
@@ -54,17 +58,16 @@ pub fn make_tree_limits(
match unstructured.arbitrary::<NodeType>() {
Err(..) => value_stack.push(NodePtr::NIL),
Ok(NodeType::Pair) => {
if sub_trees > unstructured.len() as i64
|| max_pairs <= 0
|| max_atoms <= 0
{
if sub_trees > unstructured.len() as i64 || max_nodes <= 0 {
// there isn't much entropy left, don't grow the
// tree anymore
value_stack.push(
value_stack.push(if reuse_nodes {
*unstructured
.choose(&previous_nodes)
.unwrap_or(&NodePtr::NIL),
);
.unwrap_or(&NodePtr::NIL)
} else {
NodePtr::NIL
});
} else {
// swap left and right arbitrarily, to avoid
// having a bias because we build the tree depth
@@ -74,11 +77,11 @@ op_stack.push(Op::SubTree);
op_stack.push(Op::SubTree);
op_stack.push(Op::SubTree);
sub_trees += 2;
max_pairs -= 1;
max_atoms -= 2;
max_nodes -= 2;
}
}
Ok(NodeType::Bytes) => {
counter += 1;
value_stack.push(match unstructured.arbitrary::<Vec<u8>>() {
Err(..) => NodePtr::NIL,
Ok(val) => {
@@ -89,6 +92,7 @@ pub fn make_tree_limits(
});
}
Ok(NodeType::U8) => {
counter += 1;
value_stack.push(match unstructured.arbitrary::<u8>() {
Err(..) => NodePtr::NIL,
Ok(val) => a
@@ -97,6 +101,7 @@ pub fn make_tree_limits(
});
}
Ok(NodeType::U16) => {
counter += 1;
value_stack.push(match unstructured.arbitrary::<u16>() {
Err(..) => NodePtr::NIL,
Ok(val) => a
@@ -105,23 +110,29 @@ pub fn make_tree_limits(
});
}
Ok(NodeType::U32) => {
counter += 1;
value_stack.push(match unstructured.arbitrary::<u32>() {
Err(..) => NodePtr::NIL,
Ok(val) => a.new_number(val.into()).expect("out of memory (atom)"),
});
}
Ok(NodeType::Previous) => {
value_stack.push(
value_stack.push(if reuse_nodes {
*unstructured
.choose(&previous_nodes)
.unwrap_or(&NodePtr::NIL),
);
.unwrap_or(&NodePtr::NIL)
} else {
NodePtr::NIL
});
}
}
}
}
}
}
assert_eq!(value_stack.len(), 1);
*value_stack.last().expect("internal error, empty stack")
(
*value_stack.last().expect("internal error, empty stack"),
counter,
)
}
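make_tree_limits now takes a single node budget plus a reuse_nodes flag, and reports how many unique nodes it created; that count is what lets the incremental-serializer target choose one random cut-point instead of looping over every index. Below is a hedged sketch of a caller, assuming it sits next to make_tree.rs in the fuzz crate; the target itself and its constants are illustrative only.

#![no_main]
// Hypothetical fuzz target (not part of this commit) showing how the new
// (NodePtr, u32) return value and the reuse_nodes flag are consumed.
mod make_tree;

use clvmr::Allocator;
use libfuzzer_sys::fuzz_target;

fuzz_target!(|data: &[u8]| {
    let mut unstructured = arbitrary::Unstructured::new(data);
    let mut allocator = Allocator::new();

    // budget of 600k nodes; reuse_nodes = false yields a plain tree, while
    // true allows previously created nodes to be referenced again (a DAG)
    let (program, node_count) =
        make_tree::make_tree_limits(&mut allocator, &mut unstructured, 600_000, false);

    // the reported node count bounds the choice of a single cut-point
    let node_idx = unstructured.int_in_range(0..=node_count).unwrap_or(0);
    let _ = (program, node_idx);
});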
5 changes: 5 additions & 0 deletions fuzz/fuzz_targets/node_eq.rs
@@ -1,12 +1,17 @@
use clvmr::{Allocator, NodePtr, SExp};
use std::collections::HashSet;

/// compare two CLVM trees. Returns true if they are identical, false otherwise
pub fn node_eq(allocator: &Allocator, lhs: NodePtr, rhs: NodePtr) -> bool {
let mut stack = vec![(lhs, rhs)];
let mut visited = HashSet::<NodePtr>::new();

while let Some((l, r)) = stack.pop() {
match (allocator.sexp(l), allocator.sexp(r)) {
(SExp::Pair(ll, lr), SExp::Pair(rl, rr)) => {
if !visited.insert(l) {
continue;
}
stack.push((lr, rr));
stack.push((ll, rl));
}
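node_eq now records the lhs pairs it has already visited, so a subtree that is reachable more than once (for example after node_from_bytes_backrefs expands back-references into shared nodes) is only descended into once, keeping the comparison roughly linear. Below is a hedged usage sketch, assuming it is compiled alongside node_eq.rs in the fuzz crate.

// Hypothetical companion snippet (not part of this commit): structural
// equality holds even when one side shares a subtree and the other does not.
mod node_eq;

use clvmr::{Allocator, NodePtr};

fn main() {
    let mut a = Allocator::new();

    // lhs references the same pair (7 . NIL) from both sides of its root
    let seven = a.new_number(7u32.into()).unwrap();
    let shared = a.new_pair(seven, NodePtr::NIL).unwrap();
    let lhs = a.new_pair(shared, shared).unwrap();

    // rhs spells the same shape out with a separate, equal pair on the right
    let seven2 = a.new_number(7u32.into()).unwrap();
    let copy = a.new_pair(seven2, NodePtr::NIL).unwrap();
    let rhs = a.new_pair(shared, copy).unwrap();

    // same structure, different NodePtrs: node_eq compares shape and atom
    // contents, and the visited set keeps it from re-walking `shared`
    assert!(node_eq::node_eq(&a, lhs, rhs));
}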