From 21d6108ff9dcfae87445d5886566a4f7e7f346b8 Mon Sep 17 00:00:00 2001 From: Austin Abell Date: Mon, 2 Nov 2020 12:28:04 -0500 Subject: [PATCH] Optimize statediff and fix hamt bug (#799) * Improve state diff efficiency for large sets * Fix bug in hamt and add diff line --- ipld/hamt/src/pointer.rs | 19 +++--- utils/statediff/src/lib.rs | 133 ++++++++++++++++++++++++------------- 2 files changed, 93 insertions(+), 59 deletions(-) diff --git a/ipld/hamt/src/pointer.rs b/ipld/hamt/src/pointer.rs index 81ffc480b51a..aa5e11b63075 100644 --- a/ipld/hamt/src/pointer.rs +++ b/ipld/hamt/src/pointer.rs @@ -126,17 +126,14 @@ where } 2..=MAX_ARRAY_WIDTH => { // If more child values than max width, nothing to change. - let children_len: usize = n - .pointers - .iter() - .filter_map(|p| { - if let Pointer::Values(vals) = p { - Some(vals.len()) - } else { - None - } - }) - .sum(); + let mut children_len = 0; + for c in n.pointers.iter() { + if let Pointer::Values(vals) = c { + children_len += vals.len(); + } else { + return Ok(()); + } + } if children_len > MAX_ARRAY_WIDTH { return Ok(()); } diff --git a/utils/statediff/src/lib.rs b/utils/statediff/src/lib.rs index 088daeca741d..f9deb0896de0 100644 --- a/utils/statediff/src/lib.rs +++ b/utils/statediff/src/lib.rs @@ -12,7 +12,7 @@ use ipld::json::{IpldJson, IpldJsonRef}; use ipld::Ipld; use ipld_hamt::{BytesKey, Hamt}; use serde::{Deserialize, Serialize}; -use std::collections::BTreeMap; +use std::collections::HashMap; use std::error::Error as StdError; use vm::ActorState; @@ -24,26 +24,31 @@ struct ActorStateResolved { state: IpldJson, } +fn actor_to_resolved( + bs: &impl BlockStore, + actor: &ActorState, + depth: Option<u64>, +) -> ActorStateResolved { + let resolved = resolve_cids_recursive(bs, &actor.state, depth) + .unwrap_or_else(|_| Ipld::Link(actor.state.clone())); + ActorStateResolved { + state: IpldJson(resolved), + code: CidJson(actor.code.clone()), + balance: actor.balance.to_string(), + sequence: actor.sequence, + } +} + 
fn root_to_state_map<BS: BlockStore>( bs: &BS, root: &Cid, - depth: Option<u64>, -) -> Result<BTreeMap<String, ActorStateResolved>, Box<dyn StdError>> { - let mut actors = BTreeMap::new(); +) -> Result<HashMap<Address, ActorState>, Box<dyn StdError>> { + let mut actors = HashMap::default(); let hamt: Hamt<_, _> = Hamt::load_with_bit_width(root, bs, HAMT_BIT_WIDTH)?; hamt.for_each(|k: &BytesKey, actor: &ActorState| { let addr = Address::from_bytes(&k.0)?; - let resolved = resolve_cids_recursive(bs, &actor.state, depth) - .unwrap_or_else(|_| Ipld::Link(actor.state.clone())); - let resolved_state = ActorStateResolved { - state: IpldJson(resolved), - code: CidJson(actor.code.clone()), - balance: actor.balance.to_string(), - sequence: actor.sequence, - }; - - actors.insert(addr.to_string(), resolved_state); + actors.insert(addr, actor.clone()); Ok(()) })?; @@ -52,49 +57,54 @@ fn root_to_state_map<BS: BlockStore>( /// Tries to resolve state tree actors, if all data exists in store. /// The actors hamt is hard to parse in a diff, so this attempts to remedy this. -fn try_resolve_actor_states<BS: BlockStore>( +/// This function will only print the actors that are added, removed, or changed so it +/// can be used on large state trees. +fn try_print_actor_states<BS: BlockStore>( bs: &BS, root: &Cid, expected_root: &Cid, depth: Option<u64>, -) -> Result<Changeset, Box<dyn StdError>> { - let e_state = root_to_state_map(bs, expected_root, depth)?; - let c_state = root_to_state_map(bs, root, depth)?; +) -> Result<(), Box<dyn StdError>> { + // For now, resolving to a map, because we need to use go implementation's inefficient caching + // this would probably be faster in most cases. 
+ let mut e_state = root_to_state_map(bs, expected_root)?; - let expected_json = serde_json::to_string_pretty(&e_state)?; - let actual_json = serde_json::to_string_pretty(&c_state)?; + // Compare state with expected + let hamt: Hamt<_, _> = Hamt::load_with_bit_width(root, bs, HAMT_BIT_WIDTH)?; + hamt.for_each(|k: &BytesKey, actor: &ActorState| { + let addr = Address::from_bytes(&k.0)?; - Ok(Changeset::new(&expected_json, &actual_json, "\n")) -} + let calc_json = serde_json::to_string_pretty(&actor_to_resolved(bs, actor, depth))?; -/// Prints a diff of the resolved state tree. -/// If the actor's Hamt cannot be loaded, base ipld resolution is given. -pub fn print_state_diff<BS>( - bs: &BS, - root: &Cid, - expected_root: &Cid, - depth: Option<u64>, -) -> Result<(), Box<dyn StdError>> -where - BS: BlockStore, -{ - let Changeset { diffs, .. } = match try_resolve_actor_states(bs, root, expected_root, depth) { - Ok(cs) => cs, - Err(e) => { - println!( - "Could not resolve actor states: {}\nUsing default resolution:", - e - ); - let expected = resolve_cids_recursive(bs, &expected_root, depth)?; - let actual = resolve_cids_recursive(bs, &root, depth)?; - - let expected_json = serde_json::to_string_pretty(&IpldJsonRef(&expected))?; - let actual_json = serde_json::to_string_pretty(&IpldJsonRef(&actual))?; - - Changeset::new(&expected_json, &actual_json, "\n") + if let Some(other) = e_state.remove(&addr) { + if &other != actor { + let expected_json = + serde_json::to_string_pretty(&actor_to_resolved(bs, &other, depth))?; + let Changeset { diffs, .. } = Changeset::new(&expected_json, &calc_json, "\n"); + println!("Address {} changed: ", addr); + print_diffs(&diffs); + } + } else { + // Added actor, print out the json format actor state. 
+ println!("{}", format!("+ Address {}:\n{}", addr, calc_json).green()) } - }; + Ok(()) + })?; + + // Print all addresses that no longer have actor state + for (addr, state) in e_state.into_iter() { + let expected_json = serde_json::to_string_pretty(&actor_to_resolved(bs, &state, depth))?; + println!( + "{}", + format!("- Address {}:\n{}", addr, expected_json).red() + ); + } + + Ok(()) +} + +fn print_diffs(diffs: &[Difference]) { for diff in diffs.iter() { match diff { Difference::Same(x) => { @@ -108,6 +118,33 @@ where } } } +} + +/// Prints a diff of the resolved state tree. +/// If the actor's Hamt cannot be loaded, base ipld resolution is given. +pub fn print_state_diff<BS>( + bs: &BS, + root: &Cid, + expected_root: &Cid, + depth: Option<u64>, +) -> Result<(), Box<dyn StdError>> +where + BS: BlockStore, +{ + if let Err(e) = try_print_actor_states(bs, root, expected_root, depth) { + println!( + "Could not resolve actor states: {}\nUsing default resolution:", + e + ); + let expected = resolve_cids_recursive(bs, &expected_root, depth)?; + let actual = resolve_cids_recursive(bs, &root, depth)?; + + let expected_json = serde_json::to_string_pretty(&IpldJsonRef(&expected))?; + let actual_json = serde_json::to_string_pretty(&IpldJsonRef(&actual))?; + + let Changeset { diffs, .. } = Changeset::new(&expected_json, &actual_json, "\n"); + print_diffs(&diffs); + } Ok(()) }