From 186a386ade38c4818e29c7b37a174c92e8d5aff8 Mon Sep 17 00:00:00 2001 From: Micah Date: Thu, 25 Jan 2024 11:21:27 -0800 Subject: [PATCH] Add function for doing both hash types at once, use it --- src/snapshot/hash/mod.rs | 81 +++++++++++++++++++----------- src/snapshot_middleware/project.rs | 6 +-- src/syncback/mod.rs | 28 +++++------ src/syncback/snapshot.rs | 39 ++++++++++++-- 4 files changed, 104 insertions(+), 50 deletions(-) diff --git a/src/snapshot/hash/mod.rs b/src/snapshot/hash/mod.rs index 82f17a295..d9c87f67b 100644 --- a/src/snapshot/hash/mod.rs +++ b/src/snapshot/hash/mod.rs @@ -12,6 +12,42 @@ use std::collections::{HashMap, VecDeque}; use crate::variant_eq::variant_eq; +/// Returns a map of hashes for every Instance contained in the DOM. +/// Hashes are mapped to the Instance's referent. +/// +/// The first hash in each tuple is the Instance's hash with no descendants, +/// the second is one using descendants. +pub fn hash_tree_both(dom: &WeakDom) -> HashMap { + let mut map: HashMap = HashMap::new(); + let mut order = descendants(dom); + + let mut prop_list = Vec::with_capacity(2); + + while let Some(referent) = order.pop() { + let inst = dom.get_by_ref(referent).unwrap(); + let mut hasher = hash_inst_no_descendants(inst, &mut prop_list); + let no_descendants = hasher.finalize(); + + let mut child_list = Vec::with_capacity(inst.children().len()); + + for child in inst.children() { + if let Some((_, descendant)) = map.get(child) { + child_list.push(descendant.as_bytes()) + } else { + panic!("Invariant: child {} not hashed before its parent", child); + } + } + child_list.sort_unstable(); + for hash in child_list { + hasher.update(hash); + } + + map.insert(referent, (no_descendants, hasher.finalize())); + } + + map +} + /// Returns a map of every `Ref` in the `WeakDom` to a hashed version of the /// `Instance` it points to, including the properties but not including the /// descendants of the Instance. @@ -50,9 +86,22 @@ pub fn hash_tree(dom: &WeakDom) -> HashMap { while let Some(referent) = order.pop() { let inst = dom.get_by_ref(referent).unwrap(); - let hash = hash_inst(inst, &mut prop_list, &map); + let mut hasher = hash_inst_no_descendants(inst, &mut prop_list); - map.insert(referent, hash); + let mut child_list = Vec::with_capacity(inst.children().len()); + for child in inst.children() { + if let Some(hash) = map.get(child) { + child_list.push(hash.as_bytes()) + } else { + panic!("Invariant: child {} not hashed before its parent", child); + } + } + child_list.sort_unstable(); + for hash in child_list { + hasher.update(hash); + } + + map.insert(referent, hasher.finalize()); } map @@ -94,34 +143,6 @@ fn hash_inst_no_descendants<'inst>( hasher } -/// Hashes an Instance using its class, name, properties, and descendants. -/// The passed `prop_list` is used to sort properties before hashing them. -/// -/// # Panics -/// If any children of the Instance are inside `map`, this function will panic. -fn hash_inst<'inst>( - inst: &'inst Instance, - prop_list: &mut Vec<(&'inst str, &'inst Variant)>, - map: &HashMap, -) -> Hash { - let mut hasher = hash_inst_no_descendants(inst, prop_list); - let mut child_list = Vec::with_capacity(inst.children().len()); - - for child in inst.children() { - if let Some(hash) = map.get(child) { - child_list.push(hash.as_bytes()) - } else { - panic!("Invariant: child {} not hashed before its parent", child); - } - } - child_list.sort_unstable(); - for hash in child_list { - hasher.update(hash); - } - - hasher.finalize() -} - pub(crate) fn descendants(dom: &WeakDom) -> Vec { let mut queue = VecDeque::new(); let mut ordered = Vec::new(); diff --git a/src/snapshot_middleware/project.rs b/src/snapshot_middleware/project.rs index a40e8195a..d7295ad28 100644 --- a/src/snapshot_middleware/project.rs +++ b/src/snapshot_middleware/project.rs @@ -1,4 +1,4 @@ -use std::{borrow::Cow, collections::HashMap, path::Path}; +use std::{borrow::Cow, collections::HashMap, path::Path, rc::Rc}; use anyhow::{bail, Context}; use memofs::Vfs; @@ -408,7 +408,7 @@ pub fn syncback_project<'new, 'old>( if let Some(old_inst) = old_child_map.get(new_name.as_str()) { // This new instance represents an older one! children.push(SyncbackSnapshot { - data: snapshot.data, + data: Rc::clone(&snapshot.data), old: Some(old_inst.id()), new: new_child.referent(), parent_path, @@ -418,7 +418,7 @@ pub fn syncback_project<'new, 'old>( } else { // This new instance is... new. children.push(SyncbackSnapshot { - data: snapshot.data, + data: Rc::clone(&snapshot.data), old: None, new: new_child.referent(), parent_path, diff --git a/src/syncback/mod.rs b/src/syncback/mod.rs index 78e433e9c..30ab75901 100644 --- a/src/syncback/mod.rs +++ b/src/syncback/mod.rs @@ -6,11 +6,14 @@ use anyhow::Context; use memofs::Vfs; use rbx_dom_weak::{types::Ref, Instance, WeakDom}; use serde::{Deserialize, Serialize}; -use std::collections::{HashMap, VecDeque}; +use std::{ + collections::{HashMap, VecDeque}, + rc::Rc, +}; use crate::{ glob::Glob, - snapshot::{hash_tree, InstanceSnapshot, InstanceWithMeta, RojoTree}, + snapshot::{hash_tree_both, InstanceSnapshot, InstanceWithMeta, RojoTree}, snapshot_middleware::Middleware, Project, }; @@ -26,18 +29,20 @@ pub fn syncback_loop<'old>( project: &'old Project, ) -> anyhow::Result> { log::debug!("Hashing project DOM"); - let old_hashes = hash_tree(old_tree.inner()); + let old_hashes = hash_tree_both(old_tree.inner()); log::debug!("Hashing file DOM"); - let new_hashes = hash_tree(new_tree); + let new_hashes = hash_tree_both(new_tree); let project_path = project.folder_location(); - let syncback_data = SyncbackData { + let syncback_data = Rc::new(SyncbackData { vfs, old_tree, new_tree, syncback_rules: project.syncback_rules.as_ref(), - }; + old_hashes, + new_hashes, + }); let mut snapshots = vec![SyncbackSnapshot { data: syncback_data, @@ -54,14 +59,9 @@ pub fn syncback_loop<'old>( let inst_path = get_inst_path(new_tree, snapshot.new); // We can quickly check that two subtrees are identical and if they are, // skip reconciling them. - if let Some(old_ref) = snapshot.old { - if old_hashes.get(&old_ref) == new_hashes.get(&snapshot.new) { - log::trace!( - "Skipping {inst_path} due to it being identically hashed as {:?}", - old_hashes.get(&old_ref) - ); - continue; - } + if snapshot.trees_are_deep_eq() { + log::trace!("Skipping {inst_path} due to it being identically hashed"); + continue; } let middleware = snapshot diff --git a/src/syncback/snapshot.rs b/src/syncback/snapshot.rs index e4551ed1c..d2ebf747e 100644 --- a/src/syncback/snapshot.rs +++ b/src/syncback/snapshot.rs @@ -1,7 +1,9 @@ +use blake3::Hash; use memofs::Vfs; use std::{ collections::{BTreeSet, HashMap}, path::{Path, PathBuf}, + rc::Rc, sync::OnceLock, }; @@ -20,16 +22,17 @@ use super::SyncbackRules; /// A glob that can be used to tell if a path contains a `.git` folder. static GIT_IGNORE_GLOB: OnceLock = OnceLock::new(); -#[derive(Clone, Copy)] pub struct SyncbackData<'new, 'old> { pub(super) vfs: &'old Vfs, pub(super) old_tree: &'old RojoTree, pub(super) new_tree: &'new WeakDom, pub(super) syncback_rules: Option<&'old SyncbackRules>, + pub(super) new_hashes: HashMap, + pub(super) old_hashes: HashMap, } pub struct SyncbackSnapshot<'new, 'old> { - pub data: SyncbackData<'new, 'old>, + pub data: Rc>, pub old: Option, pub new: Ref, pub parent_path: PathBuf, @@ -42,7 +45,7 @@ impl<'new, 'old> SyncbackSnapshot<'new, 'old> { #[inline] pub fn with_parent(&self, new_name: String, new_ref: Ref, old_ref: Option) -> Self { Self { - data: self.data, + data: Rc::clone(&self.data), old: old_ref, new: new_ref, parent_path: self.parent_path.join(&self.name), @@ -181,6 +184,36 @@ impl<'new, 'old> SyncbackSnapshot<'new, 'old> { true } + /// Returns whether two Instances are equal, using their hash for O(1) + /// comparisons. This comparison **does not** take descendants into account. + #[inline] + pub fn trees_are_eq(&self) -> bool { + let old_hashes = self + .old + .and_then(|referent| self.data.old_hashes.get(&referent)); + let new_hashes = self.data.new_hashes.get(&self.new); + + match (old_hashes, new_hashes) { + (Some((old_hash, _)), Some((new_hash, _))) => old_hash == new_hash, + _ => false, + } + } + + /// Returns whether two Instance trees are equal, using their hash for O(1) + /// comparisons. This comparison **does** take descendants into account. + #[inline] + pub fn trees_are_deep_eq(&self) -> bool { + let old_hashes = self + .old + .and_then(|referent| self.data.old_hashes.get(&referent)); + let new_hashes = self.data.new_hashes.get(&self.new); + + match (old_hashes, new_hashes) { + (Some((_, old_hash)), Some((_, new_hash))) => old_hash == new_hash, + _ => false, + } + } + /// Returns an Instance from the old tree with the provided referent, if it /// exists. #[inline]