From 9865b8869227edbfff54af94f42665d73eabe23f Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Tue, 10 Sep 2024 09:10:45 +0200 Subject: [PATCH 01/18] wip parallel materialize --- raphtory/src/db/api/mutation/import_ops.rs | 5 +- .../graph/storage_ops/time_semantics.rs | 46 ++-- raphtory/src/db/api/view/edge.rs | 6 +- raphtory/src/db/api/view/graph.rs | 197 +++++++++++++----- .../db/api/view/internal/time_semantics.rs | 31 ++- raphtory/src/db/graph/views/deletion_graph.rs | 43 ++-- raphtory/src/db/graph/views/window_graph.rs | 18 +- 7 files changed, 238 insertions(+), 108 deletions(-) diff --git a/raphtory/src/db/api/mutation/import_ops.rs b/raphtory/src/db/api/mutation/import_ops.rs index 1c50cb7ba2..82a4f5f861 100644 --- a/raphtory/src/db/api/mutation/import_ops.rs +++ b/raphtory/src/db/api/mutation/import_ops.rs @@ -1,7 +1,5 @@ use std::borrow::Borrow; -use raphtory_api::core::storage::arc_str::OptionAsStr; - use crate::{ core::{ entities::LayerIds, @@ -21,6 +19,7 @@ use crate::{ }, prelude::{AdditionOps, EdgeViewOps, GraphViewOps, NodeViewOps}, }; +use raphtory_api::core::storage::{arc_str::OptionAsStr, timeindex::AsTime}; use super::time_from_input; @@ -213,7 +212,7 @@ impl< if self.include_deletions() { for t in edge.graph.edge_deletion_history(edge.edge, &layer_ids) { - let ti = time_from_input(self, t)?; + let ti = time_from_input(self, t.t())?; let src_id = self.resolve_node(edge.src().id())?.inner(); let dst_id = self.resolve_node(edge.dst().id())?.inner(); let layer = self.resolve_layer(layer_name)?.inner(); diff --git a/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs b/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs index 00fc9d0032..da5d381138 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs @@ -7,10 +7,12 @@ use raphtory_api::core::{ }; use rayon::iter::ParallelIterator; +use super::GraphStorage; use crate::{ core::{ entities::LayerIds, storage::timeindex::{TimeIndexIntoOps, TimeIndexOps}, + utils::iter::GenLockedIter, }, db::api::{ storage::graph::{ @@ -23,14 +25,12 @@ use crate::{ }, view::{ internal::{CoreGraphOps, TimeSemantics}, - BoxedIter, + BoxedIter, BoxedLIter, IntoDynBoxed, }, }, prelude::Prop, }; -use super::GraphStorage; - impl TimeSemantics for GraphStorage { fn node_earliest_time(&self, v: VID) -> Option { self.node_entry(v).additions().first_t() @@ -240,27 +240,37 @@ impl TimeSemantics for GraphStorage { } } - fn edge_deletion_history(&self, e: EdgeRef, layer_ids: &LayerIds) -> Vec { + fn edge_deletion_history<'a>( + &'a self, + e: EdgeRef, + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, TimeIndexEntry> { let entry = self.core_edge(e.into()); - entry - .deletions_iter(layer_ids) - .map(|(_, d)| d.into_iter_t()) - .kmerge() - .collect() + GenLockedIter::from(entry, |entry| { + entry + .deletions_iter(layer_ids) + .map(|(_, d)| d.into_iter()) + .kmerge() + .into_dyn_boxed() + }) + .into_dyn_boxed() } - fn edge_deletion_history_window( - &self, + fn edge_deletion_history_window<'a>( + &'a self, e: EdgeRef, w: Range, - layer_ids: &LayerIds, - ) -> Vec { + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, TimeIndexEntry> { let entry = self.core_edge(e.into()); - entry - .deletions_iter(layer_ids) - .map(|(_, d)| d.into_range_t(w.clone()).into_iter_t()) - .kmerge() - .collect() + GenLockedIter::from(entry, |entry| { + entry + .deletions_iter(layer_ids) + .map(|(_, d)| d.into_range_t(w.clone()).into_iter()) + .kmerge() + .into_dyn_boxed() + }) + .into_dyn_boxed() } fn edge_is_valid(&self, _e: EdgeRef, _layer_ids: &LayerIds) -> bool { diff --git a/raphtory/src/db/api/view/edge.rs b/raphtory/src/db/api/view/edge.rs index c0a0b2d8ac..2cb8367d76 100644 --- a/raphtory/src/db/api/view/edge.rs +++ b/raphtory/src/db/api/view/edge.rs @@ -155,7 +155,11 @@ impl<'graph, E: BaseEdgeViewOps<'graph>> EdgeViewOps<'graph> for E { } fn deletions(&self) -> Self::ValueType> { - self.map(move |g, e| g.edge_deletion_history(e, &g.layer_ids().constrain_from_edge(e))) + self.map(move |g, e| { + g.edge_deletion_history(e, &g.layer_ids().constrain_from_edge(e)) + .map(|t| t.t()) + .collect() + }) } fn deletions_date_time(&self) -> Self::ValueType>>> { diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index a77c3373d7..6a517e7215 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -28,7 +28,10 @@ use crate::{ }; use chrono::{DateTime, Utc}; use itertools::Itertools; -use raphtory_api::core::storage::arc_str::{ArcStr, OptionAsStr}; +use raphtory_api::core::{ + entities::EID, + storage::arc_str::{ArcStr, OptionAsStr}, +}; use rayon::prelude::*; use rustc_hash::FxHashSet; use std::{borrow::Borrow, sync::Arc}; @@ -135,74 +138,164 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> } fn materialize(&self) -> Result { + let storage = self.core_graph().lock(); let g = GraphStorage::default(); + + let layer_map: Vec<_> = match self.layer_ids() { + LayerIds::None => { + return Ok(self.new_base_graph(g)); + } + LayerIds::All => { + let mut layer_map = vec![0; self.unfiltered_num_layers()]; + let layers = storage.edge_meta().layer_meta().get_keys(); + for id in (1..layers.len()) { + let new_id = g.resolve_layer(Some(&layers[id]))?.inner(); + layer_map[id] = new_id; + } + layer_map + } + LayerIds::One(l_id) => { + let mut layer_map = vec![0; self.unfiltered_num_layers()]; + if *l_id > 0 { + let new_id = + g.resolve_layer(Some(&storage.edge_meta().get_layer_name_by_id(*l_id)))?; + layer_map[*l_id] = new_id.inner(); + } + layer_map + } + LayerIds::Multiple(ids) => { + let mut layer_map = vec![0; self.unfiltered_num_layers()]; + let ids = if ids[0] == 0 { &ids[1..] } else { ids }; + let layers = storage.edge_meta().layer_meta().get_keys(); + for id in ids { + let new_id = g.resolve_layer(Some(&layers[*id]))?.inner(); + layer_map[*id] = new_id; + } + layer_map + } + }; + + let mut node_map = vec![VID::default(); storage.unfiltered_num_nodes()]; + for node in self.nodes().iter() { + let new_id = g.resolve_node(node)?; + let VID(old_index) = node.node; + node_map[old_index] = new_id.inner(); + } + let earliest = if let Some(earliest) = self.earliest_time() { earliest } else { return Ok(self.new_base_graph(g)); }; - // make sure we preserve all layers even if they are empty - // skip default layer - for layer in self.unique_layers().skip(1) { - g.resolve_layer(Some(&layer))?; - } - // Add edges first so we definitely have all associated nodes (important in case of persistent edges) - for e in self.edges() { - // FIXME: this needs to be verified - for ee in e.explode_layers() { - let layer_id = *ee.edge.layer().expect("exploded layers"); - let layer_ids = LayerIds::One(layer_id); - let layer_name = self.get_layer_name(layer_id); - let layer_name: Option<&str> = if layer_id == 0 { - None - } else { - Some(&layer_name) - }; - - for ee in ee.explode() { - g.add_edge( - ee.time().expect("exploded edge"), - ee.src().id(), - ee.dst().id(), - ee.properties().temporal().collect_properties(), - layer_name, - )?; + { + // scope for the write lock + let mut new_storage = g.write_lock()?; + new_storage.edges.par_iter_mut().try_for_each(|mut shard| { + for (eid, edge) in self.edges().iter().enumerate() { + if let Some(mut new_edge) = shard.get_mut(EID(eid)) { + let edge_store = new_edge.edge_store_mut(); + edge_store.src = node_map[edge.edge.src().index()]; + edge_store.dst = node_map[edge.edge.dst().index()]; + edge_store.eid = EID(eid); + for e in edge.explode() { + let t = e.edge.time().unwrap(); + let layer = layer_map[*e.edge.layer().unwrap()]; + let edge_additions = new_edge.additions_mut(layer); + edge_additions.insert(e.edge.time().unwrap()); + let t_props = e.properties().temporal(); + let mut props_iter = t_props.iter_latest().peekable(); + if props_iter.peek().is_some() { + let mut edge_layer = new_edge.layer_mut(layer); + for (prop_name, prop_value) in props_iter { + let prop_id = g + .resolve_edge_property( + &prop_name, + prop_value.dtype(), + false, + )? + .inner(); + edge_layer.add_prop(t, prop_id, prop_value)?; + } + } + } + if self.include_deletions() { + for e in edge.explode_layers() { + let layer = *e.edge.layer().unwrap(); + let layer_ids = LayerIds::One(layer); + let mut deletion_history = + self.edge_deletion_history(edge.edge, &layer_ids).peekable(); + if deletion_history.peek().is_some() { + let mut edge_deletions = + new_edge.deletions_mut(layer_map[layer]); + for t in deletion_history { + edge_deletions.insert(t); + } + } + } + } + } } + Ok::<(), GraphError>(()) + })?; + + // Add edges first so we definitely have all associated nodes (important in case of persistent edges) + for e in self.edges() { + // FIXME: this needs to be verified + for ee in e.explode_layers() { + let layer_id = *ee.edge.layer().expect("exploded layers"); + let layer_ids = LayerIds::One(layer_id); + let layer_name = self.get_layer_name(layer_id); + let layer_name: Option<&str> = if layer_id == 0 { + None + } else { + Some(&layer_name) + }; + + for ee in ee.explode() { + g.add_edge( + ee.time().expect("exploded edge"), + ee.src().id(), + ee.dst().id(), + ee.properties().temporal().collect_properties(), + layer_name, + )?; + } - if self.include_deletions() { - for t in self.edge_deletion_history(e.edge, &layer_ids) { - g.delete_edge(t, e.src().id(), e.dst().id(), layer_name)?; + if self.include_deletions() { + for t in self.edge_deletion_history(e.edge, &layer_ids) { + g.delete_edge(t.t(), e.src().id(), e.dst().id(), layer_name)?; + } } - } - g.edge(ee.src().id(), ee.dst().id()) - .expect("edge added") - .add_constant_properties(ee.properties().constant(), layer_name)?; + g.edge(ee.src().id(), ee.dst().id()) + .expect("edge added") + .add_constant_properties(ee.properties().constant(), layer_name)?; + } } - } - for v in self.nodes().iter() { - let v_type_string = v.node_type(); //stop it being dropped - let v_type_str = v_type_string.as_str(); - for h in v.history() { - g.add_node(h, v.id(), NO_PROPS, v_type_str)?; - } - for (name, prop_view) in v.properties().temporal().iter() { - for (t, prop) in prop_view.iter() { - g.add_node(t, v.id(), [(name.clone(), prop)], v_type_str)?; + for v in self.nodes().iter() { + let v_type_string = v.node_type(); //stop it being dropped + let v_type_str = v_type_string.as_str(); + for h in v.history() { + g.add_node(h, v.id(), NO_PROPS, v_type_str)?; + } + for (name, prop_view) in v.properties().temporal().iter() { + for (t, prop) in prop_view.iter() { + g.add_node(t, v.id(), [(name.clone(), prop)], v_type_str)?; + } } - } - let node = match g.node(v.id()) { - Some(node) => node, - None => g.add_node(earliest, v.id(), NO_PROPS, v_type_str)?, - }; + let node = match g.node(v.id()) { + Some(node) => node, + None => g.add_node(earliest, v.id(), NO_PROPS, v_type_str)?, + }; - node.add_constant_properties(v.properties().constant())?; - } + node.add_constant_properties(v.properties().constant())?; + } - g.add_constant_properties(self.properties().constant())?; + g.add_constant_properties(self.properties().constant())?; + } Ok(self.new_base_graph(g)) } diff --git a/raphtory/src/db/api/view/internal/time_semantics.rs b/raphtory/src/db/api/view/internal/time_semantics.rs index be398c5b20..72ec4badf4 100644 --- a/raphtory/src/db/api/view/internal/time_semantics.rs +++ b/raphtory/src/db/api/view/internal/time_semantics.rs @@ -5,10 +5,11 @@ use crate::{ }, db::api::{ storage::graph::{edges::edge_ref::EdgeStorageRef, nodes::node_ref::NodeStorageRef}, - view::{internal::Base, BoxedIter, MaterializedGraph}, + view::{internal::Base, BoxedIter, BoxedLIter, MaterializedGraph}, }, }; use enum_dispatch::enum_dispatch; +use raphtory_api::core::storage::timeindex::TimeIndexEntry; use std::ops::Range; /// Methods for defining time windowing semantics for a graph @@ -118,15 +119,19 @@ pub trait TimeSemantics { ) -> Option; /// Get the edge deletions for use with materialize - fn edge_deletion_history(&self, e: EdgeRef, layer_ids: &LayerIds) -> Vec; + fn edge_deletion_history<'a>( + &'a self, + e: EdgeRef, + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, TimeIndexEntry>; /// Get the edge deletions for use with materialize restricted to window `w` - fn edge_deletion_history_window( - &self, + fn edge_deletion_history_window<'a>( + &'a self, e: EdgeRef, w: Range, - layer_ids: &LayerIds, - ) -> Vec; + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, TimeIndexEntry>; /// Check if edge `e` is currently valid in any layer included in `layer_ids` fn edge_is_valid(&self, e: EdgeRef, layer_ids: &LayerIds) -> bool; @@ -481,17 +486,21 @@ impl TimeSemantics for G { } #[inline] - fn edge_deletion_history(&self, e: EdgeRef, layer_ids: &LayerIds) -> Vec { + fn edge_deletion_history<'a>( + &'a self, + e: EdgeRef, + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, TimeIndexEntry> { self.graph().edge_deletion_history(e, layer_ids) } #[inline] - fn edge_deletion_history_window( - &self, + fn edge_deletion_history_window<'a>( + &'a self, e: EdgeRef, w: Range, - layer_ids: &LayerIds, - ) -> Vec { + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, TimeIndexEntry> { self.graph().edge_deletion_history_window(e, w, layer_ids) } diff --git a/raphtory/src/db/graph/views/deletion_graph.rs b/raphtory/src/db/graph/views/deletion_graph.rs index 907841c24d..a38aa2082e 100644 --- a/raphtory/src/db/graph/views/deletion_graph.rs +++ b/raphtory/src/db/graph/views/deletion_graph.rs @@ -2,6 +2,7 @@ use crate::{ core::{ entities::{edges::edge_ref::EdgeRef, LayerIds, VID}, storage::timeindex::{AsTime, TimeIndexEntry, TimeIndexIntoOps, TimeIndexOps}, + utils::iter::GenLockedIter, Prop, }, db::{ @@ -17,7 +18,7 @@ use crate::{ }, storage::Storage, }, - view::{internal::*, BoxedIter, IntoDynBoxed}, + view::{internal::*, BoxedIter, BoxedLIter, IntoDynBoxed}, }, graph::graph::graph_equal, }, @@ -484,27 +485,37 @@ impl TimeSemantics for PersistentGraph { } } - fn edge_deletion_history(&self, e: EdgeRef, layer_ids: &LayerIds) -> Vec { + fn edge_deletion_history<'a>( + &'a self, + e: EdgeRef, + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, TimeIndexEntry> { let entry = self.core_edge(e.into()); - entry - .deletions_iter(layer_ids) - .map(|(_, d)| d.into_iter_t()) - .kmerge() - .collect() + GenLockedIter::from(entry, |entry| { + entry + .deletions_iter(layer_ids) + .map(|(_, d)| d.into_iter()) + .kmerge() + .into_dyn_boxed() + }) + .into_dyn_boxed() } - fn edge_deletion_history_window( - &self, + fn edge_deletion_history_window<'a>( + &'a self, e: EdgeRef, w: Range, - layer_ids: &LayerIds, - ) -> Vec { + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, TimeIndexEntry> { let entry = self.core_edge(e.into()); - entry - .deletions_iter(layer_ids) - .map(|(_, d)| d.into_range_t(w.clone()).into_iter_t()) - .kmerge() - .collect() + GenLockedIter::from(entry, |entry| { + entry + .deletions_iter(layer_ids) + .map(|(_, d)| d.into_range_t(w.clone()).into_iter()) + .kmerge() + .into_dyn_boxed() + }) + .into_dyn_boxed() } fn edge_is_valid(&self, e: EdgeRef, layer_ids: &LayerIds) -> bool { diff --git a/raphtory/src/db/graph/views/window_graph.rs b/raphtory/src/db/graph/views/window_graph.rs index 2f7f44ea65..3f648701b7 100644 --- a/raphtory/src/db/graph/views/window_graph.rs +++ b/raphtory/src/db/graph/views/window_graph.rs @@ -54,7 +54,7 @@ use crate::{ Base, EdgeFilterOps, Immutable, InheritCoreOps, InheritLayerOps, InheritListOps, InheritMaterialize, NodeFilterOps, Static, TimeSemantics, }, - BoxedIter, + BoxedIter, BoxedLIter, }, }, graph::graph::graph_equal, @@ -62,7 +62,7 @@ use crate::{ prelude::GraphViewOps, }; use chrono::{DateTime, Utc}; -use raphtory_api::core::storage::arc_str::ArcStr; +use raphtory_api::core::storage::{arc_str::ArcStr, timeindex::TimeIndexEntry}; use std::{ fmt::{Debug, Formatter}, ops::Range, @@ -355,17 +355,21 @@ impl<'graph, G: GraphViewOps<'graph>> TimeSemantics for WindowedGraph { .edge_latest_time_window(e, w.start..w.end, layer_ids) } - fn edge_deletion_history(&self, e: EdgeRef, layer_ids: &LayerIds) -> Vec { + fn edge_deletion_history<'a>( + &'a self, + e: EdgeRef, + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, TimeIndexEntry> { self.graph .edge_deletion_history_window(e, self.start_bound()..self.end_bound(), layer_ids) } - fn edge_deletion_history_window( - &self, + fn edge_deletion_history_window<'a>( + &'a self, e: EdgeRef, w: Range, - layer_ids: &LayerIds, - ) -> Vec { + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, TimeIndexEntry> { self.graph .edge_deletion_history_window(e, w.start..w.end, layer_ids) } From b12efaec7cc6e9878fae619424578add7c7fded1 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 11 Sep 2024 11:50:43 +0200 Subject: [PATCH 02/18] working implementation --- .../motifs/temporal_rich_club_coefficient.rs | 14 +- .../core/entities/properties/graph_meta.rs | 2 +- raphtory/src/core/storage/lazy_vec.rs | 3 +- raphtory/src/core/storage/raw_edges.rs | 6 + raphtory/src/core/utils/errors.rs | 12 +- raphtory/src/db/api/properties/internal.rs | 7 +- .../src/db/api/properties/temporal_props.rs | 6 +- .../db/api/storage/graph/storage_ops/mod.rs | 19 ++ .../storage/graph/storage_ops/time_props.rs | 7 +- .../graph/storage_ops/time_semantics.rs | 83 ++++---- raphtory/src/db/api/view/graph.rs | 186 +++++++++++------- .../db/api/view/internal/time_semantics.rs | 57 +++--- raphtory/src/db/graph/edge.rs | 17 +- raphtory/src/db/graph/node.rs | 21 +- raphtory/src/db/graph/views/deletion_graph.rs | 82 ++++---- raphtory/src/db/graph/views/window_graph.rs | 43 ++-- raphtory/src/python/packages/algorithms.rs | 1 - 17 files changed, 355 insertions(+), 211 deletions(-) diff --git a/raphtory/src/algorithms/motifs/temporal_rich_club_coefficient.rs b/raphtory/src/algorithms/motifs/temporal_rich_club_coefficient.rs index 7be58a31d4..999c3bd6a0 100644 --- a/raphtory/src/algorithms/motifs/temporal_rich_club_coefficient.rs +++ b/raphtory/src/algorithms/motifs/temporal_rich_club_coefficient.rs @@ -1,17 +1,10 @@ +use crate::prelude::{EdgeViewOps, GraphViewOps, NodeViewOps}; +use raphtory_api::core::entities::VID; use std::{ - borrow::Borrow, - cmp::{self, max, min}, + cmp::{max, min}, collections::HashSet, - default, }; -use raphtory_api::core::entities::VID; -use rustc_hash::FxHashSet; - -use rayon::prelude::*; - -use crate::prelude::{EdgeViewOps, GraphViewOps, NodeViewOps}; - struct SlidingWindows { iter: I, window_size: usize, @@ -127,7 +120,6 @@ mod rich_club_test { algorithms::centrality::pagerank::page_rank_tests::assert_eq_f64, db::{api::mutation::AdditionOps, graph::graph::Graph}, prelude::{TimeOps, NO_PROPS}, - test_storage, }; fn load_graph(edges: Vec<(i64, u64, u64)>) -> Graph { diff --git a/raphtory/src/core/entities/properties/graph_meta.rs b/raphtory/src/core/entities/properties/graph_meta.rs index ae4afc4613..5f637f521a 100644 --- a/raphtory/src/core/entities/properties/graph_meta.rs +++ b/raphtory/src/core/entities/properties/graph_meta.rs @@ -131,7 +131,7 @@ impl GraphMeta { } pub fn get_temporal_dtype(&self, prop_id: usize) -> Option { - self.temporal.get(&prop_id).map(|v| v.dtype()) + self.temporal_mapper.get_dtype(prop_id) } pub(crate) fn constant_names(&self) -> ArcReadLockedVec { diff --git a/raphtory/src/core/storage/lazy_vec.rs b/raphtory/src/core/storage/lazy_vec.rs index 591738e7a2..f8a4b636f8 100644 --- a/raphtory/src/core/storage/lazy_vec.rs +++ b/raphtory/src/core/storage/lazy_vec.rs @@ -127,8 +127,7 @@ where None => { let mut value = A::default(); updater(&mut value)?; - self.set(id, value) - .map_err(|e| GraphError::IllegalSet(e.to_string()))?; + self.set(id, value)?; } }; Ok(()) diff --git a/raphtory/src/core/storage/raw_edges.rs b/raphtory/src/core/storage/raw_edges.rs index 71b8c02ec0..9c461403e9 100644 --- a/raphtory/src/core/storage/raw_edges.rs +++ b/raphtory/src/core/storage/raw_edges.rs @@ -150,6 +150,7 @@ impl EdgesStorage { pub fn write_lock(&self) -> WriteLockedEdges { WriteLockedEdges { shards: self.shards.iter().map(|shard| shard.write()).collect(), + global_len: &self.len, } } @@ -380,6 +381,7 @@ pub struct EdgeShardWriter<'a> { shard: &'a mut EdgeShard, shard_id: usize, num_shards: usize, + global_len: &'a AtomicUsize, } impl<'a> EdgeShardWriter<'a> { @@ -393,6 +395,7 @@ impl<'a> EdgeShardWriter<'a> { pub fn get_mut(&mut self, eid: EID) -> Option { let offset = self.resolve(eid)?; if self.shard.edge_ids.len() <= offset { + self.global_len.fetch_max(eid.0 + 1, Ordering::Relaxed); self.shard .edge_ids .resize_with(offset + 1, EdgeStore::default) @@ -410,6 +413,7 @@ impl<'a> EdgeShardWriter<'a> { pub struct WriteLockedEdges<'a> { shards: Vec>, + global_len: &'a AtomicUsize, } impl<'a> WriteLockedEdges<'a> { @@ -420,6 +424,7 @@ impl<'a> WriteLockedEdges<'a> { .iter_mut() .map(|shard| shard.deref_mut()) .collect(); + let global_len = self.global_len; shards .into_par_iter() .enumerate() @@ -427,6 +432,7 @@ impl<'a> WriteLockedEdges<'a> { shard, shard_id, num_shards, + global_len, }) } diff --git a/raphtory/src/core/utils/errors.rs b/raphtory/src/core/utils/errors.rs index 7bd6a7fb2c..0a094cc8b1 100644 --- a/raphtory/src/core/utils/errors.rs +++ b/raphtory/src/core/utils/errors.rs @@ -1,4 +1,6 @@ -use crate::core::{utils::time::error::ParseTimeError, Prop, PropType}; +use crate::core::{ + storage::lazy_vec::IllegalSet, utils::time::error::ParseTimeError, Prop, PropType, +}; #[cfg(feature = "arrow")] use polars_arrow::{datatypes::ArrowDataType, legacy::error}; #[cfg(feature = "storage")] @@ -9,7 +11,7 @@ use raphtory_api::core::{ entities::{GidType, GID}, storage::arc_str::ArcStr, }; -use std::{io, path::PathBuf}; +use std::{fmt::Debug, io, path::PathBuf}; #[cfg(feature = "search")] use tantivy; #[cfg(feature = "search")] @@ -263,6 +265,12 @@ impl GraphError { } } +impl From> for GraphError { + fn from(value: IllegalSet) -> Self { + Self::IllegalSet(value.to_string()) + } +} + #[derive(thiserror::Error, Debug, PartialEq)] pub enum MutateGraphError { #[error("Create node '{node_id}' first before adding static properties to it")] diff --git a/raphtory/src/db/api/properties/internal.rs b/raphtory/src/db/api/properties/internal.rs index 51ad02c985..9eda14e030 100644 --- a/raphtory/src/db/api/properties/internal.rs +++ b/raphtory/src/db/api/properties/internal.rs @@ -1,5 +1,5 @@ use crate::{ - core::{storage::timeindex::AsTime, Prop}, + core::{storage::timeindex::AsTime, Prop, PropType}, db::api::view::internal::Base, }; use chrono::{DateTime, Utc}; @@ -8,6 +8,7 @@ use raphtory_api::core::storage::arc_str::ArcStr; #[enum_dispatch] pub trait TemporalPropertyViewOps { + fn dtype(&self, id: usize) -> PropType; fn temporal_value(&self, id: usize) -> Option { self.temporal_values(id).last().cloned() } @@ -81,6 +82,10 @@ impl TemporalPropertyViewOps for P where P::Base: TemporalPropertyViewOps, { + #[inline] + fn dtype(&self, id: usize) -> PropType { + self.base().dtype(id) + } #[inline] fn temporal_value(&self, id: usize) -> Option { self.base().temporal_value(id) diff --git a/raphtory/src/db/api/properties/temporal_props.rs b/raphtory/src/db/api/properties/temporal_props.rs index 0d8700a48c..bf20d974a0 100644 --- a/raphtory/src/db/api/properties/temporal_props.rs +++ b/raphtory/src/db/api/properties/temporal_props.rs @@ -1,5 +1,5 @@ use crate::{ - core::{DocumentInput, Prop, PropUnwrap}, + core::{DocumentInput, Prop, PropType, PropUnwrap}, db::{api::properties::internal::PropertiesOps, graph::views::deletion_graph::PersistentGraph}, prelude::Graph, }; @@ -20,6 +20,10 @@ impl TemporalPropertyView

{ pub(crate) fn new(props: P, key: usize) -> Self { TemporalPropertyView { props, id: key } } + + pub fn dtype(&self) -> PropType { + self.props.dtype(self.id) + } pub fn history(&self) -> Vec { self.props.temporal_history(self.id) } diff --git a/raphtory/src/db/api/storage/graph/storage_ops/mod.rs b/raphtory/src/db/api/storage/graph/storage_ops/mod.rs index 1557a93ead..2cca8a42f5 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/mod.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/mod.rs @@ -87,6 +87,12 @@ impl CoreGraphOps for GraphStorage { } } +impl From for GraphStorage { + fn from(value: TemporalGraph) -> Self { + Self::Unlocked(Arc::new(value)) + } +} + impl Default for GraphStorage { fn default() -> Self { GraphStorage::Unlocked(Arc::new(TemporalGraph::default())) @@ -378,6 +384,19 @@ impl GraphStorage { }) } + pub fn nodes_par_opt<'a, 'graph: 'a, G: GraphViewOps<'graph>>( + &'a self, + view: &'a G, + type_filter: Option<&'a Arc<[bool]>>, + ) -> impl IndexedParallelIterator>> + 'a { + view.node_list().into_par_iter().map(move |vid| { + let node = self.node_entry(vid); + (type_filter.map_or(true, |type_filter| type_filter[node.node_type_id()]) + && view.filter_node(node.as_ref(), view.layer_ids())) + .then_some(node) + }) + } + pub fn into_nodes_par<'graph, G: GraphViewOps<'graph>>( self, view: G, diff --git a/raphtory/src/db/api/storage/graph/storage_ops/time_props.rs b/raphtory/src/db/api/storage/graph/storage_ops/time_props.rs index 1a05f74d43..31a86535f2 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/time_props.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/time_props.rs @@ -2,7 +2,9 @@ use std::ops::Deref; use raphtory_api::core::storage::{arc_str::ArcStr, timeindex::AsTime}; +use super::GraphStorage; use crate::{ + core::PropType, db::api::{ properties::internal::{TemporalPropertiesOps, TemporalPropertyViewOps}, storage::graph::tprop_storage_ops::TPropOps, @@ -10,9 +12,10 @@ use crate::{ prelude::Prop, }; -use super::GraphStorage; - impl TemporalPropertyViewOps for GraphStorage { + fn dtype(&self, id: usize) -> PropType { + self.graph_meta().get_temporal_dtype(id).unwrap() + } fn temporal_history(&self, id: usize) -> Vec { self.graph_meta() .get_temporal_prop(id) diff --git a/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs b/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs index da5d381138..216665e409 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs @@ -1,4 +1,4 @@ -use std::ops::Range; +use std::{iter, ops::Range}; use itertools::{kmerge, Itertools}; use raphtory_api::core::{ @@ -312,9 +312,9 @@ impl TimeSemantics for GraphStorage { node.tprop(prop_id).len() > 0 } - fn temporal_node_prop_vec(&self, v: VID, id: usize) -> Vec<(i64, Prop)> { + fn temporal_node_prop_hist(&self, v: VID, id: usize) -> BoxedLIter<(TimeIndexEntry, Prop)> { let node = self.node_entry(v); - node.tprop(id).iter_t().collect() + GenLockedIter::from(node, |node| node.tprop(id).iter().into_dyn_boxed()).into_dyn_boxed() } fn has_temporal_node_prop_window(&self, v: VID, prop_id: usize, w: Range) -> bool { @@ -324,15 +324,20 @@ impl TimeSemantics for GraphStorage { iter_window_t.next().is_some() } - fn temporal_node_prop_vec_window( + fn temporal_node_prop_hist_window( &self, v: VID, id: usize, start: i64, end: i64, - ) -> Vec<(i64, Prop)> { + ) -> BoxedLIter<(TimeIndexEntry, Prop)> { let node = self.node_entry(v); - node.tprop(id).iter_window_t(start..end).collect() + GenLockedIter::from(node, |node| { + node.tprop(id) + .iter_window(TimeIndexEntry::range(start..end)) + .into_dyn_boxed() + }) + .into_dyn_boxed() } fn has_temporal_edge_prop_window( @@ -348,31 +353,37 @@ impl TimeSemantics for GraphStorage { .any(|(_, p)| p.active(w.clone())) } - fn temporal_edge_prop_vec_window( - &self, + fn temporal_edge_prop_hist_window<'a>( + &'a self, e: EdgeRef, prop_id: usize, start: i64, end: i64, - layer_ids: &LayerIds, - ) -> Vec<(i64, Prop)> { + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, (TimeIndexEntry, Prop)> { let entry = self.core_edge(e.into()); match e.time() { Some(t) => { if (start..end).contains(&t.t()) { - entry - .temporal_prop_iter(layer_ids, prop_id) - .flat_map(|(_, p)| p.at(&t).map(|v| (t.t(), v))) - .collect() + GenLockedIter::from(entry, move |entry| { + entry + .temporal_prop_iter(layer_ids, prop_id) + .flat_map(move |(_, p)| p.at(&t).map(move |v| (t, v))) + .into_dyn_boxed() + }) + .into_dyn_boxed() } else { - vec![] + iter::empty().into_dyn_boxed() } } - None => entry - .temporal_prop_iter(layer_ids, prop_id) - .map(|(_, p)| p.iter_window_t(start..end)) - .kmerge() - .collect(), + None => GenLockedIter::from(entry, |entry| { + entry + .temporal_prop_iter(layer_ids, prop_id) + .map(|(_, p)| p.iter_window(TimeIndexEntry::range(start..end))) + .kmerge() + .into_dyn_boxed() + }) + .into_dyn_boxed(), } } @@ -381,23 +392,29 @@ impl TimeSemantics for GraphStorage { (&entry).has_temporal_prop(layer_ids, prop_id) } - fn temporal_edge_prop_vec( - &self, + fn temporal_edge_prop_hist<'a>( + &'a self, e: EdgeRef, prop_id: usize, - layer_ids: &LayerIds, - ) -> Vec<(i64, Prop)> { + layer_ids: &'a LayerIds, + ) -> BoxedLIter<(TimeIndexEntry, Prop)> { let entry = self.core_edge(e.into()); match e.time() { - Some(t) => entry - .temporal_prop_iter(layer_ids, prop_id) - .flat_map(|(_, p)| p.at(&t).map(|v| (t.t(), v))) - .collect(), - None => entry - .temporal_prop_iter(layer_ids, prop_id) - .map(|(_, p)| p.iter_t()) - .kmerge() - .collect(), + Some(t) => GenLockedIter::from(entry, move |entry| { + entry + .temporal_prop_iter(layer_ids, prop_id) + .flat_map(move |(_, p)| p.at(&t).map(move |v| (t, v))) + .into_dyn_boxed() + }) + .into_dyn_boxed(), + None => GenLockedIter::from(entry, |entry| { + entry + .temporal_prop_iter(layer_ids, prop_id) + .map(|(_, p)| p.iter()) + .kmerge() + .into_dyn_boxed() + }) + .into_dyn_boxed(), } } } diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 6a517e7215..176bf6ee61 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -1,13 +1,13 @@ use crate::{ core::{ - entities::{nodes::node_ref::AsNodeRef, LayerIds, VID}, + entities::{graph::tgraph::TemporalGraph, nodes::node_ref::AsNodeRef, LayerIds, VID}, storage::timeindex::AsTime, utils::errors::GraphError, }, db::{ api::{ mutation::{internal::InternalAdditionOps, AdditionOps, PropertyAdditionOps}, - properties::Properties, + properties::{internal::TemporalPropertiesOps, Properties}, storage::graph::{ edges::edge_storage_ops::EdgeStorageOps, nodes::node_storage_ops::NodeStorageOps, storage_ops::GraphStorage, @@ -30,7 +30,11 @@ use chrono::{DateTime, Utc}; use itertools::Itertools; use raphtory_api::core::{ entities::EID, - storage::arc_str::{ArcStr, OptionAsStr}, + storage::{ + arc_str::{ArcStr, OptionAsStr}, + timeindex::TimeIndexEntry, + }, + Direction, }; use rayon::prelude::*; use rustc_hash::FxHashSet; @@ -139,16 +143,27 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> fn materialize(&self) -> Result { let storage = self.core_graph().lock(); - let g = GraphStorage::default(); + let g = TemporalGraph::default(); + if let Some(earliest) = self.earliest_time() { + g.update_time(TimeIndexEntry::start(earliest)); + } else { + return Ok(self.new_base_graph(g.into())); + }; + + if let Some(latest) = self.latest_time() { + g.update_time(TimeIndexEntry::end(latest)); + } else { + return Ok(self.new_base_graph(g.into())); + }; let layer_map: Vec<_> = match self.layer_ids() { LayerIds::None => { - return Ok(self.new_base_graph(g)); + return Ok(self.new_base_graph(g.into())); } LayerIds::All => { let mut layer_map = vec![0; self.unfiltered_num_layers()]; let layers = storage.edge_meta().layer_meta().get_keys(); - for id in (1..layers.len()) { + for id in 1..layers.len() { let new_id = g.resolve_layer(Some(&layers[id]))?.inner(); layer_map[id] = new_id; } @@ -176,17 +191,30 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> }; let mut node_map = vec![VID::default(); storage.unfiltered_num_nodes()]; - for node in self.nodes().iter() { - let new_id = g.resolve_node(node)?; - let VID(old_index) = node.node; - node_map[old_index] = new_id.inner(); - } - - let earliest = if let Some(earliest) = self.earliest_time() { - earliest - } else { - return Ok(self.new_base_graph(g)); - }; + storage + .nodes_par_opt(self, None) + .zip(node_map.par_iter_mut()) + .try_for_each(|(node, entry)| { + if let Some(node) = node { + let node_type_id = node.node_type_id(); + let new_id = if node_type_id != 0 { + g.resolve_node_and_type( + node.id(), + self.node_meta() + .get_node_type_name_by_id(node_type_id) + .as_str() + .unwrap(), + )? + .inner() + .0 + .inner() + } else { + g.resolve_node(node.id())?.inner() + }; + *entry = new_id; + } + Ok::<(), GraphError>(()) + })?; { // scope for the write lock @@ -206,7 +234,7 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> let t_props = e.properties().temporal(); let mut props_iter = t_props.iter_latest().peekable(); if props_iter.peek().is_some() { - let mut edge_layer = new_edge.layer_mut(layer); + let edge_layer = new_edge.layer_mut(layer); for (prop_name, prop_value) in props_iter { let prop_id = g .resolve_edge_property( @@ -219,6 +247,24 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> } } } + for e in edge.explode_layers() { + let layer = layer_map[*e.edge.layer().unwrap()]; + let c_props = e.properties().constant(); + let mut props_iter = c_props.iter().peekable(); + if props_iter.peek().is_some() { + let edge_layer = new_edge.layer_mut(layer); + for (prop_name, prop_value) in props_iter { + let prop_id = g + .resolve_edge_property( + &prop_name, + prop_value.dtype(), + true, + )? + .inner(); + edge_layer.add_constant_prop(prop_id, prop_value)?; + } + } + } if self.include_deletions() { for e in edge.explode_layers() { let layer = *e.edge.layer().unwrap(); @@ -226,8 +272,7 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> let mut deletion_history = self.edge_deletion_history(edge.edge, &layer_ids).peekable(); if deletion_history.peek().is_some() { - let mut edge_deletions = - new_edge.deletions_mut(layer_map[layer]); + let edge_deletions = new_edge.deletions_mut(layer_map[layer]); for t in deletion_history { edge_deletions.insert(t); } @@ -239,65 +284,70 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> Ok::<(), GraphError>(()) })?; - // Add edges first so we definitely have all associated nodes (important in case of persistent edges) - for e in self.edges() { - // FIXME: this needs to be verified - for ee in e.explode_layers() { - let layer_id = *ee.edge.layer().expect("exploded layers"); - let layer_ids = LayerIds::One(layer_id); - let layer_name = self.get_layer_name(layer_id); - let layer_name: Option<&str> = if layer_id == 0 { - None - } else { - Some(&layer_name) - }; - - for ee in ee.explode() { - g.add_edge( - ee.time().expect("exploded edge"), - ee.src().id(), - ee.dst().id(), - ee.properties().temporal().collect_properties(), - layer_name, - )?; + new_storage.nodes.par_iter_mut().try_for_each(|mut shard| { + for (eid, edge) in self.edges().iter().enumerate() { + if let Some(src_node) = shard.get_mut(node_map[edge.edge.src().index()]) { + for ee in edge.explode_layers() { + src_node.add_edge( + node_map[edge.edge.dst().index()], + Direction::OUT, + *ee.edge.layer().unwrap(), + EID(eid), + ); + } } - - if self.include_deletions() { - for t in self.edge_deletion_history(e.edge, &layer_ids) { - g.delete_edge(t.t(), e.src().id(), e.dst().id(), layer_name)?; + if let Some(dst_node) = shard.get_mut(node_map[edge.edge.dst().index()]) { + for ee in edge.explode_layers() { + dst_node.add_edge( + node_map[edge.edge.src().index()], + Direction::IN, + *ee.edge.layer().unwrap(), + EID(eid), + ); } } - - g.edge(ee.src().id(), ee.dst().id()) - .expect("edge added") - .add_constant_properties(ee.properties().constant(), layer_name)?; } - } - - for v in self.nodes().iter() { - let v_type_string = v.node_type(); //stop it being dropped - let v_type_str = v_type_string.as_str(); - for h in v.history() { - g.add_node(h, v.id(), NO_PROPS, v_type_str)?; - } - for (name, prop_view) in v.properties().temporal().iter() { - for (t, prop) in prop_view.iter() { - g.add_node(t, v.id(), [(name.clone(), prop)], v_type_str)?; + let nodes = self.nodes(); + for node in nodes.iter() { + if let Some(new_node) = shard.get_mut(node_map[node.node.index()]) { + if let Some(earliest) = node.earliest_time() { + // explicitly add node earliest_time to handle PersistentGraph + new_node.update_time(TimeIndexEntry::start(earliest)) + } + for t in node.history() { + new_node.update_time(TimeIndexEntry::start(t)); + } + for prop_id in node.temporal_prop_ids() { + let prop_name = self.node_meta().temporal_prop_meta().get_name(prop_id); + let prop_type = self + .node_meta() + .temporal_prop_meta() + .get_dtype(prop_id) + .unwrap(); + let new_prop_id = g + .resolve_node_property(&prop_name, prop_type, false)? + .inner(); + for (t, prop_value) in self.temporal_node_prop_hist(node.node, prop_id) + { + new_node.add_prop(t, new_prop_id, prop_value)?; + } + } + for (c_prop_name, prop_value) in node.properties().constant().iter() { + let prop_id = g + .resolve_node_property(&c_prop_name, prop_value.dtype(), true)? + .inner(); + new_node.add_constant_prop(prop_id, prop_value)?; + } } } - let node = match g.node(v.id()) { - Some(node) => node, - None => g.add_node(earliest, v.id(), NO_PROPS, v_type_str)?, - }; - - node.add_constant_properties(v.properties().constant())?; - } + Ok::<(), GraphError>(()) + })?; g.add_constant_properties(self.properties().constant())?; } - Ok(self.new_base_graph(g)) + Ok(self.new_base_graph(g.into())) } fn subgraph, V: AsNodeRef>(&self, nodes: I) -> NodeSubgraph { diff --git a/raphtory/src/db/api/view/internal/time_semantics.rs b/raphtory/src/db/api/view/internal/time_semantics.rs index 72ec4badf4..de2b968ef4 100644 --- a/raphtory/src/db/api/view/internal/time_semantics.rs +++ b/raphtory/src/db/api/view/internal/time_semantics.rs @@ -204,7 +204,7 @@ pub trait TimeSemantics { /// A vector of tuples representing the temporal values of the property for the given node /// that fall within the specified time window, where the first element of each tuple is the timestamp /// and the second element is the property value. - fn temporal_node_prop_vec(&self, v: VID, id: usize) -> Vec<(i64, Prop)>; + fn temporal_node_prop_hist(&self, v: VID, id: usize) -> BoxedLIter<(TimeIndexEntry, Prop)>; /// Check if node has temporal property with the given id in the window /// @@ -230,13 +230,13 @@ pub trait TimeSemantics { /// A vector of tuples representing the temporal values of the property for the given node /// that fall within the specified time window, where the first element of each tuple is the timestamp /// and the second element is the property value. - fn temporal_node_prop_vec_window( + fn temporal_node_prop_hist_window( &self, v: VID, id: usize, start: i64, end: i64, - ) -> Vec<(i64, Prop)>; + ) -> BoxedLIter<(TimeIndexEntry, Prop)>; /// Check if edge has temporal property with the given id in the window /// @@ -268,14 +268,14 @@ pub trait TimeSemantics { /// * A `Vec` of tuples containing the values of the temporal property with the given name for the given edge /// within the specified time window. /// - fn temporal_edge_prop_vec_window( - &self, + fn temporal_edge_prop_hist_window<'a>( + &'a self, e: EdgeRef, id: usize, start: i64, end: i64, - layer_ids: &LayerIds, - ) -> Vec<(i64, Prop)>; + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, (TimeIndexEntry, Prop)>; /// Check if edge has temporal property with the given id /// @@ -296,12 +296,12 @@ pub trait TimeSemantics { /// Returns: /// /// * A `Vec` of tuples containing the values of the temporal property with the given name for the given edge. - fn temporal_edge_prop_vec( - &self, + fn temporal_edge_prop_hist<'a>( + &'a self, e: EdgeRef, id: usize, - layer_ids: &LayerIds, - ) -> Vec<(i64, Prop)>; + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, (TimeIndexEntry, Prop)>; } pub trait InheritTimeSemantics: Base {} @@ -540,8 +540,12 @@ impl TimeSemantics for G { } #[inline] - fn temporal_node_prop_vec(&self, v: VID, prop_id: usize) -> Vec<(i64, Prop)> { - self.graph().temporal_node_prop_vec(v, prop_id) + fn temporal_node_prop_hist( + &self, + v: VID, + prop_id: usize, + ) -> BoxedLIter<(TimeIndexEntry, Prop)> { + self.graph().temporal_node_prop_hist(v, prop_id) } #[inline] @@ -550,15 +554,15 @@ impl TimeSemantics for G { } #[inline] - fn temporal_node_prop_vec_window( + fn temporal_node_prop_hist_window( &self, v: VID, prop_id: usize, start: i64, end: i64, - ) -> Vec<(i64, Prop)> { + ) -> BoxedLIter<(TimeIndexEntry, Prop)> { self.graph() - .temporal_node_prop_vec_window(v, prop_id, start, end) + .temporal_node_prop_hist_window(v, prop_id, start, end) } fn has_temporal_edge_prop_window( @@ -573,29 +577,30 @@ impl TimeSemantics for G { } #[inline] - fn temporal_edge_prop_vec_window( - &self, + fn temporal_edge_prop_hist_window<'a>( + &'a self, e: EdgeRef, prop_id: usize, start: i64, end: i64, - layer_ids: &LayerIds, - ) -> Vec<(i64, Prop)> { + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, (TimeIndexEntry, Prop)> { self.graph() - .temporal_edge_prop_vec_window(e, prop_id, start, end, layer_ids) + .temporal_edge_prop_hist_window(e, prop_id, start, end, layer_ids) } + #[inline] fn has_temporal_edge_prop(&self, e: EdgeRef, prop_id: usize, layer_ids: &LayerIds) -> bool { self.graph().has_temporal_edge_prop(e, prop_id, layer_ids) } #[inline] - fn temporal_edge_prop_vec( - &self, + fn temporal_edge_prop_hist<'a>( + &'a self, e: EdgeRef, prop_id: usize, - layer_ids: &LayerIds, - ) -> Vec<(i64, Prop)> { - self.graph().temporal_edge_prop_vec(e, prop_id, layer_ids) + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, (TimeIndexEntry, Prop)> { + self.graph().temporal_edge_prop_hist(e, prop_id, layer_ids) } } diff --git a/raphtory/src/db/graph/edge.rs b/raphtory/src/db/graph/edge.rs index 150b0df4dd..5c164068b9 100644 --- a/raphtory/src/db/graph/edge.rs +++ b/raphtory/src/db/graph/edge.rs @@ -12,6 +12,7 @@ use crate::{ entities::{edges::edge_ref::EdgeRef, LayerIds, VID}, storage::timeindex::AsTime, utils::{errors::GraphError, time::IntoTime}, + PropType, }, db::{ api::{ @@ -308,16 +309,24 @@ impl<'graph, G: GraphViewOps<'graph>, GH: GraphViewOps<'graph>> ConstPropertiesO impl<'graph, G: GraphViewOps<'graph>, GH: GraphViewOps<'graph>> TemporalPropertyViewOps for EdgeView { + fn dtype(&self, id: usize) -> PropType { + self.graph + .edge_meta() + .temporal_prop_meta() + .get_dtype(id) + .unwrap() + } + fn temporal_history(&self, id: usize) -> Vec { self.graph - .temporal_edge_prop_vec(self.edge, id, &self.layer_ids()) + .temporal_edge_prop_hist(self.edge, id, &self.layer_ids()) .into_iter() - .map(|(t, _)| t) + .map(|(t, _)| t.t()) .collect() } fn temporal_history_date_time(&self, id: usize) -> Option>> { self.graph - .temporal_edge_prop_vec(self.edge, id, &self.layer_ids()) + .temporal_edge_prop_hist(self.edge, id, &self.layer_ids()) .into_iter() .map(|(t, _)| t.dt()) .collect() @@ -326,7 +335,7 @@ impl<'graph, G: GraphViewOps<'graph>, GH: GraphViewOps<'graph>> TemporalProperty fn temporal_values(&self, id: usize) -> Vec { let layer_ids = self.layer_ids(); self.graph - .temporal_edge_prop_vec(self.edge, id, &layer_ids) + .temporal_edge_prop_hist(self.edge, id, &layer_ids) .into_iter() .map(|(_, v)| v) .collect() diff --git a/raphtory/src/db/graph/node.rs b/raphtory/src/db/graph/node.rs index dc7d9f708c..b18b499331 100644 --- a/raphtory/src/db/graph/node.rs +++ b/raphtory/src/db/graph/node.rs @@ -26,7 +26,7 @@ use crate::{ }; use crate::{ - core::{entities::nodes::node_ref::AsNodeRef, storage::timeindex::AsTime}, + core::{entities::nodes::node_ref::AsNodeRef, storage::timeindex::AsTime, PropType}, db::{api::storage::graph::storage_ops::GraphStorage, graph::edges::Edges}, }; use chrono::{DateTime, Utc}; @@ -198,25 +198,32 @@ impl TemporalPropertiesOps for NodeView TemporalPropertyViewOps for NodeView { +impl TemporalPropertyViewOps for NodeView { + fn dtype(&self, id: usize) -> PropType { + self.graph + .node_meta() + .temporal_prop_meta() + .get_dtype(id) + .unwrap() + } fn temporal_value(&self, id: usize) -> Option { self.graph - .temporal_node_prop_vec(self.node, id) + .temporal_node_prop_hist(self.node, id) .last() .map(|(_, v)| v.to_owned()) } fn temporal_history(&self, id: usize) -> Vec { self.graph - .temporal_node_prop_vec(self.node, id) + .temporal_node_prop_hist(self.node, id) .into_iter() - .map(|(t, _)| t) + .map(|(t, _)| t.t()) .collect() } fn temporal_history_date_time(&self, id: usize) -> Option>> { self.graph - .temporal_node_prop_vec(self.node, id) + .temporal_node_prop_hist(self.node, id) .into_iter() .map(|(t, _)| t.dt()) .collect() @@ -224,7 +231,7 @@ impl TemporalPropertyViewOps for NodeView { fn temporal_values(&self, id: usize) -> Vec { self.graph - .temporal_node_prop_vec(self.node, id) + .temporal_node_prop_hist(self.node, id) .into_iter() .map(|(_, v)| v) .collect() diff --git a/raphtory/src/db/graph/views/deletion_graph.rs b/raphtory/src/db/graph/views/deletion_graph.rs index a38aa2082e..4111766dd6 100644 --- a/raphtory/src/db/graph/views/deletion_graph.rs +++ b/raphtory/src/db/graph/views/deletion_graph.rs @@ -554,8 +554,12 @@ impl TimeSemantics for PersistentGraph { self.0.has_temporal_node_prop(v, prop_id) } - fn temporal_node_prop_vec(&self, v: VID, prop_id: usize) -> Vec<(i64, Prop)> { - self.0.temporal_node_prop_vec(v, prop_id) + fn temporal_node_prop_hist( + &self, + v: VID, + prop_id: usize, + ) -> BoxedLIter<(TimeIndexEntry, Prop)> { + self.0.temporal_node_prop_hist(v, prop_id) } fn has_temporal_node_prop_window(&self, v: VID, prop_id: usize, w: Range) -> bool { @@ -563,20 +567,23 @@ impl TimeSemantics for PersistentGraph { .has_temporal_node_prop_window(v, prop_id, i64::MIN..w.end) } - fn temporal_node_prop_vec_window( + fn temporal_node_prop_hist_window( &self, v: VID, prop_id: usize, start: i64, end: i64, - ) -> Vec<(i64, Prop)> { + ) -> BoxedLIter<(TimeIndexEntry, Prop)> { let node = self.core_node_entry(v); - let prop = node.tprop(prop_id); - prop.last_before(start.saturating_add(1)) - .into_iter() - .map(|(_, v)| (start, v)) - .chain(prop.iter_window_t(start.saturating_add(1)..end)) - .collect() + GenLockedIter::from(node, move |node| { + let prop = node.tprop(prop_id); + prop.last_before(start.saturating_add(1)) + .into_iter() + .map(move |(_, v)| (TimeIndexEntry::start(start), v)) + .chain(prop.iter_window(TimeIndexEntry::range(start.saturating_add(1)..end))) + .into_dyn_boxed() + }) + .into_dyn_boxed() } fn has_temporal_edge_prop_window( @@ -612,45 +619,48 @@ impl TimeSemantics for PersistentGraph { } } - fn temporal_edge_prop_vec_window( - &self, + fn temporal_edge_prop_hist_window<'a>( + &'a self, e: EdgeRef, prop_id: usize, start: i64, end: i64, - layer_ids: &LayerIds, - ) -> Vec<(i64, Prop)> { + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, (TimeIndexEntry, Prop)> { let entry = self.core_edge(e.into()); - entry - .temporal_prop_iter(layer_ids, prop_id) - .map(|(l, prop)| { - let first_prop = prop - .last_before(start.saturating_add(1)) - .filter(|(t, _)| { - !entry - .deletions(l) - .active(*t..TimeIndexEntry::start(start.saturating_add(1))) - }) - .map(|(_, v)| (start, v)); - first_prop - .into_iter() - .chain(prop.iter_window_t(start.saturating_add(1)..end)) - }) - .kmerge_by(|(t1, _), (t2, _)| t1 <= t2) - .collect() + GenLockedIter::from(entry, |entry| { + entry + .temporal_prop_iter(layer_ids, prop_id) + .map(|(l, prop)| { + let first_prop = prop + .last_before(start.saturating_add(1)) + .filter(|(t, _)| { + !entry + .deletions(l) + .active(*t..TimeIndexEntry::start(start.saturating_add(1))) + }) + .map(|(_, v)| (TimeIndexEntry::start(start), v)); + first_prop.into_iter().chain( + prop.iter_window(TimeIndexEntry::range(start.saturating_add(1)..end)), + ) + }) + .kmerge_by(|(t1, _), (t2, _)| t1 <= t2) + .into_dyn_boxed() + }) + .into_dyn_boxed() } fn has_temporal_edge_prop(&self, e: EdgeRef, prop_id: usize, layer_ids: &LayerIds) -> bool { self.0.has_temporal_edge_prop(e, prop_id, layer_ids) } - fn temporal_edge_prop_vec( - &self, + fn temporal_edge_prop_hist<'a>( + &'a self, e: EdgeRef, prop_id: usize, - layer_ids: &LayerIds, - ) -> Vec<(i64, Prop)> { - self.0.temporal_edge_prop_vec(e, prop_id, layer_ids) + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, (TimeIndexEntry, Prop)> { + self.0.temporal_edge_prop_hist(e, prop_id, layer_ids) } } diff --git a/raphtory/src/db/graph/views/window_graph.rs b/raphtory/src/db/graph/views/window_graph.rs index 3f648701b7..4e49f6460f 100644 --- a/raphtory/src/db/graph/views/window_graph.rs +++ b/raphtory/src/db/graph/views/window_graph.rs @@ -41,7 +41,7 @@ use crate::{ core::{ entities::{edges::edge_ref::EdgeRef, LayerIds, VID}, storage::timeindex::AsTime, - Prop, + Prop, PropType, }, db::{ api::{ @@ -151,6 +151,13 @@ impl<'graph, G: GraphViewOps<'graph>> NodeFilterOps for WindowedGraph { } impl<'graph, G: GraphViewOps<'graph>> TemporalPropertyViewOps for WindowedGraph { + fn dtype(&self, id: usize) -> PropType { + self.graph + .graph_meta() + .temporal_prop_meta() + .get_dtype(id) + .unwrap() + } fn temporal_history(&self, id: usize) -> Vec { self.temporal_prop_vec(id) .into_iter() @@ -407,9 +414,13 @@ impl<'graph, G: GraphViewOps<'graph>> TimeSemantics for WindowedGraph { .has_temporal_node_prop_window(v, prop_id, self.start_bound()..self.end_bound()) } - fn temporal_node_prop_vec(&self, v: VID, prop_id: usize) -> Vec<(i64, Prop)> { + fn temporal_node_prop_hist( + &self, + v: VID, + prop_id: usize, + ) -> BoxedLIter<(TimeIndexEntry, Prop)> { self.graph - .temporal_node_prop_vec_window(v, prop_id, self.start_bound(), self.end_bound()) + .temporal_node_prop_hist_window(v, prop_id, self.start_bound(), self.end_bound()) } fn has_temporal_node_prop_window(&self, v: VID, prop_id: usize, w: Range) -> bool { @@ -417,15 +428,15 @@ impl<'graph, G: GraphViewOps<'graph>> TimeSemantics for WindowedGraph { .has_temporal_node_prop_window(v, prop_id, w.start..w.end) } - fn temporal_node_prop_vec_window( + fn temporal_node_prop_hist_window( &self, v: VID, prop_id: usize, start: i64, end: i64, - ) -> Vec<(i64, Prop)> { + ) -> BoxedLIter<(TimeIndexEntry, Prop)> { self.graph - .temporal_node_prop_vec_window(v, prop_id, start, end) + .temporal_node_prop_hist_window(v, prop_id, start, end) } fn has_temporal_edge_prop_window( @@ -439,16 +450,16 @@ impl<'graph, G: GraphViewOps<'graph>> TimeSemantics for WindowedGraph { .has_temporal_edge_prop_window(e, prop_id, w.start..w.end, layer_ids) } - fn temporal_edge_prop_vec_window( - &self, + fn temporal_edge_prop_hist_window<'a>( + &'a self, e: EdgeRef, prop_id: usize, start: i64, end: i64, - layer_ids: &LayerIds, - ) -> Vec<(i64, Prop)> { + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, (TimeIndexEntry, Prop)> { self.graph - .temporal_edge_prop_vec_window(e, prop_id, start, end, layer_ids) + .temporal_edge_prop_hist_window(e, prop_id, start, end, layer_ids) } fn has_temporal_edge_prop(&self, e: EdgeRef, prop_id: usize, layer_ids: &LayerIds) -> bool { @@ -460,13 +471,13 @@ impl<'graph, G: GraphViewOps<'graph>> TimeSemantics for WindowedGraph { ) } - fn temporal_edge_prop_vec( - &self, + fn temporal_edge_prop_hist<'a>( + &'a self, e: EdgeRef, prop_id: usize, - layer_ids: &LayerIds, - ) -> Vec<(i64, Prop)> { - self.graph.temporal_edge_prop_vec_window( + layer_ids: &'a LayerIds, + ) -> BoxedLIter<'a, (TimeIndexEntry, Prop)> { + self.graph.temporal_edge_prop_hist_window( e, prop_id, self.start_bound(), diff --git a/raphtory/src/python/packages/algorithms.rs b/raphtory/src/python/packages/algorithms.rs index f4ffc85745..4c878f6a49 100644 --- a/raphtory/src/python/packages/algorithms.rs +++ b/raphtory/src/python/packages/algorithms.rs @@ -55,7 +55,6 @@ use crate::{ utils::PyTime, }, }; -use itertools::Itertools; use ordered_float::OrderedFloat; use pyo3::{prelude::*, types::PyIterator}; use rand::{prelude::StdRng, SeedableRng}; From f1eda374d3f28f7bce129f53bb8b1d285f9b43f8 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 11 Sep 2024 12:08:45 +0200 Subject: [PATCH 03/18] refactor the benchmark crate so the common parts can be imported normally --- raphtory-benchmark/Cargo.toml | 4 ++++ raphtory-benchmark/benches/algobench.rs | 3 +-- raphtory-benchmark/benches/arrow_algobench.rs | 2 -- raphtory-benchmark/benches/base.rs | 7 +++---- raphtory-benchmark/benches/edge_add.rs | 2 -- raphtory-benchmark/benches/graph_ops.rs | 4 +--- raphtory-benchmark/benches/materialise.rs | 15 +++++++++++++++ raphtory-benchmark/benches/parameterized.rs | 4 +--- raphtory-benchmark/{src => bin}/main.rs | 0 raphtory-benchmark/{benches => src}/common/mod.rs | 2 +- raphtory-benchmark/src/lib.rs | 1 + 11 files changed, 27 insertions(+), 17 deletions(-) create mode 100644 raphtory-benchmark/benches/materialise.rs rename raphtory-benchmark/{src => bin}/main.rs (100%) rename raphtory-benchmark/{benches => src}/common/mod.rs (99%) create mode 100644 raphtory-benchmark/src/lib.rs diff --git a/raphtory-benchmark/Cargo.toml b/raphtory-benchmark/Cargo.toml index 8cbb064b13..8306680c25 100644 --- a/raphtory-benchmark/Cargo.toml +++ b/raphtory-benchmark/Cargo.toml @@ -42,6 +42,10 @@ harness = false name = "edge_add" harness = false +[[bench]] +name = "materialise" +harness = false + # [[bench]] # name = "arrow_algobench" # harness = false diff --git a/raphtory-benchmark/benches/algobench.rs b/raphtory-benchmark/benches/algobench.rs index 0d469c5053..adf68c591a 100644 --- a/raphtory-benchmark/benches/algobench.rs +++ b/raphtory-benchmark/benches/algobench.rs @@ -1,4 +1,3 @@ -use crate::common::bench; use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode}; use raphtory::{ algorithms::{ @@ -17,8 +16,8 @@ use raphtory::{ prelude::*, }; use rayon::prelude::*; +use raphtory_benchmark::common::bench; -mod common; pub fn local_triangle_count_analysis(c: &mut Criterion) { let mut group = c.benchmark_group("local_triangle_count"); group.sample_size(10); diff --git a/raphtory-benchmark/benches/arrow_algobench.rs b/raphtory-benchmark/benches/arrow_algobench.rs index 245f720ca4..dd719b7e57 100644 --- a/raphtory-benchmark/benches/arrow_algobench.rs +++ b/raphtory-benchmark/benches/arrow_algobench.rs @@ -1,5 +1,3 @@ -mod common; - #[cfg(feature = "storage")] pub mod arrow_bench { diff --git a/raphtory-benchmark/benches/base.rs b/raphtory-benchmark/benches/base.rs index 8837a65dc2..18e59716c2 100644 --- a/raphtory-benchmark/benches/base.rs +++ b/raphtory-benchmark/benches/base.rs @@ -1,9 +1,8 @@ -use crate::common::{bootstrap_graph, run_large_ingestion_benchmarks}; -use common::run_graph_ops_benches; use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use raphtory::{graph_loader::lotr_graph::lotr_graph, prelude::*}; - -mod common; +use raphtory_benchmark::common::{ + bootstrap_graph, run_graph_ops_benches, run_large_ingestion_benchmarks, +}; pub fn base(c: &mut Criterion) { // let mut ingestion_group = c.benchmark_group("ingestion"); diff --git a/raphtory-benchmark/benches/edge_add.rs b/raphtory-benchmark/benches/edge_add.rs index 1bef0528c4..e1275423e9 100644 --- a/raphtory-benchmark/benches/edge_add.rs +++ b/raphtory-benchmark/benches/edge_add.rs @@ -1,7 +1,5 @@ use criterion::{criterion_group, criterion_main, Criterion}; use raphtory::prelude::*; - -mod common; use rand::{ distributions::{Alphanumeric, DistString}, thread_rng, Rng, diff --git a/raphtory-benchmark/benches/graph_ops.rs b/raphtory-benchmark/benches/graph_ops.rs index 3ce0cb0efb..3d6ac48ebd 100644 --- a/raphtory-benchmark/benches/graph_ops.rs +++ b/raphtory-benchmark/benches/graph_ops.rs @@ -1,4 +1,3 @@ -use common::run_graph_ops_benches; use criterion::{criterion_group, criterion_main, Criterion}; use raphtory::{ graph_loader::sx_superuser_graph::{sx_superuser_file, sx_superuser_graph, TEdge}, @@ -6,8 +5,7 @@ use raphtory::{ prelude::*, }; use raphtory_api::core::utils::hashing::calculate_hash; - -mod common; +use raphtory_benchmark::common::run_graph_ops_benches; pub fn graph(c: &mut Criterion) { let group_name = "analysis_graph"; diff --git a/raphtory-benchmark/benches/materialise.rs b/raphtory-benchmark/benches/materialise.rs new file mode 100644 index 0000000000..4b440c7c9c --- /dev/null +++ b/raphtory-benchmark/benches/materialise.rs @@ -0,0 +1,15 @@ +use criterion::Criterion; +use raphtory::{graph_loader::sx_superuser_graph::sx_superuser_graph, prelude::Graph}; +use raphtory_benchmark::common::bench_materialise; + +pub fn bench() { + let graph = sx_superuser_graph().unwrap(); +} + +pub fn run_materialise_bench(c: &mut Criterion, graph_name: &str, graph: Graph) { + let mut graph_group = c.benchmark_group(graph_name); + let make_graph = || graph.clone(); + graph_group.finish(); + + bench_materialise(&format!("{graph_name}_materialise"), c, make_graph); +} diff --git a/raphtory-benchmark/benches/parameterized.rs b/raphtory-benchmark/benches/parameterized.rs index b91c6a0119..b8987d52a4 100644 --- a/raphtory-benchmark/benches/parameterized.rs +++ b/raphtory-benchmark/benches/parameterized.rs @@ -1,10 +1,8 @@ -use crate::common::{bootstrap_graph, run_large_ingestion_benchmarks}; use criterion::{ criterion_group, criterion_main, AxisScale, Criterion, PlotConfiguration, Throughput, }; use raphtory_api::core::entities::GID; - -mod common; +use raphtory_benchmark::common::{bootstrap_graph, run_large_ingestion_benchmarks}; pub fn parameterized(c: &mut Criterion) { let nodes_exponents = 1..6; diff --git a/raphtory-benchmark/src/main.rs b/raphtory-benchmark/bin/main.rs similarity index 100% rename from raphtory-benchmark/src/main.rs rename to raphtory-benchmark/bin/main.rs diff --git a/raphtory-benchmark/benches/common/mod.rs b/raphtory-benchmark/src/common/mod.rs similarity index 99% rename from raphtory-benchmark/benches/common/mod.rs rename to raphtory-benchmark/src/common/mod.rs index 34b2e882f4..084fef6e58 100644 --- a/raphtory-benchmark/benches/common/mod.rs +++ b/raphtory-benchmark/src/common/mod.rs @@ -570,7 +570,7 @@ pub fn run_graph_ops_benches( ); } -fn bench_materialise(name: &str, c: &mut Criterion, make_graph: F) +pub fn bench_materialise(name: &str, c: &mut Criterion, make_graph: F) where F: Fn() -> G, G: StaticGraphViewOps, diff --git a/raphtory-benchmark/src/lib.rs b/raphtory-benchmark/src/lib.rs new file mode 100644 index 0000000000..34994bf5af --- /dev/null +++ b/raphtory-benchmark/src/lib.rs @@ -0,0 +1 @@ +pub mod common; From 2789dbbbaad7b6d4684cbf4f0f9d9deb87650806 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 11 Sep 2024 16:03:29 +0200 Subject: [PATCH 04/18] make edge iterator return references --- js-raphtory/src/graph/node.rs | 12 ++++---- raphtory-benchmark/benches/algobench.rs | 2 +- raphtory-benchmark/benches/edge_add.rs | 2 +- raphtory-benchmark/benches/materialise.rs | 14 ++++----- raphtory-graphql/src/model/graph/edge.rs | 11 +++++++ raphtory-graphql/src/model/graph/edges.rs | 2 +- .../src/model/schema/edge_schema.rs | 10 +++---- raphtory-graphql/src/model/schema/mod.rs | 4 +-- .../community_detection/modularity.rs | 2 +- raphtory/src/db/api/view/edge.rs | 2 +- raphtory/src/db/graph/edge.rs | 30 +++++++++++++++++-- raphtory/src/db/graph/edges.rs | 10 +++---- raphtory/src/db/graph/graph.rs | 12 ++++++-- raphtory/src/db/task/edge/eval_edges.rs | 19 +++++++----- raphtory/src/python/graph/edge.rs | 6 ++++ raphtory/src/python/graph/edges.rs | 14 +++------ raphtory/src/python/packages/vectors.rs | 4 +-- .../types/macros/trait_impl/iterable_mixin.rs | 25 ++++++++++++++++ raphtory/src/vectors/document_ref.rs | 2 +- raphtory/src/vectors/entity_id.rs | 2 +- raphtory/src/vectors/mod.rs | 4 +-- raphtory/src/vectors/vectorisable.rs | 11 ++++--- 22 files changed, 136 insertions(+), 64 deletions(-) diff --git a/js-raphtory/src/graph/node.rs b/js-raphtory/src/graph/node.rs index 8d4e2b808c..e5879d2dd0 100644 --- a/js-raphtory/src/graph/node.rs +++ b/js-raphtory/src/graph/node.rs @@ -81,7 +81,11 @@ impl Node { #[wasm_bindgen(js_name = edges)] pub fn edges(&self) -> js_sys::Array { - self.0.edges().iter().map(Edge).map(JsValue::from).collect() + self.0 + .edges() + .iter() + .map(|e| JsValue::from(Edge(e.cloned()))) + .collect() } // out_edges @@ -90,8 +94,7 @@ impl Node { self.0 .out_edges() .iter() - .map(Edge) - .map(JsValue::from) + .map(|e| JsValue::from(Edge(e.cloned()))) .collect() } @@ -101,8 +104,7 @@ impl Node { self.0 .in_edges() .iter() - .map(Edge) - .map(JsValue::from) + .map(|e| JsValue::from(Edge(e.cloned()))) .collect() } diff --git a/raphtory-benchmark/benches/algobench.rs b/raphtory-benchmark/benches/algobench.rs index adf68c591a..9d56334ec9 100644 --- a/raphtory-benchmark/benches/algobench.rs +++ b/raphtory-benchmark/benches/algobench.rs @@ -15,8 +15,8 @@ use raphtory::{ graphgen::random_attachment::random_attachment, prelude::*, }; -use rayon::prelude::*; use raphtory_benchmark::common::bench; +use rayon::prelude::*; pub fn local_triangle_count_analysis(c: &mut Criterion) { let mut group = c.benchmark_group("local_triangle_count"); diff --git a/raphtory-benchmark/benches/edge_add.rs b/raphtory-benchmark/benches/edge_add.rs index e1275423e9..ff88954ef1 100644 --- a/raphtory-benchmark/benches/edge_add.rs +++ b/raphtory-benchmark/benches/edge_add.rs @@ -1,9 +1,9 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use raphtory::prelude::*; use rand::{ distributions::{Alphanumeric, DistString}, thread_rng, Rng, }; +use raphtory::prelude::*; fn random_string(n: usize) -> String { Alphanumeric.sample_string(&mut thread_rng(), n) diff --git a/raphtory-benchmark/benches/materialise.rs b/raphtory-benchmark/benches/materialise.rs index 4b440c7c9c..5369f57999 100644 --- a/raphtory-benchmark/benches/materialise.rs +++ b/raphtory-benchmark/benches/materialise.rs @@ -1,15 +1,11 @@ -use criterion::Criterion; +use criterion::{criterion_group, criterion_main, Criterion}; use raphtory::{graph_loader::sx_superuser_graph::sx_superuser_graph, prelude::Graph}; use raphtory_benchmark::common::bench_materialise; -pub fn bench() { +pub fn bench(c: &mut Criterion) { let graph = sx_superuser_graph().unwrap(); + bench_materialise("materialise", c, || graph.clone()); } -pub fn run_materialise_bench(c: &mut Criterion, graph_name: &str, graph: Graph) { - let mut graph_group = c.benchmark_group(graph_name); - let make_graph = || graph.clone(); - graph_group.finish(); - - bench_materialise(&format!("{graph_name}_materialise"), c, make_graph); -} +criterion_group!(benches, bench); +criterion_main!(benches); diff --git a/raphtory-graphql/src/model/graph/edge.rs b/raphtory-graphql/src/model/graph/edge.rs index a513531c0a..e73812eea7 100644 --- a/raphtory-graphql/src/model/graph/edge.rs +++ b/raphtory-graphql/src/model/graph/edge.rs @@ -29,6 +29,17 @@ impl } } +impl Edge { + pub(crate) fn from_ref< + G: StaticGraphViewOps + IntoDynamic, + GH: StaticGraphViewOps + IntoDynamic, + >( + value: EdgeView<&G, &GH>, + ) -> Self { + value.cloned().into() + } +} + #[ResolvedObjectFields] impl Edge { //////////////////////// diff --git a/raphtory-graphql/src/model/graph/edges.rs b/raphtory-graphql/src/model/graph/edges.rs index 9c2a6fa41a..c2d5952723 100644 --- a/raphtory-graphql/src/model/graph/edges.rs +++ b/raphtory-graphql/src/model/graph/edges.rs @@ -22,7 +22,7 @@ impl GqlEdges { } fn iter(&self) -> Box + '_> { - let iter = self.ee.iter().map(Edge::from); + let iter = self.ee.iter().map(Edge::from_ref); Box::new(iter) } } diff --git a/raphtory-graphql/src/model/schema/edge_schema.rs b/raphtory-graphql/src/model/schema/edge_schema.rs index 03965fe18e..81165d04cc 100644 --- a/raphtory-graphql/src/model/schema/edge_schema.rs +++ b/raphtory-graphql/src/model/schema/edge_schema.rs @@ -5,7 +5,7 @@ use dynamic_graphql::{ResolvedObject, ResolvedObjectFields}; use itertools::Itertools; use raphtory::{ db::{api::view::StaticGraphViewOps, graph::edge::EdgeView}, - prelude::EdgeViewOps, + prelude::{EdgeViewOps, GraphViewOps}, }; use std::collections::{HashMap, HashSet}; @@ -40,13 +40,13 @@ impl EdgeSchema { /// Returns the list of property schemas for edges connecting these types of nodes async fn properties(&self) -> Vec { - let filter_types = |edge: &EdgeView| { + let filter_types = |edge: &EdgeView<&G>| { let src_type = get_node_type(edge.src()); let dst_type = get_node_type(edge.dst()); src_type == self.src_type && dst_type == self.dst_type }; - - let filtered_edges = self.graph.edges().iter().filter(filter_types); + let edges = self.graph.edges(); + let filtered_edges = edges.iter().filter(filter_types); let schema: SchemaAggregate = filtered_edges .map(collect_edge_schema) @@ -57,7 +57,7 @@ impl EdgeSchema { } } -fn collect_edge_schema(edge: EdgeView) -> SchemaAggregate { +fn collect_edge_schema<'graph, G: GraphViewOps<'graph>>(edge: EdgeView) -> SchemaAggregate { edge.properties() .iter() .map(|(key, value)| (key.to_string(), HashSet::from([value.to_string()]))) diff --git a/raphtory-graphql/src/model/schema/mod.rs b/raphtory-graphql/src/model/schema/mod.rs index d60b8b096d..6a13040e3f 100644 --- a/raphtory-graphql/src/model/schema/mod.rs +++ b/raphtory-graphql/src/model/schema/mod.rs @@ -1,6 +1,6 @@ use raphtory::{ db::{api::view::StaticGraphViewOps, graph::node::NodeView}, - prelude::NodeViewOps, + prelude::{GraphViewOps, NodeViewOps}, }; use std::collections::{HashMap, HashSet}; @@ -12,7 +12,7 @@ pub(crate) mod property_schema; const ENUM_BOUNDARY: usize = 20; -fn get_node_type(node: NodeView) -> String { +fn get_node_type<'graph, G: GraphViewOps<'graph>>(node: NodeView) -> String { let prop = node.properties().get("type"); prop.map(|prop| prop.to_string()) .unwrap_or_else(|| "NONE".to_string()) diff --git a/raphtory/src/algorithms/community_detection/modularity.rs b/raphtory/src/algorithms/community_detection/modularity.rs index c20d027d46..548f99ab5d 100644 --- a/raphtory/src/algorithms/community_detection/modularity.rs +++ b/raphtory/src/algorithms/community_detection/modularity.rs @@ -197,7 +197,7 @@ impl ModularityFunction for ModularityUnDir { let w = weight_prop .map(|w| e.properties().get(w).unwrap_f64()) .unwrap_or(1.0); - let dst_id = local_id_map[&e.nbr()]; + let dst_id = local_id_map[&e.nbr().cloned()]; (dst_id, w) }) .filter(|(_, w)| w >= &tol) diff --git a/raphtory/src/db/api/view/edge.rs b/raphtory/src/db/api/view/edge.rs index 2cb8367d76..78fcd38bdc 100644 --- a/raphtory/src/db/api/view/edge.rs +++ b/raphtory/src/db/api/view/edge.rs @@ -436,7 +436,7 @@ mod test_edge_view { // FIXME: boolean properties not supported yet (Issue #48) test_graph(&graph, |graph| { - let mut exploded_edges: Vec<_> = graph.edges().explode().iter().collect(); + let mut exploded_edges: Vec<_> = graph.edges().explode().into_iter().collect(); exploded_edges.sort_by_key(|a| a.time_and_index()); let res: Vec<_> = exploded_edges diff --git a/raphtory/src/db/graph/edge.rs b/raphtory/src/db/graph/edge.rs index 5c164068b9..d7cda0a366 100644 --- a/raphtory/src/db/graph/edge.rs +++ b/raphtory/src/db/graph/edge.rs @@ -27,7 +27,7 @@ use crate::{ storage::graph::edges::edge_storage_ops::EdgeStorageOps, view::{ internal::{OneHopFilter, Static}, - BaseEdgeViewOps, IntoDynBoxed, StaticGraphViewOps, + Base, BaseEdgeViewOps, IntoDynBoxed, StaticGraphViewOps, }, }, graph::{edges::Edges, node::NodeView}, @@ -41,7 +41,7 @@ use std::{ }; /// A view of an edge in the graph. -#[derive(Clone)] +#[derive(Copy, Clone)] pub struct EdgeView { pub base_graph: G, /// A view of an edge in the graph. @@ -63,6 +63,32 @@ impl<'graph, G: GraphViewOps<'graph>> EdgeView { } } +impl EdgeView<&G, &GH> { + pub fn cloned(&self) -> EdgeView { + let graph = self.graph.clone(); + let base_graph = self.base_graph.clone(); + let edge = self.edge; + EdgeView { + base_graph, + graph, + edge, + } + } +} + +impl EdgeView { + pub fn as_ref(&self) -> EdgeView<&G, &GH> { + let graph = &self.graph; + let base_graph = &self.base_graph; + let edge = self.edge; + EdgeView { + base_graph, + graph, + edge, + } + } +} + impl<'graph, G: GraphViewOps<'graph>, GH: GraphViewOps<'graph>> EdgeView { pub(crate) fn new_filtered(base_graph: G, graph: GH, edge: EdgeRef) -> Self { Self { diff --git a/raphtory/src/db/graph/edges.rs b/raphtory/src/db/graph/edges.rs index c17ba5b0f0..84d879167d 100644 --- a/raphtory/src/db/graph/edges.rs +++ b/raphtory/src/db/graph/edges.rs @@ -55,10 +55,10 @@ impl<'graph, G: GraphViewOps<'graph>, GH: GraphViewOps<'graph>> OneHopFilter<'gr } impl<'graph, G: GraphViewOps<'graph>, GH: GraphViewOps<'graph>> Edges<'graph, G, GH> { - pub fn iter(&self) -> impl Iterator> + 'graph { - let base_graph = self.base_graph.clone(); - let graph = self.graph.clone(); - (self.edges)().map(move |e| EdgeView::new_filtered(base_graph.clone(), graph.clone(), e)) + pub fn iter(&self) -> impl Iterator> + '_ { + let base_graph = &self.base_graph; + let graph = &self.graph; + (self.edges)().map(move |e| EdgeView::new_filtered(base_graph, graph, e)) } pub fn len(&self) -> usize { @@ -70,7 +70,7 @@ impl<'graph, G: GraphViewOps<'graph>, GH: GraphViewOps<'graph>> Edges<'graph, G, } pub fn collect(&self) -> Vec> { - self.iter().collect() + self.iter().map(|e| e.cloned()).collect() } pub fn get_const_prop_id(&self, prop_name: &str) -> Option { diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index e5d5707737..abdf14f84b 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -1655,7 +1655,13 @@ mod db_tests { .add_edge(3, 1, 2, vec![("weight".to_string(), Prop::I64(3))], None) .unwrap(); test_storage!(&graph, |graph| { - let e = graph.node(1).unwrap().out_edges().iter().next().unwrap(); + let e = graph + .node(1) + .unwrap() + .out_edges() + .into_iter() + .next() + .unwrap(); let res: HashMap> = e .window(1, 3) .properties() @@ -1900,7 +1906,7 @@ mod db_tests { .explode_layers() .iter() .flat_map(|e| { - e.explode().iter().filter_map(|e| { + e.explode().into_iter().filter_map(|e| { e.edge .layer() .zip(e.time().ok()) @@ -1936,7 +1942,7 @@ mod db_tests { .explode_layers() .iter() .flat_map(|e| { - e.explode().iter().filter_map(|e| { + e.explode().into_iter().filter_map(|e| { e.edge .layer() .zip(Some(e.time().unwrap())) diff --git a/raphtory/src/db/task/edge/eval_edges.rs b/raphtory/src/db/task/edge/eval_edges.rs index abbfe2b868..82bec08b3e 100644 --- a/raphtory/src/db/task/edge/eval_edges.rs +++ b/raphtory/src/db/task/edge/eval_edges.rs @@ -91,13 +91,16 @@ impl< let ss = self.ss; let local_state_prev = self.local_state_prev; let storage = self.storage; - self.edges.iter().map(move |edge| EvalEdgeView { - ss, - edge, - storage, - node_state: node_state.clone(), - local_state_prev, - }) + self.edges + .clone() + .into_iter() + .map(move |edge| EvalEdgeView { + ss, + edge, + storage, + node_state: node_state.clone(), + local_state_prev, + }) } } @@ -118,7 +121,7 @@ impl< let ss = self.ss; let local_state_prev = self.local_state_prev; let storage = self.storage; - Box::new(self.edges.iter().map(move |edge| EvalEdgeView { + Box::new(self.edges.into_iter().map(move |edge| EvalEdgeView { ss, edge, storage, diff --git a/raphtory/src/python/graph/edge.rs b/raphtory/src/python/graph/edge.rs index 219e65a0ca..96c885a9eb 100644 --- a/raphtory/src/python/graph/edge.rs +++ b/raphtory/src/python/graph/edge.rs @@ -71,6 +71,12 @@ impl for EdgeView<&DynamicGraph, &DynamicGraph> { + fn into_py(self, py: Python<'_>) -> PyObject { + self.cloned().into_py(py) + } +} + impl + StaticGraphViewOps> From> for PyMutableEdge { fn from(value: EdgeView) -> Self { let edge = EdgeView { diff --git a/raphtory/src/python/graph/edges.rs b/raphtory/src/python/graph/edges.rs index 0f6866e6ba..22d97ddb69 100644 --- a/raphtory/src/python/graph/edges.rs +++ b/raphtory/src/python/graph/edges.rs @@ -50,7 +50,8 @@ impl_iterable_mixin!( edges, Vec>, "list[Edge]", - "edge" + "edge", + |edges: &Edges<'static, DynamicGraph>| edges.clone().into_iter() ); impl<'graph, G: GraphViewOps<'graph>, GH: GraphViewOps<'graph>> Repr for Edges<'graph, G, GH> { @@ -77,18 +78,11 @@ impl } } -impl PyEdges { - /// an iterable that can be used in rust - fn iter(&self) -> BoxedIter> { - self.edges.iter().into_dyn_boxed() - } -} - #[pymethods] impl PyEdges { /// Returns the number of edges fn count(&self) -> usize { - self.iter().count() + self.edges.len() } /// Returns the earliest time of the edges. @@ -340,7 +334,7 @@ impl PyEdges { impl Repr for PyEdges { fn repr(&self) -> String { - format!("Edges({})", iterator_repr(self.iter())) + format!("Edges({})", iterator_repr(self.edges.iter())) } } diff --git a/raphtory/src/python/packages/vectors.rs b/raphtory/src/python/packages/vectors.rs index 957e4c02a0..de8059f7ff 100644 --- a/raphtory/src/python/packages/vectors.rs +++ b/raphtory/src/python/packages/vectors.rs @@ -159,7 +159,7 @@ pub fn into_py_document( } /// This funtions ignores the time history of temporal props if their type is Document and they have a life different than Lifespan::Inherited -fn get_documents_from_props( +fn get_documents_from_props( properties: Properties

, name: &str, ) -> Box> { @@ -167,7 +167,7 @@ fn get_documents_from_props( match prop { Some(prop) => { - let props = prop.iter(); + let props = prop.into_iter(); let docs = props .map(|(time, prop)| prop_to_docs(&prop, Lifespan::event(time)).collect_vec()) .flatten(); diff --git a/raphtory/src/python/types/macros/trait_impl/iterable_mixin.rs b/raphtory/src/python/types/macros/trait_impl/iterable_mixin.rs index 2134df48d5..184c346b45 100644 --- a/raphtory/src/python/types/macros/trait_impl/iterable_mixin.rs +++ b/raphtory/src/python/types/macros/trait_impl/iterable_mixin.rs @@ -7,6 +7,31 @@ /// * collect_py_return_type: The python return type (as a string) used for documentation /// * element_name: The python element name (as a string) used for documentation macro_rules! impl_iterable_mixin { + ($obj:ty, $field:ident, $collect_return_type:ty, $collect_py_return_type:literal, $element_name:literal, $iter:expr) => { + #[pymethods] + impl $obj { + fn __len__(&self) -> usize { + self.$field.len() + } + + fn __bool__(&self) -> bool { + !self.$field.is_empty() + } + + fn __iter__(&self) -> $crate::python::utils::PyGenericIterator { + ($iter)(&self.$field).into() + } + + #[doc = concat!(" Collect all ", $element_name, "s into a list")] + /// + /// Returns: + #[doc = concat!(" ", $collect_py_return_type, ": the list of ", $element_name, "s")] + fn collect(&self) -> $collect_return_type { + self.$field.collect() + } + } + }; + ($obj:ty, $field:ident, $collect_return_type:ty, $collect_py_return_type:literal, $element_name:literal) => { #[pymethods] impl $obj { diff --git a/raphtory/src/vectors/document_ref.rs b/raphtory/src/vectors/document_ref.rs index a29d4434ea..420e444375 100644 --- a/raphtory/src/vectors/document_ref.rs +++ b/raphtory/src/vectors/document_ref.rs @@ -119,7 +119,7 @@ impl DocumentRef { src: original_graph.node(src).unwrap().name(), dst: original_graph.node(dst).unwrap().name(), content: template - .edge(&original_graph.edge(src, dst).unwrap()) + .edge(original_graph.edge(src, dst).unwrap().as_ref()) .nth(self.index) .unwrap() .content, diff --git a/raphtory/src/vectors/entity_id.rs b/raphtory/src/vectors/entity_id.rs index af12bc409a..0940736259 100644 --- a/raphtory/src/vectors/entity_id.rs +++ b/raphtory/src/vectors/entity_id.rs @@ -31,7 +31,7 @@ impl EntityId { Self::Node { id: node.id() } } - pub(crate) fn from_edge(edge: &EdgeView) -> Self { + pub(crate) fn from_edge(edge: EdgeView<&G>) -> Self { Self::Edge { src: edge.src().id(), dst: edge.dst().id(), diff --git a/raphtory/src/vectors/mod.rs b/raphtory/src/vectors/mod.rs index f40f143871..907f57a46a 100644 --- a/raphtory/src/vectors/mod.rs +++ b/raphtory/src/vectors/mod.rs @@ -237,7 +237,7 @@ mod vector_tests { let template = custom_template(); let doc: DocumentInput = template - .edge(&g.edge("Frodo", "Gandalf").unwrap()) + .edge(g.edge("Frodo", "Gandalf").unwrap().as_ref()) .next() .unwrap() .into(); @@ -261,7 +261,7 @@ mod vector_tests { // .map(|text| text.into()), // ) // } - // fn edge(&self, _edge: &EdgeView) -> Box> { + // fn edge(&self, _edge: EdgeView<&G, &G>) -> Box> { // Box::new(std::iter::empty()) // } // } diff --git a/raphtory/src/vectors/vectorisable.rs b/raphtory/src/vectors/vectorisable.rs index 159e0af1de..31f46084dd 100644 --- a/raphtory/src/vectors/vectorisable.rs +++ b/raphtory/src/vectors/vectorisable.rs @@ -1,5 +1,6 @@ use crate::{ db::api::view::{internal::IntoDynamic, StaticGraphViewOps}, + prelude::GraphViewOps, vectors::{ document_ref::DocumentRef, embedding_cache::EmbeddingCache, entity_id::EntityId, template::DocumentTemplate, vectorised_graph::VectorisedGraph, EmbeddingFunction, Lifespan, @@ -73,12 +74,13 @@ impl Vectorisable for G { life: doc.life, }) }); - let edges = self.edges().iter().flat_map(|edge| { + let edges = self.edges(); + let edges_iter = edges.iter().flat_map(|edge| { template - .edge(&edge) + .edge(edge) .enumerate() .map(move |(index, doc)| IndexedDocumentInput { - entity_id: EntityId::from_edge(&edge), + entity_id: EntityId::from_edge(edge), content: doc.content, index, life: doc.life, @@ -106,7 +108,8 @@ impl Vectorisable for G { if verbose { println!("computing embeddings for edges"); } - let edge_refs = compute_embedding_groups(edges, embedding.as_ref(), &cache_storage).await; // FIXME: re-enable + let edge_refs = + compute_embedding_groups(edges_iter, embedding.as_ref(), &cache_storage).await; // FIXME: re-enable if overwrite_cache { cache_storage.iter().for_each(|cache| cache.dump_to_disk()); From 25de9637650bc8b45571d056a9c15ede349942fb Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 11 Sep 2024 16:49:14 +0200 Subject: [PATCH 05/18] fix issues after rebase --- raphtory-graphql/src/model/schema/mod.rs | 2 +- raphtory/src/db/api/view/graph.rs | 3 +- raphtory/src/db/graph/edge.rs | 2 +- raphtory/src/python/graph/edges.rs | 3 +- .../src/python/types/wrappers/document.rs | 2 -- raphtory/src/vectors/document_ref.rs | 2 +- raphtory/src/vectors/entity_id.rs | 6 ++-- raphtory/src/vectors/mod.rs | 6 +--- raphtory/src/vectors/template.rs | 31 +++++++++---------- raphtory/src/vectors/vector_selection.rs | 29 +++++++++-------- raphtory/src/vectors/vectorisable.rs | 11 ++++--- 11 files changed, 46 insertions(+), 51 deletions(-) diff --git a/raphtory-graphql/src/model/schema/mod.rs b/raphtory-graphql/src/model/schema/mod.rs index 6a13040e3f..557bdac81a 100644 --- a/raphtory-graphql/src/model/schema/mod.rs +++ b/raphtory-graphql/src/model/schema/mod.rs @@ -1,5 +1,5 @@ use raphtory::{ - db::{api::view::StaticGraphViewOps, graph::node::NodeView}, + db::graph::node::NodeView, prelude::{GraphViewOps, NodeViewOps}, }; use std::collections::{HashMap, HashSet}; diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 176bf6ee61..2eb3d53965 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -10,7 +10,6 @@ use crate::{ properties::{internal::TemporalPropertiesOps, Properties}, storage::graph::{ edges::edge_storage_ops::EdgeStorageOps, nodes::node_storage_ops::NodeStorageOps, - storage_ops::GraphStorage, }, view::{internal::*, *}, }, @@ -24,7 +23,7 @@ use crate::{ }, }, }, - prelude::{DeletionOps, NO_PROPS}, + prelude::DeletionOps, }; use chrono::{DateTime, Utc}; use itertools::Itertools; diff --git a/raphtory/src/db/graph/edge.rs b/raphtory/src/db/graph/edge.rs index d7cda0a366..5a9d94b886 100644 --- a/raphtory/src/db/graph/edge.rs +++ b/raphtory/src/db/graph/edge.rs @@ -27,7 +27,7 @@ use crate::{ storage::graph::edges::edge_storage_ops::EdgeStorageOps, view::{ internal::{OneHopFilter, Static}, - Base, BaseEdgeViewOps, IntoDynBoxed, StaticGraphViewOps, + BaseEdgeViewOps, IntoDynBoxed, StaticGraphViewOps, }, }, graph::{edges::Edges, node::NodeView}, diff --git a/raphtory/src/python/graph/edges.rs b/raphtory/src/python/graph/edges.rs index 22d97ddb69..c27a2e5bde 100644 --- a/raphtory/src/python/graph/edges.rs +++ b/raphtory/src/python/graph/edges.rs @@ -2,8 +2,7 @@ use crate::{ core::{utils::errors::GraphError, Prop}, db::{ api::view::{ - internal::CoreGraphOps, BoxedIter, DynamicGraph, IntoDynBoxed, IntoDynamic, - StaticGraphViewOps, + internal::CoreGraphOps, DynamicGraph, IntoDynBoxed, IntoDynamic, StaticGraphViewOps, }, graph::{ edge::EdgeView, diff --git a/raphtory/src/python/types/wrappers/document.rs b/raphtory/src/python/types/wrappers/document.rs index 931863d401..07fc9581a6 100644 --- a/raphtory/src/python/types/wrappers/document.rs +++ b/raphtory/src/python/types/wrappers/document.rs @@ -1,5 +1,3 @@ -use std::sync::Arc; - use crate::{ core::{DocumentInput, Lifespan}, vectors::Embedding, diff --git a/raphtory/src/vectors/document_ref.rs b/raphtory/src/vectors/document_ref.rs index 420e444375..cb45c8a967 100644 --- a/raphtory/src/vectors/document_ref.rs +++ b/raphtory/src/vectors/document_ref.rs @@ -108,7 +108,7 @@ impl DocumentRef { EntityId::Node { id } => Document::Node { name: original_graph.node(id).unwrap().name(), content: template - .node(&original_graph.node(id).unwrap()) + .node((&&original_graph).node(id).unwrap()) .nth(self.index) .unwrap() .content, diff --git a/raphtory/src/vectors/entity_id.rs b/raphtory/src/vectors/entity_id.rs index 0940736259..477ff638ef 100644 --- a/raphtory/src/vectors/entity_id.rs +++ b/raphtory/src/vectors/entity_id.rs @@ -3,7 +3,7 @@ use crate::{ api::view::StaticGraphViewOps, graph::{edge::EdgeView, node::NodeView}, }, - prelude::{EdgeViewOps, NodeViewOps}, + prelude::{EdgeViewOps, GraphViewOps, NodeViewOps}, }; use raphtory_api::core::entities::GID; use serde::{Deserialize, Serialize}; @@ -27,11 +27,11 @@ impl EntityId { } } - pub(crate) fn from_node(node: &NodeView) -> Self { + pub(crate) fn from_node<'graph, G: GraphViewOps<'graph>>(node: NodeView) -> Self { Self::Node { id: node.id() } } - pub(crate) fn from_edge(edge: EdgeView<&G>) -> Self { + pub(crate) fn from_edge<'graph, G: GraphViewOps<'graph>>(edge: EdgeView) -> Self { Self::Edge { src: edge.src().id(), dst: edge.dst().id(), diff --git a/raphtory/src/vectors/mod.rs b/raphtory/src/vectors/mod.rs index 907f57a46a..8b46c1c438 100644 --- a/raphtory/src/vectors/mod.rs +++ b/raphtory/src/vectors/mod.rs @@ -107,10 +107,6 @@ mod vector_tests { use super::*; use crate::{ core::Prop, - db::{ - api::view::StaticGraphViewOps, - graph::{edge::EdgeView, node::NodeView}, - }, prelude::{AdditionOps, EdgeViewOps, Graph, GraphViewOps, NodeViewOps}, vectors::{embeddings::openai_embedding, vectorisable::Vectorisable}, }; @@ -220,7 +216,7 @@ mod vector_tests { let template = custom_template(); let doc: DocumentInput = template - .node(&g.node("Frodo").unwrap()) + .node(g.node("Frodo").unwrap()) .next() .unwrap() .into(); diff --git a/raphtory/src/vectors/template.rs b/raphtory/src/vectors/template.rs index 2a1c3c938f..5e85801cff 100644 --- a/raphtory/src/vectors/template.rs +++ b/raphtory/src/vectors/template.rs @@ -6,24 +6,23 @@ use raphtory_api::core::storage::arc_str::ArcStr; use serde::Serialize; use std::sync::Arc; +use super::datetimeformat::datetimeformat; use crate::{ core::{DocumentInput, Prop}, db::{ api::{properties::TemporalPropertyView, view::StaticGraphViewOps}, graph::{edge::EdgeView, node::NodeView}, }, - prelude::{EdgeViewOps, NodeViewOps}, + prelude::{EdgeViewOps, GraphViewOps, NodeViewOps}, }; -use super::datetimeformat::datetimeformat; - #[derive(Debug)] struct PropUpdate { time: i64, value: Value, } -impl From>> for Value { +impl<'graph, G: GraphViewOps<'graph>> From>> for Value { fn from(value: TemporalPropertyView>) -> Self { value .iter() @@ -73,8 +72,8 @@ struct NodeTemplateContext { temporal_props: Value, } -impl From<&NodeView> for NodeTemplateContext { - fn from(value: &NodeView) -> Self { +impl<'graph, G: GraphViewOps<'graph>> From> for NodeTemplateContext { + fn from(value: NodeView) -> Self { Self { name: value.name(), node_type: value.node_type(), @@ -193,9 +192,9 @@ impl DocumentTemplate { } /// A function that translate a node into an iterator of documents - pub(crate) fn node( + pub(crate) fn node<'graph, G: GraphViewOps<'graph>>( &self, - node: &NodeView, + node: NodeView, ) -> Box> { match &self.node_template { Some(template) => { @@ -216,7 +215,7 @@ impl DocumentTemplate { /// A function that translate an edge into an iterator of documents pub(crate) fn edge( &self, - edge: &EdgeView, + edge: EdgeView<&G, &G>, ) -> Box> { match &self.edge_template { Some(template) => { @@ -255,11 +254,11 @@ struct EdgeTemplateContext { props: Value, } -impl From<&EdgeView> for EdgeTemplateContext { - fn from(value: &EdgeView) -> Self { +impl From> for EdgeTemplateContext { + fn from(value: EdgeView<&G>) -> Self { Self { - src: (&value.src()).into(), - dst: (&value.dst()).into(), + src: (value.src()).into(), + dst: (value.dst()).into(), history: value.history(), layers: value .layer_names() @@ -326,7 +325,7 @@ mod template_tests { edge_template: None, }; - let mut docs = template.node(&graph.node("node1").unwrap()); + let mut docs = template.node(graph.node("node1").unwrap()); let rendered = docs.next().unwrap().content; let expected = indoc! {" node node1 is an unknown entity with the following props: @@ -340,7 +339,7 @@ mod template_tests { "}; assert_eq!(&rendered, expected); - let mut docs = template.node(&graph.node("node2").unwrap()); + let mut docs = template.node(graph.node("node2").unwrap()); let rendered = docs.next().unwrap().content; let expected = indoc! {" node node2 is a person with the following props: @@ -364,7 +363,7 @@ mod template_tests { edge_template: None, }; - let mut docs = template.node(&graph.node("node1").unwrap()); + let mut docs = template.node(graph.node("node1").unwrap()); let rendered = docs.next().unwrap().content; let expected = "September 9 2024 09:08:01"; assert_eq!(&rendered, expected); diff --git a/raphtory/src/vectors/vector_selection.rs b/raphtory/src/vectors/vector_selection.rs index 3105a0f212..1fdec33cce 100644 --- a/raphtory/src/vectors/vector_selection.rs +++ b/raphtory/src/vectors/vector_selection.rs @@ -109,7 +109,7 @@ impl VectorSelection { .flat_map(|id| { let node = self.graph.source_graph.node(id); let opt = - node.map(|node| self.graph.node_documents.get(&EntityId::from_node(&node))); + node.map(|node| self.graph.node_documents.get(&EntityId::from_node(node))); opt.flatten().unwrap_or(&self.graph.empty_vec) }) .map(|doc| (doc.clone(), 0.0)); @@ -127,8 +127,11 @@ impl VectorSelection { .into_iter() .flat_map(|(src, dst)| { let edge = self.graph.source_graph.edge(src, dst); - let opt = - edge.map(|edge| self.graph.edge_documents.get(&EntityId::from_edge(&edge))); + let opt = edge.map(|edge| { + self.graph + .edge_documents + .get(&EntityId::from_edge(edge.as_ref())) + }); opt.flatten().unwrap_or(&self.graph.empty_vec) }) .map(|doc| (doc.clone(), 0.0)); @@ -450,8 +453,8 @@ impl VectorSelection { None => Box::new(std::iter::empty()), Some(node) => { let edges = node.edges(); - let edge_docs = edges.iter().flat_map(|edge| { - let edge_id = EntityId::from_edge(&edge); + let edge_docs = edges.into_iter().flat_map(|edge| { + let edge_id = EntityId::from_edge(edge); self.graph .edge_documents .get(&edge_id) @@ -474,8 +477,8 @@ impl VectorSelection { match windowed_graph.edge(src, dst) { None => Box::new(std::iter::empty()), Some(edge) => { - let src_id = EntityId::from_node(&edge.src()); - let dst_id = EntityId::from_node(&edge.dst()); + let src_id = EntityId::from_node(edge.src()); + let dst_id = EntityId::from_node(edge.dst()); let src_docs = self .graph .node_documents @@ -505,7 +508,7 @@ impl VectorSelection { ) -> Box)> + '_> { let groups = nodes .map(move |node| { - let entity_id = EntityId::from_node(&node); + let entity_id = EntityId::from_node(node); self.graph.node_documents.get(&entity_id).map(|group| { let docs = group .iter() @@ -528,7 +531,7 @@ impl VectorSelection { ) -> Box)> + '_> { let groups = edges .map(move |edge| { - let entity_id = EntityId::from_edge(&edge); + let entity_id = EntityId::from_edge(edge); self.graph.edge_documents.get(&entity_id).map(|group| { let docs = group .iter() @@ -579,15 +582,15 @@ impl VectorSelection { EntityId::Node { id } => match windowed_graph.node(id) { None => Box::new(std::iter::empty()), Some(node) => { - let edges = node.edges().iter(); - self.edges_into_document_groups(edges, windowed_graph, window) + let edges = node.edges(); + self.edges_into_document_groups(edges.into_iter(), windowed_graph, window) } }, EntityId::Edge { src, dst } => match windowed_graph.edge(src, dst) { None => Box::new(std::iter::empty()), Some(edge) => { - let src_edges = edge.src().edges().iter(); - let dst_edges = edge.dst().edges().iter(); + let src_edges = edge.src().edges().into_iter(); + let dst_edges = edge.dst().edges().into_iter(); let edges = chain!(src_edges, dst_edges); self.edges_into_document_groups(edges, windowed_graph, window) } diff --git a/raphtory/src/vectors/vectorisable.rs b/raphtory/src/vectors/vectorisable.rs index 31f46084dd..1d266b2d12 100644 --- a/raphtory/src/vectors/vectorisable.rs +++ b/raphtory/src/vectors/vectorisable.rs @@ -1,6 +1,5 @@ use crate::{ db::api::view::{internal::IntoDynamic, StaticGraphViewOps}, - prelude::GraphViewOps, vectors::{ document_ref::DocumentRef, embedding_cache::EmbeddingCache, entity_id::EntityId, template::DocumentTemplate, vectorised_graph::VectorisedGraph, EmbeddingFunction, Lifespan, @@ -63,12 +62,13 @@ impl Vectorisable for G { index, life: doc.life, }); - let nodes = self.nodes().iter_owned().flat_map(|node| { + let nodes = self.nodes(); + let nodes_iter = nodes.iter().flat_map(|node| { template - .node(&node) + .node(node) .enumerate() .map(move |(index, doc)| IndexedDocumentInput { - entity_id: EntityId::from_node(&node), + entity_id: EntityId::from_node(node), content: doc.content, index, life: doc.life, @@ -103,7 +103,8 @@ impl Vectorisable for G { if verbose { println!("computing embeddings for nodes"); } - let node_refs = compute_embedding_groups(nodes, embedding.as_ref(), &cache_storage).await; + let node_refs = + compute_embedding_groups(nodes_iter, embedding.as_ref(), &cache_storage).await; if verbose { println!("computing embeddings for edges"); From 45104b3bbc08073e61a92f52e318cd8239025341 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 11 Sep 2024 17:48:38 +0200 Subject: [PATCH 06/18] don't reorder the node ids --- raphtory/src/core/storage/mod.rs | 10 +- raphtory/src/db/api/view/graph.rs | 134 ++++++++++--------- raphtory/src/db/graph/views/node_subgraph.rs | 6 +- 3 files changed, 81 insertions(+), 69 deletions(-) diff --git a/raphtory/src/core/storage/mod.rs b/raphtory/src/core/storage/mod.rs index bd8a4269ba..171b327c71 100644 --- a/raphtory/src/core/storage/mod.rs +++ b/raphtory/src/core/storage/mod.rs @@ -195,6 +195,7 @@ impl NodeStorage { pub(crate) fn write_lock(&self) -> WriteLockedNodes { WriteLockedNodes { guards: self.data.iter().map(|lock| lock.data.write()).collect(), + global_len: &self.len, } } @@ -304,12 +305,14 @@ impl NodeStorage { pub struct WriteLockedNodes<'a> { guards: Vec>>, + global_len: &'a AtomicUsize, } pub struct NodeShardWriter<'a> { shard: &'a mut Vec, shard_id: usize, num_shards: usize, + global_len: &'a AtomicUsize, } impl<'a> NodeShardWriter<'a> { @@ -328,6 +331,8 @@ impl<'a> NodeShardWriter<'a> { if let Some(offset) = self.resolve(vid) { if offset >= self.shard.len() { self.shard.resize_with(offset + 1, NodeStore::default); + self.global_len + .fetch_max(vid.index() + 1, Ordering::Relaxed); } self.shard[offset] = NodeStore::resolved(gid.to_owned(), vid); } @@ -343,7 +348,8 @@ impl<'a> NodeShardWriter<'a> { new_len += 1; } if new_len > self.shard.len() { - self.shard.resize_with(new_len, Default::default) + self.shard.resize_with(new_len, Default::default); + self.global_len.fetch_max(new_global_len, Ordering::Relaxed); } } } @@ -351,6 +357,7 @@ impl<'a> NodeShardWriter<'a> { impl<'a> WriteLockedNodes<'a> { pub fn par_iter_mut(&mut self) -> impl IndexedParallelIterator + '_ { let num_shards = self.guards.len(); + let global_len = &self.global_len; let shards: Vec<&mut Vec> = self .guards .iter_mut() @@ -363,6 +370,7 @@ impl<'a> WriteLockedNodes<'a> { shard, shard_id, num_shards, + global_len, }) } diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 2eb3d53965..d979f512ed 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -27,17 +27,23 @@ use crate::{ }; use chrono::{DateTime, Utc}; use itertools::Itertools; -use raphtory_api::core::{ - entities::EID, - storage::{ - arc_str::{ArcStr, OptionAsStr}, - timeindex::TimeIndexEntry, +use raphtory_api::{ + atomic_extra::atomic_usize_from_mut_slice, + core::{ + entities::EID, + storage::{ + arc_str::{ArcStr, OptionAsStr}, + timeindex::TimeIndexEntry, + }, + Direction, }, - Direction, }; use rayon::prelude::*; use rustc_hash::FxHashSet; -use std::{borrow::Borrow, sync::Arc}; +use std::{ + borrow::Borrow, + sync::{atomic::Ordering, Arc}, +}; /// This trait GraphViewOps defines operations for accessing /// information about a graph. The trait has associated types @@ -189,35 +195,66 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> } }; - let mut node_map = vec![VID::default(); storage.unfiltered_num_nodes()]; - storage - .nodes_par_opt(self, None) - .zip(node_map.par_iter_mut()) - .try_for_each(|(node, entry)| { - if let Some(node) = node { - let node_type_id = node.node_type_id(); - let new_id = if node_type_id != 0 { - g.resolve_node_and_type( - node.id(), - self.node_meta() - .get_node_type_name_by_id(node_type_id) - .as_str() - .unwrap(), - )? - .inner() - .0 - .inner() - } else { - g.resolve_node(node.id())?.inner() - }; - *entry = new_id; + { + // scope for the write lock + let mut new_storage = g.write_lock()?; + new_storage.nodes.resize(self.count_nodes()); + + let mut node_map = vec![VID::default(); storage.unfiltered_num_nodes()]; + let node_map_shared = + atomic_usize_from_mut_slice(bytemuck::cast_slice_mut(&mut node_map)); + + new_storage.nodes.par_iter_mut().try_for_each(|mut shard| { + for (index, node) in self.nodes().iter().enumerate() { + let new_id = VID(index); + let gid = node.id(); + if let Some(new_node) = shard.get_mut(new_id) { + node_map_shared[node.node.index()].store(index, Ordering::Relaxed); + if let Some(node_type) = node.node_type() { + let new_type_id = g + .node_meta + .node_type_meta() + .get_or_create_id(&node_type) + .inner(); + new_node.node_type = new_type_id; + } + new_node.vid = new_id; + g.logical_to_physical.set((&gid).into(), new_id)?; + new_node.global_id = gid; + + if let Some(earliest) = node.earliest_time() { + // explicitly add node earliest_time to handle PersistentGraph + new_node.update_time(TimeIndexEntry::start(earliest)) + } + for t in node.history() { + new_node.update_time(TimeIndexEntry::start(t)); + } + for prop_id in node.temporal_prop_ids() { + let prop_name = self.node_meta().temporal_prop_meta().get_name(prop_id); + let prop_type = self + .node_meta() + .temporal_prop_meta() + .get_dtype(prop_id) + .unwrap(); + let new_prop_id = g + .resolve_node_property(&prop_name, prop_type, false)? + .inner(); + for (t, prop_value) in self.temporal_node_prop_hist(node.node, prop_id) + { + new_node.add_prop(t, new_prop_id, prop_value)?; + } + } + for (c_prop_name, prop_value) in node.properties().constant().iter() { + let prop_id = g + .resolve_node_property(&c_prop_name, prop_value.dtype(), true)? + .inner(); + new_node.add_constant_prop(prop_id, prop_value)?; + } + } } Ok::<(), GraphError>(()) })?; - { - // scope for the write lock - let mut new_storage = g.write_lock()?; new_storage.edges.par_iter_mut().try_for_each(|mut shard| { for (eid, edge) in self.edges().iter().enumerate() { if let Some(mut new_edge) = shard.get_mut(EID(eid)) { @@ -306,39 +343,6 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> } } } - let nodes = self.nodes(); - for node in nodes.iter() { - if let Some(new_node) = shard.get_mut(node_map[node.node.index()]) { - if let Some(earliest) = node.earliest_time() { - // explicitly add node earliest_time to handle PersistentGraph - new_node.update_time(TimeIndexEntry::start(earliest)) - } - for t in node.history() { - new_node.update_time(TimeIndexEntry::start(t)); - } - for prop_id in node.temporal_prop_ids() { - let prop_name = self.node_meta().temporal_prop_meta().get_name(prop_id); - let prop_type = self - .node_meta() - .temporal_prop_meta() - .get_dtype(prop_id) - .unwrap(); - let new_prop_id = g - .resolve_node_property(&prop_name, prop_type, false)? - .inner(); - for (t, prop_value) in self.temporal_node_prop_hist(node.node, prop_id) - { - new_node.add_prop(t, new_prop_id, prop_value)?; - } - } - for (c_prop_name, prop_value) in node.properties().constant().iter() { - let prop_id = g - .resolve_node_property(&c_prop_name, prop_value.dtype(), true)? - .inner(); - new_node.add_constant_prop(prop_id, prop_value)?; - } - } - } Ok::<(), GraphError>(()) })?; diff --git a/raphtory/src/db/graph/views/node_subgraph.rs b/raphtory/src/db/graph/views/node_subgraph.rs index e0adb05f70..a86a321ea0 100644 --- a/raphtory/src/db/graph/views/node_subgraph.rs +++ b/raphtory/src/db/graph/views/node_subgraph.rs @@ -102,8 +102,8 @@ impl<'graph, G: GraphViewOps<'graph>> NodeFilterOps for NodeSubgraph { #[cfg(test)] mod subgraph_tests { use crate::{ - algorithms::motifs::triangle_count::triangle_count, prelude::*, test_storage, - test_utils::test_graph, + algorithms::motifs::triangle_count::triangle_count, db::graph::graph::assert_graph_equal, + prelude::*, test_storage, test_utils::test_graph, }; use itertools::Itertools; @@ -119,7 +119,7 @@ mod subgraph_tests { let sg = graph.subgraph([1, 2]); let actual = sg.materialize().unwrap().into_events().unwrap(); - assert_eq!(actual, sg); + assert_graph_equal(&actual, &sg); }); } From 5d0ef865be6a699af3b60d30f606d036b5af3454 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 12 Sep 2024 16:05:49 +0200 Subject: [PATCH 07/18] add protobuf decoding and encoding benchmarks --- raphtory-benchmark/Cargo.toml | 10 +++++++++- raphtory-benchmark/benches/proto_decode.rs | 14 ++++++++++++++ raphtory-benchmark/benches/proto_encode.rs | 14 ++++++++++++++ raphtory-benchmark/src/common/mod.rs | 16 ++++++++++++++++ 4 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 raphtory-benchmark/benches/proto_decode.rs create mode 100644 raphtory-benchmark/benches/proto_encode.rs diff --git a/raphtory-benchmark/Cargo.toml b/raphtory-benchmark/Cargo.toml index 8306680c25..8aa8c0bf8a 100644 --- a/raphtory-benchmark/Cargo.toml +++ b/raphtory-benchmark/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" [dependencies] criterion = { workspace = true } -raphtory = { path = "../raphtory", features = ["io"], version = "0.11.3" } +raphtory = { path = "../raphtory", features = ["io", "proto"], version = "0.11.3" } raphtory-api = { path = "../raphtory-api", version = "0.11.3" } pometry-storage.workspace = true sorted_vector_map = { workspace = true } @@ -46,6 +46,14 @@ harness = false name = "materialise" harness = false +[[bench]] +name = "proto_encode" +harness = false + +[[bench]] +name = "proto_decode" +harness = false + # [[bench]] # name = "arrow_algobench" # harness = false diff --git a/raphtory-benchmark/benches/proto_decode.rs b/raphtory-benchmark/benches/proto_decode.rs new file mode 100644 index 0000000000..d2f06b10ef --- /dev/null +++ b/raphtory-benchmark/benches/proto_decode.rs @@ -0,0 +1,14 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use raphtory::graph_loader::sx_superuser_graph::sx_superuser_graph; +use raphtory_benchmark::common::run_proto_decode_benchmark; + +fn bench(c: &mut Criterion) { + let graph = sx_superuser_graph().unwrap(); + let mut group = c.benchmark_group("proto_sx_superuser"); + group.sample_size(10); + run_proto_decode_benchmark(&mut group, graph); + group.finish(); +} + +criterion_group!(benches, bench); +criterion_main!(benches); diff --git a/raphtory-benchmark/benches/proto_encode.rs b/raphtory-benchmark/benches/proto_encode.rs new file mode 100644 index 0000000000..fc3df41047 --- /dev/null +++ b/raphtory-benchmark/benches/proto_encode.rs @@ -0,0 +1,14 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use raphtory::graph_loader::sx_superuser_graph::sx_superuser_graph; +use raphtory_benchmark::common::run_proto_encode_benchmark; + +fn bench(c: &mut Criterion) { + let graph = sx_superuser_graph().unwrap(); + let mut group = c.benchmark_group("proto_sx_superuser"); + group.sample_size(10); + run_proto_encode_benchmark(&mut group, graph); + group.finish(); +} + +criterion_group!(benches, bench); +criterion_main!(benches); diff --git a/raphtory-benchmark/src/common/mod.rs b/raphtory-benchmark/src/common/mod.rs index 084fef6e58..bb66919a98 100644 --- a/raphtory-benchmark/src/common/mod.rs +++ b/raphtory-benchmark/src/common/mod.rs @@ -6,6 +6,7 @@ use criterion::{ use rand::{distributions::Uniform, seq::*, Rng, SeedableRng}; use raphtory::{db::api::view::StaticGraphViewOps, prelude::*}; use std::collections::HashSet; +use tempfile::NamedTempFile; fn make_index_gen() -> Box> { let rng = rand::thread_rng(); @@ -570,6 +571,21 @@ pub fn run_graph_ops_benches( ); } +pub fn run_proto_encode_benchmark(group: &mut BenchmarkGroup, graph: Graph) { + let f = NamedTempFile::new().unwrap(); + bench(group, "proto_encode", None, |b: &mut Bencher| { + b.iter(|| graph.encode(f.path()).unwrap()) + }); +} + +pub fn run_proto_decode_benchmark(group: &mut BenchmarkGroup, graph: Graph) { + let f = NamedTempFile::new().unwrap(); + graph.encode(f.path()).unwrap(); + bench(group, "proto_decode", None, |b| { + b.iter(|| Graph::decode(f.path()).unwrap()) + }) +} + pub fn bench_materialise(name: &str, c: &mut Criterion, make_graph: F) where F: Fn() -> G, From fd92d1e860c854720522a3889087490c4f6d64a0 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 12 Sep 2024 16:06:13 +0200 Subject: [PATCH 08/18] implement protobuf decoding using sharded readers --- raphtory/src/core/storage/mod.rs | 39 ++- raphtory/src/core/storage/raw_edges.rs | 41 ++- raphtory/src/core/utils/iter.rs | 2 +- .../internal/internal_addition_ops.rs | 16 +- .../storage/graph/storage_ops/additions.rs | 23 ++ raphtory/src/db/api/storage/storage.rs | 12 + raphtory/src/db/api/view/graph.rs | 5 +- .../src/db/api/view/internal/materialize.rs | 5 +- raphtory/src/search/mod.rs | 17 +- raphtory/src/serialise/serialise.rs | 285 ++++++++++++++---- 10 files changed, 359 insertions(+), 86 deletions(-) diff --git a/raphtory/src/core/storage/mod.rs b/raphtory/src/core/storage/mod.rs index 171b327c71..75b556a5af 100644 --- a/raphtory/src/core/storage/mod.rs +++ b/raphtory/src/core/storage/mod.rs @@ -308,14 +308,17 @@ pub struct WriteLockedNodes<'a> { global_len: &'a AtomicUsize, } -pub struct NodeShardWriter<'a> { - shard: &'a mut Vec, +pub struct NodeShardWriter<'a, S> { + shard: S, shard_id: usize, num_shards: usize, global_len: &'a AtomicUsize, } -impl<'a> NodeShardWriter<'a> { +impl<'a, S> NodeShardWriter<'a, S> +where + S: DerefMut>, +{ #[inline] fn resolve(&self, index: VID) -> Option { let (shard_id, offset) = resolve(index.into(), self.num_shards); @@ -327,15 +330,16 @@ impl<'a> NodeShardWriter<'a> { self.resolve(index).map(|offset| &mut self.shard[offset]) } - pub fn set(&mut self, vid: VID, gid: GidRef) { - if let Some(offset) = self.resolve(vid) { + pub fn set(&mut self, vid: VID, gid: GidRef) -> Option<&mut NodeStore> { + self.resolve(vid).map(|offset| { if offset >= self.shard.len() { self.shard.resize_with(offset + 1, NodeStore::default); self.global_len .fetch_max(vid.index() + 1, Ordering::Relaxed); } self.shard[offset] = NodeStore::resolved(gid.to_owned(), vid); - } + &mut self.shard[offset] + }) } pub fn shard_id(&self) -> usize { @@ -355,9 +359,11 @@ impl<'a> NodeShardWriter<'a> { } impl<'a> WriteLockedNodes<'a> { - pub fn par_iter_mut(&mut self) -> impl IndexedParallelIterator + '_ { + pub fn par_iter_mut( + &mut self, + ) -> impl IndexedParallelIterator>> + '_ { let num_shards = self.guards.len(); - let global_len = &self.global_len; + let global_len = self.global_len; let shards: Vec<&mut Vec> = self .guards .iter_mut() @@ -374,6 +380,23 @@ impl<'a> WriteLockedNodes<'a> { }) } + pub fn into_par_iter_mut( + self, + ) -> impl IndexedParallelIterator>>> + + 'a { + let num_shards = self.guards.len(); + let global_len = self.global_len; + self.guards + .into_par_iter() + .enumerate() + .map(move |(shard_id, shard)| NodeShardWriter { + shard, + shard_id, + num_shards, + global_len, + }) + } + pub fn resize(&mut self, new_len: usize) { self.par_iter_mut() .for_each(|mut shard| shard.resize(new_len)) diff --git a/raphtory/src/core/storage/raw_edges.rs b/raphtory/src/core/storage/raw_edges.rs index 9c461403e9..e84e767249 100644 --- a/raphtory/src/core/storage/raw_edges.rs +++ b/raphtory/src/core/storage/raw_edges.rs @@ -1,8 +1,11 @@ use super::{resolve, timeindex::TimeIndex}; use crate::{ - core::entities::{ - edges::edge_store::{EdgeDataLike, EdgeLayer, EdgeStore}, - LayerIds, + core::{ + entities::{ + edges::edge_store::{EdgeDataLike, EdgeLayer, EdgeStore}, + LayerIds, + }, + utils::errors::GraphError, }, db::api::storage::graph::edges::edge_storage_ops::{EdgeStorageOps, MemEdge}, }; @@ -377,14 +380,17 @@ impl LockedEdges { } } -pub struct EdgeShardWriter<'a> { - shard: &'a mut EdgeShard, +pub struct EdgeShardWriter<'a, S> { + shard: S, shard_id: usize, num_shards: usize, global_len: &'a AtomicUsize, } -impl<'a> EdgeShardWriter<'a> { +impl<'a, S> EdgeShardWriter<'a, S> +where + S: DerefMut, +{ /// Map an edge id to local offset if it is in the shard fn resolve(&self, eid: EID) -> Option { let EID(eid) = eid; @@ -401,7 +407,7 @@ impl<'a> EdgeShardWriter<'a> { .resize_with(offset + 1, EdgeStore::default) } Some(MutEdge { - guard: self.shard, + guard: self.shard.deref_mut(), i: offset, }) } @@ -417,7 +423,9 @@ pub struct WriteLockedEdges<'a> { } impl<'a> WriteLockedEdges<'a> { - pub fn par_iter_mut(&mut self) -> impl IndexedParallelIterator + '_ { + pub fn par_iter_mut( + &mut self, + ) -> impl IndexedParallelIterator> + '_ { let num_shards = self.shards.len(); let shards: Vec<_> = self .shards @@ -436,6 +444,23 @@ impl<'a> WriteLockedEdges<'a> { }) } + pub fn into_par_iter_mut( + self, + ) -> impl IndexedParallelIterator>> + 'a + { + let num_shards = self.shards.len(); + let global_len = self.global_len; + self.shards + .into_par_iter() + .enumerate() + .map(move |(shard_id, shard)| EdgeShardWriter { + shard, + shard_id, + num_shards, + global_len, + }) + } + pub fn num_shards(&self) -> usize { self.shards.len() } diff --git a/raphtory/src/core/utils/iter.rs b/raphtory/src/core/utils/iter.rs index c941e8d335..56faff5ef1 100644 --- a/raphtory/src/core/utils/iter.rs +++ b/raphtory/src/core/utils/iter.rs @@ -20,7 +20,7 @@ impl<'a, O, OUT> Iterator for GenLockedIter<'a, O, OUT> { impl<'a, O, OUT> GenLockedIter<'a, O, OUT> { pub fn from<'b>( owner: O, - iter_fn: impl FnOnce(&O) -> Box + Send + '_>, + iter_fn: impl FnOnce(&O) -> Box + Send + '_> + 'b, ) -> Self { GenLockedIterBuilder { owner, diff --git a/raphtory/src/db/api/mutation/internal/internal_addition_ops.rs b/raphtory/src/db/api/mutation/internal/internal_addition_ops.rs index 5adb417d3c..daceb2df64 100644 --- a/raphtory/src/db/api/mutation/internal/internal_addition_ops.rs +++ b/raphtory/src/db/api/mutation/internal/internal_addition_ops.rs @@ -1,7 +1,7 @@ use crate::{ core::{ entities::{nodes::node_ref::AsNodeRef, EID, VID}, - storage::timeindex::TimeIndexEntry, + storage::{raw_edges::WriteLockedEdges, timeindex::TimeIndexEntry, WriteLockedNodes}, utils::errors::GraphError, Prop, PropType, }, @@ -14,6 +14,10 @@ use raphtory_api::core::{entities::GidType, storage::dict_mapper::MaybeNew}; pub trait InternalAdditionOps { fn id_type(&self) -> Option; fn write_lock(&self) -> Result; + + fn write_lock_nodes(&self) -> Result; + + fn write_lock_edges(&self) -> Result; fn num_shards(&self) -> Result; /// get the sequence id for the next event fn next_event_id(&self) -> Result; @@ -114,6 +118,16 @@ impl InternalAdditionOps for G { self.graph().write_lock() } + #[inline] + fn write_lock_nodes(&self) -> Result { + self.graph().write_lock_nodes() + } + + #[inline] + fn write_lock_edges(&self) -> Result { + self.graph().write_lock_edges() + } + #[inline] fn num_shards(&self) -> Result { self.graph().num_shards() diff --git a/raphtory/src/db/api/storage/graph/storage_ops/additions.rs b/raphtory/src/db/api/storage/graph/storage_ops/additions.rs index bdda43a51a..01a0f2119f 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/additions.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/additions.rs @@ -5,6 +5,7 @@ use crate::{ graph::tgraph::TemporalGraph, nodes::{node_ref::AsNodeRef, node_store::NodeStore}, }, + storage::{raw_edges::WriteLockedEdges, WriteLockedNodes}, utils::errors::GraphError, PropType, }, @@ -27,6 +28,14 @@ impl InternalAdditionOps for TemporalGraph { Ok(WriteLockedGraph::new(self)) } + fn write_lock_nodes(&self) -> Result { + Ok(self.storage.nodes.write_lock()) + } + + fn write_lock_edges(&self) -> Result { + Ok(self.storage.edges.write_lock()) + } + fn num_shards(&self) -> Result { Ok(self.storage.nodes.data.len()) } @@ -192,6 +201,20 @@ impl InternalAdditionOps for GraphStorage { } } + fn write_lock_nodes(&self) -> Result { + match self { + GraphStorage::Unlocked(storage) => storage.write_lock_nodes(), + _ => Err(GraphError::AttemptToMutateImmutableGraph), + } + } + + fn write_lock_edges(&self) -> Result { + match self { + GraphStorage::Unlocked(storage) => storage.write_lock_edges(), + _ => Err(GraphError::AttemptToMutateImmutableGraph), + } + } + fn num_shards(&self) -> Result { match self { GraphStorage::Unlocked(storage) => storage.num_shards(), diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index dae3fb0de0..8a7e0a6a82 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -6,6 +6,7 @@ use crate::{ graph::tgraph::TemporalGraph, nodes::node_ref::{AsNodeRef, NodeRef}, }, + storage::{raw_edges::WriteLockedEdges, WriteLockedNodes}, utils::errors::GraphError, Prop, PropType, }, @@ -89,10 +90,21 @@ impl InternalAdditionOps for Storage { self.graph.id_type() } + #[inline] fn write_lock(&self) -> Result { self.graph.write_lock() } + #[inline] + fn write_lock_nodes(&self) -> Result { + self.graph.write_lock_nodes() + } + + #[inline] + fn write_lock_edges(&self) -> Result { + self.graph.write_lock_edges() + } + #[inline] fn num_shards(&self) -> Result { self.graph.num_shards() diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index d979f512ed..8bb2ef5952 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -203,6 +203,7 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> let mut node_map = vec![VID::default(); storage.unfiltered_num_nodes()]; let node_map_shared = atomic_usize_from_mut_slice(bytemuck::cast_slice_mut(&mut node_map)); + let t_prop_keys = self.node_meta().temporal_prop_meta().get_keys(); new_storage.nodes.par_iter_mut().try_for_each(|mut shard| { for (index, node) in self.nodes().iter().enumerate() { @@ -230,14 +231,14 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> new_node.update_time(TimeIndexEntry::start(t)); } for prop_id in node.temporal_prop_ids() { - let prop_name = self.node_meta().temporal_prop_meta().get_name(prop_id); + let prop_name = &t_prop_keys[prop_id]; let prop_type = self .node_meta() .temporal_prop_meta() .get_dtype(prop_id) .unwrap(); let new_prop_id = g - .resolve_node_property(&prop_name, prop_type, false)? + .resolve_node_property(prop_name, prop_type, false)? .inner(); for (t, prop_value) in self.temporal_node_prop_hist(node.node, prop_id) { diff --git a/raphtory/src/db/api/view/internal/materialize.rs b/raphtory/src/db/api/view/internal/materialize.rs index afed0292f1..00f2de981d 100644 --- a/raphtory/src/db/api/view/internal/materialize.rs +++ b/raphtory/src/db/api/view/internal/materialize.rs @@ -6,7 +6,10 @@ use crate::{ properties::{graph_meta::GraphMeta, props::Meta, tprop::TProp}, LayerIds, EID, ELID, GID, VID, }, - storage::{locked_view::LockedView, timeindex::TimeIndexEntry}, + storage::{ + locked_view::LockedView, raw_edges::WriteLockedEdges, timeindex::TimeIndexEntry, + WriteLockedNodes, + }, utils::errors::{GraphError, GraphError::EventGraphDeletionsNotSupported}, PropType, }, diff --git a/raphtory/src/search/mod.rs b/raphtory/src/search/mod.rs index ac19d6ecad..403ae2a350 100644 --- a/raphtory/src/search/mod.rs +++ b/raphtory/src/search/mod.rs @@ -10,7 +10,11 @@ use crate::{ nodes::node_ref::{AsNodeRef, NodeRef}, EID, ELID, VID, }, - storage::timeindex::{AsTime, TimeIndexEntry}, + storage::{ + raw_edges::WriteLockedEdges, + timeindex::{AsTime, TimeIndexEntry}, + WriteLockedNodes, + }, utils::errors::GraphError, PropType, }, @@ -764,10 +768,21 @@ impl InternalAdditionOps for Indexe self.graph.id_type() } + #[inline] fn write_lock(&self) -> Result { self.graph.write_lock() } + #[inline] + fn write_lock_nodes(&self) -> Result { + self.graph.write_lock_nodes() + } + + #[inline] + fn write_lock_edges(&self) -> Result { + self.graph.write_lock_edges() + } + #[inline] fn num_shards(&self) -> Result { self.graph.num_shards() diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index 64179bd3fc..a5c5643d8b 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -2,7 +2,7 @@ use crate::{ core::{ entities::{ edges::edge_store::EdgeStore, graph::tgraph::TemporalGraph, - nodes::node_store::NodeStore, + nodes::node_store::NodeStore, LayerIds, }, storage::timeindex::TimeIndexOps, utils::errors::GraphError, @@ -39,6 +39,7 @@ use raphtory_api::core::{ arc_str::ArcStr, timeindex::{AsTime, TimeIndexEntry}, }, + Direction, }; use rayon::prelude::*; use std::{borrow::Borrow, fs::File, io::Write, iter, path::Path, sync::Arc}; @@ -138,6 +139,108 @@ fn as_prop_type(p_type: SPropType) -> PropType { } } +impl NewEdge { + fn src(&self) -> VID { + VID(self.src as usize) + } + + fn dst(&self) -> VID { + VID(self.dst as usize) + } + + fn eid(&self) -> EID { + EID(self.eid as usize) + } +} + +impl DelEdge { + fn eid(&self) -> EID { + EID(self.eid as usize) + } + + fn layer_id(&self) -> usize { + self.layer_id as usize + } + + fn time(&self) -> TimeIndexEntry { + TimeIndexEntry(self.time, self.secondary as usize) + } +} + +impl UpdateEdgeCProps { + fn eid(&self) -> EID { + EID(self.eid as usize) + } + + fn layer_id(&self) -> usize { + self.layer_id as usize + } + + fn props(&self) -> impl Iterator> + '_ { + self.properties.iter().map(as_prop) + } +} + +impl UpdateEdgeTProps { + fn eid(&self) -> EID { + EID(self.eid as usize) + } + + fn layer_id(&self) -> usize { + self.layer_id as usize + } + + fn time(&self) -> TimeIndexEntry { + TimeIndexEntry(self.time, self.secondary as usize) + } + + fn has_props(&self) -> bool { + !self.properties.is_empty() + } + + fn props(&self) -> impl Iterator> + '_ { + self.properties.iter().map(as_prop) + } +} + +impl UpdateNodeType { + fn vid(&self) -> VID { + VID(self.id as usize) + } + + fn type_id(&self) -> usize { + self.type_id as usize + } +} + +impl UpdateNodeCProps { + fn vid(&self) -> VID { + VID(self.id as usize) + } + + fn props(&self) -> impl Iterator> + '_ { + self.properties.iter().map(as_prop) + } +} + +impl UpdateNodeTProps { + fn vid(&self) -> VID { + VID(self.id as usize) + } + + fn time(&self) -> TimeIndexEntry { + TimeIndexEntry(self.time, self.secondary as usize) + } + + fn has_props(&self) -> bool { + !self.properties.is_empty() + } + + fn props(&self) -> impl Iterator> + '_ { + self.properties.iter().map(as_prop) + } +} + impl NewMeta { fn new(new_meta: Meta) -> Self { Self { @@ -702,42 +805,125 @@ impl StableDecode for TemporalGraph { } } }); - graph.nodes.par_iter().try_for_each(|node| { - let gid = match node.gid.as_ref().unwrap() { - Gid::GidStr(name) => GidRef::Str(name), - Gid::GidU64(gid) => GidRef::U64(*gid), - }; - let vid = VID(node.vid as usize); - storage.logical_to_physical.set(gid, vid)?; - let mut node_store = NodeStore::empty(gid.to_owned()); - node_store.vid = vid; - node_store.node_type = node.type_id as usize; - storage.storage.nodes.set(node_store); - Ok::<(), GraphError>(()) - })?; - graph.edges.par_iter().for_each(|edge| { - let eid = EID(edge.eid as usize); - let src = VID(edge.src as usize); - let dst = VID(edge.dst as usize); - let mut edge = EdgeStore::new(src, dst); - edge.eid = eid; - storage.storage.edges.set(edge).init(); - }); + {} + storage + .write_lock_edges()? + .into_par_iter_mut() + .try_for_each(|mut shard| { + for edge in graph.edges.iter() { + if let Some(mut new_edge) = shard.get_mut(edge.eid()) { + let edge_store = new_edge.edge_store_mut(); + edge_store.src = edge.src(); + edge_store.dst = edge.dst(); + edge_store.eid = edge.eid(); + } + } + for update in graph.updates.iter() { + if let Some(update) = update.update.as_ref() { + match update { + Update::DelEdge(del_edge) => { + if let Some(mut edge_mut) = shard.get_mut(del_edge.eid()) { + edge_mut + .deletions_mut(del_edge.layer_id()) + .insert(del_edge.time()); + } + } + Update::UpdateEdgeCprops(update) => { + if let Some(mut edge_mut) = shard.get_mut(update.eid()) { + let edge_layer = edge_mut.layer_mut(update.layer_id()); + for prop_update in update.props() { + let (id, prop) = prop_update?; + let prop = storage.process_prop_value(&prop); + edge_layer.update_constant_prop(id, prop)?; + } + } + } + Update::UpdateEdgeTprops(update) => { + if let Some(mut edge_mut) = shard.get_mut(update.eid()) { + edge_mut + .additions_mut(update.layer_id()) + .insert(update.time()); + if update.has_props() { + let edge_layer = edge_mut.layer_mut(update.layer_id()); + for prop_update in update.props() { + let (id, prop) = prop_update?; + let prop = storage.process_prop_value(&prop); + edge_layer.add_prop(update.time(), id, prop)?; + } + } + } + } + _ => {} + } + } + } + Ok::<(), GraphError>(()) + })?; + storage + .write_lock_nodes()? + .into_par_iter_mut() + .try_for_each(|mut shard| { + for node in graph.nodes.iter() { + let vid = VID(node.vid as usize); + let gid = match node.gid.as_ref().unwrap() { + Gid::GidStr(name) => GidRef::Str(name), + Gid::GidU64(gid) => GidRef::U64(*gid), + }; + if let Some(node_store) = shard.set(vid, gid) { + storage.logical_to_physical.set(gid, vid)?; + node_store.node_type = node.type_id as usize; + } + } + let edges = storage.storage.edges.read_lock(); + for edge in edges.iter() { + if let Some(src) = shard.get_mut(edge.src()) { + for layer in edge.layer_ids_iter(&LayerIds::All) { + src.add_edge(edge.dst(), Direction::OUT, layer, edge.eid()); + } + } + if let Some(dst) = shard.get_mut(edge.dst()) { + for layer in edge.layer_ids_iter(&LayerIds::All) { + dst.add_edge(edge.src(), Direction::IN, layer, edge.eid()); + } + } + } + for update in graph.updates.iter() { + if let Some(update) = update.update.as_ref() { + match update { + Update::UpdateNodeCprops(update) => { + if let Some(node) = shard.get_mut(update.vid()) { + for prop_update in update.props() { + let (id, prop) = prop_update?; + let prop = storage.process_prop_value(&prop); + node.update_constant_prop(id, prop)?; + } + } + } + Update::UpdateNodeTprops(update) => { + if let Some(node) = shard.get_mut(update.vid()) { + node.update_time(update.time()); + for prop_update in update.props() { + let (id, prop) = prop_update?; + let prop = storage.process_prop_value(&prop); + node.add_prop(update.time(), id, prop)?; + } + } + } + Update::UpdateNodeType(update) => { + if let Some(node) = shard.get_mut(update.vid()) { + node.node_type = update.type_id(); + } + } + _ => {} + } + } + } + Ok::<(), GraphError>(()) + })?; + graph.updates.par_iter().try_for_each(|update| { if let Some(update) = update.update.as_ref() { match update { - Update::UpdateNodeCprops(props) => { - storage.internal_update_constant_node_properties( - VID(props.id as usize), - &collect_props(&props.properties)?, - )?; - } - Update::UpdateNodeTprops(props) => { - let time = TimeIndexEntry(props.time, props.secondary as usize); - let node = VID(props.id as usize); - let props = collect_props(&props.properties)?; - storage.internal_add_node(time, node, &props)?; - } Update::UpdateGraphCprops(props) => { storage.internal_update_constant_properties(&collect_props( &props.properties, @@ -748,36 +934,7 @@ impl StableDecode for TemporalGraph { storage .internal_add_properties(time, &collect_props(&props.properties)?)?; } - Update::DelEdge(del_edge) => { - let time = TimeIndexEntry(del_edge.time, del_edge.secondary as usize); - storage.internal_delete_existing_edge( - time, - EID(del_edge.eid as usize), - del_edge.layer_id as usize, - )?; - } - Update::UpdateEdgeCprops(props) => { - storage.internal_update_constant_edge_properties( - EID(props.eid as usize), - props.layer_id as usize, - &collect_props(&props.properties)?, - )?; - } - Update::UpdateEdgeTprops(props) => { - let time = TimeIndexEntry(props.time, props.secondary as usize); - let eid = EID(props.eid as usize); - storage.internal_add_edge_update( - time, - eid, - &collect_props(&props.properties)?, - props.layer_id as usize, - )?; - } - Update::UpdateNodeType(update) => { - let id = VID(update.id as usize); - let type_id = update.type_id as usize; - storage.storage.get_node_mut(id).node_type = type_id; - } + _ => {} } } Ok::<_, GraphError>(()) From 664f91f56344198e514de5acdefb0c528d716efd Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 12 Sep 2024 16:32:40 +0200 Subject: [PATCH 09/18] add encode/decode benchmark to base --- raphtory-benchmark/benches/base.rs | 7 ++++++- raphtory-benchmark/benches/proto_decode.rs | 8 +++++++- raphtory-benchmark/src/common/mod.rs | 2 ++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/raphtory-benchmark/benches/base.rs b/raphtory-benchmark/benches/base.rs index 18e59716c2..4037cfb126 100644 --- a/raphtory-benchmark/benches/base.rs +++ b/raphtory-benchmark/benches/base.rs @@ -2,6 +2,7 @@ use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use raphtory::{graph_loader::lotr_graph::lotr_graph, prelude::*}; use raphtory_benchmark::common::{ bootstrap_graph, run_graph_ops_benches, run_large_ingestion_benchmarks, + run_proto_decode_benchmark, run_proto_encode_benchmark, }; pub fn base(c: &mut Criterion) { @@ -47,7 +48,11 @@ pub fn base(c: &mut Criterion) { } } - run_graph_ops_benches(c, "lotr_graph", graph, layered_graph) + run_graph_ops_benches(c, "lotr_graph", graph.clone(), layered_graph); + let mut proto_group = c.benchmark_group("lotr_graph"); + run_proto_decode_benchmark(&mut proto_group, graph.clone()); + run_proto_encode_benchmark(&mut proto_group, graph.clone()); + proto_group.finish(); } criterion_group!(benches, base); diff --git a/raphtory-benchmark/benches/proto_decode.rs b/raphtory-benchmark/benches/proto_decode.rs index d2f06b10ef..65b3ca9cff 100644 --- a/raphtory-benchmark/benches/proto_decode.rs +++ b/raphtory-benchmark/benches/proto_decode.rs @@ -1,5 +1,5 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use raphtory::graph_loader::sx_superuser_graph::sx_superuser_graph; +use raphtory::graph_loader::{lotr_graph::lotr_graph, sx_superuser_graph::sx_superuser_graph}; use raphtory_benchmark::common::run_proto_decode_benchmark; fn bench(c: &mut Criterion) { @@ -8,6 +8,12 @@ fn bench(c: &mut Criterion) { group.sample_size(10); run_proto_decode_benchmark(&mut group, graph); group.finish(); + + let mut group = c.benchmark_group("proto_lotr"); + let graph = lotr_graph(); + group.sample_size(100); + run_proto_decode_benchmark(&mut group, graph); + group.finish(); } criterion_group!(benches, bench); diff --git a/raphtory-benchmark/src/common/mod.rs b/raphtory-benchmark/src/common/mod.rs index bb66919a98..5a21c49513 100644 --- a/raphtory-benchmark/src/common/mod.rs +++ b/raphtory-benchmark/src/common/mod.rs @@ -572,6 +572,7 @@ pub fn run_graph_ops_benches( } pub fn run_proto_encode_benchmark(group: &mut BenchmarkGroup, graph: Graph) { + println!("graph: {graph}"); let f = NamedTempFile::new().unwrap(); bench(group, "proto_encode", None, |b: &mut Bencher| { b.iter(|| graph.encode(f.path()).unwrap()) @@ -579,6 +580,7 @@ pub fn run_proto_encode_benchmark(group: &mut BenchmarkGroup, graph: G } pub fn run_proto_decode_benchmark(group: &mut BenchmarkGroup, graph: Graph) { + println!("graph: {graph}"); let f = NamedTempFile::new().unwrap(); graph.encode(f.path()).unwrap(); bench(group, "proto_decode", None, |b| { From 3ccc4fa742f1fbe939d6ec9b3d8f6a6b1b32ead6 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 13 Sep 2024 09:52:58 +0200 Subject: [PATCH 10/18] add missing calls to update_time --- .../api/storage/graph/storage_ops/prop_add.rs | 1 + raphtory/src/serialise/serialise.rs | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/raphtory/src/db/api/storage/graph/storage_ops/prop_add.rs b/raphtory/src/db/api/storage/graph/storage_ops/prop_add.rs index 9a8b6c1b09..88ed6eacd0 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/prop_add.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/prop_add.rs @@ -20,6 +20,7 @@ impl InternalPropertyAdditionOps for TemporalGraph { let prop = self.process_prop_value(prop); self.graph_meta.add_prop(t, *prop_id, prop)?; } + self.update_time(t); Ok(()) } diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index a5c5643d8b..f4ae984bd6 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -826,6 +826,7 @@ impl StableDecode for TemporalGraph { edge_mut .deletions_mut(del_edge.layer_id()) .insert(del_edge.time()); + storage.update_time(del_edge.time()); } } Update::UpdateEdgeCprops(update) => { @@ -851,6 +852,7 @@ impl StableDecode for TemporalGraph { edge_layer.add_prop(update.time(), id, prop)?; } } + storage.update_time(update.time()) } } _ => {} @@ -907,6 +909,7 @@ impl StableDecode for TemporalGraph { let prop = storage.process_prop_value(&prop); node.add_prop(update.time(), id, prop)?; } + storage.update_time(update.time()) } } Update::UpdateNodeType(update) => { @@ -1167,6 +1170,7 @@ mod proto_test { graph::graph::assert_graph_equal, }, prelude::*, + search::IndexedGraph, serialise::{proto::GraphType, ProtoGraph}, }; @@ -1192,6 +1196,36 @@ mod proto_test { assert_graph_equal(&g1, &g2); } + #[cfg(feature = "search")] + #[test] + fn test_node_name() { + let g = Graph::new(); + g.add_edge(1, "ben", "hamza", NO_PROPS, None).unwrap(); + g.add_edge(2, "haaroon", "hamza", NO_PROPS, None).unwrap(); + g.add_edge(3, "ben", "haaroon", NO_PROPS, None).unwrap(); + let temp_file = tempfile::NamedTempFile::new().unwrap(); + + g.encode(&temp_file).unwrap(); + let g2 = MaterializedGraph::load_cached(&temp_file).unwrap(); + assert_eq!(g2.nodes().name().collect_vec(), ["ben", "hamza", "haaroon"]); + let node_names: Vec<_> = g2.nodes().iter().map(|n| n.name()).collect(); + assert_eq!(node_names, ["ben", "hamza", "haaroon"]); + let g2_m = g2.materialize().unwrap(); + assert_eq!( + g2_m.nodes().name().collect_vec(), + ["ben", "hamza", "haaroon"] + ); + let g3 = g.materialize().unwrap(); + assert_eq!(g3.nodes().name().collect_vec(), ["ben", "hamza", "haaroon"]); + let node_names: Vec<_> = g3.nodes().iter().map(|n| n.name()).collect(); + assert_eq!(node_names, ["ben", "hamza", "haaroon"]); + g3.encode(&temp_file).unwrap(); + let g4 = MaterializedGraph::decode(&temp_file).unwrap(); + assert_eq!(g4.nodes().name().collect_vec(), ["ben", "hamza", "haaroon"]); + let node_names: Vec<_> = g4.nodes().iter().map(|n| n.name()).collect(); + assert_eq!(node_names, ["ben", "hamza", "haaroon"]); + } + #[test] fn node_with_const_props() { let temp_file = tempfile::NamedTempFile::new().unwrap(); From f9d957ebcbb2b66413949e83403f4171c04d2bc6 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 13 Sep 2024 10:28:32 +0200 Subject: [PATCH 11/18] make assert_graph_eq check more things and fix related bugs --- raphtory-api/src/core/storage/timeindex.rs | 2 +- raphtory/src/db/graph/graph.rs | 92 +++++++++++++++++++--- raphtory/src/serialise/serialise.rs | 21 +++-- 3 files changed, 95 insertions(+), 20 deletions(-) diff --git a/raphtory-api/src/core/storage/timeindex.rs b/raphtory-api/src/core/storage/timeindex.rs index baac2ba4f2..073e07007b 100644 --- a/raphtory-api/src/core/storage/timeindex.rs +++ b/raphtory-api/src/core/storage/timeindex.rs @@ -52,7 +52,7 @@ impl TimeIndexEntry { } pub fn end(t: i64) -> Self { - Self(t.saturating_add(1), 0) + Self(t, usize::MAX) } } diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index abdf14f84b..6da5020fc7 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -89,20 +89,88 @@ pub fn assert_graph_equal< g1.count_temporal_edges(), g2.count_temporal_edges() ); + assert_eq!( + g1.earliest_time(), + g2.earliest_time(), + "mismatched earliest time: left {:?}, right {:?}", + g1.earliest_time(), + g2.earliest_time() + ); + assert_eq!( + g1.latest_time(), + g2.latest_time(), + "mismatched latest time: left {:?}, right {:?}", + g1.latest_time(), + g2.latest_time() + ); for n1 in g1.nodes() { - assert!(g2.has_node(n1.id()), "missing node {:?}", n1.id()); - - let c1 = n1.properties().constant().into_iter().count(); - let t1 = n1.properties().temporal().into_iter().count(); - let check = g2 + let n2 = g2 .node(n1.id()) - .filter(|node| { - c1 == node.properties().constant().into_iter().count() - && t1 == node.properties().temporal().into_iter().count() - }) - .is_some(); - - assert!(check, "node {:?} properties mismatch", n1.id()); + .expect(&format!("missing node {:?}", n1.id())); + assert_eq!( + n1.name(), + n2.name(), + "mismatched node name: left {:?}, right {:?}", + n1.name(), + n2.name() + ); + assert_eq!( + n1.earliest_time(), + n2.earliest_time(), + "mismatched node earliest time for node {:?}: left {:?}, right {:?}", + n1.id(), + n1.earliest_time(), + n2.earliest_time() + ); + // This doesn't hold for materialised windowed PersistentGraph (node is still present after the end of the window) + // assert_eq!( + // n1.latest_time(), + // n2.latest_time(), + // "mismatched node latest time for node {:?}: left {:?}, right {:?}", + // n1.id(), + // n1.latest_time(), + // n2.latest_time() + // ); + assert_eq!( + n1.properties().constant().as_map(), + n2.properties().constant().as_map(), + "mismatched constant properties for node {:?}: left {:?}, right {:?}", + n1.id(), + n1.properties().constant().as_map(), + n2.properties().constant().as_map() + ); + assert_eq!( + n1.properties().temporal().histories(), + n2.properties().temporal().histories(), + "mismatched temporal properties for node {:?}: left {:?}, right {:?}", + n1.id(), + n1.properties().temporal().histories(), + n2.properties().temporal().histories() + ); + assert_eq!( + n1.out_degree(), + n2.out_degree(), + "mismatched out-degree for node {:?}: left {}, right {}", + n1.id(), + n1.out_degree(), + n2.out_degree(), + ); + assert_eq!( + n1.in_degree(), + n2.in_degree(), + "mismatched in-degree for node {:?}: left {}, right {}", + n1.id(), + n1.in_degree(), + n2.in_degree(), + ); + assert_eq!( + n1.degree(), + n2.degree(), + "mismatched degree for node {:?}: left {}, right {}", + n1.id(), + n1.degree(), + n2.degree(), + ); } for e in g1.edges().explode() { diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index f4ae984bd6..8c8fd73088 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -1,9 +1,6 @@ use crate::{ core::{ - entities::{ - edges::edge_store::EdgeStore, graph::tgraph::TemporalGraph, - nodes::node_store::NodeStore, LayerIds, - }, + entities::{graph::tgraph::TemporalGraph, LayerIds}, storage::timeindex::TimeIndexOps, utils::errors::GraphError, DocumentInput, Lifespan, Prop, PropType, @@ -881,11 +878,23 @@ impl StableDecode for TemporalGraph { if let Some(src) = shard.get_mut(edge.src()) { for layer in edge.layer_ids_iter(&LayerIds::All) { src.add_edge(edge.dst(), Direction::OUT, layer, edge.eid()); + for t in edge.additions(layer).iter() { + src.update_time(t); + } + for t in edge.deletions(layer).iter() { + src.update_time(t) + } } } if let Some(dst) = shard.get_mut(edge.dst()) { for layer in edge.layer_ids_iter(&LayerIds::All) { dst.add_edge(edge.src(), Direction::IN, layer, edge.eid()); + for t in edge.additions(layer).iter() { + dst.update_time(t); + } + for t in edge.deletions(layer).iter() { + dst.update_time(t) + } } } } @@ -1160,8 +1169,6 @@ fn as_proto_prop(prop: &Prop) -> proto::Prop { #[cfg(test)] mod proto_test { - use chrono::{DateTime, NaiveDateTime}; - use super::*; use crate::{ core::DocumentInput, @@ -1170,9 +1177,9 @@ mod proto_test { graph::graph::assert_graph_equal, }, prelude::*, - search::IndexedGraph, serialise::{proto::GraphType, ProtoGraph}, }; + use chrono::{DateTime, NaiveDateTime}; #[test] fn node_no_props() { From 0bd049e533a832d574f68a46dbb840f8d44aa6f4 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 13 Sep 2024 11:29:03 +0200 Subject: [PATCH 12/18] make sure we don't mess with the earliest time of the graph when reserving the node ids --- raphtory/src/disk_graph/graph_impl/mod.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/raphtory/src/disk_graph/graph_impl/mod.rs b/raphtory/src/disk_graph/graph_impl/mod.rs index eb9f80b7ca..2bcaef235d 100644 --- a/raphtory/src/disk_graph/graph_impl/mod.rs +++ b/raphtory/src/disk_graph/graph_impl/mod.rs @@ -624,7 +624,7 @@ mod storage_tests { use crate::{ core::Prop, - db::graph::graph::assert_graph_equal, + db::{api::mutation::internal::InternalAdditionOps, graph::graph::assert_graph_equal}, prelude::{AdditionOps, Graph, GraphViewOps, NodeViewOps, NO_PROPS, *}, }; @@ -736,7 +736,7 @@ mod storage_tests { .flat_map(|(_, src, dst)| [*src, *dst]) .collect(); for n in nodes { - g.add_node(0, n, NO_PROPS, None).unwrap(); + g.resolve_node(n).unwrap(); } for (t, src, dst) in edges { g.add_edge(*t, *src, *dst, NO_PROPS, None).unwrap(); @@ -772,6 +772,10 @@ mod storage_tests { }) } + #[test] + fn test_one_empty_graph_non_zero_time() { + inner_merge_test(&[], &[(1, 0, 0)]) + } #[test] fn test_empty_graphs() { inner_merge_test(&[], &[]) From dbf6e087eb3ab5addc23972c0233e303c6f6b3bf Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 13 Sep 2024 11:41:33 +0200 Subject: [PATCH 13/18] fix warnings --- raphtory/src/core/storage/raw_edges.rs | 21 +++------------------ raphtory/src/db/api/view/graph.rs | 8 ++------ raphtory/src/serialise/serialise.rs | 8 +------- raphtory/src/vectors/mod.rs | 2 +- 4 files changed, 7 insertions(+), 32 deletions(-) diff --git a/raphtory/src/core/storage/raw_edges.rs b/raphtory/src/core/storage/raw_edges.rs index e84e767249..b65d804546 100644 --- a/raphtory/src/core/storage/raw_edges.rs +++ b/raphtory/src/core/storage/raw_edges.rs @@ -1,11 +1,8 @@ use super::{resolve, timeindex::TimeIndex}; use crate::{ - core::{ - entities::{ - edges::edge_store::{EdgeDataLike, EdgeLayer, EdgeStore}, - LayerIds, - }, - utils::errors::GraphError, + core::entities::{ + edges::edge_store::{EdgeDataLike, EdgeLayer, EdgeStore}, + LayerIds, }, db::api::storage::graph::edges::edge_storage_ops::{EdgeStorageOps, MemEdge}, }; @@ -174,18 +171,6 @@ impl EdgesStorage { } } - pub(crate) fn set(&self, value: EdgeStore) -> UninitialisedEdge { - let EID(index) = value.eid; - self.len.fetch_max(index + 1, atomic::Ordering::Relaxed); - let (bucket, offset) = self.resolve(index); - let guard = self.shards[bucket].write(); - UninitialisedEdge { - guard, - offset, - value, - } - } - pub fn get_edge_mut(&self, eid: EID) -> EdgeWGuard { let (bucket, offset) = self.resolve(eid.into()); EdgeWGuard { diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 8bb2ef5952..60074d2b0b 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -6,7 +6,7 @@ use crate::{ }, db::{ api::{ - mutation::{internal::InternalAdditionOps, AdditionOps, PropertyAdditionOps}, + mutation::{internal::InternalAdditionOps, PropertyAdditionOps}, properties::{internal::TemporalPropertiesOps, Properties}, storage::graph::{ edges::edge_storage_ops::EdgeStorageOps, nodes::node_storage_ops::NodeStorageOps, @@ -23,7 +23,6 @@ use crate::{ }, }, }, - prelude::DeletionOps, }; use chrono::{DateTime, Utc}; use itertools::Itertools; @@ -31,10 +30,7 @@ use raphtory_api::{ atomic_extra::atomic_usize_from_mut_slice, core::{ entities::EID, - storage::{ - arc_str::{ArcStr, OptionAsStr}, - timeindex::TimeIndexEntry, - }, + storage::{arc_str::ArcStr, timeindex::TimeIndexEntry}, Direction, }, }; diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index 8c8fd73088..7d89efd688 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -7,9 +7,7 @@ use crate::{ }, db::{ api::{ - mutation::internal::{ - InternalAdditionOps, InternalDeletionOps, InternalPropertyAdditionOps, - }, + mutation::internal::{InternalAdditionOps, InternalPropertyAdditionOps}, storage::graph::{ edges::edge_storage_ops::EdgeStorageOps, nodes::node_storage_ops::NodeStorageOps, storage_ops::GraphStorage, tprop_storage_ops::TPropOps, @@ -229,10 +227,6 @@ impl UpdateNodeTProps { TimeIndexEntry(self.time, self.secondary as usize) } - fn has_props(&self) -> bool { - !self.properties.is_empty() - } - fn props(&self) -> impl Iterator> + '_ { self.properties.iter().map(as_prop) } diff --git a/raphtory/src/vectors/mod.rs b/raphtory/src/vectors/mod.rs index 8b46c1c438..e12c4ece74 100644 --- a/raphtory/src/vectors/mod.rs +++ b/raphtory/src/vectors/mod.rs @@ -107,7 +107,7 @@ mod vector_tests { use super::*; use crate::{ core::Prop, - prelude::{AdditionOps, EdgeViewOps, Graph, GraphViewOps, NodeViewOps}, + prelude::{AdditionOps, Graph, GraphViewOps}, vectors::{embeddings::openai_embedding, vectorisable::Vectorisable}, }; use dotenv::dotenv; From 6e7c0c457a3a9d74a4c5b5504d41f9c48979c408 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 16 Sep 2024 15:58:17 +0200 Subject: [PATCH 14/18] improve materialise implementation and add more tests --- .../src/core/entities/edges/edge_ref.rs | 4 +- raphtory-api/src/core/entities/mod.rs | 9 +- raphtory-api/src/core/storage/dict_mapper.rs | 8 + raphtory-api/src/core/storage/timeindex.rs | 2 +- raphtory/src/core/entities/graph/tgraph.rs | 10 +- raphtory/src/core/entities/mod.rs | 2 +- .../core/entities/properties/graph_meta.rs | 9 + .../src/core/entities/properties/props.rs | 13 ++ raphtory/src/db/api/mutation/import_ops.rs | 2 +- .../internal/internal_addition_ops.rs | 8 + .../src/db/api/properties/temporal_props.rs | 6 + .../storage/graph/storage_ops/additions.rs | 15 +- .../api/storage/graph/storage_ops/prop_add.rs | 27 +++ .../graph/storage_ops/time_semantics.rs | 2 +- raphtory/src/db/api/storage/storage.rs | 4 + raphtory/src/db/api/view/edge.rs | 7 +- raphtory/src/db/api/view/graph.rs | 164 ++++++++++-------- raphtory/src/db/graph/edge.rs | 4 +- raphtory/src/db/graph/graph.rs | 116 +++++++++---- raphtory/src/db/graph/views/deletion_graph.rs | 6 +- raphtory/src/io/arrow/df_loaders.rs | 17 +- raphtory/src/lib.rs | 37 +++- raphtory/src/search/mod.rs | 5 + 23 files changed, 337 insertions(+), 140 deletions(-) diff --git a/raphtory-api/src/core/entities/edges/edge_ref.rs b/raphtory-api/src/core/entities/edges/edge_ref.rs index 76e56bbcf1..d7984fd20a 100644 --- a/raphtory-api/src/core/entities/edges/edge_ref.rs +++ b/raphtory-api/src/core/entities/edges/edge_ref.rs @@ -76,8 +76,8 @@ impl EdgeRef { } #[inline(always)] - pub fn layer(&self) -> Option<&usize> { - self.layer_id.as_ref() + pub fn layer(&self) -> Option { + self.layer_id } #[inline(always)] diff --git a/raphtory-api/src/core/entities/mod.rs b/raphtory-api/src/core/entities/mod.rs index 476f51b330..ed58cf9099 100644 --- a/raphtory-api/src/core/entities/mod.rs +++ b/raphtory-api/src/core/entities/mod.rs @@ -100,7 +100,7 @@ impl From for ELID { fn from(value: EdgeRef) -> Self { ELID { edge: value.pid(), - layer: value.layer().copied(), + layer: value.layer(), } } } @@ -186,6 +186,13 @@ impl GID { GID::Str(v) => parse_u64_strict(v), } } + + pub fn as_ref(&self) -> GidRef { + match self { + GID::U64(v) => GidRef::U64(*v), + GID::Str(v) => GidRef::Str(v), + } + } } impl From for GID { diff --git a/raphtory-api/src/core/storage/dict_mapper.rs b/raphtory-api/src/core/storage/dict_mapper.rs index 58619246cc..a1af880ecf 100644 --- a/raphtory-api/src/core/storage/dict_mapper.rs +++ b/raphtory-api/src/core/storage/dict_mapper.rs @@ -87,6 +87,14 @@ impl BorrowMut for MaybeNew { } impl DictMapper { + pub fn deep_clone(&self) -> Self { + let reverse_map = self.reverse_map.read().clone(); + + Self { + map: self.map.clone(), + reverse_map: Arc::new(RwLock::new(reverse_map)), + } + } pub fn get_or_create_id(&self, name: &Q) -> MaybeNew where Q: Hash + Eq + ?Sized + ToOwned + Borrow, diff --git a/raphtory-api/src/core/storage/timeindex.rs b/raphtory-api/src/core/storage/timeindex.rs index 073e07007b..71bf054f1b 100644 --- a/raphtory-api/src/core/storage/timeindex.rs +++ b/raphtory-api/src/core/storage/timeindex.rs @@ -3,7 +3,7 @@ use std::{fmt, ops::Range}; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; -#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Ord, PartialOrd, Eq)] +#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Ord, PartialOrd, Eq, Hash)] pub struct TimeIndexEntry(pub i64, pub usize); pub trait AsTime: fmt::Debug + Copy + Ord + Eq + Send + Sync + 'static { diff --git a/raphtory/src/core/entities/graph/tgraph.rs b/raphtory/src/core/entities/graph/tgraph.rs index c9cd67f1ab..a0353f8f1c 100644 --- a/raphtory/src/core/entities/graph/tgraph.rs +++ b/raphtory/src/core/entities/graph/tgraph.rs @@ -58,10 +58,10 @@ pub struct TemporalGraph { pub(in crate::core) latest_time: MaxCounter, // props meta data for nodes (mapping between strings and ids) - pub(crate) node_meta: Arc, + pub(crate) node_meta: Meta, // props meta data for edges (mapping between strings and ids) - pub(crate) edge_meta: Arc, + pub(crate) edge_meta: Meta, // graph properties pub(crate) graph_meta: GraphMeta, @@ -93,8 +93,8 @@ impl TemporalGraph { event_counter: AtomicUsize::new(0), earliest_time: MinCounter::new(), latest_time: MaxCounter::new(), - node_meta: Arc::new(Meta::new()), - edge_meta: Arc::new(Meta::new()), + node_meta: Meta::new(), + edge_meta: Meta::new(), graph_meta: GraphMeta::new(), } } @@ -106,7 +106,7 @@ impl TemporalGraph { } } - fn get_valid_layers(edge_meta: &Arc) -> Vec { + fn get_valid_layers(edge_meta: &Meta) -> Vec { edge_meta .layer_meta() .get_keys() diff --git a/raphtory/src/core/entities/mod.rs b/raphtory/src/core/entities/mod.rs index 0f9e73d165..6cc3d5a655 100644 --- a/raphtory/src/core/entities/mod.rs +++ b/raphtory/src/core/entities/mod.rs @@ -107,7 +107,7 @@ impl LayerIds { pub fn constrain_from_edge(&self, e: EdgeRef) -> LayerIds { match e.layer() { None => self.clone(), - Some(l) => self.find(*l).map(LayerIds::One).unwrap_or(LayerIds::None), + Some(l) => self.find(l).map(LayerIds::One).unwrap_or(LayerIds::None), } } diff --git a/raphtory/src/core/entities/properties/graph_meta.rs b/raphtory/src/core/entities/properties/graph_meta.rs index 5f637f521a..f058022ad5 100644 --- a/raphtory/src/core/entities/properties/graph_meta.rs +++ b/raphtory/src/core/entities/properties/graph_meta.rs @@ -31,6 +31,15 @@ impl GraphMeta { } } + pub fn deep_clone(&self) -> Self { + Self { + constant_mapper: self.constant_mapper.deep_clone(), + temporal_mapper: self.temporal_mapper.deep_clone(), + constant: self.constant.clone(), + temporal: self.temporal.clone(), + } + } + #[inline] pub fn const_prop_meta(&self) -> &DictMapper { &self.constant_mapper diff --git a/raphtory/src/core/entities/properties/props.rs b/raphtory/src/core/entities/properties/props.rs index aa694c85cf..11a605b5ba 100644 --- a/raphtory/src/core/entities/properties/props.rs +++ b/raphtory/src/core/entities/properties/props.rs @@ -120,6 +120,12 @@ impl Default for Meta { } impl Meta { + pub fn set_const_prop_meta(&mut self, meta: PropMapper) { + self.meta_prop_constant = meta; + } + pub fn set_temporal_prop_meta(&mut self, meta: PropMapper) { + self.meta_prop_temporal = meta; + } pub fn const_prop_meta(&self) -> &PropMapper { &self.meta_prop_constant } @@ -262,6 +268,13 @@ impl Deref for PropMapper { } impl PropMapper { + pub fn deep_clone(&self) -> Self { + let dtypes = self.dtypes.read().clone(); + Self { + id_mapper: self.id_mapper.deep_clone(), + dtypes: Arc::new(RwLock::new(dtypes)), + } + } pub(crate) fn get_or_create_and_validate( &self, prop: &str, diff --git a/raphtory/src/db/api/mutation/import_ops.rs b/raphtory/src/db/api/mutation/import_ops.rs index 82a4f5f861..6d6f2a9430 100644 --- a/raphtory/src/db/api/mutation/import_ops.rs +++ b/raphtory/src/db/api/mutation/import_ops.rs @@ -192,7 +192,7 @@ impl< // Add edges first so we definitely have all associated nodes (important in case of persistent edges) // FIXME: this needs to be verified for ee in edge.explode_layers() { - let layer_id = *ee.edge.layer().expect("exploded layers"); + let layer_id = ee.edge.layer().expect("exploded layers"); let layer_ids = LayerIds::One(layer_id); let layer_name = self.get_layer_name(layer_id); let layer_name: Option<&str> = if layer_id == 0 { diff --git a/raphtory/src/db/api/mutation/internal/internal_addition_ops.rs b/raphtory/src/db/api/mutation/internal/internal_addition_ops.rs index daceb2df64..dc161eaaa4 100644 --- a/raphtory/src/db/api/mutation/internal/internal_addition_ops.rs +++ b/raphtory/src/db/api/mutation/internal/internal_addition_ops.rs @@ -22,6 +22,9 @@ pub trait InternalAdditionOps { /// get the sequence id for the next event fn next_event_id(&self) -> Result; + /// get the current sequence id without incrementing the counter + fn read_event_id(&self) -> usize; + fn reserve_event_ids(&self, num_ids: usize) -> Result; /// map layer name to id and allocate a new layer if needed @@ -138,6 +141,11 @@ impl InternalAdditionOps for G { self.graph().next_event_id() } + #[inline] + fn read_event_id(&self) -> usize { + self.graph().read_event_id() + } + #[inline] fn reserve_event_ids(&self, num_ids: usize) -> Result { self.graph().reserve_event_ids(num_ids) diff --git a/raphtory/src/db/api/properties/temporal_props.rs b/raphtory/src/db/api/properties/temporal_props.rs index bf20d974a0..ff3b981731 100644 --- a/raphtory/src/db/api/properties/temporal_props.rs +++ b/raphtory/src/db/api/properties/temporal_props.rs @@ -174,6 +174,12 @@ impl TemporalProperties

{ .flat_map(|(k, v)| v.latest().map(|v| (k.clone(), v))) .collect() } + + pub fn as_map(&self) -> HashMap> { + self.iter() + .map(|(key, value)| (key, value.histories().collect())) + .collect() + } } impl PropUnwrap for TemporalPropertyView

{ diff --git a/raphtory/src/db/api/storage/graph/storage_ops/additions.rs b/raphtory/src/db/api/storage/graph/storage_ops/additions.rs index 01a0f2119f..8ea394c11b 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/additions.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/additions.rs @@ -10,7 +10,7 @@ use crate::{ PropType, }, db::api::{mutation::internal::InternalAdditionOps, storage::graph::locked::WriteLockedGraph}, - prelude::Prop, + prelude::{GraphViewOps, Prop}, }; use either::Either; use raphtory_api::core::{ @@ -44,6 +44,10 @@ impl InternalAdditionOps for TemporalGraph { Ok(self.event_counter.fetch_add(1, Ordering::Relaxed)) } + fn read_event_id(&self) -> usize { + self.event_counter.load(Ordering::Relaxed) + } + fn reserve_event_ids(&self, num_ids: usize) -> Result { Ok(self.event_counter.fetch_add(num_ids, Ordering::Relaxed)) } @@ -229,6 +233,15 @@ impl InternalAdditionOps for GraphStorage { } } + fn read_event_id(&self) -> usize { + match self { + GraphStorage::Mem(storage) => storage.graph.read_event_id(), + GraphStorage::Unlocked(storage) => storage.read_event_id(), + #[cfg(feature = "storage")] + GraphStorage::Disk(storage) => storage.inner.count_temporal_edges(), + } + } + fn reserve_event_ids(&self, num_ids: usize) -> Result { match self { GraphStorage::Unlocked(storage) => storage.reserve_event_ids(num_ids), diff --git a/raphtory/src/db/api/storage/graph/storage_ops/prop_add.rs b/raphtory/src/db/api/storage/graph/storage_ops/prop_add.rs index 88ed6eacd0..b9efd6f77b 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/prop_add.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/prop_add.rs @@ -205,3 +205,30 @@ impl InternalPropertyAdditionOps for GraphStorage { } } } + +#[cfg(test)] +mod test { + use crate::{prelude::*, test_storage}; + use itertools::Itertools; + + #[test] + fn test_graph_temporal_prop_updates_time() { + let graph = Graph::new(); + graph.add_properties(1, [("test", "test")]).unwrap(); + graph.add_properties(2, [("test", "test2")]).unwrap(); + test_storage!(&graph, |graph| { + assert_eq!(graph.earliest_time(), Some(1)); + assert_eq!(graph.latest_time(), Some(2)); + assert_eq!( + graph + .properties() + .temporal() + .get("test") + .unwrap() + .iter() + .collect_vec(), + [(1, Prop::str("test")), (2, Prop::str("test2"))] + ); + }); + } +} diff --git a/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs b/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs index 216665e409..f21e0d1346 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs @@ -180,7 +180,7 @@ impl TimeSemantics for GraphStorage { entry .clone() .into_layers(layer_ids.clone(), e) - .filter(move |e| entry.additions(*e.layer().unwrap()).active_t(w.clone())), + .filter(move |e| entry.additions(e.layer().unwrap()).active_t(w.clone())), ) } diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 8a7e0a6a82..fd6c4c8a19 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -115,6 +115,10 @@ impl InternalAdditionOps for Storage { self.graph.next_event_id() } + fn read_event_id(&self) -> usize { + self.graph.read_event_id() + } + #[inline] fn reserve_event_ids(&self, num_ids: usize) -> Result { self.graph.reserve_event_ids(num_ids) diff --git a/raphtory/src/db/api/view/edge.rs b/raphtory/src/db/api/view/edge.rs index 78fcd38bdc..71f098875c 100644 --- a/raphtory/src/db/api/view/edge.rs +++ b/raphtory/src/db/api/view/edge.rs @@ -274,7 +274,7 @@ impl<'graph, E: BaseEdgeViewOps<'graph>> EdgeViewOps<'graph> for E { fn layer_name(&self) -> Self::ValueType> { self.map(|g, e| { e.layer() - .map(|l_id| g.get_layer_name(*l_id)) + .map(|l_id| g.get_layer_name(l_id)) .ok_or_else(|| GraphError::LayerNameAPIError) }) } @@ -290,7 +290,7 @@ impl<'graph, E: BaseEdgeViewOps<'graph>> EdgeViewOps<'graph> for E { let layer_names = g.edge_meta().layer_meta().get_keys(); g.edge_layers(e, &g.layer_ids().constrain_from_edge(e)) .map(move |ee| { - layer_names[*ee.layer().expect("exploded edge should have layer")].clone() + layer_names[ee.layer().expect("exploded edge should have layer")].clone() }) .into_dyn_boxed() }) @@ -434,8 +434,7 @@ mod test_edge_view { graph.add_edge(0, 1, 2, [("second", true)], None).unwrap(); graph.add_edge(0, 2, 3, [("second", true)], None).unwrap(); - // FIXME: boolean properties not supported yet (Issue #48) - test_graph(&graph, |graph| { + test_storage!(&graph, |graph| { let mut exploded_edges: Vec<_> = graph.edges().explode().into_iter().collect(); exploded_edges.sort_by_key(|a| a.time_and_index()); diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 60074d2b0b..e46c5868be 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -7,7 +7,10 @@ use crate::{ db::{ api::{ mutation::{internal::InternalAdditionOps, PropertyAdditionOps}, - properties::{internal::TemporalPropertiesOps, Properties}, + properties::{ + internal::{ConstPropertiesOps, TemporalPropertiesOps}, + Properties, + }, storage::graph::{ edges::edge_storage_ops::EdgeStorageOps, nodes::node_storage_ops::NodeStorageOps, }, @@ -144,7 +147,21 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> fn materialize(&self) -> Result { let storage = self.core_graph().lock(); - let g = TemporalGraph::default(); + let mut g = TemporalGraph::default(); + + // Copy all graph properties + g.graph_meta = self.graph_meta().deep_clone(); + + // preserve all property mappings + g.node_meta + .set_const_prop_meta(self.node_meta().const_prop_meta().deep_clone()); + g.node_meta + .set_temporal_prop_meta(self.node_meta().temporal_prop_meta().deep_clone()); + g.edge_meta + .set_const_prop_meta(self.edge_meta().const_prop_meta().deep_clone()); + g.edge_meta + .set_temporal_prop_meta(self.edge_meta().temporal_prop_meta().deep_clone()); + if let Some(earliest) = self.earliest_time() { g.update_time(TimeIndexEntry::start(earliest)); } else { @@ -190,6 +207,9 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> layer_map } }; + // Set event counter to be the same as old graph to avoid any possibility for duplicate event ids + g.event_counter + .fetch_max(storage.read_event_id(), Ordering::Relaxed); { // scope for the write lock @@ -199,13 +219,12 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> let mut node_map = vec![VID::default(); storage.unfiltered_num_nodes()]; let node_map_shared = atomic_usize_from_mut_slice(bytemuck::cast_slice_mut(&mut node_map)); - let t_prop_keys = self.node_meta().temporal_prop_meta().get_keys(); new_storage.nodes.par_iter_mut().try_for_each(|mut shard| { for (index, node) in self.nodes().iter().enumerate() { let new_id = VID(index); let gid = node.id(); - if let Some(new_node) = shard.get_mut(new_id) { + if let Some(new_node) = shard.set(new_id, gid.as_ref()) { node_map_shared[node.node.index()].store(index, Ordering::Relaxed); if let Some(node_type) = node.node_type() { let new_type_id = g @@ -215,9 +234,7 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> .inner(); new_node.node_type = new_type_id; } - new_node.vid = new_id; - g.logical_to_physical.set((&gid).into(), new_id)?; - new_node.global_id = gid; + g.logical_to_physical.set(gid.as_ref(), new_id)?; if let Some(earliest) = node.earliest_time() { // explicitly add node earliest_time to handle PersistentGraph @@ -226,26 +243,17 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> for t in node.history() { new_node.update_time(TimeIndexEntry::start(t)); } - for prop_id in node.temporal_prop_ids() { - let prop_name = &t_prop_keys[prop_id]; - let prop_type = self - .node_meta() - .temporal_prop_meta() - .get_dtype(prop_id) - .unwrap(); - let new_prop_id = g - .resolve_node_property(prop_name, prop_type, false)? - .inner(); - for (t, prop_value) in self.temporal_node_prop_hist(node.node, prop_id) + for t_prop_id in node.temporal_prop_ids() { + for (t, prop_value) in + self.temporal_node_prop_hist(node.node, t_prop_id) { - new_node.add_prop(t, new_prop_id, prop_value)?; + new_node.add_prop(t, t_prop_id, prop_value)?; } } - for (c_prop_name, prop_value) in node.properties().constant().iter() { - let prop_id = g - .resolve_node_property(&c_prop_name, prop_value.dtype(), true)? - .inner(); - new_node.add_constant_prop(prop_id, prop_value)?; + for c_prop_id in node.const_prop_ids() { + if let Some(prop_value) = node.get_const_prop(c_prop_id) { + new_node.add_constant_prop(c_prop_id, prop_value)?; + } } } } @@ -259,51 +267,31 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> edge_store.src = node_map[edge.edge.src().index()]; edge_store.dst = node_map[edge.edge.dst().index()]; edge_store.eid = EID(eid); - for e in edge.explode() { - let t = e.edge.time().unwrap(); - let layer = layer_map[*e.edge.layer().unwrap()]; - let edge_additions = new_edge.additions_mut(layer); - edge_additions.insert(e.edge.time().unwrap()); - let t_props = e.properties().temporal(); - let mut props_iter = t_props.iter_latest().peekable(); - if props_iter.peek().is_some() { - let edge_layer = new_edge.layer_mut(layer); - for (prop_name, prop_value) in props_iter { - let prop_id = g - .resolve_edge_property( - &prop_name, - prop_value.dtype(), - false, - )? - .inner(); - edge_layer.add_prop(t, prop_id, prop_value)?; + for edge in edge.explode_layers() { + let old_layer = LayerIds::All.constrain_from_edge(edge.edge); + let layer = layer_map[edge.edge.layer().unwrap()]; + let additions = new_edge.additions_mut(layer); + for edge in edge.explode() { + let t = edge.edge.time().unwrap(); + additions.insert(t); + } + for t_prop in edge.temporal_prop_ids() { + for (t, prop_value) in + self.temporal_edge_prop_hist(edge.edge, t_prop, &old_layer) + { + new_edge.layer_mut(layer).add_prop(t, t_prop, prop_value)?; } } - } - for e in edge.explode_layers() { - let layer = layer_map[*e.edge.layer().unwrap()]; - let c_props = e.properties().constant(); - let mut props_iter = c_props.iter().peekable(); - if props_iter.peek().is_some() { - let edge_layer = new_edge.layer_mut(layer); - for (prop_name, prop_value) in props_iter { - let prop_id = g - .resolve_edge_property( - &prop_name, - prop_value.dtype(), - true, - )? - .inner(); - edge_layer.add_constant_prop(prop_id, prop_value)?; + for c_prop in edge.const_prop_ids() { + if let Some(prop_value) = edge.get_const_prop(c_prop) { + new_edge + .layer_mut(layer) + .add_constant_prop(c_prop, prop_value)?; } } - } - if self.include_deletions() { - for e in edge.explode_layers() { - let layer = *e.edge.layer().unwrap(); - let layer_ids = LayerIds::One(layer); + if self.include_deletions() { let mut deletion_history = - self.edge_deletion_history(edge.edge, &layer_ids).peekable(); + self.edge_deletion_history(edge.edge, &old_layer).peekable(); if deletion_history.peek().is_some() { let edge_deletions = new_edge.deletions_mut(layer_map[layer]); for t in deletion_history { @@ -324,7 +312,7 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> src_node.add_edge( node_map[edge.edge.dst().index()], Direction::OUT, - *ee.edge.layer().unwrap(), + ee.edge.layer().unwrap(), EID(eid), ); } @@ -334,7 +322,7 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> dst_node.add_edge( node_map[edge.edge.src().index()], Direction::IN, - *ee.edge.layer().unwrap(), + ee.edge.layer().unwrap(), EID(eid), ); } @@ -343,8 +331,6 @@ impl<'graph, G: BoxableGraphView + Sized + Clone + 'graph> GraphViewOps<'graph> Ok::<(), GraphError>(()) })?; - - g.add_constant_properties(self.properties().constant())?; } Ok(self.new_base_graph(g.into())) @@ -635,8 +621,15 @@ mod test_exploded_edges { #[cfg(test)] mod test_materialize { - use crate::{db::api::view::internal::CoreGraphOps, prelude::*, test_storage}; + use crate::{ + db::{api::view::internal::CoreGraphOps, graph::graph::assert_graph_equal}, + prelude::*, + test_storage, + test_utils::{build_edge_list, build_graph_from_edge_list}, + }; + use proptest::{arbitrary::any, proptest}; use raphtory_api::core::storage::arc_str::OptionAsStr; + use std::ops::Range; #[test] fn test_materialize() { @@ -645,6 +638,8 @@ mod test_materialize { g.add_edge(0, 1, 2, [("layer2", "2")], Some("2")).unwrap(); let gm = g.materialize().unwrap(); + + assert_graph_equal(&g, &gm); assert_eq!( gm.nodes().name().values().collect::>(), vec!["1", "2"] @@ -670,6 +665,33 @@ mod test_materialize { .contains("layer1")); } + #[test] + fn test_graph_properties() { + let g = Graph::new(); + g.add_properties(1, [("test", "test")]).unwrap(); + g.add_constant_properties([("test_constant", "test2")]) + .unwrap(); + + test_storage!(&g, |g| { + let gm = g.materialize().unwrap(); + assert_graph_equal(&g, &gm); + }); + } + + #[test] + fn materialize_prop_test() { + proptest!(|(edges in build_edge_list(100, 100), w in any::>())| { + let g = build_graph_from_edge_list(&edges); + test_storage!(&g, |g| { + let gm = g.materialize().unwrap(); + assert_graph_equal(&g, &gm); + let gw = g.window(w.start, w.end); + let gmw = gw.materialize().unwrap(); + assert_graph_equal(&gw, &gmw); + }); + }) + } + #[test] fn test_subgraph() { let g = Graph::new(); @@ -688,6 +710,8 @@ mod test_materialize { .collect::>(), vec!["4", "5"] ); + let gm = nodes_subgraph.materialize().unwrap(); + assert_graph_equal(&nodes_subgraph, &gm); } #[test] @@ -708,6 +732,8 @@ mod test_materialize { .collect::>(), vec!["1", "2", "3"] ); + let gm = exclude_nodes_subgraph.materialize().unwrap(); + assert_graph_equal(&exclude_nodes_subgraph, &gm); } #[test] diff --git a/raphtory/src/db/graph/edge.rs b/raphtory/src/db/graph/edge.rs index 5a9d94b886..b219e22168 100644 --- a/raphtory/src/db/graph/edge.rs +++ b/raphtory/src/db/graph/edge.rs @@ -201,7 +201,7 @@ impl Some(l_id) => self .graph .get_layer_id(name) - .filter(|id| id == l_id) + .filter(|&id| id == l_id) .ok_or_else(|| { GraphError::invalid_layer( name.to_owned(), @@ -221,7 +221,7 @@ impl } } }, - None => Ok(self.edge.layer().copied().unwrap_or(0)), + None => Ok(self.edge.layer().unwrap_or(0)), } } diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index 6da5020fc7..2e0a4ea439 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -29,6 +29,7 @@ use core::panic; use rayon::prelude::*; use serde::{Deserialize, Serialize}; use std::{ + collections::HashSet, fmt::{Display, Formatter}, sync::Arc, }; @@ -103,6 +104,21 @@ pub fn assert_graph_equal< g1.latest_time(), g2.latest_time() ); + assert_eq!( + g1.properties().constant().as_map(), + g2.properties().constant().as_map(), + "mismatched graph constant properties: left {:?}, right {:?}", + g1.properties().constant().as_map(), + g2.properties().constant().as_map() + ); + assert_eq!( + g1.properties().temporal().as_map(), + g2.properties().temporal().as_map(), + "mismatched graph temporal properties: left {:?}, right {:?}", + g1.properties().temporal().as_map(), + g2.properties().temporal().as_map() + ); + for n1 in g1.nodes() { let n2 = g2 .node(n1.id()) @@ -140,12 +156,12 @@ pub fn assert_graph_equal< n2.properties().constant().as_map() ); assert_eq!( - n1.properties().temporal().histories(), - n2.properties().temporal().histories(), + n1.properties().temporal().as_map(), + n2.properties().temporal().as_map(), "mismatched temporal properties for node {:?}: left {:?}, right {:?}", n1.id(), - n1.properties().temporal().histories(), - n2.properties().temporal().histories() + n1.properties().temporal().as_map(), + n2.properties().temporal().as_map() ); assert_eq!( n1.out_degree(), @@ -171,32 +187,73 @@ pub fn assert_graph_equal< n1.degree(), n2.degree(), ); + assert_eq!( + n1.out_neighbours().id().collect::>(), + n2.out_neighbours().id().collect::>(), + "mismatched out-neighbours for node {:?}: left {:?}, right {:?}", + n1.id(), + n1.out_neighbours().id().collect::>(), + n2.out_neighbours().id().collect::>() + ); + assert_eq!( + n1.in_neighbours().id().collect::>(), + n2.in_neighbours().id().collect::>(), + "mismatched in-neighbours for node {:?}: left {:?}, right {:?}", + n1.id(), + n1.in_neighbours().id().collect::>(), + n2.in_neighbours().id().collect::>() + ) } - for e in g1.edges().explode() { - // all exploded edges exist in other + for e1 in g1.edges() { let e2 = g2 - .edge(e.src().id(), e.dst().id()) - .unwrap_or_else(|| panic!("missing edge {:?}", e.id())); - assert!( - e2.active(e.time().unwrap()), - "exploded edge {:?} not active as expected at time {}", - e2.id(), - e.time().unwrap() - ); - - let c1 = e.properties().constant().into_iter().count(); - let t1 = e.properties().temporal().into_iter().count(); - let check = g2 - .edge(e.src().id(), e.dst().id()) - .filter(|ee| { - ee.active(e.time().expect("exploded")) - && c1 == e.properties().constant().into_iter().count() - && t1 == e.properties().temporal().into_iter().count() - }) - .is_some(); + .edge(e1.src().id(), e1.dst().id()) + .unwrap_or_else(|| panic!("missing edge {:?}", e1.id())); + assert_eq!( + e1.earliest_time(), + e2.earliest_time(), + "mismatched earliest time for edge {:?}: left {:?}, right {:?}", + e1.id(), + e1.earliest_time(), + e2.earliest_time() + ); + assert_eq!( + e1.properties().constant().as_map(), + e2.properties().constant().as_map(), + "mismatched constant properties for edge {:?}: left {:?}, right {:?}", + e1.id(), + e1.properties().constant().as_map(), + e2.properties().constant().as_map() + ); + assert_eq!( + e1.properties().temporal().as_map(), + e2.properties().temporal().as_map(), + "mismatched temporal properties for edge {:?}: left {:?}, right {:?}", + e1.id(), + e1.properties().temporal().as_map(), + e2.properties().temporal().as_map(), + ); - assert!(check, "edge {:?} properties mismatch", e.id()); + assert_eq!( + e1.explode() + .iter() + .map(|e| (e.edge.layer().unwrap(), e.edge.time().unwrap())) + .collect::>(), + e2.explode() + .iter() + .map(|e| (e.edge.layer().unwrap(), e.edge.time().unwrap())) + .collect::>(), + "mismatched updates for edge {:?}: left {:?}, right {:?}", + e1.id(), + e1.explode() + .iter() + .map(|e| (e.edge.layer().unwrap(), e.edge.time().unwrap())) + .collect::>(), + e2.explode() + .iter() + .map(|e| (e.edge.layer().unwrap(), e.edge.time().unwrap())) + .collect::>(), + ); } } @@ -1917,7 +1974,7 @@ mod db_tests { .explode_layers() .iter() .filter_map(|e| { - e.edge.layer().copied().and_then(|layer| { + e.edge.layer().and_then(|layer| { Some((e.src().id().as_u64()?, e.dst().id().as_u64()?, layer)) }) }) @@ -1946,7 +2003,6 @@ mod db_tests { .filter_map(|e| { e.edge .layer() - .copied() .map(|layer| (e.src().id(), e.dst().id(), layer)) }) .collect::>(); @@ -1978,7 +2034,7 @@ mod db_tests { e.edge .layer() .zip(e.time().ok()) - .map(|(layer, t)| (t, e.src().id(), e.dst().id(), *layer)) + .map(|(layer, t)| (t, e.src().id(), e.dst().id(), layer)) }) }) .collect::>(); @@ -2014,7 +2070,7 @@ mod db_tests { e.edge .layer() .zip(Some(e.time().unwrap())) - .map(|(layer, t)| (t, e.src().id(), e.dst().id(), *layer)) + .map(|(layer, t)| (t, e.src().id(), e.dst().id(), layer)) }) }) .collect::>(); diff --git a/raphtory/src/db/graph/views/deletion_graph.rs b/raphtory/src/db/graph/views/deletion_graph.rs index 4111766dd6..3516c17459 100644 --- a/raphtory/src/db/graph/views/deletion_graph.rs +++ b/raphtory/src/db/graph/views/deletion_graph.rs @@ -372,11 +372,7 @@ impl TimeSemantics for PersistentGraph { let g = self.clone(); let edge = self.core_edge_arc(e.into()); Box::new(g.edge_layers(e, layer_ids).filter(move |&e| { - g.include_edge_window( - edge.as_ref(), - w.clone(), - &LayerIds::One(*e.layer().unwrap()), - ) + g.include_edge_window(edge.as_ref(), w.clone(), &LayerIds::One(e.layer().unwrap())) })) } diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 98a2730678..1b20ef77bb 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -606,6 +606,7 @@ mod tests { df_loaders::load_edges_from_df, }, prelude::*, + test_utils::build_edge_list, }; use itertools::Itertools; use polars_arrow::array::{MutableArray, MutablePrimitiveArray, MutableUtf8Array}; @@ -615,22 +616,6 @@ mod tests { }; use tempfile::NamedTempFile; - fn build_edge_list( - len: usize, - num_nodes: u64, - ) -> impl Strategy> { - proptest::collection::vec( - ( - 0..num_nodes, - 0..num_nodes, - any::(), - any::(), - any::(), - ), - 0..=len, - ) - } - fn build_df( chunk_size: usize, edges: &[(u64, u64, i64, String, i64)], diff --git a/raphtory/src/lib.rs b/raphtory/src/lib.rs index f80cabb051..c536653ce0 100644 --- a/raphtory/src/lib.rs +++ b/raphtory/src/lib.rs @@ -143,7 +143,8 @@ pub use polars_arrow as arrow2; #[cfg(test)] mod test_utils { - use crate::prelude::Graph; + use crate::prelude::*; + use proptest::{arbitrary::any, prelude::Strategy}; #[cfg(feature = "storage")] use tempfile::TempDir; @@ -168,4 +169,38 @@ mod test_utils { .into_graph(); test(&disk_graph) } + + pub(crate) fn build_edge_list( + len: usize, + num_nodes: u64, + ) -> impl Strategy> { + proptest::collection::vec( + ( + 0..num_nodes, + 0..num_nodes, + any::(), + any::(), + any::(), + ), + 0..=len, + ) + } + + pub(crate) fn build_graph_from_edge_list(edge_list: &[(u64, u64, i64, String, i64)]) -> Graph { + let g = Graph::new(); + for (src, dst, time, str_prop, int_prop) in edge_list { + g.add_edge( + *time, + src, + dst, + [ + ("str_prop", str_prop.into_prop()), + ("int_prop", int_prop.into_prop()), + ], + None, + ) + .unwrap(); + } + g + } } diff --git a/raphtory/src/search/mod.rs b/raphtory/src/search/mod.rs index 403ae2a350..66427b98c6 100644 --- a/raphtory/src/search/mod.rs +++ b/raphtory/src/search/mod.rs @@ -793,6 +793,11 @@ impl InternalAdditionOps for Indexe self.graph.next_event_id() } + #[inline] + fn read_event_id(&self) -> usize { + self.graph.read_event_id() + } + #[inline] fn reserve_event_ids(&self, num_ids: usize) -> Result { self.graph.reserve_event_ids(num_ids) From 89550cf04c61637867b94005d85928ad6071a1fb Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 16 Sep 2024 16:51:55 +0200 Subject: [PATCH 15/18] fix graph properties for disk graph --- pometry-storage-private | 2 +- raphtory/src/db/api/view/edge.rs | 3 +- raphtory/src/db/graph/graph.rs | 35 ++++++++++--------- raphtory/src/disk_graph/graph_impl/interop.rs | 10 +++++- raphtory/src/disk_graph/mod.rs | 9 +++-- 5 files changed, 38 insertions(+), 21 deletions(-) diff --git a/pometry-storage-private b/pometry-storage-private index 8773185e13..b72eac8d47 160000 --- a/pometry-storage-private +++ b/pometry-storage-private @@ -1 +1 @@ -Subproject commit 8773185e13f78cd22265b2aef71b72fd90e8f5e6 +Subproject commit b72eac8d47333c7285f3d3c84aa44248eab4af07 diff --git a/raphtory/src/db/api/view/edge.rs b/raphtory/src/db/api/view/edge.rs index 71f098875c..151265523b 100644 --- a/raphtory/src/db/api/view/edge.rs +++ b/raphtory/src/db/api/view/edge.rs @@ -434,7 +434,8 @@ mod test_edge_view { graph.add_edge(0, 1, 2, [("second", true)], None).unwrap(); graph.add_edge(0, 2, 3, [("second", true)], None).unwrap(); - test_storage!(&graph, |graph| { + //FIXME: DiskGraph does not preserve secondary index (see #1780) + test_graph(&graph, |graph| { let mut exploded_edges: Vec<_> = graph.edges().explode().into_iter().collect(); exploded_edges.sort_by_key(|a| a.time_and_index()); diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index 2e0a4ea439..8440465e9a 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -234,25 +234,28 @@ pub fn assert_graph_equal< e2.properties().temporal().as_map(), ); + // FIXME: DiskGraph does not currently preserve secondary index + + let mut e1_updates: Vec<_> = e1 + .explode() + .iter() + .map(|e| (e.layer_name().unwrap(), e.time().unwrap())) + .collect(); + e1_updates.sort(); + + let mut e2_updates: Vec<_> = e2 + .explode() + .iter() + .map(|e| (e.layer_name().unwrap(), e.time().unwrap())) + .collect(); + e2_updates.sort(); assert_eq!( - e1.explode() - .iter() - .map(|e| (e.edge.layer().unwrap(), e.edge.time().unwrap())) - .collect::>(), - e2.explode() - .iter() - .map(|e| (e.edge.layer().unwrap(), e.edge.time().unwrap())) - .collect::>(), + e1_updates, + e2_updates, "mismatched updates for edge {:?}: left {:?}, right {:?}", e1.id(), - e1.explode() - .iter() - .map(|e| (e.edge.layer().unwrap(), e.edge.time().unwrap())) - .collect::>(), - e2.explode() - .iter() - .map(|e| (e.edge.layer().unwrap(), e.edge.time().unwrap())) - .collect::>(), + e1_updates, + e2_updates, ); } } diff --git a/raphtory/src/disk_graph/graph_impl/interop.rs b/raphtory/src/disk_graph/graph_impl/interop.rs index 19b2761ea5..108207186f 100644 --- a/raphtory/src/disk_graph/graph_impl/interop.rs +++ b/raphtory/src/disk_graph/graph_impl/interop.rs @@ -9,7 +9,7 @@ use crate::{ edges::edge_storage_ops::EdgeStorageOps, nodes::node_storage_ops::NodeStorageOps, tprop_storage_ops::TPropOps, }, - view::internal::CoreGraphOps, + view::internal::{CoreGraphOps, TimeSemantics}, }, disk_graph::graph_impl::prop_conversion::arrow_array_from_props, prelude::*, @@ -139,4 +139,12 @@ impl GraphLike for Graph { prop_type, ) } + + fn earliest_time(&self) -> i64 { + self.earliest_time_global().unwrap_or(i64::MAX) + } + + fn latest_time(&self) -> i64 { + self.latest_time_global().unwrap_or(i64::MIN) + } } diff --git a/raphtory/src/disk_graph/mod.rs b/raphtory/src/disk_graph/mod.rs index 0504307ead..3126f83e36 100644 --- a/raphtory/src/disk_graph/mod.rs +++ b/raphtory/src/disk_graph/mod.rs @@ -12,7 +12,10 @@ use crate::{ }, utils::errors::GraphError, }, - db::{api::storage::graph::storage_ops, graph::views::deletion_graph::PersistentGraph}, + db::{ + api::{storage::graph::storage_ops, view::internal::CoreGraphOps}, + graph::views::deletion_graph::PersistentGraph, + }, disk_graph::graph_impl::{prop_conversion::make_node_properties_from_graph, ParquetLayerCols}, prelude::{Graph, Layer}, }; @@ -292,7 +295,9 @@ impl DiskGraphStorage { let inner_graph = TemporalGraph::from_graph(graph, graph_dir.as_ref(), || { make_node_properties_from_graph(graph, graph_dir.as_ref()) })?; - Ok(Self::new(inner_graph)) + let mut storage = Self::new(inner_graph); + storage.graph_props = Arc::new(graph.graph_meta().deep_clone()); + Ok(storage) } pub fn load_from_edge_lists( From 62dd97647375094156c84621b685ddb24148cb07 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 16 Sep 2024 17:39:25 +0200 Subject: [PATCH 16/18] clean up --- raphtory/src/core/entities/graph/tgraph.rs | 6 +-- .../storage/graph/storage_ops/additions.rs | 2 +- raphtory/src/db/api/view/graph.rs | 2 +- raphtory/src/io/arrow/df_loaders.rs | 5 +-- raphtory/src/serialise/serialise.rs | 1 - raphtory/src/vectors/template.rs | 39 +++++++++---------- 6 files changed, 23 insertions(+), 32 deletions(-) diff --git a/raphtory/src/core/entities/graph/tgraph.rs b/raphtory/src/core/entities/graph/tgraph.rs index a0353f8f1c..5be2070af4 100644 --- a/raphtory/src/core/entities/graph/tgraph.rs +++ b/raphtory/src/core/entities/graph/tgraph.rs @@ -32,11 +32,7 @@ use raphtory_api::core::{ use rustc_hash::FxHasher; use serde::{Deserialize, Serialize}; use std::{ - collections::HashMap, - fmt::Debug, - hash::BuildHasherDefault, - iter, - sync::{atomic::AtomicUsize, Arc}, + collections::HashMap, fmt::Debug, hash::BuildHasherDefault, iter, sync::atomic::AtomicUsize, }; pub(crate) type FxDashSet = DashSet>; diff --git a/raphtory/src/db/api/storage/graph/storage_ops/additions.rs b/raphtory/src/db/api/storage/graph/storage_ops/additions.rs index 8ea394c11b..10542ad571 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/additions.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/additions.rs @@ -10,7 +10,7 @@ use crate::{ PropType, }, db::api::{mutation::internal::InternalAdditionOps, storage::graph::locked::WriteLockedGraph}, - prelude::{GraphViewOps, Prop}, + prelude::Prop, }; use either::Either; use raphtory_api::core::{ diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index e46c5868be..3b2e9880b2 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -6,7 +6,7 @@ use crate::{ }, db::{ api::{ - mutation::{internal::InternalAdditionOps, PropertyAdditionOps}, + mutation::internal::InternalAdditionOps, properties::{ internal::{ConstPropertiesOps, TemporalPropertiesOps}, Properties, diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 1b20ef77bb..0696de8636 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -610,10 +610,7 @@ mod tests { }; use itertools::Itertools; use polars_arrow::array::{MutableArray, MutablePrimitiveArray, MutableUtf8Array}; - use proptest::{ - prelude::{any, Strategy}, - proptest, - }; + use proptest::proptest; use tempfile::NamedTempFile; fn build_df( diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index 7d89efd688..4bc9b9bcde 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -796,7 +796,6 @@ impl StableDecode for TemporalGraph { } } }); - {} storage .write_lock_edges()? .into_par_iter_mut() diff --git a/raphtory/src/vectors/template.rs b/raphtory/src/vectors/template.rs index 5e85801cff..7a03798da7 100644 --- a/raphtory/src/vectors/template.rs +++ b/raphtory/src/vectors/template.rs @@ -1,20 +1,19 @@ -use minijinja::{ - value::{Enumerator, Object}, - Environment, Template, Value, -}; -use raphtory_api::core::storage::arc_str::ArcStr; -use serde::Serialize; -use std::sync::Arc; - use super::datetimeformat::datetimeformat; use crate::{ core::{DocumentInput, Prop}, db::{ - api::{properties::TemporalPropertyView, view::StaticGraphViewOps}, + api::properties::TemporalPropertyView, graph::{edge::EdgeView, node::NodeView}, }, prelude::{EdgeViewOps, GraphViewOps, NodeViewOps}, }; +use minijinja::{ + value::{Enumerator, Object}, + Environment, Template, Value, +}; +use raphtory_api::core::storage::arc_str::ArcStr; +use serde::Serialize; +use std::sync::Arc; #[derive(Debug)] struct PropUpdate { @@ -36,7 +35,7 @@ impl<'graph, G: GraphViewOps<'graph>> From>> fo } // FIXME: merge with the one above -impl From> for Value { +impl<'graph, G: GraphViewOps<'graph>> From> for Value { fn from(value: TemporalPropertyView) -> Self { value .iter() @@ -106,8 +105,8 @@ struct GraphTemplateContext { } // FIXME: boilerplate for the properties -impl From<&G> for GraphTemplateContext { - fn from(value: &G) -> Self { +impl<'graph, G: GraphViewOps<'graph>> From for GraphTemplateContext { + fn from(value: G) -> Self { Self { props: value .properties() @@ -170,9 +169,9 @@ fn empty_iter() -> Box> { } impl DocumentTemplate { - pub(crate) fn graph( + pub(crate) fn graph<'graph, G: GraphViewOps<'graph>>( &self, - graph: &G, + graph: G, ) -> Box> { match &self.graph_template { Some(template) => { @@ -213,9 +212,9 @@ impl DocumentTemplate { } /// A function that translate an edge into an iterator of documents - pub(crate) fn edge( + pub(crate) fn edge<'graph, G: GraphViewOps<'graph>>( &self, - edge: EdgeView<&G, &G>, + edge: EdgeView, ) -> Box> { match &self.edge_template { Some(template) => { @@ -254,11 +253,11 @@ struct EdgeTemplateContext { props: Value, } -impl From> for EdgeTemplateContext { - fn from(value: EdgeView<&G>) -> Self { +impl<'graph, G: GraphViewOps<'graph>> From> for EdgeTemplateContext { + fn from(value: EdgeView) -> Self { Self { - src: (value.src()).into(), - dst: (value.dst()).into(), + src: value.src().into(), + dst: value.dst().into(), history: value.history(), layers: value .layer_names() From 74c2bbeb29812cb59efaed70a219900c7e4fbab1 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 18 Sep 2024 13:32:28 +0200 Subject: [PATCH 17/18] move proto impl to its own module and add proptest --- raphtory/src/serialise/mod.rs | 1 + raphtory/src/serialise/proto_ext.rs | 660 +++++++++++++++++++++++++++ raphtory/src/serialise/serialise.rs | 679 ++-------------------------- 3 files changed, 690 insertions(+), 650 deletions(-) create mode 100644 raphtory/src/serialise/proto_ext.rs diff --git a/raphtory/src/serialise/mod.rs b/raphtory/src/serialise/mod.rs index f418fc8417..650c7e1d16 100644 --- a/raphtory/src/serialise/mod.rs +++ b/raphtory/src/serialise/mod.rs @@ -1,4 +1,5 @@ pub(crate) mod incremental; +mod proto_ext; mod serialise; mod proto { diff --git a/raphtory/src/serialise/proto_ext.rs b/raphtory/src/serialise/proto_ext.rs new file mode 100644 index 0000000000..3ad3548f4c --- /dev/null +++ b/raphtory/src/serialise/proto_ext.rs @@ -0,0 +1,660 @@ +use crate::{ + core::{utils::errors::GraphError, DocumentInput, Lifespan, Prop, PropType}, + db::graph::views::deletion_graph::PersistentGraph, + prelude::{Graph, StableDecode, StableEncode}, + serialise::{ + proto, + proto::{ + graph_update::{ + DelEdge, PropPair, Update, UpdateEdgeCProps, UpdateEdgeTProps, UpdateGraphCProps, + UpdateGraphTProps, UpdateNodeCProps, UpdateNodeTProps, UpdateNodeType, + }, + new_meta::{ + Meta, NewEdgeCProp, NewEdgeTProp, NewGraphCProp, NewGraphTProp, NewLayer, + NewNodeCProp, NewNodeTProp, NewNodeType, + }, + new_node, prop, + prop_type::PropType as SPropType, + GraphUpdate, NewEdge, NewMeta, NewNode, + }, + }, +}; +use chrono::{DateTime, Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike}; +use raphtory_api::core::{ + entities::{GidRef, EID, VID}, + storage::{ + arc_str::ArcStr, + timeindex::{AsTime, TimeIndexEntry}, + }, +}; +use std::{borrow::Borrow, sync::Arc}; + +fn as_proto_prop_type(p_type: &PropType) -> SPropType { + match p_type { + PropType::Str => SPropType::Str, + PropType::U8 => SPropType::U8, + PropType::U16 => SPropType::U16, + PropType::U32 => SPropType::U32, + PropType::I32 => SPropType::I32, + PropType::I64 => SPropType::I64, + PropType::U64 => SPropType::U64, + PropType::F32 => SPropType::F32, + PropType::F64 => SPropType::F64, + PropType::Bool => SPropType::Bool, + PropType::List => SPropType::List, + PropType::Map => SPropType::Map, + PropType::NDTime => SPropType::NdTime, + PropType::DTime => SPropType::DTime, + PropType::Graph => SPropType::Graph, + PropType::PersistentGraph => SPropType::PersistentGraph, + PropType::Document => SPropType::Document, + _ => unimplemented!("Empty prop types not supported!"), + } +} + +pub fn as_prop_type(p_type: SPropType) -> PropType { + match p_type { + SPropType::Str => PropType::Str, + SPropType::U8 => PropType::U8, + SPropType::U16 => PropType::U16, + SPropType::U32 => PropType::U32, + SPropType::I32 => PropType::I32, + SPropType::I64 => PropType::I64, + SPropType::U64 => PropType::U64, + SPropType::F32 => PropType::F32, + SPropType::F64 => PropType::F64, + SPropType::Bool => PropType::Bool, + SPropType::List => PropType::List, + SPropType::Map => PropType::Map, + SPropType::NdTime => PropType::NDTime, + SPropType::DTime => PropType::DTime, + SPropType::Graph => PropType::Graph, + SPropType::PersistentGraph => PropType::PersistentGraph, + SPropType::Document => PropType::Document, + } +} + +impl NewEdge { + pub fn src(&self) -> VID { + VID(self.src as usize) + } + + pub fn dst(&self) -> VID { + VID(self.dst as usize) + } + + pub fn eid(&self) -> EID { + EID(self.eid as usize) + } +} + +impl DelEdge { + pub fn eid(&self) -> EID { + EID(self.eid as usize) + } + + pub fn layer_id(&self) -> usize { + self.layer_id as usize + } + + pub fn time(&self) -> TimeIndexEntry { + TimeIndexEntry(self.time, self.secondary as usize) + } +} + +impl UpdateEdgeCProps { + pub fn eid(&self) -> EID { + EID(self.eid as usize) + } + + pub fn layer_id(&self) -> usize { + self.layer_id as usize + } + + pub fn props(&self) -> impl Iterator> + '_ { + self.properties.iter().map(as_prop) + } +} + +impl UpdateEdgeTProps { + pub fn eid(&self) -> EID { + EID(self.eid as usize) + } + + pub fn layer_id(&self) -> usize { + self.layer_id as usize + } + + pub fn time(&self) -> TimeIndexEntry { + TimeIndexEntry(self.time, self.secondary as usize) + } + + pub fn has_props(&self) -> bool { + !self.properties.is_empty() + } + + pub fn props(&self) -> impl Iterator> + '_ { + self.properties.iter().map(as_prop) + } +} + +impl UpdateNodeType { + pub fn vid(&self) -> VID { + VID(self.id as usize) + } + + pub fn type_id(&self) -> usize { + self.type_id as usize + } +} + +impl UpdateNodeCProps { + pub fn vid(&self) -> VID { + VID(self.id as usize) + } + + pub fn props(&self) -> impl Iterator> + '_ { + self.properties.iter().map(as_prop) + } +} + +impl UpdateNodeTProps { + pub fn vid(&self) -> VID { + VID(self.id as usize) + } + + pub fn time(&self) -> TimeIndexEntry { + TimeIndexEntry(self.time, self.secondary as usize) + } + + pub fn props(&self) -> impl Iterator> + '_ { + self.properties.iter().map(as_prop) + } +} + +impl NewMeta { + fn new(new_meta: Meta) -> Self { + Self { + meta: Some(new_meta), + } + } + + fn new_graph_cprop(key: &str, id: usize) -> Self { + let inner = NewGraphCProp { + name: key.to_string(), + id: id as u64, + }; + Self::new(Meta::NewGraphCprop(inner)) + } + + fn new_graph_tprop(key: &str, id: usize, dtype: &PropType) -> Self { + let mut inner = NewGraphTProp::default(); + inner.name = key.to_string(); + inner.id = id as u64; + inner.set_p_type(as_proto_prop_type(dtype)); + Self::new(Meta::NewGraphTprop(inner)) + } + + fn new_node_cprop(key: &str, id: usize, dtype: &PropType) -> Self { + let mut inner = NewNodeCProp::default(); + inner.name = key.to_string(); + inner.id = id as u64; + inner.set_p_type(as_proto_prop_type(dtype)); + Self::new(Meta::NewNodeCprop(inner)) + } + + fn new_node_tprop(key: &str, id: usize, dtype: &PropType) -> Self { + let mut inner = NewNodeTProp::default(); + inner.name = key.to_string(); + inner.id = id as u64; + inner.set_p_type(as_proto_prop_type(dtype)); + Self::new(Meta::NewNodeTprop(inner)) + } + + fn new_edge_cprop(key: &str, id: usize, dtype: &PropType) -> Self { + let mut inner = NewEdgeCProp::default(); + inner.name = key.to_string(); + inner.id = id as u64; + inner.set_p_type(as_proto_prop_type(dtype)); + Self::new(Meta::NewEdgeCprop(inner)) + } + + fn new_edge_tprop(key: &str, id: usize, dtype: &PropType) -> Self { + let mut inner = NewEdgeTProp::default(); + inner.name = key.to_string(); + inner.id = id as u64; + inner.set_p_type(as_proto_prop_type(dtype)); + Self::new(Meta::NewEdgeTprop(inner)) + } + + fn new_layer(layer: &str, id: usize) -> Self { + let mut inner = NewLayer::default(); + inner.name = layer.to_string(); + inner.id = id as u64; + Self::new(Meta::NewLayer(inner)) + } + + fn new_node_type(node_type: &str, id: usize) -> Self { + let mut inner = NewNodeType::default(); + inner.name = node_type.to_string(); + inner.id = id as u64; + Self::new(Meta::NewNodeType(inner)) + } +} + +impl GraphUpdate { + fn new(update: Update) -> Self { + Self { + update: Some(update), + } + } + + fn update_graph_cprops(values: impl Iterator)>) -> Self { + let inner = UpdateGraphCProps::new(values); + Self::new(Update::UpdateGraphCprops(inner)) + } + + fn update_graph_tprops( + time: TimeIndexEntry, + values: impl IntoIterator)>, + ) -> Self { + let inner = UpdateGraphTProps::new(time, values); + Self::new(Update::UpdateGraphTprops(inner)) + } + + fn update_node_type(node_id: VID, type_id: usize) -> Self { + let inner = UpdateNodeType { + id: node_id.as_u64(), + type_id: type_id as u64, + }; + Self::new(Update::UpdateNodeType(inner)) + } + + fn update_node_cprops( + node_id: VID, + properties: impl Iterator)>, + ) -> Self { + let properties = collect_proto_props(properties); + let inner = UpdateNodeCProps { + id: node_id.as_u64(), + properties, + }; + Self::new(Update::UpdateNodeCprops(inner)) + } + + fn update_node_tprops( + node_id: VID, + time: TimeIndexEntry, + properties: impl Iterator)>, + ) -> Self { + let properties = collect_proto_props(properties); + let inner = UpdateNodeTProps { + id: node_id.as_u64(), + time: time.t(), + secondary: time.i() as u64, + properties, + }; + Self::new(Update::UpdateNodeTprops(inner)) + } + + fn update_edge_tprops( + eid: EID, + time: TimeIndexEntry, + layer_id: usize, + properties: impl Iterator)>, + ) -> Self { + let properties = collect_proto_props(properties); + let inner = UpdateEdgeTProps { + eid: eid.0 as u64, + time: time.t(), + secondary: time.i() as u64, + layer_id: layer_id as u64, + properties, + }; + Self::new(Update::UpdateEdgeTprops(inner)) + } + + fn update_edge_cprops( + eid: EID, + layer_id: usize, + properties: impl Iterator)>, + ) -> Self { + let properties = collect_proto_props(properties); + let inner = UpdateEdgeCProps { + eid: eid.0 as u64, + layer_id: layer_id as u64, + properties, + }; + Self::new(Update::UpdateEdgeCprops(inner)) + } + + fn del_edge(eid: EID, layer_id: usize, time: TimeIndexEntry) -> Self { + let inner = DelEdge { + eid: eid.as_u64(), + time: time.t(), + secondary: time.i() as u64, + layer_id: layer_id as u64, + }; + Self::new(Update::DelEdge(inner)) + } +} + +impl UpdateGraphCProps { + fn new(values: impl Iterator)>) -> Self { + let properties = collect_proto_props(values); + UpdateGraphCProps { properties } + } +} + +impl UpdateGraphTProps { + fn new( + time: TimeIndexEntry, + values: impl IntoIterator)>, + ) -> Self { + let properties = collect_proto_props(values); + UpdateGraphTProps { + time: time.t(), + secondary: time.i() as u64, + properties, + } + } +} + +impl PropPair { + fn new(key: usize, value: &Prop) -> Self { + PropPair { + key: key as u64, + value: Some(as_proto_prop(value)), + } + } +} + +impl proto::Graph { + pub fn new_edge(&mut self, src: VID, dst: VID, eid: EID) { + let edge = NewEdge { + src: src.as_u64(), + dst: dst.as_u64(), + eid: eid.as_u64(), + }; + self.edges.push(edge); + } + + pub fn new_node(&mut self, gid: GidRef, vid: VID, type_id: usize) { + let type_id = type_id as u64; + let gid = match gid { + GidRef::U64(id) => new_node::Gid::GidU64(id), + GidRef::Str(name) => new_node::Gid::GidStr(name.to_string()), + }; + let node = NewNode { + type_id, + gid: Some(gid), + vid: vid.as_u64(), + }; + self.nodes.push(node); + } + + pub fn new_graph_cprop(&mut self, key: &str, id: usize) { + self.metas.push(NewMeta::new_graph_cprop(key, id)); + } + + pub fn new_graph_tprop(&mut self, key: &str, id: usize, dtype: &PropType) { + self.metas.push(NewMeta::new_graph_tprop(key, id, dtype)); + } + + pub fn new_node_cprop(&mut self, key: &str, id: usize, dtype: &PropType) { + self.metas.push(NewMeta::new_node_cprop(key, id, dtype)); + } + + pub fn new_node_tprop(&mut self, key: &str, id: usize, dtype: &PropType) { + self.metas.push(NewMeta::new_node_tprop(key, id, dtype)); + } + + pub fn new_edge_cprop(&mut self, key: &str, id: usize, dtype: &PropType) { + self.metas.push(NewMeta::new_edge_cprop(key, id, dtype)); + } + + pub fn new_edge_tprop(&mut self, key: &str, id: usize, dtype: &PropType) { + self.metas.push(NewMeta::new_edge_tprop(key, id, dtype)) + } + + pub fn new_layer(&mut self, layer: &str, id: usize) { + self.metas.push(NewMeta::new_layer(layer, id)); + } + + pub fn new_node_type(&mut self, node_type: &str, id: usize) { + self.metas.push(NewMeta::new_node_type(node_type, id)); + } + + pub fn update_graph_cprops( + &mut self, + values: impl Iterator)>, + ) { + self.updates.push(GraphUpdate::update_graph_cprops(values)); + } + + pub fn update_graph_tprops( + &mut self, + time: TimeIndexEntry, + values: impl IntoIterator)>, + ) { + self.updates + .push(GraphUpdate::update_graph_tprops(time, values)); + } + + pub fn update_node_type(&mut self, node_id: VID, type_id: usize) { + self.updates + .push(GraphUpdate::update_node_type(node_id, type_id)) + } + pub fn update_node_cprops( + &mut self, + node_id: VID, + properties: impl Iterator)>, + ) { + self.updates + .push(GraphUpdate::update_node_cprops(node_id, properties)); + } + + pub fn update_node_tprops( + &mut self, + node_id: VID, + time: TimeIndexEntry, + properties: impl Iterator)>, + ) { + self.updates + .push(GraphUpdate::update_node_tprops(node_id, time, properties)); + } + + pub fn update_edge_tprops( + &mut self, + eid: EID, + time: TimeIndexEntry, + layer_id: usize, + properties: impl Iterator)>, + ) { + self.updates.push(GraphUpdate::update_edge_tprops( + eid, time, layer_id, properties, + )); + } + + pub fn update_edge_cprops( + &mut self, + eid: EID, + layer_id: usize, + properties: impl Iterator)>, + ) { + self.updates + .push(GraphUpdate::update_edge_cprops(eid, layer_id, properties)); + } + + pub fn del_edge(&mut self, eid: EID, layer_id: usize, time: TimeIndexEntry) { + self.updates + .push(GraphUpdate::del_edge(eid, layer_id, time)) + } +} + +fn as_prop(prop_pair: &PropPair) -> Result<(usize, Prop), GraphError> { + let PropPair { key, value } = prop_pair; + let value = value.as_ref().expect("Missing prop value"); + let value = value.value.as_ref(); + let value = as_prop_value(value)?; + + Ok((*key as usize, value)) +} + +fn as_prop_value(value: Option<&prop::Value>) -> Result { + let value = match value.expect("Missing prop value") { + prop::Value::BoolValue(b) => Prop::Bool(*b), + prop::Value::U8(u) => Prop::U8((*u).try_into().unwrap()), + prop::Value::U16(u) => Prop::U16((*u).try_into().unwrap()), + prop::Value::U32(u) => Prop::U32(*u), + prop::Value::I32(i) => Prop::I32(*i), + prop::Value::I64(i) => Prop::I64(*i), + prop::Value::U64(u) => Prop::U64(*u), + prop::Value::F32(f) => Prop::F32(*f), + prop::Value::F64(f) => Prop::F64(*f), + prop::Value::Str(s) => Prop::Str(ArcStr::from(s.as_str())), + prop::Value::Prop(props) => Prop::List(Arc::new( + props + .properties + .iter() + .map(|prop| as_prop_value(prop.value.as_ref())) + .collect::, _>>()?, + )), + prop::Value::Map(dict) => Prop::Map(Arc::new( + dict.map + .iter() + .map(|(k, v)| Ok((ArcStr::from(k.as_str()), as_prop_value(v.value.as_ref())?))) + .collect::>()?, + )), + prop::Value::NdTime(ndt) => { + let prop::NdTime { + year, + month, + day, + hour, + minute, + second, + nanos, + } = ndt; + let ndt = NaiveDateTime::new( + NaiveDate::from_ymd_opt(*year as i32, *month as u32, *day as u32).unwrap(), + NaiveTime::from_hms_nano_opt( + *hour as u32, + *minute as u32, + *second as u32, + *nanos as u32, + ) + .unwrap(), + ); + Prop::NDTime(ndt) + } + prop::Value::DTime(dt) => Prop::DTime(DateTime::parse_from_rfc3339(dt).unwrap().into()), + prop::Value::Graph(graph_proto) => Prop::Graph(Graph::decode_from_proto(graph_proto)?), + prop::Value::PersistentGraph(graph_proto) => { + Prop::PersistentGraph(PersistentGraph::decode_from_proto(graph_proto)?) + } + prop::Value::DocumentInput(doc) => Prop::Document(DocumentInput { + content: doc.content.clone(), + life: doc + .life + .as_ref() + .map(|l| match l.l_type { + Some(prop::lifespan::LType::Interval(prop::lifespan::Interval { + start, + end, + })) => Lifespan::Interval { start, end }, + Some(prop::lifespan::LType::Event(prop::lifespan::Event { time })) => { + Lifespan::Event { time } + } + None => Lifespan::Inherited, + }) + .unwrap_or(Lifespan::Inherited), + }), + }; + Ok(value) +} + +fn collect_proto_props( + iter: impl IntoIterator)>, +) -> Vec { + iter.into_iter() + .map(|(key, value)| PropPair::new(key, value.borrow())) + .collect() +} + +pub fn collect_props<'a>( + iter: impl IntoIterator, +) -> Result, GraphError> { + iter.into_iter().map(as_prop).collect() +} + +fn as_proto_prop(prop: &Prop) -> proto::Prop { + let value: prop::Value = match prop { + Prop::Bool(b) => prop::Value::BoolValue(*b), + Prop::U8(u) => prop::Value::U8((*u).into()), + Prop::U16(u) => prop::Value::U16((*u).into()), + Prop::U32(u) => prop::Value::U32(*u), + Prop::I32(i) => prop::Value::I32(*i), + Prop::I64(i) => prop::Value::I64(*i), + Prop::U64(u) => prop::Value::U64(*u), + Prop::F32(f) => prop::Value::F32(*f), + Prop::F64(f) => prop::Value::F64(*f), + Prop::Str(s) => prop::Value::Str(s.to_string()), + Prop::List(list) => { + let properties = list.iter().map(as_proto_prop).collect(); + prop::Value::Prop(prop::Props { properties }) + } + Prop::Map(map) => { + let map = map + .iter() + .map(|(k, v)| (k.to_string(), as_proto_prop(v))) + .collect(); + prop::Value::Map(prop::Dict { map }) + } + Prop::NDTime(ndt) => { + let (year, month, day) = (ndt.date().year(), ndt.date().month(), ndt.date().day()); + let (hour, minute, second, nanos) = ( + ndt.time().hour(), + ndt.time().minute(), + ndt.time().second(), + ndt.time().nanosecond(), + ); + + let proto_ndt = prop::NdTime { + year: year as u32, + month: month as u32, + day: day as u32, + hour: hour as u32, + minute: minute as u32, + second: second as u32, + nanos: nanos as u32, + }; + prop::Value::NdTime(proto_ndt) + } + Prop::DTime(dt) => { + prop::Value::DTime(dt.to_rfc3339_opts(chrono::SecondsFormat::AutoSi, true)) + } + Prop::Graph(g) => prop::Value::Graph(g.encode_to_proto()), + Prop::PersistentGraph(g) => prop::Value::PersistentGraph(g.encode_to_proto()), + Prop::Document(doc) => { + let life = match doc.life { + Lifespan::Interval { start, end } => { + Some(prop::lifespan::LType::Interval(prop::lifespan::Interval { + start, + end, + })) + } + Lifespan::Event { time } => { + Some(prop::lifespan::LType::Event(prop::lifespan::Event { time })) + } + Lifespan::Inherited => None, + }; + prop::Value::DocumentInput(prop::DocumentInput { + content: doc.content.clone(), + life: Some(prop::Lifespan { l_type: life }), + }) + } + }; + + proto::Prop { value: Some(value) } +} diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index 4bc9b9bcde..6822abb13a 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -3,7 +3,7 @@ use crate::{ entities::{graph::tgraph::TemporalGraph, LayerIds}, storage::timeindex::TimeIndexOps, utils::errors::GraphError, - DocumentInput, Lifespan, Prop, PropType, + Prop, }, db::{ api::{ @@ -19,25 +19,19 @@ use crate::{ prelude::Graph, serialise::{ proto, - proto::{ - graph_update::*, new_meta::*, new_node, new_node::Gid, prop, - prop_type::PropType as SPropType, GraphUpdate, NewEdge, NewMeta, NewNode, - }, + proto::{graph_update::*, new_meta::*, new_node::Gid}, + proto_ext, }, }; -use chrono::{DateTime, Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike}; use itertools::Itertools; use prost::Message; use raphtory_api::core::{ entities::{GidRef, EID, ELID, VID}, - storage::{ - arc_str::ArcStr, - timeindex::{AsTime, TimeIndexEntry}, - }, + storage::timeindex::TimeIndexEntry, Direction, }; use rayon::prelude::*; -use std::{borrow::Borrow, fs::File, io::Write, iter, path::Path, sync::Arc}; +use std::{fs::File, io::Write, iter, path::Path, sync::Arc}; macro_rules! zip_tprop_updates { ($iter:expr) => { @@ -89,469 +83,6 @@ pub trait CacheOps: Sized { fn load_cached(path: impl AsRef) -> Result; } -fn as_proto_prop_type(p_type: &PropType) -> SPropType { - match p_type { - PropType::Str => SPropType::Str, - PropType::U8 => SPropType::U8, - PropType::U16 => SPropType::U16, - PropType::U32 => SPropType::U32, - PropType::I32 => SPropType::I32, - PropType::I64 => SPropType::I64, - PropType::U64 => SPropType::U64, - PropType::F32 => SPropType::F32, - PropType::F64 => SPropType::F64, - PropType::Bool => SPropType::Bool, - PropType::List => SPropType::List, - PropType::Map => SPropType::Map, - PropType::NDTime => SPropType::NdTime, - PropType::DTime => SPropType::DTime, - PropType::Graph => SPropType::Graph, - PropType::PersistentGraph => SPropType::PersistentGraph, - PropType::Document => SPropType::Document, - _ => unimplemented!("Empty prop types not supported!"), - } -} - -fn as_prop_type(p_type: SPropType) -> PropType { - match p_type { - SPropType::Str => PropType::Str, - SPropType::U8 => PropType::U8, - SPropType::U16 => PropType::U16, - SPropType::U32 => PropType::U32, - SPropType::I32 => PropType::I32, - SPropType::I64 => PropType::I64, - SPropType::U64 => PropType::U64, - SPropType::F32 => PropType::F32, - SPropType::F64 => PropType::F64, - SPropType::Bool => PropType::Bool, - SPropType::List => PropType::List, - SPropType::Map => PropType::Map, - SPropType::NdTime => PropType::NDTime, - SPropType::DTime => PropType::DTime, - SPropType::Graph => PropType::Graph, - SPropType::PersistentGraph => PropType::PersistentGraph, - SPropType::Document => PropType::Document, - } -} - -impl NewEdge { - fn src(&self) -> VID { - VID(self.src as usize) - } - - fn dst(&self) -> VID { - VID(self.dst as usize) - } - - fn eid(&self) -> EID { - EID(self.eid as usize) - } -} - -impl DelEdge { - fn eid(&self) -> EID { - EID(self.eid as usize) - } - - fn layer_id(&self) -> usize { - self.layer_id as usize - } - - fn time(&self) -> TimeIndexEntry { - TimeIndexEntry(self.time, self.secondary as usize) - } -} - -impl UpdateEdgeCProps { - fn eid(&self) -> EID { - EID(self.eid as usize) - } - - fn layer_id(&self) -> usize { - self.layer_id as usize - } - - fn props(&self) -> impl Iterator> + '_ { - self.properties.iter().map(as_prop) - } -} - -impl UpdateEdgeTProps { - fn eid(&self) -> EID { - EID(self.eid as usize) - } - - fn layer_id(&self) -> usize { - self.layer_id as usize - } - - fn time(&self) -> TimeIndexEntry { - TimeIndexEntry(self.time, self.secondary as usize) - } - - fn has_props(&self) -> bool { - !self.properties.is_empty() - } - - fn props(&self) -> impl Iterator> + '_ { - self.properties.iter().map(as_prop) - } -} - -impl UpdateNodeType { - fn vid(&self) -> VID { - VID(self.id as usize) - } - - fn type_id(&self) -> usize { - self.type_id as usize - } -} - -impl UpdateNodeCProps { - fn vid(&self) -> VID { - VID(self.id as usize) - } - - fn props(&self) -> impl Iterator> + '_ { - self.properties.iter().map(as_prop) - } -} - -impl UpdateNodeTProps { - fn vid(&self) -> VID { - VID(self.id as usize) - } - - fn time(&self) -> TimeIndexEntry { - TimeIndexEntry(self.time, self.secondary as usize) - } - - fn props(&self) -> impl Iterator> + '_ { - self.properties.iter().map(as_prop) - } -} - -impl NewMeta { - fn new(new_meta: Meta) -> Self { - Self { - meta: Some(new_meta), - } - } - - fn new_graph_cprop(key: &str, id: usize) -> Self { - let inner = NewGraphCProp { - name: key.to_string(), - id: id as u64, - }; - Self::new(Meta::NewGraphCprop(inner)) - } - - fn new_graph_tprop(key: &str, id: usize, dtype: &PropType) -> Self { - let mut inner = NewGraphTProp::default(); - inner.name = key.to_string(); - inner.id = id as u64; - inner.set_p_type(as_proto_prop_type(dtype)); - Self::new(Meta::NewGraphTprop(inner)) - } - - fn new_node_cprop(key: &str, id: usize, dtype: &PropType) -> Self { - let mut inner = NewNodeCProp::default(); - inner.name = key.to_string(); - inner.id = id as u64; - inner.set_p_type(as_proto_prop_type(dtype)); - Self::new(Meta::NewNodeCprop(inner)) - } - - fn new_node_tprop(key: &str, id: usize, dtype: &PropType) -> Self { - let mut inner = NewNodeTProp::default(); - inner.name = key.to_string(); - inner.id = id as u64; - inner.set_p_type(as_proto_prop_type(dtype)); - Self::new(Meta::NewNodeTprop(inner)) - } - - fn new_edge_cprop(key: &str, id: usize, dtype: &PropType) -> Self { - let mut inner = NewEdgeCProp::default(); - inner.name = key.to_string(); - inner.id = id as u64; - inner.set_p_type(as_proto_prop_type(dtype)); - Self::new(Meta::NewEdgeCprop(inner)) - } - - fn new_edge_tprop(key: &str, id: usize, dtype: &PropType) -> Self { - let mut inner = NewEdgeTProp::default(); - inner.name = key.to_string(); - inner.id = id as u64; - inner.set_p_type(as_proto_prop_type(dtype)); - Self::new(Meta::NewEdgeTprop(inner)) - } - - fn new_layer(layer: &str, id: usize) -> Self { - let mut inner = NewLayer::default(); - inner.name = layer.to_string(); - inner.id = id as u64; - Self::new(Meta::NewLayer(inner)) - } - - fn new_node_type(node_type: &str, id: usize) -> Self { - let mut inner = NewNodeType::default(); - inner.name = node_type.to_string(); - inner.id = id as u64; - Self::new(Meta::NewNodeType(inner)) - } -} - -impl GraphUpdate { - fn new(update: Update) -> Self { - Self { - update: Some(update), - } - } - - fn update_graph_cprops(values: impl Iterator)>) -> Self { - let inner = UpdateGraphCProps::new(values); - Self::new(Update::UpdateGraphCprops(inner)) - } - - fn update_graph_tprops( - time: TimeIndexEntry, - values: impl IntoIterator)>, - ) -> Self { - let inner = UpdateGraphTProps::new(time, values); - Self::new(Update::UpdateGraphTprops(inner)) - } - - fn update_node_type(node_id: VID, type_id: usize) -> Self { - let inner = UpdateNodeType { - id: node_id.as_u64(), - type_id: type_id as u64, - }; - Self::new(Update::UpdateNodeType(inner)) - } - - fn update_node_cprops( - node_id: VID, - properties: impl Iterator)>, - ) -> Self { - let properties = collect_proto_props(properties); - let inner = UpdateNodeCProps { - id: node_id.as_u64(), - properties, - }; - Self::new(Update::UpdateNodeCprops(inner)) - } - - fn update_node_tprops( - node_id: VID, - time: TimeIndexEntry, - properties: impl Iterator)>, - ) -> Self { - let properties = collect_proto_props(properties); - let inner = UpdateNodeTProps { - id: node_id.as_u64(), - time: time.t(), - secondary: time.i() as u64, - properties, - }; - Self::new(Update::UpdateNodeTprops(inner)) - } - - fn update_edge_tprops( - eid: EID, - time: TimeIndexEntry, - layer_id: usize, - properties: impl Iterator)>, - ) -> Self { - let properties = collect_proto_props(properties); - let inner = UpdateEdgeTProps { - eid: eid.0 as u64, - time: time.t(), - secondary: time.i() as u64, - layer_id: layer_id as u64, - properties, - }; - Self::new(Update::UpdateEdgeTprops(inner)) - } - - fn update_edge_cprops( - eid: EID, - layer_id: usize, - properties: impl Iterator)>, - ) -> Self { - let properties = collect_proto_props(properties); - let inner = UpdateEdgeCProps { - eid: eid.0 as u64, - layer_id: layer_id as u64, - properties, - }; - Self::new(Update::UpdateEdgeCprops(inner)) - } - - fn del_edge(eid: EID, layer_id: usize, time: TimeIndexEntry) -> Self { - let inner = DelEdge { - eid: eid.as_u64(), - time: time.t(), - secondary: time.i() as u64, - layer_id: layer_id as u64, - }; - Self::new(Update::DelEdge(inner)) - } -} - -impl UpdateGraphCProps { - fn new(values: impl Iterator)>) -> Self { - let properties = collect_proto_props(values); - UpdateGraphCProps { properties } - } -} - -impl UpdateGraphTProps { - fn new( - time: TimeIndexEntry, - values: impl IntoIterator)>, - ) -> Self { - let properties = collect_proto_props(values); - UpdateGraphTProps { - time: time.t(), - secondary: time.i() as u64, - properties, - } - } -} - -impl PropPair { - fn new(key: usize, value: &Prop) -> Self { - PropPair { - key: key as u64, - value: Some(as_proto_prop(value)), - } - } -} - -impl proto::Graph { - pub fn new_edge(&mut self, src: VID, dst: VID, eid: EID) { - let edge = NewEdge { - src: src.as_u64(), - dst: dst.as_u64(), - eid: eid.as_u64(), - }; - self.edges.push(edge); - } - - pub fn new_node(&mut self, gid: GidRef, vid: VID, type_id: usize) { - let type_id = type_id as u64; - let gid = match gid { - GidRef::U64(id) => new_node::Gid::GidU64(id), - GidRef::Str(name) => new_node::Gid::GidStr(name.to_string()), - }; - let node = NewNode { - type_id, - gid: Some(gid), - vid: vid.as_u64(), - }; - self.nodes.push(node); - } - - pub fn new_graph_cprop(&mut self, key: &str, id: usize) { - self.metas.push(NewMeta::new_graph_cprop(key, id)); - } - - pub fn new_graph_tprop(&mut self, key: &str, id: usize, dtype: &PropType) { - self.metas.push(NewMeta::new_graph_tprop(key, id, dtype)); - } - - pub fn new_node_cprop(&mut self, key: &str, id: usize, dtype: &PropType) { - self.metas.push(NewMeta::new_node_cprop(key, id, dtype)); - } - - pub fn new_node_tprop(&mut self, key: &str, id: usize, dtype: &PropType) { - self.metas.push(NewMeta::new_node_tprop(key, id, dtype)); - } - - pub fn new_edge_cprop(&mut self, key: &str, id: usize, dtype: &PropType) { - self.metas.push(NewMeta::new_edge_cprop(key, id, dtype)); - } - - pub fn new_edge_tprop(&mut self, key: &str, id: usize, dtype: &PropType) { - self.metas.push(NewMeta::new_edge_tprop(key, id, dtype)) - } - - pub fn new_layer(&mut self, layer: &str, id: usize) { - self.metas.push(NewMeta::new_layer(layer, id)); - } - - pub fn new_node_type(&mut self, node_type: &str, id: usize) { - self.metas.push(NewMeta::new_node_type(node_type, id)); - } - - pub fn update_graph_cprops( - &mut self, - values: impl Iterator)>, - ) { - self.updates.push(GraphUpdate::update_graph_cprops(values)); - } - - pub fn update_graph_tprops( - &mut self, - time: TimeIndexEntry, - values: impl IntoIterator)>, - ) { - self.updates - .push(GraphUpdate::update_graph_tprops(time, values)); - } - - pub fn update_node_type(&mut self, node_id: VID, type_id: usize) { - self.updates - .push(GraphUpdate::update_node_type(node_id, type_id)) - } - pub fn update_node_cprops( - &mut self, - node_id: VID, - properties: impl Iterator)>, - ) { - self.updates - .push(GraphUpdate::update_node_cprops(node_id, properties)); - } - - pub fn update_node_tprops( - &mut self, - node_id: VID, - time: TimeIndexEntry, - properties: impl Iterator)>, - ) { - self.updates - .push(GraphUpdate::update_node_tprops(node_id, time, properties)); - } - - pub fn update_edge_tprops( - &mut self, - eid: EID, - time: TimeIndexEntry, - layer_id: usize, - properties: impl Iterator)>, - ) { - self.updates.push(GraphUpdate::update_edge_tprops( - eid, time, layer_id, properties, - )); - } - - pub fn update_edge_cprops( - &mut self, - eid: EID, - layer_id: usize, - properties: impl Iterator)>, - ) { - self.updates - .push(GraphUpdate::update_edge_cprops(eid, layer_id, properties)); - } - - pub fn del_edge(&mut self, eid: EID, layer_id: usize, time: TimeIndexEntry) { - self.updates - .push(GraphUpdate::del_edge(eid, layer_id, time)) - } -} - impl StableEncode for GraphStorage { fn encode_to_proto(&self) -> proto::Graph { #[cfg(feature = "storage")] @@ -754,14 +285,14 @@ impl StableDecode for TemporalGraph { storage.node_meta.const_prop_meta().set_id_and_dtype( node_cprop.name.as_str(), node_cprop.id as usize, - as_prop_type(node_cprop.p_type()), + proto_ext::as_prop_type(node_cprop.p_type()), ) } Meta::NewNodeTprop(node_tprop) => { storage.node_meta.temporal_prop_meta().set_id_and_dtype( node_tprop.name.as_str(), node_tprop.id as usize, - as_prop_type(node_tprop.p_type()), + proto_ext::as_prop_type(node_tprop.p_type()), ) } Meta::NewGraphCprop(graph_cprop) => storage @@ -772,7 +303,7 @@ impl StableDecode for TemporalGraph { storage.graph_meta.temporal_prop_meta().set_id_and_dtype( graph_tprop.name.as_str(), graph_tprop.id as usize, - as_prop_type(graph_tprop.p_type()), + proto_ext::as_prop_type(graph_tprop.p_type()), ) } Meta::NewLayer(new_layer) => storage @@ -783,14 +314,14 @@ impl StableDecode for TemporalGraph { storage.edge_meta.const_prop_meta().set_id_and_dtype( edge_cprop.name.as_str(), edge_cprop.id as usize, - as_prop_type(edge_cprop.p_type()), + proto_ext::as_prop_type(edge_cprop.p_type()), ) } Meta::NewEdgeTprop(edge_tprop) => { storage.edge_meta.temporal_prop_meta().set_id_and_dtype( edge_tprop.name.as_str(), edge_tprop.id as usize, - as_prop_type(edge_tprop.p_type()), + proto_ext::as_prop_type(edge_tprop.p_type()), ) } } @@ -930,14 +461,16 @@ impl StableDecode for TemporalGraph { if let Some(update) = update.update.as_ref() { match update { Update::UpdateGraphCprops(props) => { - storage.internal_update_constant_properties(&collect_props( + storage.internal_update_constant_properties(&proto_ext::collect_props( &props.properties, )?)?; } Update::UpdateGraphTprops(props) => { let time = TimeIndexEntry(props.time, props.secondary as usize); - storage - .internal_add_properties(time, &collect_props(&props.properties)?)?; + storage.internal_add_properties( + time, + &proto_ext::collect_props(&props.properties)?, + )?; } _ => {} } @@ -993,186 +526,22 @@ impl StableDecode for PersistentGraph { } } -fn as_prop(prop_pair: &PropPair) -> Result<(usize, Prop), GraphError> { - let PropPair { key, value } = prop_pair; - let value = value.as_ref().expect("Missing prop value"); - let value = value.value.as_ref(); - let value = as_prop_value(value)?; - - Ok((*key as usize, value)) -} - -fn as_prop_value(value: Option<&prop::Value>) -> Result { - let value = match value.expect("Missing prop value") { - prop::Value::BoolValue(b) => Prop::Bool(*b), - prop::Value::U8(u) => Prop::U8((*u).try_into().unwrap()), - prop::Value::U16(u) => Prop::U16((*u).try_into().unwrap()), - prop::Value::U32(u) => Prop::U32(*u), - prop::Value::I32(i) => Prop::I32(*i), - prop::Value::I64(i) => Prop::I64(*i), - prop::Value::U64(u) => Prop::U64(*u), - prop::Value::F32(f) => Prop::F32(*f), - prop::Value::F64(f) => Prop::F64(*f), - prop::Value::Str(s) => Prop::Str(ArcStr::from(s.as_str())), - prop::Value::Prop(props) => Prop::List(Arc::new( - props - .properties - .iter() - .map(|prop| as_prop_value(prop.value.as_ref())) - .collect::, _>>()?, - )), - prop::Value::Map(dict) => Prop::Map(Arc::new( - dict.map - .iter() - .map(|(k, v)| Ok((ArcStr::from(k.as_str()), as_prop_value(v.value.as_ref())?))) - .collect::>()?, - )), - prop::Value::NdTime(ndt) => { - let prop::NdTime { - year, - month, - day, - hour, - minute, - second, - nanos, - } = ndt; - let ndt = NaiveDateTime::new( - NaiveDate::from_ymd_opt(*year as i32, *month as u32, *day as u32).unwrap(), - NaiveTime::from_hms_nano_opt( - *hour as u32, - *minute as u32, - *second as u32, - *nanos as u32, - ) - .unwrap(), - ); - Prop::NDTime(ndt) - } - prop::Value::DTime(dt) => Prop::DTime(DateTime::parse_from_rfc3339(dt).unwrap().into()), - prop::Value::Graph(graph_proto) => Prop::Graph(Graph::decode_from_proto(graph_proto)?), - prop::Value::PersistentGraph(graph_proto) => { - Prop::PersistentGraph(PersistentGraph::decode_from_proto(graph_proto)?) - } - prop::Value::DocumentInput(doc) => Prop::Document(DocumentInput { - content: doc.content.clone(), - life: doc - .life - .as_ref() - .map(|l| match l.l_type { - Some(prop::lifespan::LType::Interval(prop::lifespan::Interval { - start, - end, - })) => Lifespan::Interval { start, end }, - Some(prop::lifespan::LType::Event(prop::lifespan::Event { time })) => { - Lifespan::Event { time } - } - None => Lifespan::Inherited, - }) - .unwrap_or(Lifespan::Inherited), - }), - }; - Ok(value) -} - -fn collect_proto_props( - iter: impl IntoIterator)>, -) -> Vec { - iter.into_iter() - .map(|(key, value)| PropPair::new(key, value.borrow())) - .collect() -} - -fn collect_props<'a>( - iter: impl IntoIterator, -) -> Result, GraphError> { - iter.into_iter().map(as_prop).collect() -} - -fn as_proto_prop(prop: &Prop) -> proto::Prop { - let value: prop::Value = match prop { - Prop::Bool(b) => prop::Value::BoolValue(*b), - Prop::U8(u) => prop::Value::U8((*u).into()), - Prop::U16(u) => prop::Value::U16((*u).into()), - Prop::U32(u) => prop::Value::U32(*u), - Prop::I32(i) => prop::Value::I32(*i), - Prop::I64(i) => prop::Value::I64(*i), - Prop::U64(u) => prop::Value::U64(*u), - Prop::F32(f) => prop::Value::F32(*f), - Prop::F64(f) => prop::Value::F64(*f), - Prop::Str(s) => prop::Value::Str(s.to_string()), - Prop::List(list) => { - let properties = list.iter().map(as_proto_prop).collect(); - prop::Value::Prop(prop::Props { properties }) - } - Prop::Map(map) => { - let map = map - .iter() - .map(|(k, v)| (k.to_string(), as_proto_prop(v))) - .collect(); - prop::Value::Map(prop::Dict { map }) - } - Prop::NDTime(ndt) => { - let (year, month, day) = (ndt.date().year(), ndt.date().month(), ndt.date().day()); - let (hour, minute, second, nanos) = ( - ndt.time().hour(), - ndt.time().minute(), - ndt.time().second(), - ndt.time().nanosecond(), - ); - - let proto_ndt = prop::NdTime { - year: year as u32, - month: month as u32, - day: day as u32, - hour: hour as u32, - minute: minute as u32, - second: second as u32, - nanos: nanos as u32, - }; - prop::Value::NdTime(proto_ndt) - } - Prop::DTime(dt) => { - prop::Value::DTime(dt.to_rfc3339_opts(chrono::SecondsFormat::AutoSi, true)) - } - Prop::Graph(g) => prop::Value::Graph(g.encode_to_proto()), - Prop::PersistentGraph(g) => prop::Value::PersistentGraph(g.encode_to_proto()), - Prop::Document(doc) => { - let life = match doc.life { - Lifespan::Interval { start, end } => { - Some(prop::lifespan::LType::Interval(prop::lifespan::Interval { - start, - end, - })) - } - Lifespan::Event { time } => { - Some(prop::lifespan::LType::Event(prop::lifespan::Event { time })) - } - Lifespan::Inherited => None, - }; - prop::Value::DocumentInput(prop::DocumentInput { - content: doc.content.clone(), - life: Some(prop::Lifespan { l_type: life }), - }) - } - }; - - proto::Prop { value: Some(value) } -} - #[cfg(test)] mod proto_test { use super::*; use crate::{ - core::DocumentInput, + core::{DocumentInput, Lifespan}, db::{ api::{mutation::DeletionOps, properties::internal::ConstPropertiesOps}, graph::graph::assert_graph_equal, }, prelude::*, serialise::{proto::GraphType, ProtoGraph}, + test_utils::{build_edge_list, build_graph_from_edge_list}, }; use chrono::{DateTime, NaiveDateTime}; + use proptest::proptest; + use raphtory_api::core::storage::arc_str::ArcStr; #[test] fn node_no_props() { @@ -1641,6 +1010,16 @@ mod proto_test { assert!(bytes.is_empty()) } + #[test] + fn encode_decode_prop_test() { + proptest!(|(edges in build_edge_list(100, 100))| { + let g = build_graph_from_edge_list(&edges); + let bytes = g.encode_to_vec(); + let g2 = Graph::decode_from_bytes(&bytes).unwrap(); + assert_graph_equal(&g, &g2); + }) + } + fn write_props_to_vec(props: &mut Vec<(&str, Prop)>) { props.push(("name", Prop::Str("Alice".into()))); props.push(("age", Prop::U32(47))); From b0b7b0a762fa61631f922ee9998486156c64f216 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 18 Sep 2024 13:32:36 +0200 Subject: [PATCH 18/18] remove unused method --- .../src/db/api/storage/graph/storage_ops/mod.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/raphtory/src/db/api/storage/graph/storage_ops/mod.rs b/raphtory/src/db/api/storage/graph/storage_ops/mod.rs index 2cca8a42f5..c9d202112a 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/mod.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/mod.rs @@ -384,19 +384,6 @@ impl GraphStorage { }) } - pub fn nodes_par_opt<'a, 'graph: 'a, G: GraphViewOps<'graph>>( - &'a self, - view: &'a G, - type_filter: Option<&'a Arc<[bool]>>, - ) -> impl IndexedParallelIterator>> + 'a { - view.node_list().into_par_iter().map(move |vid| { - let node = self.node_entry(vid); - (type_filter.map_or(true, |type_filter| type_filter[node.node_type_id()]) - && view.filter_node(node.as_ref(), view.layer_ids())) - .then_some(node) - }) - } - pub fn into_nodes_par<'graph, G: GraphViewOps<'graph>>( self, view: G,