From 290630d79203dce3be3aed2ca0ac50d9b6c86e31 Mon Sep 17 00:00:00 2001 From: Shivam Kapoor <4599890+iamsmkr@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:02:05 +0100 Subject: [PATCH 01/17] fix dfview --- raphtory/src/core/utils/errors.rs | 6 + raphtory/src/io/arrow/dataframe.rs | 98 +-- raphtory/src/io/arrow/df_loaders.rs | 801 ++++++++++-------- raphtory/src/io/arrow/mod.rs | 6 +- raphtory/src/io/arrow/prop_handler.rs | 536 ++++++------ raphtory/src/io/parquet_loaders.rs | 183 ++-- raphtory/src/python/graph/disk_graph.rs | 77 +- raphtory/src/python/graph/graph.rs | 132 +-- .../src/python/graph/graph_with_deletions.rs | 40 +- .../src/python/graph/io/pandas_loaders.rs | 147 ++-- 10 files changed, 1022 insertions(+), 1004 deletions(-) diff --git a/raphtory/src/core/utils/errors.rs b/raphtory/src/core/utils/errors.rs index fc71edf04c..2e7593fc84 100644 --- a/raphtory/src/core/utils/errors.rs +++ b/raphtory/src/core/utils/errors.rs @@ -3,6 +3,8 @@ use crate::core::{utils::time::error::ParseTimeError, Prop, PropType}; use polars_arrow::legacy::error; use raphtory_api::core::{entities::GID, storage::arc_str::ArcStr}; use std::path::PathBuf; +#[cfg(feature = "python")] +use pyo3::PyErr; #[cfg(feature = "search")] use tantivy; #[cfg(feature = "search")] @@ -152,6 +154,10 @@ pub enum GraphError { #[error("Immutable graph is .. immutable!")] AttemptToMutateImmutableGraph, + + #[cfg(feature = "python")] + #[error("Python error occurred: {0}")] + PythonError(#[from] PyErr), } impl GraphError { diff --git a/raphtory/src/io/arrow/dataframe.rs b/raphtory/src/io/arrow/dataframe.rs index 2b8173f5e1..79251d3108 100644 --- a/raphtory/src/io/arrow/dataframe.rs +++ b/raphtory/src/io/arrow/dataframe.rs @@ -10,18 +10,17 @@ use polars_arrow::{ use itertools::Itertools; -#[derive(Debug)] -pub(crate) struct DFView { - pub(crate) names: Vec, - pub(crate) arrays: Vec>>, +pub(crate) struct DFView { + pub names: Vec, + pub(crate) chunks: I, } -impl DFView { - pub(crate) fn get_inner_size(&self) -> usize { - if self.arrays.is_empty() || self.arrays[0].is_empty() { - return 0; - } - self.arrays[0][0].len() +impl DFView +where + I: Iterator>, +{ + pub(crate) fn new(names: Vec, chunks: I) -> Self { + Self { names, chunks } } pub fn check_cols_exist(&self, cols: &[&str]) -> Result<(), GraphError> { @@ -36,66 +35,57 @@ impl DFView { Ok(()) } + pub(crate) fn get_index(&self, name: &str) -> Result { + self.names.iter().position(|n| n == name) + .ok_or_else(|| GraphError::ColumnDoesNotExist(name.to_string())) + } +} + +pub(crate) struct DFChunk { + pub(crate) chunk: Vec>, +} + +impl DFChunk { + pub(crate) fn get_inner_size(&self) -> usize { + self.chunk.first().map(|arr| arr.len()).unwrap_or(0) + } + pub(crate) fn iter_col( &self, - name: &str, - ) -> Option> + '_> { - let idx = self.names.iter().position(|n| n == name)?; - - let _ = (&self.arrays[0])[idx] + idx: usize, + ) -> Option> + '_> { + let col_arr = (&self.chunk)[idx] .as_any() .downcast_ref::>()?; - - let iter = self.arrays.iter().flat_map(move |arr| { - let arr = &arr[idx]; - let arr = arr.as_any().downcast_ref::>().unwrap(); - arr.iter() - }); - - Some(iter) + Some(col_arr.iter()) } - pub fn utf8(&self, name: &str) -> Option> + '_> { - let idx = self.names.iter().position(|n| n == name)?; + pub fn utf8(&self, idx: usize) -> Option> + '_> { // test that it's actually a utf8 array - let _ = (&self.arrays[0])[idx] + let col_arr = (&self.chunk)[idx] .as_any() .downcast_ref::>()?; - let iter = self.arrays.iter().flat_map(move |arr| { - let arr = &arr[idx]; - let arr = arr.as_any().downcast_ref::>().unwrap(); - arr.iter() - }); - - Some(iter) + Some(col_arr.iter()) } - pub fn time_iter_col(&self, name: &str) -> Option> + '_> { - let idx = self.names.iter().position(|n| n == name)?; - - let _ = (&self.arrays[0])[idx] + pub fn time_iter_col(&self, idx: usize) -> Option> + '_> { + let col_arr = (&self.chunk)[idx] .as_any() .downcast_ref::>()?; - let iter = self.arrays.iter().flat_map(move |arr| { - let arr = &arr[idx]; - let arr = if let DataType::Timestamp(_, _) = arr.data_type() { - let array = cast::cast( - &*arr.clone(), - &DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".to_string())), - CastOptions::default(), - ) + let arr = if let DataType::Timestamp(_, _) = col_arr.data_type() { + let array = cast::cast( + col_arr, + &DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".to_string())), + CastOptions::default(), + ) .unwrap(); - array - } else { - arr.clone() - }; - - let arr = arr.as_any().downcast_ref::>().unwrap(); - arr.clone().into_iter() - }); + array.as_any().downcast_ref::>().unwrap().clone() + } else { + col_arr.clone() + }; - Some(iter) + Some(arr.into_iter()) } } diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index b05806af28..6377e8cb33 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -4,12 +4,13 @@ use crate::{ mutation::{internal::*, AdditionOps}, view::StaticGraphViewOps, }, - io::arrow::{dataframe::DFView, prop_handler::*}, + io::arrow::{dataframe::DFChunk, prop_handler::*}, prelude::*, }; #[cfg(feature = "python")] use kdam::tqdm; use std::{collections::HashMap, iter}; +use crate::io::arrow::dataframe::DFView; #[cfg(feature = "python")] macro_rules! maybe_tqdm { @@ -35,118 +36,133 @@ pub(crate) fn load_nodes_from_df< 'a, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, >( - df: &'a DFView, - size: usize, + df_view: DFView>>, node_id: &str, time: &str, - properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + properties: Option<&[&str]>, + const_properties: Option<&[&str]>, + shared_const_properties: Option<&HashMap>, node_type: Option<&str>, node_type_in_df: bool, graph: &G, ) -> Result<(), GraphError> { - let (prop_iter, const_prop_iter) = get_prop_rows(df, properties, const_properties)?; - - let node_type: Box>> = match node_type { - Some(node_type) => { - if node_type_in_df { - let iter_res: Result>>, GraphError> = - if let Some(node_types) = df.utf8::(node_type) { - Ok(Box::new(node_types)) - } else if let Some(node_types) = df.utf8::(node_type) { - Ok(Box::new(node_types)) - } else { - Err(GraphError::LoadFailure( - "Unable to convert / find node_type column in dataframe.".to_string(), - )) - }; - iter_res? - } else { - Box::new(iter::repeat(Some(node_type))) + let properties = properties.unwrap_or(&[]); + let const_properties = const_properties.unwrap_or(&[]); + + let properties_indices = properties.iter().map(|name| df_view.get_index(name)).collect::, GraphError>>()?; + let const_properties_indices = const_properties.iter().map(|name| df_view.get_index(name)).collect::, GraphError>>()?; + + let node_type_index = node_type.filter(|_| node_type_in_df).map(|node_type| df_view.get_index(node_type)).transpose()?; + let node_id_index = df_view.get_index(node_id)?; + let time_index = df_view.get_index(time)?; + + for chunk in df_view.chunks { + let df = chunk?; + let size = df.get_inner_size(); + let prop_iter = combine_properties(properties, &properties_indices, &df)?; + let const_prop_iter = combine_properties(const_properties, &const_properties_indices, &df)?; + + let node_type: Box>> = match node_type { + Some(node_type) => { + match node_type_index { + Some(index) => { + let iter_res: Result>>, GraphError> = + if let Some(node_types) = df.utf8::(index) { + Ok(Box::new(node_types)) + } else if let Some(node_types) = df.utf8::(index) { + Ok(Box::new(node_types)) + } else { + Err(GraphError::LoadFailure( + "Unable to convert / find node_type column in dataframe.".to_string(), + )) + }; + iter_res? + } + None => Box::new(iter::repeat(Some(node_type))) + } } - } - None => Box::new(iter::repeat(None)), - }; - - if let (Some(node_id), Some(time)) = (df.iter_col::(node_id), df.time_iter_col(time)) { - let iter = node_id - .map(|i| i.copied()) - .zip(time) - .zip(node_type) - .map(|((node_id, time), n_t)| (node_id, time, n_t)); - load_nodes_from_num_iter( - graph, - size, - iter, - prop_iter, - const_prop_iter, - shared_const_properties, - )?; - } else if let (Some(node_id), Some(time)) = - (df.iter_col::(node_id), df.time_iter_col(time)) - { - let iter = node_id.map(i64_opt_into_u64_opt).zip(time); - let iter = iter - .zip(node_type) - .map(|((node_id, time), n_t)| (node_id, time, n_t)); - - load_nodes_from_num_iter( - graph, - size, - iter, - prop_iter, - const_prop_iter, - shared_const_properties, - )?; - } else if let (Some(node_id), Some(time)) = (df.utf8::(node_id), df.time_iter_col(time)) { - let iter = node_id.into_iter().zip(time); - let iter = iter - .zip(node_type) - .map(|((node_id, time), n_t)| (node_id, time, n_t)); - - let iter = maybe_tqdm!( + None => Box::new(iter::repeat(None)), + }; + + if let (Some(node_id), Some(time)) = (df.iter_col::(node_id_index), df.time_iter_col(time_index)) { + let iter = node_id + .map(|i| i.copied()) + .zip(time) + .zip(node_type) + .map(|((node_id, time), n_t)| (node_id, time, n_t)); + load_nodes_from_num_iter( + graph, + size, + iter, + prop_iter, + const_prop_iter, + shared_const_properties, + )?; + } else if let (Some(node_id), Some(time)) = + (df.iter_col::(node_id_index), df.time_iter_col(time_index)) + { + let iter = node_id.map(i64_opt_into_u64_opt).zip(time); + let iter = iter + .zip(node_type) + .map(|((node_id, time), n_t)| (node_id, time, n_t)); + + load_nodes_from_num_iter( + graph, + size, + iter, + prop_iter, + const_prop_iter, + shared_const_properties, + )?; + } else if let (Some(node_id), Some(time)) = (df.utf8::(node_id_index), df.time_iter_col(time_index)) { + let iter = node_id.into_iter().zip(time); + let iter = iter + .zip(node_type) + .map(|((node_id, time), n_t)| (node_id, time, n_t)); + + let iter = maybe_tqdm!( iter.zip(prop_iter).zip(const_prop_iter), size, "Loading nodes" ); - for (((node_id, time, n_t), props), const_props) in iter { - if let (Some(node_id), Some(time), n_t) = (node_id, time, n_t) { - let actual_type = extract_out_default_type(n_t); - let v = graph.add_node(time, node_id, props, actual_type)?; - v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { - v.add_constant_properties(shared_const_props.iter())?; + for (((node_id, time, n_t), props), const_props) in iter { + if let (Some(node_id), Some(time), n_t) = (node_id, time, n_t) { + let actual_type = extract_out_default_type(n_t); + let v = graph.add_node(time, node_id, props, actual_type)?; + v.add_constant_properties(const_props)?; + if let Some(shared_const_props) = &shared_const_properties { + v.add_constant_properties(shared_const_props.iter())?; + } } } - } - } else if let (Some(node_id), Some(time)) = (df.utf8::(node_id), df.time_iter_col(time)) { - let iter = node_id.into_iter().zip(time); - let iter = iter - .zip(node_type) - .map(|((node_id, time), n_t)| (node_id, time, n_t)); + } else if let (Some(node_id), Some(time)) = (df.utf8::(node_id_index), df.time_iter_col(time_index)) { + let iter = node_id.into_iter().zip(time); + let iter = iter + .zip(node_type) + .map(|((node_id, time), n_t)| (node_id, time, n_t)); - let iter = maybe_tqdm!( + let iter = maybe_tqdm!( iter.zip(prop_iter).zip(const_prop_iter), size, "Loading nodes" ); - for (((node_id, time, n_t), props), const_props) in iter { - let actual_type = extract_out_default_type(n_t); - if let (Some(node_id), Some(time), n_t) = (node_id, time, actual_type) { - let v = graph.add_node(time, node_id, props, n_t)?; - v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { - v.add_constant_properties(shared_const_props)?; + for (((node_id, time, n_t), props), const_props) in iter { + let actual_type = extract_out_default_type(n_t); + if let (Some(node_id), Some(time), n_t) = (node_id, time, actual_type) { + let v = graph.add_node(time, node_id, props, n_t)?; + v.add_constant_properties(const_props)?; + if let Some(shared_const_props) = shared_const_properties { + v.add_constant_properties(shared_const_props)?; + } } } - } - } else { - return Err(GraphError::LoadFailure( - "node id column must be either u64 or text, time column must be i64. Ensure these contain no NaN, Null or None values.".to_string(), - )); + } else { + return Err(GraphError::LoadFailure( + "node id column must be either u64 or text, time column must be i64. Ensure these contain no NaN, Null or None values.".to_string(), + )); + }; } Ok(()) @@ -162,194 +178,217 @@ fn extract_out_default_type(n_t: Option<&str>) -> Option<&str> { pub(crate) fn load_edges_from_df< 'a, - S: AsRef, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, >( - df: &'a DFView, + df_view: DFView>>, size: usize, src: &str, dst: &str, time: &str, - properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, - layer: Option, + properties: Option<&[&str]>, + const_properties: Option<&[&str]>, + shared_const_properties: Option<&HashMap>, + layer: Option<&str>, layer_in_df: bool, graph: &G, ) -> Result<(), GraphError> { - let (prop_iter, const_prop_iter) = get_prop_rows(df, properties, const_properties)?; - let layer = lift_layer(layer, layer_in_df, df); - - if let (Some(src), Some(dst), Some(time)) = ( - df.iter_col::(src), - df.iter_col::(dst), - df.time_iter_col(time), - ) { - let triplets = src - .map(|i| i.copied()) - .zip(dst.map(|i| i.copied())) - .zip(time); - load_edges_from_num_iter( - graph, - size, - triplets, - prop_iter, - const_prop_iter, - shared_const_properties, - layer, - )?; - } else if let (Some(src), Some(dst), Some(time)) = ( - df.iter_col::(src), - df.iter_col::(dst), - df.time_iter_col(time), - ) { - let triplets = src - .map(i64_opt_into_u64_opt) - .zip(dst.map(i64_opt_into_u64_opt)) - .zip(time); - load_edges_from_num_iter( - graph, - size, - triplets, - prop_iter, - const_prop_iter, - shared_const_properties, - layer, - )?; - } else if let (Some(src), Some(dst), Some(time)) = ( - df.utf8::(src), - df.utf8::(dst), - df.time_iter_col(time), - ) { - let triplets = src.into_iter().zip(dst.into_iter()).zip(time.into_iter()); - - let iter = maybe_tqdm!( + let properties = properties.unwrap_or(&[]); + let const_properties = const_properties.unwrap_or(&[]); + + let properties_indices = properties.iter().map(|name| df_view.get_index(name)).collect::, GraphError>>()?; + let const_properties_indices = const_properties.iter().map(|name| df_view.get_index(name)).collect::, GraphError>>()?; + + let src_index = df_view.get_index(src)?; + let dst_index = df_view.get_index(dst)?; + let time_index = df_view.get_index(time)?; + let layer_index = layer.filter(|_| layer_in_df).map(|layer| df_view.get_index(layer.as_ref())).transpose()?; + + for chunk in df_view.chunks { + let df = chunk?; + let prop_iter = combine_properties(properties, &properties_indices, &df)?; + let const_prop_iter = combine_properties(const_properties, &const_properties_indices, &df)?; + + let layer = lift_layer(layer, layer_index, &df); + + if let (Some(src), Some(dst), Some(time)) = ( + df.iter_col::(src_index), + df.iter_col::(dst_index), + df.time_iter_col(time_index), + ) { + let triplets = src + .map(|i| i.copied()) + .zip(dst.map(|i| i.copied())) + .zip(time); + load_edges_from_num_iter( + graph, + size, + triplets, + prop_iter, + const_prop_iter, + shared_const_properties, + layer, + )?; + } else if let (Some(src), Some(dst), Some(time)) = ( + df.iter_col::(src_index), + df.iter_col::(dst_index), + df.time_iter_col(time_index), + ) { + let triplets = src + .map(i64_opt_into_u64_opt) + .zip(dst.map(i64_opt_into_u64_opt)) + .zip(time); + load_edges_from_num_iter( + graph, + size, + triplets, + prop_iter, + const_prop_iter, + shared_const_properties, + layer, + )?; + } else if let (Some(src), Some(dst), Some(time)) = ( + df.utf8::(src_index), + df.utf8::(dst_index), + df.time_iter_col(time_index), + ) { + let triplets = src.into_iter().zip(dst.into_iter()).zip(time.into_iter()); + + let iter = maybe_tqdm!( triplets.zip(prop_iter).zip(const_prop_iter).zip(layer), size, "Loading edges" ); - for (((((src, dst), time), props), const_props), layer) in iter { - if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { - let e = graph.add_edge(time, src, dst, props, layer.as_deref())?; - e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { - e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; + for (((((src, dst), time), props), const_props), layer) in iter { + if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { + let e = graph.add_edge(time, src, dst, props, layer.as_deref())?; + e.add_constant_properties(const_props, layer.as_deref())?; + if let Some(shared_const_props) = &shared_const_properties { + e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; + } } } - } - } else if let (Some(src), Some(dst), Some(time)) = ( - df.utf8::(src), - df.utf8::(dst), - df.time_iter_col(time), - ) { - let triplets = src.into_iter().zip(dst.into_iter()).zip(time.into_iter()); - let iter = maybe_tqdm!( + } else if let (Some(src), Some(dst), Some(time)) = ( + df.utf8::(src_index), + df.utf8::(dst_index), + df.time_iter_col(time_index), + ) { + let triplets = src.into_iter().zip(dst.into_iter()).zip(time.into_iter()); + let iter = maybe_tqdm!( triplets.zip(prop_iter).zip(const_prop_iter).zip(layer), size, "Loading edges" ); - for (((((src, dst), time), props), const_props), layer) in iter { - if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { - let e = graph.add_edge(time, src, dst, props, layer.as_deref())?; - e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { - e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; + for (((((src, dst), time), props), const_props), layer) in iter { + if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { + let e = graph.add_edge(time, src, dst, props, layer.as_deref())?; + e.add_constant_properties(const_props, layer.as_deref())?; + if let Some(shared_const_props) = &shared_const_properties { + e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; + } } } - } - } else { - return Err(GraphError::LoadFailure( - "Source and Target columns must be either u64 or text, Time column must be i64. Ensure these contain no NaN, Null or None values." - .to_string(), - )); + } else { + return Err(GraphError::LoadFailure( + "Source and Target columns must be either u64 or text, Time column must be i64. Ensure these contain no NaN, Null or None values." + .to_string(), + )); + }; } Ok(()) } pub(crate) fn load_edges_deletions_from_df< 'a, - S: AsRef, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps + DeletionOps, >( - df: &'a DFView, + df_view: DFView>>, size: usize, src: &str, dst: &str, time: &str, - layer: Option, + layer: Option<&str>, layer_in_df: bool, graph: &G, ) -> Result<(), GraphError> { - let layer = lift_layer(layer, layer_in_df, df); - - if let (Some(src), Some(dst), Some(time)) = ( - df.iter_col::(src), - df.iter_col::(dst), - df.time_iter_col(time), - ) { - let triplets = src - .map(|i| i.copied()) - .zip(dst.map(|i| i.copied())) - .zip(time); - - let iter = maybe_tqdm!(triplets.zip(layer), size, "Loading edges"); - - for (((src, dst), time), layer) in iter { - if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { - graph.delete_edge(time, src, dst, layer.as_deref())?; + let src_index = df_view.get_index(src)?; + let dst_index = df_view.get_index(dst)?; + let time_index = df_view.get_index(time)?; + let layer_index = layer.filter(|_| layer_in_df).map(|layer| df_view.get_index(layer.as_ref())).transpose()?; + + for chunk in df_view.chunks { + let df = chunk?; + let layer = lift_layer(layer, layer_index, &df); + + if let (Some(src), Some(dst), Some(time)) = ( + df.iter_col::(src_index), + df.iter_col::(dst_index), + df.time_iter_col(time_index), + ) { + let triplets = src + .map(|i| i.copied()) + .zip(dst.map(|i| i.copied())) + .zip(time); + + let iter = maybe_tqdm!(triplets.zip(layer), size, "Loading edges"); + + for (((src, dst), time), layer) in iter { + if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { + graph.delete_edge(time, src, dst, layer.as_deref())?; + } } - } - } else if let (Some(src), Some(dst), Some(time)) = ( - df.iter_col::(src), - df.iter_col::(dst), - df.time_iter_col(time), - ) { - let triplets = src - .map(i64_opt_into_u64_opt) - .zip(dst.map(i64_opt_into_u64_opt)) - .zip(time); - - let iter = maybe_tqdm!(triplets.zip(layer), size, "Loading edges"); - - for (((src, dst), time), layer) in iter { - if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { - graph.delete_edge(time, src, dst, layer.as_deref())?; + } else if let (Some(src), Some(dst), Some(time)) = ( + df.iter_col::(src_index), + df.iter_col::(dst_index), + df.time_iter_col(time_index), + ) { + let triplets = src + .map(i64_opt_into_u64_opt) + .zip(dst.map(i64_opt_into_u64_opt)) + .zip(time); + + let iter = maybe_tqdm!(triplets.zip(layer), size, "Loading edges"); + + for (((src, dst), time), layer) in iter { + if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { + graph.delete_edge(time, src, dst, layer.as_deref())?; + } } - } - } else if let (Some(src), Some(dst), Some(time)) = ( - df.utf8::(src), - df.utf8::(dst), - df.time_iter_col(time), - ) { - let triplets = src.into_iter().zip(dst.into_iter()).zip(time.into_iter()); - let iter = maybe_tqdm!(triplets.zip(layer), size, "Loading edges"); - - for (((src, dst), time), layer) in iter { - if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { - graph.delete_edge(time, src, dst, layer.as_deref())?; + } else if let (Some(src), Some(dst), Some(time)) = ( + df.utf8::(src_index), + df.utf8::(dst_index), + df.time_iter_col(time_index), + ) { + let triplets = src.into_iter().zip(dst.into_iter()).zip(time.into_iter()); + let iter = maybe_tqdm!(triplets.zip(layer), size, "Loading edges"); + + for (((src, dst), time), layer) in iter { + if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { + graph.delete_edge(time, src, dst, layer.as_deref())?; + } } - } - } else if let (Some(src), Some(dst), Some(time)) = ( - df.utf8::(src), - df.utf8::(dst), - df.time_iter_col(time), - ) { - let triplets = src.into_iter().zip(dst.into_iter()).zip(time.into_iter()); - let iter = maybe_tqdm!(triplets.zip(layer), size, "Loading edges"); - - for (((src, dst), time), layer) in iter { - if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { - graph.delete_edge(time, src, dst, layer.as_deref())?; + } else if let (Some(src), Some(dst), Some(time)) = ( + df.utf8::(src_index), + df.utf8::(dst_index), + df.time_iter_col(time_index), + ) { + let triplets = src.into_iter().zip(dst.into_iter()).zip(time.into_iter()); + let iter = maybe_tqdm!(triplets.zip(layer), size, "Loading edges"); + + for (((src, dst), time), layer) in iter { + if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { + graph.delete_edge(time, src, dst, layer.as_deref())?; + } } - } - } else { - return Err(GraphError::LoadFailure( - "Source and Target columns must be either u64 or text, Time column must be i64. Ensure these contain no NaN, Null or None values." - .to_string(), - )); + } else { + return Err(GraphError::LoadFailure( + "Source and Target columns must be either u64 or text, Time column must be i64. Ensure these contain no NaN, Null or None values." + .to_string(), + )); + }; } + Ok(()) } @@ -357,190 +396,206 @@ pub(crate) fn load_node_props_from_df< 'a, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, >( - df: &'a DFView, + df_view: DFView>>, size: usize, node_id: &str, - const_properties: Option>, - shared_const_properties: Option>, + const_properties: Option<&[&str]>, + shared_const_properties: Option<&HashMap>, graph: &G, ) -> Result<(), GraphError> { - let (_, const_prop_iter) = get_prop_rows(df, None, const_properties)?; - - if let Some(node_id) = df.iter_col::(node_id) { - let iter = node_id.map(|i| i.copied()); - let iter = maybe_tqdm!(iter.zip(const_prop_iter), size, "Loading node properties"); - - for (node_id, const_props) in iter { - if let Some(node_id) = node_id { - let v = graph - .node(node_id) - .ok_or(GraphError::NodeIdError(node_id))?; - v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { - v.add_constant_properties(shared_const_props.iter())?; + let const_properties = const_properties.unwrap_or(&[]); + let const_properties_indices = const_properties.iter().map(|name| df_view.get_index(name)).collect::, GraphError>>()?; + let node_id_index = df_view.get_index(node_id)?; + + for chunk in df_view.chunks { + let df = chunk?; + let const_prop_iter = combine_properties(const_properties, &const_properties_indices, &df)?; + + if let Some(node_id) = df.iter_col::(node_id_index) { + let iter = node_id.map(|i| i.copied()); + let iter = maybe_tqdm!(iter.zip(const_prop_iter), size, "Loading node properties"); + + for (node_id, const_props) in iter { + if let Some(node_id) = node_id { + let v = graph + .node(node_id) + .ok_or(GraphError::NodeIdError(node_id))?; + v.add_constant_properties(const_props)?; + if let Some(shared_const_props) = &shared_const_properties { + v.add_constant_properties(shared_const_props.iter())?; + } } } - } - } else if let Some(node_id) = df.iter_col::(node_id) { - let iter = node_id.map(i64_opt_into_u64_opt); - let iter = maybe_tqdm!(iter.zip(const_prop_iter), size, "Loading node properties"); - - for (node_id, const_props) in iter { - if let Some(node_id) = node_id { - let v = graph - .node(node_id) - .ok_or(GraphError::NodeIdError(node_id))?; - v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { - v.add_constant_properties(shared_const_props.iter())?; + } else if let Some(node_id) = df.iter_col::(node_id_index) { + let iter = node_id.map(i64_opt_into_u64_opt); + let iter = maybe_tqdm!(iter.zip(const_prop_iter), size, "Loading node properties"); + + for (node_id, const_props) in iter { + if let Some(node_id) = node_id { + let v = graph + .node(node_id) + .ok_or(GraphError::NodeIdError(node_id))?; + v.add_constant_properties(const_props)?; + if let Some(shared_const_props) = &shared_const_properties { + v.add_constant_properties(shared_const_props.iter())?; + } } } - } - } else if let Some(node_id) = df.utf8::(node_id) { - let iter = node_id.into_iter(); - let iter = maybe_tqdm!(iter.zip(const_prop_iter), size, "Loading node properties"); - - for (node_id, const_props) in iter { - if let Some(node_id) = node_id { - let v = graph - .node(node_id) - .ok_or_else(|| GraphError::NodeNameError(node_id.to_owned()))?; - v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { - v.add_constant_properties(shared_const_props.iter())?; + } else if let Some(node_id) = df.utf8::(node_id_index) { + let iter = node_id.into_iter(); + let iter = maybe_tqdm!(iter.zip(const_prop_iter), size, "Loading node properties"); + + for (node_id, const_props) in iter { + if let Some(node_id) = node_id { + let v = graph + .node(node_id) + .ok_or_else(|| GraphError::NodeNameError(node_id.to_owned()))?; + v.add_constant_properties(const_props)?; + if let Some(shared_const_props) = &shared_const_properties { + v.add_constant_properties(shared_const_props.iter())?; + } } } - } - } else if let Some(node_id) = df.utf8::(node_id) { - let iter = node_id.into_iter(); - let iter = maybe_tqdm!(iter.zip(const_prop_iter), size, "Loading node properties"); - - for (node_id, const_props) in iter { - if let Some(node_id) = node_id { - let v = graph - .node(node_id) - .ok_or_else(|| GraphError::NodeNameError(node_id.to_owned()))?; - v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { - v.add_constant_properties(shared_const_props.iter())?; + } else if let Some(node_id) = df.utf8::(node_id_index) { + let iter = node_id.into_iter(); + let iter = maybe_tqdm!(iter.zip(const_prop_iter), size, "Loading node properties"); + + for (node_id, const_props) in iter { + if let Some(node_id) = node_id { + let v = graph + .node(node_id) + .ok_or_else(|| GraphError::NodeNameError(node_id.to_owned()))?; + v.add_constant_properties(const_props)?; + if let Some(shared_const_props) = &shared_const_properties { + v.add_constant_properties(shared_const_props.iter())?; + } } } - } - } else { - return Err(GraphError::LoadFailure( - "node id column must be either u64 or text, time column must be i64. Ensure these contain no NaN, Null or None values.".to_string(), - )); + } else { + return Err(GraphError::LoadFailure( + "node id column must be either u64 or text, time column must be i64. Ensure these contain no NaN, Null or None values.".to_string(), + )); + }; } Ok(()) } pub(crate) fn load_edges_props_from_df< 'a, - S: AsRef, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, >( - df: &'a DFView, + df_view: DFView>>, size: usize, src: &str, dst: &str, - const_properties: Option>, - shared_const_properties: Option>, - layer: Option, + const_properties: Option<&[&str]>, + shared_const_properties: Option<&HashMap>, + layer: Option<&str>, layer_in_df: bool, graph: &G, ) -> Result<(), GraphError> { - let (_, const_prop_iter) = get_prop_rows(df, None, const_properties)?; - let layer = lift_layer(layer, layer_in_df, df); - - if let (Some(src), Some(dst)) = (df.iter_col::(src), df.iter_col::(dst)) { - let triplets = src.map(|i| i.copied()).zip(dst.map(|i| i.copied())); - let iter = maybe_tqdm!( + let const_properties = const_properties.unwrap_or(&[]); + let const_properties_indices = const_properties.iter().map(|name| df_view.get_index(name)).collect::, GraphError>>()?; + let src_index = df_view.get_index(src)?; + let dst_index = df_view.get_index(dst)?; + let layer_index = layer.filter(|_| layer_in_df).map(|layer| df_view.get_index(layer.as_ref())).transpose()?; + + for chunk in df_view.chunks { + let df = chunk?; + let const_prop_iter = combine_properties(const_properties, &const_properties_indices, &df)?; + + let layer = lift_layer(layer, layer_index, &df); + + if let (Some(src), Some(dst)) = (df.iter_col::(src_index), df.iter_col::(dst_index)) { + let triplets = src.map(|i| i.copied()).zip(dst.map(|i| i.copied())); + let iter = maybe_tqdm!( triplets.zip(const_prop_iter).zip(layer), size, "Loading edge properties" ); - for (((src, dst), const_props), layer) in iter { - if let (Some(src), Some(dst)) = (src, dst) { - let e = graph - .edge(src, dst) - .ok_or(GraphError::EdgeIdError { src, dst })?; - e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { - e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; + for (((src, dst), const_props), layer) in iter { + if let (Some(src), Some(dst)) = (src, dst) { + let e = graph + .edge(src, dst) + .ok_or(GraphError::EdgeIdError { src, dst })?; + e.add_constant_properties(const_props, layer.as_deref())?; + if let Some(shared_const_props) = &shared_const_properties { + e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; + } } } - } - } else if let (Some(src), Some(dst)) = (df.iter_col::(src), df.iter_col::(dst)) { - let triplets = src - .map(i64_opt_into_u64_opt) - .zip(dst.map(i64_opt_into_u64_opt)); - let iter = maybe_tqdm!( + } else if let (Some(src), Some(dst)) = (df.iter_col::(src_index), df.iter_col::(dst_index)) { + let triplets = src + .map(i64_opt_into_u64_opt) + .zip(dst.map(i64_opt_into_u64_opt)); + let iter = maybe_tqdm!( triplets.zip(const_prop_iter).zip(layer), size, "Loading edge properties" ); - for (((src, dst), const_props), layer) in iter { - if let (Some(src), Some(dst)) = (src, dst) { - let e = graph - .edge(src, dst) - .ok_or(GraphError::EdgeIdError { src, dst })?; - e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { - e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; + for (((src, dst), const_props), layer) in iter { + if let (Some(src), Some(dst)) = (src, dst) { + let e = graph + .edge(src, dst) + .ok_or(GraphError::EdgeIdError { src, dst })?; + e.add_constant_properties(const_props, layer.as_deref())?; + if let Some(shared_const_props) = &shared_const_properties { + e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; + } } } - } - } else if let (Some(src), Some(dst)) = (df.utf8::(src), df.utf8::(dst)) { - let triplets = src.into_iter().zip(dst.into_iter()); - let iter = maybe_tqdm!( + } else if let (Some(src), Some(dst)) = (df.utf8::(src_index), df.utf8::(dst_index)) { + let triplets = src.into_iter().zip(dst.into_iter()); + let iter = maybe_tqdm!( triplets.zip(const_prop_iter).zip(layer), size, "Loading edge properties" ); - for (((src, dst), const_props), layer) in iter { - if let (Some(src), Some(dst)) = (src, dst) { - let e = graph - .edge(src, dst) - .ok_or_else(|| GraphError::EdgeNameError { - src: src.to_owned(), - dst: dst.to_owned(), - })?; - e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { - e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; + for (((src, dst), const_props), layer) in iter { + if let (Some(src), Some(dst)) = (src, dst) { + let e = graph + .edge(src, dst) + .ok_or_else(|| GraphError::EdgeNameError { + src: src.to_owned(), + dst: dst.to_owned(), + })?; + e.add_constant_properties(const_props, layer.as_deref())?; + if let Some(shared_const_props) = &shared_const_properties { + e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; + } } } - } - } else if let (Some(src), Some(dst)) = (df.utf8::(src), df.utf8::(dst)) { - let triplets = src.into_iter().zip(dst.into_iter()); - let iter = maybe_tqdm!( + } else if let (Some(src), Some(dst)) = (df.utf8::(src_index), df.utf8::(dst_index)) { + let triplets = src.into_iter().zip(dst.into_iter()); + let iter = maybe_tqdm!( triplets.zip(const_prop_iter).zip(layer), size, "Loading edge properties" ); - for (((src, dst), const_props), layer) in iter { - if let (Some(src), Some(dst)) = (src, dst) { - let e = graph - .edge(src, dst) - .ok_or_else(|| GraphError::EdgeNameError { - src: src.to_owned(), - dst: dst.to_owned(), - })?; - e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { - e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; + for (((src, dst), const_props), layer) in iter { + if let (Some(src), Some(dst)) = (src, dst) { + let e = graph + .edge(src, dst) + .ok_or_else(|| GraphError::EdgeNameError { + src: src.to_owned(), + dst: dst.to_owned(), + })?; + e.add_constant_properties(const_props, layer.as_deref())?; + if let Some(shared_const_props) = &shared_const_properties { + e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; + } } } - } - } else { - return Err(GraphError::LoadFailure( - "Source and Target columns must be either u64 or text, Time column must be i64. Ensure these contain no NaN, Null or None values." - .to_string(), - )); + } else { + return Err(GraphError::LoadFailure( + "Source and Target columns must be either u64 or text, Time column must be i64. Ensure these contain no NaN, Null or None values." + .to_string(), + )); + }; } Ok(()) } @@ -552,9 +607,9 @@ fn i64_opt_into_u64_opt(x: Option<&i64>) -> Option { fn load_edges_from_num_iter< 'a, S: AsRef, - I: Iterator, Option), Option)>, - PI: Iterator>, - IL: Iterator>, + I: Iterator, Option), Option)>, + PI: Iterator>, + IL: Iterator>, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, >( graph: &G, @@ -562,7 +617,7 @@ fn load_edges_from_num_iter< edges: I, properties: PI, const_properties: PI, - shared_const_properties: Option>, + shared_const_properties: Option<&HashMap>, layer: IL, ) -> Result<(), GraphError> { let iter = maybe_tqdm!( @@ -585,8 +640,8 @@ fn load_edges_from_num_iter< fn load_nodes_from_num_iter< 'a, S: AsRef, - I: Iterator, Option, Option<&'a str>)>, - PI: Iterator>, + I: Iterator, Option, Option<&'a str>)>, + PI: Iterator>, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, >( graph: &G, @@ -594,7 +649,7 @@ fn load_nodes_from_num_iter< nodes: I, properties: PI, const_properties: PI, - shared_const_properties: Option>, + shared_const_properties: Option<&HashMap>, ) -> Result<(), GraphError> { let iter = maybe_tqdm!( nodes.zip(properties).zip(const_properties), diff --git a/raphtory/src/io/arrow/mod.rs b/raphtory/src/io/arrow/mod.rs index 5ba8b90548..9f0595b56b 100644 --- a/raphtory/src/io/arrow/mod.rs +++ b/raphtory/src/io/arrow/mod.rs @@ -5,7 +5,7 @@ mod prop_handler; #[cfg(test)] mod test { use crate::{ - io::arrow::{dataframe::DFView, df_loaders::*}, + io::arrow::{dataframe::DFChunk, df_loaders::*}, prelude::*, }; use polars_arrow::array::{PrimitiveArray, Utf8Array}; @@ -13,7 +13,7 @@ mod test { #[test] fn load_edges_from_pretend_df() { - let df = DFView { + let df = DFChunk { names: vec!["src", "dst", "time", "prop1", "prop2"] .iter() .map(|s| s.to_string()) @@ -103,7 +103,7 @@ mod test { #[test] fn load_nodes_from_pretend_df() { - let df = DFView { + let df = DFChunk { names: vec!["id", "name", "time", "node_type"] .iter() .map(|s| s.to_string()) diff --git a/raphtory/src/io/arrow/prop_handler.rs b/raphtory/src/io/arrow/prop_handler.rs index c3d07979c4..ad05c0b291 100644 --- a/raphtory/src/io/arrow/prop_handler.rs +++ b/raphtory/src/io/arrow/prop_handler.rs @@ -6,51 +6,45 @@ use polars_arrow::{ use crate::{ core::{utils::errors::GraphError, IntoPropList}, - io::arrow::dataframe::DFView, + io::arrow::dataframe::DFChunk, prelude::Prop, }; pub struct PropIter<'a> { - inner: Box> + 'a>, + inner: Vec> + 'a>>, } impl<'a> Iterator for PropIter<'a> { type Item = Vec<(&'a str, Prop)>; fn next(&mut self) -> Option { - self.inner.next() + self.inner.iter_mut().map(|v| { + v.next() + }).filter_map(|r| { + match r { + Some(r1) => match r1 { + Some(r2) => Some(Some(r2)), + None => None + }, + None => Some(None) + } + }).collect() } } -pub(crate) fn get_prop_rows<'a>( - df: &'a DFView, - props: Option>, - const_props: Option>, -) -> Result<(PropIter<'a>, PropIter<'a>), GraphError> { - let prop_iter = combine_properties(props, df)?; - let const_prop_iter = combine_properties(const_props, df)?; - Ok((prop_iter, const_prop_iter)) -} - -fn combine_properties<'a>( - props: Option>, - df: &'a DFView, +pub(crate) fn combine_properties<'a>( + props: &'a [&str], + indices: &'a [usize], + df: &'a DFChunk, ) -> Result, GraphError> { - let iter = props - .unwrap_or_default() - .into_iter() - .map(|name| lift_property(name, df)) - .reduce(|i1, i2| { - let i1 = i1?; - let i2 = i2?; - Ok(Box::new(i1.zip(i2).map(|(mut v1, v2)| { - v1.extend(v2); - v1 - }))) - }) - .unwrap_or_else(|| Ok(Box::new(std::iter::repeat(vec![])))); - - Ok(PropIter { inner: iter? }) + for idx in indices { + is_data_type_supported(df.chunk[*idx].data_type())?; + } + let zipped = props.iter().zip(indices.iter()); + let iter = zipped.map(|(name, idx)| { + lift_property(*idx, name, df) + }); + Ok(PropIter { inner: iter.collect() }) } fn arr_as_prop(arr: Box) -> Prop { @@ -124,7 +118,7 @@ fn arr_as_prop(arr: Box) -> Prop { } } -fn validate_data_types(dt: &DataType) -> Result<(), GraphError> { +fn is_data_type_supported(dt: &DataType) -> Result<(), GraphError> { match dt { DataType::Boolean => {} DataType::Int32 => {} @@ -137,9 +131,9 @@ fn validate_data_types(dt: &DataType) -> Result<(), GraphError> { DataType::Float64 => {} DataType::Utf8 => {} DataType::LargeUtf8 => {} - DataType::List(v) => validate_data_types(v.data_type())?, - DataType::FixedSizeList(v, _) => validate_data_types(v.data_type())?, - DataType::LargeList(v) => validate_data_types(v.data_type())?, + DataType::List(v) => is_data_type_supported(v.data_type())?, + DataType::FixedSizeList(v, _) => is_data_type_supported(v.data_type())?, + DataType::LargeList(v) => is_data_type_supported(v.data_type())?, DataType::Timestamp(_, _) => {} _ => Err(GraphError::UnsupportedDataType)?, } @@ -147,279 +141,249 @@ fn validate_data_types(dt: &DataType) -> Result<(), GraphError> { } pub(crate) fn lift_property<'a: 'b, 'b>( + idx: usize, name: &'a str, - df: &'b DFView, -) -> Result> + 'b>, GraphError> { - let idx = df - .names - .iter() - .position(|n| n == name) - .ok_or_else(|| GraphError::ColumnDoesNotExist(name.to_string()))?; - - if let Some(first_chunk) = df.arrays.get(0) { - validate_data_types(first_chunk[idx].data_type())?; - } - - let r = df.arrays.iter().flat_map(move |arr| { - let arr: &Box = &arr[idx]; - match arr.data_type() { - DataType::Boolean => { - let arr = arr.as_any().downcast_ref::().unwrap(); - iter_as_prop(name, arr.iter()) - } - DataType::Int32 => { - let arr = arr.as_any().downcast_ref::>().unwrap(); - iter_as_prop(name, arr.iter().map(|i| i.copied())) - } - DataType::Int64 => { - let arr = arr.as_any().downcast_ref::>().unwrap(); - iter_as_prop(name, arr.iter().map(|i| i.copied())) - } - DataType::UInt8 => { - let arr = arr.as_any().downcast_ref::>().unwrap(); - iter_as_prop(name, arr.iter().map(|i| i.copied())) - } - DataType::UInt16 => { - let arr = arr.as_any().downcast_ref::>().unwrap(); - iter_as_prop(name, arr.iter().map(|i| i.copied())) - } - DataType::UInt32 => { - let arr = arr.as_any().downcast_ref::>().unwrap(); - iter_as_prop(name, arr.iter().map(|i| i.copied())) - } - DataType::UInt64 => { - let arr = arr.as_any().downcast_ref::>().unwrap(); - iter_as_prop(name, arr.iter().map(|i| i.copied())) - } - DataType::Float32 => { - let arr = arr.as_any().downcast_ref::>().unwrap(); - iter_as_prop(name, arr.iter().map(|i| i.copied())) - } - DataType::Float64 => { - let arr = arr.as_any().downcast_ref::>().unwrap(); - iter_as_prop(name, arr.iter().map(|i| i.copied())) - } - DataType::Utf8 => { - let arr = arr.as_any().downcast_ref::>().unwrap(); - iter_as_prop(name, arr.iter()) - } - DataType::LargeUtf8 => { - let arr = arr.as_any().downcast_ref::>().unwrap(); - iter_as_prop(name, arr.iter()) - } - DataType::List(_) => { - let arr = arr.as_any().downcast_ref::>().unwrap(); - iter_as_arr_prop(name, arr.iter()) - } - DataType::FixedSizeList(_, _) => { - let arr = arr.as_any().downcast_ref::().unwrap(); - iter_as_arr_prop(name, arr.iter()) - } - DataType::LargeList(_) => { - let arr = arr.as_any().downcast_ref::>().unwrap(); - iter_as_arr_prop(name, arr.iter()) - } - DataType::Timestamp(timeunit, timezone) => { - let arr = arr.as_any().downcast_ref::>().unwrap(); - match timezone { - Some(_) => match timeunit { - TimeUnit::Second => { - println!("Timestamp(Second, Some({:?})); ", timezone); - let r: Box> + 'b> = - Box::new(arr.iter().map(move |val| { - val.into_iter() - .map(|v| { - ( - name, - Prop::DTime( - DateTime::::from_timestamp(*v, 0) - .expect("DateTime conversion failed"), - ), - ) - }) - .collect::>() - })); - r - } - TimeUnit::Millisecond => { - println!("Timestamp(Millisecond, Some({:?})); ", timezone); - let r: Box> + 'b> = - Box::new(arr.iter().map(move |val| { - val.into_iter() - .map(|v| { - ( - name, - Prop::DTime( - DateTime::::from_timestamp_millis(*v) - .expect("DateTime conversion failed"), - ), - ) - }) - .collect::>() - })); - r - } - TimeUnit::Microsecond => { - println!("Timestamp(Microsecond, Some({:?})); ", timezone); - let r: Box> + 'b> = - Box::new(arr.iter().map(move |val| { - val.into_iter() - .map(|v| { - ( - name, - Prop::DTime( - DateTime::::from_timestamp_micros(*v) - .expect("DateTime conversion failed"), - ), - ) - }) - .collect::>() - })); - r - } - TimeUnit::Nanosecond => { - println!("Timestamp(Nanosecond, Some({:?})); ", timezone); - let r: Box> + 'b> = - Box::new(arr.iter().map(move |val| { - val.into_iter() - .map(|v| { - ( - name, - Prop::DTime(DateTime::::from_timestamp_nanos( - *v, - )), - ) - }) - .collect::>() - })); - r - } - }, - None => match timeunit { - TimeUnit::Second => { - println!("Timestamp(Second, None); "); - let r: Box> + 'b> = - Box::new(arr.iter().map(move |val| { - val.into_iter() - .map(|v| { - ( - name, - Prop::NDTime( - DateTime::from_timestamp(*v, 0) - .expect("DateTime conversion failed") - .naive_utc(), - ), - ) - }) - .collect::>() - })); - r - } - TimeUnit::Millisecond => { - println!("Timestamp(Millisecond, None); "); - let r: Box> + 'b> = - Box::new(arr.iter().map(move |val| { - val.into_iter() - .map(|v| { - ( - name, - Prop::NDTime( - DateTime::from_timestamp_millis(*v) - .expect("DateTime conversion failed") - .naive_utc(), - ), - ) - }) - .collect::>() - })); - r - } - TimeUnit::Microsecond => { - println!("Timestamp(Microsecond, None); "); - let r: Box> + 'b> = - Box::new(arr.iter().map(move |val| { - val.into_iter() - .map(|v| { - ( - name, - Prop::NDTime( - DateTime::from_timestamp_micros(*v) - .expect("DateTime conversion failed") - .naive_utc(), - ), - ) - }) - .collect::>() - })); - r - } - TimeUnit::Nanosecond => { - println!("Timestamp(Nanosecond, None); "); - let r: Box> + 'b> = - Box::new(arr.iter().map(move |val| { - val.into_iter() - .map(|v| { - ( - name, - Prop::NDTime( - DateTime::from_timestamp_nanos(*v).naive_utc(), - ), - ) - }) - .collect::>() - })); - r - } - }, - } + df: &'b DFChunk, +) -> Box> + 'b> { + let arr = &df.chunk[idx]; + let r = match arr.data_type() { + DataType::Boolean => { + let arr = arr.as_any().downcast_ref::().unwrap(); + iter_as_prop(name, arr.iter()) + } + DataType::Int32 => { + let arr = arr.as_any().downcast_ref::>().unwrap(); + iter_as_prop(name, arr.iter().map(|i| i.copied())) + } + DataType::Int64 => { + let arr = arr.as_any().downcast_ref::>().unwrap(); + iter_as_prop(name, arr.iter().map(|i| i.copied())) + } + DataType::UInt8 => { + let arr = arr.as_any().downcast_ref::>().unwrap(); + iter_as_prop(name, arr.iter().map(|i| i.copied())) + } + DataType::UInt16 => { + let arr = arr.as_any().downcast_ref::>().unwrap(); + iter_as_prop(name, arr.iter().map(|i| i.copied())) + } + DataType::UInt32 => { + let arr = arr.as_any().downcast_ref::>().unwrap(); + iter_as_prop(name, arr.iter().map(|i| i.copied())) + } + DataType::UInt64 => { + let arr = arr.as_any().downcast_ref::>().unwrap(); + iter_as_prop(name, arr.iter().map(|i| i.copied())) + } + DataType::Float32 => { + let arr = arr.as_any().downcast_ref::>().unwrap(); + iter_as_prop(name, arr.iter().map(|i| i.copied())) + } + DataType::Float64 => { + let arr = arr.as_any().downcast_ref::>().unwrap(); + iter_as_prop(name, arr.iter().map(|i| i.copied())) + } + DataType::Utf8 => { + let arr = arr.as_any().downcast_ref::>().unwrap(); + iter_as_prop(name, arr.iter()) + } + DataType::LargeUtf8 => { + let arr = arr.as_any().downcast_ref::>().unwrap(); + iter_as_prop(name, arr.iter()) + } + DataType::List(_) => { + let arr = arr.as_any().downcast_ref::>().unwrap(); + iter_as_arr_prop(name, arr.iter()) + } + DataType::FixedSizeList(_, _) => { + let arr = arr.as_any().downcast_ref::().unwrap(); + iter_as_arr_prop(name, arr.iter()) + } + DataType::LargeList(_) => { + let arr = arr.as_any().downcast_ref::>().unwrap(); + iter_as_arr_prop(name, arr.iter()) + } + DataType::Timestamp(timeunit, timezone) => { + let arr = arr.as_any().downcast_ref::>().unwrap(); + match timezone { + Some(_) => match timeunit { + TimeUnit::Second => { + println!("Timestamp(Second, Some({:?})); ", timezone); + let r: Box> + 'b> = + Box::new(arr.iter().map(move |val| { + val.map(|v| { + ( + name, + Prop::DTime( + DateTime::::from_timestamp(*v, 0) + .expect("DateTime conversion failed"), + ), + ) + }) + })); + r + } + TimeUnit::Millisecond => { + println!("Timestamp(Millisecond, Some({:?})); ", timezone); + let r: Box> + 'b> = + Box::new(arr.iter().map(move |val| { + val.map(|v| { + ( + name, + Prop::DTime( + DateTime::::from_timestamp_millis(*v) + .expect("DateTime conversion failed"), + ), + ) + }) + })); + r + } + TimeUnit::Microsecond => { + println!("Timestamp(Microsecond, Some({:?})); ", timezone); + let r: Box> + 'b> = + Box::new(arr.iter().map(move |val| { + val.map(|v| { + ( + name, + Prop::DTime( + DateTime::::from_timestamp_micros(*v) + .expect("DateTime conversion failed"), + ), + ) + }) + })); + r + } + TimeUnit::Nanosecond => { + println!("Timestamp(Nanosecond, Some({:?})); ", timezone); + let r: Box> + 'b> = + Box::new(arr.iter().map(move |val| { + val.map(|v| { + ( + name, + Prop::DTime(DateTime::::from_timestamp_nanos( + *v, + )), + ) + }) + })); + r + } + }, + None => match timeunit { + TimeUnit::Second => { + println!("Timestamp(Second, None); "); + let r: Box> + 'b> = + Box::new(arr.iter().map(move |val| { + val.map(|v| { + ( + name, + Prop::NDTime( + DateTime::from_timestamp(*v, 0) + .expect("DateTime conversion failed") + .naive_utc(), + ), + ) + }) + })); + r + } + TimeUnit::Millisecond => { + println!("Timestamp(Millisecond, None); "); + let r: Box> + 'b> = + Box::new(arr.iter().map(move |val| { + val.map(|v| { + ( + name, + Prop::NDTime( + DateTime::from_timestamp_millis(*v) + .expect("DateTime conversion failed") + .naive_utc(), + ), + ) + }) + })); + r + } + TimeUnit::Microsecond => { + println!("Timestamp(Microsecond, None); "); + let r: Box> + 'b> = + Box::new(arr.iter().map(move |val| { + val.map(|v| { + ( + name, + Prop::NDTime( + DateTime::from_timestamp_micros(*v) + .expect("DateTime conversion failed") + .naive_utc(), + ), + ) + }) + })); + r + } + TimeUnit::Nanosecond => { + println!("Timestamp(Nanosecond, None); "); + let r: Box> + 'b> = + Box::new(arr.iter().map(move |val| { + val.map(|v| { + ( + name, + Prop::NDTime( + DateTime::from_timestamp_nanos(*v).naive_utc(), + ), + ) + }) + })); + r + } + }, } - unsupported => panic!("Data type not supported: {:?}", unsupported), } - }); + unsupported => panic!("Data type not supported: {:?}", unsupported), + }; - Ok(Box::new(r)) + r } -pub(crate) fn lift_layer<'a, S: AsRef>( - layer: Option, - layer_in_df: bool, - df: &'a DFView, -) -> Box> + 'a> { +pub(crate) fn lift_layer<'a>( + layer: Option<&str>, + layer_index: Option, + df: &'a DFChunk, +) -> Box> + 'a> { if let Some(layer) = layer { - if layer_in_df { - if let Some(col) = df.utf8::(layer.as_ref()) { - Box::new(col.map(|v| v.map(|v| v.to_string()))) - } else if let Some(col) = df.utf8::(layer.as_ref()) { - Box::new(col.map(|v| v.map(|v| v.to_string()))) - } else { - Box::new(std::iter::repeat(None)) + match layer_index { + Some(index) => { + if let Some(col) = df.utf8::(index) { + Box::new(col.map(|v| v.map(|v| v.to_string()))) + } else if let Some(col) = df.utf8::(index) { + Box::new(col.map(|v| v.map(|v| v.to_string()))) + } else { + Box::new(std::iter::repeat(None)) + } } - } else { - Box::new(std::iter::repeat(Some(layer.as_ref().to_string()))) + None => Box::new(std::iter::repeat(Some(layer.to_string()))) } } else { Box::new(std::iter::repeat(None)) } } -fn iter_as_prop<'a, T: Into + 'a, I: Iterator> + 'a>( +fn iter_as_prop<'a, T: Into + 'a, I: Iterator> + 'a>( name: &'a str, is: I, -) -> Box> + 'a> { +) -> Box> + 'a> { Box::new(is.map(move |val| { - val.into_iter() - .map(|v| (name, (v).into())) - .collect::>() + val.map(|v| (name, v.into())) })) } -fn iter_as_arr_prop<'a, I: Iterator>> + 'a>( +fn iter_as_arr_prop<'a, I: Iterator>> + 'a>( name: &'a str, is: I, -) -> Box> + 'a> { +) -> Box> + 'a> { Box::new(is.map(move |val| { - val.into_iter() - .map(|v| (name, arr_as_prop(v))) - .collect::>() + val.map(|v| (name, arr_as_prop(v))) })) } diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 06e36e61b8..189acb4998 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -9,10 +9,7 @@ use crate::{ }; use itertools::Itertools; use polars_arrow::{ - array::Array, datatypes::{ArrowDataType as DataType, ArrowSchema, Field}, - legacy::error, - record_batch::RecordBatch as Chunk, }; use polars_parquet::{ read, @@ -23,6 +20,7 @@ use std::{ fs, path::{Path, PathBuf}, }; +use std::fs::File; pub fn load_nodes_from_parquet< G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, @@ -33,13 +31,13 @@ pub fn load_nodes_from_parquet< time: &str, node_type: Option<&str>, node_type_in_df: Option, - properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + properties: Option<&[&str]>, + const_properties: Option<&[&str]>, + shared_const_properties: Option<&HashMap>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id, time]; - cols_to_check.extend(properties.as_ref().unwrap_or(&Vec::new())); - cols_to_check.extend(const_properties.as_ref().unwrap_or(&Vec::new())); + cols_to_check.extend(properties.unwrap_or(&Vec::new())); + cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); if node_type_in_df.unwrap_or(true) { if let Some(ref node_type) = node_type { cols_to_check.push(node_type.as_ref()); @@ -47,22 +45,20 @@ pub fn load_nodes_from_parquet< } for path in get_parquet_file_paths(parquet_path)? { - let df = process_parquet_file_to_df(path.as_path(), cols_to_check.clone())?; - df.check_cols_exist(&cols_to_check)?; - let size = df.get_inner_size(); + let df_view = process_parquet_file_to_df(path.as_path(), &cols_to_check)?; + df_view.check_cols_exist(&cols_to_check)?; load_nodes_from_df( - &df, - size, + df_view, id, time, - properties.clone(), - const_properties.clone(), - shared_const_properties.clone(), + properties, + const_properties, + shared_const_properties, node_type, node_type_in_df.unwrap_or(true), graph, ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } Ok(()) @@ -76,16 +72,16 @@ pub fn load_edges_from_parquet< src: &str, dst: &str, time: &str, - properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + properties: Option<&[&str]>, + const_properties: Option<&[&str]>, + shared_const_properties: Option<&HashMap>, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { let parquet_path = parquet_path.as_ref(); let mut cols_to_check = vec![src, dst, time]; - cols_to_check.extend(properties.as_ref().unwrap_or(&Vec::new())); - cols_to_check.extend(const_properties.as_ref().unwrap_or(&Vec::new())); + cols_to_check.extend(properties.unwrap_or(&Vec::new())); + cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); if layer_in_df.unwrap_or(false) { if let Some(ref layer) = layer { cols_to_check.push(layer.as_ref()); @@ -93,23 +89,23 @@ pub fn load_edges_from_parquet< } for path in get_parquet_file_paths(parquet_path)? { - let df = process_parquet_file_to_df(path.as_path(), cols_to_check.clone())?; - df.check_cols_exist(&cols_to_check)?; + let df_view = process_parquet_file_to_df(path.as_path(), &cols_to_check)?; + df_view.check_cols_exist(&cols_to_check)?; let size = cols_to_check.len(); load_edges_from_df( - &df, + df_view, size, src, dst, time, - properties.clone(), - const_properties.clone(), - shared_const_properties.clone(), + properties, + const_properties, + shared_const_properties, layer, layer_in_df.unwrap_or(true), graph, ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } Ok(()) @@ -121,25 +117,26 @@ pub fn load_node_props_from_parquet< graph: &G, parquet_path: &Path, id: &str, - const_properties: Option>, - shared_const_properties: Option>, + const_properties: Option<&[&str]>, + shared_const_properties: Option<&HashMap>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id]; - cols_to_check.extend(const_properties.as_ref().unwrap_or(&Vec::new())); + cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + let size = cols_to_check.len(); for path in get_parquet_file_paths(parquet_path)? { - let df = process_parquet_file_to_df(path.as_path(), cols_to_check.clone())?; - df.check_cols_exist(&cols_to_check)?; - let size = cols_to_check.len(); + let df_view = process_parquet_file_to_df(path.as_path(), &cols_to_check)?; + df_view.check_cols_exist(&cols_to_check)?; + load_node_props_from_df( - &df, + df_view, size, id, - const_properties.clone(), - shared_const_properties.clone(), + const_properties, + shared_const_properties, graph, ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } Ok(()) @@ -152,8 +149,8 @@ pub fn load_edge_props_from_parquet< parquet_path: &Path, src: &str, dst: &str, - const_properties: Option>, - shared_const_properties: Option>, + const_properties: Option<&[&str]>, + shared_const_properties: Option<&HashMap>, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { @@ -163,24 +160,24 @@ pub fn load_edge_props_from_parquet< cols_to_check.push(layer.as_ref()); } } - cols_to_check.extend(const_properties.as_ref().unwrap_or(&Vec::new())); + cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + let size = cols_to_check.len(); for path in get_parquet_file_paths(parquet_path)? { - let df = process_parquet_file_to_df(path.as_path(), cols_to_check.clone())?; - df.check_cols_exist(&cols_to_check)?; - let size = cols_to_check.len(); + let df_view = process_parquet_file_to_df(path.as_path(), &cols_to_check)?; + df_view.check_cols_exist(&cols_to_check)?; load_edges_props_from_df( - &df, + df_view, size, src, dst, - const_properties.clone(), - shared_const_properties.clone(), + const_properties, + shared_const_properties, layer, layer_in_df.unwrap_or(true), graph.core_graph(), ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } Ok(()) @@ -203,13 +200,12 @@ pub fn load_edges_deletions_from_parquet< cols_to_check.push(layer.as_ref()); } } - + let size = cols_to_check.len(); for path in get_parquet_file_paths(parquet_path)? { - let df = process_parquet_file_to_df(path.as_path(), cols_to_check.clone())?; - df.check_cols_exist(&cols_to_check)?; - let size = cols_to_check.len(); + let df_view = process_parquet_file_to_df(path.as_path(), &cols_to_check)?; + df_view.check_cols_exist(&cols_to_check)?; load_edges_deletions_from_df( - &df, + df_view, size, src, dst, @@ -218,36 +214,43 @@ pub fn load_edges_deletions_from_parquet< layer_in_df.unwrap_or(true), graph, ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } - Ok(()) } pub(crate) fn process_parquet_file_to_df( parquet_file_path: &Path, - col_names: Vec<&str>, -) -> Result { - let (names, arrays) = read_parquet_file(parquet_file_path, &col_names)?; + col_names: &[&str], +) -> Result>>, GraphError> { + let (names, chunks) = read_parquet_file(parquet_file_path, col_names)?; - let names = names + let names: Vec = names .into_iter() .filter(|x| col_names.contains(&x.as_str())) .collect(); - let arrays = arrays - .map_ok(|r| r.into_iter().map(|boxed| boxed.clone()).collect_vec()) - .collect::, _>>()?; - Ok(DFView { names, arrays }) + let chunks = chunks.into_iter().map(move |result| { + result + .map(|r| DFChunk { + chunk: r.into_iter().map(|boxed| boxed.clone()).collect_vec(), + }) + .map_err(|e| GraphError::LoadFailure(format!("Failed to process Parquet file: {:?}", e))) + }); + + Ok(DFView { + names, + chunks, + }) } fn read_parquet_file( path: impl AsRef, - col_names: &Vec<&str>, + col_names: &[&str], ) -> Result< ( Vec, - impl Iterator>, error::PolarsError>>, + FileReader, ), GraphError, > { @@ -307,7 +310,7 @@ fn get_parquet_file_paths(parquet_path: &Path) -> Result, GraphErro #[cfg(test)] mod test { use super::*; - use polars_arrow::array::{PrimitiveArray, Utf8Array}; + use polars_arrow::array::{Array, PrimitiveArray, Utf8Array}; use std::path::PathBuf; #[test] @@ -315,28 +318,34 @@ mod test { let parquet_file_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("resources/test/test_data.parquet"); - let col_names = vec!["src", "dst", "time", "weight", "marbles"]; + let col_names: &[&str] = &["src", "dst", "time", "weight", "marbles"]; let df = process_parquet_file_to_df(parquet_file_path.as_path(), col_names).unwrap(); - let df1 = DFView { - names: vec!["src", "dst", "time", "weight", "marbles"] - .iter() - .map(|s| s.to_string()) - .collect(), - arrays: vec![vec![ - Box::new(PrimitiveArray::::from_values(vec![1, 2, 3, 4, 5])), - Box::new(PrimitiveArray::::from_values(vec![2, 3, 4, 5, 6])), - Box::new(PrimitiveArray::::from_values(vec![1, 2, 3, 4, 5])), - Box::new(PrimitiveArray::::from_values(vec![ - 1f64, 2f64, 3f64, 4f64, 5f64, - ])), - Box::new(Utf8Array::::from_iter_values( - vec!["red", "blue", "green", "yellow", "purple"].into_iter(), - )), - ]], - }; + let expected_names: Vec = vec!["src", "dst", "time", "weight", "marbles"] + .iter() + .map(|s| s.to_string()) + .collect(); + let expected_chunks: Vec>> = vec![vec![ + Box::new(PrimitiveArray::::from_values(vec![1, 2, 3, 4, 5])), + Box::new(PrimitiveArray::::from_values(vec![2, 3, 4, 5, 6])), + Box::new(PrimitiveArray::::from_values(vec![1, 2, 3, 4, 5])), + Box::new(PrimitiveArray::::from_values(vec![ + 1f64, 2f64, 3f64, 4f64, 5f64, + ])), + Box::new(Utf8Array::::from_iter_values( + vec!["red", "blue", "green", "yellow", "purple"].into_iter(), + )), + ]]; + + let actual_names = df.names; + let chunks: Vec> = df.chunks.collect_vec(); + let chunks: Result, GraphError> = chunks.into_iter().collect(); + let chunks: Vec = chunks.unwrap(); + let actual_chunks: Vec>> = chunks.into_iter().map(|c: DFChunk| { + c.chunk + }).collect_vec(); - assert_eq!(df.names, df1.names); - assert_eq!(df.arrays, df1.arrays); + assert_eq!(actual_names, expected_names); + assert_eq!(actual_chunks, expected_chunks); } } diff --git a/raphtory/src/python/graph/disk_graph.rs b/raphtory/src/python/graph/disk_graph.rs index c5e6426b31..f8077d017a 100644 --- a/raphtory/src/python/graph/disk_graph.rs +++ b/raphtory/src/python/graph/disk_graph.rs @@ -7,7 +7,7 @@ use crate::{ core::utils::errors::GraphError, db::graph::views::deletion_graph::PersistentGraph, disk_graph::{graph_impl::ParquetLayerCols, DiskGraphError, DiskGraphStorage}, - io::arrow::dataframe::DFView, + io::arrow::dataframe::DFChunk, prelude::Graph, python::{ graph::graph::PyGraph, types::repr::StructReprBuilder, utils::errors::adapt_err_value, @@ -20,6 +20,7 @@ use pyo3::{ types::{PyDict, PyList, PyString}, }; use std::path::Path; +use crate::io::arrow::dataframe::DFView; impl From for PyErr { fn from(value: DiskGraphError) -> Self { @@ -148,18 +149,17 @@ impl PyDiskGraph { dst_col: &str, time_col: &str, ) -> Result { - let graph: Result = Python::with_gil(|py| { + let graph: Result = Python::with_gil(|py| { let cols_to_check = vec![src_col, dst_col, time_col]; let df_columns: Vec = edge_df.getattr("columns")?.extract()?; let df_columns: Vec<&str> = df_columns.iter().map(|x| x.as_str()).collect(); - let df = process_pandas_py_df(edge_df, py, df_columns)?; + let df_view = process_pandas_py_df(edge_df, py, df_columns)?; + df_view.check_cols_exist(&cols_to_check)?; + let graph = Self::from_pandas(graph_dir, df_view, src_col, dst_col, time_col)?; - df.check_cols_exist(&cols_to_check)?; - let graph = Self::from_pandas(graph_dir, df, src_col, dst_col, time_col)?; - - Ok::<_, PyErr>(graph) + Ok::<_, GraphError>(graph) }); graph.map_err(|e| { @@ -177,7 +177,9 @@ impl PyDiskGraph { } #[staticmethod] - #[pyo3(signature = (graph_dir, layer_parquet_cols, node_properties, chunk_size, t_props_chunk_size, read_chunk_size, concurrent_files, num_threads, node_type_col))] + #[pyo3( + signature = (graph_dir, layer_parquet_cols, node_properties, chunk_size, t_props_chunk_size, read_chunk_size, concurrent_files, num_threads, node_type_col) + )] fn load_from_parquets( graph_dir: &str, layer_parquet_cols: ParquetLayerColsList, @@ -231,51 +233,38 @@ impl PyDiskGraph { impl PyDiskGraph { fn from_pandas( graph_dir: &str, - df: DFView, + df_view: DFView>>, src: &str, dst: &str, time: &str, ) -> Result { - let src_col_idx = df.names.iter().position(|x| x == src).unwrap(); - let dst_col_idx = df.names.iter().position(|x| x == dst).unwrap(); - let time_col_idx = df.names.iter().position(|x| x == time).unwrap(); - - let chunk_size = df - .arrays - .first() - .map(|arr| arr.len()) - .ok_or_else(|| GraphError::LoadFailure("Empty pandas dataframe".to_owned()))?; - - let t_props_chunk_size = chunk_size; - - let names = df.names.clone(); - - let edge_lists = df - .arrays - .into_iter() - .map(|arr| { - let fields = arr - .iter() - .zip(names.iter()) - .map(|(arr, col_name)| { - Field::new(col_name, arr.data_type().clone(), arr.null_count() > 0) - }) - .collect_vec(); - let s_array = StructArray::new(DataType::Struct(fields), arr, None); - s_array - }) - .collect::>(); + let src_index = df_view.get_index(src)?; + let dst_index = df_view.get_index(dst)?; + let time_index = df_view.get_index(time)?; + let chunk_size = usize::MAX; + + let edge_lists = df_view.chunks.map_ok(|df| { + let fields = df.chunk + .iter() + .zip(df_view.names.iter()) + .map(|(arr, col_name)| { + Field::new(col_name, arr.data_type().clone(), arr.null_count() > 0) + }) + .collect_vec(); + let s_array = StructArray::new(DataType::Struct(fields), df.chunk, None); + s_array + }).collect::, GraphError>>()?; DiskGraphStorage::load_from_edge_lists( &edge_lists, chunk_size, - t_props_chunk_size, + chunk_size, graph_dir, - src_col_idx, - dst_col_idx, - time_col_idx, + src_index, + dst_index, + time_index, ) - .map_err(|err| GraphError::LoadFailure(format!("Failed to load graph {err:?}"))) + .map_err(|err| GraphError::LoadFailure(format!("Failed to load graph {err:?}"))) } fn from_parquets( @@ -300,6 +289,6 @@ impl PyDiskGraph { num_threads, node_type_col, ) - .map_err(|err| GraphError::LoadFailure(format!("Failed to load graph {err:?}"))) + .map_err(|err| GraphError::LoadFailure(format!("Failed to load graph {err:?}"))) } } diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 040bbfb53c..511af4df48 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -491,9 +491,11 @@ impl PyGraph { /// Returns: /// Graph: The loaded Graph object. #[staticmethod] - #[pyo3(signature = (edge_df, edge_src, edge_dst, edge_time, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, - edge_layer = None, layer_in_df = true, node_df = None, node_id = None, node_time = None, node_properties = None, - node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true))] + #[pyo3( + signature = (edge_df, edge_src, edge_dst, edge_time, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, + edge_layer = None, layer_in_df = true, node_df = None, node_id = None, node_time = None, node_properties = None, + node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true) + )] fn load_from_pandas( edge_df: &PyAny, edge_src: &str, @@ -513,33 +515,33 @@ impl PyGraph { node_type: Option<&str>, node_type_in_df: Option, ) -> Result { - let graph = PyGraph { - graph: Graph::new(), - }; + let graph = Graph::new(); if let (Some(node_df), Some(node_id), Some(node_time)) = (node_df, node_id, node_time) { - graph.load_nodes_from_pandas( + load_nodes_from_pandas( + &graph.core_graph(), node_df, node_id, node_time, node_type, node_type_in_df, - node_properties, - node_const_properties, - node_shared_const_properties, + node_properties.as_ref().map(|props| props.as_ref()), + node_const_properties.as_ref().map(|props| props.as_ref()), + node_shared_const_properties.as_ref(), )?; } - graph.load_edges_from_pandas( + load_edges_from_pandas( + &graph.core_graph(), edge_df, edge_src, edge_dst, edge_time, - edge_properties, - edge_const_properties, - edge_shared_const_properties, + edge_properties.as_ref().map(|props| props.as_ref()), + edge_const_properties.as_ref().map(|props| props.as_ref()), + edge_shared_const_properties.as_ref(), edge_layer, layer_in_df, )?; - Ok(graph.graph) + Ok(graph) } /// Load a graph from Parquet file. @@ -566,9 +568,11 @@ impl PyGraph { /// Returns: /// Graph: The loaded Graph object. #[staticmethod] - #[pyo3(signature = (edge_parquet_path, edge_src, edge_dst, edge_time, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, - edge_layer = None, layer_in_df = true, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, - node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true))] + #[pyo3( + signature = (edge_parquet_path, edge_src, edge_dst, edge_time, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, + edge_layer = None, layer_in_df = true, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, + node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true) + )] fn load_from_parquet( edge_parquet_path: PathBuf, edge_src: &str, @@ -588,35 +592,37 @@ impl PyGraph { node_type: Option<&str>, node_type_in_df: Option, ) -> Result { - let graph = PyGraph { - graph: Graph::new(), - }; + let graph = Graph::new(); + if let (Some(node_parquet_path), Some(node_id), Some(node_time)) = (node_parquet_path, node_id, node_time) { - graph.load_nodes_from_parquet( - node_parquet_path, + load_nodes_from_parquet( + &graph, + &node_parquet_path, node_id, node_time, node_type, node_type_in_df, - node_properties, - node_const_properties, - node_shared_const_properties, + node_properties.as_ref().map(|props| props.as_ref()), + node_const_properties.as_ref().map(|props| props.as_ref()), + node_shared_const_properties.as_ref(), )?; } - graph.load_edges_from_parquet( + load_edges_from_parquet( + &graph, edge_parquet_path, edge_src, edge_dst, edge_time, - edge_properties, - edge_const_properties, - edge_shared_const_properties, + edge_properties.as_ref().map(|props| props.as_ref()), + edge_const_properties.as_ref().map(|props| props.as_ref()), + edge_shared_const_properties.as_ref(), edge_layer, layer_in_df, )?; - Ok(graph.graph) + + Ok(graph) } /// Load nodes from a Pandas DataFrame into the graph. @@ -632,7 +638,9 @@ impl PyGraph { /// shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, id, time, node_type = None, node_type_in_df = true, properties = None, const_properties = None, shared_const_properties = None))] + #[pyo3( + signature = (df, id, time, node_type = None, node_type_in_df = true, properties = None, const_properties = None, shared_const_properties = None) + )] fn load_nodes_from_pandas( &self, df: &PyAny, @@ -651,9 +659,9 @@ impl PyGraph { time, node_type, node_type_in_df, - properties, - const_properties, - shared_const_properties, + properties.as_ref().map(|props| props.as_ref()), + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref() ) } @@ -670,7 +678,9 @@ impl PyGraph { /// shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, id, time, node_type = None, node_type_in_df = true, properties = None, const_properties = None, shared_const_properties = None))] + #[pyo3( + signature = (parquet_path, id, time, node_type = None, node_type_in_df = true, properties = None, const_properties = None, shared_const_properties = None) + )] fn load_nodes_from_parquet( &self, parquet_path: PathBuf, @@ -689,9 +699,9 @@ impl PyGraph { time, node_type, node_type_in_df, - properties, - const_properties, - shared_const_properties, + properties.as_ref().map(|props| props.as_ref()), + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref() ) } @@ -710,7 +720,9 @@ impl PyGraph { /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, src, dst, time, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] + #[pyo3( + signature = (df, src, dst, time, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) + )] fn load_edges_from_pandas( &self, df: &PyAny, @@ -729,9 +741,9 @@ impl PyGraph { src, dst, time, - properties, - const_properties, - shared_const_properties, + properties.as_ref().map(|props| props.as_ref()), + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref(), layer, layer_in_df, ) @@ -752,7 +764,9 @@ impl PyGraph { /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, src, dst, time, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] + #[pyo3( + signature = (parquet_path, src, dst, time, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) + )] fn load_edges_from_parquet( &self, parquet_path: PathBuf, @@ -771,9 +785,9 @@ impl PyGraph { src, dst, time, - properties, - const_properties, - shared_const_properties, + properties.as_ref().map(|props| props.as_ref()), + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref(), layer, layer_in_df, ) @@ -801,8 +815,8 @@ impl PyGraph { self.graph.core_graph(), df, id, - const_properties, - shared_const_properties, + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref() ) } @@ -828,8 +842,8 @@ impl PyGraph { &self.graph, parquet_path.as_path(), id, - const_properties, - shared_const_properties, + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref(), ) } @@ -846,7 +860,9 @@ impl PyGraph { /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, src, dst, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] + #[pyo3( + signature = (df, src, dst, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) + )] fn load_edge_props_from_pandas( &self, df: &PyAny, @@ -862,8 +878,8 @@ impl PyGraph { df, src, dst, - const_properties, - shared_const_properties, + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref(), layer, layer_in_df, ) @@ -882,7 +898,9 @@ impl PyGraph { /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, src, dst, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] + #[pyo3( + signature = (parquet_path, src, dst, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) + )] fn load_edge_props_from_parquet( &self, parquet_path: PathBuf, @@ -898,8 +916,8 @@ impl PyGraph { parquet_path.as_path(), src, dst, - const_properties, - shared_const_properties, + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref(), layer, layer_in_df, ) diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index 162cc2a4f8..47d3d550cc 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -562,9 +562,9 @@ impl PyPersistentGraph { time, node_type, node_type_in_df, - properties, - const_properties, - shared_const_properties, + properties.as_ref().map(|props| props.as_ref()), + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref(), ) } @@ -600,9 +600,9 @@ impl PyPersistentGraph { time, node_type, node_type_in_df, - properties, - const_properties, - shared_const_properties, + properties.as_ref().map(|props| props.as_ref()), + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref(), ) } @@ -640,9 +640,9 @@ impl PyPersistentGraph { src, dst, time, - properties, - const_properties, - shared_const_properties, + properties.as_ref().map(|props| props.as_ref()), + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref(), layer, layer_in_df, ) @@ -682,9 +682,9 @@ impl PyPersistentGraph { src, dst, time, - properties, - const_properties, - shared_const_properties, + properties.as_ref().map(|props| props.as_ref()), + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref(), layer, layer_in_df, ) @@ -770,8 +770,8 @@ impl PyPersistentGraph { &self.graph.0, df, id, - const_properties, - shared_const_properties, + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref(), ) } @@ -797,8 +797,8 @@ impl PyPersistentGraph { &self.graph, parquet_path.as_path(), id, - const_properties, - shared_const_properties, + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref(), ) } @@ -831,8 +831,8 @@ impl PyPersistentGraph { df, src, dst, - const_properties, - shared_const_properties, + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref(), layer, layer_in_df, ) @@ -867,8 +867,8 @@ impl PyPersistentGraph { parquet_path.as_path(), src, dst, - const_properties, - shared_const_properties, + const_properties.as_ref().map(|props| props.as_ref()), + shared_const_properties.as_ref(), layer, layer_in_df, ) diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index b211de46a0..89326d2829 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -15,34 +15,24 @@ pub fn load_nodes_from_pandas( time: &str, node_type: Option<&str>, node_type_in_df: Option, - properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + properties: Option<&[&str]>, + const_properties: Option<&[&str]>, + shared_const_properties: Option<&HashMap>, ) -> Result<(), GraphError> { Python::with_gil(|py| { - let size: usize = py - .eval( - "index.__len__()", - Some([("index", df.getattr("index")?)].into_py_dict(py)), - None, - )? - .extract()?; - let mut cols_to_check = vec![id, time]; - cols_to_check.extend(properties.as_ref().unwrap_or(&Vec::new())); - cols_to_check.extend(const_properties.as_ref().unwrap_or(&Vec::new())); + cols_to_check.extend(properties.unwrap_or(&Vec::new())); + cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); if node_type_in_df.unwrap_or(true) { if let Some(ref node_type) = node_type { cols_to_check.push(node_type.as_ref()); } } - let df = process_pandas_py_df(df, py, cols_to_check.clone())?; - df.check_cols_exist(&cols_to_check)?; - + let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; + df_view.check_cols_exist(&cols_to_check)?; load_nodes_from_df( - &df, - size, + df_view, id, time, properties, @@ -52,10 +42,10 @@ pub fn load_nodes_from_pandas( node_type_in_df.unwrap_or(true), graph, ) - .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; + .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; Ok::<(), PyErr>(()) }) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; Ok(()) } @@ -65,9 +55,9 @@ pub fn load_edges_from_pandas( src: &str, dst: &str, time: &str, - properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + properties: Option<&[&str]>, + const_properties: Option<&[&str]>, + shared_const_properties: Option<&HashMap>, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { @@ -81,19 +71,18 @@ pub fn load_edges_from_pandas( .extract()?; let mut cols_to_check = vec![src, dst, time]; - cols_to_check.extend(properties.as_ref().unwrap_or(&Vec::new())); - cols_to_check.extend(const_properties.as_ref().unwrap_or(&Vec::new())); + cols_to_check.extend(properties.unwrap_or(&Vec::new())); + cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); if layer_in_df.unwrap_or(false) { if let Some(ref layer) = layer { cols_to_check.push(layer.as_ref()); } } - let df = process_pandas_py_df(df, py, cols_to_check.clone())?; - - df.check_cols_exist(&cols_to_check)?; + let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; + df_view.check_cols_exist(&cols_to_check)?; load_edges_from_df( - &df, + df_view, size, src, dst, @@ -105,11 +94,10 @@ pub fn load_edges_from_pandas( layer_in_df.unwrap_or(true), graph, ) - .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; - + .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; Ok::<(), PyErr>(()) }) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; Ok(()) } @@ -117,8 +105,8 @@ pub fn load_node_props_from_pandas( graph: &GraphStorage, df: &PyAny, id: &str, - const_properties: Option>, - shared_const_properties: Option>, + const_properties: Option<&[&str]>, + shared_const_properties: Option<&HashMap>, ) -> Result<(), GraphError> { Python::with_gil(|py| { let size: usize = py @@ -129,23 +117,21 @@ pub fn load_node_props_from_pandas( )? .extract()?; let mut cols_to_check = vec![id]; - cols_to_check.extend(const_properties.as_ref().unwrap_or(&Vec::new())); - let df = process_pandas_py_df(df, py, cols_to_check.clone())?; - df.check_cols_exist(&cols_to_check)?; - + cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; + df_view.check_cols_exist(&cols_to_check)?; load_node_props_from_df( - &df, + df_view, size, id, const_properties, shared_const_properties, graph, ) - .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; - + .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; Ok::<(), PyErr>(()) }) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; Ok(()) } @@ -154,8 +140,8 @@ pub fn load_edge_props_from_pandas( df: &PyAny, src: &str, dst: &str, - const_properties: Option>, - shared_const_properties: Option>, + const_properties: Option<&[&str]>, + shared_const_properties: Option<&HashMap>, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { @@ -173,11 +159,11 @@ pub fn load_edge_props_from_pandas( cols_to_check.push(layer.as_ref()); } } - cols_to_check.extend(const_properties.as_ref().unwrap_or(&Vec::new())); - let df = process_pandas_py_df(df, py, cols_to_check.clone())?; - df.check_cols_exist(&cols_to_check)?; + cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; + df_view.check_cols_exist(&cols_to_check)?; load_edges_props_from_df( - &df, + df_view, size, src, dst, @@ -187,11 +173,10 @@ pub fn load_edge_props_from_pandas( layer_in_df.unwrap_or(true), graph, ) - .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; - df.check_cols_exist(&cols_to_check)?; + .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; Ok::<(), PyErr>(()) }) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; Ok(()) } @@ -220,11 +205,10 @@ pub fn load_edges_deletions_from_pandas( } } - let df = process_pandas_py_df(df, py, cols_to_check.clone())?; - df.check_cols_exist(&cols_to_check)?; - + let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; + df_view.check_cols_exist(&cols_to_check)?; load_edges_deletions_from_df( - &df, + df_view, size, src, dst, @@ -233,19 +217,18 @@ pub fn load_edges_deletions_from_pandas( layer_in_df.unwrap_or(true), graph.core_graph(), ) - .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; - + .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; Ok::<(), PyErr>(()) }) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; Ok(()) } -pub(crate) fn process_pandas_py_df( - df: &PyAny, - py: Python, +pub(crate) fn process_pandas_py_df<'a>( + df: &'a PyAny, + py: Python<'a>, col_names: Vec<&str>, -) -> PyResult { +) -> PyResult> + 'a>> { is_jupyter(py); py.import("pandas")?; let module = py.import("pyarrow")?; @@ -276,25 +259,29 @@ pub(crate) fn process_pandas_py_df( } else { vec![] } - .into_iter() - .filter(|x| col_names.contains(&x.as_str())) - .collect(); - - let arrays = rb - .iter() - .map(|rb| { - (0..names.len()) - .map(|i| { - let array = rb.call_method1("column", (i,))?; - let arr = array_to_rust(array)?; - Ok::, PyErr>(arr) - }) - .collect::, PyErr>>() - }) - .collect::, PyErr>>()?; + .into_iter() + .filter(|x| col_names.contains(&x.as_str())) + .collect(); - let df = DFView { names, arrays }; - Ok(df) + let names_len = names.len(); + let chunks = rb.into_iter().map(move |rb| { + let chunk = (0..names_len) + .map(|i| { + let array = rb.call_method1("column", (i,)) + .map_err(|e| GraphError::from(e))?; + let arr = array_to_rust(array) + .map_err(|e| GraphError::from(e))?; + Ok::, GraphError>(arr) + }) + .collect::, GraphError>>()?; + + Ok(DFChunk { chunk }) + }); + + Ok(DFView { + names, + chunks, + }) } pub fn array_to_rust(obj: &PyAny) -> PyResult { From bfbeae34ce1f43757a6cea736f252110b15e61b0 Mon Sep 17 00:00:00 2001 From: Shivam Kapoor <4599890+iamsmkr@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:42:11 +0100 Subject: [PATCH 02/17] fix more tests --- python/tests/test_disk_graph.py | 13 ++-- raphtory/src/io/arrow/dataframe.rs | 1 + raphtory/src/io/arrow/mod.rs | 86 ++++++++++++++----------- raphtory/src/python/graph/disk_graph.rs | 15 ++++- 4 files changed, 65 insertions(+), 50 deletions(-) diff --git a/python/tests/test_disk_graph.py b/python/tests/test_disk_graph.py index 8d4a032f04..4b004feb67 100644 --- a/python/tests/test_disk_graph.py +++ b/python/tests/test_disk_graph.py @@ -1,4 +1,4 @@ -from raphtory import PyDirection, DiskGraphStorage +from raphtory import DiskGraphStorage from raphtory import algorithms import pandas as pd import tempfile @@ -35,17 +35,13 @@ ).sort_values(["src", "dst", "time"]) -def create_graph(edges, dir): - return DiskGraphStorage.load_from_pandas(dir, edges, "src", "dst", "time") - - # in every test use with to create a temporary directory that will be deleted automatically # after the with block ends - def test_counts(): - dir = tempfile.TemporaryDirectory() - graph = create_graph(edges, dir.name).to_events() + graph_dir = tempfile.TemporaryDirectory() + graph = DiskGraphStorage.load_from_pandas(graph_dir.name, edges, "src", "dst", "time") + graph = graph.to_events() assert graph.count_nodes() == 5 assert graph.count_edges() == 20 @@ -140,6 +136,7 @@ def test_disk_graph(): ) assert len(list(actual.get_all_with_names())) == 1624 + def test_disk_graph_type_filter(): curr_dir = os.path.dirname(os.path.abspath(__file__)) rsc_dir = os.path.join(curr_dir, "..", "..", "pometry-storage-private", "resources") diff --git a/raphtory/src/io/arrow/dataframe.rs b/raphtory/src/io/arrow/dataframe.rs index 79251d3108..32c8cea095 100644 --- a/raphtory/src/io/arrow/dataframe.rs +++ b/raphtory/src/io/arrow/dataframe.rs @@ -41,6 +41,7 @@ where } } +#[derive(Clone)] pub(crate) struct DFChunk { pub(crate) chunk: Vec>, } diff --git a/raphtory/src/io/arrow/mod.rs b/raphtory/src/io/arrow/mod.rs index 9f0595b56b..33a5920fb4 100644 --- a/raphtory/src/io/arrow/mod.rs +++ b/raphtory/src/io/arrow/mod.rs @@ -10,48 +10,53 @@ mod test { }; use polars_arrow::array::{PrimitiveArray, Utf8Array}; use raphtory_api::core::{entities::GID, storage::arc_str::ArcStr}; + use crate::io::arrow::dataframe::DFView; #[test] fn load_edges_from_pretend_df() { - let df = DFChunk { + let df = DFView { names: vec!["src", "dst", "time", "prop1", "prop2"] .iter() .map(|s| s.to_string()) .collect(), - arrays: vec![ - vec![ - Box::new(PrimitiveArray::::from(vec![Some(1)])), - Box::new(PrimitiveArray::::from(vec![Some(2)])), - Box::new(PrimitiveArray::::from(vec![Some(1)])), - Box::new(PrimitiveArray::::from(vec![Some(1.0)])), - Box::new(Utf8Array::::from(vec![Some("a")])), - ], - vec![ - Box::new(PrimitiveArray::::from(vec![Some(2), Some(3)])), - Box::new(PrimitiveArray::::from(vec![Some(3), Some(4)])), - Box::new(PrimitiveArray::::from(vec![Some(2), Some(3)])), - Box::new(PrimitiveArray::::from(vec![Some(2.0), Some(3.0)])), - Box::new(Utf8Array::::from(vec![Some("b"), Some("c")])), - ], - ], + chunks: vec![ + Ok(DFChunk { + chunk: vec![ + Box::new(PrimitiveArray::::from(vec![Some(1)])), + Box::new(PrimitiveArray::::from(vec![Some(2)])), + Box::new(PrimitiveArray::::from(vec![Some(1)])), + Box::new(PrimitiveArray::::from(vec![Some(1.0)])), + Box::new(Utf8Array::::from(vec![Some("a")])), + ] + }), + Ok(DFChunk { + chunk: vec![ + Box::new(PrimitiveArray::::from(vec![Some(2), Some(3)])), + Box::new(PrimitiveArray::::from(vec![Some(3), Some(4)])), + Box::new(PrimitiveArray::::from(vec![Some(2), Some(3)])), + Box::new(PrimitiveArray::::from(vec![Some(2.0), Some(3.0)])), + Box::new(Utf8Array::::from(vec![Some("b"), Some("c")])), + ], + }), + ].into_iter(), }; let graph = Graph::new(); let layer: Option<&str> = None; let layer_in_df: bool = true; load_edges_from_df( - &df, + df, 5, "src", "dst", "time", - Some(vec!["prop1", "prop2"]), + Some(&*vec!["prop1", "prop2"]), None, None, layer, layer_in_df, &graph, ) - .expect("failed to load edges from pretend df"); + .expect("failed to load edges from pretend df"); let actual = graph .edges() @@ -103,41 +108,44 @@ mod test { #[test] fn load_nodes_from_pretend_df() { - let df = DFChunk { + let df = DFView { names: vec!["id", "name", "time", "node_type"] .iter() .map(|s| s.to_string()) .collect(), - arrays: vec![ - vec![ - Box::new(PrimitiveArray::::from(vec![Some(1)])), - Box::new(Utf8Array::::from(vec![Some("a")])), - Box::new(PrimitiveArray::::from(vec![Some(1)])), - Box::new(Utf8Array::::from(vec![Some("atype")])), - ], - vec![ - Box::new(PrimitiveArray::::from(vec![Some(2)])), - Box::new(Utf8Array::::from(vec![Some("b")])), - Box::new(PrimitiveArray::::from(vec![Some(2)])), - Box::new(Utf8Array::::from(vec![Some("btype")])), - ], - ], + chunks: vec![ + Ok(DFChunk { + chunk: vec![ + Box::new(PrimitiveArray::::from(vec![Some(1)])), + Box::new(Utf8Array::::from(vec![Some("a")])), + Box::new(PrimitiveArray::::from(vec![Some(1)])), + Box::new(Utf8Array::::from(vec![Some("atype")])), + ] + }), + Ok(DFChunk { + chunk: vec![ + Box::new(PrimitiveArray::::from(vec![Some(2)])), + Box::new(Utf8Array::::from(vec![Some("b")])), + Box::new(PrimitiveArray::::from(vec![Some(2)])), + Box::new(Utf8Array::::from(vec![Some("btype")])), + ], + }), + ].into_iter(), }; let graph = Graph::new(); load_nodes_from_df( - &df, - 3, + df, "id", "time", - Some(vec!["name"]), + Some(&*vec!["name"]), None, None, Some("node_type"), false, &graph, ) - .expect("failed to load nodes from pretend df"); + .expect("failed to load nodes from pretend df"); let actual = graph .nodes() diff --git a/raphtory/src/python/graph/disk_graph.rs b/raphtory/src/python/graph/disk_graph.rs index f8077d017a..328cba5185 100644 --- a/raphtory/src/python/graph/disk_graph.rs +++ b/raphtory/src/python/graph/disk_graph.rs @@ -241,9 +241,18 @@ impl PyDiskGraph { let src_index = df_view.get_index(src)?; let dst_index = df_view.get_index(dst)?; let time_index = df_view.get_index(time)?; - let chunk_size = usize::MAX; - - let edge_lists = df_view.chunks.map_ok(|df| { + + let mut chunks_iter = df_view.chunks.peekable(); + let chunk_size = if let Some(result) = chunks_iter.peek() { + match result { + Ok(df) => df.chunk.len(), + Err(e) => return Err(GraphError::LoadFailure(format!("Failed to load graph {e:?}"))), + } + } else { + return Err(GraphError::LoadFailure("No chunks available".to_string())); + }; + + let edge_lists = chunks_iter.map_ok(|df| { let fields = df.chunk .iter() .zip(df_view.names.iter()) From cde043f8639bccf5efcce7b7ad244c9aae7a4733 Mon Sep 17 00:00:00 2001 From: Shivam Kapoor <4599890+iamsmkr@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:44:02 +0100 Subject: [PATCH 03/17] fmt --- raphtory/src/core/utils/errors.rs | 4 +- raphtory/src/io/arrow/dataframe.rs | 24 +- raphtory/src/io/arrow/df_loaders.rs | 215 +++++++++++------- raphtory/src/io/arrow/mod.rs | 20 +- raphtory/src/io/arrow/prop_handler.rs | 189 ++++++++------- raphtory/src/io/parquet_loaders.rs | 40 ++-- raphtory/src/python/graph/disk_graph.rs | 40 ++-- raphtory/src/python/graph/graph.rs | 6 +- .../src/python/graph/io/pandas_loaders.rs | 39 ++-- 9 files changed, 308 insertions(+), 269 deletions(-) diff --git a/raphtory/src/core/utils/errors.rs b/raphtory/src/core/utils/errors.rs index 2e7593fc84..4258f39074 100644 --- a/raphtory/src/core/utils/errors.rs +++ b/raphtory/src/core/utils/errors.rs @@ -1,10 +1,10 @@ use crate::core::{utils::time::error::ParseTimeError, Prop, PropType}; #[cfg(feature = "arrow")] use polars_arrow::legacy::error; -use raphtory_api::core::{entities::GID, storage::arc_str::ArcStr}; -use std::path::PathBuf; #[cfg(feature = "python")] use pyo3::PyErr; +use raphtory_api::core::{entities::GID, storage::arc_str::ArcStr}; +use std::path::PathBuf; #[cfg(feature = "search")] use tantivy; #[cfg(feature = "search")] diff --git a/raphtory/src/io/arrow/dataframe.rs b/raphtory/src/io/arrow/dataframe.rs index 32c8cea095..f18913aa05 100644 --- a/raphtory/src/io/arrow/dataframe.rs +++ b/raphtory/src/io/arrow/dataframe.rs @@ -17,7 +17,7 @@ pub(crate) struct DFView { impl DFView where - I: Iterator>, + I: Iterator>, { pub(crate) fn new(names: Vec, chunks: I) -> Self { Self { names, chunks } @@ -36,7 +36,9 @@ where } pub(crate) fn get_index(&self, name: &str) -> Result { - self.names.iter().position(|n| n == name) + self.names + .iter() + .position(|n| n == name) .ok_or_else(|| GraphError::ColumnDoesNotExist(name.to_string())) } } @@ -54,23 +56,21 @@ impl DFChunk { pub(crate) fn iter_col( &self, idx: usize, - ) -> Option> + '_> { + ) -> Option> + '_> { let col_arr = (&self.chunk)[idx] .as_any() .downcast_ref::>()?; Some(col_arr.iter()) } - pub fn utf8(&self, idx: usize) -> Option> + '_> { + pub fn utf8(&self, idx: usize) -> Option> + '_> { // test that it's actually a utf8 array - let col_arr = (&self.chunk)[idx] - .as_any() - .downcast_ref::>()?; + let col_arr = (&self.chunk)[idx].as_any().downcast_ref::>()?; Some(col_arr.iter()) } - pub fn time_iter_col(&self, idx: usize) -> Option> + '_> { + pub fn time_iter_col(&self, idx: usize) -> Option> + '_> { let col_arr = (&self.chunk)[idx] .as_any() .downcast_ref::>()?; @@ -81,8 +81,12 @@ impl DFChunk { &DataType::Timestamp(TimeUnit::Millisecond, Some("UTC".to_string())), CastOptions::default(), ) - .unwrap(); - array.as_any().downcast_ref::>().unwrap().clone() + .unwrap(); + array + .as_any() + .downcast_ref::>() + .unwrap() + .clone() } else { col_arr.clone() }; diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 6377e8cb33..e030b738ec 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -4,13 +4,15 @@ use crate::{ mutation::{internal::*, AdditionOps}, view::StaticGraphViewOps, }, - io::arrow::{dataframe::DFChunk, prop_handler::*}, + io::arrow::{ + dataframe::{DFChunk, DFView}, + prop_handler::*, + }, prelude::*, }; #[cfg(feature = "python")] use kdam::tqdm; use std::{collections::HashMap, iter}; -use crate::io::arrow::dataframe::DFView; #[cfg(feature = "python")] macro_rules! maybe_tqdm { @@ -36,7 +38,7 @@ pub(crate) fn load_nodes_from_df< 'a, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, >( - df_view: DFView>>, + df_view: DFView>>, node_id: &str, time: &str, properties: Option<&[&str]>, @@ -49,10 +51,19 @@ pub(crate) fn load_nodes_from_df< let properties = properties.unwrap_or(&[]); let const_properties = const_properties.unwrap_or(&[]); - let properties_indices = properties.iter().map(|name| df_view.get_index(name)).collect::, GraphError>>()?; - let const_properties_indices = const_properties.iter().map(|name| df_view.get_index(name)).collect::, GraphError>>()?; - - let node_type_index = node_type.filter(|_| node_type_in_df).map(|node_type| df_view.get_index(node_type)).transpose()?; + let properties_indices = properties + .iter() + .map(|name| df_view.get_index(name)) + .collect::, GraphError>>()?; + let const_properties_indices = const_properties + .iter() + .map(|name| df_view.get_index(name)) + .collect::, GraphError>>()?; + + let node_type_index = node_type + .filter(|_| node_type_in_df) + .map(|node_type| df_view.get_index(node_type)) + .transpose()?; let node_id_index = df_view.get_index(node_id)?; let time_index = df_view.get_index(time)?; @@ -62,29 +73,31 @@ pub(crate) fn load_nodes_from_df< let prop_iter = combine_properties(properties, &properties_indices, &df)?; let const_prop_iter = combine_properties(const_properties, &const_properties_indices, &df)?; - let node_type: Box>> = match node_type { - Some(node_type) => { - match node_type_index { - Some(index) => { - let iter_res: Result>>, GraphError> = - if let Some(node_types) = df.utf8::(index) { - Ok(Box::new(node_types)) - } else if let Some(node_types) = df.utf8::(index) { - Ok(Box::new(node_types)) - } else { - Err(GraphError::LoadFailure( - "Unable to convert / find node_type column in dataframe.".to_string(), - )) - }; - iter_res? - } - None => Box::new(iter::repeat(Some(node_type))) + let node_type: Box>> = match node_type { + Some(node_type) => match node_type_index { + Some(index) => { + let iter_res: Result>>, GraphError> = + if let Some(node_types) = df.utf8::(index) { + Ok(Box::new(node_types)) + } else if let Some(node_types) = df.utf8::(index) { + Ok(Box::new(node_types)) + } else { + Err(GraphError::LoadFailure( + "Unable to convert / find node_type column in dataframe." + .to_string(), + )) + }; + iter_res? } - } + None => Box::new(iter::repeat(Some(node_type))), + }, None => Box::new(iter::repeat(None)), }; - if let (Some(node_id), Some(time)) = (df.iter_col::(node_id_index), df.time_iter_col(time_index)) { + if let (Some(node_id), Some(time)) = ( + df.iter_col::(node_id_index), + df.time_iter_col(time_index), + ) { let iter = node_id .map(|i| i.copied()) .zip(time) @@ -98,9 +111,10 @@ pub(crate) fn load_nodes_from_df< const_prop_iter, shared_const_properties, )?; - } else if let (Some(node_id), Some(time)) = - (df.iter_col::(node_id_index), df.time_iter_col(time_index)) - { + } else if let (Some(node_id), Some(time)) = ( + df.iter_col::(node_id_index), + df.time_iter_col(time_index), + ) { let iter = node_id.map(i64_opt_into_u64_opt).zip(time); let iter = iter .zip(node_type) @@ -114,17 +128,19 @@ pub(crate) fn load_nodes_from_df< const_prop_iter, shared_const_properties, )?; - } else if let (Some(node_id), Some(time)) = (df.utf8::(node_id_index), df.time_iter_col(time_index)) { + } else if let (Some(node_id), Some(time)) = + (df.utf8::(node_id_index), df.time_iter_col(time_index)) + { let iter = node_id.into_iter().zip(time); let iter = iter .zip(node_type) .map(|((node_id, time), n_t)| (node_id, time, n_t)); let iter = maybe_tqdm!( - iter.zip(prop_iter).zip(const_prop_iter), - size, - "Loading nodes" - ); + iter.zip(prop_iter).zip(const_prop_iter), + size, + "Loading nodes" + ); for (((node_id, time, n_t), props), const_props) in iter { if let (Some(node_id), Some(time), n_t) = (node_id, time, n_t) { @@ -136,17 +152,19 @@ pub(crate) fn load_nodes_from_df< } } } - } else if let (Some(node_id), Some(time)) = (df.utf8::(node_id_index), df.time_iter_col(time_index)) { + } else if let (Some(node_id), Some(time)) = + (df.utf8::(node_id_index), df.time_iter_col(time_index)) + { let iter = node_id.into_iter().zip(time); let iter = iter .zip(node_type) .map(|((node_id, time), n_t)| (node_id, time, n_t)); let iter = maybe_tqdm!( - iter.zip(prop_iter).zip(const_prop_iter), - size, - "Loading nodes" - ); + iter.zip(prop_iter).zip(const_prop_iter), + size, + "Loading nodes" + ); for (((node_id, time, n_t), props), const_props) in iter { let actual_type = extract_out_default_type(n_t); @@ -180,7 +198,7 @@ pub(crate) fn load_edges_from_df< 'a, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, >( - df_view: DFView>>, + df_view: DFView>>, size: usize, src: &str, dst: &str, @@ -195,13 +213,22 @@ pub(crate) fn load_edges_from_df< let properties = properties.unwrap_or(&[]); let const_properties = const_properties.unwrap_or(&[]); - let properties_indices = properties.iter().map(|name| df_view.get_index(name)).collect::, GraphError>>()?; - let const_properties_indices = const_properties.iter().map(|name| df_view.get_index(name)).collect::, GraphError>>()?; + let properties_indices = properties + .iter() + .map(|name| df_view.get_index(name)) + .collect::, GraphError>>()?; + let const_properties_indices = const_properties + .iter() + .map(|name| df_view.get_index(name)) + .collect::, GraphError>>()?; let src_index = df_view.get_index(src)?; let dst_index = df_view.get_index(dst)?; let time_index = df_view.get_index(time)?; - let layer_index = layer.filter(|_| layer_in_df).map(|layer| df_view.get_index(layer.as_ref())).transpose()?; + let layer_index = layer + .filter(|_| layer_in_df) + .map(|layer| df_view.get_index(layer.as_ref())) + .transpose()?; for chunk in df_view.chunks { let df = chunk?; @@ -254,10 +281,10 @@ pub(crate) fn load_edges_from_df< let triplets = src.into_iter().zip(dst.into_iter()).zip(time.into_iter()); let iter = maybe_tqdm!( - triplets.zip(prop_iter).zip(const_prop_iter).zip(layer), - size, - "Loading edges" - ); + triplets.zip(prop_iter).zip(const_prop_iter).zip(layer), + size, + "Loading edges" + ); for (((((src, dst), time), props), const_props), layer) in iter { if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { @@ -275,10 +302,10 @@ pub(crate) fn load_edges_from_df< ) { let triplets = src.into_iter().zip(dst.into_iter()).zip(time.into_iter()); let iter = maybe_tqdm!( - triplets.zip(prop_iter).zip(const_prop_iter).zip(layer), - size, - "Loading edges" - ); + triplets.zip(prop_iter).zip(const_prop_iter).zip(layer), + size, + "Loading edges" + ); for (((((src, dst), time), props), const_props), layer) in iter { if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { @@ -303,7 +330,7 @@ pub(crate) fn load_edges_deletions_from_df< 'a, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps + DeletionOps, >( - df_view: DFView>>, + df_view: DFView>>, size: usize, src: &str, dst: &str, @@ -315,7 +342,10 @@ pub(crate) fn load_edges_deletions_from_df< let src_index = df_view.get_index(src)?; let dst_index = df_view.get_index(dst)?; let time_index = df_view.get_index(time)?; - let layer_index = layer.filter(|_| layer_in_df).map(|layer| df_view.get_index(layer.as_ref())).transpose()?; + let layer_index = layer + .filter(|_| layer_in_df) + .map(|layer| df_view.get_index(layer.as_ref())) + .transpose()?; for chunk in df_view.chunks { let df = chunk?; @@ -396,7 +426,7 @@ pub(crate) fn load_node_props_from_df< 'a, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, >( - df_view: DFView>>, + df_view: DFView>>, size: usize, node_id: &str, const_properties: Option<&[&str]>, @@ -404,7 +434,10 @@ pub(crate) fn load_node_props_from_df< graph: &G, ) -> Result<(), GraphError> { let const_properties = const_properties.unwrap_or(&[]); - let const_properties_indices = const_properties.iter().map(|name| df_view.get_index(name)).collect::, GraphError>>()?; + let const_properties_indices = const_properties + .iter() + .map(|name| df_view.get_index(name)) + .collect::, GraphError>>()?; let node_id_index = df_view.get_index(node_id)?; for chunk in df_view.chunks { @@ -484,7 +517,7 @@ pub(crate) fn load_edges_props_from_df< 'a, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, >( - df_view: DFView>>, + df_view: DFView>>, size: usize, src: &str, dst: &str, @@ -495,24 +528,32 @@ pub(crate) fn load_edges_props_from_df< graph: &G, ) -> Result<(), GraphError> { let const_properties = const_properties.unwrap_or(&[]); - let const_properties_indices = const_properties.iter().map(|name| df_view.get_index(name)).collect::, GraphError>>()?; + let const_properties_indices = const_properties + .iter() + .map(|name| df_view.get_index(name)) + .collect::, GraphError>>()?; let src_index = df_view.get_index(src)?; let dst_index = df_view.get_index(dst)?; - let layer_index = layer.filter(|_| layer_in_df).map(|layer| df_view.get_index(layer.as_ref())).transpose()?; - + let layer_index = layer + .filter(|_| layer_in_df) + .map(|layer| df_view.get_index(layer.as_ref())) + .transpose()?; + for chunk in df_view.chunks { let df = chunk?; let const_prop_iter = combine_properties(const_properties, &const_properties_indices, &df)?; let layer = lift_layer(layer, layer_index, &df); - if let (Some(src), Some(dst)) = (df.iter_col::(src_index), df.iter_col::(dst_index)) { + if let (Some(src), Some(dst)) = + (df.iter_col::(src_index), df.iter_col::(dst_index)) + { let triplets = src.map(|i| i.copied()).zip(dst.map(|i| i.copied())); let iter = maybe_tqdm!( - triplets.zip(const_prop_iter).zip(layer), - size, - "Loading edge properties" - ); + triplets.zip(const_prop_iter).zip(layer), + size, + "Loading edge properties" + ); for (((src, dst), const_props), layer) in iter { if let (Some(src), Some(dst)) = (src, dst) { @@ -525,15 +566,17 @@ pub(crate) fn load_edges_props_from_df< } } } - } else if let (Some(src), Some(dst)) = (df.iter_col::(src_index), df.iter_col::(dst_index)) { + } else if let (Some(src), Some(dst)) = + (df.iter_col::(src_index), df.iter_col::(dst_index)) + { let triplets = src .map(i64_opt_into_u64_opt) .zip(dst.map(i64_opt_into_u64_opt)); let iter = maybe_tqdm!( - triplets.zip(const_prop_iter).zip(layer), - size, - "Loading edge properties" - ); + triplets.zip(const_prop_iter).zip(layer), + size, + "Loading edge properties" + ); for (((src, dst), const_props), layer) in iter { if let (Some(src), Some(dst)) = (src, dst) { @@ -546,13 +589,15 @@ pub(crate) fn load_edges_props_from_df< } } } - } else if let (Some(src), Some(dst)) = (df.utf8::(src_index), df.utf8::(dst_index)) { + } else if let (Some(src), Some(dst)) = + (df.utf8::(src_index), df.utf8::(dst_index)) + { let triplets = src.into_iter().zip(dst.into_iter()); let iter = maybe_tqdm!( - triplets.zip(const_prop_iter).zip(layer), - size, - "Loading edge properties" - ); + triplets.zip(const_prop_iter).zip(layer), + size, + "Loading edge properties" + ); for (((src, dst), const_props), layer) in iter { if let (Some(src), Some(dst)) = (src, dst) { @@ -568,13 +613,15 @@ pub(crate) fn load_edges_props_from_df< } } } - } else if let (Some(src), Some(dst)) = (df.utf8::(src_index), df.utf8::(dst_index)) { + } else if let (Some(src), Some(dst)) = + (df.utf8::(src_index), df.utf8::(dst_index)) + { let triplets = src.into_iter().zip(dst.into_iter()); let iter = maybe_tqdm!( - triplets.zip(const_prop_iter).zip(layer), - size, - "Loading edge properties" - ); + triplets.zip(const_prop_iter).zip(layer), + size, + "Loading edge properties" + ); for (((src, dst), const_props), layer) in iter { if let (Some(src), Some(dst)) = (src, dst) { @@ -607,9 +654,9 @@ fn i64_opt_into_u64_opt(x: Option<&i64>) -> Option { fn load_edges_from_num_iter< 'a, S: AsRef, - I: Iterator, Option), Option)>, - PI: Iterator>, - IL: Iterator>, + I: Iterator, Option), Option)>, + PI: Iterator>, + IL: Iterator>, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, >( graph: &G, @@ -640,8 +687,8 @@ fn load_edges_from_num_iter< fn load_nodes_from_num_iter< 'a, S: AsRef, - I: Iterator, Option, Option<&'a str>)>, - PI: Iterator>, + I: Iterator, Option, Option<&'a str>)>, + PI: Iterator>, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, >( graph: &G, diff --git a/raphtory/src/io/arrow/mod.rs b/raphtory/src/io/arrow/mod.rs index 33a5920fb4..2df49e2dc4 100644 --- a/raphtory/src/io/arrow/mod.rs +++ b/raphtory/src/io/arrow/mod.rs @@ -5,12 +5,14 @@ mod prop_handler; #[cfg(test)] mod test { use crate::{ - io::arrow::{dataframe::DFChunk, df_loaders::*}, + io::arrow::{ + dataframe::{DFChunk, DFView}, + df_loaders::*, + }, prelude::*, }; use polars_arrow::array::{PrimitiveArray, Utf8Array}; use raphtory_api::core::{entities::GID, storage::arc_str::ArcStr}; - use crate::io::arrow::dataframe::DFView; #[test] fn load_edges_from_pretend_df() { @@ -27,7 +29,7 @@ mod test { Box::new(PrimitiveArray::::from(vec![Some(1)])), Box::new(PrimitiveArray::::from(vec![Some(1.0)])), Box::new(Utf8Array::::from(vec![Some("a")])), - ] + ], }), Ok(DFChunk { chunk: vec![ @@ -38,7 +40,8 @@ mod test { Box::new(Utf8Array::::from(vec![Some("b"), Some("c")])), ], }), - ].into_iter(), + ] + .into_iter(), }; let graph = Graph::new(); let layer: Option<&str> = None; @@ -56,7 +59,7 @@ mod test { layer_in_df, &graph, ) - .expect("failed to load edges from pretend df"); + .expect("failed to load edges from pretend df"); let actual = graph .edges() @@ -120,7 +123,7 @@ mod test { Box::new(Utf8Array::::from(vec![Some("a")])), Box::new(PrimitiveArray::::from(vec![Some(1)])), Box::new(Utf8Array::::from(vec![Some("atype")])), - ] + ], }), Ok(DFChunk { chunk: vec![ @@ -130,7 +133,8 @@ mod test { Box::new(Utf8Array::::from(vec![Some("btype")])), ], }), - ].into_iter(), + ] + .into_iter(), }; let graph = Graph::new(); @@ -145,7 +149,7 @@ mod test { false, &graph, ) - .expect("failed to load nodes from pretend df"); + .expect("failed to load nodes from pretend df"); let actual = graph .nodes() diff --git a/raphtory/src/io/arrow/prop_handler.rs b/raphtory/src/io/arrow/prop_handler.rs index ad05c0b291..acbda631d4 100644 --- a/raphtory/src/io/arrow/prop_handler.rs +++ b/raphtory/src/io/arrow/prop_handler.rs @@ -11,24 +11,24 @@ use crate::{ }; pub struct PropIter<'a> { - inner: Vec> + 'a>>, + inner: Vec> + 'a>>, } impl<'a> Iterator for PropIter<'a> { type Item = Vec<(&'a str, Prop)>; fn next(&mut self) -> Option { - self.inner.iter_mut().map(|v| { - v.next() - }).filter_map(|r| { - match r { + self.inner + .iter_mut() + .map(|v| v.next()) + .filter_map(|r| match r { Some(r1) => match r1 { Some(r2) => Some(Some(r2)), - None => None + None => None, }, - None => Some(None) - } - }).collect() + None => Some(None), + }) + .collect() } } @@ -41,10 +41,10 @@ pub(crate) fn combine_properties<'a>( is_data_type_supported(df.chunk[*idx].data_type())?; } let zipped = props.iter().zip(indices.iter()); - let iter = zipped.map(|(name, idx)| { - lift_property(*idx, name, df) - }); - Ok(PropIter { inner: iter.collect() }) + let iter = zipped.map(|(name, idx)| lift_property(*idx, name, df)); + Ok(PropIter { + inner: iter.collect(), + }) } fn arr_as_prop(arr: Box) -> Prop { @@ -144,7 +144,7 @@ pub(crate) fn lift_property<'a: 'b, 'b>( idx: usize, name: &'a str, df: &'b DFChunk, -) -> Box> + 'b> { +) -> Box> + 'b> { let arr = &df.chunk[idx]; let r = match arr.data_type() { DataType::Boolean => { @@ -209,64 +209,59 @@ pub(crate) fn lift_property<'a: 'b, 'b>( Some(_) => match timeunit { TimeUnit::Second => { println!("Timestamp(Second, Some({:?})); ", timezone); - let r: Box> + 'b> = + let r: Box> + 'b> = Box::new(arr.iter().map(move |val| { val.map(|v| { - ( - name, - Prop::DTime( - DateTime::::from_timestamp(*v, 0) - .expect("DateTime conversion failed"), - ), - ) - }) + ( + name, + Prop::DTime( + DateTime::::from_timestamp(*v, 0) + .expect("DateTime conversion failed"), + ), + ) + }) })); r } TimeUnit::Millisecond => { println!("Timestamp(Millisecond, Some({:?})); ", timezone); - let r: Box> + 'b> = + let r: Box> + 'b> = Box::new(arr.iter().map(move |val| { val.map(|v| { - ( - name, - Prop::DTime( - DateTime::::from_timestamp_millis(*v) - .expect("DateTime conversion failed"), - ), - ) - }) + ( + name, + Prop::DTime( + DateTime::::from_timestamp_millis(*v) + .expect("DateTime conversion failed"), + ), + ) + }) })); r } TimeUnit::Microsecond => { println!("Timestamp(Microsecond, Some({:?})); ", timezone); - let r: Box> + 'b> = + let r: Box> + 'b> = Box::new(arr.iter().map(move |val| { val.map(|v| { - ( - name, - Prop::DTime( - DateTime::::from_timestamp_micros(*v) - .expect("DateTime conversion failed"), - ), - ) - }) + ( + name, + Prop::DTime( + DateTime::::from_timestamp_micros(*v) + .expect("DateTime conversion failed"), + ), + ) + }) })); r } TimeUnit::Nanosecond => { println!("Timestamp(Nanosecond, Some({:?})); ", timezone); - let r: Box> + 'b> = + let r: Box> + 'b> = Box::new(arr.iter().map(move |val| { val.map(|v| { - ( - name, - Prop::DTime(DateTime::::from_timestamp_nanos( - *v, - )), - ) - }) + (name, Prop::DTime(DateTime::::from_timestamp_nanos(*v))) + }) })); r } @@ -274,67 +269,67 @@ pub(crate) fn lift_property<'a: 'b, 'b>( None => match timeunit { TimeUnit::Second => { println!("Timestamp(Second, None); "); - let r: Box> + 'b> = + let r: Box> + 'b> = Box::new(arr.iter().map(move |val| { val.map(|v| { - ( - name, - Prop::NDTime( - DateTime::from_timestamp(*v, 0) - .expect("DateTime conversion failed") - .naive_utc(), - ), - ) - }) + ( + name, + Prop::NDTime( + DateTime::from_timestamp(*v, 0) + .expect("DateTime conversion failed") + .naive_utc(), + ), + ) + }) })); r } TimeUnit::Millisecond => { println!("Timestamp(Millisecond, None); "); - let r: Box> + 'b> = + let r: Box> + 'b> = Box::new(arr.iter().map(move |val| { val.map(|v| { - ( - name, - Prop::NDTime( - DateTime::from_timestamp_millis(*v) - .expect("DateTime conversion failed") - .naive_utc(), - ), - ) - }) + ( + name, + Prop::NDTime( + DateTime::from_timestamp_millis(*v) + .expect("DateTime conversion failed") + .naive_utc(), + ), + ) + }) })); r } TimeUnit::Microsecond => { println!("Timestamp(Microsecond, None); "); - let r: Box> + 'b> = + let r: Box> + 'b> = Box::new(arr.iter().map(move |val| { val.map(|v| { - ( - name, - Prop::NDTime( - DateTime::from_timestamp_micros(*v) - .expect("DateTime conversion failed") - .naive_utc(), - ), - ) - }) + ( + name, + Prop::NDTime( + DateTime::from_timestamp_micros(*v) + .expect("DateTime conversion failed") + .naive_utc(), + ), + ) + }) })); r } TimeUnit::Nanosecond => { println!("Timestamp(Nanosecond, None); "); - let r: Box> + 'b> = + let r: Box> + 'b> = Box::new(arr.iter().map(move |val| { val.map(|v| { - ( - name, - Prop::NDTime( - DateTime::from_timestamp_nanos(*v).naive_utc(), - ), - ) - }) + ( + name, + Prop::NDTime( + DateTime::from_timestamp_nanos(*v).naive_utc(), + ), + ) + }) })); r } @@ -351,7 +346,7 @@ pub(crate) fn lift_layer<'a>( layer: Option<&str>, layer_index: Option, df: &'a DFChunk, -) -> Box> + 'a> { +) -> Box> + 'a> { if let Some(layer) = layer { match layer_index { Some(index) => { @@ -363,27 +358,23 @@ pub(crate) fn lift_layer<'a>( Box::new(std::iter::repeat(None)) } } - None => Box::new(std::iter::repeat(Some(layer.to_string()))) + None => Box::new(std::iter::repeat(Some(layer.to_string()))), } } else { Box::new(std::iter::repeat(None)) } } -fn iter_as_prop<'a, T: Into + 'a, I: Iterator> + 'a>( +fn iter_as_prop<'a, T: Into + 'a, I: Iterator> + 'a>( name: &'a str, is: I, -) -> Box> + 'a> { - Box::new(is.map(move |val| { - val.map(|v| (name, v.into())) - })) +) -> Box> + 'a> { + Box::new(is.map(move |val| val.map(|v| (name, v.into())))) } -fn iter_as_arr_prop<'a, I: Iterator>> + 'a>( +fn iter_as_arr_prop<'a, I: Iterator>> + 'a>( name: &'a str, is: I, -) -> Box> + 'a> { - Box::new(is.map(move |val| { - val.map(|v| (name, arr_as_prop(v))) - })) +) -> Box> + 'a> { + Box::new(is.map(move |val| val.map(|v| (name, arr_as_prop(v))))) } diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 189acb4998..23f92f86aa 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -8,9 +8,7 @@ use crate::{ prelude::DeletionOps, }; use itertools::Itertools; -use polars_arrow::{ - datatypes::{ArrowDataType as DataType, ArrowSchema, Field}, -}; +use polars_arrow::datatypes::{ArrowDataType as DataType, ArrowSchema, Field}; use polars_parquet::{ read, read::{read_metadata, FileMetaData, FileReader}, @@ -18,9 +16,9 @@ use polars_parquet::{ use std::{ collections::HashMap, fs, + fs::File, path::{Path, PathBuf}, }; -use std::fs::File; pub fn load_nodes_from_parquet< G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, @@ -58,7 +56,7 @@ pub fn load_nodes_from_parquet< node_type_in_df.unwrap_or(true), graph, ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } Ok(()) @@ -105,7 +103,7 @@ pub fn load_edges_from_parquet< layer_in_df.unwrap_or(true), graph, ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } Ok(()) @@ -136,7 +134,7 @@ pub fn load_node_props_from_parquet< shared_const_properties, graph, ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } Ok(()) @@ -177,7 +175,7 @@ pub fn load_edge_props_from_parquet< layer_in_df.unwrap_or(true), graph.core_graph(), ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } Ok(()) @@ -214,7 +212,7 @@ pub fn load_edges_deletions_from_parquet< layer_in_df.unwrap_or(true), graph, ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } Ok(()) } @@ -222,7 +220,7 @@ pub fn load_edges_deletions_from_parquet< pub(crate) fn process_parquet_file_to_df( parquet_file_path: &Path, col_names: &[&str], -) -> Result>>, GraphError> { +) -> Result>>, GraphError> { let (names, chunks) = read_parquet_file(parquet_file_path, col_names)?; let names: Vec = names @@ -235,25 +233,18 @@ pub(crate) fn process_parquet_file_to_df( .map(|r| DFChunk { chunk: r.into_iter().map(|boxed| boxed.clone()).collect_vec(), }) - .map_err(|e| GraphError::LoadFailure(format!("Failed to process Parquet file: {:?}", e))) + .map_err(|e| { + GraphError::LoadFailure(format!("Failed to process Parquet file: {:?}", e)) + }) }); - Ok(DFView { - names, - chunks, - }) + Ok(DFView { names, chunks }) } fn read_parquet_file( path: impl AsRef, col_names: &[&str], -) -> Result< - ( - Vec, - FileReader, - ), - GraphError, -> { +) -> Result<(Vec, FileReader), GraphError> { let read_schema = |metadata: &FileMetaData| -> Result { let schema = read::infer_schema(metadata)?; let fields = schema @@ -341,9 +332,8 @@ mod test { let chunks: Vec> = df.chunks.collect_vec(); let chunks: Result, GraphError> = chunks.into_iter().collect(); let chunks: Vec = chunks.unwrap(); - let actual_chunks: Vec>> = chunks.into_iter().map(|c: DFChunk| { - c.chunk - }).collect_vec(); + let actual_chunks: Vec>> = + chunks.into_iter().map(|c: DFChunk| c.chunk).collect_vec(); assert_eq!(actual_names, expected_names); assert_eq!(actual_chunks, expected_chunks); diff --git a/raphtory/src/python/graph/disk_graph.rs b/raphtory/src/python/graph/disk_graph.rs index 328cba5185..9d66f6ebaf 100644 --- a/raphtory/src/python/graph/disk_graph.rs +++ b/raphtory/src/python/graph/disk_graph.rs @@ -7,7 +7,7 @@ use crate::{ core::utils::errors::GraphError, db::graph::views::deletion_graph::PersistentGraph, disk_graph::{graph_impl::ParquetLayerCols, DiskGraphError, DiskGraphStorage}, - io::arrow::dataframe::DFChunk, + io::arrow::dataframe::{DFChunk, DFView}, prelude::Graph, python::{ graph::graph::PyGraph, types::repr::StructReprBuilder, utils::errors::adapt_err_value, @@ -20,7 +20,6 @@ use pyo3::{ types::{PyDict, PyList, PyString}, }; use std::path::Path; -use crate::io::arrow::dataframe::DFView; impl From for PyErr { fn from(value: DiskGraphError) -> Self { @@ -233,7 +232,7 @@ impl PyDiskGraph { impl PyDiskGraph { fn from_pandas( graph_dir: &str, - df_view: DFView>>, + df_view: DFView>>, src: &str, dst: &str, time: &str, @@ -246,23 +245,30 @@ impl PyDiskGraph { let chunk_size = if let Some(result) = chunks_iter.peek() { match result { Ok(df) => df.chunk.len(), - Err(e) => return Err(GraphError::LoadFailure(format!("Failed to load graph {e:?}"))), + Err(e) => { + return Err(GraphError::LoadFailure(format!( + "Failed to load graph {e:?}" + ))) + } } } else { return Err(GraphError::LoadFailure("No chunks available".to_string())); }; - let edge_lists = chunks_iter.map_ok(|df| { - let fields = df.chunk - .iter() - .zip(df_view.names.iter()) - .map(|(arr, col_name)| { - Field::new(col_name, arr.data_type().clone(), arr.null_count() > 0) - }) - .collect_vec(); - let s_array = StructArray::new(DataType::Struct(fields), df.chunk, None); - s_array - }).collect::, GraphError>>()?; + let edge_lists = chunks_iter + .map_ok(|df| { + let fields = df + .chunk + .iter() + .zip(df_view.names.iter()) + .map(|(arr, col_name)| { + Field::new(col_name, arr.data_type().clone(), arr.null_count() > 0) + }) + .collect_vec(); + let s_array = StructArray::new(DataType::Struct(fields), df.chunk, None); + s_array + }) + .collect::, GraphError>>()?; DiskGraphStorage::load_from_edge_lists( &edge_lists, @@ -273,7 +279,7 @@ impl PyDiskGraph { dst_index, time_index, ) - .map_err(|err| GraphError::LoadFailure(format!("Failed to load graph {err:?}"))) + .map_err(|err| GraphError::LoadFailure(format!("Failed to load graph {err:?}"))) } fn from_parquets( @@ -298,6 +304,6 @@ impl PyDiskGraph { num_threads, node_type_col, ) - .map_err(|err| GraphError::LoadFailure(format!("Failed to load graph {err:?}"))) + .map_err(|err| GraphError::LoadFailure(format!("Failed to load graph {err:?}"))) } } diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 511af4df48..107f32bbe8 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -661,7 +661,7 @@ impl PyGraph { node_type_in_df, properties.as_ref().map(|props| props.as_ref()), const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref() + shared_const_properties.as_ref(), ) } @@ -701,7 +701,7 @@ impl PyGraph { node_type_in_df, properties.as_ref().map(|props| props.as_ref()), const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref() + shared_const_properties.as_ref(), ) } @@ -816,7 +816,7 @@ impl PyGraph { df, id, const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref() + shared_const_properties.as_ref(), ) } diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index 89326d2829..111ea5d88d 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -42,10 +42,10 @@ pub fn load_nodes_from_pandas( node_type_in_df.unwrap_or(true), graph, ) - .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; + .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; Ok::<(), PyErr>(()) }) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; Ok(()) } @@ -94,10 +94,10 @@ pub fn load_edges_from_pandas( layer_in_df.unwrap_or(true), graph, ) - .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; + .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; Ok::<(), PyErr>(()) }) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; Ok(()) } @@ -128,10 +128,10 @@ pub fn load_node_props_from_pandas( shared_const_properties, graph, ) - .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; + .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; Ok::<(), PyErr>(()) }) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; Ok(()) } @@ -173,10 +173,10 @@ pub fn load_edge_props_from_pandas( layer_in_df.unwrap_or(true), graph, ) - .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; + .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; Ok::<(), PyErr>(()) }) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; Ok(()) } @@ -217,10 +217,10 @@ pub fn load_edges_deletions_from_pandas( layer_in_df.unwrap_or(true), graph.core_graph(), ) - .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; + .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; Ok::<(), PyErr>(()) }) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; Ok(()) } @@ -228,7 +228,7 @@ pub(crate) fn process_pandas_py_df<'a>( df: &'a PyAny, py: Python<'a>, col_names: Vec<&str>, -) -> PyResult> + 'a>> { +) -> PyResult> + 'a>> { is_jupyter(py); py.import("pandas")?; let module = py.import("pyarrow")?; @@ -259,18 +259,18 @@ pub(crate) fn process_pandas_py_df<'a>( } else { vec![] } - .into_iter() - .filter(|x| col_names.contains(&x.as_str())) - .collect(); + .into_iter() + .filter(|x| col_names.contains(&x.as_str())) + .collect(); let names_len = names.len(); let chunks = rb.into_iter().map(move |rb| { let chunk = (0..names_len) .map(|i| { - let array = rb.call_method1("column", (i,)) - .map_err(|e| GraphError::from(e))?; - let arr = array_to_rust(array) + let array = rb + .call_method1("column", (i,)) .map_err(|e| GraphError::from(e))?; + let arr = array_to_rust(array).map_err(|e| GraphError::from(e))?; Ok::, GraphError>(arr) }) .collect::, GraphError>>()?; @@ -278,10 +278,7 @@ pub(crate) fn process_pandas_py_df<'a>( Ok(DFChunk { chunk }) }); - Ok(DFView { - names, - chunks, - }) + Ok(DFView { names, chunks }) } pub fn array_to_rust(obj: &PyAny) -> PyResult { From e887aedad90fb676f848d096bf75eb65460c6976 Mon Sep 17 00:00:00 2001 From: Shivam Kapoor <4599890+iamsmkr@users.noreply.github.com> Date: Wed, 14 Aug 2024 16:55:41 +0100 Subject: [PATCH 04/17] fix time, src, dst order --- python/tests/test_disk_graph.py | 2 +- python/tests/test_graph_conversions.py | 4 +- python/tests/test_load_from_pandas.py | 80 +++++++++---------- raphtory-cypher/src/lib.rs | 2 +- raphtory/src/disk_graph/mod.rs | 4 +- raphtory/src/io/arrow/df_loaders.rs | 4 +- raphtory/src/io/arrow/mod.rs | 2 +- raphtory/src/io/parquet_loaders.rs | 8 +- raphtory/src/python/graph/disk_graph.rs | 10 +-- raphtory/src/python/graph/graph.rs | 30 +++---- .../src/python/graph/graph_with_deletions.rs | 40 +++++----- .../src/python/graph/io/pandas_loaders.rs | 8 +- 12 files changed, 97 insertions(+), 97 deletions(-) diff --git a/python/tests/test_disk_graph.py b/python/tests/test_disk_graph.py index 4b004feb67..eef688abbb 100644 --- a/python/tests/test_disk_graph.py +++ b/python/tests/test_disk_graph.py @@ -40,7 +40,7 @@ def test_counts(): graph_dir = tempfile.TemporaryDirectory() - graph = DiskGraphStorage.load_from_pandas(graph_dir.name, edges, "src", "dst", "time") + graph = DiskGraphStorage.load_from_pandas(graph_dir.name, edges, "time", "src", "dst") graph = graph.to_events() assert graph.count_nodes() == 5 assert graph.count_edges() == 20 diff --git a/python/tests/test_graph_conversions.py b/python/tests/test_graph_conversions.py index 5e24b394ed..9b54bb00d3 100644 --- a/python/tests/test_graph_conversions.py +++ b/python/tests/test_graph_conversions.py @@ -24,9 +24,9 @@ def build_graph(): return Graph.load_from_pandas( edge_df=edges_df, + edge_time="timestamp", edge_src="source", edge_dst="destination", - edge_time="timestamp", edge_properties=["data_size_MB"], edge_layer="transaction_type", edge_const_properties=["is_encrypted"], @@ -49,9 +49,9 @@ def build_graph_without_datetime_type(): return Graph.load_from_pandas( edge_df=edges_df, + edge_time="timestamp", edge_src="source", edge_dst="destination", - edge_time="timestamp", edge_properties=["data_size_MB"], edge_layer="transaction_type", edge_const_properties=["is_encrypted"], diff --git a/python/tests/test_load_from_pandas.py b/python/tests/test_load_from_pandas.py index e966d1a21b..5599accf34 100644 --- a/python/tests/test_load_from_pandas.py +++ b/python/tests/test_load_from_pandas.py @@ -19,9 +19,9 @@ def test_load_from_pandas(): df = pd.DataFrame( { + "time": [1, 2, 3, 4, 5], "src": [1, 2, 3, 4, 5], "dst": [2, 3, 4, 5, 6], - "time": [1, 2, 3, 4, 5], "weight": [1.0, 2.0, 3.0, 4.0, 5.0], "marbles": ["red", "blue", "green", "yellow", "purple"], } @@ -46,11 +46,11 @@ def assertions(g): assert g.nodes.id.collect() == expected_nodes assert edges == expected_edges - g = Graph.load_from_pandas(df, "src", "dst", "time", ["weight", "marbles"]) + g = Graph.load_from_pandas(df, "time", "src", "dst", ["weight", "marbles"]) assertions(g) g = PersistentGraph.load_from_pandas( - df, "src", "dst", "time", ["weight", "marbles"] + df, "time", "src", "dst", ["weight", "marbles"] ) assertions(g) @@ -77,12 +77,12 @@ def assertions(exc_info): # Use pytest.raises to expect an exception with pytest.raises(Exception) as exc_info: - g = Graph.load_from_pandas(df, "src", "dst", "time", ["weight", "marbles"]) + g = Graph.load_from_pandas(df, "time", "src", "dst", ["weight", "marbles"]) assertions(exc_info) with pytest.raises(Exception) as exc_info: g = PersistentGraph.load_from_pandas( - df, "src", "dst", "time", ["weight", "marbles"] + df, "time", "src", "dst", ["weight", "marbles"] ) assertions(exc_info) @@ -149,12 +149,12 @@ def assertions(g): g = Graph() g.load_nodes_from_pandas(nodes_df, "id", "time", "node_type", properties=["name"]) - g.load_edges_from_pandas(edges_df, "src", "dst", "time", ["weight", "marbles"]) + g.load_edges_from_pandas(edges_df, "time", "src", "dst", ["weight", "marbles"]) assertions(g) g = PersistentGraph() g.load_nodes_from_pandas(nodes_df, "id", "time", "node_type", properties=["name"]) - g.load_edges_from_pandas(edges_df, "src", "dst", "time", ["weight", "marbles"]) + g.load_edges_from_pandas(edges_df, "time", "src", "dst", ["weight", "marbles"]) assertions(g) @@ -212,9 +212,9 @@ def assertions(g): g = Graph.load_from_pandas( edges_df, + edge_time="time", edge_src="src", edge_dst="dst", - edge_time="time", edge_properties=["weight", "marbles"], node_df=nodes_df, node_id="id", @@ -226,9 +226,9 @@ def assertions(g): g = PersistentGraph.load_from_pandas( edges_df, + edge_time="time", edge_src="src", edge_dst="dst", - edge_time="time", edge_properties=["weight", "marbles"], node_df=nodes_df, node_id="id", @@ -375,9 +375,9 @@ def assertions3(g): g = Graph() g.load_edges_from_pandas( edges_df, + "time", "src", "dst", - "time", properties=["weight", "marbles"], const_properties=["marbles_const"], shared_const_properties={"type": "Edge", "tag": "test_tag"}, @@ -389,9 +389,9 @@ def assertions3(g): g = PersistentGraph() g.load_edges_from_pandas( edges_df, + "time", "src", "dst", - "time", properties=["weight", "marbles"], const_properties=["marbles_const"], shared_const_properties={"type": "Edge", "tag": "test_tag"}, @@ -416,13 +416,13 @@ def assertions4(g): g = Graph() g.load_edges_from_pandas( - edges_df, "src", "dst", "time", ["weight", "marbles"], layer="layers" + edges_df, "time", "src", "dst", ["weight", "marbles"], layer="layers" ) assertions4(g) g = PersistentGraph() g.load_edges_from_pandas( - edges_df, "src", "dst", "time", ["weight", "marbles"], layer="layers" + edges_df, "time", "src", "dst", ["weight", "marbles"], layer="layers" ) assertions4(g) @@ -439,9 +439,9 @@ def assertions5(g): g = Graph.load_from_pandas( edges_df, + "time", "src", "dst", - "time", edge_layer="test_layer", layer_in_df=False, node_df=nodes_df, @@ -454,9 +454,9 @@ def assertions5(g): g = PersistentGraph.load_from_pandas( edges_df, + "time", "src", "dst", - "time", edge_layer="test_layer", layer_in_df=False, node_df=nodes_df, @@ -491,9 +491,9 @@ def assertions6(g): g = Graph.load_from_pandas( edges_df, + "time", "src", "dst", - "time", edge_layer="layers", node_df=nodes_df, node_id="id", @@ -505,9 +505,9 @@ def assertions6(g): g = PersistentGraph.load_from_pandas( edges_df, + "time", "src", "dst", - "time", edge_layer="layers", node_df=nodes_df, node_id="id", @@ -537,9 +537,9 @@ def assertions7(g): g = Graph.load_from_pandas( edges_df, + edge_time="time", edge_src="src", edge_dst="dst", - edge_time="time", edge_properties=["weight", "marbles"], node_df=nodes_df, node_id="id", @@ -583,9 +583,9 @@ def assertions8(g): g = PersistentGraph.load_from_pandas( edges_df, + edge_time="time", edge_src="src", edge_dst="dst", - edge_time="time", edge_properties=["weight", "marbles"], node_df=nodes_df, node_id="id", @@ -631,9 +631,9 @@ def assertions_layers_in_df(g): g = Graph() g.load_edges_from_pandas( edges_df, + "time", "src", "dst", - "time", ["weight", "marbles"], const_properties=["marbles_const"], shared_const_properties={"type": "Edge", "tag": "test_tag"}, @@ -645,9 +645,9 @@ def assertions_layers_in_df(g): g = PersistentGraph() g.load_edges_from_pandas( edges_df, + "time", "src", "dst", - "time", ["weight", "marbles"], const_properties=["marbles_const"], shared_const_properties={"type": "Edge", "tag": "test_tag"}, @@ -684,9 +684,9 @@ def test_missing_columns(): ): g = Graph.load_from_pandas( edges_df, + edge_time="not_time", edge_src="not_src", edge_dst="not_dst", - edge_time="not_time", ) with pytest.raises( @@ -697,9 +697,9 @@ def test_missing_columns(): ): g = PersistentGraph.load_from_pandas( edges_df, + edge_time="not_time", edge_src="not_src", edge_dst="not_dst", - edge_time="not_time", ) with pytest.raises( @@ -710,9 +710,9 @@ def test_missing_columns(): ): g = Graph.load_from_pandas( edges_df, + edge_time="time", edge_src="src", edge_dst="dst", - edge_time="time", edge_properties=["not_weight", "marbles"], edge_const_properties=["bleep_bloop"], node_df=nodes_df, @@ -729,9 +729,9 @@ def test_missing_columns(): ): g = PersistentGraph.load_from_pandas( edges_df, + edge_time="time", edge_src="src", edge_dst="dst", - edge_time="time", edge_properties=["not_weight", "marbles"], edge_const_properties=["bleep_bloop"], node_df=nodes_df, @@ -748,9 +748,9 @@ def test_missing_columns(): ): g = Graph.load_from_pandas( edges_df, + edge_time="time", edge_src="src", edge_dst="dst", - edge_time="time", edge_properties=["weight", "marbles"], node_df=nodes_df, node_id="not_id", @@ -766,9 +766,9 @@ def test_missing_columns(): ): g = PersistentGraph.load_from_pandas( edges_df, + edge_time="time", edge_src="src", edge_dst="dst", - edge_time="time", edge_properties=["weight", "marbles"], node_df=nodes_df, node_id="not_id", @@ -838,12 +838,12 @@ def test_none_columns_edges(): with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - Graph.load_from_pandas(edges_df, "src", "dst", "time") + Graph.load_from_pandas(edges_df, "time", "src", "dst") with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - PersistentGraph.load_from_pandas(edges_df, "src", "dst", "time") + PersistentGraph.load_from_pandas(edges_df, "time", "src", "dst") edges_df = pd.DataFrame( {"src": [1, 2, 3, 4, 5], "dst": [2, 3, 4, None, 6], "time": [1, 2, 3, 4, 5]} @@ -851,11 +851,11 @@ def test_none_columns_edges(): with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - Graph.load_from_pandas(edges_df, "src", "dst", "time") + Graph.load_from_pandas(edges_df, "time", "src", "dst") with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - PersistentGraph.load_from_pandas(edges_df, "src", "dst", "time") + PersistentGraph.load_from_pandas(edges_df, "time", "src", "dst") edges_df = pd.DataFrame( {"src": [1, 2, 3, 4, 5], "dst": [2, 3, 4, 5, 6], "time": [1, 2, None, 4, 5]} @@ -863,11 +863,11 @@ def test_none_columns_edges(): with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - Graph.load_from_pandas(edges_df, "src", "dst", "time") + Graph.load_from_pandas(edges_df, "time", "src", "dst") with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - PersistentGraph.load_from_pandas(edges_df, "src", "dst", "time") + PersistentGraph.load_from_pandas(edges_df, "time", "src", "dst") def test_loading_list_as_properties(): @@ -883,9 +883,9 @@ def test_loading_list_as_properties(): g = Graph.load_from_pandas( df, + edge_time="time", edge_src="src", edge_dst="dst", - edge_time="time", edge_properties=["marbles"], ) @@ -929,9 +929,9 @@ def test_unparsable_props(): ): Graph.load_from_pandas( edges_df, + edge_time="time", edge_src="src", edge_dst="dst", - edge_time="time", edge_properties=["weight"], ) with pytest.raises( @@ -942,9 +942,9 @@ def test_unparsable_props(): ): PersistentGraph.load_from_pandas( edges_df, + edge_time="time", edge_src="src", edge_dst="dst", - edge_time="time", edge_properties=["weight"], ) @@ -1034,9 +1034,9 @@ def edges_assertions(g): ) nodes_assertions3(g) - g = Graph.load_from_pandas(edges_df, "src", "dst", "time") + g = Graph.load_from_pandas(edges_df, "time", "src", "dst") edges_assertions(g) - g = Graph.load_from_pandas(edges_df, "src", "dst", "time") + g = Graph.load_from_pandas(edges_df, "time", "src", "dst") edges_assertions(g) @@ -1057,7 +1057,7 @@ def test_load_edge_deletions_from_pandas(): ) g = PersistentGraph() - g.load_edges_from_pandas(edges_df, "src", "dst", "time") + g.load_edges_from_pandas(edges_df, "time", "src", "dst") assert g.window(10, 12).edges.src.id.collect() == [1, 2, 3, 4, 5] - g.load_edges_deletions_from_pandas(edge_dels_df, "src", "dst", "time") + g.load_edges_deletions_from_pandas(edge_dels_df, "time", "src", "dst") assert g.window(10, 12).edges.src.id.collect() == [1, 2, 5] diff --git a/raphtory-cypher/src/lib.rs b/raphtory-cypher/src/lib.rs index 6661c86a67..23a1f5fa52 100644 --- a/raphtory-cypher/src/lib.rs +++ b/raphtory-cypher/src/lib.rs @@ -312,7 +312,7 @@ mod cypher { let edge_lists = vec![chunk]; let graph = - DiskGraphStorage::load_from_edge_lists(&edge_lists, 20, 20, graph_dir, 0, 1, 2) + DiskGraphStorage::load_from_edge_lists(&edge_lists, 20, 20, graph_dir, 2, 0, 1) .unwrap(); let df = run_cypher("match ()-[e]->() RETURN *", &graph, true) diff --git a/raphtory/src/disk_graph/mod.rs b/raphtory/src/disk_graph/mod.rs index 37d9f4c8c8..3aed7ee7cc 100644 --- a/raphtory/src/disk_graph/mod.rs +++ b/raphtory/src/disk_graph/mod.rs @@ -214,9 +214,9 @@ impl DiskGraphStorage { chunk_size, t_props_chunk_size, graph_dir.as_ref(), + 2, 0, 1, - 2, ) .expect("failed to create graph") } @@ -306,9 +306,9 @@ impl DiskGraphStorage { chunk_size: usize, t_props_chunk_size: usize, graph_dir: impl AsRef + Sync, + time_col_idx: usize, src_col_idx: usize, dst_col_idx: usize, - time_col_idx: usize, ) -> Result { let inner = TemporalGraph::from_sorted_edge_list( graph_dir, diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index e030b738ec..a6bf69a19a 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -200,9 +200,9 @@ pub(crate) fn load_edges_from_df< >( df_view: DFView>>, size: usize, + time: &str, src: &str, dst: &str, - time: &str, properties: Option<&[&str]>, const_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, @@ -332,9 +332,9 @@ pub(crate) fn load_edges_deletions_from_df< >( df_view: DFView>>, size: usize, + time: &str, src: &str, dst: &str, - time: &str, layer: Option<&str>, layer_in_df: bool, graph: &G, diff --git a/raphtory/src/io/arrow/mod.rs b/raphtory/src/io/arrow/mod.rs index 2df49e2dc4..8b64093d1e 100644 --- a/raphtory/src/io/arrow/mod.rs +++ b/raphtory/src/io/arrow/mod.rs @@ -49,9 +49,9 @@ mod test { load_edges_from_df( df, 5, + "time", "src", "dst", - "time", Some(&*vec!["prop1", "prop2"]), None, None, diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 23f92f86aa..7c5971d450 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -67,9 +67,9 @@ pub fn load_edges_from_parquet< >( graph: &G, parquet_path: impl AsRef, + time: &str, src: &str, dst: &str, - time: &str, properties: Option<&[&str]>, const_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, @@ -93,9 +93,9 @@ pub fn load_edges_from_parquet< load_edges_from_df( df_view, size, + time, src, dst, - time, properties, const_properties, shared_const_properties, @@ -186,9 +186,9 @@ pub fn load_edges_deletions_from_parquet< >( graph: &G, parquet_path: &Path, + time: &str, src: &str, dst: &str, - time: &str, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { @@ -205,9 +205,9 @@ pub fn load_edges_deletions_from_parquet< load_edges_deletions_from_df( df_view, size, + time, src, dst, - time, layer, layer_in_df.unwrap_or(true), graph, diff --git a/raphtory/src/python/graph/disk_graph.rs b/raphtory/src/python/graph/disk_graph.rs index 9d66f6ebaf..afc1648661 100644 --- a/raphtory/src/python/graph/disk_graph.rs +++ b/raphtory/src/python/graph/disk_graph.rs @@ -140,13 +140,13 @@ impl PyDiskGraph { } #[staticmethod] - #[pyo3(signature = (graph_dir, edge_df, src_col, dst_col, time_col))] + #[pyo3(signature = (graph_dir, edge_df, time_col, src_col, dst_col))] pub fn load_from_pandas( graph_dir: &str, edge_df: &PyAny, + time_col: &str, src_col: &str, dst_col: &str, - time_col: &str, ) -> Result { let graph: Result = Python::with_gil(|py| { let cols_to_check = vec![src_col, dst_col, time_col]; @@ -156,7 +156,7 @@ impl PyDiskGraph { let df_view = process_pandas_py_df(edge_df, py, df_columns)?; df_view.check_cols_exist(&cols_to_check)?; - let graph = Self::from_pandas(graph_dir, df_view, src_col, dst_col, time_col)?; + let graph = Self::from_pandas(graph_dir, df_view, time_col, src_col, dst_col)?; Ok::<_, GraphError>(graph) }); @@ -233,9 +233,9 @@ impl PyDiskGraph { fn from_pandas( graph_dir: &str, df_view: DFView>>, + time: &str, src: &str, dst: &str, - time: &str, ) -> Result { let src_index = df_view.get_index(src)?; let dst_index = df_view.get_index(dst)?; @@ -275,9 +275,9 @@ impl PyDiskGraph { chunk_size, chunk_size, graph_dir, + time_index, src_index, dst_index, - time_index, ) .map_err(|err| GraphError::LoadFailure(format!("Failed to load graph {err:?}"))) } diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 107f32bbe8..c677b8041a 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -471,9 +471,9 @@ impl PyGraph { /// /// Args: /// edge_df (pandas.DataFrame): The DataFrame containing the edges. + /// edge_time (str): The column name for the timestamps. /// edge_src (str): The column name for the source node ids. /// edge_dst (str): The column name for the destination node ids. - /// edge_time (str): The column name for the timestamps. /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. /// edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. /// edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. @@ -492,15 +492,15 @@ impl PyGraph { /// Graph: The loaded Graph object. #[staticmethod] #[pyo3( - signature = (edge_df, edge_src, edge_dst, edge_time, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, + signature = (edge_df, edge_time, edge_src, edge_dst, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, edge_layer = None, layer_in_df = true, node_df = None, node_id = None, node_time = None, node_properties = None, node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true) )] fn load_from_pandas( edge_df: &PyAny, + edge_time: &str, edge_src: &str, edge_dst: &str, - edge_time: &str, edge_properties: Option>, edge_const_properties: Option>, edge_shared_const_properties: Option>, @@ -532,9 +532,9 @@ impl PyGraph { load_edges_from_pandas( &graph.core_graph(), edge_df, + edge_time, edge_src, edge_dst, - edge_time, edge_properties.as_ref().map(|props| props.as_ref()), edge_const_properties.as_ref().map(|props| props.as_ref()), edge_shared_const_properties.as_ref(), @@ -548,9 +548,9 @@ impl PyGraph { /// /// Args: /// edge_parquet_path (str): Parquet file or directory of Parquet files containing the edges. + /// edge_time (str): The column name for the timestamps. /// edge_src (str): The column name for the source node ids. /// edge_dst (str): The column name for the destination node ids. - /// edge_time (str): The column name for the timestamps. /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. /// edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. /// edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. @@ -569,15 +569,15 @@ impl PyGraph { /// Graph: The loaded Graph object. #[staticmethod] #[pyo3( - signature = (edge_parquet_path, edge_src, edge_dst, edge_time, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, + signature = (edge_parquet_path, edge_time, edge_src, edge_dst, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, edge_layer = None, layer_in_df = true, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true) )] fn load_from_parquet( edge_parquet_path: PathBuf, + edge_time: &str, edge_src: &str, edge_dst: &str, - edge_time: &str, edge_properties: Option>, edge_const_properties: Option>, edge_shared_const_properties: Option>, @@ -612,9 +612,9 @@ impl PyGraph { load_edges_from_parquet( &graph, edge_parquet_path, + edge_time, edge_src, edge_dst, - edge_time, edge_properties.as_ref().map(|props| props.as_ref()), edge_const_properties.as_ref().map(|props| props.as_ref()), edge_shared_const_properties.as_ref(), @@ -709,9 +709,9 @@ impl PyGraph { /// /// Arguments: /// df (Dataframe): The Pandas DataFrame containing the edges. + /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// time (str): The column name for the update timestamps. /// properties (List): List of edge property column names. Defaults to None. (optional) /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) @@ -721,14 +721,14 @@ impl PyGraph { /// Returns: /// Result<(), GraphError>: Result of the operation. #[pyo3( - signature = (df, src, dst, time, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) + signature = (df, time, src, dst, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) )] fn load_edges_from_pandas( &self, df: &PyAny, + time: &str, src: &str, dst: &str, - time: &str, properties: Option>, const_properties: Option>, shared_const_properties: Option>, @@ -738,9 +738,9 @@ impl PyGraph { load_edges_from_pandas( self.graph.core_graph(), df, + time, src, dst, - time, properties.as_ref().map(|props| props.as_ref()), const_properties.as_ref().map(|props| props.as_ref()), shared_const_properties.as_ref(), @@ -765,14 +765,14 @@ impl PyGraph { /// Returns: /// Result<(), GraphError>: Result of the operation. #[pyo3( - signature = (parquet_path, src, dst, time, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) + signature = (parquet_path, time, src, dst, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) )] fn load_edges_from_parquet( &self, parquet_path: PathBuf, + time: &str, src: &str, dst: &str, - time: &str, properties: Option>, const_properties: Option>, shared_const_properties: Option>, @@ -782,9 +782,9 @@ impl PyGraph { load_edges_from_parquet( &self.graph, parquet_path.as_path(), + time, src, dst, - time, properties.as_ref().map(|props| props.as_ref()), const_properties.as_ref().map(|props| props.as_ref()), shared_const_properties.as_ref(), diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index 47d3d550cc..dd6308512d 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -382,9 +382,9 @@ impl PyPersistentGraph { /// /// Args: /// edge_df (pandas.DataFrame): The DataFrame containing the edges. + /// edge_time (str): The column name for the timestamps. /// edge_src (str): The column name for the source node ids. /// edge_dst (str): The column name for the destination node ids. - /// edge_time (str): The column name for the timestamps. /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. /// edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. /// edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. @@ -402,14 +402,14 @@ impl PyPersistentGraph { /// Returns: /// Graph: The loaded Graph object. #[staticmethod] - #[pyo3(signature = (edge_df, edge_src, edge_dst, edge_time, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, + #[pyo3(signature = (edge_df, edge_time, edge_src, edge_dst, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, edge_layer = None, layer_in_df = true, node_df = None, node_id = None, node_time = None, node_properties = None, node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true))] fn load_from_pandas( edge_df: &PyAny, + edge_time: &str, edge_src: &str, edge_dst: &str, - edge_time: &str, edge_properties: Option>, edge_const_properties: Option>, edge_shared_const_properties: Option>, @@ -429,9 +429,9 @@ impl PyPersistentGraph { }; graph.load_edges_from_pandas( edge_df, + edge_time, edge_src, edge_dst, - edge_time, edge_properties, edge_const_properties, edge_shared_const_properties, @@ -477,14 +477,14 @@ impl PyPersistentGraph { /// Returns: /// Graph: The loaded Graph object. #[staticmethod] - #[pyo3(signature = (edge_parquet_path, edge_src, edge_dst, edge_time, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, + #[pyo3(signature = (edge_parquet_path, edge_time, edge_src, edge_dst, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, edge_layer = None, layer_in_df = true, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true))] fn load_from_parquet( edge_parquet_path: PathBuf, + edge_time: &str, edge_src: &str, edge_dst: &str, - edge_time: &str, edge_properties: Option>, edge_const_properties: Option>, edge_shared_const_properties: Option>, @@ -518,9 +518,9 @@ impl PyPersistentGraph { } graph.load_edges_from_parquet( edge_parquet_path, + edge_time, edge_src, edge_dst, - edge_time, edge_properties, edge_const_properties, edge_shared_const_properties, @@ -621,13 +621,13 @@ impl PyPersistentGraph { /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, src, dst, time, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] + #[pyo3(signature = (df, time, src, dst, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] fn load_edges_from_pandas( &self, df: &PyAny, + time: &str, src: &str, dst: &str, - time: &str, properties: Option>, const_properties: Option>, shared_const_properties: Option>, @@ -637,9 +637,9 @@ impl PyPersistentGraph { load_edges_from_pandas( &self.graph.0, df, + time, src, dst, - time, properties.as_ref().map(|props| props.as_ref()), const_properties.as_ref().map(|props| props.as_ref()), shared_const_properties.as_ref(), @@ -663,13 +663,13 @@ impl PyPersistentGraph { /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, src, dst, time, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] + #[pyo3(signature = (parquet_path, time, src, dst, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] fn load_edges_from_parquet( &self, parquet_path: PathBuf, + time: &str, src: &str, dst: &str, - time: &str, properties: Option>, const_properties: Option>, shared_const_properties: Option>, @@ -679,9 +679,9 @@ impl PyPersistentGraph { load_edges_from_parquet( &self.graph, parquet_path.as_path(), + time, src, dst, - time, properties.as_ref().map(|props| props.as_ref()), const_properties.as_ref().map(|props| props.as_ref()), shared_const_properties.as_ref(), @@ -694,25 +694,25 @@ impl PyPersistentGraph { /// /// Arguments: /// df (Dataframe): The Pandas DataFrame containing the edges. + /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// time (str): The column name for the update timestamps. /// layer (str): The edge layer name (optional) Defaults to None. /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, src, dst, time, layer = None, layer_in_df = true))] + #[pyo3(signature = (df, time, src, dst, layer = None, layer_in_df = true))] fn load_edges_deletions_from_pandas( &self, df: &PyAny, + time: &str, src: &str, dst: &str, - time: &str, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { - load_edges_deletions_from_pandas(&self.graph.0, df, src, dst, time, layer, layer_in_df) + load_edges_deletions_from_pandas(&self.graph.0, df, time, src, dst, layer, layer_in_df) } /// Load edges deletions from a Parquet file into the graph. @@ -727,22 +727,22 @@ impl PyPersistentGraph { /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, src, dst, time, layer = None, layer_in_df = true))] + #[pyo3(signature = (parquet_path, time, src, dst, layer = None, layer_in_df = true))] fn load_edges_deletions_from_parquet( &self, parquet_path: PathBuf, + time: &str, src: &str, dst: &str, - time: &str, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { load_edges_deletions_from_parquet( &self.graph, parquet_path.as_path(), + time, src, dst, - time, layer, layer_in_df, ) diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index 111ea5d88d..766e5e288c 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -52,9 +52,9 @@ pub fn load_nodes_from_pandas( pub fn load_edges_from_pandas( graph: &GraphStorage, df: &PyAny, + time: &str, src: &str, dst: &str, - time: &str, properties: Option<&[&str]>, const_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, @@ -84,9 +84,9 @@ pub fn load_edges_from_pandas( load_edges_from_df( df_view, size, + time, src, dst, - time, properties, const_properties, shared_const_properties, @@ -183,9 +183,9 @@ pub fn load_edge_props_from_pandas( pub fn load_edges_deletions_from_pandas( graph: &GraphStorage, df: &PyAny, + time: &str, src: &str, dst: &str, - time: &str, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { @@ -210,9 +210,9 @@ pub fn load_edges_deletions_from_pandas( load_edges_deletions_from_df( df_view, size, + time, src, dst, - time, layer, layer_in_df.unwrap_or(true), graph.core_graph(), From 95c5fb73d1a3b5dde62f05c056bcd8fd96b5742a Mon Sep 17 00:00:00 2001 From: Shivam Kapoor <4599890+iamsmkr@users.noreply.github.com> Date: Wed, 14 Aug 2024 17:57:35 +0100 Subject: [PATCH 05/17] const_prop rename --- python/tests/test_graph_conversions.py | 16 +- python/tests/test_load_from_pandas.py | 60 +++--- python/tests/test_load_from_parquet.py | 36 ++-- raphtory/src/io/arrow/df_loaders.rs | 92 ++++----- raphtory/src/io/parquet_loaders.rs | 24 +-- raphtory/src/python/graph/graph.rs | 176 +++++++++--------- .../src/python/graph/graph_with_deletions.rs | 168 ++++++++--------- .../src/python/graph/io/pandas_loaders.rs | 24 +-- 8 files changed, 304 insertions(+), 292 deletions(-) diff --git a/python/tests/test_graph_conversions.py b/python/tests/test_graph_conversions.py index 9b54bb00d3..fc005ed6a9 100644 --- a/python/tests/test_graph_conversions.py +++ b/python/tests/test_graph_conversions.py @@ -29,14 +29,14 @@ def build_graph(): edge_dst="destination", edge_properties=["data_size_MB"], edge_layer="transaction_type", - edge_const_properties=["is_encrypted"], - edge_shared_const_properties={"datasource": "data/network_traffic_edges.csv"}, + edge_constant_properties=["is_encrypted"], + edge_shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, node_df=nodes_df, node_id="server_id", node_time="timestamp", node_properties=["OS_version", "primary_function", "uptime_days"], - node_const_properties=["server_name", "hardware_type"], - node_shared_const_properties={"datasource": "data/network_traffic_edges.csv"}, + node_constant_properties=["server_name", "hardware_type"], + node_shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, ) @@ -54,14 +54,14 @@ def build_graph_without_datetime_type(): edge_dst="destination", edge_properties=["data_size_MB"], edge_layer="transaction_type", - edge_const_properties=["is_encrypted"], - edge_shared_const_properties={"datasource": "data/network_traffic_edges.csv"}, + edge_constant_properties=["is_encrypted"], + edge_shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, node_df=nodes_df, node_id="server_id", node_time="timestamp", node_properties=["OS_version", "primary_function", "uptime_days"], - node_const_properties=["server_name", "hardware_type"], - node_shared_const_properties={"datasource": "data/network_traffic_edges.csv"}, + node_constant_properties=["server_name", "hardware_type"], + node_shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, ) diff --git a/python/tests/test_load_from_pandas.py b/python/tests/test_load_from_pandas.py index 5599accf34..3d572eb4cd 100644 --- a/python/tests/test_load_from_pandas.py +++ b/python/tests/test_load_from_pandas.py @@ -300,7 +300,7 @@ def assertions1(g): "time", "node_type", properties=["name"], - shared_const_properties={"tag": "test_tag"}, + shared_constant_properties={"tag": "test_tag"}, ) assertions1(g) @@ -311,7 +311,7 @@ def assertions1(g): "time", "node_type", properties=["name"], - shared_const_properties={"tag": "test_tag"}, + shared_constant_properties={"tag": "test_tag"}, ) assertions1(g) @@ -332,7 +332,7 @@ def assertions2(g): "time", "node_type", properties=["name"], - const_properties=["type"], + constant_properties=["type"], ) assertions2(g) @@ -343,7 +343,7 @@ def assertions2(g): "time", "node_type", properties=["name"], - const_properties=["type"], + constant_properties=["type"], ) assertions2(g) @@ -379,8 +379,8 @@ def assertions3(g): "src", "dst", properties=["weight", "marbles"], - const_properties=["marbles_const"], - shared_const_properties={"type": "Edge", "tag": "test_tag"}, + constant_properties=["marbles_const"], + shared_constant_properties={"type": "Edge", "tag": "test_tag"}, layer="test_layer", layer_in_df=False, ) @@ -393,8 +393,8 @@ def assertions3(g): "src", "dst", properties=["weight", "marbles"], - const_properties=["marbles_const"], - shared_const_properties={"type": "Edge", "tag": "test_tag"}, + constant_properties=["marbles_const"], + shared_constant_properties={"type": "Edge", "tag": "test_tag"}, layer="test_layer", layer_in_df=False, ) @@ -448,7 +448,7 @@ def assertions5(g): node_id="id", node_time="time", node_properties=["name"], - node_shared_const_properties={"type": "Person"}, + node_shared_constant_properties={"type": "Person"}, ) assertions5(g) @@ -463,7 +463,7 @@ def assertions5(g): node_id="id", node_time="time", node_properties=["name"], - node_shared_const_properties={"type": "Person"}, + node_shared_constant_properties={"type": "Person"}, ) assertions5(g) @@ -499,7 +499,7 @@ def assertions6(g): node_id="id", node_time="time", node_properties=["name"], - node_const_properties=["type"], + node_constant_properties=["type"], ) assertions6(g) @@ -513,7 +513,7 @@ def assertions6(g): node_id="id", node_time="time", node_properties=["name"], - node_const_properties=["type"], + node_constant_properties=["type"], ) assertions6(g) @@ -550,8 +550,8 @@ def assertions7(g): g.load_node_props_from_pandas( nodes_df, "id", - const_properties=["type"], - shared_const_properties={"tag": "test_tag"}, + constant_properties=["type"], + shared_constant_properties={"tag": "test_tag"}, ) assertions7(g) @@ -575,8 +575,8 @@ def assertions8(g): edges_df, "src", "dst", - const_properties=["marbles_const"], - shared_const_properties={"tag": "test_tag"}, + constant_properties=["marbles_const"], + shared_constant_properties={"tag": "test_tag"}, layer="layers", ) assertions8(g) @@ -596,8 +596,8 @@ def assertions8(g): g.load_node_props_from_pandas( nodes_df, "id", - const_properties=["type"], - shared_const_properties={"tag": "test_tag"}, + constant_properties=["type"], + shared_constant_properties={"tag": "test_tag"}, ) assertions7(g) @@ -605,8 +605,8 @@ def assertions8(g): edges_df, "src", "dst", - const_properties=["marbles_const"], - shared_const_properties={"tag": "test_tag"}, + constant_properties=["marbles_const"], + shared_constant_properties={"tag": "test_tag"}, layer="layers", ) assertions8(g) @@ -635,8 +635,8 @@ def assertions_layers_in_df(g): "src", "dst", ["weight", "marbles"], - const_properties=["marbles_const"], - shared_const_properties={"type": "Edge", "tag": "test_tag"}, + constant_properties=["marbles_const"], + shared_constant_properties={"type": "Edge", "tag": "test_tag"}, layer="layers", layer_in_df=True, ) @@ -649,8 +649,8 @@ def assertions_layers_in_df(g): "src", "dst", ["weight", "marbles"], - const_properties=["marbles_const"], - shared_const_properties={"type": "Edge", "tag": "test_tag"}, + constant_properties=["marbles_const"], + shared_constant_properties={"type": "Edge", "tag": "test_tag"}, layer="layers", layer_in_df=True, ) @@ -714,7 +714,7 @@ def test_missing_columns(): edge_src="src", edge_dst="dst", edge_properties=["not_weight", "marbles"], - edge_const_properties=["bleep_bloop"], + edge_constant_properties=["bleep_bloop"], node_df=nodes_df, node_id="id", node_time="time", @@ -733,7 +733,7 @@ def test_missing_columns(): edge_src="src", edge_dst="dst", edge_properties=["not_weight", "marbles"], - edge_const_properties=["bleep_bloop"], + edge_constant_properties=["bleep_bloop"], node_df=nodes_df, node_id="id", node_time="time", @@ -787,7 +787,7 @@ def test_missing_columns(): edges_df, src="sauce", dst="dist", - const_properties=["wait", "marples"], + constant_properties=["wait", "marples"], ) with pytest.raises( @@ -801,7 +801,7 @@ def test_missing_columns(): edges_df, src="sauce", dst="dist", - const_properties=["wait", "marples"], + constant_properties=["wait", "marples"], ) with pytest.raises( @@ -814,7 +814,7 @@ def test_missing_columns(): g.load_node_props_from_pandas( nodes_df, id="sauce", - const_properties=["wait", "marples"], + constant_properties=["wait", "marples"], ) with pytest.raises( @@ -827,7 +827,7 @@ def test_missing_columns(): g.load_node_props_from_pandas( nodes_df, id="sauce", - const_properties=["wait", "marples"], + constant_properties=["wait", "marples"], ) diff --git a/python/tests/test_load_from_parquet.py b/python/tests/test_load_from_parquet.py index 6785df097a..f10716f6fc 100644 --- a/python/tests/test_load_from_parquet.py +++ b/python/tests/test_load_from_parquet.py @@ -238,8 +238,8 @@ def test_load_from_parquet_graphs(parquet_files): g.load_node_props_from_parquet( parquet_path=nodes_parquet_file_path, id="id", - const_properties=["type"], - shared_const_properties={"tag": "test_tag"}, + constant_properties=["type"], + shared_constant_properties={"tag": "test_tag"}, ) assert_expected_node_property_tag(g) assert_expected_node_property_type(g) @@ -248,8 +248,8 @@ def test_load_from_parquet_graphs(parquet_files): parquet_path=edges_parquet_file_path, src="src", dst="dst", - const_properties=["marbles_const"], - shared_const_properties={"tag": "test_tag"}, + constant_properties=["marbles_const"], + shared_constant_properties={"tag": "test_tag"}, layer="layers", ) assert_expected_edge_properties(g) @@ -262,7 +262,7 @@ def test_load_from_parquet_graphs(parquet_files): time="time", node_type="node_type", properties=["name"], - shared_const_properties={"tag": "test_tag"}, + shared_constant_properties={"tag": "test_tag"}, ) assert_expected_node_types(g) assert_expected_node_property_tag(g) @@ -274,8 +274,8 @@ def test_load_from_parquet_graphs(parquet_files): dst="dst", time="time", properties=["weight", "marbles"], - const_properties=["marbles_const"], - shared_const_properties={"type": "Edge", "tag": "test_tag"}, + constant_properties=["marbles_const"], + shared_constant_properties={"type": "Edge", "tag": "test_tag"}, layer="test_layer", layer_in_df=False, ) @@ -293,7 +293,7 @@ def test_load_from_parquet_graphs(parquet_files): node_id="id", node_time="time", node_properties=["name"], - node_shared_const_properties={"dept": "Sales"}, + node_shared_constant_properties={"dept": "Sales"}, ) assert_expected_test_layer(g) assert_expected_node_property_dept(g) @@ -308,7 +308,7 @@ def test_load_from_parquet_graphs(parquet_files): node_id="id", node_time="time", node_properties=["name"], - node_const_properties=["type"], + node_constant_properties=["type"], ) assert_expected_node_property_type(g) assert_expected_layers(g) @@ -355,8 +355,8 @@ def test_load_from_parquet_persistent_graphs(parquet_files): g.load_node_props_from_parquet( parquet_path=nodes_parquet_file_path, id="id", - const_properties=["type"], - shared_const_properties={"tag": "test_tag"}, + constant_properties=["type"], + shared_constant_properties={"tag": "test_tag"}, ) assert_expected_node_property_tag(g) assert_expected_node_property_type(g) @@ -365,8 +365,8 @@ def test_load_from_parquet_persistent_graphs(parquet_files): parquet_path=edges_parquet_file_path, src="src", dst="dst", - const_properties=["marbles_const"], - shared_const_properties={"tag": "test_tag"}, + constant_properties=["marbles_const"], + shared_constant_properties={"tag": "test_tag"}, layer="layers", ) assert_expected_edge_properties(g) @@ -379,7 +379,7 @@ def test_load_from_parquet_persistent_graphs(parquet_files): time="time", node_type="node_type", properties=["name"], - shared_const_properties={"tag": "test_tag"}, + shared_constant_properties={"tag": "test_tag"}, ) assert_expected_node_types(g) assert_expected_node_property_tag(g) @@ -391,8 +391,8 @@ def test_load_from_parquet_persistent_graphs(parquet_files): dst="dst", time="time", properties=["weight", "marbles"], - const_properties=["marbles_const"], - shared_const_properties={"type": "Edge", "tag": "test_tag"}, + constant_properties=["marbles_const"], + shared_constant_properties={"type": "Edge", "tag": "test_tag"}, layer="test_layer", layer_in_df=False, ) @@ -410,7 +410,7 @@ def test_load_from_parquet_persistent_graphs(parquet_files): node_id="id", node_time="time", node_properties=["name"], - node_shared_const_properties={"dept": "Sales"}, + node_shared_constant_properties={"dept": "Sales"}, ) assert_expected_test_layer(g) assert_expected_node_property_dept(g) @@ -425,7 +425,7 @@ def test_load_from_parquet_persistent_graphs(parquet_files): node_id="id", node_time="time", node_properties=["name"], - node_const_properties=["type"], + node_constant_properties=["type"], ) assert_expected_node_property_type(g) assert_expected_layers(g) diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index a6bf69a19a..9d0923f78c 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -42,20 +42,20 @@ pub(crate) fn load_nodes_from_df< node_id: &str, time: &str, properties: Option<&[&str]>, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, node_type: Option<&str>, node_type_in_df: bool, graph: &G, ) -> Result<(), GraphError> { let properties = properties.unwrap_or(&[]); - let const_properties = const_properties.unwrap_or(&[]); + let constant_properties = constant_properties.unwrap_or(&[]); let properties_indices = properties .iter() .map(|name| df_view.get_index(name)) .collect::, GraphError>>()?; - let const_properties_indices = const_properties + let constant_properties_indices = constant_properties .iter() .map(|name| df_view.get_index(name)) .collect::, GraphError>>()?; @@ -71,7 +71,8 @@ pub(crate) fn load_nodes_from_df< let df = chunk?; let size = df.get_inner_size(); let prop_iter = combine_properties(properties, &properties_indices, &df)?; - let const_prop_iter = combine_properties(const_properties, &const_properties_indices, &df)?; + let const_prop_iter = + combine_properties(constant_properties, &constant_properties_indices, &df)?; let node_type: Box>> = match node_type { Some(node_type) => match node_type_index { @@ -109,7 +110,7 @@ pub(crate) fn load_nodes_from_df< iter, prop_iter, const_prop_iter, - shared_const_properties, + shared_constant_properties, )?; } else if let (Some(node_id), Some(time)) = ( df.iter_col::(node_id_index), @@ -126,7 +127,7 @@ pub(crate) fn load_nodes_from_df< iter, prop_iter, const_prop_iter, - shared_const_properties, + shared_constant_properties, )?; } else if let (Some(node_id), Some(time)) = (df.utf8::(node_id_index), df.time_iter_col(time_index)) @@ -147,7 +148,7 @@ pub(crate) fn load_nodes_from_df< let actual_type = extract_out_default_type(n_t); let v = graph.add_node(time, node_id, props, actual_type)?; v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } } @@ -171,7 +172,7 @@ pub(crate) fn load_nodes_from_df< if let (Some(node_id), Some(time), n_t) = (node_id, time, actual_type) { let v = graph.add_node(time, node_id, props, n_t)?; v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = shared_const_properties { + if let Some(shared_const_props) = shared_constant_properties { v.add_constant_properties(shared_const_props)?; } } @@ -204,20 +205,20 @@ pub(crate) fn load_edges_from_df< src: &str, dst: &str, properties: Option<&[&str]>, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, layer: Option<&str>, layer_in_df: bool, graph: &G, ) -> Result<(), GraphError> { let properties = properties.unwrap_or(&[]); - let const_properties = const_properties.unwrap_or(&[]); + let constant_properties = constant_properties.unwrap_or(&[]); let properties_indices = properties .iter() .map(|name| df_view.get_index(name)) .collect::, GraphError>>()?; - let const_properties_indices = const_properties + let constant_properties_indices = constant_properties .iter() .map(|name| df_view.get_index(name)) .collect::, GraphError>>()?; @@ -233,7 +234,8 @@ pub(crate) fn load_edges_from_df< for chunk in df_view.chunks { let df = chunk?; let prop_iter = combine_properties(properties, &properties_indices, &df)?; - let const_prop_iter = combine_properties(const_properties, &const_properties_indices, &df)?; + let const_prop_iter = + combine_properties(constant_properties, &constant_properties_indices, &df)?; let layer = lift_layer(layer, layer_index, &df); @@ -252,7 +254,7 @@ pub(crate) fn load_edges_from_df< triplets, prop_iter, const_prop_iter, - shared_const_properties, + shared_constant_properties, layer, )?; } else if let (Some(src), Some(dst), Some(time)) = ( @@ -270,7 +272,7 @@ pub(crate) fn load_edges_from_df< triplets, prop_iter, const_prop_iter, - shared_const_properties, + shared_constant_properties, layer, )?; } else if let (Some(src), Some(dst), Some(time)) = ( @@ -290,7 +292,7 @@ pub(crate) fn load_edges_from_df< if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { let e = graph.add_edge(time, src, dst, props, layer.as_deref())?; e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; } } @@ -311,7 +313,7 @@ pub(crate) fn load_edges_from_df< if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { let e = graph.add_edge(time, src, dst, props, layer.as_deref())?; e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; } } @@ -429,12 +431,12 @@ pub(crate) fn load_node_props_from_df< df_view: DFView>>, size: usize, node_id: &str, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, graph: &G, ) -> Result<(), GraphError> { - let const_properties = const_properties.unwrap_or(&[]); - let const_properties_indices = const_properties + let constant_properties = constant_properties.unwrap_or(&[]); + let constant_properties_indices = constant_properties .iter() .map(|name| df_view.get_index(name)) .collect::, GraphError>>()?; @@ -442,7 +444,8 @@ pub(crate) fn load_node_props_from_df< for chunk in df_view.chunks { let df = chunk?; - let const_prop_iter = combine_properties(const_properties, &const_properties_indices, &df)?; + let const_prop_iter = + combine_properties(constant_properties, &constant_properties_indices, &df)?; if let Some(node_id) = df.iter_col::(node_id_index) { let iter = node_id.map(|i| i.copied()); @@ -454,7 +457,7 @@ pub(crate) fn load_node_props_from_df< .node(node_id) .ok_or(GraphError::NodeIdError(node_id))?; v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } } @@ -469,7 +472,7 @@ pub(crate) fn load_node_props_from_df< .node(node_id) .ok_or(GraphError::NodeIdError(node_id))?; v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } } @@ -484,7 +487,7 @@ pub(crate) fn load_node_props_from_df< .node(node_id) .ok_or_else(|| GraphError::NodeNameError(node_id.to_owned()))?; v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } } @@ -499,7 +502,7 @@ pub(crate) fn load_node_props_from_df< .node(node_id) .ok_or_else(|| GraphError::NodeNameError(node_id.to_owned()))?; v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } } @@ -521,14 +524,14 @@ pub(crate) fn load_edges_props_from_df< size: usize, src: &str, dst: &str, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, layer: Option<&str>, layer_in_df: bool, graph: &G, ) -> Result<(), GraphError> { - let const_properties = const_properties.unwrap_or(&[]); - let const_properties_indices = const_properties + let constant_properties = constant_properties.unwrap_or(&[]); + let constant_properties_indices = constant_properties .iter() .map(|name| df_view.get_index(name)) .collect::, GraphError>>()?; @@ -541,7 +544,8 @@ pub(crate) fn load_edges_props_from_df< for chunk in df_view.chunks { let df = chunk?; - let const_prop_iter = combine_properties(const_properties, &const_properties_indices, &df)?; + let const_prop_iter = + combine_properties(constant_properties, &constant_properties_indices, &df)?; let layer = lift_layer(layer, layer_index, &df); @@ -561,7 +565,7 @@ pub(crate) fn load_edges_props_from_df< .edge(src, dst) .ok_or(GraphError::EdgeIdError { src, dst })?; e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; } } @@ -584,7 +588,7 @@ pub(crate) fn load_edges_props_from_df< .edge(src, dst) .ok_or(GraphError::EdgeIdError { src, dst })?; e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; } } @@ -608,7 +612,7 @@ pub(crate) fn load_edges_props_from_df< dst: dst.to_owned(), })?; e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; } } @@ -632,7 +636,7 @@ pub(crate) fn load_edges_props_from_df< dst: dst.to_owned(), })?; e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; } } @@ -663,12 +667,12 @@ fn load_edges_from_num_iter< size: usize, edges: I, properties: PI, - const_properties: PI, - shared_const_properties: Option<&HashMap>, + constant_properties: PI, + shared_constant_properties: Option<&HashMap>, layer: IL, ) -> Result<(), GraphError> { let iter = maybe_tqdm!( - edges.zip(properties).zip(const_properties).zip(layer), + edges.zip(properties).zip(constant_properties).zip(layer), size, "Loading edges" ); @@ -676,7 +680,7 @@ fn load_edges_from_num_iter< if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { let e = graph.add_edge(time, src, dst, edge_props, layer.as_deref())?; e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; } } @@ -695,11 +699,11 @@ fn load_nodes_from_num_iter< size: usize, nodes: I, properties: PI, - const_properties: PI, - shared_const_properties: Option<&HashMap>, + constant_properties: PI, + shared_constant_properties: Option<&HashMap>, ) -> Result<(), GraphError> { let iter = maybe_tqdm!( - nodes.zip(properties).zip(const_properties), + nodes.zip(properties).zip(constant_properties), size, "Loading nodes" ); @@ -711,7 +715,7 @@ fn load_nodes_from_num_iter< let v = graph.add_node(t, v, props, actual_node_type)?; v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } } diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 7c5971d450..82817abfbe 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -30,12 +30,12 @@ pub fn load_nodes_from_parquet< node_type: Option<&str>, node_type_in_df: Option, properties: Option<&[&str]>, - const_properties: Option<&[&str]>, + constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id, time]; cols_to_check.extend(properties.unwrap_or(&Vec::new())); - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); if node_type_in_df.unwrap_or(true) { if let Some(ref node_type) = node_type { cols_to_check.push(node_type.as_ref()); @@ -50,7 +50,7 @@ pub fn load_nodes_from_parquet< id, time, properties, - const_properties, + constant_properties, shared_const_properties, node_type, node_type_in_df.unwrap_or(true), @@ -71,7 +71,7 @@ pub fn load_edges_from_parquet< src: &str, dst: &str, properties: Option<&[&str]>, - const_properties: Option<&[&str]>, + constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, layer: Option<&str>, layer_in_df: Option, @@ -79,7 +79,7 @@ pub fn load_edges_from_parquet< let parquet_path = parquet_path.as_ref(); let mut cols_to_check = vec![src, dst, time]; cols_to_check.extend(properties.unwrap_or(&Vec::new())); - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); if layer_in_df.unwrap_or(false) { if let Some(ref layer) = layer { cols_to_check.push(layer.as_ref()); @@ -97,7 +97,7 @@ pub fn load_edges_from_parquet< src, dst, properties, - const_properties, + constant_properties, shared_const_properties, layer, layer_in_df.unwrap_or(true), @@ -115,11 +115,11 @@ pub fn load_node_props_from_parquet< graph: &G, parquet_path: &Path, id: &str, - const_properties: Option<&[&str]>, + constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id]; - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); let size = cols_to_check.len(); for path in get_parquet_file_paths(parquet_path)? { @@ -130,7 +130,7 @@ pub fn load_node_props_from_parquet< df_view, size, id, - const_properties, + constant_properties, shared_const_properties, graph, ) @@ -147,7 +147,7 @@ pub fn load_edge_props_from_parquet< parquet_path: &Path, src: &str, dst: &str, - const_properties: Option<&[&str]>, + constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, layer: Option<&str>, layer_in_df: Option, @@ -158,7 +158,7 @@ pub fn load_edge_props_from_parquet< cols_to_check.push(layer.as_ref()); } } - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); let size = cols_to_check.len(); for path in get_parquet_file_paths(parquet_path)? { @@ -169,7 +169,7 @@ pub fn load_edge_props_from_parquet< size, src, dst, - const_properties, + constant_properties, shared_const_properties, layer, layer_in_df.unwrap_or(true), diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index c677b8041a..67e0f728bf 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -475,16 +475,16 @@ impl PyGraph { /// edge_src (str): The column name for the source node ids. /// edge_dst (str): The column name for the destination node ids. /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - /// edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - /// edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. + /// edge_constant_properties (list): The column names for the constant properties (optional) Defaults to None. + /// edge_shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. /// edge_layer (str): The edge layer name (optional) Defaults to None. /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. /// node_df (pandas.DataFrame): The DataFrame containing the nodes (optional) Defaults to None. /// node_id (str): The column name for the node ids (optional) Defaults to None. /// node_time (str): The column name for the node timestamps (optional) Defaults to None. /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - /// node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. - /// node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. + /// node_constant_properties (list): The column names for the node constant properties (optional) Defaults to None. + /// node_shared_constant_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. /// node_type (str): the column name for the node type /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type /// @@ -492,9 +492,9 @@ impl PyGraph { /// Graph: The loaded Graph object. #[staticmethod] #[pyo3( - signature = (edge_df, edge_time, edge_src, edge_dst, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, + signature = (edge_df, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, edge_layer = None, layer_in_df = true, node_df = None, node_id = None, node_time = None, node_properties = None, - node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true) + node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_in_df = true) )] fn load_from_pandas( edge_df: &PyAny, @@ -502,16 +502,16 @@ impl PyGraph { edge_src: &str, edge_dst: &str, edge_properties: Option>, - edge_const_properties: Option>, - edge_shared_const_properties: Option>, + edge_constant_properties: Option>, + edge_shared_constant_properties: Option>, edge_layer: Option<&str>, layer_in_df: Option, node_df: Option<&PyAny>, node_id: Option<&str>, node_time: Option<&str>, node_properties: Option>, - node_const_properties: Option>, - node_shared_const_properties: Option>, + node_constant_properties: Option>, + node_shared_constant_properties: Option>, node_type: Option<&str>, node_type_in_df: Option, ) -> Result { @@ -525,8 +525,10 @@ impl PyGraph { node_type, node_type_in_df, node_properties.as_ref().map(|props| props.as_ref()), - node_const_properties.as_ref().map(|props| props.as_ref()), - node_shared_const_properties.as_ref(), + node_constant_properties + .as_ref() + .map(|props| props.as_ref()), + node_shared_constant_properties.as_ref(), )?; } load_edges_from_pandas( @@ -536,8 +538,10 @@ impl PyGraph { edge_src, edge_dst, edge_properties.as_ref().map(|props| props.as_ref()), - edge_const_properties.as_ref().map(|props| props.as_ref()), - edge_shared_const_properties.as_ref(), + edge_constant_properties + .as_ref() + .map(|props| props.as_ref()), + edge_shared_constant_properties.as_ref(), edge_layer, layer_in_df, )?; @@ -552,16 +556,16 @@ impl PyGraph { /// edge_src (str): The column name for the source node ids. /// edge_dst (str): The column name for the destination node ids. /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - /// edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - /// edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. + /// edge_constant_properties (list): The column names for the constant properties (optional) Defaults to None. + /// edge_shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. /// edge_layer (str): The edge layer name (optional) Defaults to None. /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. /// node_parquet_path (str): Parquet file or directory of Parquet files containing the nodes (optional) Defaults to None. /// node_id (str): The column name for the node ids (optional) Defaults to None. /// node_time (str): The column name for the node timestamps (optional) Defaults to None. /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - /// node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. - /// node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. + /// node_constant_properties (list): The column names for the node constant properties (optional) Defaults to None. + /// node_shared_constant_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. /// node_type (str): the column name for the node type /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type /// @@ -569,9 +573,9 @@ impl PyGraph { /// Graph: The loaded Graph object. #[staticmethod] #[pyo3( - signature = (edge_parquet_path, edge_time, edge_src, edge_dst, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, + signature = (edge_parquet_path, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, edge_layer = None, layer_in_df = true, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, - node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true) + node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_in_df = true) )] fn load_from_parquet( edge_parquet_path: PathBuf, @@ -579,16 +583,16 @@ impl PyGraph { edge_src: &str, edge_dst: &str, edge_properties: Option>, - edge_const_properties: Option>, - edge_shared_const_properties: Option>, + edge_constant_properties: Option>, + edge_shared_constant_properties: Option>, edge_layer: Option<&str>, layer_in_df: Option, node_parquet_path: Option, node_id: Option<&str>, node_time: Option<&str>, node_properties: Option>, - node_const_properties: Option>, - node_shared_const_properties: Option>, + node_constant_properties: Option>, + node_shared_constant_properties: Option>, node_type: Option<&str>, node_type_in_df: Option, ) -> Result { @@ -605,8 +609,10 @@ impl PyGraph { node_type, node_type_in_df, node_properties.as_ref().map(|props| props.as_ref()), - node_const_properties.as_ref().map(|props| props.as_ref()), - node_shared_const_properties.as_ref(), + node_constant_properties + .as_ref() + .map(|props| props.as_ref()), + node_shared_constant_properties.as_ref(), )?; } load_edges_from_parquet( @@ -616,8 +622,10 @@ impl PyGraph { edge_src, edge_dst, edge_properties.as_ref().map(|props| props.as_ref()), - edge_const_properties.as_ref().map(|props| props.as_ref()), - edge_shared_const_properties.as_ref(), + edge_constant_properties + .as_ref() + .map(|props| props.as_ref()), + edge_shared_constant_properties.as_ref(), edge_layer, layer_in_df, )?; @@ -634,12 +642,12 @@ impl PyGraph { /// node_type (str): the column name for the node type /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type /// properties (List): List of node property column names. Defaults to None. (optional) - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: /// Result<(), GraphError>: Result of the operation. #[pyo3( - signature = (df, id, time, node_type = None, node_type_in_df = true, properties = None, const_properties = None, shared_const_properties = None) + signature = (df, id, time, node_type = None, node_type_in_df = true, properties = None, constant_properties = None, shared_constant_properties = None) )] fn load_nodes_from_pandas( &self, @@ -649,8 +657,8 @@ impl PyGraph { node_type: Option<&str>, node_type_in_df: Option, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_nodes_from_pandas( self.graph.core_graph(), @@ -660,8 +668,8 @@ impl PyGraph { node_type, node_type_in_df, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -674,12 +682,12 @@ impl PyGraph { /// node_type (str): the column name for the node type /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type /// properties (List): List of node property column names. Defaults to None. (optional) - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: /// Result<(), GraphError>: Result of the operation. #[pyo3( - signature = (parquet_path, id, time, node_type = None, node_type_in_df = true, properties = None, const_properties = None, shared_const_properties = None) + signature = (parquet_path, id, time, node_type = None, node_type_in_df = true, properties = None, constant_properties = None, shared_constant_properties = None) )] fn load_nodes_from_parquet( &self, @@ -689,8 +697,8 @@ impl PyGraph { node_type: Option<&str>, node_type_in_df: Option, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_nodes_from_parquet( &self.graph, @@ -700,8 +708,8 @@ impl PyGraph { node_type, node_type_in_df, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -713,15 +721,15 @@ impl PyGraph { /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. /// properties (List): List of edge property column names. Defaults to None. (optional) - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) /// layer (str): The edge layer name (optional) Defaults to None. /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dateframe or if it should be used directly as the layer for all edges (optional) defaults to True. /// /// Returns: /// Result<(), GraphError>: Result of the operation. #[pyo3( - signature = (df, time, src, dst, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) + signature = (df, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true) )] fn load_edges_from_pandas( &self, @@ -730,8 +738,8 @@ impl PyGraph { src: &str, dst: &str, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { @@ -742,8 +750,8 @@ impl PyGraph { src, dst, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, layer_in_df, ) @@ -757,15 +765,15 @@ impl PyGraph { /// dst (str): The column name for the destination node ids. /// time (str): The column name for the update timestamps. /// properties (List): List of edge property column names. Defaults to None. (optional) - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) /// layer (str): The edge layer name (optional) Defaults to None. /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. /// /// Returns: /// Result<(), GraphError>: Result of the operation. #[pyo3( - signature = (parquet_path, time, src, dst, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) + signature = (parquet_path, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true) )] fn load_edges_from_parquet( &self, @@ -774,8 +782,8 @@ impl PyGraph { src: &str, dst: &str, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { @@ -786,8 +794,8 @@ impl PyGraph { src, dst, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, layer_in_df, ) @@ -798,25 +806,25 @@ impl PyGraph { /// Arguments: /// df (Dataframe): The Pandas DataFrame containing node information. /// id(str): The column name for the node IDs. - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, id, const_properties = None, shared_const_properties = None))] + #[pyo3(signature = (df, id, constant_properties = None, shared_constant_properties = None))] fn load_node_props_from_pandas( &self, df: &PyAny, id: &str, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_node_props_from_pandas( self.graph.core_graph(), df, id, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -825,25 +833,25 @@ impl PyGraph { /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files path containing node information. /// id(str): The column name for the node IDs. - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, id, const_properties = None, shared_const_properties = None))] + #[pyo3(signature = (parquet_path, id, constant_properties = None, shared_constant_properties = None))] fn load_node_props_from_parquet( &self, parquet_path: PathBuf, id: &str, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_node_props_from_parquet( &self.graph, parquet_path.as_path(), id, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -853,23 +861,23 @@ impl PyGraph { /// df (Dataframe): The Pandas DataFrame containing edge information. /// src (str): The column name for the source node. /// dst (str): The column name for the destination node. - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) /// layer (str): Layer name. Defaults to None. (optional) /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. /// /// Returns: /// Result<(), GraphError>: Result of the operation. #[pyo3( - signature = (df, src, dst, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) + signature = (df, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true) )] fn load_edge_props_from_pandas( &self, df: &PyAny, src: &str, dst: &str, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { @@ -878,8 +886,8 @@ impl PyGraph { df, src, dst, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, layer_in_df, ) @@ -891,23 +899,23 @@ impl PyGraph { /// parquet_path (str): Parquet file or directory of Parquet files path containing edge information. /// src (str): The column name for the source node. /// dst (str): The column name for the destination node. - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) /// layer (str): Layer name. Defaults to None. (optional) /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. /// /// Returns: /// Result<(), GraphError>: Result of the operation. #[pyo3( - signature = (parquet_path, src, dst, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) + signature = (parquet_path, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true) )] fn load_edge_props_from_parquet( &self, parquet_path: PathBuf, src: &str, dst: &str, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { @@ -916,8 +924,8 @@ impl PyGraph { parquet_path.as_path(), src, dst, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, layer_in_df, ) diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index dd6308512d..da5451c27d 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -386,41 +386,41 @@ impl PyPersistentGraph { /// edge_src (str): The column name for the source node ids. /// edge_dst (str): The column name for the destination node ids. /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - /// edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - /// edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. + /// edge_constant_properties (list): The column names for the constant properties (optional) Defaults to None. + /// edge_shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. /// edge_layer (str): The edge layer name (optional) Defaults to None. /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. /// node_df (pandas.DataFrame): The DataFrame containing the nodes (optional) Defaults to None. /// node_id (str): The column name for the node ids (optional) Defaults to None. /// node_time (str): The column name for the node timestamps (optional) Defaults to None. /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - /// node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. - /// node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. + /// node_constant_properties (list): The column names for the node constant properties (optional) Defaults to None. + /// node_shared_constant_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. /// node_type (str): the column name for the node type /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type /// /// Returns: /// Graph: The loaded Graph object. #[staticmethod] - #[pyo3(signature = (edge_df, edge_time, edge_src, edge_dst, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, + #[pyo3(signature = (edge_df, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, edge_layer = None, layer_in_df = true, node_df = None, node_id = None, node_time = None, node_properties = None, - node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true))] + node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_in_df = true))] fn load_from_pandas( edge_df: &PyAny, edge_time: &str, edge_src: &str, edge_dst: &str, edge_properties: Option>, - edge_const_properties: Option>, - edge_shared_const_properties: Option>, + edge_constant_properties: Option>, + edge_shared_constant_properties: Option>, edge_layer: Option<&str>, layer_in_df: Option, node_df: Option<&PyAny>, node_id: Option<&str>, node_time: Option<&str>, node_properties: Option>, - node_const_properties: Option>, - node_shared_const_properties: Option>, + node_constant_properties: Option>, + node_shared_constant_properties: Option>, node_type: Option<&str>, node_type_in_df: Option, ) -> Result { @@ -433,8 +433,8 @@ impl PyPersistentGraph { edge_src, edge_dst, edge_properties, - edge_const_properties, - edge_shared_const_properties, + edge_constant_properties, + edge_shared_constant_properties, edge_layer, layer_in_df, )?; @@ -446,8 +446,8 @@ impl PyPersistentGraph { node_type, node_type_in_df, node_properties, - node_const_properties, - node_shared_const_properties, + node_constant_properties, + node_shared_constant_properties, )?; } Ok(graph.graph) @@ -461,41 +461,41 @@ impl PyPersistentGraph { /// edge_dst (str): The column name for the destination node ids. /// edge_time (str): The column name for the timestamps. /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - /// edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - /// edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. + /// edge_constant_properties (list): The column names for the constant properties (optional) Defaults to None. + /// edge_shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. /// edge_layer (str): The edge layer name (optional) Defaults to None. /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. /// node_parquet_path (str): Parquet file or directory of Parquet files containing the nodes (optional) Defaults to None. /// node_id (str): The column name for the node ids (optional) Defaults to None. /// node_time (str): The column name for the node timestamps (optional) Defaults to None. /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - /// node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. - /// node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. + /// node_constant_properties (list): The column names for the node constant properties (optional) Defaults to None. + /// node_shared_constant_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. /// node_type (str): the column name for the node type /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type /// /// Returns: /// Graph: The loaded Graph object. #[staticmethod] - #[pyo3(signature = (edge_parquet_path, edge_time, edge_src, edge_dst, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, + #[pyo3(signature = (edge_parquet_path, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, edge_layer = None, layer_in_df = true, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, - node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true))] + node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_in_df = true))] fn load_from_parquet( edge_parquet_path: PathBuf, edge_time: &str, edge_src: &str, edge_dst: &str, edge_properties: Option>, - edge_const_properties: Option>, - edge_shared_const_properties: Option>, + edge_constant_properties: Option>, + edge_shared_constant_properties: Option>, edge_layer: Option<&str>, layer_in_df: Option, node_parquet_path: Option, node_id: Option<&str>, node_time: Option<&str>, node_properties: Option>, - node_const_properties: Option>, - node_shared_const_properties: Option>, + node_constant_properties: Option>, + node_shared_constant_properties: Option>, node_type: Option<&str>, node_type_in_df: Option, ) -> Result { @@ -512,8 +512,8 @@ impl PyPersistentGraph { node_type, node_type_in_df, node_properties, - node_const_properties, - node_shared_const_properties, + node_constant_properties, + node_shared_constant_properties, )?; } graph.load_edges_from_parquet( @@ -522,8 +522,8 @@ impl PyPersistentGraph { edge_src, edge_dst, edge_properties, - edge_const_properties, - edge_shared_const_properties, + edge_constant_properties, + edge_shared_constant_properties, edge_layer, layer_in_df, )?; @@ -539,11 +539,11 @@ impl PyPersistentGraph { /// node_type (str): the column name for the node type /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type /// properties (List): List of node property column names. Defaults to None. (optional) - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, id, time, node_type = None, node_type_in_df = true, properties = None, const_properties = None, shared_const_properties = None))] + #[pyo3(signature = (df, id, time, node_type = None, node_type_in_df = true, properties = None, constant_properties = None, shared_constant_properties = None))] fn load_nodes_from_pandas( &self, df: &PyAny, @@ -552,8 +552,8 @@ impl PyPersistentGraph { node_type: Option<&str>, node_type_in_df: Option, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_nodes_from_pandas( &self.graph.0, @@ -563,8 +563,8 @@ impl PyPersistentGraph { node_type, node_type_in_df, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -577,11 +577,11 @@ impl PyPersistentGraph { /// node_type (str): the column name for the node type /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type /// properties (List): List of node property column names. Defaults to None. (optional) - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, id, time, node_type = None, node_type_in_df = true, properties = None, const_properties = None, shared_const_properties = None))] + #[pyo3(signature = (parquet_path, id, time, node_type = None, node_type_in_df = true, properties = None, constant_properties = None, shared_constant_properties = None))] fn load_nodes_from_parquet( &self, parquet_path: PathBuf, @@ -590,8 +590,8 @@ impl PyPersistentGraph { node_type: Option<&str>, node_type_in_df: Option, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_nodes_from_parquet( &self.graph, @@ -601,8 +601,8 @@ impl PyPersistentGraph { node_type, node_type_in_df, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -614,14 +614,14 @@ impl PyPersistentGraph { /// dst (str): The column name for the destination node ids. /// time (str): The column name for the update timestamps. /// properties (List): List of edge property column names. Defaults to None. (optional) - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) /// layer (str): The edge layer name (optional) Defaults to None. /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, time, src, dst, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] + #[pyo3(signature = (df, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true))] fn load_edges_from_pandas( &self, df: &PyAny, @@ -629,8 +629,8 @@ impl PyPersistentGraph { src: &str, dst: &str, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { @@ -641,8 +641,8 @@ impl PyPersistentGraph { src, dst, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, layer_in_df, ) @@ -656,14 +656,14 @@ impl PyPersistentGraph { /// dst (str): The column name for the destination node ids. /// time (str): The column name for the update timestamps. /// properties (List): List of edge property column names. Defaults to None. (optional) - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) /// layer (str): The edge layer name (optional) Defaults to None. /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, time, src, dst, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] + #[pyo3(signature = (parquet_path, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true))] fn load_edges_from_parquet( &self, parquet_path: PathBuf, @@ -671,8 +671,8 @@ impl PyPersistentGraph { src: &str, dst: &str, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { @@ -683,8 +683,8 @@ impl PyPersistentGraph { src, dst, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, layer_in_df, ) @@ -753,25 +753,25 @@ impl PyPersistentGraph { /// Arguments: /// df (Dataframe): The Pandas DataFrame containing node information. /// id(str): The column name for the node IDs. - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, id, const_properties = None, shared_const_properties = None))] + #[pyo3(signature = (df, id, constant_properties = None, shared_constant_properties = None))] fn load_node_props_from_pandas( &self, df: &PyAny, id: &str, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_node_props_from_pandas( &self.graph.0, df, id, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -780,25 +780,25 @@ impl PyPersistentGraph { /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files path containing node information. /// id(str): The column name for the node IDs. - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, id, const_properties = None, shared_const_properties = None))] + #[pyo3(signature = (parquet_path, id, constant_properties = None, shared_constant_properties = None))] fn load_node_props_from_parquet( &self, parquet_path: PathBuf, id: &str, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_node_props_from_parquet( &self.graph, parquet_path.as_path(), id, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -808,21 +808,21 @@ impl PyPersistentGraph { /// df (Dataframe): The Pandas DataFrame containing edge information. /// src (str): The column name for the source node. /// dst (str): The column name for the destination node. - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) /// layer (str): Layer name. Defaults to None. (optional) /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, src, dst, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] + #[pyo3(signature = (df, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true))] fn load_edge_props_from_pandas( &self, df: &PyAny, src: &str, dst: &str, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { @@ -831,8 +831,8 @@ impl PyPersistentGraph { df, src, dst, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, layer_in_df, ) @@ -844,21 +844,21 @@ impl PyPersistentGraph { /// parquet_path (str): Parquet file or directory of Parquet files path containing edge information. /// src (str): The column name for the source node. /// dst (str): The column name for the destination node. - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) /// layer (str): Layer name. Defaults to None. (optional) /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, src, dst, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] + #[pyo3(signature = (parquet_path, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true))] fn load_edge_props_from_parquet( &self, parquet_path: PathBuf, src: &str, dst: &str, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, layer_in_df: Option, ) -> Result<(), GraphError> { @@ -867,8 +867,8 @@ impl PyPersistentGraph { parquet_path.as_path(), src, dst, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, layer_in_df, ) diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index 766e5e288c..27e93c0a31 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -16,13 +16,13 @@ pub fn load_nodes_from_pandas( node_type: Option<&str>, node_type_in_df: Option, properties: Option<&[&str]>, - const_properties: Option<&[&str]>, + constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, ) -> Result<(), GraphError> { Python::with_gil(|py| { let mut cols_to_check = vec![id, time]; cols_to_check.extend(properties.unwrap_or(&Vec::new())); - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); if node_type_in_df.unwrap_or(true) { if let Some(ref node_type) = node_type { cols_to_check.push(node_type.as_ref()); @@ -36,7 +36,7 @@ pub fn load_nodes_from_pandas( id, time, properties, - const_properties, + constant_properties, shared_const_properties, node_type, node_type_in_df.unwrap_or(true), @@ -56,7 +56,7 @@ pub fn load_edges_from_pandas( src: &str, dst: &str, properties: Option<&[&str]>, - const_properties: Option<&[&str]>, + constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, layer: Option<&str>, layer_in_df: Option, @@ -72,7 +72,7 @@ pub fn load_edges_from_pandas( let mut cols_to_check = vec![src, dst, time]; cols_to_check.extend(properties.unwrap_or(&Vec::new())); - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); if layer_in_df.unwrap_or(false) { if let Some(ref layer) = layer { cols_to_check.push(layer.as_ref()); @@ -88,7 +88,7 @@ pub fn load_edges_from_pandas( src, dst, properties, - const_properties, + constant_properties, shared_const_properties, layer, layer_in_df.unwrap_or(true), @@ -105,7 +105,7 @@ pub fn load_node_props_from_pandas( graph: &GraphStorage, df: &PyAny, id: &str, - const_properties: Option<&[&str]>, + constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, ) -> Result<(), GraphError> { Python::with_gil(|py| { @@ -117,14 +117,14 @@ pub fn load_node_props_from_pandas( )? .extract()?; let mut cols_to_check = vec![id]; - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; df_view.check_cols_exist(&cols_to_check)?; load_node_props_from_df( df_view, size, id, - const_properties, + constant_properties, shared_const_properties, graph, ) @@ -140,7 +140,7 @@ pub fn load_edge_props_from_pandas( df: &PyAny, src: &str, dst: &str, - const_properties: Option<&[&str]>, + constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, layer: Option<&str>, layer_in_df: Option, @@ -159,7 +159,7 @@ pub fn load_edge_props_from_pandas( cols_to_check.push(layer.as_ref()); } } - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; df_view.check_cols_exist(&cols_to_check)?; load_edges_props_from_df( @@ -167,7 +167,7 @@ pub fn load_edge_props_from_pandas( size, src, dst, - const_properties, + constant_properties, shared_const_properties, layer, layer_in_df.unwrap_or(true), From 437342a199fc68ae93165df00b34ce4cb18fe4d9 Mon Sep 17 00:00:00 2001 From: Shivam Kapoor <4599890+iamsmkr@users.noreply.github.com> Date: Thu, 15 Aug 2024 14:03:42 +0100 Subject: [PATCH 06/17] replace layer, layer_in_df with layer_name and layer_col --- python/tests/test_graph_conversions.py | 4 +- python/tests/test_load_from_pandas.py | 34 +++--- python/tests/test_load_from_parquet.py | 24 ++-- raphtory/src/core/utils/errors.rs | 2 + raphtory/src/io/arrow/df_loaders.rs | 48 ++++---- raphtory/src/io/arrow/mod.rs | 8 +- raphtory/src/io/arrow/prop_handler.rs | 28 +++-- raphtory/src/io/parquet_loaders.rs | 51 +++----- raphtory/src/python/graph/graph.rs | 84 ++++++------- .../src/python/graph/graph_with_deletions.rs | 110 +++++++++--------- .../src/python/graph/io/pandas_loaders.rs | 42 +++---- 11 files changed, 205 insertions(+), 230 deletions(-) diff --git a/python/tests/test_graph_conversions.py b/python/tests/test_graph_conversions.py index fc005ed6a9..c06b27edc7 100644 --- a/python/tests/test_graph_conversions.py +++ b/python/tests/test_graph_conversions.py @@ -28,7 +28,7 @@ def build_graph(): edge_src="source", edge_dst="destination", edge_properties=["data_size_MB"], - edge_layer="transaction_type", + layer_col="transaction_type", edge_constant_properties=["is_encrypted"], edge_shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, node_df=nodes_df, @@ -53,7 +53,7 @@ def build_graph_without_datetime_type(): edge_src="source", edge_dst="destination", edge_properties=["data_size_MB"], - edge_layer="transaction_type", + layer_col="transaction_type", edge_constant_properties=["is_encrypted"], edge_shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, node_df=nodes_df, diff --git a/python/tests/test_load_from_pandas.py b/python/tests/test_load_from_pandas.py index 3d572eb4cd..fa357d5bc4 100644 --- a/python/tests/test_load_from_pandas.py +++ b/python/tests/test_load_from_pandas.py @@ -381,8 +381,7 @@ def assertions3(g): properties=["weight", "marbles"], constant_properties=["marbles_const"], shared_constant_properties={"type": "Edge", "tag": "test_tag"}, - layer="test_layer", - layer_in_df=False, + layer_name="test_layer", ) assertions3(g) @@ -395,8 +394,7 @@ def assertions3(g): properties=["weight", "marbles"], constant_properties=["marbles_const"], shared_constant_properties={"type": "Edge", "tag": "test_tag"}, - layer="test_layer", - layer_in_df=False, + layer_name="test_layer", ) assertions3(g) @@ -416,13 +414,13 @@ def assertions4(g): g = Graph() g.load_edges_from_pandas( - edges_df, "time", "src", "dst", ["weight", "marbles"], layer="layers" + edges_df, "time", "src", "dst", ["weight", "marbles"], layer_col="layers" ) assertions4(g) g = PersistentGraph() g.load_edges_from_pandas( - edges_df, "time", "src", "dst", ["weight", "marbles"], layer="layers" + edges_df, "time", "src", "dst", ["weight", "marbles"], layer_col="layers" ) assertions4(g) @@ -442,8 +440,7 @@ def assertions5(g): "time", "src", "dst", - edge_layer="test_layer", - layer_in_df=False, + layer_name="test_layer", node_df=nodes_df, node_id="id", node_time="time", @@ -457,8 +454,7 @@ def assertions5(g): "time", "src", "dst", - edge_layer="test_layer", - layer_in_df=False, + layer_name="test_layer", node_df=nodes_df, node_id="id", node_time="time", @@ -494,7 +490,7 @@ def assertions6(g): "time", "src", "dst", - edge_layer="layers", + layer_col="layers", node_df=nodes_df, node_id="id", node_time="time", @@ -508,7 +504,7 @@ def assertions6(g): "time", "src", "dst", - edge_layer="layers", + layer_col="layers", node_df=nodes_df, node_id="id", node_time="time", @@ -545,7 +541,7 @@ def assertions7(g): node_id="id", node_time="time", node_properties=["name"], - edge_layer="layers", + layer_col="layers", ) g.load_node_props_from_pandas( nodes_df, @@ -577,7 +573,7 @@ def assertions8(g): "dst", constant_properties=["marbles_const"], shared_constant_properties={"tag": "test_tag"}, - layer="layers", + layer_col="layers", ) assertions8(g) @@ -591,7 +587,7 @@ def assertions8(g): node_id="id", node_time="time", node_properties=["name"], - edge_layer="layers", + layer_col="layers", ) g.load_node_props_from_pandas( nodes_df, @@ -607,7 +603,7 @@ def assertions8(g): "dst", constant_properties=["marbles_const"], shared_constant_properties={"tag": "test_tag"}, - layer="layers", + layer_col="layers", ) assertions8(g) @@ -637,8 +633,7 @@ def assertions_layers_in_df(g): ["weight", "marbles"], constant_properties=["marbles_const"], shared_constant_properties={"type": "Edge", "tag": "test_tag"}, - layer="layers", - layer_in_df=True, + layer_col="layers", ) assertions_layers_in_df(g) @@ -651,8 +646,7 @@ def assertions_layers_in_df(g): ["weight", "marbles"], constant_properties=["marbles_const"], shared_constant_properties={"type": "Edge", "tag": "test_tag"}, - layer="layers", - layer_in_df=True, + layer_col="layers", ) assertions_layers_in_df(g) diff --git a/python/tests/test_load_from_parquet.py b/python/tests/test_load_from_parquet.py index f10716f6fc..e17dd93fa0 100644 --- a/python/tests/test_load_from_parquet.py +++ b/python/tests/test_load_from_parquet.py @@ -229,7 +229,7 @@ def test_load_from_parquet_graphs(parquet_files): dst="dst", time="time", properties=["weight", "marbles"], - layer="layers" + layer_col="layers" ) assert_expected_nodes(g) assert_expected_edges(g) @@ -250,7 +250,7 @@ def test_load_from_parquet_graphs(parquet_files): dst="dst", constant_properties=["marbles_const"], shared_constant_properties={"tag": "test_tag"}, - layer="layers", + layer_col="layers", ) assert_expected_edge_properties(g) assert_expected_layers(g) @@ -276,8 +276,7 @@ def test_load_from_parquet_graphs(parquet_files): properties=["weight", "marbles"], constant_properties=["marbles_const"], shared_constant_properties={"type": "Edge", "tag": "test_tag"}, - layer="test_layer", - layer_in_df=False, + layer_name="test_layer", ) assert_expected_edge_properties_test_layer(g) assert_expected_test_layer(g) @@ -287,8 +286,7 @@ def test_load_from_parquet_graphs(parquet_files): edge_src="src", edge_dst="dst", edge_time="time", - edge_layer="test_layer", - layer_in_df=False, + layer_name="test_layer", node_parquet_path=nodes_parquet_file_path, node_id="id", node_time="time", @@ -303,7 +301,7 @@ def test_load_from_parquet_graphs(parquet_files): edge_src="src", edge_dst="dst", edge_time="time", - edge_layer="layers", + layer_col="layers", node_parquet_path=nodes_parquet_file_path, node_id="id", node_time="time", @@ -346,7 +344,7 @@ def test_load_from_parquet_persistent_graphs(parquet_files): dst="dst", time="time", properties=["weight", "marbles"], - layer="layers" + layer_col="layers" ) assert_expected_nodes(g) assert_expected_edges(g) @@ -367,7 +365,7 @@ def test_load_from_parquet_persistent_graphs(parquet_files): dst="dst", constant_properties=["marbles_const"], shared_constant_properties={"tag": "test_tag"}, - layer="layers", + layer_col="layers", ) assert_expected_edge_properties(g) assert_expected_layers(g) @@ -393,8 +391,7 @@ def test_load_from_parquet_persistent_graphs(parquet_files): properties=["weight", "marbles"], constant_properties=["marbles_const"], shared_constant_properties={"type": "Edge", "tag": "test_tag"}, - layer="test_layer", - layer_in_df=False, + layer_name="test_layer", ) assert_expected_edge_properties_test_layer(g) assert_expected_test_layer(g) @@ -404,8 +401,7 @@ def test_load_from_parquet_persistent_graphs(parquet_files): edge_src="src", edge_dst="dst", edge_time="time", - edge_layer="test_layer", - layer_in_df=False, + layer_name="test_layer", node_parquet_path=nodes_parquet_file_path, node_id="id", node_time="time", @@ -420,7 +416,7 @@ def test_load_from_parquet_persistent_graphs(parquet_files): edge_src="src", edge_dst="dst", edge_time="time", - edge_layer="layers", + layer_col="layers", node_parquet_path=nodes_parquet_file_path, node_id="id", node_time="time", diff --git a/raphtory/src/core/utils/errors.rs b/raphtory/src/core/utils/errors.rs index 4258f39074..3376093670 100644 --- a/raphtory/src/core/utils/errors.rs +++ b/raphtory/src/core/utils/errors.rs @@ -12,6 +12,8 @@ use tantivy::query::QueryParserError; #[derive(thiserror::Error, Debug)] pub enum GraphError { + #[error("You cannot set ‘layer_name’ and ‘layer_col’ at the same time. Please pick one or the other.")] + WrongLayerArgs, #[cfg(feature = "arrow")] #[error("Arrow error: {0}")] Arrow(#[from] error::PolarsError), diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 9d0923f78c..3952a60d10 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -207,8 +207,8 @@ pub(crate) fn load_edges_from_df< properties: Option<&[&str]>, constant_properties: Option<&[&str]>, shared_constant_properties: Option<&HashMap>, - layer: Option<&str>, - layer_in_df: bool, + layer_name: Option<&str>, + layer_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { let properties = properties.unwrap_or(&[]); @@ -226,10 +226,12 @@ pub(crate) fn load_edges_from_df< let src_index = df_view.get_index(src)?; let dst_index = df_view.get_index(dst)?; let time_index = df_view.get_index(time)?; - let layer_index = layer - .filter(|_| layer_in_df) - .map(|layer| df_view.get_index(layer.as_ref())) - .transpose()?; + let layer_index = if let Some(layer_col) = layer_col { + Some(df_view.get_index(layer_col.as_ref())) + } else { + None + }; + let layer_index = layer_index.transpose()?; for chunk in df_view.chunks { let df = chunk?; @@ -237,7 +239,7 @@ pub(crate) fn load_edges_from_df< let const_prop_iter = combine_properties(constant_properties, &constant_properties_indices, &df)?; - let layer = lift_layer(layer, layer_index, &df); + let layer = lift_layer(layer_name, layer_index, &df)?; if let (Some(src), Some(dst), Some(time)) = ( df.iter_col::(src_index), @@ -337,21 +339,23 @@ pub(crate) fn load_edges_deletions_from_df< time: &str, src: &str, dst: &str, - layer: Option<&str>, - layer_in_df: bool, + layer_name: Option<&str>, + layer_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { let src_index = df_view.get_index(src)?; let dst_index = df_view.get_index(dst)?; let time_index = df_view.get_index(time)?; - let layer_index = layer - .filter(|_| layer_in_df) - .map(|layer| df_view.get_index(layer.as_ref())) - .transpose()?; + let layer_index = if let Some(layer_col) = layer_col { + Some(df_view.get_index(layer_col.as_ref())) + } else { + None + }; + let layer_index = layer_index.transpose()?; for chunk in df_view.chunks { let df = chunk?; - let layer = lift_layer(layer, layer_index, &df); + let layer = lift_layer(layer_name, layer_index, &df)?; if let (Some(src), Some(dst), Some(time)) = ( df.iter_col::(src_index), @@ -526,8 +530,8 @@ pub(crate) fn load_edges_props_from_df< dst: &str, constant_properties: Option<&[&str]>, shared_constant_properties: Option<&HashMap>, - layer: Option<&str>, - layer_in_df: bool, + layer_name: Option<&str>, + layer_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { let constant_properties = constant_properties.unwrap_or(&[]); @@ -537,17 +541,19 @@ pub(crate) fn load_edges_props_from_df< .collect::, GraphError>>()?; let src_index = df_view.get_index(src)?; let dst_index = df_view.get_index(dst)?; - let layer_index = layer - .filter(|_| layer_in_df) - .map(|layer| df_view.get_index(layer.as_ref())) - .transpose()?; + let layer_index = if let Some(layer_col) = layer_col { + Some(df_view.get_index(layer_col.as_ref())) + } else { + None + }; + let layer_index = layer_index.transpose()?; for chunk in df_view.chunks { let df = chunk?; let const_prop_iter = combine_properties(constant_properties, &constant_properties_indices, &df)?; - let layer = lift_layer(layer, layer_index, &df); + let layer = lift_layer(layer_name, layer_index, &df)?; if let (Some(src), Some(dst)) = (df.iter_col::(src_index), df.iter_col::(dst_index)) diff --git a/raphtory/src/io/arrow/mod.rs b/raphtory/src/io/arrow/mod.rs index 8b64093d1e..02b05f7aba 100644 --- a/raphtory/src/io/arrow/mod.rs +++ b/raphtory/src/io/arrow/mod.rs @@ -44,8 +44,8 @@ mod test { .into_iter(), }; let graph = Graph::new(); - let layer: Option<&str> = None; - let layer_in_df: bool = true; + let layer_name: Option<&str> = None; + let layer_col: Option<&str> = None; load_edges_from_df( df, 5, @@ -55,8 +55,8 @@ mod test { Some(&*vec!["prop1", "prop2"]), None, None, - layer, - layer_in_df, + layer_name, + layer_col, &graph, ) .expect("failed to load edges from pretend df"); diff --git a/raphtory/src/io/arrow/prop_handler.rs b/raphtory/src/io/arrow/prop_handler.rs index acbda631d4..7b39bead12 100644 --- a/raphtory/src/io/arrow/prop_handler.rs +++ b/raphtory/src/io/arrow/prop_handler.rs @@ -343,25 +343,23 @@ pub(crate) fn lift_property<'a: 'b, 'b>( } pub(crate) fn lift_layer<'a>( - layer: Option<&str>, + layer_name: Option<&str>, layer_index: Option, df: &'a DFChunk, -) -> Box> + 'a> { - if let Some(layer) = layer { - match layer_index { - Some(index) => { - if let Some(col) = df.utf8::(index) { - Box::new(col.map(|v| v.map(|v| v.to_string()))) - } else if let Some(col) = df.utf8::(index) { - Box::new(col.map(|v| v.map(|v| v.to_string()))) - } else { - Box::new(std::iter::repeat(None)) - } +) -> Result> + 'a>, GraphError> { + match (layer_name, layer_index) { + (None, None) => Ok(Box::new(std::iter::repeat(None))), + (Some(layer_name), None) => Ok(Box::new(std::iter::repeat(Some(layer_name.to_string())))), + (None, Some(layer_index)) => { + if let Some(col) = df.utf8::(layer_index) { + Ok(Box::new(col.map(|v| v.map(|v| v.to_string())))) + } else if let Some(col) = df.utf8::(layer_index) { + Ok(Box::new(col.map(|v| v.map(|v| v.to_string())))) + } else { + Ok(Box::new(std::iter::repeat(None))) } - None => Box::new(std::iter::repeat(Some(layer.to_string()))), } - } else { - Box::new(std::iter::repeat(None)) + _ => Err(GraphError::WrongLayerArgs), } } diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 82817abfbe..bd2ed96747 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -73,17 +73,15 @@ pub fn load_edges_from_parquet< properties: Option<&[&str]>, constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { let parquet_path = parquet_path.as_ref(); let mut cols_to_check = vec![src, dst, time]; cols_to_check.extend(properties.unwrap_or(&Vec::new())); cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); - if layer_in_df.unwrap_or(false) { - if let Some(ref layer) = layer { - cols_to_check.push(layer.as_ref()); - } + if let Some(ref layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); } for path in get_parquet_file_paths(parquet_path)? { @@ -99,8 +97,8 @@ pub fn load_edges_from_parquet< properties, constant_properties, shared_const_properties, - layer, - layer_in_df.unwrap_or(true), + layer_name, + layer_col, graph, ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; @@ -149,14 +147,12 @@ pub fn load_edge_props_from_parquet< dst: &str, constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![src, dst]; - if layer_in_df.unwrap_or(false) { - if let Some(ref layer) = layer { - cols_to_check.push(layer.as_ref()); - } + if let Some(ref layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); } cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); let size = cols_to_check.len(); @@ -171,8 +167,8 @@ pub fn load_edge_props_from_parquet< dst, constant_properties, shared_const_properties, - layer, - layer_in_df.unwrap_or(true), + layer_name, + layer_col, graph.core_graph(), ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; @@ -189,30 +185,19 @@ pub fn load_edges_deletions_from_parquet< time: &str, src: &str, dst: &str, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![src, dst, time]; - if layer_in_df.unwrap_or(true) { - if let Some(ref layer) = layer { - cols_to_check.push(layer.as_ref()); - } + if let Some(ref layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); } let size = cols_to_check.len(); for path in get_parquet_file_paths(parquet_path)? { let df_view = process_parquet_file_to_df(path.as_path(), &cols_to_check)?; df_view.check_cols_exist(&cols_to_check)?; - load_edges_deletions_from_df( - df_view, - size, - time, - src, - dst, - layer, - layer_in_df.unwrap_or(true), - graph, - ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + load_edges_deletions_from_df(df_view, size, time, src, dst, layer_name, layer_col, graph) + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } Ok(()) } diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 67e0f728bf..0542c53a53 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -477,8 +477,8 @@ impl PyGraph { /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. /// edge_constant_properties (list): The column names for the constant properties (optional) Defaults to None. /// edge_shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - /// edge_layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. + /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// node_df (pandas.DataFrame): The DataFrame containing the nodes (optional) Defaults to None. /// node_id (str): The column name for the node ids (optional) Defaults to None. /// node_time (str): The column name for the node timestamps (optional) Defaults to None. @@ -493,7 +493,7 @@ impl PyGraph { #[staticmethod] #[pyo3( signature = (edge_df, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, - edge_layer = None, layer_in_df = true, node_df = None, node_id = None, node_time = None, node_properties = None, + layer_name = None, layer_col = None, node_df = None, node_id = None, node_time = None, node_properties = None, node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_in_df = true) )] fn load_from_pandas( @@ -504,8 +504,8 @@ impl PyGraph { edge_properties: Option>, edge_constant_properties: Option>, edge_shared_constant_properties: Option>, - edge_layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, node_df: Option<&PyAny>, node_id: Option<&str>, node_time: Option<&str>, @@ -542,8 +542,8 @@ impl PyGraph { .as_ref() .map(|props| props.as_ref()), edge_shared_constant_properties.as_ref(), - edge_layer, - layer_in_df, + layer_name, + layer_col, )?; Ok(graph) } @@ -558,8 +558,8 @@ impl PyGraph { /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. /// edge_constant_properties (list): The column names for the constant properties (optional) Defaults to None. /// edge_shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - /// edge_layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. + /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// node_parquet_path (str): Parquet file or directory of Parquet files containing the nodes (optional) Defaults to None. /// node_id (str): The column name for the node ids (optional) Defaults to None. /// node_time (str): The column name for the node timestamps (optional) Defaults to None. @@ -574,7 +574,7 @@ impl PyGraph { #[staticmethod] #[pyo3( signature = (edge_parquet_path, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, - edge_layer = None, layer_in_df = true, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, + layer_name = None, layer_col = None, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_in_df = true) )] fn load_from_parquet( @@ -585,8 +585,8 @@ impl PyGraph { edge_properties: Option>, edge_constant_properties: Option>, edge_shared_constant_properties: Option>, - edge_layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, node_parquet_path: Option, node_id: Option<&str>, node_time: Option<&str>, @@ -626,8 +626,8 @@ impl PyGraph { .as_ref() .map(|props| props.as_ref()), edge_shared_constant_properties.as_ref(), - edge_layer, - layer_in_df, + layer_name, + layer_col, )?; Ok(graph) @@ -723,13 +723,13 @@ impl PyGraph { /// properties (List): List of edge property column names. Defaults to None. (optional) /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dateframe or if it should be used directly as the layer for all edges (optional) defaults to True. + /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: /// Result<(), GraphError>: Result of the operation. #[pyo3( - signature = (df, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true) + signature = (df, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None) )] fn load_edges_from_pandas( &self, @@ -740,8 +740,8 @@ impl PyGraph { properties: Option>, constant_properties: Option>, shared_constant_properties: Option>, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edges_from_pandas( self.graph.core_graph(), @@ -752,8 +752,8 @@ impl PyGraph { properties.as_ref().map(|props| props.as_ref()), constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer, - layer_in_df, + layer_name, + layer_col, ) } @@ -767,13 +767,13 @@ impl PyGraph { /// properties (List): List of edge property column names. Defaults to None. (optional) /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. + /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: /// Result<(), GraphError>: Result of the operation. #[pyo3( - signature = (parquet_path, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true) + signature = (parquet_path, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None) )] fn load_edges_from_parquet( &self, @@ -784,8 +784,8 @@ impl PyGraph { properties: Option>, constant_properties: Option>, shared_constant_properties: Option>, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edges_from_parquet( &self.graph, @@ -796,8 +796,8 @@ impl PyGraph { properties.as_ref().map(|props| props.as_ref()), constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer, - layer_in_df, + layer_name, + layer_col, ) } @@ -863,13 +863,13 @@ impl PyGraph { /// dst (str): The column name for the destination node. /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): Layer name. Defaults to None. (optional) - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. + /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: /// Result<(), GraphError>: Result of the operation. #[pyo3( - signature = (df, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true) + signature = (df, src, dst, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None) )] fn load_edge_props_from_pandas( &self, @@ -878,8 +878,8 @@ impl PyGraph { dst: &str, constant_properties: Option>, shared_constant_properties: Option>, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edge_props_from_pandas( self.graph.core_graph(), @@ -888,8 +888,8 @@ impl PyGraph { dst, constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer, - layer_in_df, + layer_name, + layer_col, ) } @@ -901,13 +901,13 @@ impl PyGraph { /// dst (str): The column name for the destination node. /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): Layer name. Defaults to None. (optional) - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. + /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: /// Result<(), GraphError>: Result of the operation. #[pyo3( - signature = (parquet_path, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true) + signature = (parquet_path, src, dst, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None) )] fn load_edge_props_from_parquet( &self, @@ -916,8 +916,8 @@ impl PyGraph { dst: &str, constant_properties: Option>, shared_constant_properties: Option>, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edge_props_from_parquet( &self.graph, @@ -926,8 +926,8 @@ impl PyGraph { dst, constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer, - layer_in_df, + layer_name, + layer_col, ) } } diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index da5451c27d..28e07cfafd 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -388,8 +388,8 @@ impl PyPersistentGraph { /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. /// edge_constant_properties (list): The column names for the constant properties (optional) Defaults to None. /// edge_shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - /// edge_layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. + /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// node_df (pandas.DataFrame): The DataFrame containing the nodes (optional) Defaults to None. /// node_id (str): The column name for the node ids (optional) Defaults to None. /// node_time (str): The column name for the node timestamps (optional) Defaults to None. @@ -403,7 +403,7 @@ impl PyPersistentGraph { /// Graph: The loaded Graph object. #[staticmethod] #[pyo3(signature = (edge_df, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, - edge_layer = None, layer_in_df = true, node_df = None, node_id = None, node_time = None, node_properties = None, + layer_name = None, layer_col = None, node_df = None, node_id = None, node_time = None, node_properties = None, node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_in_df = true))] fn load_from_pandas( edge_df: &PyAny, @@ -413,8 +413,8 @@ impl PyPersistentGraph { edge_properties: Option>, edge_constant_properties: Option>, edge_shared_constant_properties: Option>, - edge_layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, node_df: Option<&PyAny>, node_id: Option<&str>, node_time: Option<&str>, @@ -435,8 +435,8 @@ impl PyPersistentGraph { edge_properties, edge_constant_properties, edge_shared_constant_properties, - edge_layer, - layer_in_df, + layer_name, + layer_col, )?; if let (Some(node_df), Some(node_id), Some(node_time)) = (node_df, node_id, node_time) { graph.load_nodes_from_pandas( @@ -463,8 +463,8 @@ impl PyPersistentGraph { /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. /// edge_constant_properties (list): The column names for the constant properties (optional) Defaults to None. /// edge_shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - /// edge_layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. + /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// node_parquet_path (str): Parquet file or directory of Parquet files containing the nodes (optional) Defaults to None. /// node_id (str): The column name for the node ids (optional) Defaults to None. /// node_time (str): The column name for the node timestamps (optional) Defaults to None. @@ -478,7 +478,7 @@ impl PyPersistentGraph { /// Graph: The loaded Graph object. #[staticmethod] #[pyo3(signature = (edge_parquet_path, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, - edge_layer = None, layer_in_df = true, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, + layer_name = None, layer_col = None, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_in_df = true))] fn load_from_parquet( edge_parquet_path: PathBuf, @@ -488,8 +488,8 @@ impl PyPersistentGraph { edge_properties: Option>, edge_constant_properties: Option>, edge_shared_constant_properties: Option>, - edge_layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, node_parquet_path: Option, node_id: Option<&str>, node_time: Option<&str>, @@ -524,8 +524,8 @@ impl PyPersistentGraph { edge_properties, edge_constant_properties, edge_shared_constant_properties, - edge_layer, - layer_in_df, + layer_name, + layer_col, )?; Ok(graph.graph) } @@ -616,12 +616,12 @@ impl PyPersistentGraph { /// properties (List): List of edge property column names. Defaults to None. (optional) /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. + /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true))] + #[pyo3(signature = (df, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None))] fn load_edges_from_pandas( &self, df: &PyAny, @@ -631,8 +631,8 @@ impl PyPersistentGraph { properties: Option>, constant_properties: Option>, shared_constant_properties: Option>, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edges_from_pandas( &self.graph.0, @@ -643,8 +643,8 @@ impl PyPersistentGraph { properties.as_ref().map(|props| props.as_ref()), constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer, - layer_in_df, + layer_name, + layer_col, ) } @@ -658,12 +658,12 @@ impl PyPersistentGraph { /// properties (List): List of edge property column names. Defaults to None. (optional) /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. + /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true))] + #[pyo3(signature = (parquet_path, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None))] fn load_edges_from_parquet( &self, parquet_path: PathBuf, @@ -673,8 +673,8 @@ impl PyPersistentGraph { properties: Option>, constant_properties: Option>, shared_constant_properties: Option>, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edges_from_parquet( &self.graph, @@ -685,8 +685,8 @@ impl PyPersistentGraph { properties.as_ref().map(|props| props.as_ref()), constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer, - layer_in_df, + layer_name, + layer_col, ) } @@ -697,22 +697,22 @@ impl PyPersistentGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. + /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, time, src, dst, layer = None, layer_in_df = true))] + #[pyo3(signature = (df, time, src, dst, layer_name = None, layer_col = None))] fn load_edges_deletions_from_pandas( &self, df: &PyAny, time: &str, src: &str, dst: &str, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { - load_edges_deletions_from_pandas(&self.graph.0, df, time, src, dst, layer, layer_in_df) + load_edges_deletions_from_pandas(&self.graph.0, df, time, src, dst, layer_name, layer_col) } /// Load edges deletions from a Parquet file into the graph. @@ -722,20 +722,20 @@ impl PyPersistentGraph { /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. /// time (str): The column name for the update timestamps. - /// layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. + /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, time, src, dst, layer = None, layer_in_df = true))] + #[pyo3(signature = (parquet_path, time, src, dst, layer_name = None, layer_col = None))] fn load_edges_deletions_from_parquet( &self, parquet_path: PathBuf, time: &str, src: &str, dst: &str, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edges_deletions_from_parquet( &self.graph, @@ -743,8 +743,8 @@ impl PyPersistentGraph { time, src, dst, - layer, - layer_in_df, + layer_name, + layer_col, ) } @@ -810,12 +810,12 @@ impl PyPersistentGraph { /// dst (str): The column name for the destination node. /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): Layer name. Defaults to None. (optional) - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. + /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true))] + #[pyo3(signature = (df, src, dst, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None))] fn load_edge_props_from_pandas( &self, df: &PyAny, @@ -823,8 +823,8 @@ impl PyPersistentGraph { dst: &str, constant_properties: Option>, shared_constant_properties: Option>, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edge_props_from_pandas( &self.graph.0, @@ -833,8 +833,8 @@ impl PyPersistentGraph { dst, constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer, - layer_in_df, + layer_name, + layer_col, ) } @@ -846,12 +846,12 @@ impl PyPersistentGraph { /// dst (str): The column name for the destination node. /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): Layer name. Defaults to None. (optional) - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. + /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_in_df = true))] + #[pyo3(signature = (parquet_path, src, dst, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None))] fn load_edge_props_from_parquet( &self, parquet_path: PathBuf, @@ -859,8 +859,8 @@ impl PyPersistentGraph { dst: &str, constant_properties: Option>, shared_constant_properties: Option>, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edge_props_from_parquet( &self.graph, @@ -869,8 +869,8 @@ impl PyPersistentGraph { dst, constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer, - layer_in_df, + layer_name, + layer_col, ) } } diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index 27e93c0a31..4ee82e3625 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -58,8 +58,8 @@ pub fn load_edges_from_pandas( properties: Option<&[&str]>, constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { Python::with_gil(|py| { let size: usize = py @@ -73,10 +73,8 @@ pub fn load_edges_from_pandas( let mut cols_to_check = vec![src, dst, time]; cols_to_check.extend(properties.unwrap_or(&Vec::new())); cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); - if layer_in_df.unwrap_or(false) { - if let Some(ref layer) = layer { - cols_to_check.push(layer.as_ref()); - } + if let Some(ref layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); } let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; @@ -90,8 +88,8 @@ pub fn load_edges_from_pandas( properties, constant_properties, shared_const_properties, - layer, - layer_in_df.unwrap_or(true), + layer_name, + layer_col, graph, ) .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; @@ -142,8 +140,8 @@ pub fn load_edge_props_from_pandas( dst: &str, constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { Python::with_gil(|py| { let size: usize = py @@ -154,10 +152,8 @@ pub fn load_edge_props_from_pandas( )? .extract()?; let mut cols_to_check = vec![src, dst]; - if layer_in_df.unwrap_or(false) { - if let Some(ref layer) = layer { - cols_to_check.push(layer.as_ref()); - } + if let Some(ref layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); } cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; @@ -169,8 +165,8 @@ pub fn load_edge_props_from_pandas( dst, constant_properties, shared_const_properties, - layer, - layer_in_df.unwrap_or(true), + layer_name, + layer_col, graph, ) .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; @@ -186,8 +182,8 @@ pub fn load_edges_deletions_from_pandas( time: &str, src: &str, dst: &str, - layer: Option<&str>, - layer_in_df: Option, + layer_name: Option<&str>, + layer_col: Option<&str>, ) -> Result<(), GraphError> { Python::with_gil(|py| { let size: usize = py @@ -199,10 +195,8 @@ pub fn load_edges_deletions_from_pandas( .extract()?; let mut cols_to_check = vec![src, dst, time]; - if layer_in_df.unwrap_or(true) { - if let Some(ref layer) = layer { - cols_to_check.push(layer.as_ref()); - } + if let Some(ref layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); } let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; @@ -213,8 +207,8 @@ pub fn load_edges_deletions_from_pandas( time, src, dst, - layer, - layer_in_df.unwrap_or(true), + layer_name, + layer_col, graph.core_graph(), ) .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; From 9a3d29d3358ee2fd18601810ccd3e2f7bf497b40 Mon Sep 17 00:00:00 2001 From: Shivam Kapoor <4599890+iamsmkr@users.noreply.github.com> Date: Thu, 15 Aug 2024 15:04:11 +0100 Subject: [PATCH 07/17] replace node_type_in_df with node_type_col --- python/tests/test_load_from_pandas.py | 20 +++++----- python/tests/test_load_from_parquet.py | 4 +- raphtory/src/core/utils/errors.rs | 4 +- raphtory/src/io/arrow/df_loaders.rs | 37 ++++++++++------- raphtory/src/io/arrow/mod.rs | 2 +- raphtory/src/io/arrow/prop_handler.rs | 5 ++- raphtory/src/io/parquet_loaders.rs | 10 ++--- raphtory/src/python/graph/graph.rs | 40 +++++++++---------- .../src/python/graph/graph_with_deletions.rs | 40 +++++++++---------- .../src/python/graph/io/pandas_loaders.rs | 10 ++--- 10 files changed, 89 insertions(+), 83 deletions(-) diff --git a/python/tests/test_load_from_pandas.py b/python/tests/test_load_from_pandas.py index fa357d5bc4..912813bc9d 100644 --- a/python/tests/test_load_from_pandas.py +++ b/python/tests/test_load_from_pandas.py @@ -298,7 +298,7 @@ def assertions1(g): nodes_df, "id", "time", - "node_type", + node_type_col="node_type", properties=["name"], shared_constant_properties={"tag": "test_tag"}, ) @@ -309,7 +309,7 @@ def assertions1(g): nodes_df, "id", "time", - "node_type", + node_type_col="node_type", properties=["name"], shared_constant_properties={"tag": "test_tag"}, ) @@ -330,7 +330,7 @@ def assertions2(g): nodes_df, "id", "time", - "node_type", + node_type_col="node_type", properties=["name"], constant_properties=["type"], ) @@ -341,7 +341,7 @@ def assertions2(g): nodes_df, "id", "time", - "node_type", + node_type_col="node_type", properties=["name"], constant_properties=["type"], ) @@ -997,34 +997,34 @@ def edges_assertions(g): g = Graph() g.load_nodes_from_pandas( - nodes_df, "id", "time", node_type="node_type", node_type_in_df=False + nodes_df, "id", "time", node_type="node_type" ) nodes_assertions2(g) g = PersistentGraph() g.load_nodes_from_pandas( - nodes_df, "id", "time", node_type="node_type", node_type_in_df=False + nodes_df, "id", "time", node_type="node_type" ) nodes_assertions2(g) g = Graph() g.load_nodes_from_pandas( - nodes_df2, "id", "time", node_type="node_type", node_type_in_df=False + nodes_df2, "id", "time", node_type="node_type" ) nodes_assertions2(g) g = PersistentGraph() g.load_nodes_from_pandas( - nodes_df2, "id", "time", node_type="node_type", node_type_in_df=False + nodes_df2, "id", "time", node_type="node_type" ) nodes_assertions2(g) g = Graph() g.load_nodes_from_pandas( - nodes_df2, "id", "time", node_type="node_type", node_type_in_df=True + nodes_df2, "id", "time", node_type_col="node_type" ) nodes_assertions3(g) g = PersistentGraph() g.load_nodes_from_pandas( - nodes_df2, "id", "time", node_type="node_type", node_type_in_df=True + nodes_df2, "id", "time", node_type_col="node_type" ) nodes_assertions3(g) diff --git a/python/tests/test_load_from_parquet.py b/python/tests/test_load_from_parquet.py index e17dd93fa0..d8eafda207 100644 --- a/python/tests/test_load_from_parquet.py +++ b/python/tests/test_load_from_parquet.py @@ -260,7 +260,7 @@ def test_load_from_parquet_graphs(parquet_files): parquet_path=nodes_parquet_file_path, id="id", time="time", - node_type="node_type", + node_type_col="node_type", properties=["name"], shared_constant_properties={"tag": "test_tag"}, ) @@ -375,7 +375,7 @@ def test_load_from_parquet_persistent_graphs(parquet_files): parquet_path=nodes_parquet_file_path, id="id", time="time", - node_type="node_type", + node_type_col="node_type", properties=["name"], shared_constant_properties={"tag": "test_tag"}, ) diff --git a/raphtory/src/core/utils/errors.rs b/raphtory/src/core/utils/errors.rs index 3376093670..a23c4eebd7 100644 --- a/raphtory/src/core/utils/errors.rs +++ b/raphtory/src/core/utils/errors.rs @@ -12,8 +12,8 @@ use tantivy::query::QueryParserError; #[derive(thiserror::Error, Debug)] pub enum GraphError { - #[error("You cannot set ‘layer_name’ and ‘layer_col’ at the same time. Please pick one or the other.")] - WrongLayerArgs, + #[error("You cannot set ‘{0}’ and ‘{1}’ at the same time. Please pick one or the other.")] + WrongNumOfArgs(String, String), #[cfg(feature = "arrow")] #[error("Arrow error: {0}")] Arrow(#[from] error::PolarsError), diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 3952a60d10..cf413df59c 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -45,7 +45,7 @@ pub(crate) fn load_nodes_from_df< constant_properties: Option<&[&str]>, shared_constant_properties: Option<&HashMap>, node_type: Option<&str>, - node_type_in_df: bool, + node_type_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { let properties = properties.unwrap_or(&[]); @@ -60,10 +60,13 @@ pub(crate) fn load_nodes_from_df< .map(|name| df_view.get_index(name)) .collect::, GraphError>>()?; - let node_type_index = node_type - .filter(|_| node_type_in_df) - .map(|node_type| df_view.get_index(node_type)) - .transpose()?; + let node_type_index = if let Some(node_type_col) = node_type_col { + Some(df_view.get_index(node_type_col.as_ref())) + } else { + None + }; + let node_type_index = node_type_index.transpose()?; + let node_id_index = df_view.get_index(node_id)?; let time_index = df_view.get_index(time)?; @@ -74,13 +77,15 @@ pub(crate) fn load_nodes_from_df< let const_prop_iter = combine_properties(constant_properties, &constant_properties_indices, &df)?; - let node_type: Box>> = match node_type { - Some(node_type) => match node_type_index { - Some(index) => { + let node_type: Result>>, GraphError> = + match (node_type, node_type_index) { + (None, None) => Ok(Box::new(iter::repeat(None))), + (Some(node_type), None) => Ok(Box::new(iter::repeat(Some(node_type)))), + (None, Some(node_type_index)) => { let iter_res: Result>>, GraphError> = - if let Some(node_types) = df.utf8::(index) { + if let Some(node_types) = df.utf8::(node_type_index) { Ok(Box::new(node_types)) - } else if let Some(node_types) = df.utf8::(index) { + } else if let Some(node_types) = df.utf8::(node_type_index) { Ok(Box::new(node_types)) } else { Err(GraphError::LoadFailure( @@ -88,12 +93,14 @@ pub(crate) fn load_nodes_from_df< .to_string(), )) }; - iter_res? + iter_res } - None => Box::new(iter::repeat(Some(node_type))), - }, - None => Box::new(iter::repeat(None)), - }; + _ => Err(GraphError::WrongNumOfArgs( + "node_type".to_string(), + "node_type_col".to_string(), + )), + }; + let node_type = node_type?; if let (Some(node_id), Some(time)) = ( df.iter_col::(node_id_index), diff --git a/raphtory/src/io/arrow/mod.rs b/raphtory/src/io/arrow/mod.rs index 02b05f7aba..ee20a08a26 100644 --- a/raphtory/src/io/arrow/mod.rs +++ b/raphtory/src/io/arrow/mod.rs @@ -146,7 +146,7 @@ mod test { None, None, Some("node_type"), - false, + None, &graph, ) .expect("failed to load nodes from pretend df"); diff --git a/raphtory/src/io/arrow/prop_handler.rs b/raphtory/src/io/arrow/prop_handler.rs index 7b39bead12..2d91f6e542 100644 --- a/raphtory/src/io/arrow/prop_handler.rs +++ b/raphtory/src/io/arrow/prop_handler.rs @@ -359,7 +359,10 @@ pub(crate) fn lift_layer<'a>( Ok(Box::new(std::iter::repeat(None))) } } - _ => Err(GraphError::WrongLayerArgs), + _ => Err(GraphError::WrongNumOfArgs( + "layer_name".to_string(), + "layer_col".to_string(), + )), } } diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index bd2ed96747..c5ae7ba9c0 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -28,7 +28,7 @@ pub fn load_nodes_from_parquet< id: &str, time: &str, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, properties: Option<&[&str]>, constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, @@ -36,10 +36,8 @@ pub fn load_nodes_from_parquet< let mut cols_to_check = vec![id, time]; cols_to_check.extend(properties.unwrap_or(&Vec::new())); cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); - if node_type_in_df.unwrap_or(true) { - if let Some(ref node_type) = node_type { - cols_to_check.push(node_type.as_ref()); - } + if let Some(ref node_type_col) = node_type_col { + cols_to_check.push(node_type_col.as_ref()); } for path in get_parquet_file_paths(parquet_path)? { @@ -53,7 +51,7 @@ pub fn load_nodes_from_parquet< constant_properties, shared_const_properties, node_type, - node_type_in_df.unwrap_or(true), + node_type_col, graph, ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 0542c53a53..a186f6519a 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -485,8 +485,8 @@ impl PyGraph { /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. /// node_constant_properties (list): The column names for the node constant properties (optional) Defaults to None. /// node_shared_constant_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type + /// node_type (str): The node type (optional). Defaults to None. + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. /// /// Returns: /// Graph: The loaded Graph object. @@ -494,7 +494,7 @@ impl PyGraph { #[pyo3( signature = (edge_df, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, layer_name = None, layer_col = None, node_df = None, node_id = None, node_time = None, node_properties = None, - node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_in_df = true) + node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_col = None) )] fn load_from_pandas( edge_df: &PyAny, @@ -513,7 +513,7 @@ impl PyGraph { node_constant_properties: Option>, node_shared_constant_properties: Option>, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, ) -> Result { let graph = Graph::new(); if let (Some(node_df), Some(node_id), Some(node_time)) = (node_df, node_id, node_time) { @@ -523,7 +523,7 @@ impl PyGraph { node_id, node_time, node_type, - node_type_in_df, + node_type_col, node_properties.as_ref().map(|props| props.as_ref()), node_constant_properties .as_ref() @@ -566,8 +566,8 @@ impl PyGraph { /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. /// node_constant_properties (list): The column names for the node constant properties (optional) Defaults to None. /// node_shared_constant_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type + /// node_type (str): The node type (optional). Defaults to None. + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. /// /// Returns: /// Graph: The loaded Graph object. @@ -575,7 +575,7 @@ impl PyGraph { #[pyo3( signature = (edge_parquet_path, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, layer_name = None, layer_col = None, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, - node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_in_df = true) + node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_col = None) )] fn load_from_parquet( edge_parquet_path: PathBuf, @@ -594,7 +594,7 @@ impl PyGraph { node_constant_properties: Option>, node_shared_constant_properties: Option>, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, ) -> Result { let graph = Graph::new(); @@ -607,7 +607,7 @@ impl PyGraph { node_id, node_time, node_type, - node_type_in_df, + node_type_col, node_properties.as_ref().map(|props| props.as_ref()), node_constant_properties .as_ref() @@ -639,15 +639,15 @@ impl PyGraph { /// df (pandas.DataFrame): The Pandas DataFrame containing the nodes. /// id (str): The column name for the node IDs. /// time (str): The column name for the timestamps. - /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type + /// node_type (str): The node type (optional). Defaults to None. + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. /// properties (List): List of node property column names. Defaults to None. (optional) /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) /// shared_constant_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: /// Result<(), GraphError>: Result of the operation. #[pyo3( - signature = (df, id, time, node_type = None, node_type_in_df = true, properties = None, constant_properties = None, shared_constant_properties = None) + signature = (df, id, time, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None) )] fn load_nodes_from_pandas( &self, @@ -655,7 +655,7 @@ impl PyGraph { id: &str, time: &str, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, properties: Option>, constant_properties: Option>, shared_constant_properties: Option>, @@ -666,7 +666,7 @@ impl PyGraph { id, time, node_type, - node_type_in_df, + node_type_col, properties.as_ref().map(|props| props.as_ref()), constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), @@ -679,15 +679,15 @@ impl PyGraph { /// parquet_path (str): Parquet file or directory of Parquet files containing the nodes /// id (str): The column name for the node IDs. /// time (str): The column name for the timestamps. - /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type + /// node_type (str): The node type (optional). Defaults to None. + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. /// properties (List): List of node property column names. Defaults to None. (optional) /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) /// shared_constant_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: /// Result<(), GraphError>: Result of the operation. #[pyo3( - signature = (parquet_path, id, time, node_type = None, node_type_in_df = true, properties = None, constant_properties = None, shared_constant_properties = None) + signature = (parquet_path, id, time, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None) )] fn load_nodes_from_parquet( &self, @@ -695,7 +695,7 @@ impl PyGraph { id: &str, time: &str, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, properties: Option>, constant_properties: Option>, shared_constant_properties: Option>, @@ -706,7 +706,7 @@ impl PyGraph { id, time, node_type, - node_type_in_df, + node_type_col, properties.as_ref().map(|props| props.as_ref()), constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index 28e07cfafd..5c2417dcd3 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -396,15 +396,15 @@ impl PyPersistentGraph { /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. /// node_constant_properties (list): The column names for the node constant properties (optional) Defaults to None. /// node_shared_constant_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type + /// node_type (str): The node type (optional). Defaults to None. + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. /// /// Returns: /// Graph: The loaded Graph object. #[staticmethod] #[pyo3(signature = (edge_df, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, layer_name = None, layer_col = None, node_df = None, node_id = None, node_time = None, node_properties = None, - node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_in_df = true))] + node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_col = None))] fn load_from_pandas( edge_df: &PyAny, edge_time: &str, @@ -422,7 +422,7 @@ impl PyPersistentGraph { node_constant_properties: Option>, node_shared_constant_properties: Option>, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, ) -> Result { let graph = PyPersistentGraph { graph: PersistentGraph::new(), @@ -444,7 +444,7 @@ impl PyPersistentGraph { node_id, node_time, node_type, - node_type_in_df, + node_type_col, node_properties, node_constant_properties, node_shared_constant_properties, @@ -471,15 +471,15 @@ impl PyPersistentGraph { /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. /// node_constant_properties (list): The column names for the node constant properties (optional) Defaults to None. /// node_shared_constant_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type + /// node_type (str): The node type (optional). Defaults to None. + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. /// /// Returns: /// Graph: The loaded Graph object. #[staticmethod] #[pyo3(signature = (edge_parquet_path, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, layer_name = None, layer_col = None, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, - node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_in_df = true))] + node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_col = None))] fn load_from_parquet( edge_parquet_path: PathBuf, edge_time: &str, @@ -497,7 +497,7 @@ impl PyPersistentGraph { node_constant_properties: Option>, node_shared_constant_properties: Option>, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, ) -> Result { let graph = PyPersistentGraph { graph: PersistentGraph::new(), @@ -510,7 +510,7 @@ impl PyPersistentGraph { node_id, node_time, node_type, - node_type_in_df, + node_type_col, node_properties, node_constant_properties, node_shared_constant_properties, @@ -536,21 +536,21 @@ impl PyPersistentGraph { /// df (pandas.DataFrame): The Pandas DataFrame containing the nodes. /// id (str): The column name for the node IDs. /// time (str): The column name for the timestamps. - /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type + /// node_type (str): The node type (optional). Defaults to None. + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. /// properties (List): List of node property column names. Defaults to None. (optional) /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) /// shared_constant_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, id, time, node_type = None, node_type_in_df = true, properties = None, constant_properties = None, shared_constant_properties = None))] + #[pyo3(signature = (df, id, time, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None))] fn load_nodes_from_pandas( &self, df: &PyAny, id: &str, time: &str, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, properties: Option>, constant_properties: Option>, shared_constant_properties: Option>, @@ -561,7 +561,7 @@ impl PyPersistentGraph { id, time, node_type, - node_type_in_df, + node_type_col, properties.as_ref().map(|props| props.as_ref()), constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), @@ -574,21 +574,21 @@ impl PyPersistentGraph { /// parquet_path (str): Parquet file or directory of Parquet files containing the nodes /// id (str): The column name for the node IDs. /// time (str): The column name for the timestamps. - /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type + /// node_type (str): The node type (optional). Defaults to None. + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. /// properties (List): List of node property column names. Defaults to None. (optional) /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) /// shared_constant_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, id, time, node_type = None, node_type_in_df = true, properties = None, constant_properties = None, shared_constant_properties = None))] + #[pyo3(signature = (parquet_path, id, time, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None))] fn load_nodes_from_parquet( &self, parquet_path: PathBuf, id: &str, time: &str, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, properties: Option>, constant_properties: Option>, shared_constant_properties: Option>, @@ -599,7 +599,7 @@ impl PyPersistentGraph { id, time, node_type, - node_type_in_df, + node_type_col, properties.as_ref().map(|props| props.as_ref()), constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index 4ee82e3625..00d0cb0421 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -14,7 +14,7 @@ pub fn load_nodes_from_pandas( id: &str, time: &str, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, properties: Option<&[&str]>, constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, @@ -23,10 +23,8 @@ pub fn load_nodes_from_pandas( let mut cols_to_check = vec![id, time]; cols_to_check.extend(properties.unwrap_or(&Vec::new())); cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); - if node_type_in_df.unwrap_or(true) { - if let Some(ref node_type) = node_type { - cols_to_check.push(node_type.as_ref()); - } + if let Some(ref node_type_col) = node_type_col { + cols_to_check.push(node_type_col.as_ref()); } let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; @@ -39,7 +37,7 @@ pub fn load_nodes_from_pandas( constant_properties, shared_const_properties, node_type, - node_type_in_df.unwrap_or(true), + node_type_col, graph, ) .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; From e2a73905eeda6b64ba808d760d098964dea10555 Mon Sep 17 00:00:00 2001 From: Shivam Kapoor <4599890+iamsmkr@users.noreply.github.com> Date: Thu, 15 Aug 2024 15:07:45 +0100 Subject: [PATCH 08/17] fix notebook --- examples/python/socio-patterns/example.ipynb | 66 ++++++++++---------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/examples/python/socio-patterns/example.ipynb b/examples/python/socio-patterns/example.ipynb index 833be8e6c8..b727f28446 100644 --- a/examples/python/socio-patterns/example.ipynb +++ b/examples/python/socio-patterns/example.ipynb @@ -99,13 +99,13 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "16de15732c834eafb018c88c0b052c00", + "model_id": "ec2b2e5c92b54503940f06b3c30f184a", "version_major": 2, "version_minor": 0 }, @@ -120,7 +120,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Graph(number_of_nodes=22, number_of_edges=290, number_of_temporal_edges=3196, earliest_time=1560419400000, latest_time=1562756700000)\n" + "Graph(number_of_nodes=22, number_of_edges=290, number_of_temporal_edges=6392, earliest_time=1560419400000, latest_time=1562756700000)\n" ] } ], @@ -130,7 +130,7 @@ " src=\"Actor\",\n", " dst=\"Recipient\",\n", " time=\"DateTime\",\n", - " layer=\"Behavior\",\n", + " layer_col=\"Behavior\",\n", " properties=[\"Weight\"],\n", ")\n", "print(g)" @@ -149,7 +149,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -159,8 +159,8 @@ "Stats on the graph structure:\n", "Number of nodes (Baboons): 22\n", "Number of unique edges (src,dst,layer): 290\n", - "Total interactions (edge updates): 3196\n", - "Unique layers: ['_default', 'Grooming', 'Resting', 'Presenting', 'Playing with', 'Grunting-Lipsmacking', 'Supplanting', 'Threatening', 'Submission', 'Touching', 'Avoiding', 'Attacking', 'Carrying', 'Embracing', 'Mounting', 'Copulating', 'Chasing'] \n", + "Total interactions (edge updates): 6392\n", + "Unique layers: ['_default', 'Behavior', 'Grooming', 'Resting', 'Presenting', 'Playing with', 'Grunting-Lipsmacking', 'Supplanting', 'Threatening', 'Submission', 'Touching', 'Avoiding', 'Attacking', 'Carrying', 'Embracing', 'Mounting', 'Copulating', 'Chasing'] \n", "\n", "Stats on the graphs time range:\n", "Earliest datetime: 2019-06-13 09:50:00+00:00\n", @@ -213,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -226,11 +226,11 @@ "\n", "Getting individual nodes and edges:\n", "Node(name=LOME, earliest_time=1560419520000, latest_time=1562756100000)\n", - "Edge(source=LOME, target=NEKKE, earliest_time=1560421080000, latest_time=1562755980000, properties={Weight: 1}) \n", + "Edge(source=LOME, target=NEKKE, earliest_time=1560421080000, latest_time=1562755980000, properties={Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1}) \n", "\n", "Getting iterators over all nodes and edges:\n", "[Node(name=ANGELE, earliest_time=1560419400000, latest_time=1562754600000), Node(name=FELIPE, earliest_time=1560419400000, latest_time=1562756700000), Node(name=LIPS, earliest_time=1560419460000, latest_time=1562756700000), Node(name=NEKKE, earliest_time=1560419520000, latest_time=1562756700000), Node(name=LOME, earliest_time=1560419520000, latest_time=1562756100000)]\n", - "[Edge(source=ANGELE, target=FELIPE, earliest_time=1560419400000, latest_time=1562753640000, properties={Weight: 1}), Edge(source=LOME, target=FEYA, earliest_time=1560421260000, latest_time=1562328420000, properties={Weight: 1}), Edge(source=VIOLETTE, target=LIPS, earliest_time=1560423600000, latest_time=1560423600000, properties={Weight: -1})]\n" + "[Edge(source=ANGELE, target=FELIPE, earliest_time=1560419400000, latest_time=1562753640000, properties={Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1}), Edge(source=FELIPE, target=ANGELE, earliest_time=1560419460000, latest_time=1562754600000, properties={Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1}), Edge(source=FELIPE, target=LIPS, earliest_time=1560419460000, latest_time=1562251080000, properties={Weight: 1, Weight: 1, Weight: 1})]\n" ] } ], @@ -263,7 +263,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -307,7 +307,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": { "scrolled": true }, @@ -318,7 +318,7 @@ "text": [ "FELIPE has 17 incoming interactions and 18 outgoing interactions.\n", "\n", - "[Edge(source=ANGELE, target=FELIPE, earliest_time=1560419400000, latest_time=1562753640000, properties={Weight: 1}), Edge(source=LIPS, target=FELIPE, earliest_time=1560423600000, latest_time=1562756700000, properties={Weight: 1}), Edge(source=NEKKE, target=FELIPE, earliest_time=1560443040000, latest_time=1562596380000, properties={Weight: 1})]\n", + "[Edge(source=ANGELE, target=FELIPE, earliest_time=1560419400000, latest_time=1562753640000, properties={Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1}), Edge(source=LIPS, target=FELIPE, earliest_time=1560423600000, latest_time=1562756700000, properties={Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1}), Edge(source=NEKKE, target=FELIPE, earliest_time=1560443040000, latest_time=1562596380000, properties={Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1})]\n", "[Node(name=ANGELE, earliest_time=1560419400000, latest_time=1562754600000), Node(name=LIPS, earliest_time=1560419460000, latest_time=1562756700000), Node(name=NEKKE, earliest_time=1560419520000, latest_time=1562756700000)] \n", "\n", "FELIPE interacted with the following baboons ['ANGELE', 'LIPS', 'NEKKE', 'LOME', 'BOBO', 'ATMOSPHERE', 'FEYA', 'FANA', 'PIPO', 'MUSE', 'MAKO', 'MALI', 'PETOULETTE', 'ARIELLE', 'HARLEM', 'VIOLETTE', 'EWINE', 'SELF']\n" @@ -361,7 +361,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": { "scrolled": true }, @@ -371,18 +371,18 @@ "output_type": "stream", "text": [ "Update history per layer:\n", + "FELIPE interacted with MAKO with the following behaviour 'Behavior' at this times: [1560437400000, 1560437640000, 1560935460000, 1561043280000, 1561043280000, 1561043340000, 1561117620000, 1561373880000, 1561373880000, 1561373940000, 1561373940000, 1561373940000, 1561373940000, 1561373940000, 1561390860000, 1561390860000, 1561390860000, 1561390920000, 1561643580000, 1561717080000, 1561717140000, 1561970760000, 1562148960000, 1562148960000, 1562149020000, 1562149020000, 1562149080000, 1562671020000]\n", "FELIPE interacted with MAKO with the following behaviour 'Grooming' at this times: [1561043280000, 1561043340000]\n", "FELIPE interacted with MAKO with the following behaviour 'Resting' at this times: [1560437400000, 1560437640000, 1560935460000, 1561117620000, 1561373880000, 1561390860000, 1561390860000, 1561390860000, 1561643580000, 1561970760000, 1562149020000, 1562671020000]\n", "FELIPE interacted with MAKO with the following behaviour 'Playing with' at this times: [1561373880000, 1561373940000, 1561373940000, 1561390920000, 1562148960000, 1562148960000, 1562149080000]\n", "FELIPE interacted with MAKO with the following behaviour 'Grunting-Lipsmacking' at this times: [1561373940000, 1561717080000, 1561717140000]\n", - "FELIPE interacted with MAKO with the following behaviour 'Touching' at this times: [1562149020000]\n", "\n", "Individual updates as edges:\n", + "At 2019-06-13 14:50:00+00:00 FELIPE interacted with MAKO in the following manner: 'Behavior'\n", "At 2019-06-13 14:50:00+00:00 FELIPE interacted with MAKO in the following manner: 'Resting'\n", + "At 2019-06-13 14:54:00+00:00 FELIPE interacted with MAKO in the following manner: 'Behavior'\n", "At 2019-06-13 14:54:00+00:00 FELIPE interacted with MAKO in the following manner: 'Resting'\n", - "At 2019-06-19 09:11:00+00:00 FELIPE interacted with MAKO in the following manner: 'Resting'\n", - "At 2019-06-20 15:08:00+00:00 FELIPE interacted with MAKO in the following manner: 'Carrying'\n", - "At 2019-06-20 15:08:00+00:00 FELIPE interacted with MAKO in the following manner: 'Grooming'\n", + "At 2019-06-19 09:11:00+00:00 FELIPE interacted with MAKO in the following manner: 'Behavior'\n", "...\n", "\n", "Individual updates for 'Touching' and 'Carrying:\n", @@ -435,7 +435,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": { "scrolled": true }, @@ -485,15 +485,15 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Felipe's favourite baboons in descending order are [('NEKKE', 41), ('ANGELE', 31), ('MAKO', 26), ('LOME', 23), ('LIPS', 11), ('HARLEM', 10), ('FANA', 8), ('MALI', 6), ('FEYA', 5), ('ARIELLE', 5), ('EWINE', 5), ('PIPO', 3), ('SELF', 2), ('BOBO', 1), ('ATMOSPHERE', 1), ('PETOULETTE', 1), ('VIOLETTE', 1), ('MUSE', -1)]\n", - "EXTERNE is the most annoying monkey with an average score of -2.0\n" + "Felipe's favourite baboons in descending order are [('NEKKE', 82), ('ANGELE', 62), ('MAKO', 52), ('LOME', 46), ('LIPS', 22), ('HARLEM', 20), ('FANA', 16), ('MALI', 12), ('FEYA', 10), ('ARIELLE', 10), ('EWINE', 10), ('PIPO', 6), ('SELF', 4), ('BOBO', 2), ('ATMOSPHERE', 2), ('PETOULETTE', 2), ('VIOLETTE', 2), ('MUSE', -2)]\n", + "EXTERNE is the most annoying monkey with an average score of -4.0\n" ] } ], @@ -547,16 +547,16 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Across the full dataset LOME interacted with NEKKE 41 times\n", - "Between None and 2019-06-13 12:17:19+00:00, LOME interacted with NEKKE 8 times\n", - "Window start: 2019-06-13 00:00:00+00:00, First update: 2019-06-13 10:18:00+00:00, Last update: 2019-06-13 15:05:00+00:00, Window End: 2019-06-14 00:00:00+00:00\n" + "Across the full dataset LOME interacted with NEKKE 82 times\n", + "Between None and 2019-06-13 12:17:19+00:00, LOME interacted with NEKKE 16 times\n", + "Window start: 2019-06-12 23:00:00+00:00, First update: 2019-06-13 10:18:00+00:00, Last update: 2019-06-13 15:05:00+00:00, Window End: 2019-06-13 23:00:00+00:00\n" ] } ], @@ -601,7 +601,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -646,14 +646,14 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Total weight across all edges is 2948.\n", + "Total weight across all edges is 5896.\n", "Total weight across Grooming and Resting is 1685.\n", "Total weight across Grooming and Resting between 2019-06-13 00:00:00 and 2019-06-20 00:00:00 is 403.\n" ] @@ -701,7 +701,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -759,7 +759,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -801,7 +801,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -848,7 +848,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "metadata": {}, "outputs": [ { From 99b565080274b984ffbb27aa5c8ec82cf1ae4d60 Mon Sep 17 00:00:00 2001 From: miratepuffin Date: Tue, 20 Aug 2024 22:25:47 +0100 Subject: [PATCH 09/17] Attemted merge --- raphtory/src/io/arrow/df_loaders.rs | 9 ++++----- raphtory/src/python/graph/io/pandas_loaders.rs | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index e51a1abaa7..209ab295bf 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -39,8 +39,8 @@ pub(crate) fn load_nodes_from_df< node_id: &str, time: &str, properties: Option<&[&str]>, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, node_type: Option<&str>, node_type_col: Option<&str>, graph: &G, @@ -71,7 +71,6 @@ pub(crate) fn load_nodes_from_df< for chunk in df_view.chunks { let df = chunk?; - let size = df.get_inner_size(); let prop_iter = combine_properties(properties, &properties_indices, &df)?; let const_prop_iter = combine_properties(constant_properties, &constant_properties_indices, &df)?; @@ -640,7 +639,7 @@ fn load_edges_from_num_iter< layer: IL, ) -> Result<(), GraphError> { for (((((src, dst), time), edge_props), const_props), layer) in - edges.zip(properties).zip(const_properties).zip(layer) + edges.zip(properties).zip(constant_properties).zip(layer) { if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { let e = graph.add_edge(time, src, dst, edge_props, layer.as_deref())?; @@ -669,7 +668,7 @@ fn load_nodes_from_num_iter< shared_constant_properties: Option<&HashMap>, ) -> Result<(), GraphError> { for (((node, time, node_type), props), const_props) in - nodes.zip(properties).zip(const_properties) + nodes.zip(properties).zip(constant_properties) { if let (Some(v), Some(t), n_t, props, const_props) = (node, time, node_type, props, const_props) diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index fd1a6fe490..65edf15577 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -75,7 +75,6 @@ pub fn load_edges_from_pandas( df_view.check_cols_exist(&cols_to_check)?; load_edges_from_df( df_view, - size, time, src, dst, From 5f254e77b3ee2ac1aa398a6d14f9deebd69d502f Mon Sep 17 00:00:00 2001 From: miratepuffin Date: Tue, 20 Aug 2024 22:26:24 +0100 Subject: [PATCH 10/17] fmt --- raphtory/src/io/arrow/df_loaders.rs | 6 ++---- raphtory/src/python/graph/disk_graph.rs | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 209ab295bf..28ea8f87de 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -68,7 +68,6 @@ pub(crate) fn load_nodes_from_df< let time_index = df_view.get_index(time)?; let mut pb = build_progress_bar("Loading nodes".to_string(), df_view.num_rows)?; - for chunk in df_view.chunks { let df = chunk?; let prop_iter = combine_properties(properties, &properties_indices, &df)?; @@ -337,7 +336,6 @@ pub(crate) fn load_edges_deletions_from_df< let layer_index = layer_index.transpose()?; let mut pb = build_progress_bar("Loading edge deletions".to_string(), df_view.num_rows)?; - for chunk in df_view.chunks { let df = chunk?; let layer = lift_layer(layer_name, layer_index, &df)?; @@ -428,10 +426,10 @@ pub(crate) fn load_node_props_from_df< let node_id_index = df_view.get_index(node_id)?; let mut pb = build_progress_bar("Loading node properties".to_string(), df_view.num_rows)?; - for chunk in df_view.chunks { let df = chunk?; - let const_prop_iter = combine_properties(constant_properties, &constant_properties_indices, &df)?; + let const_prop_iter = + combine_properties(constant_properties, &constant_properties_indices, &df)?; if let Some(node_id) = df.iter_col::(node_id_index) { let iter = node_id.map(|i| i.copied()); diff --git a/raphtory/src/python/graph/disk_graph.rs b/raphtory/src/python/graph/disk_graph.rs index 34e32fd93b..afc1648661 100644 --- a/raphtory/src/python/graph/disk_graph.rs +++ b/raphtory/src/python/graph/disk_graph.rs @@ -156,7 +156,7 @@ impl PyDiskGraph { let df_view = process_pandas_py_df(edge_df, py, df_columns)?; df_view.check_cols_exist(&cols_to_check)?; - let graph = Self::from_pandas(graph_dir, df_view,time_col, src_col, dst_col)?; + let graph = Self::from_pandas(graph_dir, df_view, time_col, src_col, dst_col)?; Ok::<_, GraphError>(graph) }); From b55f9346292f3dd4b14188a7cdfac1bce42bfe75 Mon Sep 17 00:00:00 2001 From: miratepuffin Date: Tue, 20 Aug 2024 22:48:51 +0100 Subject: [PATCH 11/17] missed a const --- raphtory/src/io/parquet_loaders.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 7b9fb5b9d5..c3d70d78ae 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -34,7 +34,7 @@ pub fn load_nodes_from_parquet< node_type_col: Option<&str>, properties: Option<&[&str]>, constant_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + shared_constant_properties: Option<&HashMap>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id, time]; cols_to_check.extend(properties.unwrap_or(&Vec::new())); @@ -52,7 +52,7 @@ pub fn load_nodes_from_parquet< time, properties, constant_properties, - shared_const_properties, + shared_constant_properties, node_type, node_type_col, graph, From ec74a9c7dc6787a0470365e6febe2362f630d8b9 Mon Sep 17 00:00:00 2001 From: miratepuffin Date: Tue, 20 Aug 2024 22:50:10 +0100 Subject: [PATCH 12/17] forgot const --- raphtory/src/io/parquet_loaders.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index c3d70d78ae..84a249d804 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -73,7 +73,7 @@ pub fn load_edges_from_parquet< dst: &str, properties: Option<&[&str]>, constant_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + shared_constant_properties: Option<&HashMap>, layer_name: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { @@ -95,7 +95,7 @@ pub fn load_edges_from_parquet< dst, properties, constant_properties, - shared_const_properties, + shared_constant_properties, layer_name, layer_col, graph, From 96eb888c6102452bf7544d1bfabc4193e04718a1 Mon Sep 17 00:00:00 2001 From: miratepuffin Date: Wed, 21 Aug 2024 12:48:55 +0100 Subject: [PATCH 13/17] fixed tests, added node types --- python/python/raphtory/__init__.pyi | 859 +++++++----------- .../python/raphtory/algorithms/__init__.pyi | 24 +- .../python/raphtory/graph_loader/__init__.pyi | 11 +- python/python/raphtory/graphql/__init__.pyi | 28 +- python/python/raphtory/vectors/__init__.pyi | 32 +- python/tests/test_disk_graph.py | 21 +- python/tests/test_graph_conversions.py | 103 ++- python/tests/test_graphql.py | 665 ++++++++++---- python/tests/test_iterables.py | 2 +- python/tests/test_load_from_pandas.py | 374 ++++---- python/tests/test_load_from_parquet.py | 247 +++-- raphtory/src/io/arrow/df_loaders.rs | 68 +- raphtory/src/io/arrow/mod.rs | 2 +- raphtory/src/io/parquet_loaders.rs | 31 +- raphtory/src/python/graph/graph.rs | 318 ++----- .../src/python/graph/graph_with_deletions.rs | 338 +++---- .../src/python/graph/io/pandas_loaders.rs | 39 +- 17 files changed, 1574 insertions(+), 1588 deletions(-) diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index e3384bcdf6..4f0d99980d 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -8,10 +8,8 @@ ############################################################################### class AlgorithmResult: - def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def get(self, key): """ Returns the value corresponding to the provided key @@ -19,7 +17,6 @@ class AlgorithmResult: Arguments: key: The key of type `H` for which the value is to be retrieved. """ - def get_all(self): """ Returns a Dict containing all the nodes (as keys) and their corresponding values (values) or none. @@ -27,10 +24,8 @@ class AlgorithmResult: Returns: A dict of nodes and their values """ - def get_all_values(self): """Returns a a list of all values""" - def get_all_with_names(self): """ Returns a dict with node names and values @@ -38,7 +33,6 @@ class AlgorithmResult: Returns: a dict with node names and values """ - def group_by(self): """ Groups the `AlgorithmResult` by its values. @@ -47,16 +41,12 @@ class AlgorithmResult: A `HashMap` where keys are unique values from the `AlgorithmResult` and values are vectors containing keys of type `H` that share the same value. """ - def max(self): """Returns a tuple of the max result with its key""" - def median(self): """Returns a tuple of the median result with its key""" - def min(self): """Returns a tuple of the min result with its key""" - def sort_by_node(self, reverse=True): """ Sorts by node id in ascending or descending order. @@ -67,7 +57,6 @@ class AlgorithmResult: Returns: A sorted list of tuples containing node names and values. """ - def sort_by_node_name(self, reverse=True): """ The function `sort_by_node_name` sorts a vector of tuples containing a node and an optional @@ -81,7 +70,6 @@ class AlgorithmResult: Returns: The function sort_by_node_name returns a vector of tuples. Each tuple contains a Node and value """ - def sort_by_value(self, reverse=True): """ Sorts the `AlgorithmResult` by its values in ascending or descending order. @@ -92,7 +80,6 @@ class AlgorithmResult: Returns: A sorted vector of tuples containing keys of type `H` and values of type `Y`. """ - def to_df(self): """ Creates a dataframe from the result @@ -100,10 +87,8 @@ class AlgorithmResult: Returns: A `pandas.DataFrame` containing the result """ - def to_string(self): """Returns a formatted string representation of the algorithm.""" - def top_k(self, k, percentage=False, reverse=True): """ Retrieves the top-k elements from the `AlgorithmResult` based on its values. @@ -125,14 +110,12 @@ class ConstProperties: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def as_dict(self): """ as_dict() -> dict[str, Any] convert the properties view to a python dict """ - def get(self, key): """ get(key: str) -> Any | None @@ -142,21 +125,18 @@ class ConstProperties: get property value by key (returns `None` if key does not exist) """ - def items(self): """ items() -> list[tuple[str, Any]] lists the property keys together with the corresponding value """ - def keys(self): """ keys() -> list[str] lists the available property keys """ - def values(self): """ values() -> list[Any] @@ -172,7 +152,6 @@ class Edge: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def after(self, start): """ Create a view of the Edge including all events after `start` (exclusive). @@ -183,7 +162,6 @@ class Edge: Returns: A Edge object. """ - def at(self, time): """ Create a view of the Edge including all events at `time`. @@ -194,7 +172,6 @@ class Edge: Returns: A Edge object. """ - def before(self, end): """ Create a view of the Edge including all events before `end` (exclusive). @@ -205,7 +182,6 @@ class Edge: Returns: A Edge object. """ - @property def date_time(self): """ @@ -214,14 +190,12 @@ class Edge: Returns: (datetime) the datetime of an exploded edge """ - def default_layer(self): """ Return a view of Edge containing only the default edge layer Returns: Edge: The layered view """ - def deletions(self): """ Returns a list of timestamps of when an edge is deleted @@ -229,7 +203,6 @@ class Edge: Returns: A list of unix timestamps """ - def deletions_data_time(self): """ Returns a list of timestamps of when an edge is deleted @@ -237,11 +210,9 @@ class Edge: Returns: A list of DateTime objects """ - @property def dst(self): """Returns the destination node of the edge.""" - @property def earliest_date_time(self): """ @@ -250,7 +221,6 @@ class Edge: Returns: the earliest datetime of an edge """ - @property def earliest_time(self): """ @@ -259,7 +229,6 @@ class Edge: Returns: (int) The earliest time of an edge """ - @property def end(self): """ @@ -268,7 +237,6 @@ class Edge: Returns: The latest time that this Edge is valid or None if the Edge is valid for all times. """ - @property def end_date_time(self): """ @@ -277,7 +245,6 @@ class Edge: Returns: The latest datetime that this Edge is valid or None if the Edge is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Edge containing all layers except the excluded `name` @@ -289,7 +256,6 @@ class Edge: Returns: Edge: The layered view """ - def exclude_layers(self, names): """ Return a view of Edge containing all layers except the excluded `names` @@ -301,7 +267,6 @@ class Edge: Returns: Edge: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Edge containing all layers except the excluded `name` @@ -311,7 +276,6 @@ class Edge: Returns: Edge: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Edge containing all layers except the excluded `names` @@ -321,7 +285,6 @@ class Edge: Returns: Edge: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -334,16 +297,11 @@ class Edge: Returns: A `WindowSet` object. """ - def explode(self): """Explodes an edge and returns all instances it had been updated as seperate edges""" - - def explode_layers(self): - ... - + def explode_layers(self): ... def has_layer(self, name): - """ Check if Edge has the layer `"name"`""" - + """Check if Edge has the layer `"name"`""" def history(self): """ Returns a list of timestamps of when an edge is added or change to an edge is made. @@ -352,7 +310,6 @@ class Edge: A list of unix timestamps. """ - def history_date_time(self): """ Returns a list of timestamps of when an edge is added or change to an edge is made. @@ -361,20 +318,15 @@ class Edge: A list of timestamps. """ - @property def id(self): """The id of the edge.""" - def is_deleted(self): """Check if the edge is currently deleted""" - def is_self_loop(self): """Check if the edge is on the same node""" - def is_valid(self): """Check if the edge is currently valid (i.e., not deleted)""" - @property def latest_date_time(self): """ @@ -383,7 +335,6 @@ class Edge: Returns: (datetime) the latest datetime of an edge """ - @property def latest_time(self): """ @@ -392,7 +343,6 @@ class Edge: Returns: (int) The latest time of an edge """ - def layer(self, name): """ Return a view of Edge containing the layer `"name"` @@ -401,7 +351,6 @@ class Edge: Returns: Edge: The layered view """ - @property def layer_name(self): """ @@ -410,7 +359,6 @@ class Edge: Returns: (List) The name of the layer """ - @property def layer_names(self): """ @@ -419,7 +367,6 @@ class Edge: Returns: (List) The name of the layer """ - def layers(self, names): """ Return a view of Edge containing all layers `names` @@ -431,11 +378,9 @@ class Edge: Returns: Edge: The layered view """ - @property def nbr(self): """Returns the node at the other end of the edge (same as `dst()` for out-edges and `src()` for in-edges)""" - @property def properties(self): """ @@ -444,7 +389,6 @@ class Edge: Returns: Properties on the Edge. """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -458,7 +402,6 @@ class Edge: Returns: A `WindowSet` object. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -468,7 +411,6 @@ class Edge: Returns: A Edge object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -479,7 +421,6 @@ class Edge: Returns: A Edge object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -487,11 +428,9 @@ class Edge: Arguments: """ - @property def src(self): """Returns the source node of the edge.""" - @property def start(self): """ @@ -500,7 +439,6 @@ class Edge: Returns: The earliest time that this Edge is valid or None if the Edge is valid for all times. """ - @property def start_date_time(self): """ @@ -509,7 +447,6 @@ class Edge: Returns: The earliest datetime that this Edge is valid or None if the Edge is valid for all times. """ - @property def time(self): """ @@ -518,7 +455,6 @@ class Edge: Returns: (int) The time of an exploded edge """ - def valid_layers(self, names): """ Return a view of Edge containing all layers `names` @@ -530,7 +466,6 @@ class Edge: Returns: Edge: The layered view """ - def window(self, start, end): """ Create a view of the Edge including all events between `start` (inclusive) and `end` (exclusive) @@ -542,17 +477,15 @@ class Edge: Returns: r A Edge object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Edge""" + """Get the window size (difference between start and end) for this Edge""" class Edges: """A list of edges that can be iterated over.""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def after(self, start): """ Create a view of the Edges including all events after `start` (exclusive). @@ -563,7 +496,6 @@ class Edges: Returns: A Edges object. """ - def at(self, time): """ Create a view of the Edges including all events at `time`. @@ -574,7 +506,6 @@ class Edges: Returns: A Edges object. """ - def before(self, end): """ Create a view of the Edges including all events before `end` (exclusive). @@ -585,7 +516,6 @@ class Edges: Returns: A Edges object. """ - def collect(self): """ Collect all edges into a list @@ -593,10 +523,8 @@ class Edges: Returns: list[Edge]: the list of edges """ - def count(self): """Returns the number of edges""" - @property def date_time(self): """ @@ -605,14 +533,12 @@ class Edges: Returns: A list of date times. """ - def default_layer(self): """ Return a view of Edges containing only the default edge layer Returns: Edges: The layered view """ - def deletions(self): """ Returns all timestamps of edges where an edge is deleted @@ -620,7 +546,6 @@ class Edges: Returns: A list of lists of unix timestamps """ - def deletions_date_time(self): """ Returns all timestamps of edges where an edge is deleted @@ -628,11 +553,9 @@ class Edges: Returns: A list of lists of DateTime objects """ - @property def dst(self): """Returns the destination node of the edge.""" - @property def earliest_date_time(self): """ @@ -641,7 +564,6 @@ class Edges: Returns: Earliest date time of the edges. """ - @property def earliest_time(self): """ @@ -650,7 +572,6 @@ class Edges: Returns: Earliest time of the edges. """ - @property def end(self): """ @@ -659,7 +580,6 @@ class Edges: Returns: The latest time that this Edges is valid or None if the Edges is valid for all times. """ - @property def end_date_time(self): """ @@ -668,7 +588,6 @@ class Edges: Returns: The latest datetime that this Edges is valid or None if the Edges is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Edges containing all layers except the excluded `name` @@ -680,7 +599,6 @@ class Edges: Returns: Edges: The layered view """ - def exclude_layers(self, names): """ Return a view of Edges containing all layers except the excluded `names` @@ -692,7 +610,6 @@ class Edges: Returns: Edges: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Edges containing all layers except the excluded `name` @@ -702,7 +619,6 @@ class Edges: Returns: Edges: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Edges containing all layers except the excluded `names` @@ -712,7 +628,6 @@ class Edges: Returns: Edges: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -725,16 +640,11 @@ class Edges: Returns: A `WindowSet` object. """ - def explode(self): """Explodes an edge and returns all instances it had been updated as seperate edges""" - - def explode_layers(self): - ... - + def explode_layers(self): ... def has_layer(self, name): - """ Check if Edges has the layer `"name"`""" - + """Check if Edges has the layer `"name"`""" def history(self): """ Returns all timestamps of edges, when an edge is added or change to an edge is made. @@ -743,7 +653,6 @@ class Edges: A list of lists unix timestamps. """ - def history_date_time(self): """ Returns all timestamps of edges, when an edge is added or change to an edge is made. @@ -752,20 +661,15 @@ class Edges: A list of lists of timestamps. """ - @property def id(self): """Returns all ids of the edges.""" - def is_deleted(self): """Check if the edges are deleted""" - def is_self_loop(self): """Check if the edges are on the same node""" - def is_valid(self): """Check if the edges are valid (i.e. not deleted)""" - @property def latest_date_time(self): """ @@ -774,7 +678,6 @@ class Edges: Returns: Latest date time of the edges. """ - @property def latest_time(self): """ @@ -783,7 +686,6 @@ class Edges: Returns: Latest time of the edges. """ - def layer(self, name): """ Return a view of Edges containing the layer `"name"` @@ -792,7 +694,6 @@ class Edges: Returns: Edges: The layered view """ - @property def layer_name(self): """ @@ -801,7 +702,6 @@ class Edges: Returns: The name of the layer """ - @property def layer_names(self): """ @@ -810,7 +710,6 @@ class Edges: Returns: A list of layer names """ - def layers(self, names): """ Return a view of Edges containing all layers `names` @@ -822,15 +721,12 @@ class Edges: Returns: Edges: The layered view """ - @property def nbr(self): """Returns the node at the other end of the edge (same as `dst()` for out-edges and `src()` for in-edges)""" - @property def properties(self): """Returns all properties of the edges""" - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -844,7 +740,6 @@ class Edges: Returns: A `WindowSet` object. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -854,7 +749,6 @@ class Edges: Returns: A Edges object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -865,7 +759,6 @@ class Edges: Returns: A Edges object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -873,11 +766,9 @@ class Edges: Arguments: """ - @property def src(self): """Returns the source node of the edge.""" - @property def start(self): """ @@ -886,7 +777,6 @@ class Edges: Returns: The earliest time that this Edges is valid or None if the Edges is valid for all times. """ - @property def start_date_time(self): """ @@ -895,7 +785,6 @@ class Edges: Returns: The earliest datetime that this Edges is valid or None if the Edges is valid for all times. """ - @property def time(self): """ @@ -904,8 +793,9 @@ class Edges: Returns: Time of edge """ - - def to_df(self, include_property_history=True, convert_datetime=False, explode=False): + def to_df( + self, include_property_history=True, convert_datetime=False, explode=False + ): """ Converts the graph's edges into a Pandas DataFrame. @@ -924,7 +814,6 @@ class Edges: Returns: If successful, this PyObject will be a Pandas DataFrame. """ - def valid_layers(self, names): """ Return a view of Edges containing all layers `names` @@ -936,7 +825,6 @@ class Edges: Returns: Edges: The layered view """ - def window(self, start, end): """ Create a view of the Edges including all events between `start` (inclusive) and `end` (exclusive) @@ -948,17 +836,15 @@ class Edges: Returns: r A Edges object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Edges""" + """Get the window size (difference between start and end) for this Edges""" class Graph: """A temporal graph.""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def add_constant_properties(self, properties): """ Adds static properties to the graph. @@ -969,7 +855,6 @@ class Graph: Returns: None """ - def add_edge(self, timestamp, src, dst, properties=None, layer=None): """ Adds a new edge with the given source and destination nodes and properties to the graph. @@ -984,7 +869,6 @@ class Graph: Returns: None """ - def add_node(self, timestamp, id, properties=None, node_type=None): """ Adds a new node with the given id and properties to the graph. @@ -997,7 +881,6 @@ class Graph: Returns: None """ - def add_property(self, timestamp, properties): """ Adds properties to the graph. @@ -1009,7 +892,6 @@ class Graph: Returns: None """ - def after(self, start): """ Create a view of the GraphView including all events after `start` (exclusive). @@ -1020,7 +902,6 @@ class Graph: Returns: A GraphView object. """ - def at(self, time): """ Create a view of the GraphView including all events at `time`. @@ -1031,7 +912,6 @@ class Graph: Returns: A GraphView object. """ - def before(self, end): """ Create a view of the GraphView including all events before `end` (exclusive). @@ -1042,10 +922,8 @@ class Graph: Returns: A GraphView object. """ - def bincode(self): """Get bincode encoded graph""" - def count_edges(self): """ Number of edges in the graph @@ -1053,7 +931,6 @@ class Graph: Returns: the number of edges in the graph """ - def count_nodes(self): """ Number of nodes in the graph @@ -1061,7 +938,6 @@ class Graph: Returns: the number of nodes in the graph """ - def count_temporal_edges(self): """ Number of edges in the graph @@ -1069,14 +945,12 @@ class Graph: Returns: the number of temporal edges in the graph """ - def default_layer(self): """ Return a view of GraphView containing only the default edge layer Returns: GraphView: The layered view """ - @property def earliest_date_time(self): """ @@ -1085,7 +959,6 @@ class Graph: Returns: the datetime of the earliest activity in the graph """ - @property def earliest_time(self): """ @@ -1094,7 +967,6 @@ class Graph: Returns: the timestamp of the earliest activity in the graph """ - def edge(self, src, dst): """ Gets the edge with the specified source and destination nodes @@ -1106,7 +978,6 @@ class Graph: Returns: the edge with the specified source and destination nodes, or None if the edge does not exist """ - @property def edges(self): """ @@ -1115,7 +986,6 @@ class Graph: Returns: the edges in the graph """ - @property def end(self): """ @@ -1124,7 +994,6 @@ class Graph: Returns: The latest time that this GraphView is valid or None if the GraphView is valid for all times. """ - @property def end_date_time(self): """ @@ -1133,7 +1002,6 @@ class Graph: Returns: The latest datetime that this GraphView is valid or None if the GraphView is valid for all times. """ - def exclude_layer(self, name): """ Return a view of GraphView containing all layers except the excluded `name` @@ -1145,7 +1013,6 @@ class Graph: Returns: GraphView: The layered view """ - def exclude_layers(self, names): """ Return a view of GraphView containing all layers except the excluded `names` @@ -1157,7 +1024,6 @@ class Graph: Returns: GraphView: The layered view """ - def exclude_nodes(self, nodes): """ Returns a subgraph given a set of nodes that are excluded from the subgraph @@ -1168,7 +1034,6 @@ class Graph: Returns: GraphView - Returns the subgraph """ - def exclude_valid_layer(self, name): """ Return a view of GraphView containing all layers except the excluded `name` @@ -1178,7 +1043,6 @@ class Graph: Returns: GraphView: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of GraphView containing all layers except the excluded `names` @@ -1188,7 +1052,6 @@ class Graph: Returns: GraphView: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -1201,7 +1064,6 @@ class Graph: Returns: A `WindowSet` object. """ - def find_edges(self, properties_dict): """ Get the edges that match the properties name and value @@ -1210,7 +1072,6 @@ class Graph: Returns: the edges that match the properties name and value """ - def find_nodes(self, properties_dict): """ Get the nodes that match the properties name and value @@ -1219,7 +1080,6 @@ class Graph: Returns: the nodes that match the properties name and value """ - def get_all_node_types(self): """ Returns all the node types in the graph. @@ -1227,7 +1087,6 @@ class Graph: Returns: A list of node types """ - def has_edge(self, src, dst): """ Returns true if the graph contains the specified edge @@ -1239,10 +1098,8 @@ class Graph: Returns: true if the graph contains the specified edge, false otherwise """ - def has_layer(self, name): - """ Check if GraphView has the layer `"name"`""" - + """Check if GraphView has the layer `"name"`""" def has_node(self, id): """ Returns true if the graph contains the specified node @@ -1253,7 +1110,6 @@ class Graph: Returns: true if the graph contains the specified node, false otherwise """ - def import_edge(self, edge, force=False): """ Import a single edge into the graph. @@ -1269,7 +1125,6 @@ class Graph: Returns: Result, GraphError> - A Result object which is Ok if the edge was successfully imported, and Err otherwise. """ - def import_edges(self, edges, force=False): """ Import multiple edges into the graph. @@ -1285,7 +1140,6 @@ class Graph: Returns: Result), GraphError> - A Result object which is Ok if the edges were successfully imported, and Err otherwise. """ - def import_node(self, node, force=False): """ Import a single node into the graph. @@ -1300,7 +1154,6 @@ class Graph: Returns: Result, GraphError> - A Result object which is Ok if the node was successfully imported, and Err otherwise. """ - def import_nodes(self, nodes, force=False): """ Import multiple nodes into the graph. @@ -1316,7 +1169,6 @@ class Graph: Returns: Result), GraphError> - A Result object which is Ok if the nodes were successfully imported, and Err otherwise. """ - def index(self): """ Indexes all node and edge properties. @@ -1326,7 +1178,6 @@ class Graph: Returns: GraphIndex - Returns a GraphIndex """ - def largest_connected_component(self): """ Gives the large connected component of a graph. @@ -1338,7 +1189,6 @@ class Graph: A raphtory graph, which essentially is a sub-graph of the graph `g` """ - @property def latest_date_time(self): """ @@ -1347,7 +1197,6 @@ class Graph: Returns: the datetime of the latest activity in the graph """ - @property def latest_time(self): """ @@ -1356,7 +1205,6 @@ class Graph: Returns: the timestamp of the latest activity in the graph """ - def layer(self, name): """ Return a view of GraphView containing the layer `"name"` @@ -1365,7 +1213,6 @@ class Graph: Returns: GraphView: The layered view """ - def layers(self, names): """ Return a view of GraphView containing all layers `names` @@ -1377,8 +1224,16 @@ class Graph: Returns: GraphView: The layered view """ - - def load_edge_props_from_pandas(self, df, src, dst, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + def load_edge_props_from_pandas( + self, + df, + src, + dst, + const_properties=None, + shared_const_properties=None, + layer=None, + layer_in_df=True, + ): """ Load edge properties from a Pandas DataFrame. @@ -1394,8 +1249,16 @@ class Graph: Returns: Result<(), GraphError>: Result of the operation. """ - - def load_edge_props_from_parquet(self, parquet_path, src, dst, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + def load_edge_props_from_parquet( + self, + parquet_path, + src, + dst, + const_properties=None, + shared_const_properties=None, + layer=None, + layer_in_df=True, + ): """ Load edge properties from parquet file @@ -1411,8 +1274,18 @@ class Graph: Returns: Result<(), GraphError>: Result of the operation. """ - - def load_edges_from_pandas(self, df, src, dst, time, properties=None, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + def load_edges_from_pandas( + self, + df, + src, + dst, + time, + properties=None, + const_properties=None, + shared_const_properties=None, + layer=None, + layer_in_df=True, + ): """ Load edges from a Pandas DataFrame into the graph. @@ -1430,8 +1303,18 @@ class Graph: Returns: Result<(), GraphError>: Result of the operation. """ - - def load_edges_from_parquet(self, parquet_path, src, dst, time, properties=None, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + def load_edges_from_parquet( + self, + parquet_path, + src, + dst, + time, + properties=None, + const_properties=None, + shared_const_properties=None, + layer=None, + layer_in_df=True, + ): """ Load edges from a Parquet file into the graph. @@ -1449,7 +1332,6 @@ class Graph: Returns: Result<(), GraphError>: Result of the operation. """ - @staticmethod def load_from_file(path, force=False): """ @@ -1461,9 +1343,26 @@ class Graph: Returns: Graph: The loaded graph. """ - @staticmethod - def load_from_pandas(edge_df, edge_src, edge_dst, edge_time, edge_properties=None, edge_const_properties=None, edge_shared_const_properties=None, edge_layer=None, layer_in_df=True, node_df=None, node_id=None, node_time=None, node_properties=None, node_const_properties=None, node_shared_const_properties=None, node_type=None, node_type_in_df=True): + def load_from_pandas( + edge_df, + edge_src, + edge_dst, + edge_time, + edge_properties=None, + edge_const_properties=None, + edge_shared_const_properties=None, + edge_layer=None, + layer_in_df=True, + node_df=None, + node_id=None, + node_time=None, + node_properties=None, + node_const_properties=None, + node_shared_const_properties=None, + node_type=None, + node_type_in_df=True, + ): """ Load a graph from a Pandas DataFrame. @@ -1489,9 +1388,26 @@ class Graph: Returns: Graph: The loaded Graph object. """ - @staticmethod - def load_from_parquet(edge_parquet_path, edge_src, edge_dst, edge_time, edge_properties=None, edge_const_properties=None, edge_shared_const_properties=None, edge_layer=None, layer_in_df=True, node_parquet_path=None, node_id=None, node_time=None, node_properties=None, node_const_properties=None, node_shared_const_properties=None, node_type=None, node_type_in_df=True): + def load_from_parquet( + edge_parquet_path, + edge_src, + edge_dst, + edge_time, + edge_properties=None, + edge_const_properties=None, + edge_shared_const_properties=None, + edge_layer=None, + layer_in_df=True, + node_parquet_path=None, + node_id=None, + node_time=None, + node_properties=None, + node_const_properties=None, + node_shared_const_properties=None, + node_type=None, + node_type_in_df=True, + ): """ Load a graph from Parquet file. @@ -1517,8 +1433,9 @@ class Graph: Returns: Graph: The loaded Graph object. """ - - def load_node_props_from_pandas(self, df, id, const_properties=None, shared_const_properties=None): + def load_node_props_from_pandas( + self, df, id, const_properties=None, shared_const_properties=None + ): """ Load node properties from a Pandas DataFrame. @@ -1531,8 +1448,9 @@ class Graph: Returns: Result<(), GraphError>: Result of the operation. """ - - def load_node_props_from_parquet(self, parquet_path, id, const_properties=None, shared_const_properties=None): + def load_node_props_from_parquet( + self, parquet_path, id, const_properties=None, shared_const_properties=None + ): """ Load node properties from a parquet file. @@ -1545,8 +1463,17 @@ class Graph: Returns: Result<(), GraphError>: Result of the operation. """ - - def load_nodes_from_pandas(self, df, id, time, node_type=None, node_type_in_df=True, properties=None, const_properties=None, shared_const_properties=None): + def load_nodes_from_pandas( + self, + df, + id, + time, + node_type=None, + node_type_in_df=True, + properties=None, + const_properties=None, + shared_const_properties=None, + ): """ Load nodes from a Pandas DataFrame into the graph. @@ -1562,8 +1489,17 @@ class Graph: Returns: Result<(), GraphError>: Result of the operation. """ - - def load_nodes_from_parquet(self, parquet_path, id, time, node_type=None, node_type_in_df=True, properties=None, const_properties=None, shared_const_properties=None): + def load_nodes_from_parquet( + self, + parquet_path, + id, + time, + node_type=None, + node_type_in_df=True, + properties=None, + const_properties=None, + shared_const_properties=None, + ): """ Load nodes from a Parquet file into the graph. @@ -1579,7 +1515,6 @@ class Graph: Returns: Result<(), GraphError>: Result of the operation. """ - def materialize(self): """ Returns a 'materialized' clone of the graph view - i.e. a new graph with a copy of the data seen within the view instead of just a mask over the original graph @@ -1587,7 +1522,6 @@ class Graph: Returns: GraphView - Returns a graph clone """ - def node(self, id): """ Gets the node with the specified id @@ -1598,7 +1532,6 @@ class Graph: Returns: the node with the specified id, or None if the node does not exist """ - @property def nodes(self): """ @@ -1607,10 +1540,8 @@ class Graph: Returns: the nodes in the graph """ - def persistent_graph(self): """Get persistent graph""" - @property def properties(self): """ @@ -1620,7 +1551,6 @@ class Graph: Returns: HashMap - Properties paired with their names """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -1634,7 +1564,6 @@ class Graph: Returns: A `WindowSet` object. """ - def save_to_file(self, path): """ Saves the graph to the given path. @@ -1645,7 +1574,6 @@ class Graph: Returns: None """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -1655,7 +1583,6 @@ class Graph: Returns: A GraphView object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -1666,7 +1593,6 @@ class Graph: Returns: A GraphView object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -1674,7 +1600,6 @@ class Graph: Arguments: """ - @property def start(self): """ @@ -1683,7 +1608,6 @@ class Graph: Returns: The earliest time that this GraphView is valid or None if the GraphView is valid for all times. """ - @property def start_date_time(self): """ @@ -1692,7 +1616,6 @@ class Graph: Returns: The earliest datetime that this GraphView is valid or None if the GraphView is valid for all times. """ - def subgraph(self, nodes): """ Returns a subgraph given a set of nodes @@ -1703,7 +1626,6 @@ class Graph: Returns: GraphView - Returns the subgraph """ - def subgraph_node_types(self, node_types): """ Returns a subgraph filtered by node types given a set of node types @@ -1714,8 +1636,14 @@ class Graph: Returns: GraphView - Returns the subgraph """ - - def to_networkx(self, explode_edges=False, include_node_properties=True, include_edge_properties=True, include_update_history=True, include_property_history=True): + def to_networkx( + self, + explode_edges=False, + include_node_properties=True, + include_edge_properties=True, + include_update_history=True, + include_property_history=True, + ): """ Returns a graph with NetworkX. @@ -1733,8 +1661,18 @@ class Graph: Returns: A Networkx MultiDiGraph. """ - - def to_pyvis(self, explode_edges=False, edge_color="#000000", shape=None, node_image=None, edge_weight=None, edge_label=None, colour_nodes_by_type=False, notebook=False, **kwargs): + def to_pyvis( + self, + explode_edges=False, + edge_color="#000000", + shape=None, + node_image=None, + edge_weight=None, + edge_label=None, + colour_nodes_by_type=False, + notebook=False, + **kwargs, + ): """ Draw a graph with PyVis. Pyvis is a required dependency. If you intend to use this function make sure that you install Pyvis @@ -1758,11 +1696,9 @@ class Graph: Returns: A pyvis network """ - @property def unique_layers(self): """Return all the layer ids in the graph""" - def update_constant_properties(self, properties): """ Updates static properties to the graph. @@ -1773,7 +1709,6 @@ class Graph: Returns: None """ - def valid_layers(self, names): """ Return a view of GraphView containing all layers `names` @@ -1785,8 +1720,16 @@ class Graph: Returns: GraphView: The layered view """ - - def vectorise(self, embedding, cache=None, overwrite_cache=False, graph_document=None, node_document=None, edge_document=None, verbose=False): + def vectorise( + self, + embedding, + cache=None, + overwrite_cache=False, + graph_document=None, + node_document=None, + edge_document=None, + verbose=False, + ): """ Create a VectorisedGraph from the current graph @@ -1801,7 +1744,6 @@ class Graph: Returns: A VectorisedGraph with all the documents/embeddings computed and with an initial empty selection """ - def window(self, start, end): """ Create a view of the GraphView including all events between `start` (inclusive) and `end` (exclusive) @@ -1813,10 +1755,9 @@ class Graph: Returns: r A GraphView object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this GraphView""" + """Get the window size (difference between start and end) for this GraphView""" class GraphIndex: """ @@ -1827,8 +1768,9 @@ class GraphIndex: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - - def fuzzy_search_edges(self, query, limit=25, offset=0, prefix=False, levenshtein_distance=0): + def fuzzy_search_edges( + self, query, limit=25, offset=0, prefix=False, levenshtein_distance=0 + ): """ Searches for edges which match the given query. This uses Tantivy's fuzzy search. @@ -1842,8 +1784,9 @@ class GraphIndex: Returns: A list of edges which match the query. The list will be empty if no edges match the query. """ - - def fuzzy_search_nodes(self, query, limit=25, offset=0, prefix=False, levenshtein_distance=0): + def fuzzy_search_nodes( + self, query, limit=25, offset=0, prefix=False, levenshtein_distance=0 + ): """ Searches for nodes which match the given query. This uses Tantivy's fuzzy search. If you would like to better understand the query syntax, please visit our documentation at https://docs.raphtory.com @@ -1858,7 +1801,6 @@ class GraphIndex: Returns: A list of nodes which match the query. The list will be empty if no nodes match. """ - def search_edges(self, query, limit=25, offset=0): """ Searches for edges which match the given query. This uses Tantivy's exact search. @@ -1871,7 +1813,6 @@ class GraphIndex: Returns: A list of edges which match the query. The list will be empty if no edges match the query. """ - def search_nodes(self, query, limit=25, offset=0): """ Searches for nodes which match the given query. This uses Tantivy's exact search. @@ -1886,10 +1827,8 @@ class GraphIndex: """ class MutableEdge: - def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def add_constant_properties(self, properties, layer=None): """ Add constant properties to an edge in the graph. @@ -1905,7 +1844,6 @@ class MutableEdge: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def add_updates(self, t, properties=None, layer=None): """ Add updates to an edge in the graph at a specified time. @@ -1920,7 +1858,6 @@ class MutableEdge: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def after(self, start): """ Create a view of the Edge including all events after `start` (exclusive). @@ -1931,7 +1868,6 @@ class MutableEdge: Returns: A Edge object. """ - def at(self, time): """ Create a view of the Edge including all events at `time`. @@ -1942,7 +1878,6 @@ class MutableEdge: Returns: A Edge object. """ - def before(self, end): """ Create a view of the Edge including all events before `end` (exclusive). @@ -1953,7 +1888,6 @@ class MutableEdge: Returns: A Edge object. """ - @property def date_time(self): """ @@ -1962,14 +1896,12 @@ class MutableEdge: Returns: (datetime) the datetime of an exploded edge """ - def default_layer(self): """ Return a view of Edge containing only the default edge layer Returns: Edge: The layered view """ - def deletions(self): """ Returns a list of timestamps of when an edge is deleted @@ -1977,7 +1909,6 @@ class MutableEdge: Returns: A list of unix timestamps """ - def deletions_data_time(self): """ Returns a list of timestamps of when an edge is deleted @@ -1985,11 +1916,9 @@ class MutableEdge: Returns: A list of DateTime objects """ - @property def dst(self): """Returns the destination node of the edge.""" - @property def earliest_date_time(self): """ @@ -1998,7 +1927,6 @@ class MutableEdge: Returns: the earliest datetime of an edge """ - @property def earliest_time(self): """ @@ -2007,7 +1935,6 @@ class MutableEdge: Returns: (int) The earliest time of an edge """ - @property def end(self): """ @@ -2016,7 +1943,6 @@ class MutableEdge: Returns: The latest time that this Edge is valid or None if the Edge is valid for all times. """ - @property def end_date_time(self): """ @@ -2025,7 +1951,6 @@ class MutableEdge: Returns: The latest datetime that this Edge is valid or None if the Edge is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Edge containing all layers except the excluded `name` @@ -2037,7 +1962,6 @@ class MutableEdge: Returns: Edge: The layered view """ - def exclude_layers(self, names): """ Return a view of Edge containing all layers except the excluded `names` @@ -2049,7 +1973,6 @@ class MutableEdge: Returns: Edge: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Edge containing all layers except the excluded `name` @@ -2059,7 +1982,6 @@ class MutableEdge: Returns: Edge: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Edge containing all layers except the excluded `names` @@ -2069,7 +1991,6 @@ class MutableEdge: Returns: Edge: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2082,16 +2003,11 @@ class MutableEdge: Returns: A `WindowSet` object. """ - def explode(self): """Explodes an edge and returns all instances it had been updated as seperate edges""" - - def explode_layers(self): - ... - + def explode_layers(self): ... def has_layer(self, name): - """ Check if Edge has the layer `"name"`""" - + """Check if Edge has the layer `"name"`""" def history(self): """ Returns a list of timestamps of when an edge is added or change to an edge is made. @@ -2100,7 +2016,6 @@ class MutableEdge: A list of unix timestamps. """ - def history_date_time(self): """ Returns a list of timestamps of when an edge is added or change to an edge is made. @@ -2109,20 +2024,15 @@ class MutableEdge: A list of timestamps. """ - @property def id(self): """The id of the edge.""" - def is_deleted(self): """Check if the edge is currently deleted""" - def is_self_loop(self): """Check if the edge is on the same node""" - def is_valid(self): """Check if the edge is currently valid (i.e., not deleted)""" - @property def latest_date_time(self): """ @@ -2131,7 +2041,6 @@ class MutableEdge: Returns: (datetime) the latest datetime of an edge """ - @property def latest_time(self): """ @@ -2140,7 +2049,6 @@ class MutableEdge: Returns: (int) The latest time of an edge """ - def layer(self, name): """ Return a view of Edge containing the layer `"name"` @@ -2149,7 +2057,6 @@ class MutableEdge: Returns: Edge: The layered view """ - @property def layer_name(self): """ @@ -2158,7 +2065,6 @@ class MutableEdge: Returns: (List) The name of the layer """ - @property def layer_names(self): """ @@ -2167,7 +2073,6 @@ class MutableEdge: Returns: (List) The name of the layer """ - def layers(self, names): """ Return a view of Edge containing all layers `names` @@ -2179,11 +2084,9 @@ class MutableEdge: Returns: Edge: The layered view """ - @property def nbr(self): """Returns the node at the other end of the edge (same as `dst()` for out-edges and `src()` for in-edges)""" - @property def properties(self): """ @@ -2192,7 +2095,6 @@ class MutableEdge: Returns: Properties on the Edge. """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -2206,7 +2108,6 @@ class MutableEdge: Returns: A `WindowSet` object. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -2216,7 +2117,6 @@ class MutableEdge: Returns: A Edge object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -2227,7 +2127,6 @@ class MutableEdge: Returns: A Edge object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -2235,11 +2134,9 @@ class MutableEdge: Arguments: """ - @property def src(self): """Returns the source node of the edge.""" - @property def start(self): """ @@ -2248,7 +2145,6 @@ class MutableEdge: Returns: The earliest time that this Edge is valid or None if the Edge is valid for all times. """ - @property def start_date_time(self): """ @@ -2257,7 +2153,6 @@ class MutableEdge: Returns: The earliest datetime that this Edge is valid or None if the Edge is valid for all times. """ - @property def time(self): """ @@ -2266,7 +2161,6 @@ class MutableEdge: Returns: (int) The time of an exploded edge """ - def update_constant_properties(self, properties, layer=None): """ Update constant properties of an edge in the graph overwriting existing values. @@ -2282,7 +2176,6 @@ class MutableEdge: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def valid_layers(self, names): """ Return a view of Edge containing all layers `names` @@ -2294,7 +2187,6 @@ class MutableEdge: Returns: Edge: The layered view """ - def window(self, start, end): """ Create a view of the Edge including all events between `start` (inclusive) and `end` (exclusive) @@ -2306,16 +2198,13 @@ class MutableEdge: Returns: r A Edge object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Edge""" + """Get the window size (difference between start and end) for this Edge""" class MutableNode: - def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def add_constant_properties(self, properties): """ Add constant properties to a node in the graph. @@ -2330,7 +2219,6 @@ class MutableNode: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def add_updates(self, t, properties=None): """ Add updates to a node in the graph at a specified time. @@ -2345,7 +2233,6 @@ class MutableNode: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def after(self, start): """ Create a view of the Node including all events after `start` (exclusive). @@ -2356,7 +2243,6 @@ class MutableNode: Returns: A Node object. """ - def at(self, time): """ Create a view of the Node including all events at `time`. @@ -2367,7 +2253,6 @@ class MutableNode: Returns: A Node object. """ - def before(self, end): """ Create a view of the Node including all events before `end` (exclusive). @@ -2378,14 +2263,12 @@ class MutableNode: Returns: A Node object. """ - def default_layer(self): """ Return a view of Node containing only the default edge layer Returns: Node: The layered view """ - def degree(self): """ Get the degree of this node (i.e., the number of edges that are incident to it). @@ -2393,7 +2276,6 @@ class MutableNode: Returns The degree of this node. """ - @property def earliest_date_time(self): """ @@ -2402,7 +2284,6 @@ class MutableNode: Returns: The earliest datetime that the node exists as an integer. """ - @property def earliest_time(self): """ @@ -2411,7 +2292,6 @@ class MutableNode: Returns: The earliest time that the node exists as an integer. """ - @property def edges(self): """ @@ -2421,7 +2301,6 @@ class MutableNode: An iterator over the edges that are incident to this node. """ - @property def end(self): """ @@ -2430,7 +2309,6 @@ class MutableNode: Returns: The latest time that this Node is valid or None if the Node is valid for all times. """ - @property def end_date_time(self): """ @@ -2439,7 +2317,6 @@ class MutableNode: Returns: The latest datetime that this Node is valid or None if the Node is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Node containing all layers except the excluded `name` @@ -2451,7 +2328,6 @@ class MutableNode: Returns: Node: The layered view """ - def exclude_layers(self, names): """ Return a view of Node containing all layers except the excluded `names` @@ -2463,7 +2339,6 @@ class MutableNode: Returns: Node: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Node containing all layers except the excluded `name` @@ -2473,7 +2348,6 @@ class MutableNode: Returns: Node: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Node containing all layers except the excluded `names` @@ -2483,7 +2357,6 @@ class MutableNode: Returns: Node: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2496,10 +2369,8 @@ class MutableNode: Returns: A `WindowSet` object. """ - def has_layer(self, name): - """ Check if Node has the layer `"name"`""" - + """Check if Node has the layer `"name"`""" def history(self): """ Returns the history of a node, including node additions and changes made to node. @@ -2507,7 +2378,6 @@ class MutableNode: Returns: A list of unix timestamps of the event history of node. """ - def history_date_time(self): """ Returns the history of a node, including node additions and changes made to node. @@ -2516,7 +2386,6 @@ class MutableNode: A list of timestamps of the event history of node. """ - @property def id(self): """ @@ -2526,7 +2395,6 @@ class MutableNode: Returns: The id of the node as an integer. """ - def in_degree(self): """ Get the in-degree of this node (i.e., the number of edges that are incident to it from other nodes). @@ -2534,7 +2402,6 @@ class MutableNode: Returns: The in-degree of this node. """ - @property def in_edges(self): """ @@ -2544,7 +2411,6 @@ class MutableNode: An iterator over the edges that point into this node. """ - @property def in_neighbours(self): """ @@ -2554,7 +2420,6 @@ class MutableNode: An iterator over the neighbours of this node that point into this node. """ - @property def latest_date_time(self): """ @@ -2566,7 +2431,6 @@ class MutableNode: Returns: The latest datetime that the node exists as an integer. """ - @property def latest_time(self): """ @@ -2575,7 +2439,6 @@ class MutableNode: Returns: The latest time that the node exists as an integer. """ - def layer(self, name): """ Return a view of Node containing the layer `"name"` @@ -2584,7 +2447,6 @@ class MutableNode: Returns: Node: The layered view """ - def layers(self, names): """ Return a view of Node containing all layers `names` @@ -2596,7 +2458,6 @@ class MutableNode: Returns: Node: The layered view """ - @property def name(self): """ @@ -2605,7 +2466,6 @@ class MutableNode: Returns: The name of the node as a string. """ - @property def neighbours(self): """ @@ -2615,11 +2475,9 @@ class MutableNode: An iterator over the neighbours of this node. """ - @property def node_type(self): """Returns the type of node""" - def out_degree(self): """ Get the out-degree of this node (i.e., the number of edges that are incident to it from this node). @@ -2627,7 +2485,6 @@ class MutableNode: Returns: The out-degree of this node. """ - @property def out_edges(self): """ @@ -2637,7 +2494,6 @@ class MutableNode: An iterator over the edges that point out of this node. """ - @property def out_neighbours(self): """ @@ -2647,7 +2503,6 @@ class MutableNode: An iterator over the neighbours of this node that point out of this node. """ - @property def properties(self): """ @@ -2656,7 +2511,6 @@ class MutableNode: Returns: A list of properties. """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -2670,7 +2524,6 @@ class MutableNode: Returns: A `WindowSet` object. """ - def set_node_type(self, new_type): """ Set the type on the node. This only works if the type has not been previously set, otherwise will @@ -2682,7 +2535,6 @@ class MutableNode: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -2692,7 +2544,6 @@ class MutableNode: Returns: A Node object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -2703,7 +2554,6 @@ class MutableNode: Returns: A Node object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -2711,7 +2561,6 @@ class MutableNode: Arguments: """ - @property def start(self): """ @@ -2720,7 +2569,6 @@ class MutableNode: Returns: The earliest time that this Node is valid or None if the Node is valid for all times. """ - @property def start_date_time(self): """ @@ -2729,7 +2577,6 @@ class MutableNode: Returns: The earliest datetime that this Node is valid or None if the Node is valid for all times. """ - def update_constant_properties(self, properties): """ Update constant properties of a node in the graph overwriting existing values. @@ -2744,7 +2591,6 @@ class MutableNode: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def valid_layers(self, names): """ Return a view of Node containing all layers `names` @@ -2756,7 +2602,6 @@ class MutableNode: Returns: Node: The layered view """ - def window(self, start, end): """ Create a view of the Node including all events between `start` (inclusive) and `end` (exclusive) @@ -2768,17 +2613,15 @@ class MutableNode: Returns: r A Node object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Node""" + """Get the window size (difference between start and end) for this Node""" class Node: """A node (or node) in the graph.""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def after(self, start): """ Create a view of the Node including all events after `start` (exclusive). @@ -2789,7 +2632,6 @@ class Node: Returns: A Node object. """ - def at(self, time): """ Create a view of the Node including all events at `time`. @@ -2800,7 +2642,6 @@ class Node: Returns: A Node object. """ - def before(self, end): """ Create a view of the Node including all events before `end` (exclusive). @@ -2811,14 +2652,12 @@ class Node: Returns: A Node object. """ - def default_layer(self): """ Return a view of Node containing only the default edge layer Returns: Node: The layered view """ - def degree(self): """ Get the degree of this node (i.e., the number of edges that are incident to it). @@ -2826,7 +2665,6 @@ class Node: Returns The degree of this node. """ - @property def earliest_date_time(self): """ @@ -2835,7 +2673,6 @@ class Node: Returns: The earliest datetime that the node exists as an integer. """ - @property def earliest_time(self): """ @@ -2844,7 +2681,6 @@ class Node: Returns: The earliest time that the node exists as an integer. """ - @property def edges(self): """ @@ -2854,7 +2690,6 @@ class Node: An iterator over the edges that are incident to this node. """ - @property def end(self): """ @@ -2863,7 +2698,6 @@ class Node: Returns: The latest time that this Node is valid or None if the Node is valid for all times. """ - @property def end_date_time(self): """ @@ -2872,7 +2706,6 @@ class Node: Returns: The latest datetime that this Node is valid or None if the Node is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Node containing all layers except the excluded `name` @@ -2884,7 +2717,6 @@ class Node: Returns: Node: The layered view """ - def exclude_layers(self, names): """ Return a view of Node containing all layers except the excluded `names` @@ -2896,7 +2728,6 @@ class Node: Returns: Node: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Node containing all layers except the excluded `name` @@ -2906,7 +2737,6 @@ class Node: Returns: Node: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Node containing all layers except the excluded `names` @@ -2916,7 +2746,6 @@ class Node: Returns: Node: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2929,10 +2758,8 @@ class Node: Returns: A `WindowSet` object. """ - def has_layer(self, name): - """ Check if Node has the layer `"name"`""" - + """Check if Node has the layer `"name"`""" def history(self): """ Returns the history of a node, including node additions and changes made to node. @@ -2940,7 +2767,6 @@ class Node: Returns: A list of unix timestamps of the event history of node. """ - def history_date_time(self): """ Returns the history of a node, including node additions and changes made to node. @@ -2949,7 +2775,6 @@ class Node: A list of timestamps of the event history of node. """ - @property def id(self): """ @@ -2959,7 +2784,6 @@ class Node: Returns: The id of the node as an integer. """ - def in_degree(self): """ Get the in-degree of this node (i.e., the number of edges that are incident to it from other nodes). @@ -2967,7 +2791,6 @@ class Node: Returns: The in-degree of this node. """ - @property def in_edges(self): """ @@ -2977,7 +2800,6 @@ class Node: An iterator over the edges that point into this node. """ - @property def in_neighbours(self): """ @@ -2987,7 +2809,6 @@ class Node: An iterator over the neighbours of this node that point into this node. """ - @property def latest_date_time(self): """ @@ -2999,7 +2820,6 @@ class Node: Returns: The latest datetime that the node exists as an integer. """ - @property def latest_time(self): """ @@ -3008,7 +2828,6 @@ class Node: Returns: The latest time that the node exists as an integer. """ - def layer(self, name): """ Return a view of Node containing the layer `"name"` @@ -3017,7 +2836,6 @@ class Node: Returns: Node: The layered view """ - def layers(self, names): """ Return a view of Node containing all layers `names` @@ -3029,7 +2847,6 @@ class Node: Returns: Node: The layered view """ - @property def name(self): """ @@ -3038,7 +2855,6 @@ class Node: Returns: The name of the node as a string. """ - @property def neighbours(self): """ @@ -3048,11 +2864,9 @@ class Node: An iterator over the neighbours of this node. """ - @property def node_type(self): """Returns the type of node""" - def out_degree(self): """ Get the out-degree of this node (i.e., the number of edges that are incident to it from this node). @@ -3060,7 +2874,6 @@ class Node: Returns: The out-degree of this node. """ - @property def out_edges(self): """ @@ -3070,7 +2883,6 @@ class Node: An iterator over the edges that point out of this node. """ - @property def out_neighbours(self): """ @@ -3080,7 +2892,6 @@ class Node: An iterator over the neighbours of this node that point out of this node. """ - @property def properties(self): """ @@ -3089,7 +2900,6 @@ class Node: Returns: A list of properties. """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -3103,7 +2913,6 @@ class Node: Returns: A `WindowSet` object. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -3113,7 +2922,6 @@ class Node: Returns: A Node object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -3124,7 +2932,6 @@ class Node: Returns: A Node object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -3132,7 +2939,6 @@ class Node: Arguments: """ - @property def start(self): """ @@ -3141,7 +2947,6 @@ class Node: Returns: The earliest time that this Node is valid or None if the Node is valid for all times. """ - @property def start_date_time(self): """ @@ -3150,7 +2955,6 @@ class Node: Returns: The earliest datetime that this Node is valid or None if the Node is valid for all times. """ - def valid_layers(self, names): """ Return a view of Node containing all layers `names` @@ -3162,7 +2966,6 @@ class Node: Returns: Node: The layered view """ - def window(self, start, end): """ Create a view of the Node including all events between `start` (inclusive) and `end` (exclusive) @@ -3174,17 +2977,15 @@ class Node: Returns: r A Node object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Node""" + """Get the window size (difference between start and end) for this Node""" class Nodes: """A list of nodes that can be iterated over.""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def after(self, start): """ Create a view of the Nodes including all events after `start` (exclusive). @@ -3195,7 +2996,6 @@ class Nodes: Returns: A Nodes object. """ - def at(self, time): """ Create a view of the Nodes including all events at `time`. @@ -3206,7 +3006,6 @@ class Nodes: Returns: A Nodes object. """ - def before(self, end): """ Create a view of the Nodes including all events before `end` (exclusive). @@ -3217,7 +3016,6 @@ class Nodes: Returns: A Nodes object. """ - def collect(self): """ Collect all nodes into a list @@ -3225,14 +3023,12 @@ class Nodes: Returns: list[Node]: the list of nodes """ - def default_layer(self): """ Return a view of Nodes containing only the default edge layer Returns: Nodes: The layered view """ - def degree(self): """ Returns the number of edges of the nodes @@ -3240,7 +3036,6 @@ class Nodes: Returns: An iterator of the number of edges of the nodes """ - @property def earliest_date_time(self): """ @@ -3249,11 +3044,9 @@ class Nodes: Returns: Earliest time of the nodes. """ - @property def earliest_time(self): """Returns an iterator over the nodes earliest time""" - @property def edges(self): """ @@ -3263,7 +3056,6 @@ class Nodes: An iterator over the edges that are incident to this node. """ - @property def end(self): """ @@ -3272,7 +3064,6 @@ class Nodes: Returns: The latest time that this Nodes is valid or None if the Nodes is valid for all times. """ - @property def end_date_time(self): """ @@ -3281,7 +3072,6 @@ class Nodes: Returns: The latest datetime that this Nodes is valid or None if the Nodes is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Nodes containing all layers except the excluded `name` @@ -3293,7 +3083,6 @@ class Nodes: Returns: Nodes: The layered view """ - def exclude_layers(self, names): """ Return a view of Nodes containing all layers except the excluded `names` @@ -3305,7 +3094,6 @@ class Nodes: Returns: Nodes: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Nodes containing all layers except the excluded `name` @@ -3315,7 +3103,6 @@ class Nodes: Returns: Nodes: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Nodes containing all layers except the excluded `names` @@ -3325,7 +3112,6 @@ class Nodes: Returns: Nodes: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3338,10 +3124,8 @@ class Nodes: Returns: A `WindowSet` object. """ - def has_layer(self, name): - """ Check if Nodes has the layer `"name"`""" - + """Check if Nodes has the layer `"name"`""" def history(self): """ Returns all timestamps of nodes, when an node is added or change to an node is made. @@ -3350,7 +3134,6 @@ class Nodes: A list of unix timestamps. """ - def history_date_time(self): """ Returns all timestamps of nodes, when an node is added or change to an node is made. @@ -3359,11 +3142,9 @@ class Nodes: An list of timestamps. """ - @property def id(self): """Returns an iterator over the nodes ids""" - def in_degree(self): """ Returns the number of in edges of the nodes @@ -3371,7 +3152,6 @@ class Nodes: Returns: An iterator of the number of in edges of the nodes """ - @property def in_edges(self): """ @@ -3381,7 +3161,6 @@ class Nodes: An iterator over the edges that point into this node. """ - @property def in_neighbours(self): """ @@ -3391,7 +3170,6 @@ class Nodes: An iterator over the neighbours of this node that point into this node. """ - @property def latest_date_time(self): """ @@ -3400,11 +3178,9 @@ class Nodes: Returns: Latest date time of the nodes. """ - @property def latest_time(self): """Returns an iterator over the nodes latest time""" - def layer(self, name): """ Return a view of Nodes containing the layer `"name"` @@ -3413,7 +3189,6 @@ class Nodes: Returns: Nodes: The layered view """ - def layers(self, names): """ Return a view of Nodes containing all layers `names` @@ -3425,11 +3200,9 @@ class Nodes: Returns: Nodes: The layered view """ - @property def name(self): """Returns an iterator over the nodes name""" - @property def neighbours(self): """ @@ -3439,11 +3212,9 @@ class Nodes: An iterator over the neighbours of this node. """ - @property def node_type(self): """Returns the type of node""" - def out_degree(self): """ Returns the number of out edges of the nodes @@ -3451,7 +3222,6 @@ class Nodes: Returns: An iterator of the number of out edges of the nodes """ - @property def out_edges(self): """ @@ -3461,7 +3231,6 @@ class Nodes: An iterator over the edges that point out of this node. """ - @property def out_neighbours(self): """ @@ -3471,7 +3240,6 @@ class Nodes: An iterator over the neighbours of this node that point out of this node. """ - @property def properties(self): """ @@ -3480,7 +3248,6 @@ class Nodes: Returns: A List of properties """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -3494,7 +3261,6 @@ class Nodes: Returns: A `WindowSet` object. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -3504,7 +3270,6 @@ class Nodes: Returns: A Nodes object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -3515,7 +3280,6 @@ class Nodes: Returns: A Nodes object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -3523,7 +3287,6 @@ class Nodes: Arguments: """ - @property def start(self): """ @@ -3532,7 +3295,6 @@ class Nodes: Returns: The earliest time that this Nodes is valid or None if the Nodes is valid for all times. """ - @property def start_date_time(self): """ @@ -3541,7 +3303,6 @@ class Nodes: Returns: The earliest datetime that this Nodes is valid or None if the Nodes is valid for all times. """ - def to_df(self, include_property_history=False, convert_datetime=False): """ Converts the graph's nodes into a Pandas DataFrame. @@ -3558,10 +3319,7 @@ class Nodes: Returns: If successful, this PyObject will be a Pandas DataFrame. """ - - def type_filter(self, node_types): - ... - + def type_filter(self, node_types): ... def valid_layers(self, names): """ Return a view of Nodes containing all layers `names` @@ -3573,7 +3331,6 @@ class Nodes: Returns: Nodes: The layered view """ - def window(self, start, end): """ Create a view of the Nodes including all events between `start` (inclusive) and `end` (exclusive) @@ -3585,17 +3342,15 @@ class Nodes: Returns: r A Nodes object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Nodes""" + """Get the window size (difference between start and end) for this Nodes""" class PersistentGraph: """A temporal graph that allows edges and nodes to be deleted.""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def add_constant_properties(self, properties): """ Adds static properties to the graph. @@ -3606,7 +3361,6 @@ class PersistentGraph: Returns: None """ - def add_edge(self, timestamp, src, dst, properties=None, layer=None): """ Adds a new edge with the given source and destination nodes and properties to the graph. @@ -3621,7 +3375,6 @@ class PersistentGraph: Returns: None """ - def add_node(self, timestamp, id, properties=None, node_type=None): """ Adds a new node with the given id and properties to the graph. @@ -3635,7 +3388,6 @@ class PersistentGraph: Returns: None """ - def add_property(self, timestamp, properties): """ Adds properties to the graph. @@ -3647,7 +3399,6 @@ class PersistentGraph: Returns: None """ - def after(self, start): """ Create a view of the GraphView including all events after `start` (exclusive). @@ -3658,7 +3409,6 @@ class PersistentGraph: Returns: A GraphView object. """ - def at(self, time): """ Create a view of the GraphView including all events at `time`. @@ -3669,7 +3419,6 @@ class PersistentGraph: Returns: A GraphView object. """ - def before(self, end): """ Create a view of the GraphView including all events before `end` (exclusive). @@ -3680,10 +3429,8 @@ class PersistentGraph: Returns: A GraphView object. """ - def bincode(self): """Get bincode encoded graph""" - def count_edges(self): """ Number of edges in the graph @@ -3691,7 +3438,6 @@ class PersistentGraph: Returns: the number of edges in the graph """ - def count_nodes(self): """ Number of nodes in the graph @@ -3699,7 +3445,6 @@ class PersistentGraph: Returns: the number of nodes in the graph """ - def count_temporal_edges(self): """ Number of edges in the graph @@ -3707,14 +3452,12 @@ class PersistentGraph: Returns: the number of temporal edges in the graph """ - def default_layer(self): """ Return a view of GraphView containing only the default edge layer Returns: GraphView: The layered view """ - def delete_edge(self, timestamp, src, dst, layer=None): """ Deletes an edge given the timestamp, src and dst nodes and layer (optional) @@ -3728,7 +3471,6 @@ class PersistentGraph: Returns: None or a GraphError if the edge could not be deleted """ - @property def earliest_date_time(self): """ @@ -3737,7 +3479,6 @@ class PersistentGraph: Returns: the datetime of the earliest activity in the graph """ - @property def earliest_time(self): """ @@ -3746,7 +3487,6 @@ class PersistentGraph: Returns: the timestamp of the earliest activity in the graph """ - def edge(self, src, dst): """ Gets the edge with the specified source and destination nodes @@ -3758,7 +3498,6 @@ class PersistentGraph: Returns: the edge with the specified source and destination nodes, or None if the edge does not exist """ - @property def edges(self): """ @@ -3767,7 +3506,6 @@ class PersistentGraph: Returns: the edges in the graph """ - @property def end(self): """ @@ -3776,7 +3514,6 @@ class PersistentGraph: Returns: The latest time that this GraphView is valid or None if the GraphView is valid for all times. """ - @property def end_date_time(self): """ @@ -3785,10 +3522,8 @@ class PersistentGraph: Returns: The latest datetime that this GraphView is valid or None if the GraphView is valid for all times. """ - def event_graph(self): """Get event graph""" - def exclude_layer(self, name): """ Return a view of GraphView containing all layers except the excluded `name` @@ -3800,7 +3535,6 @@ class PersistentGraph: Returns: GraphView: The layered view """ - def exclude_layers(self, names): """ Return a view of GraphView containing all layers except the excluded `names` @@ -3812,7 +3546,6 @@ class PersistentGraph: Returns: GraphView: The layered view """ - def exclude_nodes(self, nodes): """ Returns a subgraph given a set of nodes that are excluded from the subgraph @@ -3823,7 +3556,6 @@ class PersistentGraph: Returns: GraphView - Returns the subgraph """ - def exclude_valid_layer(self, name): """ Return a view of GraphView containing all layers except the excluded `name` @@ -3833,7 +3565,6 @@ class PersistentGraph: Returns: GraphView: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of GraphView containing all layers except the excluded `names` @@ -3843,7 +3574,6 @@ class PersistentGraph: Returns: GraphView: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3856,7 +3586,6 @@ class PersistentGraph: Returns: A `WindowSet` object. """ - def find_edges(self, properties_dict): """ Get the edges that match the properties name and value @@ -3865,7 +3594,6 @@ class PersistentGraph: Returns: the edges that match the properties name and value """ - def find_nodes(self, properties_dict): """ Get the nodes that match the properties name and value @@ -3874,7 +3602,6 @@ class PersistentGraph: Returns: the nodes that match the properties name and value """ - def get_all_node_types(self): """ Returns all the node types in the graph. @@ -3882,7 +3609,6 @@ class PersistentGraph: Returns: A list of node types """ - def has_edge(self, src, dst): """ Returns true if the graph contains the specified edge @@ -3894,10 +3620,8 @@ class PersistentGraph: Returns: true if the graph contains the specified edge, false otherwise """ - def has_layer(self, name): - """ Check if GraphView has the layer `"name"`""" - + """Check if GraphView has the layer `"name"`""" def has_node(self, id): """ Returns true if the graph contains the specified node @@ -3908,7 +3632,6 @@ class PersistentGraph: Returns: true if the graph contains the specified node, false otherwise """ - def import_edge(self, edge, force=False): """ Import a single edge into the graph. @@ -3924,7 +3647,6 @@ class PersistentGraph: Returns: Result, GraphError> - A Result object which is Ok if the edge was successfully imported, and Err otherwise. """ - def import_edges(self, edges, force=False): """ Import multiple edges into the graph. @@ -3940,7 +3662,6 @@ class PersistentGraph: Returns: Result), GraphError> - A Result object which is Ok if the edges were successfully imported, and Err otherwise. """ - def import_node(self, node, force=False): """ Import a single node into the graph. @@ -3955,7 +3676,6 @@ class PersistentGraph: Returns: Result, GraphError> - A Result object which is Ok if the node was successfully imported, and Err otherwise. """ - def import_nodes(self, nodes, force=False): """ Import multiple nodes into the graph. @@ -3971,7 +3691,6 @@ class PersistentGraph: Returns: Result), GraphError> - A Result object which is Ok if the nodes were successfully imported, and Err otherwise. """ - def index(self): """ Indexes all node and edge properties. @@ -3981,7 +3700,6 @@ class PersistentGraph: Returns: GraphIndex - Returns a GraphIndex """ - @property def latest_date_time(self): """ @@ -3990,7 +3708,6 @@ class PersistentGraph: Returns: the datetime of the latest activity in the graph """ - @property def latest_time(self): """ @@ -3999,7 +3716,6 @@ class PersistentGraph: Returns: the timestamp of the latest activity in the graph """ - def layer(self, name): """ Return a view of GraphView containing the layer `"name"` @@ -4008,7 +3724,6 @@ class PersistentGraph: Returns: GraphView: The layered view """ - def layers(self, names): """ Return a view of GraphView containing all layers `names` @@ -4020,8 +3735,16 @@ class PersistentGraph: Returns: GraphView: The layered view """ - - def load_edge_props_from_pandas(self, df, src, dst, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + def load_edge_props_from_pandas( + self, + df, + src, + dst, + const_properties=None, + shared_const_properties=None, + layer=None, + layer_in_df=True, + ): """ Load edge properties from a Pandas DataFrame. @@ -4037,8 +3760,16 @@ class PersistentGraph: Returns: Result<(), GraphError>: Result of the operation. """ - - def load_edge_props_from_parquet(self, parquet_path, src, dst, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + def load_edge_props_from_parquet( + self, + parquet_path, + src, + dst, + const_properties=None, + shared_const_properties=None, + layer=None, + layer_in_df=True, + ): """ Load edge properties from parquet file @@ -4054,8 +3785,9 @@ class PersistentGraph: Returns: Result<(), GraphError>: Result of the operation. """ - - def load_edges_deletions_from_pandas(self, df, src, dst, time, layer=None, layer_in_df=True): + def load_edges_deletions_from_pandas( + self, df, src, dst, time, layer=None, layer_in_df=True + ): """ Load edges deletions from a Pandas DataFrame into the graph. @@ -4070,8 +3802,9 @@ class PersistentGraph: Returns: Result<(), GraphError>: Result of the operation. """ - - def load_edges_deletions_from_parquet(self, parquet_path, src, dst, time, layer=None, layer_in_df=True): + def load_edges_deletions_from_parquet( + self, parquet_path, src, dst, time, layer=None, layer_in_df=True + ): """ Load edges deletions from a Parquet file into the graph. @@ -4086,8 +3819,18 @@ class PersistentGraph: Returns: Result<(), GraphError>: Result of the operation. """ - - def load_edges_from_pandas(self, df, src, dst, time, properties=None, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + def load_edges_from_pandas( + self, + df, + src, + dst, + time, + properties=None, + const_properties=None, + shared_const_properties=None, + layer=None, + layer_in_df=True, + ): """ Load edges from a Pandas DataFrame into the graph. @@ -4105,8 +3848,18 @@ class PersistentGraph: Returns: Result<(), GraphError>: Result of the operation. """ - - def load_edges_from_parquet(self, parquet_path, src, dst, time, properties=None, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + def load_edges_from_parquet( + self, + parquet_path, + src, + dst, + time, + properties=None, + const_properties=None, + shared_const_properties=None, + layer=None, + layer_in_df=True, + ): """ Load edges from a Parquet file into the graph. @@ -4124,7 +3877,6 @@ class PersistentGraph: Returns: Result<(), GraphError>: Result of the operation. """ - @staticmethod def load_from_file(path, force=False): """ @@ -4136,9 +3888,26 @@ class PersistentGraph: Returns: Graph: The loaded graph. """ - @staticmethod - def load_from_pandas(edge_df, edge_src, edge_dst, edge_time, edge_properties=None, edge_const_properties=None, edge_shared_const_properties=None, edge_layer=None, layer_in_df=True, node_df=None, node_id=None, node_time=None, node_properties=None, node_const_properties=None, node_shared_const_properties=None, node_type=None, node_type_in_df=True): + def load_from_pandas( + edge_df, + edge_src, + edge_dst, + edge_time, + edge_properties=None, + edge_const_properties=None, + edge_shared_const_properties=None, + edge_layer=None, + layer_in_df=True, + node_df=None, + node_id=None, + node_time=None, + node_properties=None, + node_const_properties=None, + node_shared_const_properties=None, + node_type=None, + node_type_in_df=True, + ): """ Load a graph from a Pandas DataFrame. @@ -4164,9 +3933,26 @@ class PersistentGraph: Returns: Graph: The loaded Graph object. """ - @staticmethod - def load_from_parquet(edge_parquet_path, edge_src, edge_dst, edge_time, edge_properties=None, edge_const_properties=None, edge_shared_const_properties=None, edge_layer=None, layer_in_df=True, node_parquet_path=None, node_id=None, node_time=None, node_properties=None, node_const_properties=None, node_shared_const_properties=None, node_type=None, node_type_in_df=True): + def load_from_parquet( + edge_parquet_path, + edge_src, + edge_dst, + edge_time, + edge_properties=None, + edge_const_properties=None, + edge_shared_const_properties=None, + edge_layer=None, + layer_in_df=True, + node_parquet_path=None, + node_id=None, + node_time=None, + node_properties=None, + node_const_properties=None, + node_shared_const_properties=None, + node_type=None, + node_type_in_df=True, + ): """ Load a graph from Parquet file. @@ -4192,8 +3978,9 @@ class PersistentGraph: Returns: Graph: The loaded Graph object. """ - - def load_node_props_from_pandas(self, df, id, const_properties=None, shared_const_properties=None): + def load_node_props_from_pandas( + self, df, id, const_properties=None, shared_const_properties=None + ): """ Load node properties from a Pandas DataFrame. @@ -4206,8 +3993,9 @@ class PersistentGraph: Returns: Result<(), GraphError>: Result of the operation. """ - - def load_node_props_from_parquet(self, parquet_path, id, const_properties=None, shared_const_properties=None): + def load_node_props_from_parquet( + self, parquet_path, id, const_properties=None, shared_const_properties=None + ): """ Load node properties from a parquet file. @@ -4220,8 +4008,17 @@ class PersistentGraph: Returns: Result<(), GraphError>: Result of the operation. """ - - def load_nodes_from_pandas(self, df, id, time, node_type=None, node_type_in_df=True, properties=None, const_properties=None, shared_const_properties=None): + def load_nodes_from_pandas( + self, + df, + id, + time, + node_type=None, + node_type_in_df=True, + properties=None, + const_properties=None, + shared_const_properties=None, + ): """ Load nodes from a Pandas DataFrame into the graph. @@ -4237,8 +4034,17 @@ class PersistentGraph: Returns: Result<(), GraphError>: Result of the operation. """ - - def load_nodes_from_parquet(self, parquet_path, id, time, node_type=None, node_type_in_df=True, properties=None, const_properties=None, shared_const_properties=None): + def load_nodes_from_parquet( + self, + parquet_path, + id, + time, + node_type=None, + node_type_in_df=True, + properties=None, + const_properties=None, + shared_const_properties=None, + ): """ Load nodes from a Parquet file into the graph. @@ -4254,7 +4060,6 @@ class PersistentGraph: Returns: Result<(), GraphError>: Result of the operation. """ - def materialize(self): """ Returns a 'materialized' clone of the graph view - i.e. a new graph with a copy of the data seen within the view instead of just a mask over the original graph @@ -4262,7 +4067,6 @@ class PersistentGraph: Returns: GraphView - Returns a graph clone """ - def node(self, id): """ Gets the node with the specified id @@ -4273,7 +4077,6 @@ class PersistentGraph: Returns: the node with the specified id, or None if the node does not exist """ - @property def nodes(self): """ @@ -4282,7 +4085,6 @@ class PersistentGraph: Returns: the nodes in the graph """ - @property def properties(self): """ @@ -4292,7 +4094,6 @@ class PersistentGraph: Returns: HashMap - Properties paired with their names """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -4306,7 +4107,6 @@ class PersistentGraph: Returns: A `WindowSet` object. """ - def save_to_file(self, path): """ Saves the graph to the given path. @@ -4317,7 +4117,6 @@ class PersistentGraph: Returns: None """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -4327,7 +4126,6 @@ class PersistentGraph: Returns: A GraphView object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -4338,7 +4136,6 @@ class PersistentGraph: Returns: A GraphView object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -4346,7 +4143,6 @@ class PersistentGraph: Arguments: """ - @property def start(self): """ @@ -4355,7 +4151,6 @@ class PersistentGraph: Returns: The earliest time that this GraphView is valid or None if the GraphView is valid for all times. """ - @property def start_date_time(self): """ @@ -4364,7 +4159,6 @@ class PersistentGraph: Returns: The earliest datetime that this GraphView is valid or None if the GraphView is valid for all times. """ - def subgraph(self, nodes): """ Returns a subgraph given a set of nodes @@ -4375,7 +4169,6 @@ class PersistentGraph: Returns: GraphView - Returns the subgraph """ - def subgraph_node_types(self, node_types): """ Returns a subgraph filtered by node types given a set of node types @@ -4386,8 +4179,14 @@ class PersistentGraph: Returns: GraphView - Returns the subgraph """ - - def to_networkx(self, explode_edges=False, include_node_properties=True, include_edge_properties=True, include_update_history=True, include_property_history=True): + def to_networkx( + self, + explode_edges=False, + include_node_properties=True, + include_edge_properties=True, + include_update_history=True, + include_property_history=True, + ): """ Returns a graph with NetworkX. @@ -4405,8 +4204,18 @@ class PersistentGraph: Returns: A Networkx MultiDiGraph. """ - - def to_pyvis(self, explode_edges=False, edge_color="#000000", shape=None, node_image=None, edge_weight=None, edge_label=None, colour_nodes_by_type=False, notebook=False, **kwargs): + def to_pyvis( + self, + explode_edges=False, + edge_color="#000000", + shape=None, + node_image=None, + edge_weight=None, + edge_label=None, + colour_nodes_by_type=False, + notebook=False, + **kwargs, + ): """ Draw a graph with PyVis. Pyvis is a required dependency. If you intend to use this function make sure that you install Pyvis @@ -4430,11 +4239,9 @@ class PersistentGraph: Returns: A pyvis network """ - @property def unique_layers(self): """Return all the layer ids in the graph""" - def update_constant_properties(self, properties): """ Updates static properties to the graph. @@ -4445,7 +4252,6 @@ class PersistentGraph: Returns: None """ - def valid_layers(self, names): """ Return a view of GraphView containing all layers `names` @@ -4457,8 +4263,16 @@ class PersistentGraph: Returns: GraphView: The layered view """ - - def vectorise(self, embedding, cache=None, overwrite_cache=False, graph_document=None, node_document=None, edge_document=None, verbose=False): + def vectorise( + self, + embedding, + cache=None, + overwrite_cache=False, + graph_document=None, + node_document=None, + edge_document=None, + verbose=False, + ): """ Create a VectorisedGraph from the current graph @@ -4473,7 +4287,6 @@ class PersistentGraph: Returns: A VectorisedGraph with all the documents/embeddings computed and with an initial empty selection """ - def window(self, start, end): """ Create a view of the GraphView including all events between `start` (inclusive) and `end` (exclusive) @@ -4485,24 +4298,20 @@ class PersistentGraph: Returns: r A GraphView object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this GraphView""" + """Get the window size (difference between start and end) for this GraphView""" class Properties: """A view of the properties of an entity""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def as_dict(self): """Convert properties view to a dict""" - @property def constant(self): """Get a view of the constant properties (meta-data) only.""" - def get(self, key): """ Get property value. @@ -4510,17 +4319,13 @@ class Properties: First searches temporal properties and returns latest value if it exists. If not, it falls back to static properties. """ - def items(self): """Get a list of key-value pairs""" - def keys(self): """Get the names for all properties (includes temporal and static properties)""" - @property def temporal(self): """Get a view of the temporal properties only.""" - def values(self): """ Get the values of the properties @@ -4534,12 +4339,9 @@ class PyDirection: def __init__(self, direction): """Initialize self. See help(type(self)) for accurate signature.""" - - def as_str(self): - ... + def as_str(self): ... class PyGraphEncoder: - def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" @@ -4548,10 +4350,8 @@ class TemporalProp: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def at(self, t): """Get the value of the property at time `t`""" - def average(self): """ Compute the average of all property values. Alias for mean(). @@ -4559,7 +4359,6 @@ class TemporalProp: Returns: Prop: The average of each property values, or None if count is zero. """ - def count(self): """ Count the number of properties. @@ -4567,19 +4366,14 @@ class TemporalProp: Returns: int: The number of properties. """ - def history(self): """Get the timestamps at which the property was updated""" - def history_date_time(self): """Get the timestamps at which the property was updated""" - def items(self): """List update timestamps and corresponding property values""" - def items_date_time(self): """List update timestamps and corresponding property values""" - def max(self): """ Find the maximum property value and its associated time. @@ -4587,7 +4381,6 @@ class TemporalProp: Returns: (i64, Prop): A tuple containing the time and the maximum property value. """ - def mean(self): """ Compute the mean of all property values. Alias for mean(). @@ -4595,7 +4388,6 @@ class TemporalProp: Returns: Prop: The mean of each property values, or None if count is zero. """ - def median(self): """ Compute the median of all property values. @@ -4603,7 +4395,6 @@ class TemporalProp: Returns: (i64, Prop): A tuple containing the time and the median property value, or None if empty """ - def min(self): """ Find the minimum property value and its associated time. @@ -4611,10 +4402,7 @@ class TemporalProp: Returns: (i64, Prop): A tuple containing the time and the minimum property value. """ - - def ordered_dedupe(self, latest_time): - ... - + def ordered_dedupe(self, latest_time): ... def sum(self): """ Compute the sum of all property values. @@ -4622,13 +4410,9 @@ class TemporalProp: Returns: Prop: The sum of all property values. """ - - def unique(self): - ... - + def unique(self): ... def value(self): """Get the latest value of the property""" - def values(self): """Get the property values for each update""" @@ -4637,7 +4421,6 @@ class TemporalProperties: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def get(self, key): """ get(key: str) -> Optional[TemporalProp] @@ -4647,7 +4430,6 @@ class TemporalProperties: Returns: the property view if it exists, otherwise `None` """ - def histories(self): """ Get the histories of all properties @@ -4655,7 +4437,6 @@ class TemporalProperties: Returns: dict[str, list[(int, Any)]]: the mapping of property keys to histories """ - def histories_date_time(self): """ Get the histories of all properties @@ -4663,13 +4444,10 @@ class TemporalProperties: Returns: dict[str, list[(datetime, Any)]]: the mapping of property keys to histories """ - def items(self): """List the property keys together with the corresponding values""" - def keys(self): """List the available property keys""" - def latest(self): """ Get the latest value of all properties @@ -4677,7 +4455,6 @@ class TemporalProperties: Returns: dict[str, Any]: the mapping of property keys to latest values """ - def values(self): """ List the values of the properties diff --git a/python/python/raphtory/algorithms/__init__.pyi b/python/python/raphtory/algorithms/__init__.pyi index 0466b74dde..ddd851779c 100644 --- a/python/python/raphtory/algorithms/__init__.pyi +++ b/python/python/raphtory/algorithms/__init__.pyi @@ -68,7 +68,9 @@ def betweenness_centrality(g, k=None, normalized=True): AlgorithmResult[float]: Returns an `AlgorithmResult` containing the betweenness centrality of each node. """ -def cohesive_fruchterman_reingold(graph, iterations=100, scale=1.0, node_start_size=1.0, cooloff_factor=0.95, dt=0.1): +def cohesive_fruchterman_reingold( + graph, iterations=100, scale=1.0, node_start_size=1.0, cooloff_factor=0.95, dt=0.1 +): """Cohesive version of `fruchterman_reingold` that adds virtual edges between isolated nodes""" def degree_centrality(g, threads=None): @@ -85,7 +87,9 @@ def degree_centrality(g, threads=None): AlgorithmResult>: A result containing a mapping of node names to the computed sum of their associated degree centrality. """ -def dijkstra_single_source_shortest_paths(g, source, targets, direction=..., weight=...): +def dijkstra_single_source_shortest_paths( + g, source, targets, direction=..., weight=... +): """ Finds the shortest paths from a single source to multiple targets in a graph. @@ -115,7 +119,9 @@ def directed_graph_density(g): float : Directed graph density of G. """ -def fruchterman_reingold(graph, iterations=100, scale=1.0, node_start_size=1.0, cooloff_factor=0.95, dt=0.1): +def fruchterman_reingold( + graph, iterations=100, scale=1.0, node_start_size=1.0, cooloff_factor=0.95, dt=0.1 +): """ Fruchterman Reingold layout algorithm @@ -446,7 +452,15 @@ def strongly_connected_components(g): Vec> : List of strongly connected nodes identified by ids """ -def temporal_SEIR(graph, seeds, infection_prob, initial_infection, recovery_rate=None, incubation_rate=None, rng_seed=None): +def temporal_SEIR( + graph, + seeds, + infection_prob, + initial_infection, + recovery_rate=None, + incubation_rate=None, + rng_seed=None, +): """ Simulate an SEIR dynamic on the network @@ -470,7 +484,7 @@ def temporal_SEIR(graph, seeds, infection_prob, initial_infection, recovery_rate Returns: AlgorithmResult[Infected]: Returns an `Infected` object for each infected node with attributes - + `infected`: the time stamp of the infection event `active`: the time stamp at which the node actively starts spreading the infection (i.e., the end of the incubation period) diff --git a/python/python/raphtory/graph_loader/__init__.pyi b/python/python/raphtory/graph_loader/__init__.pyi index 63b6858373..9232d66a23 100644 --- a/python/python/raphtory/graph_loader/__init__.pyi +++ b/python/python/raphtory/graph_loader/__init__.pyi @@ -54,9 +54,7 @@ def lotr_graph(): A Graph containing the LOTR dataset """ -def neo4j_movie_graph(uri, username, password, database=...): - ... - +def neo4j_movie_graph(uri, username, password, database=...): ... def reddit_hyperlink_graph(timeout_seconds=600): """ Load (a subset of) Reddit hyperlinks dataset into a graph. @@ -96,8 +94,5 @@ def reddit_hyperlink_graph(timeout_seconds=600): A Graph containing the Reddit hyperlinks dataset """ -def reddit_hyperlink_graph_local(file_path): - ... - -def stable_coin_graph(path=None, subset=None): - ... +def reddit_hyperlink_graph_local(file_path): ... +def stable_coin_graph(path=None, subset=None): ... diff --git a/python/python/raphtory/graphql/__init__.pyi b/python/python/raphtory/graphql/__init__.pyi index d795c6ed6b..96ad675978 100644 --- a/python/python/raphtory/graphql/__init__.pyi +++ b/python/python/raphtory/graphql/__init__.pyi @@ -15,10 +15,8 @@ class GraphqlGraphs: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def get(self, name): """Return the `VectorisedGraph` with name `name` or `None` if it doesn't exist""" - def search_graph_documents(self, query, limit, window): """ Return the top documents with the smallest cosine distance to `query` @@ -31,7 +29,6 @@ class GraphqlGraphs: # Returns A list of documents """ - def search_graph_documents_with_scores(self, query, limit, window): """Same as `search_graph_documents` but it also returns the scores alongside the documents""" @@ -40,7 +37,6 @@ class RaphtoryClient: def __init__(self, url): """Initialize self. See help(type(self)) for accurate signature.""" - def load_graphs_from_path(self, path, overwrite=False): """ Set the server to load all the graphs from its path `path`. @@ -52,7 +48,6 @@ class RaphtoryClient: Returns: The `data` field from the graphQL response after executing the mutation. """ - def query(self, query, variables=None): """ Make a graphQL query against the server. @@ -64,7 +59,6 @@ class RaphtoryClient: Returns: The `data` field from the graphQL response. """ - def send_graph(self, name, graph): """ Send a graph to the server. @@ -76,7 +70,6 @@ class RaphtoryClient: Returns: The `data` field from the graphQL response after executing the mutation. """ - def wait_for_online(self, millis=None): """ Wait for the server to be online. @@ -90,7 +83,6 @@ class RaphtoryServer: def __init__(self, graphs=None, graph_dir=None): """Initialize self. See help(type(self)) for accurate signature.""" - def run(self, port=1736, log_level=..., enable_tracing=False, enable_auth=False): """ Run the server until completion. @@ -98,7 +90,6 @@ class RaphtoryServer: Arguments: * `port`: the port to use (defaults to 1736). """ - def start(self, port=1736, log_level=..., enable_tracing=False, enable_auth=False): """ Start the server and return a handle to it. @@ -106,7 +97,6 @@ class RaphtoryServer: Arguments: * `port`: the port to use (defaults to 1736). """ - def with_document_search_function(self, name, input, function): """ Register a function in the GraphQL schema for document search over a graph. @@ -124,7 +114,6 @@ class RaphtoryServer: Returns: A new server object containing the vectorised graphs. """ - def with_global_search_function(self, name, input, function): """ Register a function in the GraphQL schema for document search among all the graphs. @@ -142,8 +131,15 @@ class RaphtoryServer: Returns: A new server object containing the vectorised graphs. """ - - def with_vectorised(self, cache, graph_names=None, embedding=None, graph_document=None, node_document=None, edge_document=None): + def with_vectorised( + self, + cache, + graph_names=None, + embedding=None, + graph_document=None, + node_document=None, + edge_document=None, + ): """ Vectorise a subset of the graphs of the server. @@ -168,7 +164,6 @@ class RunningRaphtoryServer: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def load_graphs_from_path(self, path, overwrite=False): """ Set the server to load all the graphs from its path `path`. @@ -180,7 +175,6 @@ class RunningRaphtoryServer: Returns: The `data` field from the graphQL response after executing the mutation. """ - def query(self, query, variables=None): """ Make a graphQL query against the server. @@ -192,7 +186,6 @@ class RunningRaphtoryServer: Returns: The `data` field from the graphQL response. """ - def send_graph(self, name, graph): """ Send a graph to the server. @@ -204,13 +197,10 @@ class RunningRaphtoryServer: Returns: The `data` field from the graphQL response after executing the mutation. """ - def stop(self): """Stop the server.""" - def wait(self): """Wait until server completion.""" - def wait_for_online(self, timeout_millis=None): """ Wait for the server to be online. diff --git a/python/python/raphtory/vectors/__init__.pyi b/python/python/raphtory/vectors/__init__.pyi index 6f42e39246..9e09acbe87 100644 --- a/python/python/raphtory/vectors/__init__.pyi +++ b/python/python/raphtory/vectors/__init__.pyi @@ -8,27 +8,18 @@ ############################################################################### class Document: - def __init__(self, content, life=None): """Initialize self. See help(type(self)) for accurate signature.""" - @property - def content(self): - ... - + def content(self): ... @property - def entity(self): - ... - + def entity(self): ... @property - def life(self): - ... + def life(self): ... class VectorisedGraph: - def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def append(self, nodes, edges): """ Add all the documents from `nodes` and `edges` to the current selection @@ -42,7 +33,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def append_by_similarity(self, query, limit, window=None): """ Add the top `limit` documents to the current selection using `query` @@ -55,7 +45,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def append_edges(self, edges): """ Add all the documents from `edges` to the current selection @@ -68,7 +57,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def append_edges_by_similarity(self, query, limit, window=None): """ Add the top `limit` edge documents to the current selection using `query` @@ -81,7 +69,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def append_nodes(self, nodes): """ Add all the documents from `nodes` to the current selection @@ -94,7 +81,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def append_nodes_by_similarity(self, query, limit, window=None): """ Add the top `limit` node documents to the current selection using `query` @@ -107,10 +93,8 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def edges(self): """Return the edges present in the current selection""" - def expand(self, hops, window=None): """ Add all the documents `hops` hops away to the selection @@ -127,7 +111,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def expand_by_similarity(self, query, limit, window=None): """ Add the top `limit` adjacent documents with higher score for `query` to the selection @@ -148,7 +131,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def expand_edges_by_similarity(self, query, limit, window=None): """ Add the top `limit` adjacent edge documents with higher score for `query` to the selection @@ -163,7 +145,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def expand_nodes_by_similarity(self, query, limit, window=None): """ Add the top `limit` adjacent node documents with higher score for `query` to the selection @@ -178,18 +159,13 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def get_documents(self): """Return the documents present in the current selection""" - def get_documents_with_scores(self): """Return the documents alongside their scores present in the current selection""" - def nodes(self): """Return the nodes present in the current selection""" - def save_embeddings(self, file): """Save the embeddings present in this graph to `file` so they can be further used in a call to `vectorise`""" -def generate_property_list(entity, filter_out=..., force_static=...): - ... +def generate_property_list(entity, filter_out=..., force_static=...): ... diff --git a/python/tests/test_disk_graph.py b/python/tests/test_disk_graph.py index eef688abbb..37b4e88269 100644 --- a/python/tests/test_disk_graph.py +++ b/python/tests/test_disk_graph.py @@ -38,9 +38,12 @@ # in every test use with to create a temporary directory that will be deleted automatically # after the with block ends + def test_counts(): graph_dir = tempfile.TemporaryDirectory() - graph = DiskGraphStorage.load_from_pandas(graph_dir.name, edges, "time", "src", "dst") + graph = DiskGraphStorage.load_from_pandas( + graph_dir.name, edges, "time", "src", "dst" + ) graph = graph.to_events() assert graph.count_nodes() == 5 assert graph.count_edges() == 20 @@ -169,7 +172,7 @@ def test_disk_graph_type_filter(): read_chunk_size, concurrent_files, num_threads, - "node_type" + "node_type", ).to_events() assert g.count_nodes() == 1619 @@ -187,14 +190,20 @@ def test_disk_graph_type_filter(): assert g.nodes.type_filter([]).name.collect() == [] - neighbor_names = g.nodes.type_filter(["A"]).neighbours.type_filter(["B"]).name.collect() + neighbor_names = ( + g.nodes.type_filter(["A"]).neighbours.type_filter(["B"]).name.collect() + ) total_length = sum(len(names) for names in neighbor_names) assert total_length == 1023 - assert g.node("Comp175846").neighbours.type_filter(["A"]).name.collect() == ["Comp844043"] + assert g.node("Comp175846").neighbours.type_filter(["A"]).name.collect() == [ + "Comp844043" + ] assert g.node("Comp175846").neighbours.type_filter(["B"]).name.collect() == [] assert g.node("Comp175846").neighbours.type_filter([]).name.collect() == [] - assert g.node("Comp175846").neighbours.type_filter(["A", "B"]).name.collect() == ["Comp844043"] + assert g.node("Comp175846").neighbours.type_filter(["A", "B"]).name.collect() == [ + "Comp844043" + ] neighbor_names = g.node("Comp175846").neighbours.neighbours.name.collect() - assert len(neighbor_names) == 193 \ No newline at end of file + assert len(neighbor_names) == 193 diff --git a/python/tests/test_graph_conversions.py b/python/tests/test_graph_conversions.py index c06b27edc7..c881c42dc7 100644 --- a/python/tests/test_graph_conversions.py +++ b/python/tests/test_graph_conversions.py @@ -21,23 +21,26 @@ def build_graph(): nodes_df["timestamp"] = pd.to_datetime(nodes_df["timestamp"]).astype( "datetime64[ms, UTC]" ) - - return Graph.load_from_pandas( - edge_df=edges_df, - edge_time="timestamp", - edge_src="source", - edge_dst="destination", - edge_properties=["data_size_MB"], + g = Graph() + g.load_edges_from_pandas( + edges_df, + time="timestamp", + src="source", + dst="destination", + properties=["data_size_MB"], layer_col="transaction_type", - edge_constant_properties=["is_encrypted"], - edge_shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, - node_df=nodes_df, - node_id="server_id", - node_time="timestamp", - node_properties=["OS_version", "primary_function", "uptime_days"], - node_constant_properties=["server_name", "hardware_type"], - node_shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, + constant_properties=["is_encrypted"], + shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, + ) + g.load_nodes_from_pandas( + df=nodes_df, + id="server_id", + time="timestamp", + properties=["OS_version", "primary_function", "uptime_days"], + constant_properties=["server_name", "hardware_type"], + shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, ) + return g def build_graph_without_datetime_type(): @@ -47,22 +50,26 @@ def build_graph_without_datetime_type(): nodes_df = pd.read_csv(base_dir / "data/network_traffic_nodes.csv") nodes_df["timestamp"] = pd.to_datetime(nodes_df["timestamp"]) - return Graph.load_from_pandas( - edge_df=edges_df, - edge_time="timestamp", - edge_src="source", - edge_dst="destination", - edge_properties=["data_size_MB"], + g = Graph() + g.load_edges_from_pandas( + edges_df, + time="timestamp", + src="source", + dst="destination", + properties=["data_size_MB"], layer_col="transaction_type", - edge_constant_properties=["is_encrypted"], - edge_shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, - node_df=nodes_df, - node_id="server_id", - node_time="timestamp", - node_properties=["OS_version", "primary_function", "uptime_days"], - node_constant_properties=["server_name", "hardware_type"], - node_shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, + constant_properties=["is_encrypted"], + shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, + ) + g.load_nodes_from_pandas( + df=nodes_df, + id="server_id", + time="timestamp", + properties=["OS_version", "primary_function", "uptime_days"], + constant_properties=["server_name", "hardware_type"], + shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, ) + return g def test_graph_timestamp_list_properties(): @@ -125,35 +132,35 @@ def test_py_vis(): [ { "color": "#97c2fc", - "id": 'ServerA', + "id": "ServerA", "image": "https://cdn-icons-png.flaticon.com/512/7584/7584620.png", "label": "ServerA", "shape": "dot", }, { "color": "#97c2fc", - "id": 'ServerB', + "id": "ServerB", "image": "https://cdn-icons-png.flaticon.com/512/7584/7584620.png", "label": "ServerB", "shape": "dot", }, { "color": "#97c2fc", - "id": 'ServerC', + "id": "ServerC", "image": "https://cdn-icons-png.flaticon.com/512/7584/7584620.png", "label": "ServerC", "shape": "dot", }, { "color": "#97c2fc", - "id": 'ServerD', + "id": "ServerD", "image": "https://cdn-icons-png.flaticon.com/512/7584/7584620.png", "label": "ServerD", "shape": "dot", }, { "color": "#97c2fc", - "id": 'ServerE', + "id": "ServerE", "image": "https://cdn-icons-png.flaticon.com/512/7584/7584620.png", "label": "ServerE", "shape": "dot", @@ -168,63 +175,63 @@ def test_py_vis(): "arrowStrikethrough": False, "arrows": "to", "color": "#000000", - "from": 'ServerA', + "from": "ServerA", "title": "", - "to": 'ServerB', + "to": "ServerB", "value": 0.0, }, { "arrowStrikethrough": False, "arrows": "to", "color": "#000000", - "from": 'ServerA', + "from": "ServerA", "title": "", - "to": 'ServerC', + "to": "ServerC", "value": 0.0, }, { "arrowStrikethrough": False, "arrows": "to", "color": "#000000", - "from": 'ServerB', + "from": "ServerB", "title": "", - "to": 'ServerD', + "to": "ServerD", "value": 0.0, }, { "arrowStrikethrough": False, "arrows": "to", "color": "#000000", - "from": 'ServerC', + "from": "ServerC", "title": "", - "to": 'ServerA', + "to": "ServerA", "value": 0.0, }, { "arrowStrikethrough": False, "arrows": "to", "color": "#000000", - "from": 'ServerD', + "from": "ServerD", "title": "", - "to": 'ServerC', + "to": "ServerC", "value": 0.0, }, { "arrowStrikethrough": False, "arrows": "to", "color": "#000000", - "from": 'ServerD', + "from": "ServerD", "title": "", - "to": 'ServerE', + "to": "ServerE", "value": 0.0, }, { "arrowStrikethrough": False, "arrows": "to", "color": "#000000", - "from": 'ServerE', + "from": "ServerE", "title": "", - "to": 'ServerB', + "to": "ServerB", "value": 0.0, }, ], diff --git a/python/tests/test_graphql.py b/python/tests/test_graphql.py index e0b9ec6c77..f360acdf27 100644 --- a/python/tests/test_graphql.py +++ b/python/tests/test_graphql.py @@ -12,7 +12,7 @@ def normalize_path(path): - return path.replace('\\', '/') + return path.replace("\\", "/") def test_failed_server_start_in_time(): @@ -50,7 +50,9 @@ def test_server_start_on_default_port(): query = """{graph(path: "g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}]} + "nodes": { + "list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}] + } } } @@ -69,7 +71,9 @@ def test_server_start_on_custom_port(): query = """{graph(path: "g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}]} + "nodes": { + "list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}] + } } } @@ -124,7 +128,14 @@ def test_send_graph_succeeds_if_graph_already_exists_with_overwrite_enabled(): query = """{graph(path: "g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}, {"name": "shivam"}]} + "nodes": { + "list": [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + {"name": "shivam"}, + ] + } } } @@ -181,7 +192,14 @@ def test_send_graph_succeeds_if_graph_already_exists_at_namespace_with_overwrite query = """{graph(path: "shivam/g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}, {"name": "shivam"}]} + "nodes": { + "list": [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + {"name": "shivam"}, + ] + } } } @@ -191,7 +209,9 @@ def assert_graph_fetch(path): query = f"""{{ graph(path: "{path}") {{ nodes {{ list {{ name }} }} }} }}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}]} + "nodes": { + "list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}] + } } } @@ -237,17 +257,23 @@ def assert_graph_fetch(path): path = "../shivam/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "References to the parent dir are not allowed within the path:" in str(excinfo.value) + assert "References to the parent dir are not allowed within the path:" in str( + excinfo.value + ) path = "./shivam/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "References to the current dir are not allowed within the path" in str(excinfo.value) + assert "References to the current dir are not allowed within the path" in str( + excinfo.value + ) path = "shivam/../../../../investigation/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "References to the parent dir are not allowed within the path:" in str(excinfo.value) + assert "References to the parent dir are not allowed within the path:" in str( + excinfo.value + ) path = "//shivam/investigation/g" with pytest.raises(Exception) as excinfo: @@ -264,7 +290,7 @@ def assert_graph_fetch(path): client.send_graph(path=path, graph=g, overwrite=True) assert "Backslash not allowed in path" in str(excinfo.value) - #Test if we can escape through a symlink + # Test if we can escape through a symlink tmp_dir2 = tempfile.mkdtemp() nested_dir = os.path.join(tmp_work_dir, "shivam", "graphs") os.makedirs(nested_dir) @@ -295,7 +321,9 @@ def test_upload_graph_succeeds_if_no_graph_found_with_same_name(): query = """{graph(path: "g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}]} + "nodes": { + "list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}] + } } } @@ -345,7 +373,14 @@ def test_upload_graph_succeeds_if_graph_already_exists_with_overwrite_enabled(): query = """{graph(path: "g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}, {"name": "shivam"}]} + "nodes": { + "list": [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + {"name": "shivam"}, + ] + } } } @@ -368,7 +403,9 @@ def test_upload_graph_succeeds_if_no_graph_found_with_same_name_at_namespace(): query = """{graph(path: "shivam/g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}]} + "nodes": { + "list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}] + } } } @@ -418,10 +455,18 @@ def test_upload_graph_succeeds_if_graph_already_exists_at_namespace_with_overwri query = """{graph(path: "shivam/g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}, {"name": "shivam"}]} + "nodes": { + "list": [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + {"name": "shivam"}, + ] + } } } + def test_get_graph_fails_if_graph_not_found(): work_dir = tempfile.mkdtemp() with RaphtoryServer(work_dir).start() as server: @@ -438,7 +483,9 @@ def test_get_graph_fails_if_graph_not_found_at_namespace(): with RaphtoryServer(work_dir).start() as server: client = server.get_client() - query = """{ graph(path: "shivam/g1") { name, path, nodes { list { name } } } }""" + query = ( + """{ graph(path: "shivam/g1") { name, path, nodes { list { name } } } }""" + ) with pytest.raises(Exception) as excinfo: client.query(query) assert "Graph not found" in str(excinfo.value) @@ -459,8 +506,14 @@ def test_get_graph_succeeds_if_graph_found(): query = """{ graph(path: "g1") { name, path, nodes { list { name } } } }""" assert client.query(query) == { - 'graph': {'name': 'g1', 'nodes': {'list': [{'name': 'ben'}, {'name': 'hamza'}, {'name': 'haaroon'}]}, - 'path': 'g1'}} + "graph": { + "name": "g1", + "nodes": { + "list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}] + }, + "path": "g1", + } + } def test_get_graph_succeeds_if_graph_found_at_namespace(): @@ -476,11 +529,15 @@ def test_get_graph_succeeds_if_graph_found_at_namespace(): os.makedirs(os.path.join(work_dir, "shivam"), exist_ok=True) g.save_to_file(os.path.join(work_dir, "shivam", "g2")) - query = """{ graph(path: "shivam/g2") { name, path, nodes { list { name } } } }""" + query = ( + """{ graph(path: "shivam/g2") { name, path, nodes { list { name } } } }""" + ) response = client.query(query) - assert response['graph']['name'] == 'g2' - assert response['graph']['nodes'] == {'list': [{'name': 'ben'}, {'name': 'hamza'}, {'name': 'haaroon'}]} - assert normalize_path(response['graph']['path']) == 'shivam/g2' + assert response["graph"]["name"] == "g2" + assert response["graph"]["nodes"] == { + "list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}] + } + assert normalize_path(response["graph"]["path"]) == "shivam/g2" def test_get_graphs_returns_emtpy_list_if_no_graphs_found(): @@ -490,9 +547,7 @@ def test_get_graphs_returns_emtpy_list_if_no_graphs_found(): # Assert if no graphs are discoverable query = """{ graphs { name, path } }""" - assert client.query(query) == { - 'graphs': {'name': [], 'path': []} - } + assert client.query(query) == {"graphs": {"name": [], "path": []}} def test_get_graphs_returns_graph_list_if_graphs_found(): @@ -514,15 +569,15 @@ def test_get_graphs_returns_graph_list_if_graphs_found(): query = """{ graphs { name, path } }""" response = client.query(query) sorted_response = { - 'graphs': { - 'name': sorted(response['graphs']['name']), - 'path': sorted(normalize_path(p) for p in response['graphs']['path']) + "graphs": { + "name": sorted(response["graphs"]["name"]), + "path": sorted(normalize_path(p) for p in response["graphs"]["path"]), } } assert sorted_response == { - 'graphs': { - 'name': ['g1', 'g2', 'g3'], - 'path': ['g1', 'shivam/g2', 'shivam/g3'] + "graphs": { + "name": ["g1", "g2", "g3"], + "path": ["g1", "shivam/g2", "shivam/g3"], } } @@ -551,7 +606,7 @@ def test_receive_graph_succeeds_if_graph_found(): g.save_to_file(os.path.join(work_dir, "g1")) query = """{ receiveGraph(path: "g1") }""" - received_graph = client.query(query)['receiveGraph'] + received_graph = client.query(query)["receiveGraph"] decoded_bytes = base64.b64decode(received_graph) @@ -599,7 +654,7 @@ def test_receive_graph_succeeds_if_graph_found_at_namespace(): g.save_to_file(os.path.join(work_dir, "shivam", "g2")) query = """{ receiveGraph(path: "shivam/g2") }""" - received_graph = client.query(query)['receiveGraph'] + received_graph = client.query(query)["receiveGraph"] decoded_bytes = base64.b64decode(received_graph) @@ -737,9 +792,18 @@ def test_move_graph_succeeds(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_move_graph_using_client_api_succeeds(): @@ -774,9 +838,18 @@ def test_move_graph_using_client_api_succeeds(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_move_graph_succeeds_at_same_namespace_as_graph(): @@ -818,9 +891,18 @@ def test_move_graph_succeeds_at_same_namespace_as_graph(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_move_graph_succeeds_at_diff_namespace_as_graph(): @@ -863,9 +945,18 @@ def test_move_graph_succeeds_at_diff_namespace_as_graph(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_copy_graph_fails_if_graph_not_found(): @@ -984,7 +1075,11 @@ def test_copy_graph_succeeds(): query = """{graph(path: "shivam/g3") { nodes {list {name}} }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] query = """{graph(path: "g4") { nodes {list {name}} @@ -996,8 +1091,14 @@ def test_copy_graph_succeeds(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_copy_graph_using_client_api_succeeds(): @@ -1018,7 +1119,11 @@ def test_copy_graph_using_client_api_succeeds(): query = """{graph(path: "shivam/g3") { nodes {list {name}} }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] query = """{graph(path: "ben/g4") { nodes {list {name}} @@ -1030,8 +1135,14 @@ def test_copy_graph_using_client_api_succeeds(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_copy_graph_succeeds_at_same_namespace_as_graph(): @@ -1059,7 +1170,11 @@ def test_copy_graph_succeeds_at_same_namespace_as_graph(): query = """{graph(path: "shivam/g3") { nodes {list {name}} }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] query = """{graph(path: "shivam/g4") { nodes {list {name}} @@ -1071,8 +1186,14 @@ def test_copy_graph_succeeds_at_same_namespace_as_graph(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_copy_graph_succeeds_at_diff_namespace_as_graph(): @@ -1101,7 +1222,11 @@ def test_copy_graph_succeeds_at_diff_namespace_as_graph(): query = """{graph(path: "ben/g3") { nodes {list {name}} }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] query = """{graph(path: "shivam/g4") { nodes {list {name}} @@ -1113,8 +1238,14 @@ def test_copy_graph_succeeds_at_diff_namespace_as_graph(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_delete_graph_fails_if_graph_not_found(): @@ -1263,11 +1394,11 @@ def test_create_graph_fail_if_graph_already_exists(): def test_create_graph_fail_if_graph_already_exists_at_namespace(): work_dir = tempfile.mkdtemp() os.makedirs(os.path.join(work_dir, "shivam"), exist_ok=True) - + g = Graph() g.save_to_file(os.path.join(work_dir, "g0")) g.save_to_file(os.path.join(work_dir, "shivam", "g3")) - + with RaphtoryServer(work_dir).start() as server: client = server.get_client() query = """mutation { @@ -1331,16 +1462,35 @@ def test_create_graph_succeeds(): }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'hamza', 'properties': {'temporal': {'get': {'values': ['director']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "hamza", + "properties": {"temporal": {"get": {"values": ["director"]}}}, + }, + ] + assert result["graph"]["edges"]["list"] == [ + {"properties": {"temporal": {"get": {"values": ["1"]}}}} ] - assert result['graph']['edges']['list'] == [{'properties': {'temporal': {'get': {'values': ['1']}}}}] - assert result['graph']['properties']['constant']['creationTime']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert ( + result["graph"]["properties"]["constant"]["creationTime"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_create_graph_succeeds_at_namespace(): @@ -1390,16 +1540,35 @@ def test_create_graph_succeeds_at_namespace(): }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'hamza', 'properties': {'temporal': {'get': {'values': ['director']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "hamza", + "properties": {"temporal": {"get": {"values": ["director"]}}}, + }, + ] + assert result["graph"]["edges"]["list"] == [ + {"properties": {"temporal": {"get": {"values": ["1"]}}}} ] - assert result['graph']['edges']['list'] == [{'properties': {'temporal': {'get': {'values': ['1']}}}}] - assert result['graph']['properties']['constant']['creationTime']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert ( + result["graph"]["properties"]["constant"]["creationTime"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 # Update Graph with new graph name tests (save as new graph name) @@ -1523,16 +1692,35 @@ def test_update_graph_with_new_graph_name_succeeds_if_parent_graph_belongs_to_di }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'hamza', 'properties': {'temporal': {'get': {'values': ['director']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "hamza", + "properties": {"temporal": {"get": {"values": ["director"]}}}, + }, + ] + assert result["graph"]["edges"]["list"] == [ + {"properties": {"temporal": {"get": {"values": ["1"]}}}} ] - assert result['graph']['edges']['list'] == [{'properties': {'temporal': {'get': {'values': ['1']}}}}] - assert result['graph']['properties']['constant']['creationTime']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert ( + result["graph"]["properties"]["constant"]["creationTime"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_update_graph_with_new_graph_name_succeeds_if_parent_graph_belongs_to_same_namespace(): @@ -1584,16 +1772,35 @@ def test_update_graph_with_new_graph_name_succeeds_if_parent_graph_belongs_to_sa } }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'hamza', 'properties': {'temporal': {'get': {'values': ['director']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "hamza", + "properties": {"temporal": {"get": {"values": ["director"]}}}, + }, + ] + assert result["graph"]["edges"]["list"] == [ + {"properties": {"temporal": {"get": {"values": ["1"]}}}} ] - assert result['graph']['edges']['list'] == [{'properties': {'temporal': {'get': {'values': ['1']}}}}] - assert result['graph']['properties']['constant']['creationTime']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert ( + result["graph"]["properties"]["constant"]["creationTime"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_update_graph_with_new_graph_name_succeeds_with_new_node_from_parent_graph_added_to_new_graph(): @@ -1655,16 +1862,33 @@ def test_update_graph_with_new_graph_name_succeeds_with_new_node_from_parent_gra }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'shivam', 'properties': {'temporal': {'get': {'values': ['engineering']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "shivam", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, ] - assert result['graph']['edges']['list'] == [] - assert result['graph']['properties']['constant']['creationTime']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert result["graph"]["edges"]["list"] == [] + assert ( + result["graph"]["properties"]["constant"]["creationTime"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_update_graph_with_new_graph_name_succeeds_with_new_node_removed_from_new_graph(): @@ -1717,16 +1941,35 @@ def test_update_graph_with_new_graph_name_succeeds_with_new_node_removed_from_ne }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'hamza', 'properties': {'temporal': {'get': {'values': ['director']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "hamza", + "properties": {"temporal": {"get": {"values": ["director"]}}}, + }, + ] + assert result["graph"]["edges"]["list"] == [ + {"properties": {"temporal": {"get": {"values": ["1"]}}}} ] - assert result['graph']['edges']['list'] == [{'properties': {'temporal': {'get': {'values': ['1']}}}}] - assert result['graph']['properties']['constant']['creationTime']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert ( + result["graph"]["properties"]["constant"]["creationTime"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 # Update Graph tests (save graph as same graph name) @@ -1820,15 +2063,31 @@ def test_update_graph_succeeds_if_parent_graph_belongs_to_different_namespace(): }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'hamza', 'properties': {'temporal': {'get': {'values': ['director']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "hamza", + "properties": {"temporal": {"get": {"values": ["director"]}}}, + }, ] - assert result['graph']['edges']['list'] == [{'properties': {'temporal': {'get': {'values': ['1']}}}}] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert result["graph"]["edges"]["list"] == [ + {"properties": {"temporal": {"get": {"values": ["1"]}}}} + ] + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_update_graph_succeeds_if_parent_graph_belongs_to_same_namespace(): @@ -1880,15 +2139,31 @@ def test_update_graph_succeeds_if_parent_graph_belongs_to_same_namespace(): } }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'hamza', 'properties': {'temporal': {'get': {'values': ['director']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "hamza", + "properties": {"temporal": {"get": {"values": ["director"]}}}, + }, ] - assert result['graph']['edges']['list'] == [{'properties': {'temporal': {'get': {'values': ['1']}}}}] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert result["graph"]["edges"]["list"] == [ + {"properties": {"temporal": {"get": {"values": ["1"]}}}} + ] + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_update_graph_succeeds_with_new_node_from_parent_graph_added_to_new_graph(): @@ -1949,15 +2224,29 @@ def test_update_graph_succeeds_with_new_node_from_parent_graph_added_to_new_grap }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'shivam', 'properties': {'temporal': {'get': {'values': ['engineering']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "shivam", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, ] - assert result['graph']['edges']['list'] == [] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert result["graph"]["edges"]["list"] == [] + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_update_graph_succeeds_with_new_node_removed_from_new_graph(): @@ -2010,14 +2299,25 @@ def test_update_graph_succeeds_with_new_node_removed_from_new_graph(): }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, ] - assert result['graph']['edges']['list'] == [] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert result["graph"]["edges"]["list"] == [] + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_update_graph_last_opened_fails_if_graph_not_found(): @@ -2059,12 +2359,18 @@ def test_update_graph_last_opened_succeeds(): query_last_opened = """{ graph(path: "g1") { properties { constant { get(key: "lastOpened") { value } } } } }""" mutate_last_opened = """mutation { updateGraphLastOpened(path: "g1") }""" - assert client.query(query_last_opened) == {'graph': {'properties': {'constant': {'get': None}}}} - assert client.query(mutate_last_opened) == {'updateGraphLastOpened': True} - updated_last_opened1 = client.query(query_last_opened)['graph']['properties']['constant']['get']['value'] + assert client.query(query_last_opened) == { + "graph": {"properties": {"constant": {"get": None}}} + } + assert client.query(mutate_last_opened) == {"updateGraphLastOpened": True} + updated_last_opened1 = client.query(query_last_opened)["graph"]["properties"][ + "constant" + ]["get"]["value"] time.sleep(1) - assert client.query(mutate_last_opened) == {'updateGraphLastOpened': True} - updated_last_opened2 = client.query(query_last_opened)['graph']['properties']['constant']['get']['value'] + assert client.query(mutate_last_opened) == {"updateGraphLastOpened": True} + updated_last_opened2 = client.query(query_last_opened)["graph"]["properties"][ + "constant" + ]["get"]["value"] assert updated_last_opened2 > updated_last_opened1 @@ -2084,12 +2390,18 @@ def test_update_graph_last_opened_succeeds_at_namespace(): query_last_opened = """{ graph(path: "shivam/g2") { properties { constant { get(key: "lastOpened") { value } } } } }""" mutate_last_opened = """mutation { updateGraphLastOpened(path: "shivam/g2") }""" - assert client.query(query_last_opened) == {'graph': {'properties': {'constant': {'get': None}}}} - assert client.query(mutate_last_opened) == {'updateGraphLastOpened': True} - updated_last_opened1 = client.query(query_last_opened)['graph']['properties']['constant']['get']['value'] + assert client.query(query_last_opened) == { + "graph": {"properties": {"constant": {"get": None}}} + } + assert client.query(mutate_last_opened) == {"updateGraphLastOpened": True} + updated_last_opened1 = client.query(query_last_opened)["graph"]["properties"][ + "constant" + ]["get"]["value"] time.sleep(1) - assert client.query(mutate_last_opened) == {'updateGraphLastOpened': True} - updated_last_opened2 = client.query(query_last_opened)['graph']['properties']['constant']['get']['value'] + assert client.query(mutate_last_opened) == {"updateGraphLastOpened": True} + updated_last_opened2 = client.query(query_last_opened)["graph"]["properties"][ + "constant" + ]["get"]["value"] assert updated_last_opened2 > updated_last_opened1 @@ -2130,13 +2442,25 @@ def test_archive_graph_succeeds(): g.save_to_file(os.path.join(work_dir, "shivam", "g2")) query_is_archive = """{ graph(path: "g1") { properties { constant { get(key: "isArchive") { value } } } } }""" - assert client.query(query_is_archive) == {'graph': {'properties': {'constant': {'get': None}}}} + assert client.query(query_is_archive) == { + "graph": {"properties": {"constant": {"get": None}}} + } update_archive_graph = """mutation { archiveGraph(path: "g1", isArchive: 0) }""" assert client.query(update_archive_graph) == {"archiveGraph": True} - assert client.query(query_is_archive)['graph']['properties']['constant']['get']['value'] == 0 + assert ( + client.query(query_is_archive)["graph"]["properties"]["constant"]["get"][ + "value" + ] + == 0 + ) update_archive_graph = """mutation { archiveGraph(path: "g1", isArchive: 1) }""" assert client.query(update_archive_graph) == {"archiveGraph": True} - assert client.query(query_is_archive)['graph']['properties']['constant']['get']['value'] == 1 + assert ( + client.query(query_is_archive)["graph"]["properties"]["constant"]["get"][ + "value" + ] + == 1 + ) def test_archive_graph_succeeds_at_namespace(): @@ -2154,13 +2478,29 @@ def test_archive_graph_succeeds_at_namespace(): g.save_to_file(os.path.join(work_dir, "shivam", "g2")) query_is_archive = """{ graph(path: "shivam/g2") { properties { constant { get(key: "isArchive") { value } } } } }""" - assert client.query(query_is_archive) == {'graph': {'properties': {'constant': {'get': None}}}} - update_archive_graph = """mutation { archiveGraph(path: "shivam/g2", isArchive: 0) }""" + assert client.query(query_is_archive) == { + "graph": {"properties": {"constant": {"get": None}}} + } + update_archive_graph = ( + """mutation { archiveGraph(path: "shivam/g2", isArchive: 0) }""" + ) assert client.query(update_archive_graph) == {"archiveGraph": True} - assert client.query(query_is_archive)['graph']['properties']['constant']['get']['value'] == 0 - update_archive_graph = """mutation { archiveGraph(path: "shivam/g2", isArchive: 1) }""" + assert ( + client.query(query_is_archive)["graph"]["properties"]["constant"]["get"][ + "value" + ] + == 0 + ) + update_archive_graph = ( + """mutation { archiveGraph(path: "shivam/g2", isArchive: 1) }""" + ) assert client.query(update_archive_graph) == {"archiveGraph": True} - assert client.query(query_is_archive)['graph']['properties']['constant']['get']['value'] == 1 + assert ( + client.query(query_is_archive)["graph"]["properties"]["constant"]["get"][ + "value" + ] + == 1 + ) def test_graph_windows_and_layers_query(): @@ -2347,6 +2687,7 @@ def test_graph_properties_query(): key=lambda x: x["key"], ) + # def test_disk_graph_name(): # import pandas as pd # from raphtory import DiskGraphStorage @@ -2382,4 +2723,4 @@ def test_graph_properties_query(): # tmp_work_dir = tempfile.mkdtemp() # with RaphtoryServer(tmp_work_dir).start() as server: # client = server.get_client() -# client.upload_graph(path="g", graph=g) \ No newline at end of file +# client.upload_graph(path="g", graph=g) diff --git a/python/tests/test_iterables.py b/python/tests/test_iterables.py index 8e8874a63e..c1d0edb0cd 100644 --- a/python/tests/test_iterables.py +++ b/python/tests/test_iterables.py @@ -148,7 +148,7 @@ def test_propiterable(): assert sorted(total) == [2, 17, 18, 35, 38] total = dict(zip(g.nodes.id, g.nodes.out_edges.properties.get("value_dec").sum())) - assert total == {'1': 32, '2': 5, '3': 3, '4': 15, '5': None} + assert total == {"1": 32, "2": 5, "3": 3, "4": 15, "5": None} total = g.nodes.out_edges.properties.get("value_dec").sum().sum() assert total == 55 diff --git a/python/tests/test_load_from_pandas.py b/python/tests/test_load_from_pandas.py index 912813bc9d..da414315fe 100644 --- a/python/tests/test_load_from_pandas.py +++ b/python/tests/test_load_from_pandas.py @@ -46,12 +46,12 @@ def assertions(g): assert g.nodes.id.collect() == expected_nodes assert edges == expected_edges - g = Graph.load_from_pandas(df, "time", "src", "dst", ["weight", "marbles"]) + g = Graph() + g.load_edges_from_pandas(df, "time", "src", "dst", ["weight", "marbles"]) assertions(g) - g = PersistentGraph.load_from_pandas( - df, "time", "src", "dst", ["weight", "marbles"] - ) + g = PersistentGraph() + g.load_edges_from_pandas(df, "time", "src", "dst", ["weight", "marbles"]) assertions(g) @@ -77,13 +77,13 @@ def assertions(exc_info): # Use pytest.raises to expect an exception with pytest.raises(Exception) as exc_info: - g = Graph.load_from_pandas(df, "time", "src", "dst", ["weight", "marbles"]) + g = Graph() + g.load_edges_from_pandas(df, "time", "src", "dst", ["weight", "marbles"]) assertions(exc_info) with pytest.raises(Exception) as exc_info: - g = PersistentGraph.load_from_pandas( - df, "time", "src", "dst", ["weight", "marbles"] - ) + g = PersistentGraph() + g.load_edges_from_pandas(df, "time", "src", "dst", ["weight", "marbles"]) assertions(exc_info) # Optionally, you can check the exception message or type @@ -148,12 +148,16 @@ def assertions(g): assert nodes == expected_nodes g = Graph() - g.load_nodes_from_pandas(nodes_df, "id", "time", "node_type", properties=["name"]) + g.load_nodes_from_pandas( + nodes_df, "time", "id", node_type_col="node_type", properties=["name"] + ) g.load_edges_from_pandas(edges_df, "time", "src", "dst", ["weight", "marbles"]) assertions(g) g = PersistentGraph() - g.load_nodes_from_pandas(nodes_df, "id", "time", "node_type", properties=["name"]) + g.load_nodes_from_pandas( + nodes_df, "time", "id", node_type_col="node_type", properties=["name"] + ) g.load_edges_from_pandas(edges_df, "time", "src", "dst", ["weight", "marbles"]) assertions(g) @@ -210,32 +214,23 @@ def assertions(g): assert g.nodes.id.collect() == expected_node_ids assert edges == expected_edges - g = Graph.load_from_pandas( - edges_df, - edge_time="time", - edge_src="src", - edge_dst="dst", - edge_properties=["weight", "marbles"], - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], - node_type="node_type", + g = Graph() + g.load_edges_from_pandas( + edges_df, time="time", src="src", dst="dst", properties=["weight", "marbles"] + ) + g.load_nodes_from_pandas( + df=nodes_df, time="time", id="id", properties=["name"], node_type="node_type" ) assertions(g) - g = PersistentGraph.load_from_pandas( - edges_df, - edge_time="time", - edge_src="src", - edge_dst="dst", - edge_properties=["weight", "marbles"], - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], - node_type="node_type", + g = PersistentGraph() + g.load_edges_from_pandas( + edges_df, time="time", src="src", dst="dst", properties=["weight", "marbles"] + ) + g.load_nodes_from_pandas( + df=nodes_df, time="time", id="id", properties=["name"], node_type="node_type" ) + assertions(g) @@ -296,8 +291,8 @@ def assertions1(g): g = Graph() g.load_nodes_from_pandas( nodes_df, - "id", "time", + "id", node_type_col="node_type", properties=["name"], shared_constant_properties={"tag": "test_tag"}, @@ -307,14 +302,36 @@ def assertions1(g): g = PersistentGraph() g.load_nodes_from_pandas( nodes_df, - "id", "time", + "id", node_type_col="node_type", properties=["name"], shared_constant_properties={"tag": "test_tag"}, ) assertions1(g) + g = Graph() + g.load_nodes_from_pandas(nodes_df, "time", "id") + g.load_node_props_from_pandas( + nodes_df, + "id", + node_type_col="node_type", + constant_properties=["name"], + shared_constant_properties={"tag": "test_tag"}, + ) + assertions1(g) + + g = PersistentGraph() + g.load_nodes_from_pandas(nodes_df, "time", "id") + g.load_node_props_from_pandas( + nodes_df, + "id", + node_type_col="node_type", + constant_properties=["name"], + shared_constant_properties={"tag": "test_tag"}, + ) + assertions1(g) + def assertions2(g): assert g.nodes.properties.constant.get("type").collect() == [ "Person 1", @@ -328,8 +345,8 @@ def assertions2(g): g = Graph() g.load_nodes_from_pandas( nodes_df, - "id", "time", + "id", node_type_col="node_type", properties=["name"], constant_properties=["type"], @@ -381,7 +398,7 @@ def assertions3(g): properties=["weight", "marbles"], constant_properties=["marbles_const"], shared_constant_properties={"type": "Edge", "tag": "test_tag"}, - layer_name="test_layer", + layer="test_layer", ) assertions3(g) @@ -394,7 +411,7 @@ def assertions3(g): properties=["weight", "marbles"], constant_properties=["marbles_const"], shared_constant_properties={"type": "Edge", "tag": "test_tag"}, - layer_name="test_layer", + layer="test_layer", ) assertions3(g) @@ -435,31 +452,37 @@ def assertions5(g): ] assert g.layers(["test_layer"]).edges.src.id.collect() == [1, 2, 3, 4, 5] - g = Graph.load_from_pandas( + g = Graph() + g.load_edges_from_pandas( edges_df, "time", "src", "dst", - layer_name="test_layer", - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], - node_shared_constant_properties={"type": "Person"}, + layer="test_layer", + ) + g.load_nodes_from_pandas( + df=nodes_df, + time="time", + id="id", + properties=["name"], + shared_constant_properties={"type": "Person"}, ) assertions5(g) - g = PersistentGraph.load_from_pandas( + g = PersistentGraph() + g.load_edges_from_pandas( edges_df, "time", "src", "dst", - layer_name="test_layer", - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], - node_shared_constant_properties={"type": "Person"}, + layer="test_layer", + ) + g.load_nodes_from_pandas( + df=nodes_df, + time="time", + id="id", + properties=["name"], + shared_constant_properties={"type": "Person"}, ) assertions5(g) @@ -485,31 +508,25 @@ def assertions6(g): 5, ] - g = Graph.load_from_pandas( - edges_df, - "time", - "src", - "dst", - layer_col="layers", - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], - node_constant_properties=["type"], + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer_col="layers") + g.load_nodes_from_pandas( + df=nodes_df, + time="time", + id="id", + properties=["name"], + constant_properties=["type"], ) assertions6(g) - g = PersistentGraph.load_from_pandas( - edges_df, - "time", - "src", - "dst", - layer_col="layers", - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], - node_constant_properties=["type"], + g = PersistentGraph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer_col="layers") + g.load_nodes_from_pandas( + df=nodes_df, + time="time", + id="id", + properties=["name"], + constant_properties=["type"], ) assertions6(g) @@ -531,18 +548,18 @@ def assertions7(g): "test_tag", ] - g = Graph.load_from_pandas( + g = Graph() + g.load_edges_from_pandas( edges_df, - edge_time="time", - edge_src="src", - edge_dst="dst", - edge_properties=["weight", "marbles"], - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], + time="time", + src="src", + dst="dst", + properties=["weight", "marbles"], layer_col="layers", ) + g.load_nodes_from_pandas( + df=nodes_df, time="time", id="id", properties=["name"] + ) g.load_node_props_from_pandas( nodes_df, "id", @@ -577,18 +594,19 @@ def assertions8(g): ) assertions8(g) - g = PersistentGraph.load_from_pandas( + g = PersistentGraph() + + g.load_edges_from_pandas( edges_df, - edge_time="time", - edge_src="src", - edge_dst="dst", - edge_properties=["weight", "marbles"], - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], + time="time", + src="src", + dst="dst", + properties=["weight", "marbles"], layer_col="layers", ) + g.load_nodes_from_pandas( + df=nodes_df, time="time", id="id", properties=["name"] + ) g.load_node_props_from_pandas( nodes_df, "id", @@ -620,7 +638,9 @@ def assertions_layers_in_df(g): assert g.layers(["layer 3"]).edges.src.id.collect() == [3] with pytest.raises( Exception, - match=re.escape("Invalid layer: test_layer. Valid layers: _default, layer 1, layer 2, layer 3, layer 4, layer 5"), + match=re.escape( + "Invalid layer: test_layer. Valid layers: _default, layer 1, layer 2, layer 3, layer 4, layer 5" + ), ): g.layers(["test_layer"]) @@ -676,11 +696,12 @@ def test_missing_columns(): "columns are not present within the dataframe: not_src, not_dst, not_time" ), ): - g = Graph.load_from_pandas( + g = Graph() + g.load_edges_from_pandas( edges_df, - edge_time="not_time", - edge_src="not_src", - edge_dst="not_dst", + time="not_time", + src="not_src", + dst="not_dst", ) with pytest.raises( @@ -689,11 +710,12 @@ def test_missing_columns(): "columns are not present within the dataframe: not_src, not_dst, not_time" ), ): - g = PersistentGraph.load_from_pandas( + g = PersistentGraph() + g.load_edges_from_pandas( edges_df, - edge_time="not_time", - edge_src="not_src", - edge_dst="not_dst", + time="not_time", + src="not_src", + dst="not_dst", ) with pytest.raises( @@ -702,18 +724,16 @@ def test_missing_columns(): "columns are not present within the dataframe: not_weight, bleep_bloop" ), ): - g = Graph.load_from_pandas( + g = Graph() + g.load_edges_from_pandas( edges_df, - edge_time="time", - edge_src="src", - edge_dst="dst", - edge_properties=["not_weight", "marbles"], - edge_constant_properties=["bleep_bloop"], - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], + time="time", + src="src", + dst="dst", + properties=["not_weight", "marbles"], + constant_properties=["bleep_bloop"], ) + g.load_nodes_from_pandas(df=nodes_df, time="time", id="id", properties=["name"]) with pytest.raises( Exception, @@ -721,18 +741,16 @@ def test_missing_columns(): "columns are not present within the dataframe: not_weight, bleep_bloop" ), ): - g = PersistentGraph.load_from_pandas( + g = PersistentGraph() + g.load_edges_from_pandas( edges_df, - edge_time="time", - edge_src="src", - edge_dst="dst", - edge_properties=["not_weight", "marbles"], - edge_constant_properties=["bleep_bloop"], - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], + time="time", + src="src", + dst="dst", + properties=["not_weight", "marbles"], + constant_properties=["bleep_bloop"], ) + g.load_nodes_from_pandas(df=nodes_df, time="time", id="id", properties=["name"]) with pytest.raises( Exception, @@ -740,16 +758,16 @@ def test_missing_columns(): "columns are not present within the dataframe: not_id, not_time, not_name" ), ): - g = Graph.load_from_pandas( + g = Graph() + g.load_edges_from_pandas( edges_df, - edge_time="time", - edge_src="src", - edge_dst="dst", - edge_properties=["weight", "marbles"], - node_df=nodes_df, - node_id="not_id", - node_time="not_time", - node_properties=["not_name"], + time="time", + src="src", + dst="dst", + properties=["weight", "marbles"], + ) + g.load_nodes_from_pandas( + df=nodes_df, time="not_time", id="not_id", properties=["not_name"] ) with pytest.raises( @@ -758,16 +776,16 @@ def test_missing_columns(): "columns are not present within the dataframe: not_id, not_time, not_name" ), ): - g = PersistentGraph.load_from_pandas( + g = PersistentGraph() + g.load_edges_from_pandas( edges_df, - edge_time="time", - edge_src="src", - edge_dst="dst", - edge_properties=["weight", "marbles"], - node_df=nodes_df, - node_id="not_id", - node_time="not_time", - node_properties=["not_name"], + time="time", + src="src", + dst="dst", + properties=["weight", "marbles"], + ) + g.load_nodes_from_pandas( + df=nodes_df, id="not_id", time="not_time", properties=["not_name"] ) with pytest.raises( @@ -832,12 +850,13 @@ def test_none_columns_edges(): with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - Graph.load_from_pandas(edges_df, "time", "src", "dst") + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst") with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - PersistentGraph.load_from_pandas(edges_df, "time", "src", "dst") + PersistentGraph().load_edges_from_pandas(edges_df, "time", "src", "dst") edges_df = pd.DataFrame( {"src": [1, 2, 3, 4, 5], "dst": [2, 3, 4, None, 6], "time": [1, 2, 3, 4, 5]} @@ -845,11 +864,11 @@ def test_none_columns_edges(): with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - Graph.load_from_pandas(edges_df, "time", "src", "dst") + Graph().load_edges_from_pandas(edges_df, "time", "src", "dst") with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - PersistentGraph.load_from_pandas(edges_df, "time", "src", "dst") + PersistentGraph().load_edges_from_pandas(edges_df, "time", "src", "dst") edges_df = pd.DataFrame( {"src": [1, 2, 3, 4, 5], "dst": [2, 3, 4, 5, 6], "time": [1, 2, None, 4, 5]} @@ -857,11 +876,11 @@ def test_none_columns_edges(): with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - Graph.load_from_pandas(edges_df, "time", "src", "dst") + Graph().load_edges_from_pandas(edges_df, "time", "src", "dst") with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - PersistentGraph.load_from_pandas(edges_df, "time", "src", "dst") + PersistentGraph().load_edges_from_pandas(edges_df, "time", "src", "dst") def test_loading_list_as_properties(): @@ -875,12 +894,13 @@ def test_loading_list_as_properties(): } ) - g = Graph.load_from_pandas( + g = Graph() + g.load_edges_from_pandas( df, - edge_time="time", - edge_src="src", - edge_dst="dst", - edge_properties=["marbles"], + time="time", + src="src", + dst="dst", + properties=["marbles"], ) assert g.edge(1, 2).properties["marbles"] == ["red"] @@ -896,8 +916,8 @@ def test_loading_list_as_properties(): g = Graph() g.load_nodes_from_pandas( df=df, - id="id", time="time", + id="id", properties=["marbles"], ) @@ -921,12 +941,12 @@ def test_unparsable_props(): """"Could not convert '2.0' with type str: tried to convert to double", 'Conversion failed for column weight with type object'""" ), ): - Graph.load_from_pandas( + Graph().load_edges_from_pandas( edges_df, - edge_time="time", - edge_src="src", - edge_dst="dst", - edge_properties=["weight"], + time="time", + src="src", + dst="dst", + properties=["weight"], ) with pytest.raises( Exception, @@ -934,12 +954,12 @@ def test_unparsable_props(): """"Could not convert '2.0' with type str: tried to convert to double", 'Conversion failed for column weight with type object'""" ), ): - PersistentGraph.load_from_pandas( + PersistentGraph().load_edges_from_pandas( edges_df, - edge_time="time", - edge_src="src", - edge_dst="dst", - edge_properties=["weight"], + time="time", + src="src", + dst="dst", + properties=["weight"], ) @@ -989,48 +1009,38 @@ def edges_assertions(g): assert g.count_nodes() == 6 g = Graph() - g.load_nodes_from_pandas(nodes_df, "id", "time") + g.load_nodes_from_pandas(nodes_df, "time", "id") nodes_assertions(g) g = PersistentGraph() - g.load_nodes_from_pandas(nodes_df, "id", "time") + g.load_nodes_from_pandas(nodes_df, "time", "id") nodes_assertions(g) g = Graph() - g.load_nodes_from_pandas( - nodes_df, "id", "time", node_type="node_type" - ) + g.load_nodes_from_pandas(nodes_df, "time", "id", node_type="node_type") nodes_assertions2(g) g = PersistentGraph() - g.load_nodes_from_pandas( - nodes_df, "id", "time", node_type="node_type" - ) + g.load_nodes_from_pandas(nodes_df, "time", "id", node_type="node_type") nodes_assertions2(g) g = Graph() - g.load_nodes_from_pandas( - nodes_df2, "id", "time", node_type="node_type" - ) + g.load_nodes_from_pandas(nodes_df2, "time", "id", node_type="node_type") nodes_assertions2(g) g = PersistentGraph() - g.load_nodes_from_pandas( - nodes_df2, "id", "time", node_type="node_type" - ) + g.load_nodes_from_pandas(nodes_df2, "time", "id", node_type="node_type") nodes_assertions2(g) g = Graph() - g.load_nodes_from_pandas( - nodes_df2, "id", "time", node_type_col="node_type" - ) + g.load_nodes_from_pandas(nodes_df2, "time", "id", node_type_col="node_type") nodes_assertions3(g) g = PersistentGraph() - g.load_nodes_from_pandas( - nodes_df2, "id", "time", node_type_col="node_type" - ) + g.load_nodes_from_pandas(nodes_df2, "time", "id", node_type_col="node_type") nodes_assertions3(g) - g = Graph.load_from_pandas(edges_df, "time", "src", "dst") + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst") edges_assertions(g) - g = Graph.load_from_pandas(edges_df, "time", "src", "dst") + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst") edges_assertions(g) diff --git a/python/tests/test_load_from_parquet.py b/python/tests/test_load_from_parquet.py index d8eafda207..7495fc42e3 100644 --- a/python/tests/test_load_from_parquet.py +++ b/python/tests/test_load_from_parquet.py @@ -14,7 +14,9 @@ def parquet_files(): dirname = tempfile.TemporaryDirectory() nodes_parquet_file_path = os.path.join(dirname.name, "parquet", "nodes.parquet") edges_parquet_file_path = os.path.join(dirname.name, "parquet", "edges.parquet") - edge_deletions_parquet_file_path = os.path.join(dirname.name, "parquet", "edges_deletions.parquet") + edge_deletions_parquet_file_path = os.path.join( + dirname.name, "parquet", "edges_deletions.parquet" + ) os.makedirs(os.path.dirname(nodes_parquet_file_path), exist_ok=True) @@ -22,7 +24,14 @@ def parquet_files(): "id": [1, 2, 3, 4, 5, 6], "name": ["Alice", "Bob", "Carol", "Dave", "Eve", "Frank"], "time": [1, 2, 3, 4, 5, 6], - "type": ["Person 1", "Person 2", "Person 3", "Person 4", "Person 5", "Person 6"], + "type": [ + "Person 1", + "Person 2", + "Person 3", + "Person 4", + "Person 5", + "Person 6", + ], "node_type": ["p", "p", "p", "p", "p", "p"], } @@ -52,7 +61,11 @@ def parquet_files(): table = pa.table(data) pq.write_table(table, edge_deletions_parquet_file_path) - print("""Created edges_deletions.parquet at loc = {}""".format(edge_deletions_parquet_file_path)) + print( + """Created edges_deletions.parquet at loc = {}""".format( + edge_deletions_parquet_file_path + ) + ) yield nodes_parquet_file_path, edges_parquet_file_path, edge_deletions_parquet_file_path @@ -139,13 +152,13 @@ def assert_expected_node_property_dept(g): def assert_expected_edge_properties(g): - assert g.layers( - ["layer 1", "layer 2", "layer 3"] - ).edges.properties.constant.get("marbles_const").collect() == [ - {"layer 1": "red"}, - {"layer 2": "blue"}, - {"layer 3": "green"}, - ] + assert g.layers(["layer 1", "layer 2", "layer 3"]).edges.properties.constant.get( + "marbles_const" + ).collect() == [ + {"layer 1": "red"}, + {"layer 2": "blue"}, + {"layer 3": "green"}, + ] assert g.edges.properties.constant.get("tag").collect() == [ {"layer 1": "test_tag"}, {"layer 2": "test_tag"}, @@ -180,14 +193,31 @@ def assert_expected_edge_properties_test_layer(g): def assert_expected_layers(g): - assert g.unique_layers == ["_default", "layer 1", "layer 2", "layer 3", "layer 4", "layer 5"] + assert g.unique_layers == [ + "_default", + "layer 1", + "layer 2", + "layer 3", + "layer 4", + "layer 5", + ] assert g.layers(["layer 1"]).edges.src.id.collect() == [1] assert g.layers(["layer 1", "layer 2"]).edges.src.id.collect() == [1, 2] - assert g.layers(["layer 1", "layer 2", "layer 3"]).edges.src.id.collect() == [1, 2, 3] - assert g.layers(["layer 1", "layer 4", "layer 5"]).edges.src.id.collect() == [1, 4, 5] + assert g.layers(["layer 1", "layer 2", "layer 3"]).edges.src.id.collect() == [ + 1, + 2, + 3, + ] + assert g.layers(["layer 1", "layer 4", "layer 5"]).edges.src.id.collect() == [ + 1, + 4, + 5, + ] with pytest.raises( - Exception, - match=re.escape("Invalid layer: test_layer. Valid layers: _default, layer 1, layer 2, layer 3, layer 4, layer 5"), + Exception, + match=re.escape( + "Invalid layer: test_layer. Valid layers: _default, layer 1, layer 2, layer 3, layer 4, layer 5" + ), ): g.layers(["test_layer"]) @@ -198,19 +228,26 @@ def assert_expected_test_layer(g): def test_load_from_parquet_graphs(parquet_files): - nodes_parquet_file_path, edges_parquet_file_path, edges_deletions_parquet_file_path = parquet_files - - g = Graph.load_from_parquet( - edge_parquet_path=edges_parquet_file_path, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_properties=["weight", "marbles"], - node_parquet_path=nodes_parquet_file_path, - node_id="id", - node_time="time", - node_properties=["name"], - node_type="node_type", + ( + nodes_parquet_file_path, + edges_parquet_file_path, + edges_deletions_parquet_file_path, + ) = parquet_files + + g = Graph() + g.load_edges_from_parquet( + parquet_path=edges_parquet_file_path, + time="time", + src="src", + dst="dst", + properties=["weight", "marbles"], + ) + g.load_nodes_from_parquet( + parquet_path=nodes_parquet_file_path, + time="time", + id="id", + properties=["name"], + node_type_col="node_type", ) assert_expected_nodes(g) assert_expected_edges(g) @@ -218,10 +255,10 @@ def test_load_from_parquet_graphs(parquet_files): g = Graph() g.load_nodes_from_parquet( parquet_path=nodes_parquet_file_path, - id="id", time="time", - node_type="node_type", - properties=["name"] + id="id", + node_type_col="node_type", + properties=["name"], ) g.load_edges_from_parquet( parquet_path=edges_parquet_file_path, @@ -229,7 +266,7 @@ def test_load_from_parquet_graphs(parquet_files): dst="dst", time="time", properties=["weight", "marbles"], - layer_col="layers" + layer_col="layers", ) assert_expected_nodes(g) assert_expected_edges(g) @@ -276,56 +313,69 @@ def test_load_from_parquet_graphs(parquet_files): properties=["weight", "marbles"], constant_properties=["marbles_const"], shared_constant_properties={"type": "Edge", "tag": "test_tag"}, - layer_name="test_layer", + layer="test_layer", ) assert_expected_edge_properties_test_layer(g) assert_expected_test_layer(g) - g = Graph.load_from_parquet( - edge_parquet_path=edges_parquet_file_path, - edge_src="src", - edge_dst="dst", - edge_time="time", - layer_name="test_layer", - node_parquet_path=nodes_parquet_file_path, - node_id="id", - node_time="time", - node_properties=["name"], - node_shared_constant_properties={"dept": "Sales"}, + g = Graph() + g.load_edges_from_parquet( + parquet_path=edges_parquet_file_path, + time="time", + src="src", + dst="dst", + layer="test_layer", + ) + g.load_nodes_from_parquet( + parquet_path=nodes_parquet_file_path, + time="time", + id="id", + properties=["name"], + shared_constant_properties={"dept": "Sales"}, ) assert_expected_test_layer(g) assert_expected_node_property_dept(g) - g = Graph.load_from_parquet( - edge_parquet_path=edges_parquet_file_path, - edge_src="src", - edge_dst="dst", - edge_time="time", + g = Graph() + g.load_edges_from_parquet( + parquet_path=edges_parquet_file_path, + src="src", + dst="dst", + time="time", layer_col="layers", - node_parquet_path=nodes_parquet_file_path, - node_id="id", - node_time="time", - node_properties=["name"], - node_constant_properties=["type"], + ) + g.load_nodes_from_parquet( + parquet_path=nodes_parquet_file_path, + time="time", + id="id", + properties=["name"], + constant_properties=["type"], ) assert_expected_node_property_type(g) assert_expected_layers(g) def test_load_from_parquet_persistent_graphs(parquet_files): - nodes_parquet_file_path, edges_parquet_file_path, edges_deletions_parquet_file_path = parquet_files - - g = PersistentGraph.load_from_parquet( - edge_parquet_path=edges_parquet_file_path, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_properties=["weight", "marbles"], - node_parquet_path=nodes_parquet_file_path, - node_id="id", - node_time="time", - node_properties=["name"], - node_type="node_type", + ( + nodes_parquet_file_path, + edges_parquet_file_path, + edges_deletions_parquet_file_path, + ) = parquet_files + + g = PersistentGraph() + g.load_edges_from_parquet( + parquet_path=edges_parquet_file_path, + src="src", + dst="dst", + time="time", + properties=["weight", "marbles"], + ) + g.load_nodes_from_parquet( + parquet_path=nodes_parquet_file_path, + time="time", + id="id", + properties=["name"], + node_type_col="node_type", ) assert_expected_nodes(g) assert_expected_edges(g) @@ -336,7 +386,7 @@ def test_load_from_parquet_persistent_graphs(parquet_files): id="id", time="time", node_type="node_type", - properties=["name"] + properties=["name"], ) g.load_edges_from_parquet( parquet_path=edges_parquet_file_path, @@ -344,7 +394,7 @@ def test_load_from_parquet_persistent_graphs(parquet_files): dst="dst", time="time", properties=["weight", "marbles"], - layer_col="layers" + layer_col="layers", ) assert_expected_nodes(g) assert_expected_edges(g) @@ -373,8 +423,8 @@ def test_load_from_parquet_persistent_graphs(parquet_files): g = PersistentGraph() g.load_nodes_from_parquet( parquet_path=nodes_parquet_file_path, - id="id", time="time", + id="id", node_type_col="node_type", properties=["name"], shared_constant_properties={"tag": "test_tag"}, @@ -385,43 +435,49 @@ def test_load_from_parquet_persistent_graphs(parquet_files): g = PersistentGraph() g.load_edges_from_parquet( parquet_path=edges_parquet_file_path, + time="time", src="src", dst="dst", - time="time", properties=["weight", "marbles"], constant_properties=["marbles_const"], shared_constant_properties={"type": "Edge", "tag": "test_tag"}, - layer_name="test_layer", + layer="test_layer", ) assert_expected_edge_properties_test_layer(g) assert_expected_test_layer(g) - g = Graph.load_from_parquet( - edge_parquet_path=edges_parquet_file_path, - edge_src="src", - edge_dst="dst", - edge_time="time", - layer_name="test_layer", - node_parquet_path=nodes_parquet_file_path, - node_id="id", - node_time="time", - node_properties=["name"], - node_shared_constant_properties={"dept": "Sales"}, + g = Graph() + g.load_edges_from_parquet( + parquet_path=edges_parquet_file_path, + src="src", + dst="dst", + time="time", + layer="test_layer", + ) + g.load_nodes_from_parquet( + parquet_path=nodes_parquet_file_path, + time="time", + id="id", + properties=["name"], + shared_constant_properties={"dept": "Sales"}, ) assert_expected_test_layer(g) assert_expected_node_property_dept(g) - g = PersistentGraph.load_from_parquet( - edge_parquet_path=edges_parquet_file_path, - edge_src="src", - edge_dst="dst", - edge_time="time", + g = PersistentGraph() + g.load_edges_from_parquet( + parquet_path=edges_parquet_file_path, + src="src", + dst="dst", + time="time", layer_col="layers", - node_parquet_path=nodes_parquet_file_path, - node_id="id", - node_time="time", - node_properties=["name"], - node_constant_properties=["type"], + ) + g.load_nodes_from_parquet( + parquet_path=nodes_parquet_file_path, + time="time", + id="id", + properties=["name"], + constant_properties=["type"], ) assert_expected_node_property_type(g) assert_expected_layers(g) @@ -429,16 +485,15 @@ def test_load_from_parquet_persistent_graphs(parquet_files): g = PersistentGraph() g.load_edges_from_parquet( parquet_path=edges_parquet_file_path, + time="time", src="src", dst="dst", - time="time", ) assert g.window(10, 12).edges.src.id.collect() == [1, 2, 3, 4, 5] g.load_edges_deletions_from_parquet( parquet_path=edges_deletions_parquet_file_path, + time="time", src="src", dst="dst", - time="time" ) assert g.window(10, 12).edges.src.id.collect() == [1, 2, 5] - diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 28ea8f87de..3a8e15c1bb 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -36,8 +36,8 @@ pub(crate) fn load_nodes_from_df< G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, >( df_view: DFView>>, - node_id: &str, time: &str, + node_id: &str, properties: Option<&[&str]>, constant_properties: Option<&[&str]>, shared_constant_properties: Option<&HashMap>, @@ -195,7 +195,7 @@ pub(crate) fn load_edges_from_df< properties: Option<&[&str]>, constant_properties: Option<&[&str]>, shared_constant_properties: Option<&HashMap>, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { @@ -228,7 +228,7 @@ pub(crate) fn load_edges_from_df< let const_prop_iter = combine_properties(constant_properties, &constant_properties_indices, &df)?; - let layer = lift_layer(layer_name, layer_index, &df)?; + let layer = lift_layer(layer, layer_index, &df)?; if let (Some(src), Some(dst), Some(time)) = ( df.iter_col::(src_index), @@ -321,7 +321,7 @@ pub(crate) fn load_edges_deletions_from_df< time: &str, src: &str, dst: &str, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { @@ -338,7 +338,7 @@ pub(crate) fn load_edges_deletions_from_df< for chunk in df_view.chunks { let df = chunk?; - let layer = lift_layer(layer_name, layer_index, &df)?; + let layer = lift_layer(layer, layer_index, &df)?; if let (Some(src), Some(dst), Some(time)) = ( df.iter_col::(src_index), @@ -414,6 +414,8 @@ pub(crate) fn load_node_props_from_df< >( df_view: DFView>>, node_id: &str, + node_type: Option<&str>, + node_type_col: Option<&str>, constant_properties: Option<&[&str]>, shared_constant_properties: Option<&HashMap>, graph: &G, @@ -424,16 +426,46 @@ pub(crate) fn load_node_props_from_df< .map(|name| df_view.get_index(name)) .collect::, GraphError>>()?; let node_id_index = df_view.get_index(node_id)?; + let node_type_index = if let Some(node_type_col) = node_type_col { + Some(df_view.get_index(node_type_col.as_ref())) + } else { + None + }; + let node_type_index = node_type_index.transpose()?; let mut pb = build_progress_bar("Loading node properties".to_string(), df_view.num_rows)?; - for chunk in df_view.chunks { let df = chunk?; let const_prop_iter = combine_properties(constant_properties, &constant_properties_indices, &df)?; + let node_type: Result>>, GraphError> = + match (node_type, node_type_index) { + (None, None) => Ok(Box::new(iter::repeat(None))), + (Some(node_type), None) => Ok(Box::new(iter::repeat(Some(node_type)))), + (None, Some(node_type_index)) => { + let iter_res: Result>>, GraphError> = + if let Some(node_types) = df.utf8::(node_type_index) { + Ok(Box::new(node_types)) + } else if let Some(node_types) = df.utf8::(node_type_index) { + Ok(Box::new(node_types)) + } else { + Err(GraphError::LoadFailure( + "Unable to convert / find node_type column in dataframe." + .to_string(), + )) + }; + iter_res + } + _ => Err(GraphError::WrongNumOfArgs( + "node_type".to_string(), + "node_type_col".to_string(), + )), + }; + let node_type = node_type?; + if let Some(node_id) = df.iter_col::(node_id_index) { let iter = node_id.map(|i| i.copied()); - for (node_id, const_props) in iter.zip(const_prop_iter) { + for ((node_id, const_props), node_type) in iter.zip(const_prop_iter).zip(node_type) { if let Some(node_id) = node_id { let v = graph .node(node_id) @@ -442,12 +474,15 @@ pub(crate) fn load_node_props_from_df< if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } + if let Some(node_type) = node_type { + v.set_node_type(node_type)?; + } } let _ = pb.update(1); } } else if let Some(node_id) = df.iter_col::(node_id_index) { let iter = node_id.map(i64_opt_into_u64_opt); - for (node_id, const_props) in iter.zip(const_prop_iter) { + for ((node_id, const_props), node_type) in iter.zip(const_prop_iter).zip(node_type) { if let Some(node_id) = node_id { let v = graph .node(node_id) @@ -456,12 +491,15 @@ pub(crate) fn load_node_props_from_df< if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } + if let Some(node_type) = node_type { + v.set_node_type(node_type)?; + } } let _ = pb.update(1); } } else if let Some(node_id) = df.utf8::(node_id_index) { let iter = node_id.into_iter(); - for (node_id, const_props) in iter.zip(const_prop_iter) { + for ((node_id, const_props), node_type) in iter.zip(const_prop_iter).zip(node_type) { if let Some(node_id) = node_id { let v = graph .node(node_id) @@ -470,12 +508,15 @@ pub(crate) fn load_node_props_from_df< if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } + if let Some(node_type) = node_type { + v.set_node_type(node_type)?; + } } let _ = pb.update(1); } } else if let Some(node_id) = df.utf8::(node_id_index) { let iter = node_id.into_iter(); - for (node_id, const_props) in iter.zip(const_prop_iter) { + for ((node_id, const_props), node_type) in iter.zip(const_prop_iter).zip(node_type) { if let Some(node_id) = node_id { let v = graph .node(node_id) @@ -484,6 +525,9 @@ pub(crate) fn load_node_props_from_df< if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } + if let Some(node_type) = node_type { + v.set_node_type(node_type)?; + } } let _ = pb.update(1); } @@ -505,7 +549,7 @@ pub(crate) fn load_edges_props_from_df< dst: &str, constant_properties: Option<&[&str]>, shared_constant_properties: Option<&HashMap>, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { @@ -529,7 +573,7 @@ pub(crate) fn load_edges_props_from_df< let const_prop_iter = combine_properties(constant_properties, &constant_properties_indices, &df)?; - let layer = lift_layer(layer_name, layer_index, &df)?; + let layer = lift_layer(layer, layer_index, &df)?; if let (Some(src), Some(dst)) = (df.iter_col::(src_index), df.iter_col::(dst_index)) diff --git a/raphtory/src/io/arrow/mod.rs b/raphtory/src/io/arrow/mod.rs index 0283103151..64a1a95e9f 100644 --- a/raphtory/src/io/arrow/mod.rs +++ b/raphtory/src/io/arrow/mod.rs @@ -141,8 +141,8 @@ mod test { load_nodes_from_df( df, - "id", "time", + "id", Some(&*vec!["name"]), None, None, diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 84a249d804..a709a59173 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -28,8 +28,8 @@ pub fn load_nodes_from_parquet< >( graph: &G, parquet_path: &Path, - id: &str, time: &str, + id: &str, node_type: Option<&str>, node_type_col: Option<&str>, properties: Option<&[&str]>, @@ -48,8 +48,8 @@ pub fn load_nodes_from_parquet< df_view.check_cols_exist(&cols_to_check)?; load_nodes_from_df( df_view, - id, time, + id, properties, constant_properties, shared_constant_properties, @@ -74,7 +74,7 @@ pub fn load_edges_from_parquet< properties: Option<&[&str]>, constant_properties: Option<&[&str]>, shared_constant_properties: Option<&HashMap>, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { let parquet_path = parquet_path.as_ref(); @@ -96,7 +96,7 @@ pub fn load_edges_from_parquet< properties, constant_properties, shared_constant_properties, - layer_name, + layer, layer_col, graph, ) @@ -112,11 +112,16 @@ pub fn load_node_props_from_parquet< graph: &G, parquet_path: &Path, id: &str, + node_type: Option<&str>, + node_type_col: Option<&str>, constant_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + shared_constant_properties: Option<&HashMap>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id]; cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); + if let Some(ref node_type_col) = node_type_col { + cols_to_check.push(node_type_col.as_ref()); + } for path in get_parquet_file_paths(parquet_path)? { let df_view = process_parquet_file_to_df(path.as_path(), &cols_to_check)?; @@ -125,8 +130,10 @@ pub fn load_node_props_from_parquet< load_node_props_from_df( df_view, id, + node_type, + node_type_col, constant_properties, - shared_const_properties, + shared_constant_properties, graph, ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; @@ -144,7 +151,7 @@ pub fn load_edge_props_from_parquet< dst: &str, constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![src, dst]; @@ -162,7 +169,7 @@ pub fn load_edge_props_from_parquet< dst, constant_properties, shared_const_properties, - layer_name, + layer, layer_col, graph.core_graph(), ) @@ -180,7 +187,7 @@ pub fn load_edges_deletions_from_parquet< time: &str, src: &str, dst: &str, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![src, dst, time]; @@ -191,7 +198,7 @@ pub fn load_edges_deletions_from_parquet< for path in get_parquet_file_paths(parquet_path)? { let df_view = process_parquet_file_to_df(path.as_path(), &cols_to_check)?; df_view.check_cols_exist(&cols_to_check)?; - load_edges_deletions_from_df(df_view, time, src, dst, layer_name, layer_col, graph) + load_edges_deletions_from_df(df_view, time, src, dst, layer, layer_col, graph) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } Ok(()) @@ -298,10 +305,10 @@ mod test { let parquet_file_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("resources/test/test_data.parquet"); - let col_names: &[&str] = &["src", "dst", "time", "weight", "marbles"]; + let col_names: &[&str] = &["time", "src", "dst", "weight", "marbles"]; let df = process_parquet_file_to_df(parquet_file_path.as_path(), col_names).unwrap(); - let expected_names: Vec = vec!["src", "dst", "time", "weight", "marbles"] + let expected_names: Vec = vec!["time", "src", "dst", "weight", "marbles"] .iter() .map(|s| s.to_string()) .collect(); diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index b44352bbdc..1a3355aea0 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -427,193 +427,30 @@ impl PyGraph { PyPersistentGraph::py_from_db_graph(self.graph.persistent_graph()) } - /// Load a graph from a Pandas DataFrame. - /// - /// Args: - /// edge_df (pandas.DataFrame): The DataFrame containing the edges. - /// edge_time (str): The column name for the timestamps. - /// edge_src (str): The column name for the source node ids. - /// edge_dst (str): The column name for the destination node ids. - /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - /// edge_constant_properties (list): The column names for the constant properties (optional) Defaults to None. - /// edge_shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - /// layer_name (str): The edge layer name (optional) Defaults to None. - /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. - /// node_df (pandas.DataFrame): The DataFrame containing the nodes (optional) Defaults to None. - /// node_id (str): The column name for the node ids (optional) Defaults to None. - /// node_time (str): The column name for the node timestamps (optional) Defaults to None. - /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - /// node_constant_properties (list): The column names for the node constant properties (optional) Defaults to None. - /// node_shared_constant_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - /// node_type (str): The node type (optional). Defaults to None. - /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. - /// - /// Returns: - /// Graph: The loaded Graph object. - #[staticmethod] - #[pyo3( - signature = (edge_df, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, - layer_name = None, layer_col = None, node_df = None, node_id = None, node_time = None, node_properties = None, - node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_col = None) - )] - fn load_from_pandas( - edge_df: &PyAny, - edge_time: &str, - edge_src: &str, - edge_dst: &str, - edge_properties: Option>, - edge_constant_properties: Option>, - edge_shared_constant_properties: Option>, - layer_name: Option<&str>, - layer_col: Option<&str>, - node_df: Option<&PyAny>, - node_id: Option<&str>, - node_time: Option<&str>, - node_properties: Option>, - node_constant_properties: Option>, - node_shared_constant_properties: Option>, - node_type: Option<&str>, - node_type_col: Option<&str>, - ) -> Result { - let graph = Graph::new(); - if let (Some(node_df), Some(node_id), Some(node_time)) = (node_df, node_id, node_time) { - load_nodes_from_pandas( - &graph.core_graph(), - node_df, - node_id, - node_time, - node_type, - node_type_col, - node_properties.as_ref().map(|props| props.as_ref()), - node_constant_properties - .as_ref() - .map(|props| props.as_ref()), - node_shared_constant_properties.as_ref(), - )?; - } - load_edges_from_pandas( - &graph.core_graph(), - edge_df, - edge_time, - edge_src, - edge_dst, - edge_properties.as_ref().map(|props| props.as_ref()), - edge_constant_properties - .as_ref() - .map(|props| props.as_ref()), - edge_shared_constant_properties.as_ref(), - layer_name, - layer_col, - )?; - Ok(graph) - } - - /// Load a graph from Parquet file. - /// - /// Args: - /// edge_parquet_path (str): Parquet file or directory of Parquet files containing the edges. - /// edge_time (str): The column name for the timestamps. - /// edge_src (str): The column name for the source node ids. - /// edge_dst (str): The column name for the destination node ids. - /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - /// edge_constant_properties (list): The column names for the constant properties (optional) Defaults to None. - /// edge_shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - /// layer_name (str): The edge layer name (optional) Defaults to None. - /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. - /// node_parquet_path (str): Parquet file or directory of Parquet files containing the nodes (optional) Defaults to None. - /// node_id (str): The column name for the node ids (optional) Defaults to None. - /// node_time (str): The column name for the node timestamps (optional) Defaults to None. - /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - /// node_constant_properties (list): The column names for the node constant properties (optional) Defaults to None. - /// node_shared_constant_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - /// node_type (str): The node type (optional). Defaults to None. - /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. - /// - /// Returns: - /// Graph: The loaded Graph object. - #[staticmethod] - #[pyo3( - signature = (edge_parquet_path, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, - layer_name = None, layer_col = None, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, - node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_col = None) - )] - fn load_from_parquet( - edge_parquet_path: PathBuf, - edge_time: &str, - edge_src: &str, - edge_dst: &str, - edge_properties: Option>, - edge_constant_properties: Option>, - edge_shared_constant_properties: Option>, - layer_name: Option<&str>, - layer_col: Option<&str>, - node_parquet_path: Option, - node_id: Option<&str>, - node_time: Option<&str>, - node_properties: Option>, - node_constant_properties: Option>, - node_shared_constant_properties: Option>, - node_type: Option<&str>, - node_type_col: Option<&str>, - ) -> Result { - let graph = Graph::new(); - - if let (Some(node_parquet_path), Some(node_id), Some(node_time)) = - (node_parquet_path, node_id, node_time) - { - load_nodes_from_parquet( - &graph, - &node_parquet_path, - node_id, - node_time, - node_type, - node_type_col, - node_properties.as_ref().map(|props| props.as_ref()), - node_constant_properties - .as_ref() - .map(|props| props.as_ref()), - node_shared_constant_properties.as_ref(), - )?; - } - load_edges_from_parquet( - &graph, - edge_parquet_path, - edge_time, - edge_src, - edge_dst, - edge_properties.as_ref().map(|props| props.as_ref()), - edge_constant_properties - .as_ref() - .map(|props| props.as_ref()), - edge_shared_constant_properties.as_ref(), - layer_name, - layer_col, - )?; - - Ok(graph) - } - /// Load nodes from a Pandas DataFrame into the graph. /// /// Arguments: /// df (pandas.DataFrame): The Pandas DataFrame containing the nodes. - /// id (str): The column name for the node IDs. /// time (str): The column name for the timestamps. - /// node_type (str): The node type (optional). Defaults to None. - /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. - /// properties (List): List of node property column names. Defaults to None. (optional) - /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_constant_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// id (str): The column name for the node IDs. + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + /// properties (List[str]): List of node property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: - /// Result<(), GraphError>: Result of the operation. + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. #[pyo3( - signature = (df, id, time, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None) + signature = (df,time, id, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None) )] fn load_nodes_from_pandas( &self, df: &PyAny, - id: &str, time: &str, + id: &str, node_type: Option<&str>, node_type_col: Option<&str>, properties: Option>, @@ -623,8 +460,8 @@ impl PyGraph { load_nodes_from_pandas( self.graph.core_graph(), df, - id, time, + id, node_type, node_type_col, properties.as_ref().map(|props| props.as_ref()), @@ -637,23 +474,26 @@ impl PyGraph { /// /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files containing the nodes - /// id (str): The column name for the node IDs. /// time (str): The column name for the timestamps. - /// node_type (str): The node type (optional). Defaults to None. - /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. - /// properties (List): List of node property column names. Defaults to None. (optional) - /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_constant_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// id (str): The column name for the node IDs. + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + /// properties (List[str]): List of node property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: - /// Result<(), GraphError>: Result of the operation. + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. #[pyo3( - signature = (parquet_path, id, time, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None) + signature = (parquet_path, time, id, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None) )] fn load_nodes_from_parquet( &self, parquet_path: PathBuf, - id: &str, time: &str, + id: &str, node_type: Option<&str>, node_type_col: Option<&str>, properties: Option>, @@ -663,8 +503,8 @@ impl PyGraph { load_nodes_from_parquet( &self.graph, parquet_path.as_path(), - id, time, + id, node_type, node_type_col, properties.as_ref().map(|props| props.as_ref()), @@ -680,16 +520,18 @@ impl PyGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// properties (List): List of edge property column names. Defaults to None. (optional) - /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// properties (List[str]): List of edge property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer_name (str): The edge layer name (optional) Defaults to None. - /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. - /// + /// layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) /// Returns: - /// Result<(), GraphError>: Result of the operation. + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. #[pyo3( - signature = (df, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None) + signature = (df, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None) )] fn load_edges_from_pandas( &self, @@ -700,7 +542,7 @@ impl PyGraph { properties: Option>, constant_properties: Option>, shared_constant_properties: Option>, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edges_from_pandas( @@ -712,7 +554,7 @@ impl PyGraph { properties.as_ref().map(|props| props.as_ref()), constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer_name, + layer, layer_col, ) } @@ -721,19 +563,21 @@ impl PyGraph { /// /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files path containing edges + /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// time (str): The column name for the update timestamps. - /// properties (List): List of edge property column names. Defaults to None. (optional) - /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// properties (List[str]): List of edge property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer_name (str): The edge layer name (optional) Defaults to None. - /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. - /// + /// layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) /// Returns: - /// Result<(), GraphError>: Result of the operation. + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. #[pyo3( - signature = (parquet_path, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None) + signature = (parquet_path, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None) )] fn load_edges_from_parquet( &self, @@ -744,7 +588,7 @@ impl PyGraph { properties: Option>, constant_properties: Option>, shared_constant_properties: Option>, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edges_from_parquet( @@ -756,7 +600,7 @@ impl PyGraph { properties.as_ref().map(|props| props.as_ref()), constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer_name, + layer, layer_col, ) } @@ -766,16 +610,23 @@ impl PyGraph { /// Arguments: /// df (Dataframe): The Pandas DataFrame containing node information. /// id(str): The column name for the node IDs. - /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_constant_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, id, constant_properties = None, shared_constant_properties = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (df, id, node_type=None, node_type_col=None, constant_properties = None, shared_constant_properties = None))] fn load_node_props_from_pandas( &self, df: &PyAny, id: &str, + node_type: Option<&str>, + node_type_col: Option<&str>, constant_properties: Option>, shared_constant_properties: Option>, ) -> Result<(), GraphError> { @@ -783,6 +634,8 @@ impl PyGraph { self.graph.core_graph(), df, id, + node_type, + node_type_col, constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), ) @@ -793,16 +646,23 @@ impl PyGraph { /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files path containing node information. /// id(str): The column name for the node IDs. - /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_constant_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, id, constant_properties = None, shared_constant_properties = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (parquet_path, id, node_type=None,node_type_col=None, constant_properties = None, shared_constant_properties = None))] fn load_node_props_from_parquet( &self, parquet_path: PathBuf, id: &str, + node_type: Option<&str>, + node_type_col: Option<&str>, constant_properties: Option>, shared_constant_properties: Option>, ) -> Result<(), GraphError> { @@ -810,6 +670,8 @@ impl PyGraph { &self.graph, parquet_path.as_path(), id, + node_type, + node_type_col, constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), ) @@ -821,15 +683,18 @@ impl PyGraph { /// df (Dataframe): The Pandas DataFrame containing edge information. /// src (str): The column name for the source node. /// dst (str): The column name for the destination node. - /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer (str): The edge layer name (optional) Defaults to None. /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: - /// Result<(), GraphError>: Result of the operation. + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. #[pyo3( - signature = (df, src, dst, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None) + signature = (df, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None) )] fn load_edge_props_from_pandas( &self, @@ -838,7 +703,7 @@ impl PyGraph { dst: &str, constant_properties: Option>, shared_constant_properties: Option>, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edge_props_from_pandas( @@ -848,7 +713,7 @@ impl PyGraph { dst, constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer_name, + layer, layer_col, ) } @@ -859,15 +724,18 @@ impl PyGraph { /// parquet_path (str): Parquet file or directory of Parquet files path containing edge information. /// src (str): The column name for the source node. /// dst (str): The column name for the destination node. - /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer (str): The edge layer name (optional) Defaults to None. /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: - /// Result<(), GraphError>: Result of the operation. + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. #[pyo3( - signature = (parquet_path, src, dst, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None) + signature = (parquet_path, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None) )] fn load_edge_props_from_parquet( &self, @@ -876,7 +744,7 @@ impl PyGraph { dst: &str, constant_properties: Option>, shared_constant_properties: Option>, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edge_props_from_parquet( @@ -886,7 +754,7 @@ impl PyGraph { dst, constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer_name, + layer, layer_col, ) } diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index ced6992998..aee9871b42 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -378,177 +378,28 @@ impl PyPersistentGraph { PyGraph::py_from_db_graph(self.graph.event_graph()) } - /// Load a graph from a Pandas DataFrame. - /// - /// Args: - /// edge_df (pandas.DataFrame): The DataFrame containing the edges. - /// edge_time (str): The column name for the timestamps. - /// edge_src (str): The column name for the source node ids. - /// edge_dst (str): The column name for the destination node ids. - /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - /// edge_constant_properties (list): The column names for the constant properties (optional) Defaults to None. - /// edge_shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - /// layer_name (str): The edge layer name (optional) Defaults to None. - /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. - /// node_df (pandas.DataFrame): The DataFrame containing the nodes (optional) Defaults to None. - /// node_id (str): The column name for the node ids (optional) Defaults to None. - /// node_time (str): The column name for the node timestamps (optional) Defaults to None. - /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - /// node_constant_properties (list): The column names for the node constant properties (optional) Defaults to None. - /// node_shared_constant_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - /// node_type (str): The node type (optional). Defaults to None. - /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. - /// - /// Returns: - /// Graph: The loaded Graph object. - #[staticmethod] - #[pyo3(signature = (edge_df, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, - layer_name = None, layer_col = None, node_df = None, node_id = None, node_time = None, node_properties = None, - node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_col = None))] - fn load_from_pandas( - edge_df: &PyAny, - edge_time: &str, - edge_src: &str, - edge_dst: &str, - edge_properties: Option>, - edge_constant_properties: Option>, - edge_shared_constant_properties: Option>, - layer_name: Option<&str>, - layer_col: Option<&str>, - node_df: Option<&PyAny>, - node_id: Option<&str>, - node_time: Option<&str>, - node_properties: Option>, - node_constant_properties: Option>, - node_shared_constant_properties: Option>, - node_type: Option<&str>, - node_type_col: Option<&str>, - ) -> Result { - let graph = PyPersistentGraph { - graph: PersistentGraph::new(), - }; - graph.load_edges_from_pandas( - edge_df, - edge_time, - edge_src, - edge_dst, - edge_properties, - edge_constant_properties, - edge_shared_constant_properties, - layer_name, - layer_col, - )?; - if let (Some(node_df), Some(node_id), Some(node_time)) = (node_df, node_id, node_time) { - graph.load_nodes_from_pandas( - node_df, - node_id, - node_time, - node_type, - node_type_col, - node_properties, - node_constant_properties, - node_shared_constant_properties, - )?; - } - Ok(graph.graph) - } - - /// Load a graph from Parquet file. - /// - /// Args: - /// edge_parquet_path (str): Parquet file or directory of Parquet files containing the edges. - /// edge_src (str): The column name for the source node ids. - /// edge_dst (str): The column name for the destination node ids. - /// edge_time (str): The column name for the timestamps. - /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - /// edge_constant_properties (list): The column names for the constant properties (optional) Defaults to None. - /// edge_shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - /// layer_name (str): The edge layer name (optional) Defaults to None. - /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. - /// node_parquet_path (str): Parquet file or directory of Parquet files containing the nodes (optional) Defaults to None. - /// node_id (str): The column name for the node ids (optional) Defaults to None. - /// node_time (str): The column name for the node timestamps (optional) Defaults to None. - /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - /// node_constant_properties (list): The column names for the node constant properties (optional) Defaults to None. - /// node_shared_constant_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - /// node_type (str): The node type (optional). Defaults to None. - /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. - /// - /// Returns: - /// Graph: The loaded Graph object. - #[staticmethod] - #[pyo3(signature = (edge_parquet_path, edge_time, edge_src, edge_dst, edge_properties = None, edge_constant_properties = None, edge_shared_constant_properties = None, - layer_name = None, layer_col = None, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, - node_constant_properties = None, node_shared_constant_properties = None, node_type = None, node_type_col = None))] - fn load_from_parquet( - edge_parquet_path: PathBuf, - edge_time: &str, - edge_src: &str, - edge_dst: &str, - edge_properties: Option>, - edge_constant_properties: Option>, - edge_shared_constant_properties: Option>, - layer_name: Option<&str>, - layer_col: Option<&str>, - node_parquet_path: Option, - node_id: Option<&str>, - node_time: Option<&str>, - node_properties: Option>, - node_constant_properties: Option>, - node_shared_constant_properties: Option>, - node_type: Option<&str>, - node_type_col: Option<&str>, - ) -> Result { - let graph = PyPersistentGraph { - graph: PersistentGraph::new(), - }; - if let (Some(node_parquet_file_path), Some(node_id), Some(node_time)) = - (node_parquet_path, node_id, node_time) - { - graph.load_nodes_from_parquet( - node_parquet_file_path, - node_id, - node_time, - node_type, - node_type_col, - node_properties, - node_constant_properties, - node_shared_constant_properties, - )?; - } - graph.load_edges_from_parquet( - edge_parquet_path, - edge_time, - edge_src, - edge_dst, - edge_properties, - edge_constant_properties, - edge_shared_constant_properties, - layer_name, - layer_col, - )?; - Ok(graph.graph) - } - /// Load nodes from a Pandas DataFrame into the graph. /// /// Arguments: /// df (pandas.DataFrame): The Pandas DataFrame containing the nodes. - /// id (str): The column name for the node IDs. /// time (str): The column name for the timestamps. - /// node_type (str): The node type (optional). Defaults to None. - /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. - /// properties (List): List of node property column names. Defaults to None. (optional) - /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_constant_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// id (str): The column name for the node IDs. + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + /// properties (List[str]): List of node property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, id, time, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (df,time,id, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None))] fn load_nodes_from_pandas( &self, df: &PyAny, - id: &str, time: &str, + id: &str, node_type: Option<&str>, node_type_col: Option<&str>, properties: Option>, @@ -558,8 +409,8 @@ impl PyPersistentGraph { load_nodes_from_pandas( &self.graph.0, df, - id, time, + id, node_type, node_type_col, properties.as_ref().map(|props| props.as_ref()), @@ -572,21 +423,24 @@ impl PyPersistentGraph { /// /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files containing the nodes - /// id (str): The column name for the node IDs. /// time (str): The column name for the timestamps. - /// node_type (str): The node type (optional). Defaults to None. - /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. - /// properties (List): List of node property column names. Defaults to None. (optional) - /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_constant_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// id (str): The column name for the node IDs. + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + /// properties (List[str]): List of node property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, id, time, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (parquet_path, time,id, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None))] fn load_nodes_from_parquet( &self, parquet_path: PathBuf, - id: &str, time: &str, + id: &str, node_type: Option<&str>, node_type_col: Option<&str>, properties: Option>, @@ -596,8 +450,8 @@ impl PyPersistentGraph { load_nodes_from_parquet( &self.graph, parquet_path.as_path(), - id, time, + id, node_type, node_type_col, properties.as_ref().map(|props| props.as_ref()), @@ -610,18 +464,20 @@ impl PyPersistentGraph { /// /// Arguments: /// df (Dataframe): The Pandas DataFrame containing the edges. + /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// time (str): The column name for the update timestamps. - /// properties (List): List of edge property column names. Defaults to None. (optional) - /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// properties (List[str]): List of edge property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer_name (str): The edge layer name (optional) Defaults to None. - /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. - /// + /// layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (df, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None))] fn load_edges_from_pandas( &self, df: &PyAny, @@ -631,7 +487,7 @@ impl PyPersistentGraph { properties: Option>, constant_properties: Option>, shared_constant_properties: Option>, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edges_from_pandas( @@ -643,7 +499,7 @@ impl PyPersistentGraph { properties.as_ref().map(|props| props.as_ref()), constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer_name, + layer, layer_col, ) } @@ -652,18 +508,20 @@ impl PyPersistentGraph { /// /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files path containing edges + /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// time (str): The column name for the update timestamps. - /// properties (List): List of edge property column names. Defaults to None. (optional) - /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// properties (List[str]): List of edge property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer_name (str): The edge layer name (optional) Defaults to None. - /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. - /// + /// layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (parquet_path, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None))] fn load_edges_from_parquet( &self, parquet_path: PathBuf, @@ -673,7 +531,7 @@ impl PyPersistentGraph { properties: Option>, constant_properties: Option>, shared_constant_properties: Option>, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edges_from_parquet( @@ -685,7 +543,7 @@ impl PyPersistentGraph { properties.as_ref().map(|props| props.as_ref()), constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer_name, + layer, layer_col, ) } @@ -697,22 +555,24 @@ impl PyPersistentGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// layer_name (str): The edge layer name (optional) Defaults to None. - /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. - /// + /// layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, time, src, dst, layer_name = None, layer_col = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (df, time, src, dst, layer = None, layer_col = None))] fn load_edges_deletions_from_pandas( &self, df: &PyAny, time: &str, src: &str, dst: &str, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { - load_edges_deletions_from_pandas(&self.graph.0, df, time, src, dst, layer_name, layer_col) + load_edges_deletions_from_pandas(&self.graph.0, df, time, src, dst, layer, layer_col) } /// Load edges deletions from a Parquet file into the graph. @@ -722,19 +582,21 @@ impl PyPersistentGraph { /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. /// time (str): The column name for the update timestamps. - /// layer_name (str): The edge layer name (optional) Defaults to None. - /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. - /// + /// layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, time, src, dst, layer_name = None, layer_col = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (parquet_path, time, src, dst, layer = None, layer_col = None))] fn load_edges_deletions_from_parquet( &self, parquet_path: PathBuf, time: &str, src: &str, dst: &str, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edges_deletions_from_parquet( @@ -743,7 +605,7 @@ impl PyPersistentGraph { time, src, dst, - layer_name, + layer, layer_col, ) } @@ -753,16 +615,23 @@ impl PyPersistentGraph { /// Arguments: /// df (Dataframe): The Pandas DataFrame containing node information. /// id(str): The column name for the node IDs. - /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_constant_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, id, constant_properties = None, shared_constant_properties = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (df, id, node_type=None, node_type_col=None, constant_properties = None, shared_constant_properties = None))] fn load_node_props_from_pandas( &self, df: &PyAny, id: &str, + node_type: Option<&str>, + node_type_col: Option<&str>, constant_properties: Option>, shared_constant_properties: Option>, ) -> Result<(), GraphError> { @@ -770,6 +639,8 @@ impl PyPersistentGraph { &self.graph.0, df, id, + node_type, + node_type_col, constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), ) @@ -780,16 +651,23 @@ impl PyPersistentGraph { /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files path containing node information. /// id(str): The column name for the node IDs. - /// constant_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_constant_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, id, constant_properties = None, shared_constant_properties = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (parquet_path, id, node_type = None, node_type_col=None, constant_properties = None, shared_constant_properties = None))] fn load_node_props_from_parquet( &self, parquet_path: PathBuf, id: &str, + node_type: Option<&str>, + node_type_col: Option<&str>, constant_properties: Option>, shared_constant_properties: Option>, ) -> Result<(), GraphError> { @@ -797,6 +675,8 @@ impl PyPersistentGraph { &self.graph, parquet_path.as_path(), id, + node_type, + node_type_col, constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), ) @@ -808,14 +688,17 @@ impl PyPersistentGraph { /// df (Dataframe): The Pandas DataFrame containing edge information. /// src (str): The column name for the source node. /// dst (str): The column name for the destination node. - /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer (str): The edge layer name (optional) Defaults to None. /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, src, dst, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (df, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None))] fn load_edge_props_from_pandas( &self, df: &PyAny, @@ -823,7 +706,7 @@ impl PyPersistentGraph { dst: &str, constant_properties: Option>, shared_constant_properties: Option>, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edge_props_from_pandas( @@ -833,7 +716,7 @@ impl PyPersistentGraph { dst, constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer_name, + layer, layer_col, ) } @@ -844,14 +727,17 @@ impl PyPersistentGraph { /// parquet_path (str): Parquet file or directory of Parquet files path containing edge information. /// src (str): The column name for the source node. /// dst (str): The column name for the destination node. - /// constant_properties (List): List of constant edge property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer_name (str): The edge layer name (optional) Defaults to None. + /// layer (str): The edge layer name (optional) Defaults to None. /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, src, dst, constant_properties = None, shared_constant_properties = None, layer_name = None, layer_col = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (parquet_path, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None))] fn load_edge_props_from_parquet( &self, parquet_path: PathBuf, @@ -859,7 +745,7 @@ impl PyPersistentGraph { dst: &str, constant_properties: Option>, shared_constant_properties: Option>, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edge_props_from_parquet( @@ -869,7 +755,7 @@ impl PyPersistentGraph { dst, constant_properties.as_ref().map(|props| props.as_ref()), shared_constant_properties.as_ref(), - layer_name, + layer, layer_col, ) } diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index 65edf15577..4859a16691 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -15,13 +15,13 @@ use std::collections::HashMap; pub fn load_nodes_from_pandas( graph: &GraphStorage, df: &PyAny, - id: &str, time: &str, + id: &str, node_type: Option<&str>, node_type_col: Option<&str>, properties: Option<&[&str]>, constant_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + shared_constant_properties: Option<&HashMap>, ) -> Result<(), GraphError> { Python::with_gil(|py| { let mut cols_to_check = vec![id, time]; @@ -35,11 +35,11 @@ pub fn load_nodes_from_pandas( df_view.check_cols_exist(&cols_to_check)?; load_nodes_from_df( df_view, - id, time, + id, properties, constant_properties, - shared_const_properties, + shared_constant_properties, node_type, node_type_col, graph, @@ -59,8 +59,8 @@ pub fn load_edges_from_pandas( dst: &str, properties: Option<&[&str]>, constant_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, - layer_name: Option<&str>, + shared_constant_properties: Option<&HashMap>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { Python::with_gil(|py| { @@ -80,8 +80,8 @@ pub fn load_edges_from_pandas( dst, properties, constant_properties, - shared_const_properties, - layer_name, + shared_constant_properties, + layer, layer_col, graph, ) @@ -96,19 +96,26 @@ pub fn load_node_props_from_pandas( graph: &GraphStorage, df: &PyAny, id: &str, + node_type: Option<&str>, + node_type_col: Option<&str>, constant_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + shared_constant_properties: Option<&HashMap>, ) -> Result<(), GraphError> { Python::with_gil(|py| { let mut cols_to_check = vec![id]; cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); + if let Some(ref node_type_col) = node_type_col { + cols_to_check.push(node_type_col.as_ref()); + } let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; df_view.check_cols_exist(&cols_to_check)?; load_node_props_from_df( df_view, id, + node_type, + node_type_col, constant_properties, - shared_const_properties, + shared_constant_properties, graph, ) .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; @@ -124,8 +131,8 @@ pub fn load_edge_props_from_pandas( src: &str, dst: &str, constant_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, - layer_name: Option<&str>, + shared_constant_properties: Option<&HashMap>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { Python::with_gil(|py| { @@ -141,8 +148,8 @@ pub fn load_edge_props_from_pandas( src, dst, constant_properties, - shared_const_properties, - layer_name, + shared_constant_properties, + layer, layer_col, graph, ) @@ -159,7 +166,7 @@ pub fn load_edges_deletions_from_pandas( time: &str, src: &str, dst: &str, - layer_name: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { Python::with_gil(|py| { @@ -175,7 +182,7 @@ pub fn load_edges_deletions_from_pandas( time, src, dst, - layer_name, + layer, layer_col, graph.core_graph(), ) From 2e043d5f63195f202114cca93cdaee36772afb20 Mon Sep 17 00:00:00 2001 From: miratepuffin Date: Wed, 21 Aug 2024 12:58:23 +0100 Subject: [PATCH 14/17] Fix test --- raphtory/src/io/parquet_loaders.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index a709a59173..8fc25c7b5f 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -305,10 +305,10 @@ mod test { let parquet_file_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("resources/test/test_data.parquet"); - let col_names: &[&str] = &["time", "src", "dst", "weight", "marbles"]; + let col_names: &[&str] = &["src", "dst","time", "weight", "marbles"]; let df = process_parquet_file_to_df(parquet_file_path.as_path(), col_names).unwrap(); - let expected_names: Vec = vec!["time", "src", "dst", "weight", "marbles"] + let expected_names: Vec = vec!["src", "dst","time", "weight", "marbles"] .iter() .map(|s| s.to_string()) .collect(); From 7993d3d6d1cc856c5ea5438bb007e6cd1f230844 Mon Sep 17 00:00:00 2001 From: miratepuffin Date: Wed, 21 Aug 2024 15:11:10 +0100 Subject: [PATCH 15/17] Added extra tests for node_type and layer - changed function name --- python/tests/test_load_from_pandas.py | 281 +++++++++++++- python/tests/test_load_from_parquet.py | 347 +++++++++++++++++- raphtory/src/io/arrow/df_loaders.rs | 2 +- raphtory/src/io/parquet_loaders.rs | 8 +- .../src/python/graph/graph_with_deletions.rs | 8 +- .../src/python/graph/io/pandas_loaders.rs | 4 +- 6 files changed, 622 insertions(+), 28 deletions(-) diff --git a/python/tests/test_load_from_pandas.py b/python/tests/test_load_from_pandas.py index da414315fe..6df04d40e9 100644 --- a/python/tests/test_load_from_pandas.py +++ b/python/tests/test_load_from_pandas.py @@ -557,9 +557,7 @@ def assertions7(g): properties=["weight", "marbles"], layer_col="layers", ) - g.load_nodes_from_pandas( - df=nodes_df, time="time", id="id", properties=["name"] - ) + g.load_nodes_from_pandas(df=nodes_df, time="time", id="id", properties=["name"]) g.load_node_props_from_pandas( nodes_df, "id", @@ -604,9 +602,7 @@ def assertions8(g): properties=["weight", "marbles"], layer_col="layers", ) - g.load_nodes_from_pandas( - df=nodes_df, time="time", id="id", properties=["name"] - ) + g.load_nodes_from_pandas(df=nodes_df, time="time", id="id", properties=["name"]) g.load_node_props_from_pandas( nodes_df, "id", @@ -1063,5 +1059,276 @@ def test_load_edge_deletions_from_pandas(): g = PersistentGraph() g.load_edges_from_pandas(edges_df, "time", "src", "dst") assert g.window(10, 12).edges.src.id.collect() == [1, 2, 3, 4, 5] - g.load_edges_deletions_from_pandas(edge_dels_df, "time", "src", "dst") + g.load_edge_deletions_from_pandas(edge_dels_df, "time", "src", "dst") assert g.window(10, 12).edges.src.id.collect() == [1, 2, 5] + + +def test_edge_both_option_failures_pandas(): + edges_df = pd.DataFrame( + { + "src": [1, 2, 3, 4, 5], + "dst": [2, 3, 4, 5, 6], + "time": [1, 2, 3, 4, 5], + "weight": [1.0, 2.0, 3.0, 4.0, 5.0], + "marbles": ["red", "blue", "green", "yellow", "purple"], + } + ) + # CHECK ALL EDGE FUNCTIONS ON GRAPH FAIL WITH BOTH LAYER AND LAYER_COL + g = Graph() + with pytest.raises( + Exception, + match=r"GraphLoadException\('WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)'\)", + ): + g.load_edges_from_pandas( + edges_df, "time", "src", "dst", layer="blah", layer_col="marbles" + ) + + with pytest.raises( + Exception, + match=r"GraphLoadException\('WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)'\)", + ): + g.load_edge_props_from_pandas( + edges_df, "src", "dst", layer="blah", layer_col="marbles" + ) + + # CHECK IF JUST LAYER WORKS + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer="blah") + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer="blah") + g.load_edge_props_from_pandas( + edges_df, "src", "dst", layer="blah", constant_properties=["marbles"] + ) + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + assert g.layer("blah").edges.properties.get("marbles") == [ + "red", + "blue", + "green", + "yellow", + "purple", + ] + + # CHECK IF JUST LAYER_COL WORKS + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer_col="marbles") + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer_col="marbles") + g.load_edge_props_from_pandas( + edges_df, "src", "dst", layer_col="marbles", constant_properties=["marbles"] + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + assert g.edges.properties.get("marbles").collect() == [ + {"red": "red"}, + {"blue": "blue"}, + {"green": "green"}, + {"yellow": "yellow"}, + {"purple": "purple"}, + ] + + g = PersistentGraph() + with pytest.raises( + Exception, + match=r"GraphLoadException\('WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)'\)", + ): + g.load_edges_from_pandas( + edges_df, "time", "src", "dst", layer="blah", layer_col="marbles" + ) + + with pytest.raises( + Exception, + match=r"GraphLoadException\('WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)'\)", + ): + g.load_edge_props_from_pandas( + edges_df, "src", "dst", layer="blah", layer_col="marbles" + ) + + with pytest.raises( + Exception, + match=r"GraphLoadException\('WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)'\)", + ): + g.load_edge_deletions_from_pandas( + edges_df, "time", "src", "dst", layer="blah", layer_col="marbles" + ) + + # CHECK IF JUST LAYER WORKS + g = PersistentGraph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer="blah") + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + + g = PersistentGraph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer="blah") + g.load_edge_props_from_pandas( + edges_df, "src", "dst", layer="blah", constant_properties=["marbles"] + ) + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + assert g.layer("blah").edges.properties.get("marbles") == [ + "red", + "blue", + "green", + "yellow", + "purple", + ] + + g = PersistentGraph() + g.load_edge_deletions_from_pandas(edges_df, "time", "src", "dst", layer="blah") + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + + # CHECK IF JUST LAYER_COL WORKS + g = PersistentGraph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer_col="marbles") + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + + g = PersistentGraph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer_col="marbles") + g.load_edge_props_from_pandas( + edges_df, "src", "dst", layer_col="marbles", constant_properties=["marbles"] + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + assert g.edges.properties.get("marbles").collect() == [ + {"red": "red"}, + {"blue": "blue"}, + {"green": "green"}, + {"yellow": "yellow"}, + {"purple": "purple"}, + ] + + g = PersistentGraph() + g.load_edge_deletions_from_pandas( + edges_df, "time", "src", "dst", layer_col="marbles" + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + + +def test_node_both_option_failures_pandas(): + nodes_df = pd.DataFrame( + { + "id": [1, 2, 3, 4, 5, 6], + "name": ["Alice", "Bob", "Carol", "Dave", "Eve", "Frank"], + "time": [1, 2, 3, 4, 5, 6], + "node_type": ["P1", "P2", "P3", "P4", "P5", "P6"], + } + ) + # CHECK ALL NODE FUNCTIONS ON GRAPH FAIL WITH BOTH NODE_TYPE AND NODE_TYPE_COL + with pytest.raises( + Exception, + match=r"GraphLoadException\('WrongNumOfArgs\(\"node_type\", \"node_type_col\"\)'\)", + ): + g = Graph() + g.load_nodes_from_pandas( + nodes_df, "time", "id", node_type="node_type", node_type_col="node_type" + ) + + with pytest.raises( + Exception, + match=r"GraphLoadException\('WrongNumOfArgs\(\"node_type\", \"node_type_col\"\)'\)", + ): + g = Graph() + g.load_node_props_from_pandas( + nodes_df, "id", node_type="node_type", node_type_col="node_type" + ) + + # CHECK IF JUST NODE_TYPE WORKS + g = Graph() + g.load_nodes_from_pandas(nodes_df, "time", "id", node_type="node_type") + assert g.nodes.node_type.collect() == [ + "node_type", + "node_type", + "node_type", + "node_type", + "node_type", + "node_type", + ] + g = Graph() + g.load_nodes_from_pandas(nodes_df, "time", "id") + g.load_node_props_from_pandas(nodes_df, "id", node_type="node_type") + assert g.nodes.node_type.collect() == [ + "node_type", + "node_type", + "node_type", + "node_type", + "node_type", + "node_type", + ] + + # CHECK IF JUST NODE_TYPE_COL WORKS + g = Graph() + g.load_nodes_from_pandas(nodes_df, "time", "id", node_type_col="node_type") + assert g.nodes.node_type.collect() == ["P1", "P2", "P3", "P4", "P5", "P6"] + g = Graph() + g.load_nodes_from_pandas(nodes_df, "time", "id") + g.load_node_props_from_pandas(nodes_df, "id", node_type_col="node_type") + assert g.nodes.node_type.collect() == ["P1", "P2", "P3", "P4", "P5", "P6"] diff --git a/python/tests/test_load_from_parquet.py b/python/tests/test_load_from_parquet.py index 7495fc42e3..5b8b62e48a 100644 --- a/python/tests/test_load_from_parquet.py +++ b/python/tests/test_load_from_parquet.py @@ -1,10 +1,10 @@ import os import re -import tempfile - import pyarrow as pa import pyarrow.parquet as pq import pytest +import tempfile +import pandas as pd from raphtory import Graph, PersistentGraph @@ -32,7 +32,7 @@ def parquet_files(): "Person 5", "Person 6", ], - "node_type": ["p", "p", "p", "p", "p", "p"], + "node_type": ["p1", "p2", "p3", "p4", "p5", "p6"], } table = pa.table(data) @@ -109,12 +109,12 @@ def assert_expected_edges(g): def assert_expected_node_types(g): assert g.nodes.node_type == [ - "p", - "p", - "p", - "p", - "p", - "p", + "p1", + "p2", + "p3", + "p4", + "p5", + "p6", ] @@ -490,10 +490,337 @@ def test_load_from_parquet_persistent_graphs(parquet_files): dst="dst", ) assert g.window(10, 12).edges.src.id.collect() == [1, 2, 3, 4, 5] - g.load_edges_deletions_from_parquet( + g.load_edge_deletions_from_parquet( parquet_path=edges_deletions_parquet_file_path, time="time", src="src", dst="dst", ) assert g.window(10, 12).edges.src.id.collect() == [1, 2, 5] + + +def test_edge_both_option_failures_parquet(parquet_files): + ( + nodes_parquet_file_path, + edges_parquet_file_path, + edges_deletions_parquet_file_path, + ) = parquet_files + # CHECK ALL EDGE FUNCTIONS ON GRAPH FAIL WITH BOTH LAYER AND LAYER_COL + g = Graph() + with pytest.raises( + Exception, + match=r"Failed to load graph: Failed to load graph WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)", + ): + g.load_edges_from_parquet( + edges_parquet_file_path, + "time", + "src", + "dst", + layer="blah", + layer_col="marbles", + ) + + with pytest.raises( + Exception, + match=r"Failed to load graph: Failed to load graph WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)", + ): + g.load_edge_props_from_parquet( + edges_parquet_file_path, "src", "dst", layer="blah", layer_col="marbles" + ) + + # CHECK IF JUST LAYER WORKS + g = Graph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer="blah" + ) + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + + g = Graph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer="blah" + ) + g.load_edge_props_from_parquet( + edges_parquet_file_path, + "src", + "dst", + layer="blah", + constant_properties=["marbles"], + ) + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + assert g.layer("blah").edges.properties.get("marbles") == [ + "red", + "blue", + "green", + "yellow", + "purple", + ] + + # CHECK IF JUST LAYER_COL WORKS + g = Graph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer_col="marbles" + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + + g = Graph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer_col="marbles" + ) + g.load_edge_props_from_parquet( + edges_parquet_file_path, + "src", + "dst", + layer_col="marbles", + constant_properties=["marbles"], + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + assert g.edges.properties.get("marbles").collect() == [ + {"red": "red"}, + {"blue": "blue"}, + {"green": "green"}, + {"yellow": "yellow"}, + {"purple": "purple"}, + ] + + g = PersistentGraph() + with pytest.raises( + Exception, + match=r"Failed to load graph: Failed to load graph WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)", + ): + g.load_edges_from_parquet( + edges_parquet_file_path, + "time", + "src", + "dst", + layer="blah", + layer_col="marbles", + ) + + with pytest.raises( + Exception, + match=r"Failed to load graph: Failed to load graph WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)", + ): + g.load_edge_props_from_parquet( + edges_parquet_file_path, "src", "dst", layer="blah", layer_col="marbles" + ) + + with pytest.raises( + Exception, + match=r"Failed to load graph: Failed to load graph WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)", + ): + g.load_edge_deletions_from_parquet( + edges_parquet_file_path, + "time", + "src", + "dst", + layer="blah", + layer_col="marbles", + ) + + # CHECK IF JUST LAYER WORKS + g = PersistentGraph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer="blah" + ) + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + + g = PersistentGraph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer="blah" + ) + g.load_edge_props_from_parquet( + edges_parquet_file_path, + "src", + "dst", + layer="blah", + constant_properties=["marbles"], + ) + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + assert g.layer("blah").edges.properties.get("marbles") == [ + "red", + "blue", + "green", + "yellow", + "purple", + ] + + g = PersistentGraph() + g.load_edge_deletions_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer="blah" + ) + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + + # CHECK IF JUST LAYER_COL WORKS + g = PersistentGraph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer_col="marbles" + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + + g = PersistentGraph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer_col="marbles" + ) + g.load_edge_props_from_parquet( + edges_parquet_file_path, + "src", + "dst", + layer_col="marbles", + constant_properties=["marbles"], + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + assert g.edges.properties.get("marbles").collect() == [ + {"red": "red"}, + {"blue": "blue"}, + {"green": "green"}, + {"yellow": "yellow"}, + {"purple": "purple"}, + ] + + g = PersistentGraph() + g.load_edge_deletions_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer_col="marbles" + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + + +def test_node_both_option_failures_parquet(parquet_files): + ( + nodes_parquet_file_path, + edges_parquet_file_path, + edges_deletions_parquet_file_path, + ) = parquet_files + + # CHECK ALL NODE FUNCTIONS ON GRAPH FAIL WITH BOTH NODE_TYPE AND NODE_TYPE_COL + with pytest.raises( + Exception, + match=r"Failed to load graph: Failed to load graph WrongNumOfArgs\(\"node_type\", \"node_type_col\"\)", + ): + g = Graph() + g.load_nodes_from_parquet( + nodes_parquet_file_path, + "time", + "id", + node_type="node_type", + node_type_col="node_type", + ) + + with pytest.raises( + Exception, + match=r"Failed to load graph: Failed to load graph WrongNumOfArgs\(\"node_type\", \"node_type_col\"\)", + ): + g = Graph() + g.load_node_props_from_parquet( + nodes_parquet_file_path, + "id", + node_type="node_type", + node_type_col="node_type", + ) + + # CHECK IF JUST NODE_TYPE WORKS + g = Graph() + g.load_nodes_from_parquet( + nodes_parquet_file_path, "time", "id", node_type="node_type" + ) + assert g.nodes.node_type.collect() == [ + "node_type", + "node_type", + "node_type", + "node_type", + "node_type", + "node_type", + ] + g = Graph() + g.load_nodes_from_parquet(nodes_parquet_file_path, "time", "id") + g.load_node_props_from_parquet(nodes_parquet_file_path, "id", node_type="node_type") + assert g.nodes.node_type.collect() == [ + "node_type", + "node_type", + "node_type", + "node_type", + "node_type", + "node_type", + ] + + # CHECK IF JUST NODE_TYPE_COL WORKS + g = Graph() + g.load_nodes_from_parquet( + nodes_parquet_file_path, "time", "id", node_type_col="node_type" + ) + assert g.nodes.node_type.collect() == ["p1", "p2", "p3", "p4", "p5", "p6"] + g = Graph() + g.load_nodes_from_parquet(nodes_parquet_file_path, "time", "id") + g.load_node_props_from_parquet( + nodes_parquet_file_path, "id", node_type_col="node_type" + ) + assert g.nodes.node_type.collect() == ["p1", "p2", "p3", "p4", "p5", "p6"] diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 3a8e15c1bb..a7aeeca5ca 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -313,7 +313,7 @@ pub(crate) fn load_edges_from_df< Ok(()) } -pub(crate) fn load_edges_deletions_from_df< +pub(crate) fn load_edge_deletions_from_df< 'a, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps + DeletionOps, >( diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 8fc25c7b5f..ee5a828eda 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -179,7 +179,7 @@ pub fn load_edge_props_from_parquet< Ok(()) } -pub fn load_edges_deletions_from_parquet< +pub fn load_edge_deletions_from_parquet< G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps + DeletionOps, >( graph: &G, @@ -198,7 +198,7 @@ pub fn load_edges_deletions_from_parquet< for path in get_parquet_file_paths(parquet_path)? { let df_view = process_parquet_file_to_df(path.as_path(), &cols_to_check)?; df_view.check_cols_exist(&cols_to_check)?; - load_edges_deletions_from_df(df_view, time, src, dst, layer, layer_col, graph) + load_edge_deletions_from_df(df_view, time, src, dst, layer, layer_col, graph) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } Ok(()) @@ -305,10 +305,10 @@ mod test { let parquet_file_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("resources/test/test_data.parquet"); - let col_names: &[&str] = &["src", "dst","time", "weight", "marbles"]; + let col_names: &[&str] = &["src", "dst", "time", "weight", "marbles"]; let df = process_parquet_file_to_df(parquet_file_path.as_path(), col_names).unwrap(); - let expected_names: Vec = vec!["src", "dst","time", "weight", "marbles"] + let expected_names: Vec = vec!["src", "dst", "time", "weight", "marbles"] .iter() .map(|s| s.to_string()) .collect(); diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index aee9871b42..e95eeeba06 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -563,7 +563,7 @@ impl PyPersistentGraph { /// Raises: /// GraphError: If the operation fails. #[pyo3(signature = (df, time, src, dst, layer = None, layer_col = None))] - fn load_edges_deletions_from_pandas( + fn load_edge_deletions_from_pandas( &self, df: &PyAny, time: &str, @@ -572,7 +572,7 @@ impl PyPersistentGraph { layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { - load_edges_deletions_from_pandas(&self.graph.0, df, time, src, dst, layer, layer_col) + load_edge_deletions_from_pandas(&self.graph.0, df, time, src, dst, layer, layer_col) } /// Load edges deletions from a Parquet file into the graph. @@ -590,7 +590,7 @@ impl PyPersistentGraph { /// Raises: /// GraphError: If the operation fails. #[pyo3(signature = (parquet_path, time, src, dst, layer = None, layer_col = None))] - fn load_edges_deletions_from_parquet( + fn load_edge_deletions_from_parquet( &self, parquet_path: PathBuf, time: &str, @@ -599,7 +599,7 @@ impl PyPersistentGraph { layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { - load_edges_deletions_from_parquet( + load_edge_deletions_from_parquet( &self.graph, parquet_path.as_path(), time, diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index 4859a16691..30fc9ddd04 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -160,7 +160,7 @@ pub fn load_edge_props_from_pandas( Ok(()) } -pub fn load_edges_deletions_from_pandas( +pub fn load_edge_deletions_from_pandas( graph: &GraphStorage, df: &PyAny, time: &str, @@ -177,7 +177,7 @@ pub fn load_edges_deletions_from_pandas( let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; df_view.check_cols_exist(&cols_to_check)?; - load_edges_deletions_from_df( + load_edge_deletions_from_df( df_view, time, src, From 10653859f4a0d0dd3d8178db4f83e4919e75ebcc Mon Sep 17 00:00:00 2001 From: miratepuffin Date: Wed, 21 Aug 2024 15:21:02 +0100 Subject: [PATCH 16/17] update stubs + black --- python/python/raphtory/__init__.pyi | 1366 ++++++----------- .../python/raphtory/algorithms/__init__.pyi | 1 + python/python/raphtory/graphql/__init__.pyi | 23 +- python/tests/test_algorithms.py | 18 +- python/tests/test_graphql.py | 2 +- 5 files changed, 519 insertions(+), 891 deletions(-) diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 2bc0a333fd..0fce22b982 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -8,10 +8,8 @@ ############################################################################### class AlgorithmResult: - def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def get(self, key): """ Returns the value corresponding to the provided key @@ -19,7 +17,6 @@ class AlgorithmResult: Arguments: key: The key of type `H` for which the value is to be retrieved. """ - def get_all(self): """ Returns a Dict containing all the nodes (as keys) and their corresponding values (values) or none. @@ -27,10 +24,8 @@ class AlgorithmResult: Returns: A dict of nodes and their values """ - def get_all_values(self): """Returns a a list of all values""" - def get_all_with_names(self): """ Returns a dict with node names and values @@ -38,7 +33,6 @@ class AlgorithmResult: Returns: a dict with node names and values """ - def group_by(self): """ Groups the `AlgorithmResult` by its values. @@ -47,16 +41,12 @@ class AlgorithmResult: A `HashMap` where keys are unique values from the `AlgorithmResult` and values are vectors containing keys of type `H` that share the same value. """ - def max(self): """Returns a tuple of the max result with its key""" - def median(self): """Returns a tuple of the median result with its key""" - def min(self): """Returns a tuple of the min result with its key""" - def sort_by_node(self, reverse=True): """ Sorts by node id in ascending or descending order. @@ -67,7 +57,6 @@ class AlgorithmResult: Returns: A sorted list of tuples containing node names and values. """ - def sort_by_node_name(self, reverse=True): """ The function `sort_by_node_name` sorts a vector of tuples containing a node and an optional @@ -81,7 +70,6 @@ class AlgorithmResult: Returns: The function sort_by_node_name returns a vector of tuples. Each tuple contains a Node and value """ - def sort_by_value(self, reverse=True): """ Sorts the `AlgorithmResult` by its values in ascending or descending order. @@ -92,7 +80,6 @@ class AlgorithmResult: Returns: A sorted vector of tuples containing keys of type `H` and values of type `Y`. """ - def to_df(self): """ Creates a dataframe from the result @@ -100,10 +87,8 @@ class AlgorithmResult: Returns: A `pandas.DataFrame` containing the result """ - def to_string(self): """Returns a formatted string representation of the algorithm.""" - def top_k(self, k, percentage=False, reverse=True): """ Retrieves the top-k elements from the `AlgorithmResult` based on its values. @@ -125,14 +110,12 @@ class ConstProperties: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def as_dict(self): """ as_dict() -> dict[str, Any] convert the properties view to a python dict """ - def get(self, key): """ get(key: str) -> Any | None @@ -142,21 +125,18 @@ class ConstProperties: get property value by key (returns `None` if key does not exist) """ - def items(self): """ items() -> list[tuple[str, Any]] lists the property keys together with the corresponding value """ - def keys(self): """ keys() -> list[str] lists the available property keys """ - def values(self): """ values() -> list[Any] @@ -164,6 +144,34 @@ class ConstProperties: lists the property values """ +class DiskGraphStorage: + def __init__(self): + """Initialize self. See help(type(self)) for accurate signature.""" + def graph_dir(self): ... + @staticmethod + def load_from_dir(graph_dir): ... + @staticmethod + def load_from_pandas(graph_dir, edge_df, time_col, src_col, dst_col): ... + @staticmethod + def load_from_parquets( + graph_dir, + layer_parquet_cols, + node_properties, + chunk_size, + t_props_chunk_size, + read_chunk_size, + concurrent_files, + num_threads, + node_type_col, + ): ... + def merge_by_sorted_gids(self, other, graph_dir): + """ + Merge this graph with another `DiskGraph`. Note that both graphs should have nodes that are + sorted by their global ids or the resulting graph will be nonsense! + """ + def to_events(self): ... + def to_persistent(self): ... + class Edge: """ PyEdge is a Python class that represents an edge in the graph. @@ -172,7 +180,6 @@ class Edge: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def after(self, start): """ Create a view of the Edge including all events after `start` (exclusive). @@ -183,7 +190,6 @@ class Edge: Returns: A Edge object. """ - def at(self, time): """ Create a view of the Edge including all events at `time`. @@ -194,7 +200,6 @@ class Edge: Returns: A Edge object. """ - def before(self, end): """ Create a view of the Edge including all events before `end` (exclusive). @@ -205,7 +210,6 @@ class Edge: Returns: A Edge object. """ - @property def date_time(self): """ @@ -214,14 +218,12 @@ class Edge: Returns: (datetime) the datetime of an exploded edge """ - def default_layer(self): """ Return a view of Edge containing only the default edge layer Returns: Edge: The layered view """ - def deletions(self): """ Returns a list of timestamps of when an edge is deleted @@ -229,7 +231,6 @@ class Edge: Returns: A list of unix timestamps """ - def deletions_data_time(self): """ Returns a list of timestamps of when an edge is deleted @@ -237,11 +238,9 @@ class Edge: Returns: A list of DateTime objects """ - @property def dst(self): """Returns the destination node of the edge.""" - @property def earliest_date_time(self): """ @@ -250,7 +249,6 @@ class Edge: Returns: the earliest datetime of an edge """ - @property def earliest_time(self): """ @@ -259,7 +257,6 @@ class Edge: Returns: (int) The earliest time of an edge """ - @property def end(self): """ @@ -268,7 +265,6 @@ class Edge: Returns: The latest time that this Edge is valid or None if the Edge is valid for all times. """ - @property def end_date_time(self): """ @@ -277,7 +273,6 @@ class Edge: Returns: The latest datetime that this Edge is valid or None if the Edge is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Edge containing all layers except the excluded `name` @@ -289,7 +284,6 @@ class Edge: Returns: Edge: The layered view """ - def exclude_layers(self, names): """ Return a view of Edge containing all layers except the excluded `names` @@ -301,7 +295,6 @@ class Edge: Returns: Edge: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Edge containing all layers except the excluded `name` @@ -311,7 +304,6 @@ class Edge: Returns: Edge: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Edge containing all layers except the excluded `names` @@ -321,7 +313,6 @@ class Edge: Returns: Edge: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -334,16 +325,11 @@ class Edge: Returns: A `WindowSet` object. """ - def explode(self): """Explodes an edge and returns all instances it had been updated as seperate edges""" - - def explode_layers(self): - ... - + def explode_layers(self): ... def has_layer(self, name): - """ Check if Edge has the layer `"name"`""" - + """Check if Edge has the layer `"name"`""" def history(self): """ Returns a list of timestamps of when an edge is added or change to an edge is made. @@ -352,7 +338,6 @@ class Edge: A list of unix timestamps. """ - def history_date_time(self): """ Returns a list of timestamps of when an edge is added or change to an edge is made. @@ -361,20 +346,15 @@ class Edge: A list of timestamps. """ - @property def id(self): """The id of the edge.""" - def is_deleted(self): """Check if the edge is currently deleted""" - def is_self_loop(self): """Check if the edge is on the same node""" - def is_valid(self): """Check if the edge is currently valid (i.e., not deleted)""" - @property def latest_date_time(self): """ @@ -383,7 +363,6 @@ class Edge: Returns: (datetime) the latest datetime of an edge """ - @property def latest_time(self): """ @@ -392,7 +371,6 @@ class Edge: Returns: (int) The latest time of an edge """ - def layer(self, name): """ Return a view of Edge containing the layer `"name"` @@ -401,7 +379,6 @@ class Edge: Returns: Edge: The layered view """ - @property def layer_name(self): """ @@ -410,7 +387,6 @@ class Edge: Returns: (List) The name of the layer """ - @property def layer_names(self): """ @@ -419,7 +395,6 @@ class Edge: Returns: (List) The name of the layer """ - def layers(self, names): """ Return a view of Edge containing all layers `names` @@ -431,11 +406,9 @@ class Edge: Returns: Edge: The layered view """ - @property def nbr(self): """Returns the node at the other end of the edge (same as `dst()` for out-edges and `src()` for in-edges)""" - @property def properties(self): """ @@ -444,7 +417,6 @@ class Edge: Returns: Properties on the Edge. """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -458,7 +430,6 @@ class Edge: Returns: A `WindowSet` object. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -468,7 +439,6 @@ class Edge: Returns: A Edge object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -479,7 +449,6 @@ class Edge: Returns: A Edge object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -487,11 +456,9 @@ class Edge: Arguments: """ - @property def src(self): """Returns the source node of the edge.""" - @property def start(self): """ @@ -500,7 +467,6 @@ class Edge: Returns: The earliest time that this Edge is valid or None if the Edge is valid for all times. """ - @property def start_date_time(self): """ @@ -509,7 +475,6 @@ class Edge: Returns: The earliest datetime that this Edge is valid or None if the Edge is valid for all times. """ - @property def time(self): """ @@ -518,7 +483,6 @@ class Edge: Returns: (int) The time of an exploded edge """ - def valid_layers(self, names): """ Return a view of Edge containing all layers `names` @@ -530,7 +494,6 @@ class Edge: Returns: Edge: The layered view """ - def window(self, start, end): """ Create a view of the Edge including all events between `start` (inclusive) and `end` (exclusive) @@ -542,17 +505,15 @@ class Edge: Returns: r A Edge object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Edge""" + """Get the window size (difference between start and end) for this Edge""" class Edges: """A list of edges that can be iterated over.""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def after(self, start): """ Create a view of the Edges including all events after `start` (exclusive). @@ -563,7 +524,6 @@ class Edges: Returns: A Edges object. """ - def at(self, time): """ Create a view of the Edges including all events at `time`. @@ -574,7 +534,6 @@ class Edges: Returns: A Edges object. """ - def before(self, end): """ Create a view of the Edges including all events before `end` (exclusive). @@ -585,7 +544,6 @@ class Edges: Returns: A Edges object. """ - def collect(self): """ Collect all edges into a list @@ -593,10 +551,8 @@ class Edges: Returns: list[Edge]: the list of edges """ - def count(self): """Returns the number of edges""" - @property def date_time(self): """ @@ -605,14 +561,12 @@ class Edges: Returns: A list of date times. """ - def default_layer(self): """ Return a view of Edges containing only the default edge layer Returns: Edges: The layered view """ - def deletions(self): """ Returns all timestamps of edges where an edge is deleted @@ -620,7 +574,6 @@ class Edges: Returns: A list of lists of unix timestamps """ - def deletions_date_time(self): """ Returns all timestamps of edges where an edge is deleted @@ -628,11 +581,9 @@ class Edges: Returns: A list of lists of DateTime objects """ - @property def dst(self): """Returns the destination node of the edge.""" - @property def earliest_date_time(self): """ @@ -641,7 +592,6 @@ class Edges: Returns: Earliest date time of the edges. """ - @property def earliest_time(self): """ @@ -650,7 +600,6 @@ class Edges: Returns: Earliest time of the edges. """ - @property def end(self): """ @@ -659,7 +608,6 @@ class Edges: Returns: The latest time that this Edges is valid or None if the Edges is valid for all times. """ - @property def end_date_time(self): """ @@ -668,7 +616,6 @@ class Edges: Returns: The latest datetime that this Edges is valid or None if the Edges is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Edges containing all layers except the excluded `name` @@ -680,7 +627,6 @@ class Edges: Returns: Edges: The layered view """ - def exclude_layers(self, names): """ Return a view of Edges containing all layers except the excluded `names` @@ -692,7 +638,6 @@ class Edges: Returns: Edges: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Edges containing all layers except the excluded `name` @@ -702,7 +647,6 @@ class Edges: Returns: Edges: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Edges containing all layers except the excluded `names` @@ -712,7 +656,6 @@ class Edges: Returns: Edges: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -725,16 +668,11 @@ class Edges: Returns: A `WindowSet` object. """ - def explode(self): """Explodes an edge and returns all instances it had been updated as seperate edges""" - - def explode_layers(self): - ... - + def explode_layers(self): ... def has_layer(self, name): - """ Check if Edges has the layer `"name"`""" - + """Check if Edges has the layer `"name"`""" def history(self): """ Returns all timestamps of edges, when an edge is added or change to an edge is made. @@ -743,7 +681,6 @@ class Edges: A list of lists unix timestamps. """ - def history_date_time(self): """ Returns all timestamps of edges, when an edge is added or change to an edge is made. @@ -752,20 +689,15 @@ class Edges: A list of lists of timestamps. """ - @property def id(self): """Returns all ids of the edges.""" - def is_deleted(self): """Check if the edges are deleted""" - def is_self_loop(self): """Check if the edges are on the same node""" - def is_valid(self): """Check if the edges are valid (i.e. not deleted)""" - @property def latest_date_time(self): """ @@ -774,7 +706,6 @@ class Edges: Returns: Latest date time of the edges. """ - @property def latest_time(self): """ @@ -783,7 +714,6 @@ class Edges: Returns: Latest time of the edges. """ - def layer(self, name): """ Return a view of Edges containing the layer `"name"` @@ -792,7 +722,6 @@ class Edges: Returns: Edges: The layered view """ - @property def layer_name(self): """ @@ -801,7 +730,6 @@ class Edges: Returns: The name of the layer """ - @property def layer_names(self): """ @@ -810,7 +738,6 @@ class Edges: Returns: A list of layer names """ - def layers(self, names): """ Return a view of Edges containing all layers `names` @@ -822,15 +749,12 @@ class Edges: Returns: Edges: The layered view """ - @property def nbr(self): """Returns the node at the other end of the edge (same as `dst()` for out-edges and `src()` for in-edges)""" - @property def properties(self): """Returns all properties of the edges""" - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -844,7 +768,6 @@ class Edges: Returns: A `WindowSet` object. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -854,7 +777,6 @@ class Edges: Returns: A Edges object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -865,7 +787,6 @@ class Edges: Returns: A Edges object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -873,11 +794,9 @@ class Edges: Arguments: """ - @property def src(self): """Returns the source node of the edge.""" - @property def start(self): """ @@ -886,7 +805,6 @@ class Edges: Returns: The earliest time that this Edges is valid or None if the Edges is valid for all times. """ - @property def start_date_time(self): """ @@ -895,7 +813,6 @@ class Edges: Returns: The earliest datetime that this Edges is valid or None if the Edges is valid for all times. """ - @property def time(self): """ @@ -904,8 +821,9 @@ class Edges: Returns: Time of edge """ - - def to_df(self, include_property_history=True, convert_datetime=False, explode=False): + def to_df( + self, include_property_history=True, convert_datetime=False, explode=False + ): """ Converts the graph's edges into a Pandas DataFrame. @@ -924,7 +842,6 @@ class Edges: Returns: If successful, this PyObject will be a Pandas DataFrame. """ - def valid_layers(self, names): """ Return a view of Edges containing all layers `names` @@ -936,7 +853,6 @@ class Edges: Returns: Edges: The layered view """ - def window(self, start, end): """ Create a view of the Edges including all events between `start` (inclusive) and `end` (exclusive) @@ -948,17 +864,15 @@ class Edges: Returns: r A Edges object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Edges""" + """Get the window size (difference between start and end) for this Edges""" class Graph: """A temporal graph.""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def add_constant_properties(self, properties): """ Adds static properties to the graph. @@ -969,7 +883,6 @@ class Graph: Returns: None """ - def add_edge(self, timestamp, src, dst, properties=None, layer=None): """ Adds a new edge with the given source and destination nodes and properties to the graph. @@ -984,7 +897,6 @@ class Graph: Returns: None """ - def add_node(self, timestamp, id, properties=None, node_type=None): """ Adds a new node with the given id and properties to the graph. @@ -997,7 +909,6 @@ class Graph: Returns: None """ - def add_property(self, timestamp, properties): """ Adds properties to the graph. @@ -1009,7 +920,6 @@ class Graph: Returns: None """ - def after(self, start): """ Create a view of the GraphView including all events after `start` (exclusive). @@ -1020,7 +930,6 @@ class Graph: Returns: A GraphView object. """ - def at(self, time): """ Create a view of the GraphView including all events at `time`. @@ -1031,7 +940,6 @@ class Graph: Returns: A GraphView object. """ - def before(self, end): """ Create a view of the GraphView including all events before `end` (exclusive). @@ -1042,18 +950,8 @@ class Graph: Returns: A GraphView object. """ - - def cache(self, path): - """ - Write Graph to cache file and initialise the cache. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. If the file already exists its contents are overwritten. - - Arguments: - path (str): The path to the cache file - """ - + def bincode(self): + """Get bincode encoded graph""" def count_edges(self): """ Number of edges in the graph @@ -1061,7 +959,6 @@ class Graph: Returns: the number of edges in the graph """ - def count_nodes(self): """ Number of nodes in the graph @@ -1069,7 +966,6 @@ class Graph: Returns: the number of nodes in the graph """ - def count_temporal_edges(self): """ Number of edges in the graph @@ -1077,26 +973,12 @@ class Graph: Returns: the number of temporal edges in the graph """ - def default_layer(self): """ Return a view of GraphView containing only the default edge layer Returns: GraphView: The layered view """ - - @staticmethod - def deserialise(bytes): - """ - Load Graph from serialised bytes. - - Arguments: - bytes (Bytes): The serialised bytes to decode - - Returns: - Graph - """ - @property def earliest_date_time(self): """ @@ -1105,7 +987,6 @@ class Graph: Returns: the datetime of the earliest activity in the graph """ - @property def earliest_time(self): """ @@ -1114,7 +995,6 @@ class Graph: Returns: the timestamp of the earliest activity in the graph """ - def edge(self, src, dst): """ Gets the edge with the specified source and destination nodes @@ -1126,7 +1006,6 @@ class Graph: Returns: the edge with the specified source and destination nodes, or None if the edge does not exist """ - @property def edges(self): """ @@ -1135,7 +1014,6 @@ class Graph: Returns: the edges in the graph """ - @property def end(self): """ @@ -1144,7 +1022,6 @@ class Graph: Returns: The latest time that this GraphView is valid or None if the GraphView is valid for all times. """ - @property def end_date_time(self): """ @@ -1153,7 +1030,6 @@ class Graph: Returns: The latest datetime that this GraphView is valid or None if the GraphView is valid for all times. """ - def exclude_layer(self, name): """ Return a view of GraphView containing all layers except the excluded `name` @@ -1165,7 +1041,6 @@ class Graph: Returns: GraphView: The layered view """ - def exclude_layers(self, names): """ Return a view of GraphView containing all layers except the excluded `names` @@ -1177,7 +1052,6 @@ class Graph: Returns: GraphView: The layered view """ - def exclude_nodes(self, nodes): """ Returns a subgraph given a set of nodes that are excluded from the subgraph @@ -1188,7 +1062,6 @@ class Graph: Returns: GraphView - Returns the subgraph """ - def exclude_valid_layer(self, name): """ Return a view of GraphView containing all layers except the excluded `name` @@ -1198,7 +1071,6 @@ class Graph: Returns: GraphView: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of GraphView containing all layers except the excluded `names` @@ -1208,7 +1080,6 @@ class Graph: Returns: GraphView: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -1221,7 +1092,6 @@ class Graph: Returns: A `WindowSet` object. """ - def find_edges(self, properties_dict): """ Get the edges that match the properties name and value @@ -1230,7 +1100,6 @@ class Graph: Returns: the edges that match the properties name and value """ - def find_nodes(self, properties_dict): """ Get the nodes that match the properties name and value @@ -1239,7 +1108,9 @@ class Graph: Returns: the nodes that match the properties name and value """ - + @staticmethod + def from_bincode(bytes): + """Creates a graph from a bincode encoded graph""" def get_all_node_types(self): """ Returns all the node types in the graph. @@ -1247,7 +1118,6 @@ class Graph: Returns: A list of node types """ - def has_edge(self, src, dst): """ Returns true if the graph contains the specified edge @@ -1259,10 +1129,8 @@ class Graph: Returns: true if the graph contains the specified edge, false otherwise """ - def has_layer(self, name): - """ Check if GraphView has the layer `"name"`""" - + """Check if GraphView has the layer `"name"`""" def has_node(self, id): """ Returns true if the graph contains the specified node @@ -1273,7 +1141,6 @@ class Graph: Returns: true if the graph contains the specified node, false otherwise """ - def import_edge(self, edge, force=False): """ Import a single edge into the graph. @@ -1289,7 +1156,6 @@ class Graph: Returns: Result, GraphError> - A Result object which is Ok if the edge was successfully imported, and Err otherwise. """ - def import_edges(self, edges, force=False): """ Import multiple edges into the graph. @@ -1303,7 +1169,6 @@ class Graph: force (boolean) - An optional boolean flag indicating whether to force the import of the edges. """ - def import_node(self, node, force=False): """ Import a single node into the graph. @@ -1318,7 +1183,6 @@ class Graph: Returns: Result, GraphError> - A Result object which is Ok if the node was successfully imported, and Err otherwise. """ - def import_nodes(self, nodes, force=False): """ Import multiple nodes into the graph. @@ -1332,7 +1196,6 @@ class Graph: force (boolean) - An optional boolean flag indicating whether to force the import of the nodes. """ - def index(self): """ Indexes all node and edge properties. @@ -1342,7 +1205,6 @@ class Graph: Returns: GraphIndex - Returns a GraphIndex """ - def largest_connected_component(self): """ Gives the large connected component of a graph. @@ -1354,7 +1216,6 @@ class Graph: A raphtory graph, which essentially is a sub-graph of the graph `g` """ - @property def latest_date_time(self): """ @@ -1363,7 +1224,6 @@ class Graph: Returns: the datetime of the latest activity in the graph """ - @property def latest_time(self): """ @@ -1372,7 +1232,6 @@ class Graph: Returns: the timestamp of the latest activity in the graph """ - def layer(self, name): """ Return a view of GraphView containing the layer `"name"` @@ -1381,7 +1240,6 @@ class Graph: Returns: GraphView: The layered view """ - def layers(self, names): """ Return a view of GraphView containing all layers `names` @@ -1393,23 +1251,16 @@ class Graph: Returns: GraphView: The layered view """ - - @staticmethod - def load_cached(path): - """ - Load Graph from a file and initialise it as a cache file. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. - - Arguments: - path (str): The path to the cache file - - Returns: - Graph - """ - - def load_edge_props_from_pandas(self, df, src, dst, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + def load_edge_props_from_pandas( + self, + df, + src, + dst, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edge properties from a Pandas DataFrame. @@ -1417,16 +1268,27 @@ class Graph: df (Dataframe): The Pandas DataFrame containing edge information. src (str): The column name for the source node. dst (str): The column name for the destination node. - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - layer (str): Layer name. Defaults to None. (optional) - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + layer (str): The edge layer name (optional) Defaults to None. + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_edge_props_from_parquet(self, parquet_path, src, dst, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + Raises: + GraphError: If the operation fails. + """ + def load_edge_props_from_parquet( + self, + parquet_path, + src, + dst, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edge properties from parquet file @@ -1434,183 +1296,200 @@ class Graph: parquet_path (str): Parquet file or directory of Parquet files path containing edge information. src (str): The column name for the source node. dst (str): The column name for the destination node. - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - layer (str): Layer name. Defaults to None. (optional) - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + layer (str): The edge layer name (optional) Defaults to None. + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_edges_from_pandas(self, df, src, dst, time, properties=None, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + Raises: + GraphError: If the operation fails. + """ + def load_edges_from_pandas( + self, + df, + time, + src, + dst, + properties=None, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edges from a Pandas DataFrame into the graph. Arguments: df (Dataframe): The Pandas DataFrame containing the edges. + time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - time (str): The column name for the update timestamps. - properties (List): List of edge property column names. Defaults to None. (optional) - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dateframe or if it should be used directly as the layer for all edges (optional) defaults to True. - - Returns: - Result<(), GraphError>: Result of the operation. - """ - - def load_edges_from_parquet(self, parquet_path, src, dst, time, properties=None, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + properties (List[str]): List of edge property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) + Returns: + None: If the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_edges_from_parquet( + self, + parquet_path, + time, + src, + dst, + properties=None, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edges from a Parquet file into the graph. Arguments: parquet_path (str): Parquet file or directory of Parquet files path containing edges + time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - time (str): The column name for the update timestamps. - properties (List): List of edge property column names. Defaults to None. (optional) - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. - + properties (List[str]): List of edge property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. + Raises: + GraphError: If the operation fails. + """ @staticmethod - def load_from_file(path): + def load_from_file(path, force=False): """ - Load Graph from a file. + Loads a graph from the given path. Arguments: - path (str): The path to the file. + path (str): The path to the graph. Returns: - Graph - """ - - @staticmethod - def load_from_pandas(edge_df, edge_src, edge_dst, edge_time, edge_properties=None, edge_const_properties=None, edge_shared_const_properties=None, edge_layer=None, layer_in_df=True, node_df=None, node_id=None, node_time=None, node_properties=None, node_const_properties=None, node_shared_const_properties=None, node_type=None, node_type_in_df=True): - """ - Load a graph from a Pandas DataFrame. - - Args: - edge_df (pandas.DataFrame): The DataFrame containing the edges. - edge_src (str): The column name for the source node ids. - edge_dst (str): The column name for the destination node ids. - edge_time (str): The column name for the timestamps. - edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - edge_layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. - node_df (pandas.DataFrame): The DataFrame containing the nodes (optional) Defaults to None. - node_id (str): The column name for the node ids (optional) Defaults to None. - node_time (str): The column name for the node timestamps (optional) Defaults to None. - node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. - node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - - Returns: - Graph: The loaded Graph object. + Graph: The loaded graph. """ - - @staticmethod - def load_from_parquet(edge_parquet_path, edge_src, edge_dst, edge_time, edge_properties=None, edge_const_properties=None, edge_shared_const_properties=None, edge_layer=None, layer_in_df=True, node_parquet_path=None, node_id=None, node_time=None, node_properties=None, node_const_properties=None, node_shared_const_properties=None, node_type=None, node_type_in_df=True): - """ - Load a graph from Parquet file. - - Args: - edge_parquet_path (str): Parquet file or directory of Parquet files containing the edges. - edge_src (str): The column name for the source node ids. - edge_dst (str): The column name for the destination node ids. - edge_time (str): The column name for the timestamps. - edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - edge_layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. - node_parquet_path (str): Parquet file or directory of Parquet files containing the nodes (optional) Defaults to None. - node_id (str): The column name for the node ids (optional) Defaults to None. - node_time (str): The column name for the node timestamps (optional) Defaults to None. - node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. - node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - - Returns: - Graph: The loaded Graph object. - """ - - def load_node_props_from_pandas(self, df, id, const_properties=None, shared_const_properties=None): + def load_node_props_from_pandas( + self, + df, + id, + node_type=None, + node_type_col=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load node properties from a Pandas DataFrame. Arguments: df (Dataframe): The Pandas DataFrame containing node information. id(str): The column name for the node IDs. - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_node_props_from_parquet(self, parquet_path, id, const_properties=None, shared_const_properties=None): + Raises: + GraphError: If the operation fails. + """ + def load_node_props_from_parquet( + self, + parquet_path, + id, + node_type=None, + node_type_col=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load node properties from a parquet file. Arguments: parquet_path (str): Parquet file or directory of Parquet files path containing node information. id(str): The column name for the node IDs. - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_nodes_from_pandas(self, df, id, time, node_type=None, node_type_in_df=True, properties=None, const_properties=None, shared_const_properties=None): + Raises: + GraphError: If the operation fails. + """ + def load_nodes_from_pandas( + self, + df, + time, + id, + node_type=None, + node_type_col=None, + properties=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load nodes from a Pandas DataFrame into the graph. Arguments: df (pandas.DataFrame): The Pandas DataFrame containing the nodes. - id (str): The column name for the node IDs. time (str): The column name for the timestamps. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - properties (List): List of node property column names. Defaults to None. (optional) - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) - Returns: - Result<(), GraphError>: Result of the operation. - """ - - def load_nodes_from_parquet(self, parquet_path, id, time, node_type=None, node_type_in_df=True, properties=None, const_properties=None, shared_const_properties=None): + id (str): The column name for the node IDs. + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + properties (List[str]): List of node property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + Returns: + None: If the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_nodes_from_parquet( + self, + parquet_path, + time, + id, + node_type=None, + node_type_col=None, + properties=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load nodes from a Parquet file into the graph. Arguments: parquet_path (str): Parquet file or directory of Parquet files containing the nodes - id (str): The column name for the node IDs. time (str): The column name for the timestamps. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - properties (List): List of node property column names. Defaults to None. (optional) - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + id (str): The column name for the node IDs. + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + properties (List[str]): List of node property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. + Raises: + GraphError: If the operation fails. + """ def materialize(self): """ Returns a 'materialized' clone of the graph view - i.e. a new graph with a copy of the data seen within the view instead of just a mask over the original graph @@ -1618,7 +1497,6 @@ class Graph: Returns: GraphView - Returns a graph clone """ - def node(self, id): """ Gets the node with the specified id @@ -1629,7 +1507,6 @@ class Graph: Returns: the node with the specified id, or None if the node does not exist """ - @property def nodes(self): """ @@ -1638,10 +1515,10 @@ class Graph: Returns: the nodes in the graph """ - + def persist_as_disk_graph(self, graph_dir): + """save graph in disk_graph format and memory map the result""" def persistent_graph(self): """Get persistent graph""" - @property def properties(self): """ @@ -1651,7 +1528,6 @@ class Graph: Returns: HashMap - Properties paired with their names """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -1665,23 +1541,16 @@ class Graph: Returns: A `WindowSet` object. """ - def save_to_file(self, path): """ - Saves the Graph to the given path. + Saves the graph to the given path. Arguments: - path (str): The path to the file. - """ - - def serialise(self): - """ - Serialise Graph to bytes. + path (str): The path to the graph. Returns: - Bytes + None """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -1691,7 +1560,6 @@ class Graph: Returns: A GraphView object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -1702,7 +1570,6 @@ class Graph: Returns: A GraphView object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -1710,7 +1577,6 @@ class Graph: Arguments: """ - @property def start(self): """ @@ -1719,7 +1585,6 @@ class Graph: Returns: The earliest time that this GraphView is valid or None if the GraphView is valid for all times. """ - @property def start_date_time(self): """ @@ -1728,7 +1593,6 @@ class Graph: Returns: The earliest datetime that this GraphView is valid or None if the GraphView is valid for all times. """ - def subgraph(self, nodes): """ Returns a subgraph given a set of nodes @@ -1739,7 +1603,6 @@ class Graph: Returns: GraphView - Returns the subgraph """ - def subgraph_node_types(self, node_types): """ Returns a subgraph filtered by node types given a set of node types @@ -1750,8 +1613,15 @@ class Graph: Returns: GraphView - Returns the subgraph """ - - def to_networkx(self, explode_edges=False, include_node_properties=True, include_edge_properties=True, include_update_history=True, include_property_history=True): + def to_disk_graph(self, graph_dir): ... + def to_networkx( + self, + explode_edges=False, + include_node_properties=True, + include_edge_properties=True, + include_update_history=True, + include_property_history=True, + ): """ Returns a graph with NetworkX. @@ -1769,8 +1639,18 @@ class Graph: Returns: A Networkx MultiDiGraph. """ - - def to_pyvis(self, explode_edges=False, edge_color="#000000", shape=None, node_image=None, edge_weight=None, edge_label=None, colour_nodes_by_type=False, notebook=False, **kwargs): + def to_pyvis( + self, + explode_edges=False, + edge_color="#000000", + shape=None, + node_image=None, + edge_weight=None, + edge_label=None, + colour_nodes_by_type=False, + notebook=False, + **kwargs + ): """ Draw a graph with PyVis. Pyvis is a required dependency. If you intend to use this function make sure that you install Pyvis @@ -1794,11 +1674,9 @@ class Graph: Returns: A pyvis network """ - @property def unique_layers(self): """Return all the layer ids in the graph""" - def update_constant_properties(self, properties): """ Updates static properties to the graph. @@ -1809,7 +1687,6 @@ class Graph: Returns: None """ - def valid_layers(self, names): """ Return a view of GraphView containing all layers `names` @@ -1821,8 +1698,16 @@ class Graph: Returns: GraphView: The layered view """ - - def vectorise(self, embedding, cache=None, overwrite_cache=False, graph_document=None, node_document=None, edge_document=None, verbose=False): + def vectorise( + self, + embedding, + cache=None, + overwrite_cache=False, + graph_document=None, + node_document=None, + edge_document=None, + verbose=False, + ): """ Create a VectorisedGraph from the current graph @@ -1837,7 +1722,6 @@ class Graph: Returns: A VectorisedGraph with all the documents/embeddings computed and with an initial empty selection """ - def window(self, start, end): """ Create a view of the GraphView including all events between `start` (inclusive) and `end` (exclusive) @@ -1849,13 +1733,9 @@ class Graph: Returns: r A GraphView object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this GraphView""" - - def write_updates(self): - """Persist the new updates by appending them to the cache file.""" + """Get the window size (difference between start and end) for this GraphView""" class GraphIndex: """ @@ -1866,8 +1746,9 @@ class GraphIndex: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - - def fuzzy_search_edges(self, query, limit=25, offset=0, prefix=False, levenshtein_distance=0): + def fuzzy_search_edges( + self, query, limit=25, offset=0, prefix=False, levenshtein_distance=0 + ): """ Searches for edges which match the given query. This uses Tantivy's fuzzy search. @@ -1881,8 +1762,9 @@ class GraphIndex: Returns: A list of edges which match the query. The list will be empty if no edges match the query. """ - - def fuzzy_search_nodes(self, query, limit=25, offset=0, prefix=False, levenshtein_distance=0): + def fuzzy_search_nodes( + self, query, limit=25, offset=0, prefix=False, levenshtein_distance=0 + ): """ Searches for nodes which match the given query. This uses Tantivy's fuzzy search. If you would like to better understand the query syntax, please visit our documentation at https://docs.raphtory.com @@ -1897,7 +1779,6 @@ class GraphIndex: Returns: A list of nodes which match the query. The list will be empty if no nodes match. """ - def search_edges(self, query, limit=25, offset=0): """ Searches for edges which match the given query. This uses Tantivy's exact search. @@ -1910,7 +1791,6 @@ class GraphIndex: Returns: A list of edges which match the query. The list will be empty if no edges match the query. """ - def search_nodes(self, query, limit=25, offset=0): """ Searches for nodes which match the given query. This uses Tantivy's exact search. @@ -1925,10 +1805,8 @@ class GraphIndex: """ class MutableEdge: - def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def add_constant_properties(self, properties, layer=None): """ Add constant properties to an edge in the graph. @@ -1944,7 +1822,6 @@ class MutableEdge: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def add_updates(self, t, properties=None, layer=None): """ Add updates to an edge in the graph at a specified time. @@ -1959,7 +1836,6 @@ class MutableEdge: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def after(self, start): """ Create a view of the Edge including all events after `start` (exclusive). @@ -1970,7 +1846,6 @@ class MutableEdge: Returns: A Edge object. """ - def at(self, time): """ Create a view of the Edge including all events at `time`. @@ -1981,7 +1856,6 @@ class MutableEdge: Returns: A Edge object. """ - def before(self, end): """ Create a view of the Edge including all events before `end` (exclusive). @@ -1992,7 +1866,6 @@ class MutableEdge: Returns: A Edge object. """ - @property def date_time(self): """ @@ -2001,14 +1874,12 @@ class MutableEdge: Returns: (datetime) the datetime of an exploded edge """ - def default_layer(self): """ Return a view of Edge containing only the default edge layer Returns: Edge: The layered view """ - def deletions(self): """ Returns a list of timestamps of when an edge is deleted @@ -2016,7 +1887,6 @@ class MutableEdge: Returns: A list of unix timestamps """ - def deletions_data_time(self): """ Returns a list of timestamps of when an edge is deleted @@ -2024,11 +1894,9 @@ class MutableEdge: Returns: A list of DateTime objects """ - @property def dst(self): """Returns the destination node of the edge.""" - @property def earliest_date_time(self): """ @@ -2037,7 +1905,6 @@ class MutableEdge: Returns: the earliest datetime of an edge """ - @property def earliest_time(self): """ @@ -2046,7 +1913,6 @@ class MutableEdge: Returns: (int) The earliest time of an edge """ - @property def end(self): """ @@ -2055,7 +1921,6 @@ class MutableEdge: Returns: The latest time that this Edge is valid or None if the Edge is valid for all times. """ - @property def end_date_time(self): """ @@ -2064,7 +1929,6 @@ class MutableEdge: Returns: The latest datetime that this Edge is valid or None if the Edge is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Edge containing all layers except the excluded `name` @@ -2076,7 +1940,6 @@ class MutableEdge: Returns: Edge: The layered view """ - def exclude_layers(self, names): """ Return a view of Edge containing all layers except the excluded `names` @@ -2088,7 +1951,6 @@ class MutableEdge: Returns: Edge: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Edge containing all layers except the excluded `name` @@ -2098,7 +1960,6 @@ class MutableEdge: Returns: Edge: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Edge containing all layers except the excluded `names` @@ -2108,7 +1969,6 @@ class MutableEdge: Returns: Edge: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2121,16 +1981,11 @@ class MutableEdge: Returns: A `WindowSet` object. """ - def explode(self): """Explodes an edge and returns all instances it had been updated as seperate edges""" - - def explode_layers(self): - ... - + def explode_layers(self): ... def has_layer(self, name): - """ Check if Edge has the layer `"name"`""" - + """Check if Edge has the layer `"name"`""" def history(self): """ Returns a list of timestamps of when an edge is added or change to an edge is made. @@ -2139,7 +1994,6 @@ class MutableEdge: A list of unix timestamps. """ - def history_date_time(self): """ Returns a list of timestamps of when an edge is added or change to an edge is made. @@ -2148,20 +2002,15 @@ class MutableEdge: A list of timestamps. """ - @property def id(self): """The id of the edge.""" - def is_deleted(self): """Check if the edge is currently deleted""" - def is_self_loop(self): """Check if the edge is on the same node""" - def is_valid(self): """Check if the edge is currently valid (i.e., not deleted)""" - @property def latest_date_time(self): """ @@ -2170,7 +2019,6 @@ class MutableEdge: Returns: (datetime) the latest datetime of an edge """ - @property def latest_time(self): """ @@ -2179,7 +2027,6 @@ class MutableEdge: Returns: (int) The latest time of an edge """ - def layer(self, name): """ Return a view of Edge containing the layer `"name"` @@ -2188,7 +2035,6 @@ class MutableEdge: Returns: Edge: The layered view """ - @property def layer_name(self): """ @@ -2197,7 +2043,6 @@ class MutableEdge: Returns: (List) The name of the layer """ - @property def layer_names(self): """ @@ -2206,7 +2051,6 @@ class MutableEdge: Returns: (List) The name of the layer """ - def layers(self, names): """ Return a view of Edge containing all layers `names` @@ -2218,11 +2062,9 @@ class MutableEdge: Returns: Edge: The layered view """ - @property def nbr(self): """Returns the node at the other end of the edge (same as `dst()` for out-edges and `src()` for in-edges)""" - @property def properties(self): """ @@ -2231,7 +2073,6 @@ class MutableEdge: Returns: Properties on the Edge. """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -2245,7 +2086,6 @@ class MutableEdge: Returns: A `WindowSet` object. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -2255,7 +2095,6 @@ class MutableEdge: Returns: A Edge object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -2266,7 +2105,6 @@ class MutableEdge: Returns: A Edge object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -2274,11 +2112,9 @@ class MutableEdge: Arguments: """ - @property def src(self): """Returns the source node of the edge.""" - @property def start(self): """ @@ -2287,7 +2123,6 @@ class MutableEdge: Returns: The earliest time that this Edge is valid or None if the Edge is valid for all times. """ - @property def start_date_time(self): """ @@ -2296,7 +2131,6 @@ class MutableEdge: Returns: The earliest datetime that this Edge is valid or None if the Edge is valid for all times. """ - @property def time(self): """ @@ -2305,7 +2139,6 @@ class MutableEdge: Returns: (int) The time of an exploded edge """ - def update_constant_properties(self, properties, layer=None): """ Update constant properties of an edge in the graph overwriting existing values. @@ -2321,7 +2154,6 @@ class MutableEdge: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def valid_layers(self, names): """ Return a view of Edge containing all layers `names` @@ -2333,7 +2165,6 @@ class MutableEdge: Returns: Edge: The layered view """ - def window(self, start, end): """ Create a view of the Edge including all events between `start` (inclusive) and `end` (exclusive) @@ -2345,16 +2176,13 @@ class MutableEdge: Returns: r A Edge object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Edge""" + """Get the window size (difference between start and end) for this Edge""" class MutableNode: - def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def add_constant_properties(self, properties): """ Add constant properties to a node in the graph. @@ -2369,7 +2197,6 @@ class MutableNode: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def add_updates(self, t, properties=None): """ Add updates to a node in the graph at a specified time. @@ -2384,7 +2211,6 @@ class MutableNode: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def after(self, start): """ Create a view of the Node including all events after `start` (exclusive). @@ -2395,7 +2221,6 @@ class MutableNode: Returns: A Node object. """ - def at(self, time): """ Create a view of the Node including all events at `time`. @@ -2406,7 +2231,6 @@ class MutableNode: Returns: A Node object. """ - def before(self, end): """ Create a view of the Node including all events before `end` (exclusive). @@ -2417,14 +2241,12 @@ class MutableNode: Returns: A Node object. """ - def default_layer(self): """ Return a view of Node containing only the default edge layer Returns: Node: The layered view """ - def degree(self): """ Get the degree of this node (i.e., the number of edges that are incident to it). @@ -2432,7 +2254,6 @@ class MutableNode: Returns The degree of this node. """ - @property def earliest_date_time(self): """ @@ -2441,7 +2262,6 @@ class MutableNode: Returns: The earliest datetime that the node exists as an integer. """ - @property def earliest_time(self): """ @@ -2450,7 +2270,6 @@ class MutableNode: Returns: The earliest time that the node exists as an integer. """ - @property def edges(self): """ @@ -2460,7 +2279,6 @@ class MutableNode: An iterator over the edges that are incident to this node. """ - @property def end(self): """ @@ -2469,7 +2287,6 @@ class MutableNode: Returns: The latest time that this Node is valid or None if the Node is valid for all times. """ - @property def end_date_time(self): """ @@ -2478,7 +2295,6 @@ class MutableNode: Returns: The latest datetime that this Node is valid or None if the Node is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Node containing all layers except the excluded `name` @@ -2490,7 +2306,6 @@ class MutableNode: Returns: Node: The layered view """ - def exclude_layers(self, names): """ Return a view of Node containing all layers except the excluded `names` @@ -2502,7 +2317,6 @@ class MutableNode: Returns: Node: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Node containing all layers except the excluded `name` @@ -2512,7 +2326,6 @@ class MutableNode: Returns: Node: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Node containing all layers except the excluded `names` @@ -2522,7 +2335,6 @@ class MutableNode: Returns: Node: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2535,10 +2347,8 @@ class MutableNode: Returns: A `WindowSet` object. """ - def has_layer(self, name): - """ Check if Node has the layer `"name"`""" - + """Check if Node has the layer `"name"`""" def history(self): """ Returns the history of a node, including node additions and changes made to node. @@ -2546,7 +2356,6 @@ class MutableNode: Returns: A list of unix timestamps of the event history of node. """ - def history_date_time(self): """ Returns the history of a node, including node additions and changes made to node. @@ -2555,7 +2364,6 @@ class MutableNode: A list of timestamps of the event history of node. """ - @property def id(self): """ @@ -2565,7 +2373,6 @@ class MutableNode: Returns: The id of the node as an integer. """ - def in_degree(self): """ Get the in-degree of this node (i.e., the number of edges that are incident to it from other nodes). @@ -2573,7 +2380,6 @@ class MutableNode: Returns: The in-degree of this node. """ - @property def in_edges(self): """ @@ -2583,7 +2389,6 @@ class MutableNode: An iterator over the edges that point into this node. """ - @property def in_neighbours(self): """ @@ -2593,7 +2398,6 @@ class MutableNode: An iterator over the neighbours of this node that point into this node. """ - @property def latest_date_time(self): """ @@ -2605,7 +2409,6 @@ class MutableNode: Returns: The latest datetime that the node exists as an integer. """ - @property def latest_time(self): """ @@ -2614,7 +2417,6 @@ class MutableNode: Returns: The latest time that the node exists as an integer. """ - def layer(self, name): """ Return a view of Node containing the layer `"name"` @@ -2623,7 +2425,6 @@ class MutableNode: Returns: Node: The layered view """ - def layers(self, names): """ Return a view of Node containing all layers `names` @@ -2635,7 +2436,6 @@ class MutableNode: Returns: Node: The layered view """ - @property def name(self): """ @@ -2644,7 +2444,6 @@ class MutableNode: Returns: The name of the node as a string. """ - @property def neighbours(self): """ @@ -2654,11 +2453,9 @@ class MutableNode: An iterator over the neighbours of this node. """ - @property def node_type(self): """Returns the type of node""" - def out_degree(self): """ Get the out-degree of this node (i.e., the number of edges that are incident to it from this node). @@ -2666,7 +2463,6 @@ class MutableNode: Returns: The out-degree of this node. """ - @property def out_edges(self): """ @@ -2676,7 +2472,6 @@ class MutableNode: An iterator over the edges that point out of this node. """ - @property def out_neighbours(self): """ @@ -2686,7 +2481,6 @@ class MutableNode: An iterator over the neighbours of this node that point out of this node. """ - @property def properties(self): """ @@ -2695,7 +2489,6 @@ class MutableNode: Returns: A list of properties. """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -2709,7 +2502,6 @@ class MutableNode: Returns: A `WindowSet` object. """ - def set_node_type(self, new_type): """ Set the type on the node. This only works if the type has not been previously set, otherwise will @@ -2721,7 +2513,6 @@ class MutableNode: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -2731,7 +2522,6 @@ class MutableNode: Returns: A Node object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -2742,7 +2532,6 @@ class MutableNode: Returns: A Node object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -2750,7 +2539,6 @@ class MutableNode: Arguments: """ - @property def start(self): """ @@ -2759,7 +2547,6 @@ class MutableNode: Returns: The earliest time that this Node is valid or None if the Node is valid for all times. """ - @property def start_date_time(self): """ @@ -2768,7 +2555,6 @@ class MutableNode: Returns: The earliest datetime that this Node is valid or None if the Node is valid for all times. """ - def update_constant_properties(self, properties): """ Update constant properties of a node in the graph overwriting existing values. @@ -2783,7 +2569,6 @@ class MutableNode: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def valid_layers(self, names): """ Return a view of Node containing all layers `names` @@ -2795,7 +2580,6 @@ class MutableNode: Returns: Node: The layered view """ - def window(self, start, end): """ Create a view of the Node including all events between `start` (inclusive) and `end` (exclusive) @@ -2807,17 +2591,15 @@ class MutableNode: Returns: r A Node object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Node""" + """Get the window size (difference between start and end) for this Node""" class Node: """A node (or node) in the graph.""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def after(self, start): """ Create a view of the Node including all events after `start` (exclusive). @@ -2828,7 +2610,6 @@ class Node: Returns: A Node object. """ - def at(self, time): """ Create a view of the Node including all events at `time`. @@ -2839,7 +2620,6 @@ class Node: Returns: A Node object. """ - def before(self, end): """ Create a view of the Node including all events before `end` (exclusive). @@ -2850,14 +2630,12 @@ class Node: Returns: A Node object. """ - def default_layer(self): """ Return a view of Node containing only the default edge layer Returns: Node: The layered view """ - def degree(self): """ Get the degree of this node (i.e., the number of edges that are incident to it). @@ -2865,7 +2643,6 @@ class Node: Returns The degree of this node. """ - @property def earliest_date_time(self): """ @@ -2874,7 +2651,6 @@ class Node: Returns: The earliest datetime that the node exists as an integer. """ - @property def earliest_time(self): """ @@ -2883,7 +2659,6 @@ class Node: Returns: The earliest time that the node exists as an integer. """ - @property def edges(self): """ @@ -2893,7 +2668,6 @@ class Node: An iterator over the edges that are incident to this node. """ - @property def end(self): """ @@ -2902,7 +2676,6 @@ class Node: Returns: The latest time that this Node is valid or None if the Node is valid for all times. """ - @property def end_date_time(self): """ @@ -2911,7 +2684,6 @@ class Node: Returns: The latest datetime that this Node is valid or None if the Node is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Node containing all layers except the excluded `name` @@ -2923,7 +2695,6 @@ class Node: Returns: Node: The layered view """ - def exclude_layers(self, names): """ Return a view of Node containing all layers except the excluded `names` @@ -2935,7 +2706,6 @@ class Node: Returns: Node: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Node containing all layers except the excluded `name` @@ -2945,7 +2715,6 @@ class Node: Returns: Node: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Node containing all layers except the excluded `names` @@ -2955,7 +2724,6 @@ class Node: Returns: Node: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2968,10 +2736,8 @@ class Node: Returns: A `WindowSet` object. """ - def has_layer(self, name): - """ Check if Node has the layer `"name"`""" - + """Check if Node has the layer `"name"`""" def history(self): """ Returns the history of a node, including node additions and changes made to node. @@ -2979,7 +2745,6 @@ class Node: Returns: A list of unix timestamps of the event history of node. """ - def history_date_time(self): """ Returns the history of a node, including node additions and changes made to node. @@ -2988,7 +2753,6 @@ class Node: A list of timestamps of the event history of node. """ - @property def id(self): """ @@ -2998,7 +2762,6 @@ class Node: Returns: The id of the node as an integer. """ - def in_degree(self): """ Get the in-degree of this node (i.e., the number of edges that are incident to it from other nodes). @@ -3006,7 +2769,6 @@ class Node: Returns: The in-degree of this node. """ - @property def in_edges(self): """ @@ -3016,7 +2778,6 @@ class Node: An iterator over the edges that point into this node. """ - @property def in_neighbours(self): """ @@ -3026,7 +2787,6 @@ class Node: An iterator over the neighbours of this node that point into this node. """ - @property def latest_date_time(self): """ @@ -3038,7 +2798,6 @@ class Node: Returns: The latest datetime that the node exists as an integer. """ - @property def latest_time(self): """ @@ -3047,7 +2806,6 @@ class Node: Returns: The latest time that the node exists as an integer. """ - def layer(self, name): """ Return a view of Node containing the layer `"name"` @@ -3056,7 +2814,6 @@ class Node: Returns: Node: The layered view """ - def layers(self, names): """ Return a view of Node containing all layers `names` @@ -3068,7 +2825,6 @@ class Node: Returns: Node: The layered view """ - @property def name(self): """ @@ -3077,7 +2833,6 @@ class Node: Returns: The name of the node as a string. """ - @property def neighbours(self): """ @@ -3087,11 +2842,9 @@ class Node: An iterator over the neighbours of this node. """ - @property def node_type(self): """Returns the type of node""" - def out_degree(self): """ Get the out-degree of this node (i.e., the number of edges that are incident to it from this node). @@ -3099,7 +2852,6 @@ class Node: Returns: The out-degree of this node. """ - @property def out_edges(self): """ @@ -3109,7 +2861,6 @@ class Node: An iterator over the edges that point out of this node. """ - @property def out_neighbours(self): """ @@ -3119,7 +2870,6 @@ class Node: An iterator over the neighbours of this node that point out of this node. """ - @property def properties(self): """ @@ -3128,7 +2878,6 @@ class Node: Returns: A list of properties. """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -3142,7 +2891,6 @@ class Node: Returns: A `WindowSet` object. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -3152,7 +2900,6 @@ class Node: Returns: A Node object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -3163,7 +2910,6 @@ class Node: Returns: A Node object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -3171,7 +2917,6 @@ class Node: Arguments: """ - @property def start(self): """ @@ -3180,7 +2925,6 @@ class Node: Returns: The earliest time that this Node is valid or None if the Node is valid for all times. """ - @property def start_date_time(self): """ @@ -3189,7 +2933,6 @@ class Node: Returns: The earliest datetime that this Node is valid or None if the Node is valid for all times. """ - def valid_layers(self, names): """ Return a view of Node containing all layers `names` @@ -3201,7 +2944,6 @@ class Node: Returns: Node: The layered view """ - def window(self, start, end): """ Create a view of the Node including all events between `start` (inclusive) and `end` (exclusive) @@ -3213,17 +2955,15 @@ class Node: Returns: r A Node object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Node""" + """Get the window size (difference between start and end) for this Node""" class Nodes: """A list of nodes that can be iterated over.""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def after(self, start): """ Create a view of the Nodes including all events after `start` (exclusive). @@ -3234,7 +2974,6 @@ class Nodes: Returns: A Nodes object. """ - def at(self, time): """ Create a view of the Nodes including all events at `time`. @@ -3245,7 +2984,6 @@ class Nodes: Returns: A Nodes object. """ - def before(self, end): """ Create a view of the Nodes including all events before `end` (exclusive). @@ -3256,7 +2994,6 @@ class Nodes: Returns: A Nodes object. """ - def collect(self): """ Collect all nodes into a list @@ -3264,14 +3001,12 @@ class Nodes: Returns: list[Node]: the list of nodes """ - def default_layer(self): """ Return a view of Nodes containing only the default edge layer Returns: Nodes: The layered view """ - def degree(self): """ Returns the number of edges of the nodes @@ -3279,7 +3014,6 @@ class Nodes: Returns: An iterator of the number of edges of the nodes """ - @property def earliest_date_time(self): """ @@ -3288,11 +3022,9 @@ class Nodes: Returns: Earliest time of the nodes. """ - @property def earliest_time(self): """Returns an iterator over the nodes earliest time""" - @property def edges(self): """ @@ -3302,7 +3034,6 @@ class Nodes: An iterator over the edges that are incident to this node. """ - @property def end(self): """ @@ -3311,7 +3042,6 @@ class Nodes: Returns: The latest time that this Nodes is valid or None if the Nodes is valid for all times. """ - @property def end_date_time(self): """ @@ -3320,7 +3050,6 @@ class Nodes: Returns: The latest datetime that this Nodes is valid or None if the Nodes is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Nodes containing all layers except the excluded `name` @@ -3332,7 +3061,6 @@ class Nodes: Returns: Nodes: The layered view """ - def exclude_layers(self, names): """ Return a view of Nodes containing all layers except the excluded `names` @@ -3344,7 +3072,6 @@ class Nodes: Returns: Nodes: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Nodes containing all layers except the excluded `name` @@ -3354,7 +3081,6 @@ class Nodes: Returns: Nodes: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Nodes containing all layers except the excluded `names` @@ -3364,7 +3090,6 @@ class Nodes: Returns: Nodes: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3377,10 +3102,8 @@ class Nodes: Returns: A `WindowSet` object. """ - def has_layer(self, name): - """ Check if Nodes has the layer `"name"`""" - + """Check if Nodes has the layer `"name"`""" def history(self): """ Returns all timestamps of nodes, when an node is added or change to an node is made. @@ -3389,7 +3112,6 @@ class Nodes: A list of unix timestamps. """ - def history_date_time(self): """ Returns all timestamps of nodes, when an node is added or change to an node is made. @@ -3398,11 +3120,9 @@ class Nodes: An list of timestamps. """ - @property def id(self): """Returns an iterator over the nodes ids""" - def in_degree(self): """ Returns the number of in edges of the nodes @@ -3410,7 +3130,6 @@ class Nodes: Returns: An iterator of the number of in edges of the nodes """ - @property def in_edges(self): """ @@ -3420,7 +3139,6 @@ class Nodes: An iterator over the edges that point into this node. """ - @property def in_neighbours(self): """ @@ -3430,7 +3148,6 @@ class Nodes: An iterator over the neighbours of this node that point into this node. """ - @property def latest_date_time(self): """ @@ -3439,11 +3156,9 @@ class Nodes: Returns: Latest date time of the nodes. """ - @property def latest_time(self): """Returns an iterator over the nodes latest time""" - def layer(self, name): """ Return a view of Nodes containing the layer `"name"` @@ -3452,7 +3167,6 @@ class Nodes: Returns: Nodes: The layered view """ - def layers(self, names): """ Return a view of Nodes containing all layers `names` @@ -3464,11 +3178,9 @@ class Nodes: Returns: Nodes: The layered view """ - @property def name(self): """Returns an iterator over the nodes name""" - @property def neighbours(self): """ @@ -3478,11 +3190,9 @@ class Nodes: An iterator over the neighbours of this node. """ - @property def node_type(self): """Returns the type of node""" - def out_degree(self): """ Returns the number of out edges of the nodes @@ -3490,7 +3200,6 @@ class Nodes: Returns: An iterator of the number of out edges of the nodes """ - @property def out_edges(self): """ @@ -3500,7 +3209,6 @@ class Nodes: An iterator over the edges that point out of this node. """ - @property def out_neighbours(self): """ @@ -3510,7 +3218,6 @@ class Nodes: An iterator over the neighbours of this node that point out of this node. """ - @property def properties(self): """ @@ -3519,7 +3226,6 @@ class Nodes: Returns: A List of properties """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -3533,7 +3239,6 @@ class Nodes: Returns: A `WindowSet` object. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -3543,7 +3248,6 @@ class Nodes: Returns: A Nodes object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -3554,7 +3258,6 @@ class Nodes: Returns: A Nodes object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -3562,7 +3265,6 @@ class Nodes: Arguments: """ - @property def start(self): """ @@ -3571,7 +3273,6 @@ class Nodes: Returns: The earliest time that this Nodes is valid or None if the Nodes is valid for all times. """ - @property def start_date_time(self): """ @@ -3580,7 +3281,6 @@ class Nodes: Returns: The earliest datetime that this Nodes is valid or None if the Nodes is valid for all times. """ - def to_df(self, include_property_history=False, convert_datetime=False): """ Converts the graph's nodes into a Pandas DataFrame. @@ -3597,10 +3297,7 @@ class Nodes: Returns: If successful, this PyObject will be a Pandas DataFrame. """ - - def type_filter(self, node_types): - ... - + def type_filter(self, node_types): ... def valid_layers(self, names): """ Return a view of Nodes containing all layers `names` @@ -3612,7 +3309,6 @@ class Nodes: Returns: Nodes: The layered view """ - def window(self, start, end): """ Create a view of the Nodes including all events between `start` (inclusive) and `end` (exclusive) @@ -3624,17 +3320,15 @@ class Nodes: Returns: r A Nodes object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Nodes""" + """Get the window size (difference between start and end) for this Nodes""" class PersistentGraph: """A temporal graph that allows edges and nodes to be deleted.""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def add_constant_properties(self, properties): """ Adds static properties to the graph. @@ -3645,7 +3339,6 @@ class PersistentGraph: Returns: None """ - def add_edge(self, timestamp, src, dst, properties=None, layer=None): """ Adds a new edge with the given source and destination nodes and properties to the graph. @@ -3660,7 +3353,6 @@ class PersistentGraph: Returns: None """ - def add_node(self, timestamp, id, properties=None, node_type=None): """ Adds a new node with the given id and properties to the graph. @@ -3674,7 +3366,6 @@ class PersistentGraph: Returns: None """ - def add_property(self, timestamp, properties): """ Adds properties to the graph. @@ -3686,7 +3377,6 @@ class PersistentGraph: Returns: None """ - def after(self, start): """ Create a view of the GraphView including all events after `start` (exclusive). @@ -3697,7 +3387,6 @@ class PersistentGraph: Returns: A GraphView object. """ - def at(self, time): """ Create a view of the GraphView including all events at `time`. @@ -3708,7 +3397,6 @@ class PersistentGraph: Returns: A GraphView object. """ - def before(self, end): """ Create a view of the GraphView including all events before `end` (exclusive). @@ -3719,18 +3407,8 @@ class PersistentGraph: Returns: A GraphView object. """ - - def cache(self, path): - """ - Write PersistentGraph to cache file and initialise the cache. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. If the file already exists its contents are overwritten. - - Arguments: - path (str): The path to the cache file - """ - + def bincode(self): + """Get bincode encoded graph""" def count_edges(self): """ Number of edges in the graph @@ -3738,7 +3416,6 @@ class PersistentGraph: Returns: the number of edges in the graph """ - def count_nodes(self): """ Number of nodes in the graph @@ -3746,7 +3423,6 @@ class PersistentGraph: Returns: the number of nodes in the graph """ - def count_temporal_edges(self): """ Number of edges in the graph @@ -3754,14 +3430,12 @@ class PersistentGraph: Returns: the number of temporal edges in the graph """ - def default_layer(self): """ Return a view of GraphView containing only the default edge layer Returns: GraphView: The layered view """ - def delete_edge(self, timestamp, src, dst, layer=None): """ Deletes an edge given the timestamp, src and dst nodes and layer (optional) @@ -3775,19 +3449,6 @@ class PersistentGraph: Returns: None or a GraphError if the edge could not be deleted """ - - @staticmethod - def deserialise(bytes): - """ - Load PersistentGraph from serialised bytes. - - Arguments: - bytes (Bytes): The serialised bytes to decode - - Returns: - PersistentGraph - """ - @property def earliest_date_time(self): """ @@ -3796,7 +3457,6 @@ class PersistentGraph: Returns: the datetime of the earliest activity in the graph """ - @property def earliest_time(self): """ @@ -3805,7 +3465,6 @@ class PersistentGraph: Returns: the timestamp of the earliest activity in the graph """ - def edge(self, src, dst): """ Gets the edge with the specified source and destination nodes @@ -3817,7 +3476,6 @@ class PersistentGraph: Returns: the edge with the specified source and destination nodes, or None if the edge does not exist """ - @property def edges(self): """ @@ -3826,7 +3484,6 @@ class PersistentGraph: Returns: the edges in the graph """ - @property def end(self): """ @@ -3835,7 +3492,6 @@ class PersistentGraph: Returns: The latest time that this GraphView is valid or None if the GraphView is valid for all times. """ - @property def end_date_time(self): """ @@ -3844,10 +3500,8 @@ class PersistentGraph: Returns: The latest datetime that this GraphView is valid or None if the GraphView is valid for all times. """ - def event_graph(self): """Get event graph""" - def exclude_layer(self, name): """ Return a view of GraphView containing all layers except the excluded `name` @@ -3859,7 +3513,6 @@ class PersistentGraph: Returns: GraphView: The layered view """ - def exclude_layers(self, names): """ Return a view of GraphView containing all layers except the excluded `names` @@ -3871,7 +3524,6 @@ class PersistentGraph: Returns: GraphView: The layered view """ - def exclude_nodes(self, nodes): """ Returns a subgraph given a set of nodes that are excluded from the subgraph @@ -3882,7 +3534,6 @@ class PersistentGraph: Returns: GraphView - Returns the subgraph """ - def exclude_valid_layer(self, name): """ Return a view of GraphView containing all layers except the excluded `name` @@ -3892,7 +3543,6 @@ class PersistentGraph: Returns: GraphView: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of GraphView containing all layers except the excluded `names` @@ -3902,7 +3552,6 @@ class PersistentGraph: Returns: GraphView: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3915,7 +3564,6 @@ class PersistentGraph: Returns: A `WindowSet` object. """ - def find_edges(self, properties_dict): """ Get the edges that match the properties name and value @@ -3924,7 +3572,6 @@ class PersistentGraph: Returns: the edges that match the properties name and value """ - def find_nodes(self, properties_dict): """ Get the nodes that match the properties name and value @@ -3933,7 +3580,9 @@ class PersistentGraph: Returns: the nodes that match the properties name and value """ - + @staticmethod + def from_bincode(bytes): + """Creates a graph from a bincode encoded graph""" def get_all_node_types(self): """ Returns all the node types in the graph. @@ -3941,7 +3590,6 @@ class PersistentGraph: Returns: A list of node types """ - def has_edge(self, src, dst): """ Returns true if the graph contains the specified edge @@ -3953,10 +3601,8 @@ class PersistentGraph: Returns: true if the graph contains the specified edge, false otherwise """ - def has_layer(self, name): - """ Check if GraphView has the layer `"name"`""" - + """Check if GraphView has the layer `"name"`""" def has_node(self, id): """ Returns true if the graph contains the specified node @@ -3967,7 +3613,6 @@ class PersistentGraph: Returns: true if the graph contains the specified node, false otherwise """ - def import_edge(self, edge, force=False): """ Import a single edge into the graph. @@ -3983,7 +3628,6 @@ class PersistentGraph: Returns: Result, GraphError> - A Result object which is Ok if the edge was successfully imported, and Err otherwise. """ - def import_edges(self, edges, force=False): """ Import multiple edges into the graph. @@ -3997,7 +3641,6 @@ class PersistentGraph: force (boolean) - An optional boolean flag indicating whether to force the import of the edges. """ - def import_node(self, node, force=False): """ Import a single node into the graph. @@ -4012,7 +3655,6 @@ class PersistentGraph: Returns: Result, GraphError> - A Result object which is Ok if the node was successfully imported, and Err otherwise. """ - def import_nodes(self, nodes, force=False): """ Import multiple nodes into the graph. @@ -4026,7 +3668,6 @@ class PersistentGraph: force (boolean) - An optional boolean flag indicating whether to force the import of the nodes. """ - def index(self): """ Indexes all node and edge properties. @@ -4036,7 +3677,6 @@ class PersistentGraph: Returns: GraphIndex - Returns a GraphIndex """ - @property def latest_date_time(self): """ @@ -4045,7 +3685,6 @@ class PersistentGraph: Returns: the datetime of the latest activity in the graph """ - @property def latest_time(self): """ @@ -4054,7 +3693,6 @@ class PersistentGraph: Returns: the timestamp of the latest activity in the graph """ - def layer(self, name): """ Return a view of GraphView containing the layer `"name"` @@ -4063,7 +3701,6 @@ class PersistentGraph: Returns: GraphView: The layered view """ - def layers(self, names): """ Return a view of GraphView containing all layers `names` @@ -4075,23 +3712,54 @@ class PersistentGraph: Returns: GraphView: The layered view """ - - @staticmethod - def load_cached(path): + def load_edge_deletions_from_pandas( + self, df, time, src, dst, layer=None, layer_col=None + ): """ - Load PersistentGraph from a file and initialise it as a cache file. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. + Load edges deletions from a Pandas DataFrame into the graph. Arguments: - path (str): The path to the cache file - + df (Dataframe): The Pandas DataFrame containing the edges. + time (str): The column name for the update timestamps. + src (str): The column name for the source node ids. + dst (str): The column name for the destination node ids. + layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) Returns: - PersistentGraph + None: If the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_edge_deletions_from_parquet( + self, parquet_path, time, src, dst, layer=None, layer_col=None + ): """ + Load edges deletions from a Parquet file into the graph. - def load_edge_props_from_pandas(self, df, src, dst, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + Arguments: + parquet_path (str): Parquet file or directory of Parquet files path containing node information. + src (str): The column name for the source node ids. + dst (str): The column name for the destination node ids. + time (str): The column name for the update timestamps. + layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) + Returns: + None: If the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_edge_props_from_pandas( + self, + df, + src, + dst, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edge properties from a Pandas DataFrame. @@ -4099,16 +3767,27 @@ class PersistentGraph: df (Dataframe): The Pandas DataFrame containing edge information. src (str): The column name for the source node. dst (str): The column name for the destination node. - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - layer (str): Layer name. Defaults to None. (optional) - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + layer (str): The edge layer name (optional) Defaults to None. + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_edge_props_from_parquet(self, parquet_path, src, dst, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + Raises: + GraphError: If the operation fails. + """ + def load_edge_props_from_parquet( + self, + parquet_path, + src, + dst, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edge properties from parquet file @@ -4116,215 +3795,200 @@ class PersistentGraph: parquet_path (str): Parquet file or directory of Parquet files path containing edge information. src (str): The column name for the source node. dst (str): The column name for the destination node. - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - layer (str): Layer name. Defaults to None. (optional) - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. - - Returns: - Result<(), GraphError>: Result of the operation. - """ - - def load_edges_deletions_from_pandas(self, df, src, dst, time, layer=None, layer_in_df=True): - """ - Load edges deletions from a Pandas DataFrame into the graph. - - Arguments: - df (Dataframe): The Pandas DataFrame containing the edges. - src (str): The column name for the source node ids. - dst (str): The column name for the destination node ids. - time (str): The column name for the update timestamps. + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_edges_deletions_from_parquet(self, parquet_path, src, dst, time, layer=None, layer_in_df=True): + Raises: + GraphError: If the operation fails. """ - Load edges deletions from a Parquet file into the graph. - - Arguments: - parquet_path (str): Parquet file or directory of Parquet files path containing node information. - src (str): The column name for the source node ids. - dst (str): The column name for the destination node ids. - time (str): The column name for the update timestamps. - layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. - - Returns: - Result<(), GraphError>: Result of the operation. - """ - - def load_edges_from_pandas(self, df, src, dst, time, properties=None, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + def load_edges_from_pandas( + self, + df, + time, + src, + dst, + properties=None, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edges from a Pandas DataFrame into the graph. Arguments: df (Dataframe): The Pandas DataFrame containing the edges. + time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - time (str): The column name for the update timestamps. - properties (List): List of edge property column names. Defaults to None. (optional) - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. - - Returns: - Result<(), GraphError>: Result of the operation. - """ - - def load_edges_from_parquet(self, parquet_path, src, dst, time, properties=None, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + properties (List[str]): List of edge property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) + Returns: + None: If the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_edges_from_parquet( + self, + parquet_path, + time, + src, + dst, + properties=None, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edges from a Parquet file into the graph. Arguments: parquet_path (str): Parquet file or directory of Parquet files path containing edges + time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - time (str): The column name for the update timestamps. - properties (List): List of edge property column names. Defaults to None. (optional) - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. - + properties (List[str]): List of edge property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. + Raises: + GraphError: If the operation fails. + """ @staticmethod - def load_from_file(path): + def load_from_file(path, force=False): """ - Load PersistentGraph from a file. + Loads a graph from the given path. Arguments: - path (str): The path to the file. + path (str): The path to the graph. Returns: - PersistentGraph - """ - - @staticmethod - def load_from_pandas(edge_df, edge_src, edge_dst, edge_time, edge_properties=None, edge_const_properties=None, edge_shared_const_properties=None, edge_layer=None, layer_in_df=True, node_df=None, node_id=None, node_time=None, node_properties=None, node_const_properties=None, node_shared_const_properties=None, node_type=None, node_type_in_df=True): - """ - Load a graph from a Pandas DataFrame. - - Args: - edge_df (pandas.DataFrame): The DataFrame containing the edges. - edge_src (str): The column name for the source node ids. - edge_dst (str): The column name for the destination node ids. - edge_time (str): The column name for the timestamps. - edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - edge_layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. - node_df (pandas.DataFrame): The DataFrame containing the nodes (optional) Defaults to None. - node_id (str): The column name for the node ids (optional) Defaults to None. - node_time (str): The column name for the node timestamps (optional) Defaults to None. - node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. - node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - - Returns: - Graph: The loaded Graph object. + Graph: The loaded graph. """ - - @staticmethod - def load_from_parquet(edge_parquet_path, edge_src, edge_dst, edge_time, edge_properties=None, edge_const_properties=None, edge_shared_const_properties=None, edge_layer=None, layer_in_df=True, node_parquet_path=None, node_id=None, node_time=None, node_properties=None, node_const_properties=None, node_shared_const_properties=None, node_type=None, node_type_in_df=True): - """ - Load a graph from Parquet file. - - Args: - edge_parquet_path (str): Parquet file or directory of Parquet files containing the edges. - edge_src (str): The column name for the source node ids. - edge_dst (str): The column name for the destination node ids. - edge_time (str): The column name for the timestamps. - edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - edge_layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. - node_parquet_path (str): Parquet file or directory of Parquet files containing the nodes (optional) Defaults to None. - node_id (str): The column name for the node ids (optional) Defaults to None. - node_time (str): The column name for the node timestamps (optional) Defaults to None. - node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. - node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - - Returns: - Graph: The loaded Graph object. - """ - - def load_node_props_from_pandas(self, df, id, const_properties=None, shared_const_properties=None): + def load_node_props_from_pandas( + self, + df, + id, + node_type=None, + node_type_col=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load node properties from a Pandas DataFrame. Arguments: df (Dataframe): The Pandas DataFrame containing node information. id(str): The column name for the node IDs. - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_node_props_from_parquet(self, parquet_path, id, const_properties=None, shared_const_properties=None): + Raises: + GraphError: If the operation fails. + """ + def load_node_props_from_parquet( + self, + parquet_path, + id, + node_type=None, + node_type_col=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load node properties from a parquet file. Arguments: parquet_path (str): Parquet file or directory of Parquet files path containing node information. id(str): The column name for the node IDs. - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_nodes_from_pandas(self, df, id, time, node_type=None, node_type_in_df=True, properties=None, const_properties=None, shared_const_properties=None): + Raises: + GraphError: If the operation fails. + """ + def load_nodes_from_pandas( + self, + df, + time, + id, + node_type=None, + node_type_col=None, + properties=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load nodes from a Pandas DataFrame into the graph. Arguments: df (pandas.DataFrame): The Pandas DataFrame containing the nodes. - id (str): The column name for the node IDs. time (str): The column name for the timestamps. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - properties (List): List of node property column names. Defaults to None. (optional) - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) - Returns: - Result<(), GraphError>: Result of the operation. - """ - - def load_nodes_from_parquet(self, parquet_path, id, time, node_type=None, node_type_in_df=True, properties=None, const_properties=None, shared_const_properties=None): + id (str): The column name for the node IDs. + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + properties (List[str]): List of node property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + Returns: + None: If the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_nodes_from_parquet( + self, + parquet_path, + time, + id, + node_type=None, + node_type_col=None, + properties=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load nodes from a Parquet file into the graph. Arguments: parquet_path (str): Parquet file or directory of Parquet files containing the nodes - id (str): The column name for the node IDs. time (str): The column name for the timestamps. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - properties (List): List of node property column names. Defaults to None. (optional) - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + id (str): The column name for the node IDs. + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + properties (List[str]): List of node property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. + Raises: + GraphError: If the operation fails. + """ def materialize(self): """ Returns a 'materialized' clone of the graph view - i.e. a new graph with a copy of the data seen within the view instead of just a mask over the original graph @@ -4332,7 +3996,6 @@ class PersistentGraph: Returns: GraphView - Returns a graph clone """ - def node(self, id): """ Gets the node with the specified id @@ -4343,7 +4006,6 @@ class PersistentGraph: Returns: the node with the specified id, or None if the node does not exist """ - @property def nodes(self): """ @@ -4352,7 +4014,6 @@ class PersistentGraph: Returns: the nodes in the graph """ - @property def properties(self): """ @@ -4362,7 +4023,6 @@ class PersistentGraph: Returns: HashMap - Properties paired with their names """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -4376,23 +4036,16 @@ class PersistentGraph: Returns: A `WindowSet` object. """ - def save_to_file(self, path): """ - Saves the PersistentGraph to the given path. + Saves the graph to the given path. Arguments: - path (str): The path to the file. - """ - - def serialise(self): - """ - Serialise PersistentGraph to bytes. + path (str): The path to the graph. Returns: - Bytes + None """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -4402,7 +4055,6 @@ class PersistentGraph: Returns: A GraphView object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -4413,7 +4065,6 @@ class PersistentGraph: Returns: A GraphView object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -4421,7 +4072,6 @@ class PersistentGraph: Arguments: """ - @property def start(self): """ @@ -4430,7 +4080,6 @@ class PersistentGraph: Returns: The earliest time that this GraphView is valid or None if the GraphView is valid for all times. """ - @property def start_date_time(self): """ @@ -4439,7 +4088,6 @@ class PersistentGraph: Returns: The earliest datetime that this GraphView is valid or None if the GraphView is valid for all times. """ - def subgraph(self, nodes): """ Returns a subgraph given a set of nodes @@ -4450,7 +4098,6 @@ class PersistentGraph: Returns: GraphView - Returns the subgraph """ - def subgraph_node_types(self, node_types): """ Returns a subgraph filtered by node types given a set of node types @@ -4461,8 +4108,14 @@ class PersistentGraph: Returns: GraphView - Returns the subgraph """ - - def to_networkx(self, explode_edges=False, include_node_properties=True, include_edge_properties=True, include_update_history=True, include_property_history=True): + def to_networkx( + self, + explode_edges=False, + include_node_properties=True, + include_edge_properties=True, + include_update_history=True, + include_property_history=True, + ): """ Returns a graph with NetworkX. @@ -4480,8 +4133,18 @@ class PersistentGraph: Returns: A Networkx MultiDiGraph. """ - - def to_pyvis(self, explode_edges=False, edge_color="#000000", shape=None, node_image=None, edge_weight=None, edge_label=None, colour_nodes_by_type=False, notebook=False, **kwargs): + def to_pyvis( + self, + explode_edges=False, + edge_color="#000000", + shape=None, + node_image=None, + edge_weight=None, + edge_label=None, + colour_nodes_by_type=False, + notebook=False, + **kwargs + ): """ Draw a graph with PyVis. Pyvis is a required dependency. If you intend to use this function make sure that you install Pyvis @@ -4505,11 +4168,9 @@ class PersistentGraph: Returns: A pyvis network """ - @property def unique_layers(self): """Return all the layer ids in the graph""" - def update_constant_properties(self, properties): """ Updates static properties to the graph. @@ -4520,7 +4181,6 @@ class PersistentGraph: Returns: None """ - def valid_layers(self, names): """ Return a view of GraphView containing all layers `names` @@ -4532,8 +4192,16 @@ class PersistentGraph: Returns: GraphView: The layered view """ - - def vectorise(self, embedding, cache=None, overwrite_cache=False, graph_document=None, node_document=None, edge_document=None, verbose=False): + def vectorise( + self, + embedding, + cache=None, + overwrite_cache=False, + graph_document=None, + node_document=None, + edge_document=None, + verbose=False, + ): """ Create a VectorisedGraph from the current graph @@ -4548,7 +4216,6 @@ class PersistentGraph: Returns: A VectorisedGraph with all the documents/embeddings computed and with an initial empty selection """ - def window(self, start, end): """ Create a view of the GraphView including all events between `start` (inclusive) and `end` (exclusive) @@ -4560,27 +4227,20 @@ class PersistentGraph: Returns: r A GraphView object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this GraphView""" - - def write_updates(self): - """Persist the new updates by appending them to the cache file.""" + """Get the window size (difference between start and end) for this GraphView""" class Properties: """A view of the properties of an entity""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def as_dict(self): """Convert properties view to a dict""" - @property def constant(self): """Get a view of the constant properties (meta-data) only.""" - def get(self, key): """ Get property value. @@ -4588,17 +4248,13 @@ class Properties: First searches temporal properties and returns latest value if it exists. If not, it falls back to static properties. """ - def items(self): """Get a list of key-value pairs""" - def keys(self): """Get the names for all properties (includes temporal and static properties)""" - @property def temporal(self): """Get a view of the temporal properties only.""" - def values(self): """ Get the values of the properties @@ -4612,12 +4268,9 @@ class PyDirection: def __init__(self, direction): """Initialize self. See help(type(self)) for accurate signature.""" - - def as_str(self): - ... + def as_str(self): ... class PyGraphEncoder: - def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" @@ -4626,10 +4279,8 @@ class TemporalProp: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def at(self, t): """Get the value of the property at time `t`""" - def average(self): """ Compute the average of all property values. Alias for mean(). @@ -4637,7 +4288,6 @@ class TemporalProp: Returns: Prop: The average of each property values, or None if count is zero. """ - def count(self): """ Count the number of properties. @@ -4645,19 +4295,14 @@ class TemporalProp: Returns: int: The number of properties. """ - def history(self): """Get the timestamps at which the property was updated""" - def history_date_time(self): """Get the timestamps at which the property was updated""" - def items(self): """List update timestamps and corresponding property values""" - def items_date_time(self): """List update timestamps and corresponding property values""" - def max(self): """ Find the maximum property value and its associated time. @@ -4665,7 +4310,6 @@ class TemporalProp: Returns: (i64, Prop): A tuple containing the time and the maximum property value. """ - def mean(self): """ Compute the mean of all property values. Alias for mean(). @@ -4673,7 +4317,6 @@ class TemporalProp: Returns: Prop: The mean of each property values, or None if count is zero. """ - def median(self): """ Compute the median of all property values. @@ -4681,7 +4324,6 @@ class TemporalProp: Returns: (i64, Prop): A tuple containing the time and the median property value, or None if empty """ - def min(self): """ Find the minimum property value and its associated time. @@ -4689,10 +4331,7 @@ class TemporalProp: Returns: (i64, Prop): A tuple containing the time and the minimum property value. """ - - def ordered_dedupe(self, latest_time): - ... - + def ordered_dedupe(self, latest_time): ... def sum(self): """ Compute the sum of all property values. @@ -4700,13 +4339,9 @@ class TemporalProp: Returns: Prop: The sum of all property values. """ - - def unique(self): - ... - + def unique(self): ... def value(self): """Get the latest value of the property""" - def values(self): """Get the property values for each update""" @@ -4715,7 +4350,6 @@ class TemporalProperties: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def get(self, key): """ get(key: str) -> Optional[TemporalProp] @@ -4725,7 +4359,6 @@ class TemporalProperties: Returns: the property view if it exists, otherwise `None` """ - def histories(self): """ Get the histories of all properties @@ -4733,7 +4366,6 @@ class TemporalProperties: Returns: dict[str, list[(int, Any)]]: the mapping of property keys to histories """ - def histories_date_time(self): """ Get the histories of all properties @@ -4741,13 +4373,10 @@ class TemporalProperties: Returns: dict[str, list[(datetime, Any)]]: the mapping of property keys to histories """ - def items(self): """List the property keys together with the corresponding values""" - def keys(self): """List the available property keys""" - def latest(self): """ Get the latest value of all properties @@ -4755,7 +4384,6 @@ class TemporalProperties: Returns: dict[str, Any]: the mapping of property keys to latest values """ - def values(self): """ List the values of the properties diff --git a/python/python/raphtory/algorithms/__init__.pyi b/python/python/raphtory/algorithms/__init__.pyi index ddd851779c..2f58811862 100644 --- a/python/python/raphtory/algorithms/__init__.pyi +++ b/python/python/raphtory/algorithms/__init__.pyi @@ -73,6 +73,7 @@ def cohesive_fruchterman_reingold( ): """Cohesive version of `fruchterman_reingold` that adds virtual edges between isolated nodes""" +def connected_components(g): ... def degree_centrality(g, threads=None): """ Computes the degree centrality of all nodes in the graph. The values are normalized diff --git a/python/python/raphtory/graphql/__init__.pyi b/python/python/raphtory/graphql/__init__.pyi index 9eab7945b1..b082a9c50c 100644 --- a/python/python/raphtory/graphql/__init__.pyi +++ b/python/python/raphtory/graphql/__init__.pyi @@ -37,7 +37,6 @@ class RaphtoryClient: def __init__(self, url): """Initialize self. See help(type(self)) for accurate signature.""" - def copy_graph(self, path, new_path): """ Copy graph from a path `path` on the server to a `new_path` on the server @@ -49,7 +48,6 @@ class RaphtoryClient: Returns: Copy status as boolean """ - def delete_graph(self, path): """ Delete graph from a path `path` on the server @@ -60,7 +58,6 @@ class RaphtoryClient: Returns: Delete status as boolean """ - def is_server_online(self): """ Check if the server is online. @@ -68,7 +65,6 @@ class RaphtoryClient: Returns: Returns true if server is online otherwise false. """ - def move_graph(self, path, new_path): """ Move graph from a path `path` on the server to a `new_path` on the server @@ -91,7 +87,6 @@ class RaphtoryClient: Returns: The `data` field from the graphQL response. """ - def receive_graph(self, path): """ Receive graph from a path `path` on the server @@ -102,7 +97,6 @@ class RaphtoryClient: Returns: Graph as string """ - def send_graph(self, path, graph, overwrite=False): """ Send a graph to the server @@ -115,7 +109,6 @@ class RaphtoryClient: Returns: The `data` field from the graphQL response after executing the mutation. """ - def upload_graph(self, path, file_path, overwrite=False): """ Upload graph file from a path `file_path` on the client @@ -132,9 +125,15 @@ class RaphtoryClient: class RaphtoryServer: """A class for defining and running a Raphtory GraphQL server""" - def __init__(self, work_dir, cache_capacity=None, cache_tti_seconds=None, log_level=None, config_path=None): + def __init__( + self, + work_dir, + cache_capacity=None, + cache_tti_seconds=None, + log_level=None, + config_path=None, + ): """Initialize self. See help(type(self)) for accurate signature.""" - def run(self, port=1736, timeout_ms=...): """ Run the server until completion. @@ -142,7 +141,6 @@ class RaphtoryServer: Arguments: * `port`: the port to use (defaults to 1736). """ - def start(self, port=1736, timeout_ms=None): """ Start the server and return a handle to it. @@ -218,9 +216,6 @@ class RunningRaphtoryServer: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - - def get_client(self): - ... - + def get_client(self): ... def stop(self): """Stop the server and wait for it to finish""" diff --git a/python/tests/test_algorithms.py b/python/tests/test_algorithms.py index 5f671e82c2..5fccc4851e 100644 --- a/python/tests/test_algorithms.py +++ b/python/tests/test_algorithms.py @@ -258,7 +258,11 @@ def test_temporal_reachability(): actual = algorithms.temporally_reachable_nodes(g, 20, 11, [1, 2], [4, 5]) expected = { "1": [(11, "start")], - "2": [(11, "1"), (11, "start"), (12, "1"), ], + "2": [ + (11, "1"), + (11, "start"), + (12, "1"), + ], "3": [], "4": [(12, "2")], "5": [(13, "2")], @@ -321,12 +325,12 @@ def test_single_source_shortest_path(): "4": ["1", "4"], } assert ( - res_two.get_all_with_names() - == {"1": ["1"], "2": ["1", "2"], "3": ["1", "2", "3"], "4": ["1", "4"]} - ) or ( - res_two.get_all_with_names() - == {"1": ["1"], "3": ["1", "4", "3"], "2": ["1", "2"], "4": ["1", "4"]} - ) + res_two.get_all_with_names() + == {"1": ["1"], "2": ["1", "2"], "3": ["1", "2", "3"], "4": ["1", "4"]} + ) or ( + res_two.get_all_with_names() + == {"1": ["1"], "3": ["1", "4", "3"], "2": ["1", "2"], "4": ["1", "4"]} + ) def test_dijsktra_shortest_paths(): diff --git a/python/tests/test_graphql.py b/python/tests/test_graphql.py index 55a32d5f79..bac9f7fb72 100644 --- a/python/tests/test_graphql.py +++ b/python/tests/test_graphql.py @@ -610,7 +610,7 @@ def test_receive_graph_succeeds_if_graph_found(): received_graph = client.query(query)["receiveGraph"] decoded_bytes = base64.b64decode(received_graph) - g = Graph.deserialise(decoded_bytes); + g = Graph.deserialise(decoded_bytes) assert g.nodes.name == ["ben", "hamza", "haaroon"] From 6c5c5647409f7cdf3d461daf94458fb9d0e93241 Mon Sep 17 00:00:00 2001 From: miratepuffin Date: Wed, 21 Aug 2024 16:29:32 +0100 Subject: [PATCH 17/17] fixed after merge --- python/python/raphtory/__init__.pyi | 4 ++-- raphtory/src/python/graph/graph_with_deletions.rs | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 0fce22b982..d2fa738250 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1649,7 +1649,7 @@ class Graph: edge_label=None, colour_nodes_by_type=False, notebook=False, - **kwargs + **kwargs, ): """ Draw a graph with PyVis. @@ -4143,7 +4143,7 @@ class PersistentGraph: edge_label=None, colour_nodes_by_type=False, notebook=False, - **kwargs + **kwargs, ): """ Draw a graph with PyVis. diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index c92a4c8779..654350ce80 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -533,7 +533,15 @@ impl PyPersistentGraph { layer: Option<&str>, layer_col: Option<&str>, ) -> Result<(), GraphError> { - load_edge_deletions_from_pandas(&self.graph.0, df, time, src, dst, layer, layer_col) + load_edge_deletions_from_pandas( + self.graph.core_graph(), + df, + time, + src, + dst, + layer, + layer_col, + ) } /// Load edges deletions from a Parquet file into the graph.