Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add largest connected component method #1562

Merged
merged 12 commits into from
Apr 14, 2024
8 changes: 8 additions & 0 deletions python/tests/test_algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ def test_connected_components():
assert actual.get("1") == 1


def test_largest_connected_component():
    """Nodes "1" through "8" must all be present in the largest connected component."""
    lcc = gen_graph().largest_connected_component()
    required_nodes = ("1", "2", "3", "4", "5", "6", "7", "8")
    # `all` stops at the first missing node, just like a failing assert inside a loop would.
    assert all(lcc.has_node(name) for name in required_nodes)


def test_in_components():
g = gen_graph()
actual = algorithms.in_components(g).get_all_with_names()
Expand Down
143 changes: 143 additions & 0 deletions raphtory/src/algorithms/components/lcc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
use crate::{
algorithms::components::connected_components::weakly_connected_components,
db::{
api::view::{GraphViewOps, StaticGraphViewOps},
graph::views::node_subgraph::NodeSubgraph,
},
prelude::Graph,
};

/// Extracts the largest connected component of a graph.
///
/// The largest connected component is the connected sub-graph of the network
/// that contains the highest number of nodes.
///
/// # Example Usage:
///
/// g.largest_connected_component()
///
/// # Returns:
///
/// A `NodeSubgraph<Self>`, i.e. a sub-graph view of the graph `g` the method
/// was called on.
///
pub trait LargestConnectedComponent {
/// Returns the largest connected component of `self` as a node sub-graph.
///
/// Only callable when `Self` implements `StaticGraphViewOps`.
fn largest_connected_component(&self) -> NodeSubgraph<Self>
where
Self: StaticGraphViewOps;
}

impl LargestConnectedComponent for Graph {
    /// Returns the largest weakly connected component of this graph as a node sub-graph.
    ///
    /// The components are computed with `weakly_connected_components` and grouped by
    /// component key; the group holding the most nodes is selected.
    ///
    /// When several components share the maximum size, a warning is written to
    /// standard error and the tied component with the smallest key is returned, so
    /// the choice no longer depends on the iteration order of the grouping map.
    ///
    /// # Returns:
    ///
    /// A `NodeSubgraph<Self>` restricted to the nodes of the selected component. If no
    /// component is found (for example on an empty graph) the sub-graph is built from
    /// `self.nodes()` instead.
    fn largest_connected_component(&self) -> NodeSubgraph<Self>
    where
        Self: StaticGraphViewOps,
    {
        // NOTE(review): `usize::MAX` / `None` presumably mean "no iteration cap" and
        // "default thread count" - confirm against `weakly_connected_components`.
        let mut connected_components_map =
            weakly_connected_components(self, usize::MAX, None).group_by();
        let mut lcc_key = 0;
        let mut key_length = 0;
        let mut is_tie = false;

        for (key, value) in &connected_components_map {
            let length = value.len();
            if length > key_length {
                // Strictly larger component: it becomes the sole leader.
                key_length = length;
                lcc_key = *key;
                is_tie = false;
            } else if length == key_length {
                is_tie = true;
                // Break ties on the smallest key so the winner does not depend on the
                // order in which the map yields its entries.
                if *key < lcc_key {
                    lcc_key = *key;
                }
            }
        }
        if is_tie {
            // Diagnostics go to stderr so they never mix with a caller's stdout output.
            eprintln!("Warning: The graph has two or more connected components that are both the largest. \
            The returned component has been picked arbitrarily.");
        }
        match connected_components_map.remove(&lcc_key) {
            Some(nodes) => self.subgraph(nodes),
            // No group under the selected key: only reachable when the map is empty.
            None => self.subgraph(self.nodes()),
        }
    }
}

/// Unit tests for `LargestConnectedComponent` on `Graph`.
#[cfg(test)]
mod largest_connected_component_test {
    use super::*;
    use crate::{
        db::api::view::GraphViewOps,
        prelude::{AdditionOps, Graph, NO_PROPS},
    };

    /// An empty graph has no components, so the result must be empty too.
    #[test]
    fn test_empty_graph() {
        let graph = Graph::new();
        let subgraph = graph.largest_connected_component();
        assert!(
            subgraph.is_empty(),
            "The subgraph of an empty graph should be empty"
        );
    }

    /// A graph made of one component is returned in full.
    #[test]
    fn test_single_connected_component() {
        let graph = Graph::new();
        let edges = vec![(1, 1, 2), (2, 2, 1), (3, 3, 1)];
        for (ts, src, dst) in edges {
            graph.add_edge(ts, src, dst, NO_PROPS, None).unwrap();
        }
        let subgraph = graph.largest_connected_component();

        let expected_nodes = vec![1, 2, 3];
        for node in expected_nodes {
            assert!(
                subgraph.has_node(node),
                "Node {} should be in the largest connected component.",
                node
            );
        }
        assert_eq!(subgraph.count_nodes(), 3);
    }

    /// With one 3-node component and three 2-node components, the 3-node one wins.
    #[test]
    fn test_multiple_connected_components() {
        let graph = Graph::new();
        let edges = vec![
            (1, 1, 2),
            (2, 2, 1),
            (3, 3, 1),
            (1, 10, 11),
            (2, 20, 21),
            (3, 30, 31),
        ];
        for (ts, src, dst) in edges {
            graph.add_edge(ts, src, dst, NO_PROPS, None).unwrap();
        }
        let subgraph = graph.largest_connected_component();
        let expected_nodes = vec![1, 2, 3];
        for node in expected_nodes {
            assert!(
                subgraph.has_node(node),
                "Node {} should be in the largest connected component.",
                node
            );
        }
        assert_eq!(subgraph.count_nodes(), 3);
    }

    /// Two components tie at three nodes ({1, 2, 3} and {11, 12, 13}); {5, 6} is smaller.
    /// Whichever tied component is chosen, the result has exactly three nodes and
    /// never contains the nodes of the smaller component.
    #[test]
    fn test_same_size_connected_components() {
        let graph = Graph::new();
        let edges = vec![
            (1, 1, 2),
            (1, 2, 1),
            (1, 3, 1),
            (1, 5, 6),
            (1, 11, 12),
            (1, 12, 11),
            (1, 13, 11),
        ];
        for (ts, src, dst) in edges {
            graph.add_edge(ts, src, dst, NO_PROPS, None).unwrap();
        }
        let subgraph = graph.largest_connected_component();
        assert_eq!(subgraph.count_nodes(), 3);

        let smaller_component = vec![5, 6];
        for node in smaller_component {
            assert!(
                !subgraph.has_node(node),
                "Node {} belongs to a smaller component and should not be returned.",
                node
            );
        }
    }
}
2 changes: 2 additions & 0 deletions raphtory/src/algorithms/components/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
mod connected_components;
mod in_components;
mod lcc;
mod out_components;
mod scc;

pub use connected_components::weakly_connected_components;
pub use in_components::in_components;
pub use lcc::LargestConnectedComponent;
pub use out_components::out_components;
pub use scc::strongly_connected_components;
15 changes: 14 additions & 1 deletion raphtory/src/python/graph/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
//! In Python, this class wraps around the rust graph.
use super::utils;
use crate::{
algorithms::components::LargestConnectedComponent,
core::{entities::nodes::node_ref::NodeRef, utils::errors::GraphError, ArcStr},
db::{
api::view::internal::{CoreGraphOps, DynamicGraph, IntoDynamic, MaterializedGraph},
graph::{edge::EdgeView, node::NodeView},
graph::{edge::EdgeView, node::NodeView, views::node_subgraph::NodeSubgraph},
},
prelude::*,
python::{
Expand Down Expand Up @@ -366,6 +367,18 @@ impl PyGraph {
Ok(PyBytes::new(py, &bytes))
}

/// Gives the large connected component of a graph.
///
/// # Example Usage:
/// g.largest_connected_component()
///
/// # Returns:
/// A raphtory graph, which essentially is a sub-graph of the graph `g`
///
pub fn largest_connected_component(&self) -> NodeSubgraph<Graph> {
self.graph.largest_connected_component()
}

/// Get persistent graph
pub fn persistent_graph<'py>(&'py self) -> PyResult<Py<PyPersistentGraph>> {
PyPersistentGraph::py_from_db_graph(self.graph.persistent_graph())
Expand Down
Loading