From 7390ae2f8e25ec65a222c5a868942bb67615624d Mon Sep 17 00:00:00 2001
From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com>
Date: Wed, 23 Oct 2024 13:16:09 -0400
Subject: [PATCH] Address Leiden clustering generating too many clusters
 (#4730)

Our implementation of Leiden was generating too many clusters.  This was not obvious in smaller graphs, but as the graphs get larger the problem became more noticeable.

The Leiden loop was terminating if the modularity stopped improving.  But the Leiden algorithm as defined in the paper allows the refinement phase to reduce modularity in order to improve the quality of the clusters.  The convergence criteria defined in the paper was based on making no changes on the iteration rather than strictly monitoring modularity change.

Updating this criteria results in the Leiden algorithm running more iterations and converging on better answers.

Closes #4529

Authors:
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Naim (https://github.com/naimnv)
  - Joseph Nke (https://github.com/jnke2016)
  - Seunghwa Kang (https://github.com/seunghwak)

URL: https://github.com/rapidsai/cugraph/pull/4730
---
 cpp/src/community/detail/refine.hpp           |  3 +--
 cpp/src/community/detail/refine_impl.cuh      |  9 +++++--
 cpp/src/community/detail/refine_mg_v32_e32.cu |  6 ++---
 cpp/src/community/detail/refine_mg_v64_e64.cu |  6 ++---
 cpp/src/community/detail/refine_sg_v32_e32.cu |  6 ++---
 cpp/src/community/detail/refine_sg_v64_e64.cu |  6 ++---
 cpp/src/community/leiden_impl.cuh             | 26 ++++++++++++-------
 7 files changed, 33 insertions(+), 29 deletions(-)
diff --git a/cpp/src/community/detail/refine.hpp b/cpp/src/community/detail/refine.hpp
index a60efee887f..429e0e9e6c2 100644
--- a/cpp/src/community/detail/refine.hpp
+++ b/cpp/src/community/detail/refine.hpp
@@ -46,8 +46,7 @@ refine_clustering(
   rmm::device_uvector<typename graph_view_t::vertex_type>&& next_clusters_v,
   edge_src_property_t<graph_view_t, weight_t> const& src_vertex_weights_cache,
   edge_src_property_t<graph_view_t, typename graph_view_t::vertex_type> const& src_clusters_cache,
-  edge_dst_property_t<graph_view_t, typename graph_view_t::vertex_type> const& dst_clusters_cache,
-  bool up_down);
+  edge_dst_property_t<graph_view_t, typename graph_view_t::vertex_type> const& dst_clusters_cache);
 
 }
 }  // namespace cugraph
diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh
index 272e3d71f83..62b66ed5f41 100644
--- a/cpp/src/community/detail/refine_impl.cuh
+++ b/cpp/src/community/detail/refine_impl.cuh
@@ -150,8 +150,7 @@ refine_clustering(
   edge_src_property_t<GraphViewType, typename GraphViewType::vertex_type> const&
     src_louvain_assignment_cache,
   edge_dst_property_t<GraphViewType, typename GraphViewType::vertex_type> const&
-    dst_louvain_assignment_cache,
-  bool up_down)
+    dst_louvain_assignment_cache)
 {
   const weight_t POSITIVE_GAIN = 1e-6;
   using vertex_t               = typename GraphViewType::vertex_type;
@@ -230,6 +229,7 @@ refine_clustering(
     cugraph::reduce_op::plus<weight_t>{},
     weighted_cut_of_vertices_to_louvain.begin());
 
+  // FIXME: Consider using bit mask logic here.  Would reduce memory by 8x
   rmm::device_uvector<uint8_t> singleton_and_connected_flags(
     graph_view.local_vertex_partition_range_size(), handle.get_stream());
 
@@ -297,6 +297,11 @@ refine_clustering(
   edge_dst_property_t<GraphViewType, vertex_t> dst_leiden_assignment_cache(handle);
   edge_src_property_t<GraphViewType, uint8_t> src_singleton_and_connected_flag_cache(handle);
 
+  // FIXME:  Why is kvstore used here?  Can't this be accomplished by
+  //  a direct lookup in louvain_assignment_of_vertices using
+  //     leiden - graph_view.local_vertex_partition_range_first() as the
+  //     index?
+  // Changing this would save memory and time
   kv_store_t<vertex_t, vertex_t, false> leiden_to_louvain_map(
     leiden_assignment.begin(),
     leiden_assignment.end(),
diff --git a/cpp/src/community/detail/refine_mg_v32_e32.cu b/cpp/src/community/detail/refine_mg_v32_e32.cu
index d27260c1337..ce46a48ed5c 100644
--- a/cpp/src/community/detail/refine_mg_v32_e32.cu
+++ b/cpp/src/community/detail/refine_mg_v32_e32.cu
@@ -37,8 +37,7 @@ refine_clustering(
   edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
     src_clusters_cache,
   edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
-    dst_clusters_cache,
-  bool up_down);
+    dst_clusters_cache);
 
 template std::tuple<rmm::device_uvector<int32_t>,
                     std::pair<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>>
@@ -59,8 +58,7 @@ refine_clustering(
   edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
     src_clusters_cache,
   edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
-    dst_clusters_cache,
-  bool up_down);
+    dst_clusters_cache);
 
 }  // namespace detail
 }  // namespace cugraph
diff --git a/cpp/src/community/detail/refine_mg_v64_e64.cu b/cpp/src/community/detail/refine_mg_v64_e64.cu
index 1a2ed665b8a..d870f30cd3c 100644
--- a/cpp/src/community/detail/refine_mg_v64_e64.cu
+++ b/cpp/src/community/detail/refine_mg_v64_e64.cu
@@ -37,8 +37,7 @@ refine_clustering(
   edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
     src_clusters_cache,
   edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
-    dst_clusters_cache,
-  bool up_down);
+    dst_clusters_cache);
 
 template std::tuple<rmm::device_uvector<int64_t>,
                     std::pair<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>>>
@@ -59,8 +58,7 @@ refine_clustering(
   edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
     src_clusters_cache,
   edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
-    dst_clusters_cache,
-  bool up_down);
+    dst_clusters_cache);
 
 }  // namespace detail
 }  // namespace cugraph
diff --git a/cpp/src/community/detail/refine_sg_v32_e32.cu b/cpp/src/community/detail/refine_sg_v32_e32.cu
index ac0ede8225d..803a37474d4 100644
--- a/cpp/src/community/detail/refine_sg_v32_e32.cu
+++ b/cpp/src/community/detail/refine_sg_v32_e32.cu
@@ -37,8 +37,7 @@ refine_clustering(
   edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
     src_clusters_cache,
   edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
-    dst_clusters_cache,
-  bool up_down);
+    dst_clusters_cache);
 
 template std::tuple<rmm::device_uvector<int32_t>,
                     std::pair<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>>
@@ -59,8 +58,7 @@ refine_clustering(
   edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
     src_clusters_cache,
   edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
-    dst_clusters_cache,
-  bool up_down);
+    dst_clusters_cache);
 
 }  // namespace detail
 }  // namespace cugraph
diff --git a/cpp/src/community/detail/refine_sg_v64_e64.cu b/cpp/src/community/detail/refine_sg_v64_e64.cu
index 97ed43b3de0..7b8bc435bc3 100644
--- a/cpp/src/community/detail/refine_sg_v64_e64.cu
+++ b/cpp/src/community/detail/refine_sg_v64_e64.cu
@@ -37,8 +37,7 @@ refine_clustering(
   edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
     src_clusters_cache,
   edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
-    dst_clusters_cache,
-  bool up_down);
+    dst_clusters_cache);
 
 template std::tuple<rmm::device_uvector<int64_t>,
                     std::pair<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>>>
@@ -59,8 +58,7 @@ refine_clustering(
   edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
     src_clusters_cache,
   edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
-    dst_clusters_cache,
-  bool up_down);
+    dst_clusters_cache);
 
 }  // namespace detail
 }  // namespace cugraph
diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh
index da790a5dd66..c3600ff12e0 100644
--- a/cpp/src/community/leiden_impl.cuh
+++ b/cpp/src/community/leiden_impl.cuh
@@ -102,7 +102,8 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
   HighResTimer hr_timer{};
 #endif
 
-  weight_t best_modularity = weight_t{-1.0};
+  weight_t final_Q{-1};
+
   weight_t total_edge_weight =
     compute_total_edge_weight(handle, current_graph_view, *current_edge_weight_view);
 
@@ -368,9 +369,6 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
     detail::timer_stop<graph_view_t::is_multi_gpu>(handle, hr_timer);
 #endif
 
-    bool terminate = (cur_Q <= best_modularity);
-    if (!terminate) { best_modularity = cur_Q; }
-
 #ifdef TIMING
     detail::timer_start<graph_view_t::is_multi_gpu>(handle, hr_timer, "contract graph");
 #endif
@@ -386,8 +384,7 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
     auto nr_unique_louvain_clusters =
       remove_duplicates<vertex_t, multi_gpu>(handle, copied_louvain_partition);
 
-    terminate =
-      terminate || (nr_unique_louvain_clusters == current_graph_view.number_of_vertices());
+    bool terminate = (nr_unique_louvain_clusters == current_graph_view.number_of_vertices());
 
     rmm::device_uvector<vertex_t> refined_leiden_partition(0, handle.get_stream());
     std::pair<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> leiden_to_louvain_map{
@@ -426,11 +423,19 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
                                   std::move(louvain_assignment_for_vertices),
                                   src_vertex_weights_cache,
                                   src_louvain_assignment_cache,
-                                  dst_louvain_assignment_cache,
-                                  up_down);
+                                  dst_louvain_assignment_cache);
     }
 
     // Clear buffer and contract the graph
+    final_Q = detail::compute_modularity(handle,
+                                         current_graph_view,
+                                         current_edge_weight_view,
+                                         src_louvain_assignment_cache,
+                                         dst_louvain_assignment_cache,
+                                         louvain_assignment_for_vertices,
+                                         cluster_weights,
+                                         total_edge_weight,
+                                         resolution);
 
     cluster_keys.resize(0, handle.get_stream());
     cluster_weights.resize(0, handle.get_stream());
@@ -445,6 +450,9 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
     dst_louvain_assignment_cache.clear(handle);
 
     if (!terminate) {
+      src_louvain_assignment_cache.clear(handle);
+      dst_louvain_assignment_cache.clear(handle);
+
       auto nr_unique_leiden = static_cast<vertex_t>(leiden_to_louvain_map.first.size());
       if (graph_view_t::is_multi_gpu) {
         nr_unique_leiden = host_scalar_allreduce(
@@ -586,7 +594,7 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
   detail::timer_display<graph_view_t::is_multi_gpu>(handle, hr_timer, std::cout);
 #endif
 
-  return std::make_pair(std::move(dendrogram), best_modularity);
+  return std::make_pair(std::move(dendrogram), final_Q);
 }
 
 template <typename vertex_t, bool multi_gpu>