Skip to content

Commit

Permalink
Address Leiden clustering generating too many clusters (#4730)
Browse files Browse the repository at this point in the history
Our implementation of Leiden was generating too many clusters.  This was not obvious in smaller graphs, but as the graphs got larger the problem became more noticeable.

The Leiden loop was terminating if the modularity stopped improving.  But the Leiden algorithm as defined in the paper allows the refinement phase to reduce modularity in order to improve the quality of the clusters.  The convergence criterion defined in the paper is based on making no changes during an iteration, rather than strictly monitoring the change in modularity.

Updating this criterion results in the Leiden algorithm running more iterations and converging on better answers.

Closes #4529

Authors:
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Naim (https://github.com/naimnv)
  - Joseph Nke (https://github.com/jnke2016)
  - Seunghwa Kang (https://github.com/seunghwak)

URL: #4730
  • Loading branch information
ChuckHastings authored Oct 23, 2024
1 parent 201ff7c commit 7390ae2
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 29 deletions.
3 changes: 1 addition & 2 deletions cpp/src/community/detail/refine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ refine_clustering(
rmm::device_uvector<typename graph_view_t::vertex_type>&& next_clusters_v,
edge_src_property_t<graph_view_t, weight_t> const& src_vertex_weights_cache,
edge_src_property_t<graph_view_t, typename graph_view_t::vertex_type> const& src_clusters_cache,
edge_dst_property_t<graph_view_t, typename graph_view_t::vertex_type> const& dst_clusters_cache,
bool up_down);
edge_dst_property_t<graph_view_t, typename graph_view_t::vertex_type> const& dst_clusters_cache);

}
} // namespace cugraph
9 changes: 7 additions & 2 deletions cpp/src/community/detail/refine_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,7 @@ refine_clustering(
edge_src_property_t<GraphViewType, typename GraphViewType::vertex_type> const&
src_louvain_assignment_cache,
edge_dst_property_t<GraphViewType, typename GraphViewType::vertex_type> const&
dst_louvain_assignment_cache,
bool up_down)
dst_louvain_assignment_cache)
{
const weight_t POSITIVE_GAIN = 1e-6;
using vertex_t = typename GraphViewType::vertex_type;
Expand Down Expand Up @@ -230,6 +229,7 @@ refine_clustering(
cugraph::reduce_op::plus<weight_t>{},
weighted_cut_of_vertices_to_louvain.begin());

// FIXME: Consider using bit mask logic here. Would reduce memory by 8x
rmm::device_uvector<uint8_t> singleton_and_connected_flags(
graph_view.local_vertex_partition_range_size(), handle.get_stream());

Expand Down Expand Up @@ -297,6 +297,11 @@ refine_clustering(
edge_dst_property_t<GraphViewType, vertex_t> dst_leiden_assignment_cache(handle);
edge_src_property_t<GraphViewType, uint8_t> src_singleton_and_connected_flag_cache(handle);

// FIXME: Why is kvstore used here? Can't this be accomplished by
// a direct lookup in louvain_assignment_of_vertices using
// leiden - graph_view.local_vertex_partition_range_first() as the
// index?
// Changing this would save memory and time
kv_store_t<vertex_t, vertex_t, false> leiden_to_louvain_map(
leiden_assignment.begin(),
leiden_assignment.end(),
Expand Down
6 changes: 2 additions & 4 deletions cpp/src/community/detail/refine_mg_v32_e32.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

template std::tuple<rmm::device_uvector<int32_t>,
std::pair<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>>
Expand All @@ -59,8 +58,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

} // namespace detail
} // namespace cugraph
6 changes: 2 additions & 4 deletions cpp/src/community/detail/refine_mg_v64_e64.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

template std::tuple<rmm::device_uvector<int64_t>,
std::pair<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>>>
Expand All @@ -59,8 +58,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

} // namespace detail
} // namespace cugraph
6 changes: 2 additions & 4 deletions cpp/src/community/detail/refine_sg_v32_e32.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

template std::tuple<rmm::device_uvector<int32_t>,
std::pair<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>>
Expand All @@ -59,8 +58,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

} // namespace detail
} // namespace cugraph
6 changes: 2 additions & 4 deletions cpp/src/community/detail/refine_sg_v64_e64.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

template std::tuple<rmm::device_uvector<int64_t>,
std::pair<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>>>
Expand All @@ -59,8 +58,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

} // namespace detail
} // namespace cugraph
26 changes: 17 additions & 9 deletions cpp/src/community/leiden_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
HighResTimer hr_timer{};
#endif

weight_t best_modularity = weight_t{-1.0};
weight_t final_Q{-1};

weight_t total_edge_weight =
compute_total_edge_weight(handle, current_graph_view, *current_edge_weight_view);

Expand Down Expand Up @@ -368,9 +369,6 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
detail::timer_stop<graph_view_t::is_multi_gpu>(handle, hr_timer);
#endif

bool terminate = (cur_Q <= best_modularity);
if (!terminate) { best_modularity = cur_Q; }

#ifdef TIMING
detail::timer_start<graph_view_t::is_multi_gpu>(handle, hr_timer, "contract graph");
#endif
Expand All @@ -386,8 +384,7 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
auto nr_unique_louvain_clusters =
remove_duplicates<vertex_t, multi_gpu>(handle, copied_louvain_partition);

terminate =
terminate || (nr_unique_louvain_clusters == current_graph_view.number_of_vertices());
bool terminate = (nr_unique_louvain_clusters == current_graph_view.number_of_vertices());

rmm::device_uvector<vertex_t> refined_leiden_partition(0, handle.get_stream());
std::pair<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> leiden_to_louvain_map{
Expand Down Expand Up @@ -426,11 +423,19 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
std::move(louvain_assignment_for_vertices),
src_vertex_weights_cache,
src_louvain_assignment_cache,
dst_louvain_assignment_cache,
up_down);
dst_louvain_assignment_cache);
}

// Clear buffer and contract the graph
final_Q = detail::compute_modularity(handle,
current_graph_view,
current_edge_weight_view,
src_louvain_assignment_cache,
dst_louvain_assignment_cache,
louvain_assignment_for_vertices,
cluster_weights,
total_edge_weight,
resolution);

cluster_keys.resize(0, handle.get_stream());
cluster_weights.resize(0, handle.get_stream());
Expand All @@ -445,6 +450,9 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
dst_louvain_assignment_cache.clear(handle);

if (!terminate) {
src_louvain_assignment_cache.clear(handle);
dst_louvain_assignment_cache.clear(handle);

auto nr_unique_leiden = static_cast<vertex_t>(leiden_to_louvain_map.first.size());
if (graph_view_t::is_multi_gpu) {
nr_unique_leiden = host_scalar_allreduce(
Expand Down Expand Up @@ -586,7 +594,7 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
detail::timer_display<graph_view_t::is_multi_gpu>(handle, hr_timer, std::cout);
#endif

return std::make_pair(std::move(dendrogram), best_modularity);
return std::make_pair(std::move(dendrogram), final_Q);
}

template <typename vertex_t, bool multi_gpu>
Expand Down

0 comments on commit 7390ae2

Please sign in to comment.