From 14862c66501a5c8e076ab9b545032b86968418be Mon Sep 17 00:00:00 2001
From: Naim <110031745+naimnv@users.noreply.github.com>
Date: Mon, 31 Jul 2023 16:02:48 +0200
Subject: [PATCH] Update primitive to compute weighted Jaccard, Sorensen and
 Overlap similarity (#3728)

This PR
- changes `per_v_pair_transform_dst_nbr_intersection` to support computing weighted intersections
- updates the implementation of `similarity`, `jaccard_coefficients`, `sorensen_coefficients`, and `overlap_coefficients` for weighted graphs

NOTE: the current implementation does not support computing similarity for multi-edge graphs.

closes #2748
closes #3477

Authors:
  - Naim (https://github.com/naimnv)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Seunghwa Kang (https://github.com/seunghwak)
  - Joseph Nke (https://github.com/jnke2016)

URL: https://github.com/rapidsai/cugraph/pull/3728
---
 cpp/src/c_api/similarity.cpp                  |   2 +-
 cpp/src/link_prediction/jaccard_impl.cuh      |  37 +-
 cpp/src/link_prediction/overlap_impl.cuh      |  37 +-
 cpp/src/link_prediction/similarity_impl.cuh   | 105 ++-
 cpp/src/link_prediction/sorensen_impl.cuh     |  37 +-
 .../detail/extract_transform_v_frontier_e.cuh |  78 +--
 cpp/src/prims/detail/nbr_intersection.cuh     | 634 +++++++++++++++---
 .../detail/optional_dataframe_buffer.hpp      | 102 +++
 ..._v_pair_transform_dst_nbr_intersection.cuh | 130 +++-
 ...t_nbr_intersection_of_e_endpoints_by_v.cuh |   7 +
 cpp/tests/CMakeLists.txt                      |  14 +
 .../mg_weighted_similarity_test.cpp           | 298 ++++++++
 .../link_prediction/similarity_compare.cpp    | 213 +++++-
 .../link_prediction/similarity_compare.hpp    |  46 +-
 .../weighted_similarity_test.cpp              | 338 ++++++++++
 ...r_v_pair_transform_dst_nbr_intersection.cu |  11 +-
 ...transform_dst_nbr_weighted_intersection.cu | 402 +++++++++++
 cpp/tests/utilities/test_utilities.hpp        |  16 +
 18 files changed, 2202 insertions(+), 305 deletions(-)
 create mode 100644 cpp/src/prims/detail/optional_dataframe_buffer.hpp
 create mode 100644 cpp/tests/link_prediction/mg_weighted_similarity_test.cpp
 create mode 100644 cpp/tests/link_prediction/weighted_similarity_test.cpp
 create mode 100644 cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu
diff --git a/cpp/src/c_api/similarity.cpp b/cpp/src/c_api/similarity.cpp
index 3241018bfbd..730416abd7b 100644
--- a/cpp/src/c_api/similarity.cpp
+++ b/cpp/src/c_api/similarity.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/cpp/src/link_prediction/jaccard_impl.cuh b/cpp/src/link_prediction/jaccard_impl.cuh
index b9675e3a578..bd4e2d5e58e 100644
--- a/cpp/src/link_prediction/jaccard_impl.cuh
+++ b/cpp/src/link_prediction/jaccard_impl.cuh
@@ -24,22 +24,15 @@ namespace cugraph {
 namespace detail {
 
 struct jaccard_functor_t {
-  template <typename weight_t>
-  weight_t __device__ compute_score(weight_t cardinality_a,
-                                    weight_t cardinality_b,
-                                    weight_t cardinality_a_intersect_b) const
-  {
-    return cardinality_a_intersect_b / (cardinality_a + cardinality_b - cardinality_a_intersect_b);
-  }
-};
-
-struct weighted_jaccard_functor_t {
   template <typename weight_t>
   weight_t __device__ compute_score(weight_t weight_a,
                                     weight_t weight_b,
-                                    weight_t min_weight_a_intersect_b) const
+                                    weight_t weight_a_intersect_b,
+                                    weight_t weight_a_union_b) const
   {
-    return min_weight_a_intersect_b / (weight_a + weight_b - min_weight_a_intersect_b);
+    return weight_a_union_b <= std::numeric_limits<weight_t>::min()
+             ? weight_t{0}
+             : weight_a_intersect_b / weight_a_union_b;
   }
 };
 
@@ -55,20 +48,12 @@ rmm::device_uvector<weight_t> jaccard_coefficients(
 {
   CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");
 
-  if (!edge_weight_view)
-    return detail::similarity(handle,
-                              graph_view,
-                              edge_weight_view,
-                              vertex_pairs,
-                              detail::jaccard_functor_t{},
-                              do_expensive_check);
-  else
-    return detail::similarity(handle,
-                              graph_view,
-                              edge_weight_view,
-                              vertex_pairs,
-                              detail::weighted_jaccard_functor_t{},
-                              do_expensive_check);
+  return detail::similarity(handle,
+                            graph_view,
+                            edge_weight_view,
+                            vertex_pairs,
+                            detail::jaccard_functor_t{},
+                            do_expensive_check);
 }
 
 }  // namespace cugraph
diff --git a/cpp/src/link_prediction/overlap_impl.cuh b/cpp/src/link_prediction/overlap_impl.cuh
index 4c001a8f243..1810df2f76b 100644
--- a/cpp/src/link_prediction/overlap_impl.cuh
+++ b/cpp/src/link_prediction/overlap_impl.cuh
@@ -24,22 +24,15 @@ namespace cugraph {
 namespace detail {
 
 struct overlap_functor_t {
-  template <typename weight_t>
-  weight_t __device__ compute_score(weight_t cardinality_a,
-                                    weight_t cardinality_b,
-                                    weight_t cardinality_a_intersect_b) const
-  {
-    return cardinality_a_intersect_b / std::min(cardinality_a, cardinality_b);
-  }
-};
-
-struct weighted_overlap_functor_t {
   template <typename weight_t>
   weight_t __device__ compute_score(weight_t weight_a,
                                     weight_t weight_b,
-                                    weight_t min_weight_a_intersect_b) const
+                                    weight_t weight_a_intersect_b,
+                                    weight_t weight_a_union_b) const
   {
-    return min_weight_a_intersect_b / std::min(weight_a, weight_b);
+    return std::min(weight_a, weight_b) <= std::numeric_limits<weight_t>::min()
+             ? weight_t{0}
+             : weight_a_intersect_b / std::min(weight_a, weight_b);
   }
 };
 
@@ -55,20 +48,12 @@ rmm::device_uvector<weight_t> overlap_coefficients(
 {
   CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");
 
-  if (!edge_weight_view)
-    return detail::similarity(handle,
-                              graph_view,
-                              edge_weight_view,
-                              vertex_pairs,
-                              detail::overlap_functor_t{},
-                              do_expensive_check);
-  else
-    return detail::similarity(handle,
-                              graph_view,
-                              edge_weight_view,
-                              vertex_pairs,
-                              detail::weighted_overlap_functor_t{},
-                              do_expensive_check);
+  return detail::similarity(handle,
+                            graph_view,
+                            edge_weight_view,
+                            vertex_pairs,
+                            detail::overlap_functor_t{},
+                            do_expensive_check);
 }
 
 }  // namespace cugraph
diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh
index 97c8017c668..55e8f5c88d7 100644
--- a/cpp/src/link_prediction/similarity_impl.cuh
+++ b/cpp/src/link_prediction/similarity_impl.cuh
@@ -15,9 +15,11 @@
  */
 #pragma once
 
+#include <prims/count_if_e.cuh>
 #include <prims/per_v_pair_transform_dst_nbr_intersection.cuh>
 #include <prims/update_edge_src_dst_property.cuh>
 
+#include <cugraph/graph_functions.hpp>
 #include <cugraph/graph_view.hpp>
 
 #include <raft/core/device_span.hpp>
@@ -51,33 +53,106 @@ rmm::device_uvector<weight_t> similarity(
   auto vertex_pairs_begin =
     thrust::make_zip_iterator(std::get<0>(vertex_pairs).data(), std::get<1>(vertex_pairs).data());
 
+  if (do_expensive_check) {
+    auto num_invalids = detail::count_invalid_vertex_pairs(
+      handle, graph_view, vertex_pairs_begin, vertex_pairs_begin + num_vertex_pairs);
+    CUGRAPH_EXPECTS(num_invalids == 0,
+                    "Invalid input arguments: there are invalid input vertex pairs.");
+
+    if (edge_weight_view) {
+      auto num_negative_edge_weights =
+        count_if_e(handle,
+                   graph_view,
+                   edge_src_dummy_property_t{}.view(),
+                   edge_dst_dummy_property_t{}.view(),
+                   *edge_weight_view,
+                   [] __device__(vertex_t, vertex_t, auto, auto, weight_t w) { return w < 0.0; });
+      CUGRAPH_EXPECTS(
+        num_negative_edge_weights == 0,
+        "Invalid input argument: input edge weights should have non-negative values.");
+    }
+  }
+
   if (edge_weight_view) {
-    // FIXME: need implementation, similar to unweighted
-    //    Use compute_out_weight_sums instead of compute_out_degrees
-    //    Sum up for each common edge compute (u,a,v): min weight ((u,a), (a,v)) and
-    //        max weight((u,a), (a,v)).
-    //    Use these to compute weighted score
-    //
-    CUGRAPH_FAIL("weighted similarity computations are not supported in this release");
+    rmm::device_uvector<weight_t> similarity_score(num_vertex_pairs, handle.get_stream());
+    rmm::device_uvector<weight_t> weighted_out_degrees =
+      compute_out_weight_sums(handle, graph_view, *edge_weight_view);
+
+    per_v_pair_transform_dst_nbr_intersection(
+      handle,
+      graph_view,
+      *edge_weight_view,
+      vertex_pairs_begin,
+      vertex_pairs_begin + num_vertex_pairs,
+      weighted_out_degrees.begin(),
+      [functor] __device__(auto a,
+                           auto b,
+                           auto weight_a,
+                           auto weight_b,
+                           auto intersection,
+                           auto intersected_properties_a,
+                           auto intersected_properties_b) {
+        weight_t sum_of_min_weight_a_intersect_b = weight_t{0};
+        weight_t sum_of_max_weight_a_intersect_b = weight_t{0};
+        weight_t sum_of_intersected_a            = weight_t{0};
+        weight_t sum_of_intersected_b            = weight_t{0};
+
+        auto pair_first = thrust::make_zip_iterator(intersected_properties_a.data(),
+                                                    intersected_properties_b.data());
+        thrust::tie(sum_of_min_weight_a_intersect_b,
+                    sum_of_max_weight_a_intersect_b,
+                    sum_of_intersected_a,
+                    sum_of_intersected_b) =
+          thrust::transform_reduce(
+            thrust::seq,
+            pair_first,
+            pair_first + intersected_properties_a.size(),
+            [] __device__(auto property_pair) {
+              auto prop_a = thrust::get<0>(property_pair);
+              auto prop_b = thrust::get<1>(property_pair);
+              return thrust::make_tuple(min(prop_a, prop_b), max(prop_a, prop_b), prop_a, prop_b);
+            },
+            thrust::make_tuple(weight_t{0}, weight_t{0}, weight_t{0}, weight_t{0}),
+            [] __device__(auto lhs, auto rhs) {
+              return thrust::make_tuple(thrust::get<0>(lhs) + thrust::get<0>(rhs),
+                                        thrust::get<1>(lhs) + thrust::get<1>(rhs),
+                                        thrust::get<2>(lhs) + thrust::get<2>(rhs),
+                                        thrust::get<3>(lhs) + thrust::get<3>(rhs));
+            });
+
+        weight_t sum_of_uniq_a = weight_a - sum_of_intersected_a;
+        weight_t sum_of_uniq_b = weight_b - sum_of_intersected_b;
+
+        sum_of_max_weight_a_intersect_b += sum_of_uniq_a + sum_of_uniq_b;
+
+        return functor.compute_score(static_cast<weight_t>(weight_a),
+                                     static_cast<weight_t>(weight_b),
+                                     static_cast<weight_t>(sum_of_min_weight_a_intersect_b),
+                                     static_cast<weight_t>(sum_of_max_weight_a_intersect_b));
+      },
+      similarity_score.begin(),
+      do_expensive_check);
+
+    return similarity_score;
   } else {
     rmm::device_uvector<weight_t> similarity_score(num_vertex_pairs, handle.get_stream());
 
-    //
-    //  Compute vertex_degree for all vertices, then distribute to each GPU.
-    //  Need to use this instead of the dummy properties below
-    //
     auto out_degrees = graph_view.compute_out_degrees(handle);
 
     per_v_pair_transform_dst_nbr_intersection(
       handle,
       graph_view,
+      cugraph::edge_dummy_property_t{}.view(),
       vertex_pairs_begin,
       vertex_pairs_begin + num_vertex_pairs,
       out_degrees.begin(),
-      [functor] __device__(auto v1, auto v2, auto v1_degree, auto v2_degree, auto intersection) {
-        return functor.compute_score(static_cast<weight_t>(v1_degree),
-                                     static_cast<weight_t>(v2_degree),
-                                     static_cast<weight_t>(intersection.size()));
+      [functor] __device__(
+        auto v1, auto v2, auto v1_degree, auto v2_degree, auto intersection, auto, auto) {
+        return functor.compute_score(
+          static_cast<weight_t>(v1_degree),
+          static_cast<weight_t>(v2_degree),
+          static_cast<weight_t>(intersection.size()),
+          static_cast<weight_t>(v1_degree + v2_degree - intersection.size()));
       },
       similarity_score.begin(),
       do_expensive_check);
diff --git a/cpp/src/link_prediction/sorensen_impl.cuh b/cpp/src/link_prediction/sorensen_impl.cuh
index ac84358049a..00c9a8107f3 100644
--- a/cpp/src/link_prediction/sorensen_impl.cuh
+++ b/cpp/src/link_prediction/sorensen_impl.cuh
@@ -24,22 +24,15 @@ namespace cugraph {
 namespace detail {
 
 struct sorensen_functor_t {
-  template <typename weight_t>
-  weight_t __device__ compute_score(weight_t cardinality_a,
-                                    weight_t cardinality_b,
-                                    weight_t cardinality_a_intersect_b) const
-  {
-    return (2 * cardinality_a_intersect_b) / (cardinality_a + cardinality_b);
-  }
-};
-
-struct weighted_sorensen_functor_t {
   template <typename weight_t>
   weight_t __device__ compute_score(weight_t weight_a,
                                     weight_t weight_b,
-                                    weight_t min_weight_a_intersect_b) const
+                                    weight_t weight_a_intersect_b,
+                                    weight_t weight_a_union_b) const
   {
-    return (2 * min_weight_a_intersect_b) / (weight_a + weight_b);
+    return (weight_a + weight_b) <= std::numeric_limits<weight_t>::min()
+             ? weight_t{0}
+             : (2 * weight_a_intersect_b) / (weight_a + weight_b);
   }
 };
 
@@ -55,20 +48,12 @@ rmm::device_uvector<weight_t> sorensen_coefficients(
 {
   CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");
 
-  if (!edge_weight_view)
-    return detail::similarity(handle,
-                              graph_view,
-                              edge_weight_view,
-                              vertex_pairs,
-                              detail::sorensen_functor_t{},
-                              do_expensive_check);
-  else
-    return detail::similarity(handle,
-                              graph_view,
-                              edge_weight_view,
-                              vertex_pairs,
-                              detail::weighted_sorensen_functor_t{},
-                              do_expensive_check);
+  return detail::similarity(handle,
+                            graph_view,
+                            edge_weight_view,
+                            vertex_pairs,
+                            detail::sorensen_functor_t{},
+                            do_expensive_check);
 }
 
 }  // namespace cugraph
diff --git a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh
index febdf61943b..2d77d64e1ff 100644
--- a/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh
+++ b/cpp/src/prims/detail/extract_transform_v_frontier_e.cuh
@@ -15,6 +15,7 @@
  */
 #pragma once
 
+#include <prims/detail/optional_dataframe_buffer.hpp>
 #include <prims/property_op_utils.cuh>
 
 #include <cugraph/edge_partition_device_view.cuh>
@@ -60,83 +61,6 @@ namespace detail {
 
 int32_t constexpr extract_transform_v_frontier_e_kernel_block_size = 512;
 
-// we cannot use thrust::iterator_traits<Iterator>::value_type if Iterator is void* (reference to
-// void is not allowed)
-template <typename Iterator, typename Enable = void>
-struct optional_dataframe_buffer_value_type_t;
-
-template <typename Iterator>
-struct optional_dataframe_buffer_value_type_t<Iterator,
-                                              std::enable_if_t<!std::is_same_v<Iterator, void*>>> {
-  using value = typename thrust::iterator_traits<Iterator>::value_type;
-};
-
-template <typename Iterator>
-struct optional_dataframe_buffer_value_type_t<Iterator,
-                                              std::enable_if_t<std::is_same_v<Iterator, void*>>> {
-  using value = void;
-};
-
-template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>
-std::byte allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream)
-{
-  return std::byte{0};  // dummy
-}
-
-template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>
-auto allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream)
-{
-  return allocate_dataframe_buffer<T>(size, stream);
-}
-
-template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>
-void* get_optional_dataframe_buffer_begin(std::byte& optional_dataframe_buffer)
-{
-  return static_cast<void*>(nullptr);
-}
-
-template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>
-auto get_optional_dataframe_buffer_begin(
-  std::add_lvalue_reference_t<decltype(allocate_dataframe_buffer<T>(
-    size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer)
-{
-  return get_dataframe_buffer_begin(optional_dataframe_buffer);
-}
-
-template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>
-void resize_optional_dataframe_buffer(std::byte& optional_dataframe_buffer,
-                                      size_t new_buffer_size,
-                                      rmm::cuda_stream_view stream_view)
-{
-  return;
-}
-
-template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>
-void resize_optional_dataframe_buffer(
-  std::add_lvalue_reference_t<decltype(allocate_dataframe_buffer<T>(
-    size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer,
-  size_t new_buffer_size,
-  rmm::cuda_stream_view stream_view)
-{
-  return resize_dataframe_buffer(optional_dataframe_buffer, new_buffer_size, stream_view);
-}
-
-template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>
-void shrink_to_fit_optional_dataframe_buffer(std::byte& optional_dataframe_buffer,
-                                             rmm::cuda_stream_view stream_view)
-{
-  return;
-}
-
-template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>
-void shrink_to_fit_optional_dataframe_buffer(
-  std::add_lvalue_reference_t<decltype(allocate_dataframe_buffer<T>(
-    size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer,
-  rmm::cuda_stream_view stream_view)
-{
-  return shrink_to_fit_dataframe_buffer(optional_dataframe_buffer, stream_view);
-}
-
 template <typename e_op_result_t,
           typename BufferKeyOutputIterator,
           typename BufferValueOutputIterator>
diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh
index 98453d46c3f..f4c4745b14c 100644
--- a/cpp/src/prims/detail/nbr_intersection.cuh
+++ b/cpp/src/prims/detail/nbr_intersection.cuh
@@ -15,9 +15,11 @@
  */
 #pragma once
 
+#include <prims/detail/optional_dataframe_buffer.hpp>
 #include <prims/kv_store.cuh>
 
 #include <cugraph/edge_partition_device_view.cuh>
+#include <cugraph/edge_partition_edge_property_device_view.cuh>
 #include <cugraph/graph.hpp>
 #include <cugraph/partition_manager.hpp>
 #include <cugraph/utilities/device_functors.cuh>
@@ -168,12 +170,17 @@ struct update_rx_major_local_degree_t {
   }
 };
 
-template <typename vertex_t, typename edge_t, bool multi_gpu>
+template <typename vertex_t,
+          typename edge_t,
+          typename edge_partition_e_input_device_view_t,
+          typename optional_property_buffer_view_t,
+          bool multi_gpu>
 struct update_rx_major_local_nbrs_t {
   int major_comm_size{};
   int minor_comm_size{};
 
   edge_partition_device_view_t<vertex_t, edge_t, multi_gpu> edge_partition{};
+  edge_partition_e_input_device_view_t edge_partition_e_value_input{};
 
   size_t reordered_idx_first{};
   size_t local_edge_partition_idx{};
@@ -182,12 +189,13 @@ struct update_rx_major_local_nbrs_t {
   raft::device_span<size_t const> rx_group_firsts{nullptr};
   raft::device_span<vertex_t const> rx_majors{};
   raft::device_span<size_t const> local_nbr_offsets_for_rx_majors{};
-
   raft::device_span<vertex_t> local_nbrs_for_rx_majors{};
+  optional_property_buffer_view_t local_nbrs_properties_for_rx_majors{};
 
-  __device__ void operator()(size_t idx) const
+  __device__ void operator()(size_t idx)
   {
-    auto it = thrust::upper_bound(
+    using edge_property_value_t = typename edge_partition_e_input_device_view_t::value_type;
+    auto it                     = thrust::upper_bound(
       thrust::seq, rx_reordered_group_lasts.begin(), rx_reordered_group_lasts.end(), idx);
     auto major_comm_rank = static_cast<int>(thrust::distance(rx_reordered_group_lasts.begin(), it));
     auto offset_in_local_edge_partition =
@@ -214,14 +222,22 @@ struct update_rx_major_local_nbrs_t {
     // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree
     // vertices in a single warp (better optimize if this becomes a performance
     // bottleneck)
-    thrust::copy(
-      thrust::seq,
-      indices,
-      indices + local_degree,
-      local_nbrs_for_rx_majors.begin() +
-        local_nbr_offsets_for_rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size +
-                                                        local_edge_partition_idx] +
-                                        offset_in_local_edge_partition]);
+
+    size_t start_offset =
+      local_nbr_offsets_for_rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size +
+                                                      local_edge_partition_idx] +
+                                      offset_in_local_edge_partition];
+    thrust::copy(thrust::seq,
+                 indices,
+                 indices + local_degree,
+                 local_nbrs_for_rx_majors.begin() + start_offset);
+
+    if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      thrust::copy(thrust::seq,
+                   edge_partition_e_value_input.value_first() + edge_offset,
+                   edge_partition_e_value_input.value_first() + (edge_offset + local_degree),
+                   local_nbrs_properties_for_rx_majors.begin() + start_offset);
+    }
   }
 };
 
@@ -317,30 +333,43 @@ template <typename FirstElementToIdxMap,
           typename VertexPairIterator,
           typename vertex_t,
           typename edge_t,
+          typename edge_partition_e_input_device_view_t,
+          typename optional_property_buffer_view_t,
           bool multi_gpu>
 struct copy_intersecting_nbrs_and_update_intersection_size_t {
   FirstElementToIdxMap first_element_to_idx_map{};
   raft::device_span<size_t const> first_element_offsets{};
   raft::device_span<vertex_t const> first_element_indices{nullptr};
+  optional_property_buffer_view_t first_element_properties{};
 
   SecondElementToIdxMap second_element_to_idx_map{};
   raft::device_span<size_t const> second_element_offsets{};
   raft::device_span<vertex_t const> second_element_indices{nullptr};
+  optional_property_buffer_view_t second_element_properties{};
 
   edge_partition_device_view_t<vertex_t, edge_t, multi_gpu> edge_partition{};
+  edge_partition_e_input_device_view_t edge_partition_e_value_input{};
 
   VertexPairIterator vertex_pair_first;
   raft::device_span<size_t const> nbr_intersection_offsets{nullptr};
   raft::device_span<vertex_t> nbr_intersection_indices{nullptr};
 
+  optional_property_buffer_view_t nbr_intersection_properties0{};
+  optional_property_buffer_view_t nbr_intersection_properties1{};
   vertex_t invalid_id{};
-
-  __device__ edge_t operator()(size_t i) const
+  __device__ edge_t operator()(size_t i)
   {
-    auto pair = *(vertex_pair_first + i);
+    using edge_property_value_t = typename edge_partition_e_input_device_view_t::value_type;
+    using optional_const_property_buffer_view_t =
+      std::conditional_t<!std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
+                         raft::device_span<edge_property_value_t const>,
+                         std::byte /* dummy */>;
 
+    auto pair = *(vertex_pair_first + i);
     vertex_t const* indices0{nullptr};
-    [[maybe_unused]] edge_t local_edge_offset0{0};
+    optional_const_property_buffer_view_t properties0{};
+
+    edge_t local_edge_offset0{0};
     edge_t local_degree0{0};
     if constexpr (std::is_same_v<FirstElementToIdxMap, void*>) {
       vertex_t major = thrust::get<0>(pair);
@@ -362,14 +391,27 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t {
         thrust::tie(indices0, local_edge_offset0, local_degree0) =
           edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major));
       }
+
+      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        properties0 = raft::device_span<edge_property_value_t const>(
+          edge_partition_e_value_input.value_first() + local_edge_offset0, local_degree0);
+      }
+
     } else {
-      auto idx = first_element_to_idx_map.find(thrust::get<0>(pair));
-      local_degree0 =
-        static_cast<edge_t>(first_element_offsets[idx + 1] - first_element_offsets[idx]);
-      indices0 = first_element_indices.begin() + first_element_offsets[idx];
+      auto idx           = first_element_to_idx_map.find(thrust::get<0>(pair));
+      local_edge_offset0 = first_element_offsets[idx];
+      local_degree0      = static_cast<edge_t>(first_element_offsets[idx + 1] - local_edge_offset0);
+      indices0           = first_element_indices.begin() + local_edge_offset0;
+
+      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        properties0 = raft::device_span<edge_property_value_t const>(
+          first_element_properties.begin() + local_edge_offset0, local_degree0);
+      }
     }
 
     vertex_t const* indices1{nullptr};
+    optional_const_property_buffer_view_t properties1{};
+
     [[maybe_unused]] edge_t local_edge_offset1{0};
     edge_t local_degree1{0};
     if constexpr (std::is_same_v<SecondElementToIdxMap, void*>) {
@@ -392,31 +434,71 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t {
         thrust::tie(indices1, local_edge_offset1, local_degree1) =
           edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major));
       }
+
+      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        properties1 = raft::device_span<edge_property_value_t const>(
+          edge_partition_e_value_input.value_first() + local_edge_offset1, local_degree1);
+      }
+
     } else {
-      auto idx = second_element_to_idx_map.find(thrust::get<1>(pair));
-      local_degree1 =
-        static_cast<edge_t>(second_element_offsets[idx + 1] - second_element_offsets[idx]);
-      indices1 = second_element_indices.begin() + second_element_offsets[idx];
+      auto idx           = second_element_to_idx_map.find(thrust::get<1>(pair));
+      local_edge_offset1 = second_element_offsets[idx];
+      local_degree1 = static_cast<edge_t>(second_element_offsets[idx + 1] - local_edge_offset1);
+      indices1      = second_element_indices.begin() + local_edge_offset1;
+
+      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        properties1 = raft::device_span<edge_property_value_t const>(
+          second_element_properties.begin() + local_edge_offset1, local_degree1);
+      }
     }
 
     // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree
     // vertices in a single warp (better optimize if this becomes a performance
     // bottleneck)
 
-    auto it =
-      thrust::set_intersection(thrust::seq,
-                               indices0,
-                               indices0 + local_degree0,
-                               indices1,
-                               indices1 + local_degree1,
-                               nbr_intersection_indices.begin() + nbr_intersection_offsets[i]);
+    auto nbr_intersection_first = nbr_intersection_indices.begin() + nbr_intersection_offsets[i];
+
+    auto nbr_intersection_last = thrust::set_intersection(thrust::seq,
+                                                          indices0,
+                                                          indices0 + local_degree0,
+                                                          indices1,
+                                                          indices1 + local_degree1,
+                                                          nbr_intersection_first);
     thrust::fill(thrust::seq,
-                 it,
+                 nbr_intersection_last,
                  nbr_intersection_indices.begin() + nbr_intersection_offsets[i + 1],
                  invalid_id);
 
-    return static_cast<size_t>(
-      thrust::distance(nbr_intersection_indices.begin() + nbr_intersection_offsets[i], it));
+    auto insection_size =
+      static_cast<size_t>(thrust::distance(nbr_intersection_first, nbr_intersection_last));
+    if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      auto ip0_start = nbr_intersection_properties0.begin() + nbr_intersection_offsets[i];
+
+      // for each common neighbor, look up the first vertex's edge property to that neighbor
+      thrust::transform(thrust::seq,
+                        nbr_intersection_first,
+                        nbr_intersection_last,
+                        ip0_start,
+                        [indices0, local_degree0, properties0] __device__(auto v) {
+                          auto position =
+                            thrust::lower_bound(thrust::seq, indices0, indices0 + local_degree0, v);
+                          return properties0[thrust::distance(indices0, position)];
+                        });
+
+      auto ip1_start = nbr_intersection_properties1.begin() + nbr_intersection_offsets[i];
+
+      // for each common neighbor, look up the second vertex's edge property to that neighbor
+      thrust::transform(thrust::seq,
+                        nbr_intersection_first,
+                        nbr_intersection_last,
+                        ip1_start,
+                        [indices1, local_degree1, properties1] __device__(auto v) {
+                          auto position =
+                            thrust::lower_bound(thrust::seq, indices1, indices1 + local_degree1, v);
+                          return properties1[thrust::distance(indices1, position)];
+                        });
+    }
+    return insection_size;
   }
 };
 
@@ -436,7 +518,9 @@ struct strided_accumulate_t {
   }
 };
 
-template <typename vertex_t>
+template <typename vertex_t,
+          typename edge_property_value_t,
+          typename optional_property_buffer_view_t>
 struct gatherv_indices_t {
   size_t output_size{};
   int minor_comm_size{};
@@ -444,9 +528,13 @@ struct gatherv_indices_t {
   raft::device_span<size_t const> gathered_intersection_offsets{};
   raft::device_span<vertex_t const> gathered_intersection_indices{};
   raft::device_span<size_t const> combined_nbr_intersection_offsets{};
-
   raft::device_span<vertex_t> combined_nbr_intersection_indices{};
 
+  optional_property_buffer_view_t gathered_nbr_intersection_properties0{};
+  optional_property_buffer_view_t gathered_nbr_intersection_properties1{};
+  optional_property_buffer_view_t combined_nbr_intersection_properties0{};
+  optional_property_buffer_view_t combined_nbr_intersection_properties1{};
+
   __device__ void operator()(size_t i) const
   {
     auto output_offset = combined_nbr_intersection_offsets[i];
@@ -455,12 +543,29 @@ struct gatherv_indices_t {
     // in a single warp (better optimize if this becomes a performance bottleneck)
 
     for (int j = 0; j < minor_comm_size; ++j) {
-      thrust::copy(
-        thrust::seq,
-        gathered_intersection_indices.begin() + gathered_intersection_offsets[output_size * j + i],
-        gathered_intersection_indices.begin() +
-          gathered_intersection_offsets[output_size * j + i + 1],
-        combined_nbr_intersection_indices.begin() + output_offset);
+      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        auto zipped_gathered_begin = thrust::make_zip_iterator(
+          thrust::make_tuple(gathered_intersection_indices.begin(),
+                             gathered_nbr_intersection_properties0.begin(),
+                             gathered_nbr_intersection_properties1.begin()));
+
+        auto zipped_combined_begin = thrust::make_zip_iterator(
+          thrust::make_tuple(combined_nbr_intersection_indices.begin(),
+                             combined_nbr_intersection_properties0.begin(),
+                             combined_nbr_intersection_properties1.begin()));
+
+        thrust::copy(thrust::seq,
+                     zipped_gathered_begin + gathered_intersection_offsets[output_size * j + i],
+                     zipped_gathered_begin + gathered_intersection_offsets[output_size * j + i + 1],
+                     zipped_combined_begin + output_offset);
+      } else {
+        thrust::copy(thrust::seq,
+                     gathered_intersection_indices.begin() +
+                       gathered_intersection_offsets[output_size * j + i],
+                     gathered_intersection_indices.begin() +
+                       gathered_intersection_offsets[output_size * j + i + 1],
+                     combined_nbr_intersection_indices.begin() + output_offset);
+      }
       output_offset += gathered_intersection_offsets[output_size * j + i + 1] -
                        gathered_intersection_offsets[output_size * j + i];
     }
@@ -553,10 +658,17 @@ size_t count_invalid_vertex_pairs(raft::handle_t const& handle,
 // thrust::distance(vertex_pair_first, vertex_pair_last) should be comparable across the global
 // communicator. If we need to build the neighbor lists, grouping based on applying "vertex ID %
 // number of groups"  is recommended for load-balancing.
-template <typename GraphViewType, typename VertexPairIterator>
-std::tuple<rmm::device_uvector<size_t>, rmm::device_uvector<typename GraphViewType::vertex_type>>
+template <typename GraphViewType, typename VertexPairIterator, typename EdgeValueInputIterator>
+std::conditional_t<
+  !std::is_same_v<typename EdgeValueInputIterator::value_type, thrust::nullopt_t>,
+  std::tuple<rmm::device_uvector<size_t>,
+             rmm::device_uvector<typename GraphViewType::vertex_type>,
+             rmm::device_uvector<typename EdgeValueInputIterator::value_type>,
+             rmm::device_uvector<typename EdgeValueInputIterator::value_type>>,
+  std::tuple<rmm::device_uvector<size_t>, rmm::device_uvector<typename GraphViewType::vertex_type>>>
 nbr_intersection(raft::handle_t const& handle,
                  GraphViewType const& graph_view,
+                 EdgeValueInputIterator edge_value_input,
                  VertexPairIterator vertex_pair_first,
                  VertexPairIterator vertex_pair_last,
                  std::array<bool, 2> intersect_dst_nbr,
@@ -565,6 +677,31 @@ nbr_intersection(raft::handle_t const& handle,
   using vertex_t = typename GraphViewType::vertex_type;
   using edge_t   = typename GraphViewType::edge_type;
 
+  using edge_property_value_t = typename EdgeValueInputIterator::value_type;
+
+  using edge_partition_e_input_device_view_t =
+    std::conditional_t<std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
+                       detail::edge_partition_edge_dummy_property_device_view_t<vertex_t>,
+                       detail::edge_partition_edge_property_device_view_t<
+                         edge_t,
+                         typename EdgeValueInputIterator::value_iterator,
+                         edge_property_value_t>>;
+
+  using optional_property_buffer_value_type =
+    std::conditional_t<!std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
+                       edge_property_value_t,
+                       void>;
+
+  using optional_property_buffer_view_t =
+    std::conditional_t<!std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
+                       raft::device_span<edge_property_value_t>,
+                       std::byte /* dummy */>;
+
+  using optional_nbr_intersected_edge_partitions_t =
+    std::conditional_t<!std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
+                       std::vector<rmm::device_uvector<edge_property_value_t>>,
+                       std::byte /* dummy */>;
+
   static_assert(std::is_same_v<typename thrust::iterator_traits<VertexPairIterator>::value_type,
                                thrust::tuple<vertex_t, vertex_t>>);
 
@@ -601,6 +738,11 @@ nbr_intersection(raft::handle_t const& handle,
   std::optional<rmm::device_uvector<size_t>> major_nbr_offsets{std::nullopt};
   std::optional<rmm::device_uvector<vertex_t>> major_nbr_indices{std::nullopt};
 
+  [[maybe_unused]] auto major_nbr_properties =
+    cugraph::detail::allocate_optional_dataframe_buffer<optional_property_buffer_value_type>(
+      0, handle.get_stream());
+  optional_property_buffer_view_t optional_major_nbr_properties{};
+
   if constexpr (GraphViewType::is_multi_gpu) {
     if (intersect_minor_nbr[1]) {
       auto& comm           = handle.get_comms();
@@ -716,6 +858,11 @@ nbr_intersection(raft::handle_t const& handle,
 
       rmm::device_uvector<edge_t> local_degrees_for_rx_majors(size_t{0}, handle.get_stream());
       rmm::device_uvector<vertex_t> local_nbrs_for_rx_majors(size_t{0}, handle.get_stream());
+
+      [[maybe_unused]] auto local_nbrs_properties_for_rx_majors =
+        cugraph::detail::allocate_optional_dataframe_buffer<optional_property_buffer_value_type>(
+          0, handle.get_stream());
+
       std::vector<size_t> local_nbr_counts{};
       {
         rmm::device_uvector<size_t> rx_reordered_group_counts(
@@ -788,10 +935,23 @@ nbr_intersection(raft::handle_t const& handle,
 
         local_nbrs_for_rx_majors.resize(
           local_nbr_offsets_for_rx_majors.back_element(handle.get_stream()), handle.get_stream());
+
+        optional_property_buffer_view_t optional_local_nbrs_properties{};
+
+        if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+          local_nbrs_properties_for_rx_majors.resize(local_nbrs_for_rx_majors.size(),
+                                                     handle.get_stream());
+          optional_local_nbrs_properties = raft::device_span<edge_property_value_t>(
+            local_nbrs_properties_for_rx_majors.data(), local_nbrs_properties_for_rx_majors.size());
+        }
+
         for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) {
           auto edge_partition =
             edge_partition_device_view_t<vertex_t, edge_t, GraphViewType::is_multi_gpu>(
               graph_view.local_edge_partition_view(i));
+
+          auto edge_partition_e_value_input =
+            edge_partition_e_input_device_view_t(edge_value_input, i);
           auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i);
           auto reordered_idx_first =
             (i == size_t{0}) ? size_t{0} : h_rx_reordered_group_lasts[i * major_comm_size - 1];
@@ -801,10 +961,15 @@ nbr_intersection(raft::handle_t const& handle,
             handle.get_thrust_policy(),
             thrust::make_counting_iterator(reordered_idx_first),
             thrust::make_counting_iterator(reordered_idx_last),
-            update_rx_major_local_nbrs_t<vertex_t, edge_t, GraphViewType::is_multi_gpu>{
+            update_rx_major_local_nbrs_t<vertex_t,
+                                         edge_t,
+                                         edge_partition_e_input_device_view_t,
+                                         optional_property_buffer_view_t,
+                                         GraphViewType::is_multi_gpu>{
               major_comm_size,
               minor_comm_size,
               edge_partition,
+              edge_partition_e_value_input,
               reordered_idx_first,
               i,
               raft::device_span<size_t const>(
@@ -814,7 +979,8 @@ nbr_intersection(raft::handle_t const& handle,
               raft::device_span<size_t const>(local_nbr_offsets_for_rx_majors.data(),
                                               local_nbr_offsets_for_rx_majors.size()),
               raft::device_span<vertex_t>(local_nbrs_for_rx_majors.data(),
-                                          local_nbrs_for_rx_majors.size())});
+                                          local_nbrs_for_rx_majors.size()),
+              optional_local_nbrs_properties});
         }
 
         std::vector<size_t> h_rx_offsets(rx_major_counts.size() + size_t{1}, size_t{0});
@@ -860,6 +1026,17 @@ nbr_intersection(raft::handle_t const& handle,
       std::tie(*major_nbr_indices, std::ignore) = shuffle_values(
         major_comm, local_nbrs_for_rx_majors.begin(), local_nbr_counts, handle.get_stream());
 
+      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        std::tie(major_nbr_properties, std::ignore) =
+          shuffle_values(major_comm,
+                         local_nbrs_properties_for_rx_majors.begin(),
+                         local_nbr_counts,
+                         handle.get_stream());
+
+        optional_major_nbr_properties = raft::device_span<edge_property_value_t>(
+          major_nbr_properties.data(), major_nbr_properties.size());
+      }
+
       major_to_idx_map_ptr = std::make_unique<kv_store_t<vertex_t, vertex_t, false>>(
         unique_majors.begin(),
         unique_majors.end(),
@@ -887,6 +1064,15 @@ nbr_intersection(raft::handle_t const& handle,
 
   rmm::device_uvector<size_t> nbr_intersection_offsets(size_t{0}, handle.get_stream());
   rmm::device_uvector<vertex_t> nbr_intersection_indices(size_t{0}, handle.get_stream());
+
+  [[maybe_unused]] auto nbr_intersection_properties0 =
+    cugraph::detail::allocate_optional_dataframe_buffer<optional_property_buffer_value_type>(
+      0, handle.get_stream());
+
+  [[maybe_unused]] auto nbr_intersection_properties1 =
+    cugraph::detail::allocate_optional_dataframe_buffer<optional_property_buffer_value_type>(
+      0, handle.get_stream());
+
   if constexpr (GraphViewType::is_multi_gpu) {
     auto& minor_comm           = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
     auto const minor_comm_rank = minor_comm.get_rank();
@@ -929,6 +1115,19 @@ nbr_intersection(raft::handle_t const& handle,
     std::vector<rmm::device_uvector<vertex_t>> edge_partition_nbr_intersection_indices{};
     edge_partition_nbr_intersection_sizes.reserve(graph_view.number_of_local_edge_partitions());
     edge_partition_nbr_intersection_indices.reserve(graph_view.number_of_local_edge_partitions());
+
+    [[maybe_unused]] optional_nbr_intersected_edge_partitions_t
+      edge_partition_nbr_intersection_property0{};
+    [[maybe_unused]] optional_nbr_intersected_edge_partitions_t
+      edge_partition_nbr_intersection_property1{};
+
+    if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      edge_partition_nbr_intersection_property0.reserve(
+        graph_view.number_of_local_edge_partitions());
+      edge_partition_nbr_intersection_property1.reserve(
+        graph_view.number_of_local_edge_partitions());
+    }
+
     for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) {
       auto rx_v_pair_counts =
         host_scalar_allgather(minor_comm, input_counts[i], handle.get_stream());
@@ -944,6 +1143,15 @@ nbr_intersection(raft::handle_t const& handle,
       rmm::device_uvector<edge_t> rx_v_pair_nbr_intersection_sizes(size_t{0}, handle.get_stream());
       rmm::device_uvector<vertex_t> rx_v_pair_nbr_intersection_indices(size_t{0},
                                                                        handle.get_stream());
+
+      [[maybe_unused]] auto rx_v_pair_nbr_intersection_properties0 =
+        cugraph::detail::allocate_optional_dataframe_buffer<optional_property_buffer_value_type>(
+          0, handle.get_stream());
+
+      [[maybe_unused]] auto rx_v_pair_nbr_intersection_properties1 =
+        cugraph::detail::allocate_optional_dataframe_buffer<optional_property_buffer_value_type>(
+          0, handle.get_stream());
+
       std::vector<size_t> rx_v_pair_nbr_intersection_index_tx_counts(size_t{0});
       {
         auto vertex_pair_buffer = allocate_dataframe_buffer<thrust::tuple<vertex_t, vertex_t>>(
@@ -966,6 +1174,9 @@ nbr_intersection(raft::handle_t const& handle,
         auto edge_partition =
           edge_partition_device_view_t<vertex_t, edge_t, GraphViewType::is_multi_gpu>(
             graph_view.local_edge_partition_view(i));
+
+        auto edge_partition_e_value_input =
+          edge_partition_e_input_device_view_t(edge_value_input, i);
         auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i);
 
         rx_v_pair_nbr_intersection_sizes.resize(
@@ -1003,6 +1214,25 @@ nbr_intersection(raft::handle_t const& handle,
         rx_v_pair_nbr_intersection_indices.resize(
           rx_v_pair_nbr_intersection_offsets.back_element(handle.get_stream()),
           handle.get_stream());
+
+        optional_property_buffer_view_t rx_v_pair_optional_nbr_intersection_properties0{};
+        optional_property_buffer_view_t rx_v_pair_optional_nbr_intersection_properties1{};
+
+        if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+          rx_v_pair_nbr_intersection_properties0.resize(rx_v_pair_nbr_intersection_indices.size(),
+                                                        handle.get_stream());
+          rx_v_pair_nbr_intersection_properties1.resize(rx_v_pair_nbr_intersection_indices.size(),
+                                                        handle.get_stream());
+
+          rx_v_pair_optional_nbr_intersection_properties0 =
+            raft::device_span<edge_property_value_t>(rx_v_pair_nbr_intersection_properties0.data(),
+                                                     rx_v_pair_nbr_intersection_properties0.size());
+
+          rx_v_pair_optional_nbr_intersection_properties1 =
+            raft::device_span<edge_property_value_t>(rx_v_pair_nbr_intersection_properties1.data(),
+                                                     rx_v_pair_nbr_intersection_properties1.size());
+        }
+
         if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) {
           auto second_element_to_idx_map =
             detail::kv_cuco_store_find_device_view_t((*major_to_idx_map_ptr)->view());
@@ -1016,33 +1246,70 @@ nbr_intersection(raft::handle_t const& handle,
               decltype(get_dataframe_buffer_begin(vertex_pair_buffer)),
               vertex_t,
               edge_t,
+              edge_partition_e_input_device_view_t,
+              optional_property_buffer_view_t,
               true>{nullptr,
                     raft::device_span<size_t const>(),
                     raft::device_span<vertex_t const>(),
+                    optional_property_buffer_view_t{},
                     second_element_to_idx_map,
                     raft::device_span<size_t const>((*major_nbr_offsets).data(),
                                                     (*major_nbr_offsets).size()),
                     raft::device_span<vertex_t const>((*major_nbr_indices).data(),
                                                       (*major_nbr_indices).size()),
+                    optional_major_nbr_properties,
                     edge_partition,
+                    edge_partition_e_value_input,
                     get_dataframe_buffer_begin(vertex_pair_buffer),
                     raft::device_span<size_t const>(rx_v_pair_nbr_intersection_offsets.data(),
                                                     rx_v_pair_nbr_intersection_offsets.size()),
                     raft::device_span<vertex_t>(rx_v_pair_nbr_intersection_indices.data(),
                                                 rx_v_pair_nbr_intersection_indices.size()),
+                    rx_v_pair_optional_nbr_intersection_properties0,
+                    rx_v_pair_optional_nbr_intersection_properties1,
+
                     invalid_vertex_id<vertex_t>::value});
+
         } else {
           CUGRAPH_FAIL("unimplemented.");
         }
 
-        rx_v_pair_nbr_intersection_indices.resize(
-          thrust::distance(rx_v_pair_nbr_intersection_indices.begin(),
-                           thrust::remove(handle.get_thrust_policy(),
-                                          rx_v_pair_nbr_intersection_indices.begin(),
-                                          rx_v_pair_nbr_intersection_indices.end(),
-                                          invalid_vertex_id<vertex_t>::value)),
-          handle.get_stream());
-        rx_v_pair_nbr_intersection_indices.shrink_to_fit(handle.get_stream());
+        if constexpr (std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+          rx_v_pair_nbr_intersection_indices.resize(
+            thrust::distance(rx_v_pair_nbr_intersection_indices.begin(),
+                             thrust::remove(handle.get_thrust_policy(),
+                                            rx_v_pair_nbr_intersection_indices.begin(),
+                                            rx_v_pair_nbr_intersection_indices.end(),
+                                            invalid_vertex_id<vertex_t>::value)),
+            handle.get_stream());
+          rx_v_pair_nbr_intersection_indices.shrink_to_fit(handle.get_stream());
+        } else {
+          auto common_nbr_and_properties_begin = thrust::make_zip_iterator(
+            thrust::make_tuple(rx_v_pair_nbr_intersection_indices.begin(),
+                               rx_v_pair_nbr_intersection_properties0.begin(),
+                               rx_v_pair_nbr_intersection_properties1.begin()));
+
+          auto last = thrust::remove_if(
+            handle.get_thrust_policy(),
+            common_nbr_and_properties_begin,
+            common_nbr_and_properties_begin + rx_v_pair_nbr_intersection_indices.size(),
+            [] __device__(auto nbr_p0_p1) {
+              return thrust::get<0>(nbr_p0_p1) == invalid_vertex_id<vertex_t>::value;
+            });
+
+          rx_v_pair_nbr_intersection_indices.resize(
+            thrust::distance(common_nbr_and_properties_begin, last), handle.get_stream());
+
+          rx_v_pair_nbr_intersection_indices.shrink_to_fit(handle.get_stream());
+
+          rx_v_pair_nbr_intersection_properties0.resize(rx_v_pair_nbr_intersection_indices.size(),
+                                                        handle.get_stream());
+          rx_v_pair_nbr_intersection_properties0.shrink_to_fit(handle.get_stream());
+
+          rx_v_pair_nbr_intersection_properties1.resize(rx_v_pair_nbr_intersection_indices.size(),
+                                                        handle.get_stream());
+          rx_v_pair_nbr_intersection_properties1.shrink_to_fit(handle.get_stream());
+        }
 
         thrust::inclusive_scan(handle.get_thrust_policy(),
                                rx_v_pair_nbr_intersection_sizes.begin(),
@@ -1159,6 +1426,15 @@ nbr_intersection(raft::handle_t const& handle,
 
       rmm::device_uvector<vertex_t> combined_nbr_intersection_indices(size_t{0},
                                                                       handle.get_stream());
+
+      [[maybe_unused]] auto combined_nbr_intersection_properties0 =
+        cugraph::detail::allocate_optional_dataframe_buffer<optional_property_buffer_value_type>(
+          size_t{0}, handle.get_stream());
+
+      [[maybe_unused]] auto combined_nbr_intersection_properties1 =
+        cugraph::detail::allocate_optional_dataframe_buffer<optional_property_buffer_value_type>(
+          size_t{0}, handle.get_stream());
+
       {
         std::vector<int> ranks(minor_comm_size);
         std::iota(ranks.begin(), ranks.end(), int{0});
@@ -1194,26 +1470,108 @@ nbr_intersection(raft::handle_t const& handle,
         combined_nbr_intersection_indices.resize(gathered_nbr_intersection_indices.size(),
                                                  handle.get_stream());
 
-        thrust::for_each(
-          handle.get_thrust_policy(),
-          thrust::make_counting_iterator(size_t{0}),
-          thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]),
-          gatherv_indices_t<vertex_t>{
-            rx_v_pair_counts[minor_comm_rank],
-            minor_comm_size,
-            raft::device_span<size_t const>(gathered_nbr_intersection_offsets.data(),
-                                            gathered_nbr_intersection_offsets.size()),
-            raft::device_span<vertex_t const>(gathered_nbr_intersection_indices.data(),
-                                              gathered_nbr_intersection_indices.size()),
-            raft::device_span<size_t const>(combined_nbr_intersection_offsets.data(),
-                                            combined_nbr_intersection_offsets.size()),
-            raft::device_span<vertex_t>(combined_nbr_intersection_indices.data(),
-                                        combined_nbr_intersection_indices.size())});
+        [[maybe_unused]] auto gathered_nbr_intersection_properties0 =
+          cugraph::detail::allocate_optional_dataframe_buffer<optional_property_buffer_value_type>(
+            rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(),
+            handle.get_stream());
+
+        [[maybe_unused]] auto gathered_nbr_intersection_properties1 =
+          cugraph::detail::allocate_optional_dataframe_buffer<optional_property_buffer_value_type>(
+            rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(),
+            handle.get_stream());
+
+        if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+          device_multicast_sendrecv(minor_comm,
+                                    rx_v_pair_nbr_intersection_properties0.begin(),
+                                    rx_v_pair_nbr_intersection_index_tx_counts,
+                                    tx_displacements,
+                                    ranks,
+                                    gathered_nbr_intersection_properties0.begin(),
+                                    gathered_nbr_intersection_index_rx_counts,
+                                    rx_displacements,
+                                    ranks,
+                                    handle.get_stream());
+          rx_v_pair_nbr_intersection_properties0.resize(size_t{0}, handle.get_stream());
+          rx_v_pair_nbr_intersection_properties0.shrink_to_fit(handle.get_stream());
+
+          combined_nbr_intersection_properties0.resize(gathered_nbr_intersection_properties0.size(),
+                                                       handle.get_stream());
+
+          device_multicast_sendrecv(minor_comm,
+                                    rx_v_pair_nbr_intersection_properties1.begin(),
+                                    rx_v_pair_nbr_intersection_index_tx_counts,
+                                    tx_displacements,
+                                    ranks,
+                                    gathered_nbr_intersection_properties1.begin(),
+                                    gathered_nbr_intersection_index_rx_counts,
+                                    rx_displacements,
+                                    ranks,
+                                    handle.get_stream());
+          rx_v_pair_nbr_intersection_properties1.resize(size_t{0}, handle.get_stream());
+          rx_v_pair_nbr_intersection_properties1.shrink_to_fit(handle.get_stream());
+          combined_nbr_intersection_properties1.resize(gathered_nbr_intersection_properties1.size(),
+                                                       handle.get_stream());
+        }
+
+        if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+          thrust::for_each(
+            handle.get_thrust_policy(),
+            thrust::make_counting_iterator(size_t{0}),
+            thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]),
+            gatherv_indices_t<vertex_t, edge_property_value_t, optional_property_buffer_view_t>{
+              rx_v_pair_counts[minor_comm_rank],
+              minor_comm_size,
+              raft::device_span<size_t const>(gathered_nbr_intersection_offsets.data(),
+                                              gathered_nbr_intersection_offsets.size()),
+              raft::device_span<vertex_t const>(gathered_nbr_intersection_indices.data(),
+                                                gathered_nbr_intersection_indices.size()),
+              raft::device_span<size_t const>(combined_nbr_intersection_offsets.data(),
+                                              combined_nbr_intersection_offsets.size()),
+              raft::device_span<vertex_t>(combined_nbr_intersection_indices.data(),
+                                          combined_nbr_intersection_indices.size()),
+              raft::device_span<edge_property_value_t>(
+                gathered_nbr_intersection_properties0.data(),
+                gathered_nbr_intersection_properties0.size()),
+              raft::device_span<edge_property_value_t>(
+                gathered_nbr_intersection_properties1.data(),
+                gathered_nbr_intersection_properties1.size()),
+              raft::device_span<edge_property_value_t>(
+                combined_nbr_intersection_properties0.data(),
+                combined_nbr_intersection_properties0.size()),
+              raft::device_span<edge_property_value_t>(
+                combined_nbr_intersection_properties1.data(),
+                combined_nbr_intersection_properties1.size())});
+
+        } else {
+          thrust::for_each(
+            handle.get_thrust_policy(),
+            thrust::make_counting_iterator(size_t{0}),
+            thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]),
+            gatherv_indices_t<vertex_t, edge_property_value_t, optional_property_buffer_view_t>{
+              rx_v_pair_counts[minor_comm_rank],
+              minor_comm_size,
+              raft::device_span<size_t const>(gathered_nbr_intersection_offsets.data(),
+                                              gathered_nbr_intersection_offsets.size()),
+              raft::device_span<vertex_t const>(gathered_nbr_intersection_indices.data(),
+                                                gathered_nbr_intersection_indices.size()),
+              raft::device_span<size_t const>(combined_nbr_intersection_offsets.data(),
+                                              combined_nbr_intersection_offsets.size()),
+              raft::device_span<vertex_t>(combined_nbr_intersection_indices.data(),
+                                          combined_nbr_intersection_indices.size())
+
+            });
+        }
       }
 
       edge_partition_nbr_intersection_sizes.push_back(std::move(combined_nbr_intersection_sizes));
       edge_partition_nbr_intersection_indices.push_back(
         std::move(combined_nbr_intersection_indices));
+      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        edge_partition_nbr_intersection_property0.push_back(
+          std::move(combined_nbr_intersection_properties0));
+        edge_partition_nbr_intersection_property1.push_back(
+          std::move(combined_nbr_intersection_properties1));
+      }
     }
 
     rmm::device_uvector<edge_t> nbr_intersection_sizes(input_size, handle.get_stream());
@@ -1222,6 +1580,10 @@ nbr_intersection(raft::handle_t const& handle,
       num_nbr_intersection_indices += edge_partition_nbr_intersection_indices[i].size();
     }
     nbr_intersection_indices.resize(num_nbr_intersection_indices, handle.get_stream());
+    if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream());
+      nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream());
+    }
     size_t size_offset{0};
     size_t index_offset{0};
     for (size_t i = 0; i < edge_partition_nbr_intersection_sizes.size(); ++i) {
@@ -1234,6 +1596,19 @@ nbr_intersection(raft::handle_t const& handle,
                    edge_partition_nbr_intersection_indices[i].begin(),
                    edge_partition_nbr_intersection_indices[i].end(),
                    nbr_intersection_indices.begin() + index_offset);
+
+      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        thrust::copy(handle.get_thrust_policy(),
+                     edge_partition_nbr_intersection_property0[i].begin(),
+                     edge_partition_nbr_intersection_property0[i].end(),
+                     nbr_intersection_properties0.begin() + index_offset);
+
+        thrust::copy(handle.get_thrust_policy(),
+                     edge_partition_nbr_intersection_property1[i].begin(),
+                     edge_partition_nbr_intersection_property1[i].end(),
+                     nbr_intersection_properties1.begin() + index_offset);
+      }
+
       index_offset += edge_partition_nbr_intersection_indices[i].size();
     }
     nbr_intersection_offsets.resize(nbr_intersection_sizes.size() + size_t{1}, handle.get_stream());
@@ -1244,11 +1619,13 @@ nbr_intersection(raft::handle_t const& handle,
                            size_first,
                            size_first + nbr_intersection_sizes.size(),
                            nbr_intersection_offsets.begin() + 1);
+
   } else {
     auto edge_partition =
       edge_partition_device_view_t<vertex_t, edge_t, GraphViewType::is_multi_gpu>(
         graph_view.local_edge_partition_view(size_t{0}));
 
+    auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, 0);
     rmm::device_uvector<edge_t> nbr_intersection_sizes(
       input_size,
       handle.get_stream());  // initially store minimum degrees (upper bound for intersection sizes)
@@ -1278,6 +1655,21 @@ nbr_intersection(raft::handle_t const& handle,
 
     nbr_intersection_indices.resize(nbr_intersection_offsets.back_element(handle.get_stream()),
                                     handle.get_stream());
+
+    optional_property_buffer_view_t optional_nbr_intersection_properties0{};
+    optional_property_buffer_view_t optional_nbr_intersection_properties1{};
+
+    if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream());
+      nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream());
+
+      optional_nbr_intersection_properties0 = raft::device_span<edge_property_value_t>(
+        nbr_intersection_properties0.data(), nbr_intersection_properties0.size());
+
+      optional_nbr_intersection_properties1 = raft::device_span<edge_property_value_t>(
+        nbr_intersection_properties1.data(), nbr_intersection_properties1.size());
+    }
+
     if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) {
       thrust::tabulate(
         handle.get_thrust_policy(),
@@ -1288,19 +1680,26 @@ nbr_intersection(raft::handle_t const& handle,
                                                               decltype(vertex_pair_first),
                                                               vertex_t,
                                                               edge_t,
+                                                              edge_partition_e_input_device_view_t,
+                                                              optional_property_buffer_view_t,
                                                               false>{
           nullptr,
           raft::device_span<size_t const>(),
           raft::device_span<vertex_t const>(),
+          optional_property_buffer_view_t{},
           nullptr,
           raft::device_span<size_t const>(),
           raft::device_span<vertex_t const>(),
+          optional_property_buffer_view_t{},
           edge_partition,
+          edge_partition_e_value_input,
           vertex_pair_first,
           raft::device_span<size_t const>(nbr_intersection_offsets.data(),
                                           nbr_intersection_offsets.size()),
           raft::device_span<vertex_t>(nbr_intersection_indices.data(),
                                       nbr_intersection_indices.size()),
+          optional_nbr_intersection_properties0,
+          optional_nbr_intersection_properties1,
           invalid_vertex_id<vertex_t>::value});
     } else {
       CUGRAPH_FAIL("unimplemented.");
@@ -1314,31 +1713,87 @@ nbr_intersection(raft::handle_t const& handle,
                        nbr_intersection_indices.end(),
                        detail::not_equal_t<vertex_t>{invalid_vertex_id<vertex_t>::value}),
       handle.get_stream());
+
+    [[maybe_unused]] auto tmp_properties0 =
+      cugraph::detail::allocate_optional_dataframe_buffer<optional_property_buffer_value_type>(
+        tmp_indices.size(), handle.get_stream());
+
+    [[maybe_unused]] auto tmp_properties1 =
+      cugraph::detail::allocate_optional_dataframe_buffer<optional_property_buffer_value_type>(
+        tmp_indices.size(), handle.get_stream());
+
     size_t num_copied{0};
     size_t num_scanned{0};
+
     while (num_scanned < nbr_intersection_indices.size()) {
       size_t this_scan_size = std::min(
-        size_t{1} << 30,
+        size_t{1} << 27,
         static_cast<size_t>(thrust::distance(nbr_intersection_indices.begin() + num_scanned,
                                              nbr_intersection_indices.end())));
-      num_copied += static_cast<size_t>(thrust::distance(
-        tmp_indices.begin() + num_copied,
-        thrust::copy_if(handle.get_thrust_policy(),
-                        nbr_intersection_indices.begin() + num_scanned,
-                        nbr_intersection_indices.begin() + num_scanned + this_scan_size,
-                        tmp_indices.begin() + num_copied,
-                        detail::not_equal_t<vertex_t>{invalid_vertex_id<vertex_t>::value})));
+      if constexpr (std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        num_copied += static_cast<size_t>(thrust::distance(
+          tmp_indices.begin() + num_copied,
+          thrust::copy_if(handle.get_thrust_policy(),
+                          nbr_intersection_indices.begin() + num_scanned,
+                          nbr_intersection_indices.begin() + num_scanned + this_scan_size,
+                          tmp_indices.begin() + num_copied,
+                          detail::not_equal_t<vertex_t>{invalid_vertex_id<vertex_t>::value})));
+      } else {
+        auto zipped_itr_to_indices_and_properties_begin =
+          thrust::make_zip_iterator(thrust::make_tuple(nbr_intersection_indices.begin(),
+                                                       nbr_intersection_properties0.begin(),
+                                                       nbr_intersection_properties1.begin()));
+
+        auto zipped_itr_to_tmps_begin = thrust::make_zip_iterator(thrust::make_tuple(
+          tmp_indices.begin(), tmp_properties0.begin(), tmp_properties1.begin()));
+
+        num_copied += static_cast<size_t>(thrust::distance(
+          zipped_itr_to_tmps_begin + num_copied,
+          thrust::copy_if(handle.get_thrust_policy(),
+                          zipped_itr_to_indices_and_properties_begin + num_scanned,
+                          zipped_itr_to_indices_and_properties_begin + num_scanned + this_scan_size,
+                          zipped_itr_to_tmps_begin + num_copied,
+                          [] __device__(auto nbr_p0_p1) {
+                            // Keep a (neighbor, property0, property1) entry only when its
+                            // neighbor id is valid; the property values just travel along.
+                            auto nbr = thrust::get<0>(nbr_p0_p1);
+                            return nbr != invalid_vertex_id<vertex_t>::value;
+                          })));
+      }
       num_scanned += this_scan_size;
     }
     nbr_intersection_indices = std::move(tmp_indices);
+    if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      nbr_intersection_properties0 = std::move(tmp_properties0);
+      nbr_intersection_properties1 = std::move(tmp_properties1);
+    }
+
 #else
-    nbr_intersection_indices.resize(
-      thrust::distance(nbr_intersection_indices.begin(),
-                       thrust::remove(handle.get_thrust_policy(),
-                                      nbr_intersection_indices.begin(),
-                                      nbr_intersection_indices.end(),
-                                      invalid_vertex_id<vertex_t>::value)),
-      handle.get_stream());
+
+    if constexpr (std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+      nbr_intersection_indices.resize(
+        thrust::distance(nbr_intersection_indices.begin(),
+                         thrust::remove(handle.get_thrust_policy(),
+                                        nbr_intersection_indices.begin(),
+                                        nbr_intersection_indices.end(),
+                                        invalid_vertex_id<vertex_t>::value)),
+        handle.get_stream());
+    } else {
+      nbr_intersection_indices.resize(
+        thrust::distance(zipped_itr_to_indices_and_properties_begin,
+                         thrust::remove_if(handle.get_thrust_policy(),
+                                           zipped_itr_to_indices_and_properties_begin,
+                                           zipped_itr_to_indices_and_properties_begin +
+                                             nbr_intersection_indices.size(),
+                                           [] __device__(auto nbr_p0_p1) {
+                                             return thrust::get<0>(nbr_p0_p1) ==
+                                                    invalid_vertex_id<vertex_t>::value;
+                                           })),
+        handle.get_stream());
+
+      nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream());
+      nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream());
+    }
 #endif
 
     thrust::inclusive_scan(handle.get_thrust_policy(),
@@ -1349,7 +1804,16 @@ nbr_intersection(raft::handle_t const& handle,
 
   // 5. Return
 
-  return std::make_tuple(std::move(nbr_intersection_offsets), std::move(nbr_intersection_indices));
+  if constexpr (std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+    return std::make_tuple(std::move(nbr_intersection_offsets),
+                           std::move(nbr_intersection_indices));
+
+  } else {
+    return std::make_tuple(std::move(nbr_intersection_offsets),
+                           std::move(nbr_intersection_indices),
+                           std::move(nbr_intersection_properties0),
+                           std::move(nbr_intersection_properties1));
+  }
 }
 
 }  // namespace detail
diff --git a/cpp/src/prims/detail/optional_dataframe_buffer.hpp b/cpp/src/prims/detail/optional_dataframe_buffer.hpp
new file mode 100644
index 00000000000..dd40e6932e4
--- /dev/null
+++ b/cpp/src/prims/detail/optional_dataframe_buffer.hpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cugraph/utilities/dataframe_buffer.hpp>
+
+namespace cugraph {
+
+namespace detail {
+
+// Maps Iterator to its value type (`value` is void for void*); thrust::iterator_traits cannot be
+// used directly because a reference to void (needed when Iterator is void*) is not allowed.
+template <typename Iterator, typename Enable = void>
+struct optional_dataframe_buffer_value_type_t;  // primary template, defined via specializations
+
+template <typename Iterator>
+struct optional_dataframe_buffer_value_type_t<Iterator,
+                                              std::enable_if_t<!std::is_same_v<Iterator, void*>>> {
+  using value = typename thrust::iterator_traits<Iterator>::value_type;  // non-void* iterator
+};
+
+template <typename Iterator>
+struct optional_dataframe_buffer_value_type_t<Iterator,
+                                              std::enable_if_t<std::is_same_v<Iterator, void*>>> {
+  using value = void;  // void* carries no element type
+};
+
+template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>  // T is void
+std::byte allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream)
+{
+  return std::byte{0};  // dummy: size and stream are ignored, nothing is allocated
+}
+
+template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>  // T is not void
+auto allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream)
+{
+  return allocate_dataframe_buffer<T>(size, stream);  // forwards to the dataframe allocator
+}
+
+template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>  // T is void
+void* get_optional_dataframe_buffer_begin(std::byte& optional_dataframe_buffer)
+{
+  return static_cast<void*>(nullptr);  // the dummy byte has no elements; begin is a null void*
+}
+
+template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>  // T is not void
+auto get_optional_dataframe_buffer_begin(
+  std::add_lvalue_reference_t<decltype(allocate_dataframe_buffer<T>(
+    size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer)  // lvalue ref to buffer
+{
+  return get_dataframe_buffer_begin(optional_dataframe_buffer);
+}
+
+template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>  // T is void
+void resize_optional_dataframe_buffer(std::byte& optional_dataframe_buffer,
+                                      size_t new_buffer_size,
+                                      rmm::cuda_stream_view stream_view)
+{
+  return;  // no-op: the dummy byte is never resized, all arguments are ignored
+}
+
+template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>  // T is not void
+void resize_optional_dataframe_buffer(
+  std::add_lvalue_reference_t<decltype(allocate_dataframe_buffer<T>(
+    size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer,  // lvalue ref to buffer
+  size_t new_buffer_size,
+  rmm::cuda_stream_view stream_view)
+{
+  return resize_dataframe_buffer(optional_dataframe_buffer, new_buffer_size, stream_view);
+}
+
+template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>  // T is void
+void shrink_to_fit_optional_dataframe_buffer(std::byte& optional_dataframe_buffer,
+                                             rmm::cuda_stream_view stream_view)
+{
+  return;  // no-op: the dummy byte owns no storage to shrink
+}
+
+template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>  // T is not void
+void shrink_to_fit_optional_dataframe_buffer(
+  std::add_lvalue_reference_t<decltype(allocate_dataframe_buffer<T>(
+    size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer,  // lvalue ref to buffer
+  rmm::cuda_stream_view stream_view)
+{
+  return shrink_to_fit_dataframe_buffer(optional_dataframe_buffer, stream_view);
+}
+}  // namespace detail
+
+}  // namespace cugraph
diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh
index d69bb8af25e..640c3c04bfd 100644
--- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh
+++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh
@@ -21,7 +21,7 @@
 #include <utilities/collect_comm.cuh>
 
 #include <cugraph/edge_partition_device_view.cuh>
-#include <cugraph/edge_partition_endpoint_property_device_view.cuh>
+#include <cugraph/edge_partition_edge_property_device_view.cuh>
 #include <cugraph/edge_src_dst_property.hpp>
 #include <cugraph/graph_view.hpp>
 #include <cugraph/utilities/device_functors.cuh>
@@ -97,6 +97,7 @@ struct indirection_compare_less_t {
 
 template <typename GraphViewType,
           typename VertexValueInputIterator,
+          typename EdgeValueInputIterator,
           typename IntersectionOp,
           typename VertexPairIndexIterator,
           typename VertexPairIterator,
@@ -111,6 +112,8 @@ struct call_intersection_op_t {
   IntersectionOp intersection_op{};
   size_t const* nbr_offsets{nullptr};
   typename GraphViewType::vertex_type const* nbr_indices{nullptr};
+  EdgeValueInputIterator nbr_intersection_properties0{nullptr};
+  EdgeValueInputIterator nbr_intersection_properties1{nullptr};
   VertexPairIndexIterator major_minor_pair_index_first{};
   VertexPairIterator major_minor_pair_first{};
   VertexPairValueOutputIterator major_minor_pair_value_output_first{};
@@ -118,6 +121,8 @@ struct call_intersection_op_t {
   __device__ void operator()(size_t i) const
   {
     using property_t = typename thrust::iterator_traits<VertexValueInputIterator>::value_type;
+    using edge_property_value_t =
+      typename thrust::iterator_traits<EdgeValueInputIterator>::value_type;
 
     auto index        = *(major_minor_pair_index_first + i);
     auto pair         = *(major_minor_pair_first + index);
@@ -128,6 +133,25 @@ struct call_intersection_op_t {
     auto intersection = raft::device_span<typename GraphViewType::vertex_type const>(
       nbr_indices + nbr_offsets[i], nbr_indices + nbr_offsets[i + 1]);
 
+    std::conditional_t<!std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
+                       raft::device_span<edge_property_value_t const>,
+                       std::byte /* dummy */>
+      properties0{};
+
+    std::conditional_t<!std::is_same_v<edge_property_value_t, thrust::nullopt_t>,
+                       raft::device_span<edge_property_value_t const>,
+                       std::byte /* dummy */>
+      properties1{};
+
+    if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {  // edge values set
+      properties0 = raft::device_span<edge_property_value_t const>(
+        nbr_intersection_properties0 + nbr_offsets[i],
+        nbr_intersection_properties0 + nbr_offsets[i + 1]);
+      properties1 = raft::device_span<edge_property_value_t const>(
+        nbr_intersection_properties1 + nbr_offsets[i],
+        nbr_intersection_properties1 + nbr_offsets[i + 1]);
+    }
+
     property_t src_prop{};
     property_t dst_prop{};
     if (unique_vertices) {
@@ -149,8 +173,9 @@ struct call_intersection_op_t {
       src_prop          = *(vertex_property_first + src_offset);
       dst_prop          = *(vertex_property_first + dst_offset);
     }
+
     *(major_minor_pair_value_output_first + index) =
-      intersection_op(src, dst, src_prop, dst_prop, intersection);
+      intersection_op(src, dst, src_prop, dst_prop, intersection, properties0, properties1);
   }
 };
 
@@ -165,7 +190,8 @@ struct call_intersection_op_t {
  *
  * @tparam GraphViewType Type of the passed non-owning graph object.
  * @tparam VertexPairIterator Type of the iterator for input vertex pairs.
- * @tparam VertexValueInputWrapper Type of the wrapper for vertex property values.
+ * @tparam VertexValueInputIterator Type of the iterator for vertex property values.
+ * @tparam EdgeValueInputIterator Type of the wrapper (view) used to access edge property values.
  * @tparam IntersectionOp Type of the quinary per intersection operator.
  * @tparam VertexPairValueOutputIterator Type of the iterator for vertex pair output property
  * variables.
@@ -176,6 +202,10 @@ struct call_intersection_op_t {
  * @param vertex_pair_last Iterator pointing to the last (exclusive) input vertex pair.
  * @param vertex_src_value_input Wrapper used to access vertex input property values (for the
  * vertices assigned to this process in multi-GPU).
+ * @param edge_value_input Wrapper used to access edge input property values (for the edges assigned
+ * to this process in multi-GPU). Use either cugraph::edge_property_t::view() (if @p intersection_op
+ * needs to access edge property values) or cugraph::edge_dummy_property_t::view() (if @p
+ * intersection_op does not access edge property values).
  * @param intersection_op quinary operator takes first vertex of the pair, second vertex of the
  * pair, property values for the first vertex, property values for the second vertex, and a list of
  * vertices in the intersection of the first & second vertices' destination neighbors and returns an
@@ -188,11 +218,13 @@ struct call_intersection_op_t {
 template <typename GraphViewType,
           typename VertexPairIterator,
           typename VertexValueInputIterator,
+          typename EdgeValueInputIterator,
           typename IntersectionOp,
           typename VertexPairValueOutputIterator>
 void per_v_pair_transform_dst_nbr_intersection(
   raft::handle_t const& handle,
   GraphViewType const& graph_view,
+  EdgeValueInputIterator edge_value_input,
   VertexPairIterator vertex_pair_first,
   VertexPairIterator vertex_pair_last,
   VertexValueInputIterator vertex_value_input_first,
@@ -205,7 +237,8 @@ void per_v_pair_transform_dst_nbr_intersection(
   using vertex_t   = typename GraphViewType::vertex_type;
   using edge_t     = typename GraphViewType::edge_type;
   using property_t = typename thrust::iterator_traits<VertexValueInputIterator>::value_type;
-  using result_t   = typename thrust::iterator_traits<VertexPairValueOutputIterator>::value_type;
+  using edge_property_value_t = typename EdgeValueInputIterator::value_type;
+  using result_t = typename thrust::iterator_traits<VertexPairValueOutputIterator>::value_type;
 
   CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");
 
@@ -344,16 +377,40 @@ void per_v_pair_transform_dst_nbr_intersection(
 
       // FIXME: better restrict detail::nbr_intersection input vertex pairs to a single edge
       // partition? This may provide additional performance improvement opportunities???
+
       auto chunk_vertex_pair_first = thrust::make_transform_iterator(
         chunk_vertex_pair_index_first,
         detail::indirection_t<size_t, VertexPairIterator>{vertex_pair_first});
-      auto [intersection_offsets, intersection_indices] =
-        detail::nbr_intersection(handle,
-                                 graph_view,
-                                 chunk_vertex_pair_first,
-                                 chunk_vertex_pair_first + this_chunk_size,
-                                 std::array<bool, 2>{true, true},
-                                 do_expensive_check);
+
+      rmm::device_uvector<size_t> intersection_offsets(size_t{0}, handle.get_stream());
+      rmm::device_uvector<vertex_t> intersection_indices(size_t{0}, handle.get_stream());
+      [[maybe_unused]] rmm::device_uvector<edge_property_value_t> r_nbr_intersection_properties0(
+        size_t{0}, handle.get_stream());
+      [[maybe_unused]] rmm::device_uvector<edge_property_value_t> r_nbr_intersection_properties1(
+        size_t{0}, handle.get_stream());
+
+      if constexpr (!std::is_same_v<edge_property_value_t, thrust::nullopt_t>) {
+        std::tie(intersection_offsets,
+                 intersection_indices,
+                 r_nbr_intersection_properties0,
+                 r_nbr_intersection_properties1) =
+          detail::nbr_intersection(handle,
+                                   graph_view,
+                                   edge_value_input,
+                                   chunk_vertex_pair_first,
+                                   chunk_vertex_pair_first + this_chunk_size,
+                                   std::array<bool, 2>{true, true},
+                                   do_expensive_check);
+      } else {
+        std::tie(intersection_offsets, intersection_indices) =
+          detail::nbr_intersection(handle,
+                                   graph_view,
+                                   edge_value_input,
+                                   chunk_vertex_pair_first,
+                                   chunk_vertex_pair_first + this_chunk_size,
+                                   std::array<bool, 2>{true, true},
+                                   do_expensive_check);
+      }
 
       if (unique_vertices) {
         auto vertex_value_input_for_unique_vertices_first =
@@ -362,38 +419,45 @@ void per_v_pair_transform_dst_nbr_intersection(
           handle.get_thrust_policy(),
           thrust::make_counting_iterator(size_t{0}),
           thrust::make_counting_iterator(this_chunk_size),
-          detail::call_intersection_op_t<GraphViewType,
-                                         decltype(vertex_value_input_for_unique_vertices_first),
-                                         IntersectionOp,
-                                         decltype(chunk_vertex_pair_index_first),
-                                         VertexPairIterator,
-                                         VertexPairValueOutputIterator>{
-            edge_partition,
-            thrust::make_optional<raft::device_span<vertex_t const>>((*unique_vertices).data(),
-                                                                     (*unique_vertices).size()),
-            vertex_value_input_for_unique_vertices_first,
-            intersection_op,
-            intersection_offsets.data(),
-            intersection_indices.data(),
-            chunk_vertex_pair_index_first,
-            vertex_pair_first,
-            vertex_pair_value_output_first});
+          detail::call_intersection_op_t<
+            GraphViewType,
+            decltype(vertex_value_input_for_unique_vertices_first),
+            typename decltype(r_nbr_intersection_properties0)::const_pointer,
+            IntersectionOp,
+            decltype(chunk_vertex_pair_index_first),
+            VertexPairIterator,
+            VertexPairValueOutputIterator>{edge_partition,
+                                           thrust::make_optional<raft::device_span<vertex_t const>>(
+                                             (*unique_vertices).data(), (*unique_vertices).size()),
+                                           vertex_value_input_for_unique_vertices_first,
+                                           intersection_op,
+                                           intersection_offsets.data(),
+                                           intersection_indices.data(),
+                                           r_nbr_intersection_properties0.data(),
+                                           r_nbr_intersection_properties1.data(),
+                                           chunk_vertex_pair_index_first,
+                                           vertex_pair_first,
+                                           vertex_pair_value_output_first});
       } else {
         thrust::for_each(handle.get_thrust_policy(),
                          thrust::make_counting_iterator(size_t{0}),
                          thrust::make_counting_iterator(this_chunk_size),
-                         detail::call_intersection_op_t<GraphViewType,
-                                                        VertexValueInputIterator,
-                                                        IntersectionOp,
-                                                        decltype(chunk_vertex_pair_index_first),
-                                                        VertexPairIterator,
-                                                        VertexPairValueOutputIterator>{
+                         detail::call_intersection_op_t<
+                           GraphViewType,
+                           VertexValueInputIterator,
+                           typename decltype(r_nbr_intersection_properties0)::const_pointer,
+                           IntersectionOp,
+                           decltype(chunk_vertex_pair_index_first),
+                           VertexPairIterator,
+                           VertexPairValueOutputIterator>{
                            edge_partition,
                            thrust::optional<raft::device_span<vertex_t const>>{thrust::nullopt},
                            vertex_value_input_first,
                            intersection_op,
                            intersection_offsets.data(),
                            intersection_indices.data(),
+                           r_nbr_intersection_properties0.data(),
+                           r_nbr_intersection_properties1.data(),
                            chunk_vertex_pair_index_first,
                            vertex_pair_first,
                            vertex_pair_value_output_first});
diff --git a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh
index b5cfdf4b16b..f773a102959 100644
--- a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh
+++ b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh
@@ -65,6 +65,7 @@ struct compute_chunk_id_t {
 template <typename GraphViewType,
           typename EdgePartitionSrcValueInputWrapper,
           typename EdgePartitionDstValueInputWrapper,
+          typename EdgeValueInputIterator,
           typename IntersectionOp,
           typename VertexPairIterator>
 struct call_intersection_op_t {
@@ -77,6 +78,8 @@ struct call_intersection_op_t {
   IntersectionOp intersection_op{};
   size_t const* nbr_offsets{nullptr};
   typename GraphViewType::vertex_type const* nbr_indices{nullptr};
+  EdgeValueInputIterator nbr_intersection_properties0{nullptr};
+  EdgeValueInputIterator nbr_intersection_properties1{nullptr};
   VertexPairIterator major_minor_pair_first{};
 
   __device__ auto operator()(size_t i) const
@@ -342,6 +345,7 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v(
       auto [intersection_offsets, intersection_indices] =
         detail::nbr_intersection(handle,
                                  graph_view,
+                                 cugraph::edge_dummy_property_t{}.view(),
                                  chunk_vertex_pair_first,
                                  chunk_vertex_pair_first + this_chunk_size,
                                  std::array<bool, 2>{true, true},
@@ -362,6 +366,7 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v(
                        detail::call_intersection_op_t<GraphViewType,
                                                       edge_partition_src_input_device_view_t,
                                                       edge_partition_dst_input_device_view_t,
+                                                      std::nullptr_t,
                                                       IntersectionOp,
                                                       decltype(chunk_vertex_pair_first)>{
                          edge_partition,
@@ -370,6 +375,8 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v(
                          intersection_op,
                          intersection_offsets.data(),
                          intersection_indices.data(),
+                         nullptr,
+                         nullptr,
                          chunk_vertex_pair_first});
 
       rmm::device_uvector<vertex_t> endpoint_vertices(size_t{0}, handle.get_stream());
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index e91b7e71537..da1e0e50919 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -384,6 +384,10 @@ ConfigureTest(WEAKLY_CONNECTED_COMPONENTS_TEST components/weakly_connected_compo
 # - SIMILARITY tests ------------------------------------------------------------------------------
 ConfigureTest(SIMILARITY_TEST link_prediction/similarity_test.cpp)
 
+###################################################################################################
+# - WEIGHTED_SIMILARITY tests ---------------------------------------------------------------------
+ConfigureTest(WEIGHTED_SIMILARITY_TEST link_prediction/weighted_similarity_test.cpp)
+
 ###################################################################################################
 # - RANDOM_WALKS tests ----------------------------------------------------------------------------
 #  FIXME: Rename to random_walks_test.cu once the legacy implementation is deleted
@@ -627,6 +631,12 @@ if(BUILD_CUGRAPH_MG_TESTS)
                     prims/mg_per_v_pair_transform_dst_nbr_intersection.cu)
     target_link_libraries(MG_PER_V_PAIR_TRANSFORM_DST_NBR_INTERSECTION_TEST PRIVATE cuco::cuco)
 
+    ###############################################################################################
+    # - MG PRIMS PER_V_PAIR_TRANSFORM_DST_NBR_WEIGHTED_INTERSECTION tests -------------------------
+    ConfigureTestMG(MG_PER_V_PAIR_TRANSFORM_DST_NBR_WEIGHTED_INTERSECTION_TEST
+    prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu)
+    target_link_libraries(MG_PER_V_PAIR_TRANSFORM_DST_NBR_WEIGHTED_INTERSECTION_TEST PRIVATE cuco::cuco)
+
     ###############################################################################################
     # - MG NBR SAMPLING tests ---------------------------------------------------------------------
     ConfigureTestMG(MG_UNIFORM_NEIGHBOR_SAMPLING_TEST sampling/mg_uniform_neighbor_sampling.cu)
@@ -636,6 +646,10 @@ if(BUILD_CUGRAPH_MG_TESTS)
     # - MG RANDOM_WALKS tests ---------------------------------------------------------------------
     ConfigureTestMG(MG_RANDOM_WALKS_TEST sampling/mg_random_walks_test.cpp)
 
+    ###############################################################################################
+    # - MG WEIGHTED_SIMILARITY tests --------------------------------------------------------------
+    ConfigureTestMG(MG_WEIGHTED_SIMILARITY_TEST link_prediction/mg_weighted_similarity_test.cpp)
+
     ###############################################################################################
     # - MG SIMILARITY tests -----------------------------------------------------------------------
     ConfigureTestMG(MG_SIMILARITY_TEST link_prediction/mg_similarity_test.cpp)
diff --git a/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp
new file mode 100644
index 00000000000..cf3179d51a3
--- /dev/null
+++ b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp
@@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <utilities/base_fixture.hpp>
+#include <utilities/device_comm_wrapper.hpp>
+#include <utilities/mg_utilities.hpp>
+#include <utilities/test_graphs.hpp>
+#include <utilities/test_utilities.hpp>
+#include <utilities/thrust_wrapper.hpp>
+
+#include <cugraph/algorithms.hpp>
+#include <cugraph/detail/shuffle_wrappers.hpp>
+#include <cugraph/utilities/high_res_timer.hpp>
+
+#include <link_prediction/similarity_compare.hpp>
+
+struct Weighted_Similarity_Usecase {
+  bool use_weights{true};  // forwarded to test_functor.run(); also selects the weighted host check
+  size_t max_seeds{std::numeric_limits<size_t>::max()};  // total start-vertex cap across all ranks
+  bool check_correctness{true};  // when false, the host-side result comparison is skipped
+};
+
+template <typename input_usecase_t>
+class Tests_MGSimilarity
+  : public ::testing::TestWithParam<std::tuple<Weighted_Similarity_Usecase, input_usecase_t>> {
+ public:
+  Tests_MGSimilarity() {}
+
+  static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); }
+
+  static void TearDownTestCase() { handle_.reset(); }
+
+  virtual void SetUp() {}
+  virtual void TearDown() {}
+
+  template <typename vertex_t, typename edge_t, typename weight_t, typename test_functor_t>
+  void run_current_test(
+    std::tuple<Weighted_Similarity_Usecase const&, input_usecase_t const&> param,
+    test_functor_t const& test_functor)
+  {
+    auto [similarity_usecase, input_usecase] = param;
+    HighResTimer hr_timer{};
+
+    auto const comm_rank = handle_->get_comms().get_rank();
+    auto const comm_size = handle_->get_comms().get_size();
+
+    // 1. create MG graph
+
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      handle_->get_comms().barrier();
+      hr_timer.start("MG Construct graph");
+    }
+
+    auto [mg_graph, mg_edge_weights, d_mg_renumber_map_labels] =
+      cugraph::test::construct_graph<vertex_t, edge_t, weight_t, false, true>(
+        *handle_, input_usecase, true, true, false, true);
+
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      handle_->get_comms().barrier();
+      hr_timer.stop();
+      hr_timer.display_and_clear(std::cout);
+    }
+
+    // 2. pair each start vertex with its 2-hop neighbors, then run similarity
+
+    auto mg_graph_view = mg_graph.view();
+    auto mg_edge_weight_view =
+      mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt;
+
+    rmm::device_uvector<vertex_t> d_start_vertices(
+      std::min(
+        static_cast<size_t>(mg_graph_view.local_vertex_partition_range_size()),
+        similarity_usecase.max_seeds / comm_size +
+          (static_cast<size_t>(comm_rank) < similarity_usecase.max_seeds % comm_size ? 1 : 0)),
+      handle_->get_stream());
+
+    cugraph::test::populate_vertex_ids(
+      *handle_, d_start_vertices, mg_graph_view.local_vertex_partition_range_first());
+
+    auto [d_offsets, two_hop_nbrs] = cugraph::k_hop_nbrs(
+      *handle_,
+      mg_graph_view,
+      raft::device_span<vertex_t const>(d_start_vertices.data(), d_start_vertices.size()),
+      2);
+
+    auto h_start_vertices = cugraph::test::to_host(*handle_, d_start_vertices);
+    auto h_offsets        = cugraph::test::to_host(*handle_, d_offsets);
+
+    std::vector<vertex_t> h_v1(h_offsets.back());
+    for (size_t i = 0; i < h_start_vertices.size(); ++i) {
+      std::fill(h_v1.begin() + h_offsets[i], h_v1.begin() + h_offsets[i + 1], h_start_vertices[i]);
+    }
+
+    auto d_v1 = cugraph::test::to_device(*handle_, h_v1);
+    auto d_v2 = std::move(two_hop_nbrs);
+
+    std::tie(d_v1, d_v2, std::ignore, std::ignore, std::ignore) =
+      cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<
+        vertex_t,
+        edge_t,
+        weight_t,
+        int32_t>(*handle_,
+                 std::move(d_v1),
+                 std::move(d_v2),
+                 std::nullopt,
+                 std::nullopt,
+                 std::nullopt,
+                 mg_graph_view.vertex_partition_range_lasts());
+
+    std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs{
+      {d_v1.data(), d_v1.size()}, {d_v2.data(), d_v2.size()}};
+
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      handle_->get_comms().barrier();
+      hr_timer.start("MG similarity test");
+    }
+
+    auto result_score = test_functor.run(
+      *handle_, mg_graph_view, mg_edge_weight_view, vertex_pairs, similarity_usecase.use_weights);
+
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      handle_->get_comms().barrier();
+      hr_timer.stop();
+      hr_timer.display_and_clear(std::cout);
+    }
+
+    // 3. compare MG results with a host-side reference computation
+
+    if (similarity_usecase.check_correctness) {
+      auto [src, dst, wgt] =
+        cugraph::test::graph_to_host_coo(*handle_, mg_graph_view, mg_edge_weight_view);
+
+      d_v1 = cugraph::test::device_gatherv(*handle_, d_v1.data(), d_v1.size());
+      d_v2 = cugraph::test::device_gatherv(*handle_, d_v2.data(), d_v2.size());
+      result_score =
+        cugraph::test::device_gatherv(*handle_, result_score.data(), result_score.size());
+
+      if (d_v1.size() > 0) {
+        auto h_vertex_pair1 = cugraph::test::to_host(*handle_, d_v1);
+        auto h_vertex_pair2 = cugraph::test::to_host(*handle_, d_v2);
+        auto h_result_score = cugraph::test::to_host(*handle_, result_score);
+
+        if (wgt && similarity_usecase.use_weights) {
+          weighted_similarity_compare(mg_graph_view.number_of_vertices(),
+                                      std::tie(src, dst, wgt),
+                                      std::tie(h_vertex_pair1, h_vertex_pair2),
+                                      h_result_score,
+                                      test_functor);
+        } else {
+          similarity_compare(mg_graph_view.number_of_vertices(),
+                             std::tie(src, dst, wgt),
+                             std::tie(h_vertex_pair1, h_vertex_pair2),
+                             h_result_score,
+                             test_functor);
+        }
+      }
+    }
+  }
+
+ private:
+  static std::unique_ptr<raft::handle_t> handle_;
+};
+
+template <typename input_usecase_t>
+std::unique_ptr<raft::handle_t> Tests_MGSimilarity<input_usecase_t>::handle_ = nullptr;
+
+using Tests_MGWeightedSimilarity_File = Tests_MGSimilarity<cugraph::test::File_Usecase>;
+using Tests_MGWeightedSimilarity_Rmat = Tests_MGSimilarity<cugraph::test::Rmat_Usecase>;
+
+TEST_P(Tests_MGWeightedSimilarity_File, CheckInt32Int32FloatFloatJaccard)
+{
+  auto const& param = GetParam();  // bind once by reference; avoids copying the parameter tuple
+  run_current_test<int32_t, int32_t, float>(
+    override_File_Usecase_with_cmd_line_arguments(param), cugraph::test::test_jaccard_t{});
+}
+
+TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int32FloatFloatJaccard)
+{
+  auto const& param = GetParam();  // bind once by reference; avoids copying the parameter tuple
+  run_current_test<int32_t, int32_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(param), cugraph::test::test_jaccard_t{});
+}
+
+TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int64FloatFloatJaccard)
+{
+  auto const& param = GetParam();  // bind once by reference; avoids copying the parameter tuple
+  run_current_test<int32_t, int64_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(param), cugraph::test::test_jaccard_t{});
+}
+
+TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt64Int64FloatFloatJaccard)
+{
+  auto const& param = GetParam();  // bind once by reference; avoids copying the parameter tuple
+  run_current_test<int64_t, int64_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(param), cugraph::test::test_jaccard_t{});
+}
+
+TEST_P(Tests_MGWeightedSimilarity_File, CheckInt32Int32FloatSorensen)
+{
+  run_current_test<int32_t, int32_t, float>(
+    override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{});
+}
+
+TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int32FloatSorensen)
+{
+  run_current_test<int32_t, int32_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{});
+}
+
+TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int64FloatSorensen)
+{
+  run_current_test<int32_t, int64_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{});
+}
+
+TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt64Int64FloatSorensen)
+{
+  run_current_test<int64_t, int64_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{});
+}
+
+TEST_P(Tests_MGWeightedSimilarity_File, CheckInt32Int32FloatOverlap)
+{
+  run_current_test<int32_t, int32_t, float>(
+    override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{});
+}
+
+TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int32FloatOverlap)
+{
+  run_current_test<int32_t, int32_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{});
+}
+
+TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt32Int64FloatOverlap)
+{
+  run_current_test<int32_t, int64_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{});
+}
+
+TEST_P(Tests_MGWeightedSimilarity_Rmat, CheckInt64Int64FloatOverlap)
+{
+  run_current_test<int64_t, int64_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{});
+}
+
+INSTANTIATE_TEST_SUITE_P(
+  file_test,
+  Tests_MGWeightedSimilarity_File,
+  ::testing::Combine(
+    // enable correctness checks
+    // only the weighted use case runs; the disabled variant below also covers use_weights = false
+    //   ::testing::Values(Weighted_Similarity_Usecase{true, 20, true},
+    //                     Weighted_Similarity_Usecase{false, 20, true}),
+    ::testing::Values(Weighted_Similarity_Usecase{true, 20, true}),
+    ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"),
+                      cugraph::test::File_Usecase("test/datasets/netscience.mtx"))));
+
+INSTANTIATE_TEST_SUITE_P(
+  rmat_small_test,
+  Tests_MGWeightedSimilarity_Rmat,
+  ::testing::Combine(
+    // enable correctness checks
+    // only the weighted use case runs; the disabled variant below also covers use_weights = false
+    //   ::testing::Values(Weighted_Similarity_Usecase{true, 20, true},
+    //                     Weighted_Similarity_Usecase{false, 20, true}),
+    ::testing::Values(Weighted_Similarity_Usecase{true, 20, true}),
+    ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false))));
+
+INSTANTIATE_TEST_SUITE_P(
+  rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with
+                          --gtest_filter to select only the rmat_benchmark_test with a specific
+                          vertex & edge type combination) by command line arguments and do not
+                          include more than one Rmat_Usecase that differ only in scale or edge
+                          factor (to avoid running same benchmarks more than once) */
+  Tests_MGWeightedSimilarity_Rmat,
+  ::testing::Combine(
+    // disable correctness checks for large graphs
+    ::testing::Values(Weighted_Similarity_Usecase{true, 20, false}),
+    ::testing::Values(cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false))));
+
+CUGRAPH_MG_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/link_prediction/similarity_compare.cpp b/cpp/tests/link_prediction/similarity_compare.cpp
index f005b4ddcef..b39ee983fa7 100644
--- a/cpp/tests/link_prediction/similarity_compare.cpp
+++ b/cpp/tests/link_prediction/similarity_compare.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -37,6 +37,159 @@ struct intersection_count_t {
 namespace cugraph {
 namespace test {
 
+// Host-side reference check for weighted similarity: recomputes the expected score of every
+// (v1, v2) pair from the COO edge list and compares it with similarity_score, using
+// test_functor.compute_score() and a nearly_equal tolerance. The edge list is sorted in place.
+// NOTE(review): halving each weight into both endpoints presumes the edge list stores every
+// undirected edge in both directions, and multi-edges are not handled -- confirm with callers.
+template <typename vertex_t, typename weight_t, typename test_t>
+void weighted_similarity_compare(
+  vertex_t num_vertices,
+  std::tuple<std::vector<vertex_t>&, std::vector<vertex_t>&, std::optional<std::vector<weight_t>>&>
+    edge_list,
+  std::tuple<std::vector<vertex_t>&, std::vector<vertex_t>&> vertex_pairs,
+  std::vector<weight_t>& similarity_score,
+  test_t const& test_functor)
+{
+  auto& [graph_src, graph_dst, graph_wgt] = edge_list;
+  auto& [v1, v2]                          = vertex_pairs;
+
+  // The weights are dereferenced unconditionally below, so reject an unweighted edge list up
+  // front. Unlike assert(), this check stays active in release (NDEBUG) builds.
+  ASSERT_TRUE(graph_wgt.has_value()) << "weighted_similarity_compare requires edge weights";
+
+  auto compare_pairs = [](thrust::tuple<vertex_t, vertex_t, weight_t> lhs,
+                          thrust::tuple<vertex_t, vertex_t, weight_t> rhs) {
+    return ((thrust::get<0>(lhs) < thrust::get<0>(rhs)) ||
+            ((thrust::get<0>(lhs) == thrust::get<0>(rhs)) &&
+             (thrust::get<1>(lhs) < thrust::get<1>(rhs))));
+  };
+
+  // Order edges by (src, dst) so that each vertex's neighbor list is one contiguous sorted run.
+  std::sort(thrust::make_zip_iterator(graph_src.begin(), graph_dst.begin(), (*graph_wgt).begin()),
+            thrust::make_zip_iterator(graph_src.end(), graph_dst.end(), (*graph_wgt).end()),
+            compare_pairs);
+
+  std::vector<weight_t> weighted_vertex_degrees(static_cast<size_t>(num_vertices), weight_t{0});
+
+  std::for_each(
+    thrust::make_zip_iterator(graph_src.begin(), graph_dst.begin(), (*graph_wgt).begin()),
+    thrust::make_zip_iterator(graph_src.end(), graph_dst.end(), (*graph_wgt).end()),
+    [&weighted_vertex_degrees](thrust::tuple<vertex_t, vertex_t, weight_t> src_dst_wgt) {
+      auto src = thrust::get<0>(src_dst_wgt);
+      auto dst = thrust::get<1>(src_dst_wgt);
+      auto wgt = thrust::get<2>(src_dst_wgt);
+
+      weighted_vertex_degrees[src] += wgt / weight_t{2};
+      weighted_vertex_degrees[dst] += wgt / weight_t{2};
+    });
+
+  auto compare_functor = cugraph::test::nearly_equal<weight_t>{
+    weight_t{1e-3}, weight_t{(weight_t{1} / static_cast<weight_t>(num_vertices)) * weight_t{1e-3}}};
+
+  // Check every requested pair against the score recomputed from the sorted edge list.
+  auto graph_wgt_first = (*graph_wgt).begin();
+  std::for_each(
+    thrust::make_zip_iterator(v1.begin(), v2.begin(), similarity_score.begin()),
+    thrust::make_zip_iterator(v1.end(), v2.end(), similarity_score.end()),
+    [compare_functor,
+     test_functor,
+     &weighted_vertex_degrees,
+     &graph_src,
+     &graph_dst,
+     &graph_wgt_first](auto tuple) {
+      auto v1    = thrust::get<0>(tuple);
+      auto v2    = thrust::get<1>(tuple);
+      auto score = thrust::get<2>(tuple);
+
+      auto v1_begin =
+        std::distance(graph_src.begin(), std::lower_bound(graph_src.begin(), graph_src.end(), v1));
+      auto v1_end =
+        std::distance(graph_src.begin(), std::upper_bound(graph_src.begin(), graph_src.end(), v1));
+
+      auto v2_begin =
+        std::distance(graph_src.begin(), std::lower_bound(graph_src.begin(), graph_src.end(), v2));
+      auto v2_end =
+        std::distance(graph_src.begin(), std::upper_bound(graph_src.begin(), graph_src.end(), v2));
+
+      std::vector<vertex_t> intersection(std::min((v1_end - v1_begin), (v2_end - v2_begin)));
+
+      auto intersection_end = std::set_intersection(graph_dst.begin() + v1_begin,
+                                                    graph_dst.begin() + v1_end,
+                                                    graph_dst.begin() + v2_begin,
+                                                    graph_dst.begin() + v2_end,
+                                                    intersection.begin());
+
+      auto intersection_size =
+        static_cast<size_t>(std::distance(intersection.begin(), intersection_end));
+
+      std::vector<weight_t> intersected_weights_v1(static_cast<size_t>(intersection_size),
+                                                   weight_t{0});
+
+      std::vector<weight_t> intersected_weights_v2(static_cast<size_t>(intersection_size),
+                                                   weight_t{0});
+
+      int intersected_weight_idx = 0;
+
+      std::for_each(
+        intersection.begin(),
+        intersection_end,
+        [&graph_dst,
+         &graph_wgt_first,
+         &v1_begin,
+         &v1_end,
+         &v2_begin,
+         &v2_end,
+         &intersected_weights_v1,
+         &intersected_weights_v2,
+         &intersected_weight_idx](auto inbr) {
+          auto lower =
+            std::lower_bound(graph_dst.begin() + v1_begin, graph_dst.begin() + v1_end, inbr);
+          auto offset = std::distance(graph_dst.begin() + v1_begin, lower);
+
+          intersected_weights_v1[intersected_weight_idx] =
+            static_cast<weight_t>(graph_wgt_first[v1_begin + offset]);
+
+          lower = std::lower_bound(graph_dst.begin() + v2_begin, graph_dst.begin() + v2_end, inbr);
+
+          offset = std::distance(graph_dst.begin() + v2_begin, lower);
+
+          intersected_weights_v2[intersected_weight_idx] =
+            static_cast<weight_t>(graph_wgt_first[v2_begin + offset]);
+
+          ++intersected_weight_idx;
+        });
+
+      weight_t sum_intersected_weights_v1 =
+        std::accumulate(intersected_weights_v1.begin(), intersected_weights_v1.end(), 0.0);
+      weight_t sum_intersected_weights_v2 =
+        std::accumulate(intersected_weights_v2.begin(), intersected_weights_v2.end(), 0.0);
+
+      weight_t sum_of_uniq_weights_v1 = weighted_vertex_degrees[v1] - sum_intersected_weights_v1;
+      weight_t sum_of_uniq_weights_v2 = weighted_vertex_degrees[v2] - sum_intersected_weights_v2;
+
+      weight_t min_weight_v1_intersect_v2 = weight_t{0};
+      weight_t max_weight_v1_intersect_v2 = weight_t{0};
+
+      std::for_each(
+        thrust::make_zip_iterator(intersected_weights_v1.begin(), intersected_weights_v2.begin()),
+        thrust::make_zip_iterator(intersected_weights_v1.end(), intersected_weights_v2.end()),
+        [&min_weight_v1_intersect_v2,
+         &max_weight_v1_intersect_v2](thrust::tuple<weight_t, weight_t> w1_w2) {
+          min_weight_v1_intersect_v2 += std::min(thrust::get<0>(w1_w2), thrust::get<1>(w1_w2));
+          max_weight_v1_intersect_v2 += std::max(thrust::get<0>(w1_w2), thrust::get<1>(w1_w2));
+        });
+
+      max_weight_v1_intersect_v2 += (sum_of_uniq_weights_v1 + sum_of_uniq_weights_v2);
+      auto expected_score = test_functor.compute_score(weighted_vertex_degrees[v1],
+                                                       weighted_vertex_degrees[v2],
+                                                       min_weight_v1_intersect_v2,
+                                                       max_weight_v1_intersect_v2);
+      EXPECT_TRUE(compare_functor(score, expected_score))
+        << "score mismatch, got " << score << ", expected " << expected_score;
+    });
+}
+
 template <typename vertex_t, typename weight_t, typename test_t>
 void similarity_compare(
   vertex_t num_vertices,
@@ -96,9 +249,11 @@ void similarity_compare(
                                                     intersection.begin());
 
       auto expected_score = test_functor.compute_score(
-        vertex_degrees[v1],
-        vertex_degrees[v2],
-        static_cast<weight_t>(std::distance(intersection.begin(), intersection_end)));
+        static_cast<weight_t>(vertex_degrees[v1]),
+        static_cast<weight_t>(vertex_degrees[v2]),
+        static_cast<weight_t>(std::distance(intersection.begin(), intersection_end)),
+        static_cast<weight_t>(vertex_degrees[v1] + vertex_degrees[v2] -
+                              std::distance(intersection.begin(), intersection_end)));
 
       EXPECT_TRUE(compare_functor(score, expected_score))
         << "score mismatch, got " << score << ", expected " << expected_score;
@@ -153,5 +308,55 @@ template void similarity_compare(
   std::vector<float>& result_score,
   test_overlap_t const& test_functor);
 
+////
+
+template void weighted_similarity_compare(
+  int32_t num_vertices,
+  std::tuple<std::vector<int32_t>&, std::vector<int32_t>&, std::optional<std::vector<float>>&>
+    edge_list,
+  std::tuple<std::vector<int32_t>&, std::vector<int32_t>&> vertex_pairs,
+  std::vector<float>& result_score,
+  test_jaccard_t const& test_functor);
+
+template void weighted_similarity_compare(
+  int32_t num_vertices,
+  std::tuple<std::vector<int32_t>&, std::vector<int32_t>&, std::optional<std::vector<float>>&>
+    edge_list,
+  std::tuple<std::vector<int32_t>&, std::vector<int32_t>&> vertex_pairs,
+  std::vector<float>& result_score,
+  test_sorensen_t const& test_functor);
+
+template void weighted_similarity_compare(
+  int32_t num_vertices,
+  std::tuple<std::vector<int32_t>&, std::vector<int32_t>&, std::optional<std::vector<float>>&>
+    edge_list,
+  std::tuple<std::vector<int32_t>&, std::vector<int32_t>&> vertex_pairs,
+  std::vector<float>& result_score,
+  test_overlap_t const& test_functor);
+
+template void weighted_similarity_compare(
+  int64_t num_vertices,
+  std::tuple<std::vector<int64_t>&, std::vector<int64_t>&, std::optional<std::vector<float>>&>
+    edge_list,
+  std::tuple<std::vector<int64_t>&, std::vector<int64_t>&> vertex_pairs,
+  std::vector<float>& result_score,
+  test_jaccard_t const& test_functor);
+
+template void weighted_similarity_compare(
+  int64_t num_vertices,
+  std::tuple<std::vector<int64_t>&, std::vector<int64_t>&, std::optional<std::vector<float>>&>
+    edge_list,
+  std::tuple<std::vector<int64_t>&, std::vector<int64_t>&> vertex_pairs,
+  std::vector<float>& result_score,
+  test_sorensen_t const& test_functor);
+
+template void weighted_similarity_compare(
+  int64_t num_vertices,
+  std::tuple<std::vector<int64_t>&, std::vector<int64_t>&, std::optional<std::vector<float>>&>
+    edge_list,
+  std::tuple<std::vector<int64_t>&, std::vector<int64_t>&> vertex_pairs,
+  std::vector<float>& result_score,
+  test_overlap_t const& test_functor);
+
 }  // namespace test
 }  // namespace cugraph
diff --git a/cpp/tests/link_prediction/similarity_compare.hpp b/cpp/tests/link_prediction/similarity_compare.hpp
index 0fbb3b40b39..5c312a768d0 100644
--- a/cpp/tests/link_prediction/similarity_compare.hpp
+++ b/cpp/tests/link_prediction/similarity_compare.hpp
@@ -29,10 +29,17 @@ struct test_jaccard_t {
   std::string testname{"Jaccard"};
 
   template <typename weight_t>
-  weight_t compute_score(size_t u_size, size_t v_size, weight_t intersection_count) const
+  weight_t compute_score(weight_t weight_a,
+                         weight_t weight_b,
+                         weight_t weight_a_intersect_b,
+                         weight_t weight_a_union_b) const
   {
-    return static_cast<weight_t>(intersection_count) /
-           static_cast<weight_t>(u_size + v_size - intersection_count);
+    if (std::abs(static_cast<double>(weight_a_union_b) - double{0}) <
+        double{2} / std::numeric_limits<double>::max()) {
+      return weight_t{0};
+    } else {
+      return weight_a_intersect_b / weight_a_union_b;
+    }
   }
 
   template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
@@ -51,9 +58,17 @@ struct test_sorensen_t {
   std::string testname{"Sorensen"};
 
   template <typename weight_t>
-  weight_t compute_score(size_t u_size, size_t v_size, weight_t intersection_count) const
+  weight_t compute_score(weight_t weight_a,
+                         weight_t weight_b,
+                         weight_t weight_a_intersect_b,
+                         weight_t weight_a_union_b) const
   {
-    return static_cast<weight_t>(2 * intersection_count) / static_cast<weight_t>(u_size + v_size);
+    if (std::abs(static_cast<double>(weight_a_union_b) - double{0}) <
+        double{2} / std::numeric_limits<double>::max()) {
+      return weight_t{0};
+    } else {
+      return (2 * weight_a_intersect_b) / (weight_a + weight_b);
+    }
   }
 
   template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
@@ -72,10 +87,17 @@ struct test_overlap_t {
   std::string testname{"Overlap"};
 
   template <typename weight_t>
-  weight_t compute_score(size_t u_size, size_t v_size, weight_t intersection_count) const
+  weight_t compute_score(weight_t weight_a,
+                         weight_t weight_b,
+                         weight_t weight_a_intersect_b,
+                         weight_t weight_a_union_b) const
   {
-    return static_cast<weight_t>(intersection_count) /
-           static_cast<weight_t>(std::min(u_size, v_size));
+    if (std::abs(static_cast<double>(weight_a_union_b) - double{0}) <
+        double{2} / std::numeric_limits<double>::max()) {
+      return weight_t{0};
+    } else {
+      return weight_a_intersect_b / std::min(weight_a, weight_b);
+    }
   }
 
   template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
@@ -99,5 +121,13 @@ void similarity_compare(
   std::vector<weight_t>& similarity_score,
   test_t const& test_functor);
 
+template <typename vertex_t, typename weight_t, typename test_t>
+void weighted_similarity_compare(
+  vertex_t num_vertices,
+  std::tuple<std::vector<vertex_t>&, std::vector<vertex_t>&, std::optional<std::vector<weight_t>>&>
+    edge_list,
+  std::tuple<std::vector<vertex_t>&, std::vector<vertex_t>&> vertex_pairs,
+  std::vector<weight_t>& similarity_score,
+  test_t const& test_functor);
 }  // namespace test
 }  // namespace cugraph
diff --git a/cpp/tests/link_prediction/weighted_similarity_test.cpp b/cpp/tests/link_prediction/weighted_similarity_test.cpp
new file mode 100644
index 00000000000..ca644b76c5a
--- /dev/null
+++ b/cpp/tests/link_prediction/weighted_similarity_test.cpp
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <link_prediction/similarity_compare.hpp>
+#include <utilities/base_fixture.hpp>
+#include <utilities/device_comm_wrapper.hpp>
+#include <utilities/test_graphs.hpp>
+#include <utilities/test_utilities.hpp>
+#include <utilities/thrust_wrapper.hpp>
+
+#include <cugraph/algorithms.hpp>
+#include <cugraph/utilities/high_res_timer.hpp>
+
+#include <gtest/gtest.h>
+
+struct Similarity_Usecase {
+  bool use_weights{false};  // passed to construct_graph() and to test_functor.run()
+  bool check_correctness{true};  // gates the verification block that follows the timed run
+  size_t max_seeds{std::numeric_limits<size_t>::max()};  // cap on seed vertices scanned for pairs
+  size_t max_vertex_pairs_to_check{std::numeric_limits<size_t>::max()};  // upper bound on check_size
+};
+
+template <typename input_usecase_t>
+class Tests_Similarity
+  : public ::testing::TestWithParam<std::tuple<Similarity_Usecase, input_usecase_t>> {
+ public:
+  Tests_Similarity() {}
+
+  static void SetUpTestCase() {}
+  static void TearDownTestCase() {}
+
+  virtual void SetUp() {}
+  virtual void TearDown() {}
+
+  template <typename vertex_t, typename edge_t, typename weight_t, typename test_functor_t>
+  void run_current_test(std::tuple<Similarity_Usecase const&, input_usecase_t const&> const& param,
+                        test_functor_t const& test_functor)
+  {
+    constexpr bool renumber                  = true;
+    auto [similarity_usecase, input_usecase] = param;
+
+    // 1. initialize handle
+
+    raft::handle_t handle{};
+    HighResTimer hr_timer{};
+
+    // 2. create SG graph
+
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      hr_timer.start("Construct graph");
+    }
+
+    auto [graph, edge_weights, d_renumber_map_labels] =
+      cugraph::test::construct_graph<vertex_t, edge_t, weight_t, false, false>(
+        handle, input_usecase, similarity_usecase.use_weights, renumber, false, true);
+
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      hr_timer.stop();
+      hr_timer.display_and_clear(std::cout);
+    }
+
+    // 3. run similarity
+
+    auto graph_view = graph.view();
+    auto edge_weight_view =
+      edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt;
+
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      hr_timer.start("Similarity test");
+    }
+
+    //
+    // FIXME:  Don't currently have an MG implementation of 2-hop neighbors.
+    //         For now we'll do that on the CPU (really slowly, so keep max_seed
+    //         small)
+    //
+    rmm::device_uvector<vertex_t> d_v1(0, handle.get_stream());
+    rmm::device_uvector<vertex_t> d_v2(0, handle.get_stream());
+
+    {
+      auto [src, dst, wgt] = cugraph::test::graph_to_host_coo(handle, graph_view, edge_weight_view);
+
+      size_t max_vertices = std::min(static_cast<size_t>(graph_view.number_of_vertices()),
+                                     similarity_usecase.max_seeds);
+      std::vector<vertex_t> h_v1;
+      std::vector<vertex_t> h_v2;
+      std::vector<vertex_t> one_hop_v1;
+      std::vector<vertex_t> one_hop_v2;
+
+      for (size_t seed = 0; seed < max_vertices; ++seed) {
+        std::for_each(thrust::make_zip_iterator(src.begin(), dst.begin()),
+                      thrust::make_zip_iterator(src.end(), dst.end()),
+                      [&one_hop_v1, &one_hop_v2, seed](auto t) {
+                        auto u = thrust::get<0>(t);
+                        auto v = thrust::get<1>(t);
+                        if (u == seed) {
+                          one_hop_v1.push_back(u);
+                          one_hop_v2.push_back(v);
+                        }
+                      });
+      }
+
+      std::for_each(thrust::make_zip_iterator(one_hop_v1.begin(), one_hop_v2.begin()),
+                    thrust::make_zip_iterator(one_hop_v1.end(), one_hop_v2.end()),
+                    [&](auto t1) {
+                      auto seed     = thrust::get<0>(t1);
+                      auto neighbor = thrust::get<1>(t1);
+                      std::for_each(thrust::make_zip_iterator(src.begin(), dst.begin()),
+                                    thrust::make_zip_iterator(src.end(), dst.end()),
+                                    [&](auto t2) {
+                                      auto u = thrust::get<0>(t2);
+                                      auto v = thrust::get<1>(t2);
+                                      if (u == neighbor) {
+                                        h_v1.push_back(seed);
+                                        h_v2.push_back(v);
+                                      }
+                                    });
+                    });
+
+      std::sort(thrust::make_zip_iterator(h_v1.begin(), h_v2.begin()),
+                thrust::make_zip_iterator(h_v1.end(), h_v2.end()));
+
+      auto end_iter = std::unique(thrust::make_zip_iterator(h_v1.begin(), h_v2.begin()),
+                                  thrust::make_zip_iterator(h_v1.end(), h_v2.end()),
+                                  [](auto t1, auto t2) {
+                                    return (thrust::get<0>(t1) == thrust::get<0>(t2)) &&
+                                           (thrust::get<1>(t1) == thrust::get<1>(t2));
+                                  });
+
+      h_v1.resize(
+        thrust::distance(thrust::make_zip_iterator(h_v1.begin(), h_v2.begin()), end_iter));
+      h_v2.resize(h_v1.size());
+
+      d_v1.resize(h_v1.size(), handle.get_stream());
+      d_v2.resize(h_v2.size(), handle.get_stream());
+
+      raft::update_device(d_v1.data(), h_v1.data(), h_v1.size(), handle.get_stream());
+      raft::update_device(d_v2.data(), h_v2.data(), h_v2.size(), handle.get_stream());
+    }
+
+    // FIXME:  Need to add some tests that specify actual vertex pairs
+    // FIXME:  Need to add a variation that calls the two hop neighbors function
+    // FIXME:  Debugging state as of EOD 9/28:
+    //           1) Tested case of no vertex pairs... works great :-)
+    //           2) Don't have a 2-hop on GPU yet.  Perhaps write a 2-hop on CPU
+    //              for now?  We could then use that for testing the 2-hop function
+    //              later.
+    std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs{
+      {d_v1.data(), d_v1.size()}, {d_v2.data(), d_v2.size()}};
+
+    auto result_score = test_functor.run(
+      handle, graph_view, edge_weight_view, vertex_pairs, similarity_usecase.use_weights);
+
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      hr_timer.stop();
+      hr_timer.display_and_clear(std::cout);
+    }
+
+    if (similarity_usecase.check_correctness) {
+      auto [src, dst, wgt] = cugraph::test::graph_to_host_coo(handle, graph_view, edge_weight_view);
+
+      size_t check_size = std::min(d_v1.size(), similarity_usecase.max_vertex_pairs_to_check);
+
+      //
+      // FIXME: Need to reorder here.  thrust::shuffle on the tuples (vertex_pairs_1,
+      // vertex_pairs_2, result_score) would
+      //        be sufficient.
+      //
+      std::vector<vertex_t> h_vertex_pair_1(check_size);
+      std::vector<vertex_t> h_vertex_pair_2(check_size);
+      std::vector<weight_t> h_result_score(check_size);
+
+      raft::update_host(
+        h_vertex_pair_1.data(), std::get<0>(vertex_pairs).data(), check_size, handle.get_stream());
+      raft::update_host(
+        h_vertex_pair_2.data(), std::get<1>(vertex_pairs).data(), check_size, handle.get_stream());
+      raft::update_host(
+        h_result_score.data(), result_score.data(), check_size, handle.get_stream());
+
+      if (wgt && similarity_usecase.use_weights) {
+        weighted_similarity_compare(graph_view.number_of_vertices(),
+                                    std::tie(src, dst, wgt),
+                                    std::tie(h_vertex_pair_1, h_vertex_pair_2),
+                                    h_result_score,
+                                    test_functor);
+      } else {
+        similarity_compare(graph_view.number_of_vertices(),
+                           std::tie(src, dst, wgt),
+                           std::tie(h_vertex_pair_1, h_vertex_pair_2),
+                           h_result_score,
+                           test_functor);
+      }
+    }
+  }
+};
+
+using Tests_Similarity_File = Tests_Similarity<cugraph::test::File_Usecase>;
+using Tests_Similarity_Rmat = Tests_Similarity<cugraph::test::Rmat_Usecase>;
+
+TEST_P(Tests_Similarity_File, CheckInt32Int32FloatJaccard)
+{
+  run_current_test<int32_t, int32_t, float>(
+    override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{});
+}
+
+TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatJaccard)
+{
+  run_current_test<int32_t, int32_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{});
+}
+
+TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatJaccard)
+{
+  run_current_test<int32_t, int64_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{});
+}
+
+TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatJaccard)
+{
+  run_current_test<int64_t, int64_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{});
+}
+
+TEST_P(Tests_Similarity_File, CheckInt32Int32FloatSorensen)
+{
+  run_current_test<int32_t, int32_t, float>(
+    override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{});
+}
+
+TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatSorensen)
+{
+  run_current_test<int32_t, int32_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{});
+}
+
+TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatSorensen)
+{
+  run_current_test<int32_t, int64_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{});
+}
+
+TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatSorensen)
+{
+  run_current_test<int64_t, int64_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{});
+}
+
+TEST_P(Tests_Similarity_File, CheckInt32Int32FloatOverlap)
+{
+  run_current_test<int32_t, int32_t, float>(
+    override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{});
+}
+
+TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatOverlap)
+{
+  run_current_test<int32_t, int32_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{});
+}
+
+TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatOverlap)
+{
+  run_current_test<int32_t, int64_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{});
+}
+
+TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatOverlap)
+{
+  run_current_test<int64_t, int64_t, float>(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{});
+}
+
+INSTANTIATE_TEST_SUITE_P(
+  file_test,
+  Tests_Similarity_File,
+  ::testing::Combine(
+    // enable correctness checks
+    // Disable weighted computation testing in 22.10
+    //::testing::Values(Similarity_Usecase{true, true, 20, 100}, Similarity_Usecase{false, true, 20,
+    //: 100}),
+    ::testing::Values(Similarity_Usecase{true, true, 20, 100}),
+    ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"),
+                      cugraph::test::File_Usecase("test/datasets/dolphins.mtx"))));
+
+INSTANTIATE_TEST_SUITE_P(
+  rmat_small_test,
+  Tests_Similarity_Rmat,
+  ::testing::Combine(
+    // enable correctness checks
+    // Disable weighted computation testing in 22.10
+    //::testing::Values(Similarity_Usecase{true, true, 20, 100},
+    //: Similarity_Usecase{false,true,20,100}),
+    ::testing::Values(Similarity_Usecase{true, true, 20, 100}),
+    ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false))));
+
+INSTANTIATE_TEST_SUITE_P(
+  file_benchmark_test, /* note that the test filename can be overridden in benchmarking (with
+                          --gtest_filter to select only the file_benchmark_test with a specific
+                          vertex & edge type combination) by command line arguments and do not
+                          include more than one File_Usecase that differ only in filename
+                          (to avoid running same benchmarks more than once) */
+  Tests_Similarity_File,
+  ::testing::Combine(
+    // NOTE(review): said "disable correctness checks", but {true, true} below seems to enable them
+    // Disable weighted computation testing in 22.10
+    //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}),
+    ::testing::Values(Similarity_Usecase{true, true}),
+    ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"))));
+
+INSTANTIATE_TEST_SUITE_P(
+  rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with
+                          --gtest_filter to select only the rmat_benchmark_test with a specific
+                          vertex & edge type combination) by command line arguments and do not
+                          include more than one Rmat_Usecase that differ only in scale or edge
+                          factor (to avoid running same benchmarks more than once) */
+  Tests_Similarity_Rmat,
+  ::testing::Combine(
+    // disable correctness checks for large graphs
+    //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}),
+    ::testing::Values(Similarity_Usecase{true, false}),
+    ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false))));
+
+CUGRAPH_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu
index 0ff0a041a71..a7cd8a989b0 100644
--- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu
+++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu
@@ -50,7 +50,10 @@ struct intersection_op_t {
     vertex_t v1,
     edge_t v0_prop /* out degree */,
     edge_t v1_prop /* out degree */,
-    raft::device_span<vertex_t const> intersection) const
+    raft::device_span<vertex_t const> intersection,
+    std::byte, /* dummy */
+    std::byte  /* dummy */
+  ) const
   {
     return thrust::make_tuple(v0_prop + v1_prop, static_cast<edge_t>(intersection.size()));
   }
@@ -160,6 +163,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection
     cugraph::per_v_pair_transform_dst_nbr_intersection(
       *handle_,
       mg_graph_view,
+      cugraph::edge_dummy_property_t{}.view(),
       cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer),
       cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer),
       mg_out_degrees.begin(),
@@ -227,6 +231,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection
         cugraph::per_v_pair_transform_dst_nbr_intersection(
           *handle_,
           sg_graph_view,
+          cugraph::edge_dummy_property_t{}.view(),
           cugraph::get_dataframe_buffer_begin(
             mg_aggregate_vertex_pair_buffer /* now unrenumbered */),
           cugraph::get_dataframe_buffer_end(mg_aggregate_vertex_pair_buffer /* now unrenumbered */),
@@ -324,9 +329,7 @@ INSTANTIATE_TEST_SUITE_P(
   ::testing::Combine(
     ::testing::Values(Prims_Usecase{size_t{1024}, true}),
     ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"),
-                      cugraph::test::File_Usecase("test/datasets/web-Google.mtx"),
-                      cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"),
-                      cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx"))));
+                      cugraph::test::File_Usecase("test/datasets/netscience.mtx"))));
 
 INSTANTIATE_TEST_SUITE_P(rmat_small_test,
                          Tests_MGPerVPairTransformDstNbrIntersection_Rmat,
diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu
new file mode 100644
index 00000000000..3b6a6b9c4c5
--- /dev/null
+++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu
@@ -0,0 +1,402 @@
+/*
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <utilities/base_fixture.hpp>
+#include <utilities/device_comm_wrapper.hpp>
+#include <utilities/mg_utilities.hpp>
+#include <utilities/test_graphs.hpp>
+#include <utilities/test_utilities.hpp>
+
+#include <prims/per_v_pair_transform_dst_nbr_intersection.cuh>
+
+#include <cugraph/edge_partition_device_view.cuh>
+#include <cugraph/edge_partition_edge_property_device_view.cuh>
+
+#include <cugraph/detail/shuffle_wrappers.hpp>
+#include <cugraph/edge_src_dst_property.hpp>
+#include <cugraph/graph_view.hpp>
+#include <cugraph/utilities/dataframe_buffer.hpp>
+#include <cugraph/utilities/high_res_timer.hpp>
+#include <cugraph/utilities/host_scalar_comm.hpp>
+#include <cugraph/utilities/thrust_tuple_utils.hpp>
+
+#include <raft/comms/mpi_comms.hpp>
+#include <raft/core/comms.hpp>
+#include <raft/core/device_span.hpp>
+#include <raft/core/handle.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/tuple.h>
+#include <utilities/thrust_wrapper.hpp>
+
+#include <gtest/gtest.h>
+
+#include <random>
+
+// Test op: returns (sum of min(prop_a, prop_b) over the intersection, sum of max(prop_a, prop_b)
+// over the intersection plus the parts of weight_a / weight_b not covered by the intersection).
+template <typename vertex_t, typename edge_t, typename weight_t>
+struct intersection_op_t {
+  __device__ thrust::tuple<weight_t, weight_t> operator()(
+    vertex_t a,
+    vertex_t b,
+    weight_t weight_a /* weighted out degree */,
+    weight_t weight_b /* weighted out degree */,
+    raft::device_span<vertex_t const> intersection,
+    raft::device_span<weight_t const> intersected_properties_a,
+    raft::device_span<weight_t const> intersected_properties_b) const
+  {
+    weight_t min_weight_a_intersect_b = weight_t{0};
+    weight_t max_weight_a_intersect_b = weight_t{0};
+    weight_t sum_of_intersected_a     = weight_t{0};
+    weight_t sum_of_intersected_b     = weight_t{0};
+    for (size_t k = 0; k < intersection.size(); k++) {
+      min_weight_a_intersect_b += min(intersected_properties_a[k], intersected_properties_b[k]);
+      max_weight_a_intersect_b += max(intersected_properties_a[k], intersected_properties_b[k]);
+      sum_of_intersected_a += intersected_properties_a[k];
+      sum_of_intersected_b += intersected_properties_b[k];
+    }
+    // the part of each weighted out degree outside the intersection only adds to the max sum
+    weight_t sum_of_uniq_a = weight_a - sum_of_intersected_a;
+    weight_t sum_of_uniq_b = weight_b - sum_of_intersected_b;
+    max_weight_a_intersect_b += sum_of_uniq_a + sum_of_uniq_b;
+    return thrust::make_tuple(min_weight_a_intersect_b, max_weight_a_intersect_b);
+  }
+};
+
+struct Prims_Usecase {
+  size_t num_vertex_pairs{0};    // total number of vertex pairs, split across the ranks
+  bool check_correctness{true};  // compare the MG results against an SG run on rank 0
+};
+
+template <typename input_usecase_t>
+class Tests_MGPerVPairTransformDstNbrIntersection
+  : public ::testing::TestWithParam<std::tuple<Prims_Usecase, input_usecase_t>> {
+ public:
+  Tests_MGPerVPairTransformDstNbrIntersection() {}
+
+  static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); }
+
+  static void TearDownTestCase() { handle_.reset(); }
+
+  virtual void SetUp() {}
+  virtual void TearDown() {}
+
+  // Verify the results of per_v_pair_transform_dst_nbr_intersection primitive
+  template <typename vertex_t, typename edge_t, typename weight_t, typename property_t>
+  void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase)
+  {
+    HighResTimer hr_timer{};
+    auto const comm_rank = handle_->get_comms().get_rank();
+    auto const comm_size = handle_->get_comms().get_size();
+
+    // 1. create MG graph
+
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      handle_->get_comms().barrier();
+      hr_timer.start("MG Construct graph");
+    }
+
+    constexpr bool store_transposed = false;
+    constexpr bool multi_gpu        = true;
+
+    cugraph::graph_t<vertex_t, edge_t, false, true> mg_graph(*handle_);
+    std::optional<
+      cugraph::edge_property_t<cugraph::graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu>,
+                               weight_t>>
+      mg_edge_weight{std::nullopt};
+    std::optional<rmm::device_uvector<vertex_t>> mg_renumber_map{std::nullopt};
+
+    constexpr bool test_weighted    = true;
+    constexpr bool renumber         = true;
+    constexpr bool drop_self_loops  = false;
+    constexpr bool drop_multi_edges = true;
+
+    std::tie(mg_graph, mg_edge_weight, mg_renumber_map) =
+      cugraph::test::construct_graph<vertex_t, edge_t, weight_t, false, true>(
+        *handle_, input_usecase, test_weighted, renumber, drop_self_loops, drop_multi_edges);
+
+    auto mg_graph_view       = mg_graph.view();
+    auto mg_edge_weight_view = (*mg_edge_weight).view();
+
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      handle_->get_comms().barrier();
+      hr_timer.stop();
+      hr_timer.display_and_clear(std::cout);
+    }
+
+    // 2. run MG per_v_pair_transform_dst_nbr_intersection primitive
+
+    // the code below to generate vertex pairs is invalid for an empty graph.
+    ASSERT_TRUE(mg_graph_view.number_of_vertices() > vertex_t{0});
+
+    auto mg_vertex_pair_buffer =
+      cugraph::allocate_dataframe_buffer<thrust::tuple<vertex_t, vertex_t>>(
+        prims_usecase.num_vertex_pairs / comm_size +
+          (static_cast<size_t>(comm_rank) < prims_usecase.num_vertex_pairs % comm_size ? 1 : 0),
+        handle_->get_stream());
+
+    thrust::tabulate(
+      handle_->get_thrust_policy(),
+      cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer),
+      cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer),
+      [comm_rank, num_vertices = mg_graph_view.number_of_vertices()] __device__(size_t i) {
+        cuco::detail::MurmurHash3_32<size_t>
+          hash_func{};  // use hash_func to generate arbitrary vertex pairs
+        auto v0 = static_cast<vertex_t>(hash_func(i + comm_rank) % num_vertices);
+        auto v1 = static_cast<vertex_t>(hash_func(i + num_vertices + comm_rank) % num_vertices);
+        return thrust::make_tuple(v0, v1);
+      });
+
+    auto h_vertex_partition_range_lasts = mg_graph_view.vertex_partition_range_lasts();
+    std::tie(std::get<0>(mg_vertex_pair_buffer),
+             std::get<1>(mg_vertex_pair_buffer),
+             std::ignore,
+             std::ignore,
+             std::ignore) =
+      cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<
+        vertex_t,
+        edge_t,
+        weight_t,
+        int32_t>(*handle_,
+                 std::move(std::get<0>(mg_vertex_pair_buffer)),
+                 std::move(std::get<1>(mg_vertex_pair_buffer)),
+                 std::nullopt,
+                 std::nullopt,
+                 std::nullopt,
+                 h_vertex_partition_range_lasts);
+
+    auto mg_result_buffer = cugraph::allocate_dataframe_buffer<thrust::tuple<weight_t, weight_t>>(
+      cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), handle_->get_stream());
+    rmm::device_uvector<weight_t> mg_out_weight_sums =
+      compute_out_weight_sums(*handle_, mg_graph_view, mg_edge_weight_view);
+
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      handle_->get_comms().barrier();
+      hr_timer.start("MG per_v_pair_transform_dst_nbr_intersection");
+    }
+
+    cugraph::per_v_pair_transform_dst_nbr_intersection(
+      *handle_,
+      mg_graph_view,
+      mg_edge_weight_view,
+      cugraph::get_dataframe_buffer_begin(mg_vertex_pair_buffer),
+      cugraph::get_dataframe_buffer_end(mg_vertex_pair_buffer),
+      mg_out_weight_sums.begin(),
+      intersection_op_t<vertex_t, edge_t, weight_t>{},
+      cugraph::get_dataframe_buffer_begin(mg_result_buffer));
+
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      handle_->get_comms().barrier();
+      hr_timer.stop();
+      hr_timer.display_and_clear(std::cout);
+    }
+
+    // 3. validate MG results
+
+    if (prims_usecase.check_correctness) {
+      cugraph::unrenumber_int_vertices<vertex_t, true>(
+        *handle_,
+        std::get<0>(mg_vertex_pair_buffer).data(),
+        cugraph::size_dataframe_buffer(mg_vertex_pair_buffer),
+        (*mg_renumber_map).data(),
+        h_vertex_partition_range_lasts);
+      cugraph::unrenumber_int_vertices<vertex_t, true>(
+        *handle_,
+        std::get<1>(mg_vertex_pair_buffer).data(),
+        cugraph::size_dataframe_buffer(mg_vertex_pair_buffer),
+        (*mg_renumber_map).data(),
+        h_vertex_partition_range_lasts);
+
+      auto mg_aggregate_vertex_pair_buffer =
+        cugraph::allocate_dataframe_buffer<thrust::tuple<vertex_t, vertex_t>>(
+          0, handle_->get_stream());
+      std::get<0>(mg_aggregate_vertex_pair_buffer) =
+        cugraph::test::device_gatherv(*handle_,
+                                      std::get<0>(mg_vertex_pair_buffer).data(),
+                                      std::get<0>(mg_vertex_pair_buffer).size());
+      std::get<1>(mg_aggregate_vertex_pair_buffer) =
+        cugraph::test::device_gatherv(*handle_,
+                                      std::get<1>(mg_vertex_pair_buffer).data(),
+                                      std::get<1>(mg_vertex_pair_buffer).size());
+
+      auto mg_aggregate_result_buffer =
+        cugraph::allocate_dataframe_buffer<thrust::tuple<weight_t, weight_t>>(
+          0, handle_->get_stream());
+      std::get<0>(mg_aggregate_result_buffer) = cugraph::test::device_gatherv(
+        *handle_, std::get<0>(mg_result_buffer).data(), std::get<0>(mg_result_buffer).size());
+      std::get<1>(mg_aggregate_result_buffer) = cugraph::test::device_gatherv(
+        *handle_, std::get<1>(mg_result_buffer).data(), std::get<1>(mg_result_buffer).size());
+
+      cugraph::graph_t<vertex_t, edge_t, false, false> sg_graph(*handle_);
+      std::optional<
+        cugraph::edge_property_t<cugraph::graph_view_t<vertex_t, edge_t, store_transposed, false>,
+                                 weight_t>>
+        sg_edge_weight{std::nullopt};
+
+      std::tie(sg_graph, sg_edge_weight, std::ignore) = cugraph::test::mg_graph_to_sg_graph(
+        *handle_,
+        mg_graph_view,
+        mg_edge_weight
+          ? std::make_optional(mg_edge_weight_view)
+          : std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>>{std::nullopt},
+        std::make_optional<raft::device_span<vertex_t const>>((*mg_renumber_map).data(),
+                                                              (*mg_renumber_map).size()),
+        false);
+
+      if (handle_->get_comms().get_rank() == 0) {
+        auto sg_graph_view = sg_graph.view();
+
+        auto sg_result_buffer =
+          cugraph::allocate_dataframe_buffer<thrust::tuple<weight_t, weight_t>>(
+            cugraph::size_dataframe_buffer(mg_aggregate_vertex_pair_buffer), handle_->get_stream());
+
+        rmm::device_uvector<weight_t> sg_out_weight_sums =
+          compute_out_weight_sums(*handle_, sg_graph_view, (*sg_edge_weight).view());
+        // the aggregated vertex pairs were unrenumbered above
+        cugraph::per_v_pair_transform_dst_nbr_intersection(
+          *handle_,
+          sg_graph_view,
+          (*sg_edge_weight).view(),
+          cugraph::get_dataframe_buffer_begin(mg_aggregate_vertex_pair_buffer),
+          cugraph::get_dataframe_buffer_end(mg_aggregate_vertex_pair_buffer),
+          sg_out_weight_sums.begin(),
+          intersection_op_t<vertex_t, edge_t, weight_t>{},
+          cugraph::get_dataframe_buffer_begin(sg_result_buffer));
+
+        // MG and SG may accumulate the float sums in different orders, so allow a small tolerance
+        auto nearly_equal = [] __device__(auto lhs, auto rhs) {
+          weight_t const l0 = thrust::get<0>(lhs), r0 = thrust::get<0>(rhs);  // min-weight sums
+          weight_t const l1 = thrust::get<1>(lhs), r1 = thrust::get<1>(rhs);  // max-weight sums
+          return (fabs(l0 - r0) <= max(max(fabs(l0), fabs(r0)) * weight_t{1e-3}, weight_t{1e-6})) &&
+                 (fabs(l1 - r1) <= max(max(fabs(l1), fabs(r1)) * weight_t{1e-3}, weight_t{1e-6}));
+        };
+        bool valid = thrust::equal(handle_->get_thrust_policy(),
+                                   cugraph::get_dataframe_buffer_begin(mg_aggregate_result_buffer),
+                                   cugraph::get_dataframe_buffer_end(mg_aggregate_result_buffer),
+                                   cugraph::get_dataframe_buffer_begin(sg_result_buffer),
+                                   nearly_equal);
+        ASSERT_TRUE(valid);
+      }
+    }
+  }
+
+ private:
+  static std::unique_ptr<raft::handle_t> handle_;
+};
+
+template <typename input_usecase_t>
+std::unique_ptr<raft::handle_t>
+  Tests_MGPerVPairTransformDstNbrIntersection<input_usecase_t>::handle_ = nullptr;
+
+using Tests_MGPerVPairTransformDstNbrIntersection_File =
+  Tests_MGPerVPairTransformDstNbrIntersection<cugraph::test::File_Usecase>;
+using Tests_MGPerVPairTransformDstNbrIntersection_Rmat =
+  Tests_MGPerVPairTransformDstNbrIntersection<cugraph::test::Rmat_Usecase>;
+
+TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32FloatTupleIntFloat)
+{
+  auto param = GetParam();
+  run_current_test<int32_t, int32_t, float, thrust::tuple<int, float>>(std::get<0>(param),
+                                                                       std::get<1>(param));
+}
+
+TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32FloatTupleIntFloat)
+{
+  auto param = GetParam();
+  run_current_test<int32_t, int32_t, float, thrust::tuple<int, float>>(
+    std::get<0>(param),
+    cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
+}
+
+TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64FloatTupleIntFloat)
+{
+  auto param = GetParam();
+  run_current_test<int32_t, int64_t, float, thrust::tuple<int, float>>(
+    std::get<0>(param),
+    cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
+}
+
+TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64FloatTupleIntFloat)
+{
+  auto param = GetParam();
+  run_current_test<int64_t, int64_t, float, thrust::tuple<int, float>>(
+    std::get<0>(param),
+    cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
+}
+
+TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32Float)
+{
+  auto param = GetParam();
+  run_current_test<int32_t, int32_t, float, int>(std::get<0>(param), std::get<1>(param));
+}
+
+TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32Float)
+{
+  auto param = GetParam();
+  run_current_test<int32_t, int32_t, float, int>(
+    std::get<0>(param),
+    cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
+}
+
+TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64Float)
+{
+  auto param = GetParam();
+  run_current_test<int32_t, int64_t, float, int>(
+    std::get<0>(param),
+    cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
+}
+
+TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64Float)
+{
+  auto param = GetParam();
+  run_current_test<int64_t, int64_t, float, int>(
+    std::get<0>(param),
+    cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+  file_test,
+  Tests_MGPerVPairTransformDstNbrIntersection_File,
+  ::testing::Combine(
+    ::testing::Values(Prims_Usecase{size_t{10}, true}),
+    ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"),
+                      cugraph::test::File_Usecase("test/datasets/netscience.mtx"))));
+
+INSTANTIATE_TEST_SUITE_P(rmat_small_test,
+                         Tests_MGPerVPairTransformDstNbrIntersection_Rmat,
+                         ::testing::Combine(::testing::Values(Prims_Usecase{size_t{1024}, true}),
+                                            ::testing::Values(cugraph::test::Rmat_Usecase(
+                                              10, 16, 0.57, 0.19, 0.19, 0, false, false))));
+
+INSTANTIATE_TEST_SUITE_P(
+  rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with
+                          --gtest_filter to select only the rmat_benchmark_test with a specific
+                          vertex & edge type combination) by command line arguments and do not
+                          include more than one Rmat_Usecase that differ only in scale or edge
+                          factor (to avoid running same benchmarks more than once) */
+  Tests_MGPerVPairTransformDstNbrIntersection_Rmat,
+  ::testing::Combine(
+    ::testing::Values(Prims_Usecase{size_t{1024 * 1024}, false}),
+    ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false))));
+
+CUGRAPH_MG_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp
index 1fa869ac2df..0eff8dedc8f 100644
--- a/cpp/tests/utilities/test_utilities.hpp
+++ b/cpp/tests/utilities/test_utilities.hpp
@@ -154,6 +154,21 @@ read_edgelist_from_csv_file(raft::handle_t const& handle,
                             bool store_transposed,
                             bool multi_gpu);
 
+template <typename vertex_t,
+          typename edge_t,
+          typename weight_t,
+          bool store_transposed,
+          bool multi_gpu>
+std::tuple<cugraph::graph_t<vertex_t, edge_t, store_transposed, multi_gpu>,
+           std::optional<
+             cugraph::edge_property_t<graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu>,
+                                      weight_t>>,
+           std::optional<rmm::device_uvector<vertex_t>>>
+read_graph_from_csv_file(raft::handle_t const& handle,
+                         std::string const& graph_file_full_path,
+                         bool test_weighted,
+                         bool renumber);
+
 // alias for easy customization for debug purposes:
 //
 template <typename value_t>
@@ -521,6 +536,7 @@ mg_graph_to_sg_graph(
   bool renumber);
 
 // Only the rank 0 GPU holds the valid data
+
 template <typename vertex_t, typename value_t>
 std::tuple<std::optional<rmm::device_uvector<vertex_t>>, rmm::device_uvector<value_t>>
 mg_vertex_property_values_to_sg_vertex_property_values(