diff --git a/cpp/include/cugraph/edge_partition_device_view.cuh b/cpp/include/cugraph/edge_partition_device_view.cuh index 213f9b9497a..d1c2cf3df52 100644 --- a/cpp/include/cugraph/edge_partition_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_device_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -298,6 +298,20 @@ class edge_partition_device_view_t major_idx_from_major_nocheck(vertex_t major) const noexcept + { + if (major_hypersparse_first_ && (major >= *major_hypersparse_first_)) { + auto major_hypersparse_idx = + detail::major_hypersparse_idx_from_major_nocheck_impl(*dcs_nzd_vertices_, major); + return major_hypersparse_idx + ? thrust::make_optional((*major_hypersparse_first_ - major_range_first_) + + *major_hypersparse_idx) + : thrust::nullopt; + } else { + return major - major_range_first_; + } + } + __device__ vertex_t major_from_major_idx_nocheck(vertex_t major_idx) const noexcept { if (major_hypersparse_first_) { @@ -339,6 +353,7 @@ class edge_partition_device_view_t{(*dcs_nzd_vertices_).data()} : thrust::nullopt; } + __host__ __device__ thrust::optional dcs_nzd_vertex_count() const { return dcs_nzd_vertices_ @@ -460,6 +475,11 @@ class edge_partition_device_view_t major_idx_from_major_nocheck(vertex_t major) const noexcept + { + return major_offset_from_major_nocheck(major); + } + __device__ vertex_t major_from_major_idx_nocheck(vertex_t major_idx) const noexcept { return major_from_major_offset_nocheck(major_idx); diff --git a/cpp/include/cugraph/graph_view.hpp b/cpp/include/cugraph/graph_view.hpp index 53c66c6483e..93d884a56d9 100644 --- a/cpp/include/cugraph/graph_view.hpp +++ b/cpp/include/cugraph/graph_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -631,6 +631,19 @@ class graph_view_t has_edge(raft::handle_t const& handle, + /* (edge_srcs, edge_dsts) should be pre-shuffled */ + raft::device_span edge_srcs, + raft::device_span edge_dsts, + bool do_expensive_check = false); + + rmm::device_uvector compute_multiplicity( + raft::handle_t const& handle, + /* (edge_srcs, edge_dsts) should be pre-shuffled */ + raft::device_span edge_srcs, + raft::device_span edge_dsts, + bool do_expensive_check = false); + template std::enable_if_t>> local_sorted_unique_edge_srcs() const @@ -928,6 +941,16 @@ class graph_view_t has_edge(raft::handle_t const& handle, + raft::device_span edge_srcs, + raft::device_span edge_dsts, + bool do_expensive_check = false); + + rmm::device_uvector compute_multiplicity(raft::handle_t const& handle, + raft::device_span edge_srcs, + raft::device_span edge_dsts, + bool do_expensive_check = false); + template std::enable_if_t>> local_sorted_unique_edge_srcs() const diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index 55e8f5c88d7..7ac294d7719 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index cefc1836fa6..8261ec747f9 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -63,35 +64,6 @@ namespace cugraph { namespace detail { -// check vertices in the pair are valid and first element of the pair is within the local vertex -// partition range -template -struct is_invalid_input_vertex_pair_t { - vertex_t num_vertices{}; - raft::device_span edge_partition_major_range_firsts{}; - raft::device_span edge_partition_major_range_lasts{}; - vertex_t edge_partition_minor_range_first{}; - vertex_t edge_partition_minor_range_last{}; - - __device__ bool operator()(thrust::tuple pair) const - { - auto major = thrust::get<0>(pair); - auto minor = thrust::get<1>(pair); - if (!is_valid_vertex(num_vertices, major) || !is_valid_vertex(num_vertices, minor)) { - return true; - } - auto it = thrust::upper_bound(thrust::seq, - edge_partition_major_range_lasts.begin(), - edge_partition_major_range_lasts.end(), - major); - if (it == edge_partition_major_range_lasts.end()) { return true; } - auto edge_partition_idx = - static_cast(thrust::distance(edge_partition_major_range_lasts.begin(), it)); - if (major < edge_partition_major_range_firsts[edge_partition_idx]) { return true; } - return (minor < edge_partition_minor_range_first) || (minor >= edge_partition_minor_range_last); - } -}; - // group index determined by major_comm_rank (primary key) and local edge partition index (secondary // key) template @@ -154,24 +126,11 @@ struct update_rx_major_local_degree_t { auto major = rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + local_edge_partition_idx] + offset_in_local_edge_partition]; - vertex_t major_idx{0}; - edge_t local_degree{0}; - if (multi_gpu && (edge_partition.major_hypersparse_first() && - (major >= *(edge_partition.major_hypersparse_first())))) { - auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major); - if (major_hypersparse_idx) { - major_idx = - (*(edge_partition.major_hypersparse_first()) - edge_partition.major_range_first()) + - *major_hypersparse_idx; - local_degree = edge_partition.local_degree(major_idx); - } - } else { - major_idx = edge_partition.major_offset_from_major_nocheck(major); - local_degree = edge_partition.local_degree(major_idx); - } + auto major_idx = edge_partition.major_idx_from_major_nocheck(major); + auto local_degree = major_idx ? 
edge_partition.local_degree(*major_idx) : edge_t{0}; if (edge_partition_e_mask && (local_degree > edge_t{0})) { - auto local_offset = edge_partition.local_offset(major_idx); + auto local_offset = edge_partition.local_offset(*major_idx); local_degree = static_cast( count_set_bits((*edge_partition_e_mask).value_first(), local_offset, local_degree)); } @@ -325,29 +284,11 @@ struct pick_min_degree_t { edge_t local_degree0{0}; vertex_t major0 = thrust::get<0>(pair); if constexpr (std::is_same_v) { - vertex_t major_idx{0}; - if constexpr (multi_gpu) { - if (edge_partition.major_hypersparse_first() && - (major0 >= *(edge_partition.major_hypersparse_first()))) { - auto major_hypersparse_idx = - edge_partition.major_hypersparse_idx_from_major_nocheck(major0); - if (major_hypersparse_idx) { - major_idx = - (*(edge_partition.major_hypersparse_first()) - edge_partition.major_range_first()) + - *major_hypersparse_idx; - local_degree0 = edge_partition.local_degree(major_idx); - } - } else { - major_idx = edge_partition.major_offset_from_major_nocheck(major0); - local_degree0 = edge_partition.local_degree(major_idx); - } - } else { - major_idx = edge_partition.major_offset_from_major_nocheck(major0); - local_degree0 = edge_partition.local_degree(major_idx); - } + auto major_idx = edge_partition.major_idx_from_major_nocheck(major0); + local_degree0 = major_idx ? edge_partition.local_degree(*major_idx) : edge_t{0}; if (edge_partition_e_mask && (local_degree0 > edge_t{0})) { - auto local_offset = edge_partition.local_offset(major_idx); + auto local_offset = edge_partition.local_offset(*major_idx); local_degree0 = count_set_bits((*edge_partition_e_mask).value_first(), local_offset, local_degree0); } @@ -360,29 +301,11 @@ struct pick_min_degree_t { edge_t local_degree1{0}; vertex_t major1 = thrust::get<1>(pair); if constexpr (std::is_same_v) { - vertex_t major_idx{0}; - if constexpr (multi_gpu) { - if (edge_partition.major_hypersparse_first() && - (major1 >= *(edge_partition.major_hypersparse_first()))) { - auto major_hypersparse_idx = - edge_partition.major_hypersparse_idx_from_major_nocheck(major1); - if (major_hypersparse_idx) { - major_idx = - (*(edge_partition.major_hypersparse_first()) - edge_partition.major_range_first()) + - *major_hypersparse_idx; - local_degree1 = edge_partition.local_degree(major_idx); - } - } else { - major_idx = edge_partition.major_offset_from_major_nocheck(major1); - local_degree1 = edge_partition.local_degree(major_idx); - } - } else { - major_idx = edge_partition.major_offset_from_major_nocheck(major1); - local_degree1 = edge_partition.local_degree(major_idx); - } + auto major_idx = edge_partition.major_idx_from_major_nocheck(major1); + local_degree1 = major_idx ? 
edge_partition.local_degree(*major_idx) : edge_t{0}; if (edge_partition_e_mask && (local_degree1 > edge_t{0})) { - auto local_offset = edge_partition.local_offset(major_idx); + auto local_offset = edge_partition.local_offset(*major_idx); local_degree1 = count_set_bits((*edge_partition_e_mask).value_first(), local_offset, local_degree1); } @@ -699,77 +622,6 @@ struct gatherv_indices_t { } }; -template -size_t count_invalid_vertex_pairs(raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexPairIterator vertex_pair_first, - VertexPairIterator vertex_pair_last) -{ - using vertex_t = typename GraphViewType::vertex_type; - - std::vector h_edge_partition_major_range_firsts( - graph_view.number_of_local_edge_partitions()); - std::vector h_edge_partition_major_range_lasts( - h_edge_partition_major_range_firsts.size()); - vertex_t edge_partition_minor_range_first{}; - vertex_t edge_partition_minor_range_last{}; - if constexpr (GraphViewType::is_multi_gpu) { - for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); i++) { - if constexpr (GraphViewType::is_storage_transposed) { - h_edge_partition_major_range_firsts[i] = graph_view.local_edge_partition_dst_range_first(i); - h_edge_partition_major_range_lasts[i] = graph_view.local_edge_partition_dst_range_last(i); - } else { - h_edge_partition_major_range_firsts[i] = graph_view.local_edge_partition_src_range_first(i); - h_edge_partition_major_range_lasts[i] = graph_view.local_edge_partition_src_range_last(i); - } - } - if constexpr (GraphViewType::is_storage_transposed) { - edge_partition_minor_range_first = graph_view.local_edge_partition_src_range_first(); - edge_partition_minor_range_last = graph_view.local_edge_partition_src_range_last(); - } else { - edge_partition_minor_range_first = graph_view.local_edge_partition_dst_range_first(); - edge_partition_minor_range_last = graph_view.local_edge_partition_dst_range_last(); - } - } else { - h_edge_partition_major_range_firsts[0] = vertex_t{0}; - h_edge_partition_major_range_lasts[0] = graph_view.number_of_vertices(); - edge_partition_minor_range_first = vertex_t{0}; - edge_partition_minor_range_last = graph_view.number_of_vertices(); - } - rmm::device_uvector d_edge_partition_major_range_firsts( - h_edge_partition_major_range_firsts.size(), handle.get_stream()); - rmm::device_uvector d_edge_partition_major_range_lasts( - h_edge_partition_major_range_lasts.size(), handle.get_stream()); - raft::update_device(d_edge_partition_major_range_firsts.data(), - h_edge_partition_major_range_firsts.data(), - h_edge_partition_major_range_firsts.size(), - handle.get_stream()); - raft::update_device(d_edge_partition_major_range_lasts.data(), - h_edge_partition_major_range_lasts.data(), - h_edge_partition_major_range_lasts.size(), - handle.get_stream()); - - auto num_invalid_pairs = thrust::count_if( - handle.get_thrust_policy(), - vertex_pair_first, - vertex_pair_last, - is_invalid_input_vertex_pair_t{ - graph_view.number_of_vertices(), - raft::device_span(d_edge_partition_major_range_firsts.begin(), - d_edge_partition_major_range_firsts.end()), - raft::device_span(d_edge_partition_major_range_lasts.begin(), - d_edge_partition_major_range_lasts.end()), - edge_partition_minor_range_first, - edge_partition_minor_range_last}); - if constexpr (GraphViewType::is_multi_gpu) { - auto& comm = handle.get_comms(); - num_invalid_pairs = - host_scalar_allreduce(comm, num_invalid_pairs, raft::comms::op_t::SUM, handle.get_stream()); - } - - return num_invalid_pairs; -} - // In multi-GPU, the first 
element of every vertex pair in [vertex_pair_first, vertex_pair) should // be within the valid edge partition major range assigned to this process and the second element // should be within the valid edge partition minor range assigned to this process. diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index 201c08325d7..469bfcb4e47 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/prims/transform_e.cuh b/cpp/src/prims/transform_e.cuh index c6623621d24..93a2d040b60 100644 --- a/cpp/src/prims/transform_e.cuh +++ b/cpp/src/prims/transform_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -426,28 +426,15 @@ void transform_e(raft::handle_t const& handle, edge_first + edge_partition_offsets[i + 1], [edge_partition, edge_partition_e_mask] __device__(thrust::tuple edge) { - auto major = thrust::get<0>(edge); - auto minor = thrust::get<1>(edge); - vertex_t major_idx{}; - auto major_hypersparse_first = edge_partition.major_hypersparse_first(); - if (major_hypersparse_first) { - if (major < *major_hypersparse_first) { - major_idx = edge_partition.major_offset_from_major_nocheck(major); - } else { - auto major_hypersparse_idx = - edge_partition.major_hypersparse_idx_from_major_nocheck(major); - if (!major_hypersparse_idx) { return true; } - major_idx = - edge_partition.major_offset_from_major_nocheck(*major_hypersparse_first) + - *major_hypersparse_idx; - } - } else { - major_idx = edge_partition.major_offset_from_major_nocheck(major); - } + auto major = thrust::get<0>(edge); + auto minor = thrust::get<1>(edge); + auto major_idx = edge_partition.major_idx_from_major_nocheck(major); + if (!major_idx) { return true; } vertex_t const* indices{nullptr}; edge_t edge_offset{}; edge_t local_degree{}; - thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_idx); + thrust::tie(indices, edge_offset, local_degree) = + edge_partition.local_edges(*major_idx); auto lower_it = thrust::lower_bound(thrust::seq, indices, indices + local_degree, minor); if (*lower_it != minor) { return true; } @@ -494,24 +481,16 @@ void transform_e(raft::handle_t const& handle, auto major = thrust::get<0>(edge); auto minor = thrust::get<1>(edge); - auto major_hypersparse_first = edge_partition.major_hypersparse_first(); - auto major_offset = edge_partition.major_offset_from_major_nocheck(major); - vertex_t major_idx{major_offset}; - - if ((major_hypersparse_first) && (major >= *major_hypersparse_first)) { - auto major_hypersparse_idx = - edge_partition.major_hypersparse_idx_from_major_nocheck(major); - assert(major_hypersparse_idx); - major_idx = edge_partition.major_offset_from_major_nocheck(*major_hypersparse_first) + - *major_hypersparse_idx; - } + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + auto major_idx = edge_partition.major_idx_from_major_nocheck(major); + 
assert(major_idx); auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor); vertex_t const* indices{nullptr}; edge_t edge_offset{}; edge_t local_degree{}; - thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_idx); + thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(*major_idx); auto lower_it = thrust::lower_bound(thrust::seq, indices, indices + local_degree, minor); auto upper_it = thrust::upper_bound(thrust::seq, lower_it, indices + local_degree, minor); diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index da0ecc991df..7928c61cf7b 100644 --- a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -414,6 +415,59 @@ edge_t count_edge_partition_multi_edges( } } +template +std::tuple, std::vector> +compute_edge_indices_and_edge_partition_offsets( + raft::handle_t const& handle, + graph_view_t const& graph_view, + raft::device_span edge_majors, + raft::device_span edge_minors) +{ + auto edge_first = thrust::make_zip_iterator(edge_majors.begin(), edge_minors.begin()); + + rmm::device_uvector edge_indices(edge_majors.size(), handle.get_stream()); + thrust::sequence(handle.get_thrust_policy(), edge_indices.begin(), edge_indices.end(), size_t{0}); + thrust::sort(handle.get_thrust_policy(), + edge_indices.begin(), + edge_indices.end(), + [edge_first] __device__(size_t lhs, size_t rhs) { + return *(edge_first + lhs) < *(edge_first + rhs); + }); + + std::vector h_major_range_lasts(graph_view.number_of_local_edge_partitions()); + for (size_t i = 0; i < h_major_range_lasts.size(); ++i) { + if constexpr (store_transposed) { + h_major_range_lasts[i] = graph_view.local_edge_partition_dst_range_last(i); + } else { + h_major_range_lasts[i] = graph_view.local_edge_partition_src_range_last(i); + } + } + rmm::device_uvector d_major_range_lasts(h_major_range_lasts.size(), handle.get_stream()); + raft::update_device(d_major_range_lasts.data(), + h_major_range_lasts.data(), + h_major_range_lasts.size(), + handle.get_stream()); + rmm::device_uvector d_lower_bounds(d_major_range_lasts.size(), handle.get_stream()); + auto major_first = edge_majors.begin(); + auto sorted_major_first = thrust::make_transform_iterator( + edge_indices.begin(), + cugraph::detail::indirection_t{major_first}); + thrust::lower_bound(handle.get_thrust_policy(), + sorted_major_first, + sorted_major_first + edge_indices.size(), + d_major_range_lasts.begin(), + d_major_range_lasts.end(), + d_lower_bounds.begin()); + std::vector edge_partition_offsets(d_lower_bounds.size() + 1, 0); + raft::update_host(edge_partition_offsets.data() + 1, + d_lower_bounds.data(), + d_lower_bounds.size(), + handle.get_stream()); + handle.sync_stream(); + + return std::make_tuple(std::move(edge_indices), edge_partition_offsets); +} + } // namespace template @@ -751,4 +805,293 @@ edge_t graph_view_tlocal_edge_partition_segment_offsets()); } +template +rmm::device_uvector +graph_view_t>::has_edge( + raft::handle_t const& handle, + raft::device_span edge_srcs, + raft::device_span edge_dsts, + bool do_expensive_check) +{ + CUGRAPH_EXPECTS( + edge_srcs.size() == edge_dsts.size(), + "Invalid input 
arguments: edge_srcs.size() does not coincide with edge_dsts.size()."); + + auto edge_first = + thrust::make_zip_iterator(store_transposed ? edge_dsts.begin() : edge_srcs.begin(), + store_transposed ? edge_srcs.begin() : edge_dsts.begin()); + + if (do_expensive_check) { + auto num_invalids = + detail::count_invalid_vertex_pairs(handle, *this, edge_first, edge_first + edge_srcs.size()); + CUGRAPH_EXPECTS(num_invalids == 0, + "Invalid input argument: there are invalid edge (src, dst) pairs."); + } + + auto [edge_indices, edge_partition_offsets] = + compute_edge_indices_and_edge_partition_offsets(handle, + *this, + store_transposed ? edge_dsts : edge_srcs, + store_transposed ? edge_srcs : edge_dsts); + + auto edge_mask_view = this->edge_mask_view(); + + auto sorted_edge_first = thrust::make_transform_iterator( + edge_indices.begin(), cugraph::detail::indirection_t{edge_first}); + rmm::device_uvector ret(edge_srcs.size(), handle.get_stream()); + + for (size_t i = 0; i < this->number_of_local_edge_partitions(); ++i) { + auto edge_partition = + edge_partition_device_view_t(this->local_edge_partition_view(i)); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, i) + : thrust::nullopt; + thrust::transform(handle.get_thrust_policy(), + sorted_edge_first + edge_partition_offsets[i], + sorted_edge_first + edge_partition_offsets[i + 1], + thrust::make_permutation_iterator( + ret.begin(), edge_indices.begin() + edge_partition_offsets[i]), + [edge_partition, edge_partition_e_mask] __device__(auto e) { + auto major = thrust::get<0>(e); + auto minor = thrust::get<1>(e); + auto major_idx = edge_partition.major_idx_from_major_nocheck(major); + if (major_idx) { + vertex_t const* indices{nullptr}; + edge_t local_edge_offset{}; + edge_t local_degree{}; + thrust::tie(indices, local_edge_offset, local_degree) = + edge_partition.local_edges(*major_idx); + auto it = thrust::lower_bound( + thrust::seq, indices, indices + local_degree, minor); + if ((it != indices + local_degree) && *it == minor) { + if (edge_partition_e_mask) { + return (*edge_partition_e_mask) + .get(local_edge_offset + thrust::distance(indices, it)); + } else { + return true; + } + } else { + return false; + } + } else { + return false; + } + }); + } + + return ret; +} + +template +rmm::device_uvector +graph_view_t>::has_edge( + raft::handle_t const& handle, + raft::device_span edge_srcs, + raft::device_span edge_dsts, + bool do_expensive_check) +{ + CUGRAPH_EXPECTS( + edge_srcs.size() == edge_dsts.size(), + "Invalid input arguments: edge_srcs.size() does not coincide with edge_dsts.size()."); + + auto edge_first = + thrust::make_zip_iterator(store_transposed ? edge_dsts.begin() : edge_srcs.begin(), + store_transposed ? edge_srcs.begin() : edge_dsts.begin()); + + if (do_expensive_check) { + auto num_invalids = + detail::count_invalid_vertex_pairs(handle, *this, edge_first, edge_first + edge_srcs.size()); + CUGRAPH_EXPECTS(num_invalids == 0, + "Invalid input argument: there are invalid edge (src, dst) pairs."); + } + + auto edge_mask_view = this->edge_mask_view(); + + rmm::device_uvector ret(edge_srcs.size(), handle.get_stream()); + + auto edge_partition = + edge_partition_device_view_t(this->local_edge_partition_view()); + auto edge_partition_e_mask = + edge_mask_view + ? 
thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, 0) + : thrust::nullopt; + thrust::transform( + handle.get_thrust_policy(), + edge_first, + edge_first + edge_srcs.size(), + ret.begin(), + [edge_partition, edge_partition_e_mask] __device__(auto e) { + auto major = thrust::get<0>(e); + auto minor = thrust::get<1>(e); + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + vertex_t const* indices{nullptr}; + edge_t local_edge_offset{}; + edge_t local_degree{}; + thrust::tie(indices, local_edge_offset, local_degree) = + edge_partition.local_edges(major_offset); + auto it = thrust::lower_bound(thrust::seq, indices, indices + local_degree, minor); + if ((it != indices + local_degree) && *it == minor) { + if (edge_partition_e_mask) { + return (*edge_partition_e_mask).get(local_edge_offset + thrust::distance(indices, it)); + } else { + return true; + } + } else { + return false; + } + }); + + return ret; +} + +template +rmm::device_uvector +graph_view_t>:: + compute_multiplicity(raft::handle_t const& handle, + raft::device_span edge_srcs, + raft::device_span edge_dsts, + bool do_expensive_check) +{ + CUGRAPH_EXPECTS(this->is_multigraph(), "Use has_edge() instead for non-multigraphs."); + CUGRAPH_EXPECTS( + edge_srcs.size() == edge_dsts.size(), + "Invalid input arguments: edge_srcs.size() does not coincide with edge_dsts.size()."); + + auto edge_first = + thrust::make_zip_iterator(store_transposed ? edge_dsts.begin() : edge_srcs.begin(), + store_transposed ? edge_srcs.begin() : edge_dsts.begin()); + + if (do_expensive_check) { + auto num_invalids = + detail::count_invalid_vertex_pairs(handle, *this, edge_first, edge_first + edge_srcs.size()); + CUGRAPH_EXPECTS(num_invalids == 0, + "Invalid input argument: there are invalid edge (src, dst) pairs."); + } + + auto [edge_indices, edge_partition_offsets] = + compute_edge_indices_and_edge_partition_offsets(handle, + *this, + store_transposed ? edge_dsts : edge_srcs, + store_transposed ? edge_srcs : edge_dsts); + + auto edge_mask_view = this->edge_mask_view(); + + auto sorted_edge_first = thrust::make_transform_iterator( + edge_indices.begin(), cugraph::detail::indirection_t{edge_first}); + rmm::device_uvector ret(edge_srcs.size(), handle.get_stream()); + + for (size_t i = 0; i < this->number_of_local_edge_partitions(); ++i) { + auto edge_partition = + edge_partition_device_view_t(this->local_edge_partition_view(i)); + auto edge_partition_e_mask = + edge_mask_view + ? 
thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, i) + : thrust::nullopt; + thrust::transform( + handle.get_thrust_policy(), + sorted_edge_first + edge_partition_offsets[i], + sorted_edge_first + edge_partition_offsets[i + 1], + thrust::make_permutation_iterator(ret.begin(), + edge_indices.begin() + edge_partition_offsets[i]), + [edge_partition, edge_partition_e_mask] __device__(auto e) { + auto major = thrust::get<0>(e); + auto minor = thrust::get<1>(e); + auto major_idx = edge_partition.major_idx_from_major_nocheck(major); + if (major_idx) { + vertex_t const* indices{nullptr}; + edge_t local_edge_offset{}; + edge_t local_degree{}; + thrust::tie(indices, local_edge_offset, local_degree) = + edge_partition.local_edges(*major_idx); + auto lower_it = thrust::lower_bound(thrust::seq, indices, indices + local_degree, minor); + auto upper_it = thrust::upper_bound(thrust::seq, indices, indices + local_degree, minor); + auto multiplicity = static_cast(thrust::distance(lower_it, upper_it)); + if (edge_partition_e_mask && (multiplicity > 0)) { + multiplicity = static_cast(detail::count_set_bits( + (*edge_partition_e_mask).value_first(), + static_cast(local_edge_offset + thrust::distance(indices, lower_it)), + static_cast(multiplicity))); + } + return multiplicity; + } else { + return edge_t{0}; + } + }); + } + + return ret; +} + +template +rmm::device_uvector +graph_view_t>:: + compute_multiplicity(raft::handle_t const& handle, + raft::device_span edge_srcs, + raft::device_span edge_dsts, + bool do_expensive_check) +{ + CUGRAPH_EXPECTS(this->is_multigraph(), "Use has_edge() instead for non-multigraphs."); + CUGRAPH_EXPECTS( + edge_srcs.size() == edge_dsts.size(), + "Invalid input arguments: edge_srcs.size() does not coincide with edge_dsts.size()."); + + auto edge_first = + thrust::make_zip_iterator(store_transposed ? edge_dsts.begin() : edge_srcs.begin(), + store_transposed ? edge_srcs.begin() : edge_dsts.begin()); + + if (do_expensive_check) { + auto num_invalids = + detail::count_invalid_vertex_pairs(handle, *this, edge_first, edge_first + edge_srcs.size()); + CUGRAPH_EXPECTS(num_invalids == 0, + "Invalid input argument: there are invalid edge (src, dst) pairs."); + } + + auto edge_mask_view = this->edge_mask_view(); + + rmm::device_uvector ret(edge_srcs.size(), handle.get_stream()); + + auto edge_partition = + edge_partition_device_view_t(this->local_edge_partition_view()); + auto edge_partition_e_mask = + edge_mask_view + ? 
thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, 0) + : thrust::nullopt; + thrust::transform( + handle.get_thrust_policy(), + edge_first, + edge_first + edge_srcs.size(), + ret.begin(), + [edge_partition, edge_partition_e_mask] __device__(auto e) { + auto major = thrust::get<0>(e); + auto minor = thrust::get<1>(e); + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + vertex_t const* indices{nullptr}; + edge_t local_edge_offset{}; + edge_t local_degree{}; + thrust::tie(indices, local_edge_offset, local_degree) = + edge_partition.local_edges(major_offset); + auto lower_it = thrust::lower_bound(thrust::seq, indices, indices + local_degree, minor); + auto upper_it = thrust::upper_bound(thrust::seq, indices, indices + local_degree, minor); + auto multiplicity = static_cast(thrust::distance(lower_it, upper_it)); + if (edge_partition_e_mask && (multiplicity > 0)) { + multiplicity = static_cast(detail::count_set_bits( + (*edge_partition_e_mask).value_first(), + static_cast(local_edge_offset + thrust::distance(indices, lower_it)), + static_cast(multiplicity))); + } + return multiplicity; + }); + + return ret; +} + } // namespace cugraph diff --git a/cpp/src/utilities/error_check_utils.cuh b/cpp/src/utilities/error_check_utils.cuh new file mode 100644 index 00000000000..baaf513d93d --- /dev/null +++ b/cpp/src/utilities/error_check_utils.cuh @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include + +namespace cugraph { +namespace detail { + +// check vertices in the pair are in [0, num_vertices) and belong to one of the local edge +// partitions.
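+// e.g., with num_vertices = 16, local edge partition major ranges [0, 4) and [4, 8), and local
+// minor range [0, 8), the pair (9, 2) is invalid (9 falls in no local major range) and (3, 12)
+// is invalid (12 lies outside the local minor range).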
+template +struct is_invalid_input_vertex_pair_t { + vertex_t num_vertices{}; + raft::device_span edge_partition_major_range_firsts{}; + raft::device_span edge_partition_major_range_lasts{}; + vertex_t edge_partition_minor_range_first{}; + vertex_t edge_partition_minor_range_last{}; + + __device__ bool operator()(thrust::tuple pair) const + { + auto major = thrust::get<0>(pair); + auto minor = thrust::get<1>(pair); + if (!is_valid_vertex(num_vertices, major) || !is_valid_vertex(num_vertices, minor)) { + return true; + } + auto it = thrust::upper_bound(thrust::seq, + edge_partition_major_range_lasts.begin(), + edge_partition_major_range_lasts.end(), + major); + if (it == edge_partition_major_range_lasts.end()) { return true; } + auto edge_partition_idx = + static_cast(thrust::distance(edge_partition_major_range_lasts.begin(), it)); + if (major < edge_partition_major_range_firsts[edge_partition_idx]) { return true; } + return (minor < edge_partition_minor_range_first) || (minor >= edge_partition_minor_range_last); + } +}; + +template +size_t count_invalid_vertex_pairs(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexPairIterator vertex_pair_first, + VertexPairIterator vertex_pair_last) +{ + using vertex_t = typename GraphViewType::vertex_type; + + std::vector h_edge_partition_major_range_firsts( + graph_view.number_of_local_edge_partitions()); + std::vector h_edge_partition_major_range_lasts( + h_edge_partition_major_range_firsts.size()); + vertex_t edge_partition_minor_range_first{}; + vertex_t edge_partition_minor_range_last{}; + if constexpr (GraphViewType::is_multi_gpu) { + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); i++) { + if constexpr (GraphViewType::is_storage_transposed) { + h_edge_partition_major_range_firsts[i] = graph_view.local_edge_partition_dst_range_first(i); + h_edge_partition_major_range_lasts[i] = graph_view.local_edge_partition_dst_range_last(i); + } else { + h_edge_partition_major_range_firsts[i] = graph_view.local_edge_partition_src_range_first(i); + h_edge_partition_major_range_lasts[i] = graph_view.local_edge_partition_src_range_last(i); + } + } + if constexpr (GraphViewType::is_storage_transposed) { + edge_partition_minor_range_first = graph_view.local_edge_partition_src_range_first(); + edge_partition_minor_range_last = graph_view.local_edge_partition_src_range_last(); + } else { + edge_partition_minor_range_first = graph_view.local_edge_partition_dst_range_first(); + edge_partition_minor_range_last = graph_view.local_edge_partition_dst_range_last(); + } + } else { + h_edge_partition_major_range_firsts[0] = vertex_t{0}; + h_edge_partition_major_range_lasts[0] = graph_view.number_of_vertices(); + edge_partition_minor_range_first = vertex_t{0}; + edge_partition_minor_range_last = graph_view.number_of_vertices(); + } + rmm::device_uvector d_edge_partition_major_range_firsts( + h_edge_partition_major_range_firsts.size(), handle.get_stream()); + rmm::device_uvector d_edge_partition_major_range_lasts( + h_edge_partition_major_range_lasts.size(), handle.get_stream()); + raft::update_device(d_edge_partition_major_range_firsts.data(), + h_edge_partition_major_range_firsts.data(), + h_edge_partition_major_range_firsts.size(), + handle.get_stream()); + raft::update_device(d_edge_partition_major_range_lasts.data(), + h_edge_partition_major_range_lasts.data(), + h_edge_partition_major_range_lasts.size(), + handle.get_stream()); + + auto num_invalid_pairs = thrust::count_if( + handle.get_thrust_policy(), + vertex_pair_first, + 
vertex_pair_last, + is_invalid_input_vertex_pair_t{ + graph_view.number_of_vertices(), + raft::device_span(d_edge_partition_major_range_firsts.begin(), + d_edge_partition_major_range_firsts.end()), + raft::device_span(d_edge_partition_major_range_lasts.begin(), + d_edge_partition_major_range_lasts.end()), + edge_partition_minor_range_first, + edge_partition_minor_range_last}); + if constexpr (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + num_invalid_pairs = + host_scalar_allreduce(comm, num_invalid_pairs, raft::comms::op_t::SUM, handle.get_stream()); + } + + return num_invalid_pairs; +} + +} // namespace detail +} // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index d9d2f677abc..3df979fe5c2 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -313,6 +313,11 @@ ConfigureTest(DEGREE_TEST structure/degree_test.cpp) ConfigureTest(COUNT_SELF_LOOPS_AND_MULTI_EDGES_TEST "structure/count_self_loops_and_multi_edges_test.cpp") +################################################################################################### +# - Query edge existence and multiplicity tests --------------------------------------------------- +ConfigureTest(HAS_EDGE_AND_COMPUTE_MULTIPLICITY_TEST + "structure/has_edge_and_compute_multiplicity_test.cpp") + ################################################################################################### # - Coarsening tests ------------------------------------------------------------------------------ ConfigureTest(COARSEN_GRAPH_TEST structure/coarsen_graph_test.cpp) @@ -479,6 +484,11 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureTestMG(MG_COUNT_SELF_LOOPS_AND_MULTI_EDGES_TEST "structure/mg_count_self_loops_and_multi_edges_test.cpp") + ############################################################################################### + # - MG Query edge existence and multiplicity tests -------------------------------------------- + ConfigureTestMG(MG_HAS_EDGE_AND_COMPUTE_MULTIPLICITY_TEST + "structure/mg_has_edge_and_compute_multiplicity_test.cpp") + ############################################################################################### # - MG PAGERANK tests ------------------------------------------------------------------------- ConfigureTestMG(MG_PAGERANK_TEST link_analysis/mg_pagerank_test.cpp) diff --git a/cpp/tests/structure/count_self_loops_and_multi_edges_test.cpp b/cpp/tests/structure/count_self_loops_and_multi_edges_test.cpp index 68828d5eee1..b7f1dce2023 100644 --- a/cpp/tests/structure/count_self_loops_and_multi_edges_test.cpp +++ b/cpp/tests/structure/count_self_loops_and_multi_edges_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -208,10 +208,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_CountSelfLoopsAndMultiEdges_File, ::testing::Combine( // enable correctness checks - ::testing::Values(CountSelfLoopsAndMultiEdges_Usecase{}, - CountSelfLoopsAndMultiEdges_Usecase{}, - CountSelfLoopsAndMultiEdges_Usecase{}, - CountSelfLoopsAndMultiEdges_Usecase{}), + ::testing::Values(CountSelfLoopsAndMultiEdges_Usecase{}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); @@ -220,10 +217,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_CountSelfLoopsAndMultiEdges_Rmat, ::testing::Combine( // enable correctness checks - ::testing::Values(CountSelfLoopsAndMultiEdges_Usecase{}, - CountSelfLoopsAndMultiEdges_Usecase{}, - CountSelfLoopsAndMultiEdges_Usecase{}, - CountSelfLoopsAndMultiEdges_Usecase{}), + ::testing::Values(CountSelfLoopsAndMultiEdges_Usecase{}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); INSTANTIATE_TEST_SUITE_P( @@ -235,10 +229,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_CountSelfLoopsAndMultiEdges_Rmat, ::testing::Combine( // disable correctness checks for large graphs - ::testing::Values(CountSelfLoopsAndMultiEdges_Usecase{false}, - CountSelfLoopsAndMultiEdges_Usecase{false}, - CountSelfLoopsAndMultiEdges_Usecase{false}, - CountSelfLoopsAndMultiEdges_Usecase{false}), + ::testing::Values(CountSelfLoopsAndMultiEdges_Usecase{false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/structure/has_edge_and_compute_multiplicity_test.cpp b/cpp/tests/structure/has_edge_and_compute_multiplicity_test.cpp new file mode 100644 index 00000000000..3ad6953ca03 --- /dev/null +++ b/cpp/tests/structure/has_edge_and_compute_multiplicity_test.cpp @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +struct HasEdgeAndComputeMultiplicity_Usecase { + size_t num_vertex_pairs{}; + bool check_correctness{true}; +}; + +template +class Tests_HasEdgeAndComputeMultiplicity + : public ::testing::TestWithParam< + std::tuple> { + public: + Tests_HasEdgeAndComputeMultiplicity() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test( + HasEdgeAndComputeMultiplicity_Usecase const& has_edge_and_compute_multiplicity_usecase, + input_usecase_t const& input_usecase) + { + using weight_t = float; + + constexpr bool renumber = true; + + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Construct graph"); + } + + cugraph::graph_t graph(handle); + std::optional> d_renumber_map_labels{std::nullopt}; + std::tie(graph, std::ignore, d_renumber_map_labels) = + cugraph::test::construct_graph( + handle, input_usecase, false, renumber); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto graph_view = graph.view(); + + raft::random::RngState rng_state(0); + rmm::device_uvector edge_srcs( + has_edge_and_compute_multiplicity_usecase.num_vertex_pairs, handle.get_stream()); + rmm::device_uvector edge_dsts(edge_srcs.size(), handle.get_stream()); + cugraph::detail::uniform_random_fill(handle.get_stream(), + edge_srcs.data(), + edge_srcs.size(), + vertex_t{0}, + graph_view.number_of_vertices(), + rng_state); + cugraph::detail::uniform_random_fill(handle.get_stream(), + edge_dsts.data(), + edge_dsts.size(), + vertex_t{0}, + graph_view.number_of_vertices(), + rng_state); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Querying edge existence"); + } + + auto edge_exists = + graph_view.has_edge(handle, + raft::device_span(edge_srcs.data(), edge_srcs.size()), + raft::device_span(edge_dsts.data(), edge_dsts.size())); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Computing multiplicity"); + } + + auto edge_multiplicities = graph_view.compute_multiplicity( + handle, + raft::device_span(edge_srcs.data(), edge_srcs.size()), + raft::device_span(edge_dsts.data(), edge_dsts.size())); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (has_edge_and_compute_multiplicity_usecase.check_correctness) { + cugraph::graph_t unrenumbered_graph(handle); + if (renumber) { + std::tie(unrenumbered_graph, std::ignore, std::ignore) = + cugraph::test::construct_graph( + handle, input_usecase, false, false); + } + auto unrenumbered_graph_view = renumber ? 
unrenumbered_graph.view() : graph_view; + + std::vector h_offsets = cugraph::test::to_host( + handle, unrenumbered_graph_view.local_edge_partition_view().offsets()); + std::vector h_indices = cugraph::test::to_host( + handle, unrenumbered_graph_view.local_edge_partition_view().indices()); + + rmm::device_uvector d_unrenumbered_edge_srcs(edge_srcs.size(), handle.get_stream()); + rmm::device_uvector d_unrenumbered_edge_dsts(edge_dsts.size(), handle.get_stream()); + raft::copy_async( + d_unrenumbered_edge_srcs.data(), edge_srcs.data(), edge_srcs.size(), handle.get_stream()); + raft::copy_async( + d_unrenumbered_edge_dsts.data(), edge_dsts.data(), edge_dsts.size(), handle.get_stream()); + if (renumber) { + cugraph::unrenumber_local_int_vertices(handle, + d_unrenumbered_edge_srcs.data(), + d_unrenumbered_edge_srcs.size(), + (*d_renumber_map_labels).data(), + vertex_t{0}, + graph_view.number_of_vertices()); + cugraph::unrenumber_local_int_vertices(handle, + d_unrenumbered_edge_dsts.data(), + d_unrenumbered_edge_dsts.size(), + (*d_renumber_map_labels).data(), + vertex_t{0}, + graph_view.number_of_vertices()); + } + auto h_unrenumbered_edge_srcs = cugraph::test::to_host(handle, d_unrenumbered_edge_srcs); + auto h_unrenumbered_edge_dsts = cugraph::test::to_host(handle, d_unrenumbered_edge_dsts); + + auto h_cugraph_edge_exists = cugraph::test::to_host(handle, edge_exists); + auto h_cugraph_edge_multiplicities = cugraph::test::to_host(handle, edge_multiplicities); + std::vector h_reference_edge_exists(edge_srcs.size()); + std::vector h_reference_edge_multiplicities(edge_srcs.size()); + for (size_t i = 0; i < edge_srcs.size(); ++i) { + auto src = h_unrenumbered_edge_srcs[i]; + auto dst = h_unrenumbered_edge_dsts[i]; + auto major = store_transposed ? dst : src; + auto minor = store_transposed ? src : dst; + auto lower_it = std::lower_bound( + h_indices.begin() + h_offsets[major], h_indices.begin() + h_offsets[major + 1], minor); + auto upper_it = std::upper_bound( + h_indices.begin() + h_offsets[major], h_indices.begin() + h_offsets[major + 1], minor); + auto multiplicity = static_cast(std::distance(lower_it, upper_it)); + h_reference_edge_exists[i] = multiplicity > 0 ? 
true : false; + h_reference_edge_multiplicities[i] = multiplicity; + } + + ASSERT_TRUE(std::equal(h_reference_edge_exists.begin(), + h_reference_edge_exists.end(), + h_cugraph_edge_exists.begin())) + << "has_edge() return values do not match with the reference values."; + ASSERT_TRUE(std::equal(h_reference_edge_multiplicities.begin(), + h_reference_edge_multiplicities.end(), + h_cugraph_edge_multiplicities.begin())) + << "compute_multiplicity() return values do not match with the reference values."; + } + } +}; + +using Tests_HasEdgeAndComputeMultiplicity_File = + Tests_HasEdgeAndComputeMultiplicity; +using Tests_HasEdgeAndComputeMultiplicity_Rmat = + Tests_HasEdgeAndComputeMultiplicity; + +TEST_P(Tests_HasEdgeAndComputeMultiplicity_File, CheckInt32Int32FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_HasEdgeAndComputeMultiplicity_Rmat, CheckInt32Int32FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_HasEdgeAndComputeMultiplicity_Rmat, CheckInt32Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_HasEdgeAndComputeMultiplicity_Rmat, CheckInt64Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_HasEdgeAndComputeMultiplicity_File, CheckInt32Int32FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_HasEdgeAndComputeMultiplicity_Rmat, CheckInt32Int32FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_HasEdgeAndComputeMultiplicity_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values(HasEdgeAndComputeMultiplicity_Usecase{1024 * 128}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_HasEdgeAndComputeMultiplicity_Rmat, + ::testing::Combine( + // enable correctness checks + ::testing::Values(HasEdgeAndComputeMultiplicity_Usecase{1024 * 128}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_HasEdgeAndComputeMultiplicity_Rmat, + ::testing::Combine( + // disable correctness checks for large graphs + ::testing::Values(HasEdgeAndComputeMultiplicity_Usecase{1024 * 1024 * 128, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp b/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp new file mode 100644 index 
00000000000..8079de7ebfe --- /dev/null +++ b/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp @@ -0,0 +1,331 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +struct HasEdgeAndComputeMultiplicity_Usecase { + size_t num_vertex_pairs{}; + bool check_correctness{true}; +}; + +template +class Tests_MGHasEdgeAndComputeMultiplicity + : public ::testing::TestWithParam< + std::tuple> { + public: + Tests_MGHasEdgeAndComputeMultiplicity() {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + + static void TearDownTestCase() { handle_.reset(); } + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of running has_edge & compute_multiplicity on multiple GPUs to that of + // a single-GPU run + template + void run_current_test( + HasEdgeAndComputeMultiplicity_Usecase const& has_edge_and_compute_multiplicity_usecase, + input_usecase_t const& input_usecase) + { + using weight_t = float; + using edge_type_id_t = int32_t; + + HighResTimer hr_timer{}; + + auto const comm_rank = handle_->get_comms().get_rank(); + auto const comm_size = handle_->get_comms().get_size(); + + // 1. create MG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG Construct graph"); + } + + cugraph::graph_t mg_graph(*handle_); + std::optional> mg_renumber_map{std::nullopt}; + std::tie(mg_graph, std::ignore, mg_renumber_map) = + cugraph::test::construct_graph( + *handle_, input_usecase, false, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto mg_graph_view = mg_graph.view(); + + // 2. create an edge list to query + + raft::random::RngState rng_state(comm_rank); + size_t num_vertex_pairs_this_gpu = + (has_edge_and_compute_multiplicity_usecase.num_vertex_pairs / comm_size) + + ((comm_rank < has_edge_and_compute_multiplicity_usecase.num_vertex_pairs % comm_size) + ? size_t{1} + : size_t{0}); + rmm::device_uvector d_mg_edge_srcs(num_vertex_pairs_this_gpu, handle_->get_stream()); + rmm::device_uvector d_mg_edge_dsts(d_mg_edge_srcs.size(), handle_->get_stream()); + cugraph::detail::uniform_random_fill(handle_->get_stream(), + d_mg_edge_srcs.data(), + d_mg_edge_srcs.size(), + vertex_t{0}, + mg_graph_view.number_of_vertices(), + rng_state); + cugraph::detail::uniform_random_fill(handle_->get_stream(), + d_mg_edge_dsts.data(), + d_mg_edge_dsts.size(), + vertex_t{0}, + mg_graph_view.number_of_vertices(), + rng_state); + + std::tie(store_transposed ? d_mg_edge_dsts : d_mg_edge_srcs, + store_transposed ? 
d_mg_edge_srcs : d_mg_edge_dsts, + std::ignore, + std::ignore, + std::ignore) = + cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< + vertex_t, + edge_t, + weight_t, + edge_type_id_t>(*handle_, + std::move(store_transposed ? d_mg_edge_dsts : d_mg_edge_srcs), + std::move(store_transposed ? d_mg_edge_srcs : d_mg_edge_dsts), + std::nullopt, + std::nullopt, + std::nullopt, + mg_graph_view.vertex_partition_range_lasts()); + + // 3. run MG has_edge & compute_multiplicity + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG Querying edge existence"); + } + + auto d_mg_edge_exists = mg_graph_view.has_edge( + *handle_, + raft::device_span(d_mg_edge_srcs.data(), d_mg_edge_srcs.size()), + raft::device_span(d_mg_edge_dsts.data(), d_mg_edge_dsts.size())); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG Computing multiplicity"); + } + + auto d_mg_edge_multiplicities = mg_graph_view.compute_multiplicity( + *handle_, + raft::device_span(d_mg_edge_srcs.data(), d_mg_edge_srcs.size()), + raft::device_span(d_mg_edge_dsts.data(), d_mg_edge_dsts.size())); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 4. compare SG & MG results + + if (has_edge_and_compute_multiplicity_usecase.check_correctness) { + // 4-1. aggregate MG results + + cugraph::unrenumber_int_vertices( + *handle_, + d_mg_edge_srcs.data(), + d_mg_edge_srcs.size(), + (*mg_renumber_map).data(), + mg_graph_view.vertex_partition_range_lasts()); + cugraph::unrenumber_int_vertices( + *handle_, + d_mg_edge_dsts.data(), + d_mg_edge_dsts.size(), + (*mg_renumber_map).data(), + mg_graph_view.vertex_partition_range_lasts()); + + auto d_mg_aggregate_edge_srcs = cugraph::test::device_gatherv( + *handle_, raft::device_span(d_mg_edge_srcs.data(), d_mg_edge_srcs.size())); + auto d_mg_aggregate_edge_dsts = cugraph::test::device_gatherv( + *handle_, raft::device_span(d_mg_edge_dsts.data(), d_mg_edge_dsts.size())); + auto d_mg_aggregate_edge_exists = cugraph::test::device_gatherv( + *handle_, raft::device_span(d_mg_edge_exists.data(), d_mg_edge_exists.size())); + auto d_mg_aggregate_edge_multiplicities = cugraph::test::device_gatherv( + *handle_, + raft::device_span(d_mg_edge_multiplicities.data(), + d_mg_edge_multiplicities.size())); + + cugraph::graph_t sg_graph(*handle_); + std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); + + if (handle_->get_comms().get_rank() == 0) { + auto sg_graph_view = sg_graph.view(); + + // 4-2.
run SG has_edge & compute_multiplicity + + auto d_sg_edge_exists = sg_graph_view.has_edge( + *handle_, + raft::device_span(d_mg_aggregate_edge_srcs.data(), + d_mg_aggregate_edge_srcs.size()), + raft::device_span(d_mg_aggregate_edge_dsts.data(), + d_mg_aggregate_edge_dsts.size())); + auto d_sg_edge_multiplicities = sg_graph_view.compute_multiplicity( + *handle_, + raft::device_span(d_mg_aggregate_edge_srcs.data(), + d_mg_aggregate_edge_srcs.size()), + raft::device_span(d_mg_aggregate_edge_dsts.data(), + d_mg_aggregate_edge_dsts.size())); + + // 4-3. compare + + auto h_mg_aggregate_edge_exists = + cugraph::test::to_host(*handle_, d_mg_aggregate_edge_exists); + auto h_mg_aggregate_edge_multiplicities = + cugraph::test::to_host(*handle_, d_mg_aggregate_edge_multiplicities); + auto h_sg_edge_exists = cugraph::test::to_host(*handle_, d_sg_edge_exists); + auto h_sg_edge_multiplicities = cugraph::test::to_host(*handle_, d_sg_edge_multiplicities); + + ASSERT_TRUE(std::equal(h_mg_aggregate_edge_exists.begin(), + h_mg_aggregate_edge_exists.end(), + h_sg_edge_exists.begin())); + ASSERT_TRUE(std::equal(h_mg_aggregate_edge_multiplicities.begin(), + h_mg_aggregate_edge_multiplicities.end(), + h_sg_edge_multiplicities.begin())); + } + } + } + + private: + static std::unique_ptr handle_; +}; + +template +std::unique_ptr Tests_MGHasEdgeAndComputeMultiplicity::handle_ = + nullptr; + +using Tests_MGHasEdgeAndComputeMultiplicity_File = + Tests_MGHasEdgeAndComputeMultiplicity; +using Tests_MGHasEdgeAndComputeMultiplicity_Rmat = + Tests_MGHasEdgeAndComputeMultiplicity; + +TEST_P(Tests_MGHasEdgeAndComputeMultiplicity_File, CheckInt32Int32FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGHasEdgeAndComputeMultiplicity_Rmat, CheckInt32Int32FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGHasEdgeAndComputeMultiplicity_Rmat, CheckInt32Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGHasEdgeAndComputeMultiplicity_Rmat, CheckInt64Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGHasEdgeAndComputeMultiplicity_File, CheckInt32Int32FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGHasEdgeAndComputeMultiplicity_Rmat, CheckInt32Int32FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +INSTANTIATE_TEST_SUITE_P( + file_tests, + Tests_MGHasEdgeAndComputeMultiplicity_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values(HasEdgeAndComputeMultiplicity_Usecase{1024 * 128}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_tests, + Tests_MGHasEdgeAndComputeMultiplicity_Rmat, + ::testing::Combine( + ::testing::Values(HasEdgeAndComputeMultiplicity_Usecase{1024 * 128}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P( +
+  rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with
+                          --gtest_filter to select only the rmat_benchmark_test with a specific
+                          vertex & edge type combination) by command line arguments and do not
+                          include more than one Rmat_Usecase that differ only in scale or edge
+                          factor (to avoid running same benchmarks more than once) */
+  Tests_MGHasEdgeAndComputeMultiplicity_Rmat,
+  ::testing::Combine(
+    ::testing::Values(HasEdgeAndComputeMultiplicity_Usecase{1024 * 1024 * 128, false}),
+    ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false))));
+
+CUGRAPH_MG_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/utilities/device_comm_wrapper.cu b/cpp/tests/utilities/device_comm_wrapper.cu
index cfc65b5d741..50727394ad7 100644
--- a/cpp/tests/utilities/device_comm_wrapper.cu
+++ b/cpp/tests/utilities/device_comm_wrapper.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -40,9 +40,10 @@ rmm::device_uvector<T> device_gatherv(raft::handle_t const& handle,
   rmm::device_uvector<T> gathered_v(
     is_root ? std::reduce(rx_sizes.begin(), rx_sizes.end()) : size_t{0}, handle.get_stream());
 
+  using comm_datatype_t = std::conditional_t<std::is_same_v<T, bool>, uint8_t, T>;
   cugraph::device_gatherv(handle.get_comms(),
-                          d_input.data(),
-                          gathered_v.data(),
+                          reinterpret_cast<comm_datatype_t const*>(d_input.data()),
+                          reinterpret_cast<comm_datatype_t*>(gathered_v.data()),
                           d_input.size(),
                           rx_sizes,
                           rx_displs,
@@ -64,9 +65,10 @@ rmm::device_uvector<T> device_allgatherv(raft::handle_t const& handle,
   rmm::device_uvector<T> gathered_v(std::reduce(rx_sizes.begin(), rx_sizes.end()),
                                     handle.get_stream());
 
+  using comm_datatype_t = std::conditional_t<std::is_same_v<T, bool>, uint8_t, T>;
   cugraph::device_allgatherv(handle.get_comms(),
-                             d_input.data(),
-                             gathered_v.data(),
+                             reinterpret_cast<comm_datatype_t const*>(d_input.data()),
+                             reinterpret_cast<comm_datatype_t*>(gathered_v.data()),
                              rx_sizes,
                              rx_displs,
                              handle.get_stream());
@@ -76,6 +78,9 @@ rmm::device_uvector<T> device_allgatherv(raft::handle_t const& handle,
 
 // explicit instantiation
 
+template rmm::device_uvector<bool> device_gatherv(raft::handle_t const& handle,
+                                                  raft::device_span<bool const> d_input);
+
 template rmm::device_uvector<int32_t> device_gatherv(raft::handle_t const& handle,
                                                      raft::device_span<int32_t const> d_input);
 
@@ -91,6 +96,9 @@ template rmm::device_uvector<double> device_gatherv(raft::handle_t const& handle,
 template rmm::device_uvector<size_t> device_gatherv(raft::handle_t const& handle,
                                                     raft::device_span<size_t const> d_input);
 
+template rmm::device_uvector<bool> device_allgatherv(raft::handle_t const& handle,
+                                                     raft::device_span<bool const> d_input);
+
 template rmm::device_uvector<int32_t> device_allgatherv(raft::handle_t const& handle,
                                                         raft::device_span<int32_t const> d_input);
 
diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp
index 321a0536e02..3fa6ae089d3 100644
--- a/cpp/tests/utilities/test_utilities.hpp
+++ b/cpp/tests/utilities/test_utilities.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -377,18 +377,24 @@ template <typename T>
 std::vector<T> to_host(raft::handle_t const& handle, raft::device_span<T const> data)
 {
   std::vector<T> h_data(data.size());
-  raft::update_host(h_data.data(), data.data(), data.size(), handle.get_stream());
-  handle.sync_stream();
+  if constexpr (std::is_same_v<T, bool>) {  // std::vector<bool> stores values in a packed format
+    auto h_tmp = new bool[data.size()];
+    raft::update_host(h_tmp, data.data(), data.size(), handle.get_stream());
+    handle.sync_stream();
+    std::transform(
+      h_tmp, h_tmp + data.size(), h_data.begin(), [](uint8_t v) { return static_cast<bool>(v); });
+    delete[] h_tmp;
+  } else {
+    raft::update_host(h_data.data(), data.data(), data.size(), handle.get_stream());
+    handle.sync_stream();
+  }
   return h_data;
 }
 
 template <typename T>
 std::vector<T> to_host(raft::handle_t const& handle, rmm::device_uvector<T> const& data)
 {
-  std::vector<T> h_data(data.size());
-  raft::update_host(h_data.data(), data.data(), data.size(), handle.get_stream());
-  handle.sync_stream();
-  return h_data;
+  return to_host(handle, raft::device_span<T const>(data.data(), data.size()));
 }
 
 template <typename T>
@@ -396,11 +402,7 @@ std::optional<std::vector<T>> to_host(raft::handle_t const& handle,
                                       std::optional<raft::device_span<T const>> data)
 {
   std::optional<std::vector<T>> h_data{std::nullopt};
-  if (data) {
-    h_data = std::vector<T>((*data).size());
-    raft::update_host((*h_data).data(), (*data).data(), (*data).size(), handle.get_stream());
-    handle.sync_stream();
-  }
+  if (data) { h_data = to_host(handle, *data); }
   return h_data;
 }
 
@@ -410,9 +412,7 @@ std::optional<std::vector<T>> to_host(raft::handle_t const& handle,
 {
   std::optional<std::vector<T>> h_data{std::nullopt};
   if (data) {
-    h_data = std::vector<T>((*data).size());
-    raft::update_host((*h_data).data(), (*data).data(), (*data).size(), handle.get_stream());
-    handle.sync_stream();
+    h_data = to_host(handle, raft::device_span<T const>((*data).data(), (*data).size()));
   }
   return h_data;
 }
@@ -430,8 +430,16 @@ template <typename T>
 rmm::device_uvector<T> to_device(raft::handle_t const& handle, std::vector<T> const& data)
 {
   rmm::device_uvector<T> d_data(data.size(), handle.get_stream());
-  raft::update_device(d_data.data(), data.data(), data.size(), handle.get_stream());
-  handle.sync_stream();
+  if constexpr (std::is_same_v<T, bool>) {  // std::vector<bool> stores values in a packed format
+    auto h_tmp = new bool[data.size()];
+    std::copy(data.begin(), data.end(), h_tmp);
+    raft::update_device(d_data.data(), h_tmp, data.size(), handle.get_stream());
+    handle.sync_stream();
+    delete[] h_tmp;
+  } else {
+    raft::update_device(d_data.data(), data.data(), data.size(), handle.get_stream());
+    handle.sync_stream();
+  }
   return d_data;
 }
 
@@ -453,11 +461,7 @@ std::optional<rmm::device_uvector<T>> to_device(raft::handle_t const& handle,
                                                 std::optional<std::vector<T>> const& data)
 {
   std::optional<rmm::device_uvector<T>> d_data{std::nullopt};
-  if (data) {
-    d_data = rmm::device_uvector<T>(data->size(), handle.get_stream());
-    raft::update_host(d_data->data(), data->data(), data->size(), handle.get_stream());
-    handle.sync_stream();
-  }
+  if (data) { d_data = to_device(handle, *data); }
   return d_data;
 }
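Note on the std::vector<bool> special cases above: std::vector<bool> is a packed bitset, so it exposes no contiguous bool* storage (it has no usable data() member), and bool is not among the datatypes the comms layer maps to wire types, hence the uint8_t reinterpret_cast in device_gatherv()/device_allgatherv() and the staging buffers in to_host()/to_device(). Below is a minimal standalone sketch of that staging pattern, for illustration only: unpack_bool_vector is a hypothetical helper, and the actual host-to-device copy (e.g. via raft::update_device()) is elided.

#include <algorithm>
#include <memory>
#include <vector>

// Unpack a std::vector<bool> into a contiguous one-byte-per-element buffer
// that a plain memcpy-style host-to-device copy can consume.
std::unique_ptr<bool[]> unpack_bool_vector(std::vector<bool> const& flags)
{
  auto tmp = std::make_unique<bool[]>(flags.size());
  // std::vector<bool>::operator[] returns a proxy bit reference, so the
  // elements are copied out one by one rather than through a raw pointer.
  std::copy(flags.begin(), flags.end(), tmp.get());
  return tmp;
}

int main()
{
  std::vector<bool> flags{true, false, true};
  auto staged = unpack_bool_vector(flags);
  // staged.get() is now a valid bool* source for copying flags.size()
  // elements to device memory.
  return staged[0] ? 0 : 1;
}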