From e4eb22bf21a189c937736cd74642199410e8d788 Mon Sep 17 00:00:00 2001
From: Joseph Nke <76006812+jnke2016@users.noreply.github.com>
Date: Thu, 27 Jul 2023 14:43:59 +0100
Subject: [PATCH] Refactor edge betweenness centrality (#3672)

This PR refactors edge betweenness centrality by enabling it to follow the PLC (pylibcugraph) path.

closes #3147

Authors:
  - Joseph Nke (https://github.com/jnke2016)
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Rick Ratzel (https://github.com/rlratzel)

URL: https://github.com/rapidsai/cugraph/pull/3672
---
 cpp/src/c_api/centrality_result.cpp           |  10 +
 .../betweenness_centrality_impl.cuh           |  29 ++
 .../betweenness_centrality_reference.hpp      |  38 ++-
 .../edge_betweenness_centrality_test.cpp      |  19 +-
 python/cugraph/CMakeLists.txt                 |   1 -
 .../cugraph/cugraph/centrality/CMakeLists.txt |  25 --
 .../centrality/betweenness_centrality.py      | 138 ++++-----
 .../edge_betweenness_centrality.pxd           |  31 --
 .../edge_betweenness_centrality_wrapper.pyx   | 224 --------------
 python/cugraph/cugraph/dask/__init__.py       |   1 +
 .../cugraph/dask/centrality/__init__.py       |  17 ++
 .../dask/centrality/betweenness_centrality.py | 285 +++++++++++++++---
 .../simpleDistributedGraph.py                 |  14 +-
 .../cugraph/cugraph/structure/symmetrize.py   |   3 +
 ...st_batch_edge_betweenness_centrality_mg.py |  16 +-
 .../centrality/test_betweenness_centrality.py |   2 +-
 .../test_edge_betweenness_centrality.py       |  56 ++--
 .../test_edge_betweenness_centrality_mg.py    | 231 ++++++++++++++
 .../cugraph/tests/structure/test_graph.py     |   7 +-
 .../pylibcugraph/pylibcugraph/CMakeLists.txt  |   1 +
 python/pylibcugraph/pylibcugraph/__init__.py  |   2 +
 .../_cugraph_c/centrality_algorithms.pxd      |  42 +++
 .../edge_betweenness_centrality.pyx           | 197 ++++++++++++
 python/pylibcugraph/pylibcugraph/graphs.pxd   |   4 +-
 python/pylibcugraph/pylibcugraph/graphs.pyx   |  20 +-
 .../tests/test_edge_betweenness_centrality.py | 145 +++++++++
 26 files changed, 1101 insertions(+), 457 deletions(-)
 delete mode 100644 python/cugraph/cugraph/centrality/CMakeLists.txt
 delete mode 100644 python/cugraph/cugraph/centrality/edge_betweenness_centrality.pxd
 delete mode 100644 python/cugraph/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx
 create mode 100644 python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py
 create mode 100644 python/pylibcugraph/pylibcugraph/edge_betweenness_centrality.pyx
 create mode 100644 python/pylibcugraph/pylibcugraph/tests/test_edge_betweenness_centrality.py

diff --git a/cpp/src/c_api/centrality_result.cpp b/cpp/src/c_api/centrality_result.cpp
index 08e7c0341f2..75f10fcbbdb 100644
--- a/cpp/src/c_api/centrality_result.cpp
+++ b/cpp/src/c_api/centrality_result.cpp
@@ -81,6 +81,15 @@ extern "C" cugraph_type_erased_device_array_view_t* cugraph_edge_centrality_resu
     internal_pointer->values_->view());
 }
 
+// Returns a view of the result's edge ids, or NULL when the result carries no edge ids.
+extern "C" cugraph_type_erased_device_array_view_t* cugraph_edge_centrality_result_get_edge_ids(
+  cugraph_edge_centrality_result_t* result)
+{
+  auto* internal = reinterpret_cast<cugraph::c_api::cugraph_edge_centrality_result_t*>(result);
+  if (internal->edge_ids_ == nullptr) { return nullptr; }  // edge ids are optional
+  return reinterpret_cast<cugraph_type_erased_device_array_view_t*>(internal->edge_ids_->view());
+}
+
 extern "C" void cugraph_edge_centrality_result_free(cugraph_edge_centrality_result_t* result)
 {
   auto internal_pointer =
@@ -88,5 +97,6 @@ extern "C" void cugraph_edge_centrality_result_free(cugraph_edge_centrality_resu
   delete internal_pointer->src_ids_;
   delete internal_pointer->dst_ids_;
   delete internal_pointer->values_;
+  delete internal_pointer->edge_ids_;
   delete internal_pointer;
 }
diff --git a/cpp/src/centrality/betweenness_centrality_impl.cuh b/cpp/src/centrality/betweenness_centrality_impl.cuh
index 0a87531d6ca..e496344583c 100644
--- a/cpp/src/centrality/betweenness_centrality_impl.cuh
+++ b/cpp/src/centrality/betweenness_centrality_impl.cuh
@@ -647,6 +647,35 @@ edge_betweenness_centrality(
                             do_expensive_check);
   }
 
+  std::optional<weight_t> scale_factor{std::nullopt};
+
+  if (normalized) {
+    weight_t n   = static_cast<weight_t>(graph_view.number_of_vertices());
+    scale_factor = n * (n - 1);
+  } else if (graph_view.is_symmetric())
+    scale_factor = weight_t{2};
+
+  if (scale_factor) {
+    if (graph_view.number_of_vertices() > 1) {
+      if (static_cast<vertex_t>(num_sources) < graph_view.number_of_vertices()) {
+        (*scale_factor) *= static_cast<weight_t>(num_sources) /
+                           static_cast<weight_t>(graph_view.number_of_vertices());
+      }
+
+      auto firsts         = centralities.view().value_firsts();
+      auto counts         = centralities.view().edge_counts();
+      auto mutable_firsts = centralities.mutable_view().value_firsts();
+      for (size_t k = 0; k < counts.size(); k++) {
+        thrust::transform(
+          handle.get_thrust_policy(),
+          firsts[k],
+          firsts[k] + counts[k],
+          mutable_firsts[k],
+          [sf = *scale_factor] __device__(auto centrality) { return centrality / sf; });
+      }
+    }
+  }
+
   return centralities;
 }
 
diff --git a/cpp/tests/centrality/betweenness_centrality_reference.hpp b/cpp/tests/centrality/betweenness_centrality_reference.hpp
index 3c60020265a..0f1a4d6adf3 100644
--- a/cpp/tests/centrality/betweenness_centrality_reference.hpp
+++ b/cpp/tests/centrality/betweenness_centrality_reference.hpp
@@ -166,6 +166,37 @@ void reference_rescale(result_t* result,
   }
 }
 
+// Rescales `result[0..number_of_edges)` in place. If `normalize`, divides by n * (n - 1) (only
+// when number_of_edges > 2); otherwise halves undirected results. Any rescale applied is also
+// multiplied by n / number_of_sources when number_of_sources > 0 (n = number_of_vertices).
+template <typename result_t>
+void reference_edge_rescale(result_t* result,
+                            bool directed,
+                            bool normalize,
+                            size_t const number_of_vertices,
+                            size_t const number_of_edges,
+                            size_t const number_of_sources)
+{
+  result_t rescale_factor            = static_cast<result_t>(1);
+  result_t casted_number_of_vertices = static_cast<result_t>(number_of_vertices);
+  result_t casted_number_of_sources  = static_cast<result_t>(number_of_sources);
+
+  if (normalize) {
+    if (number_of_edges > 2) {
+      rescale_factor /= ((casted_number_of_vertices) * (casted_number_of_vertices - 1));
+    }
+  } else {
+    if (!directed) { rescale_factor /= static_cast<result_t>(2); }
+  }
+
+  if (rescale_factor != result_t{1}) {
+    if (number_of_sources > 0) {
+      rescale_factor *= (casted_number_of_vertices / casted_number_of_sources);
+    }
+    for (size_t idx = 0; idx < number_of_edges; ++idx) { result[idx] *= rescale_factor; }
+  }
+}
+
 template <typename vertex_t, typename edge_t, typename weight_t>
 std::vector<weight_t> betweenness_centrality_reference(
   std::vector<edge_t> const& offsets,
@@ -213,7 +244,9 @@ std::vector<weight_t> edge_betweenness_centrality_reference(
   std::vector<edge_t> const& offsets,
   std::vector<vertex_t> const& indices,
   std::optional<std::vector<weight_t>> const& wgt,
-  std::vector<vertex_t> const& seeds)
+  std::vector<vertex_t> const& seeds,
+  bool directed,
+  bool normalize)
 {
   std::vector<weight_t> result;
   if (indices.size() > 0) {
@@ -234,6 +267,9 @@ std::vector<weight_t> edge_betweenness_centrality_reference(
       ref_edge_accumulation(result, offsets, indices, S, pred, sigmas, deltas, s);
     }
   }
+
+  reference_edge_rescale(
+    result.data(), directed, normalize, offsets.size() - 1, indices.size(), seeds.size());
   return result;
 }
 }  // namespace
diff --git a/cpp/tests/centrality/edge_betweenness_centrality_test.cpp b/cpp/tests/centrality/edge_betweenness_centrality_test.cpp
index e4d22ff069c..be153bb41f8 100644
--- a/cpp/tests/centrality/edge_betweenness_centrality_test.cpp
+++ b/cpp/tests/centrality/edge_betweenness_centrality_test.cpp
@@ -122,7 +122,12 @@ class Tests_EdgeBetweennessCentrality
       auto h_seeds = cugraph::test::to_host(handle, d_seeds);
 
       auto h_reference_centralities =
-        edge_betweenness_centrality_reference(h_offsets, h_indices, h_wgt, h_seeds);
+        edge_betweenness_centrality_reference(h_offsets,
+                                              h_indices,
+                                              h_wgt,
+                                              h_seeds,
+                                              !graph_view.is_symmetric(),
+                                              betweenness_usecase.normalized);
 
       rmm::device_uvector<vertex_t> d_reference_src_vertex_ids(0, handle.get_stream());
       rmm::device_uvector<vertex_t> d_reference_dst_vertex_ids(0, handle.get_stream());
@@ -183,7 +188,9 @@ INSTANTIATE_TEST_SUITE_P(
   ::testing::Combine(
     // enable correctness checks
     ::testing::Values(EdgeBetweennessCentrality_Usecase{20, false, false, true},
-                      EdgeBetweennessCentrality_Usecase{20, false, true, true}),
+                      EdgeBetweennessCentrality_Usecase{20, false, true, true},
+                      EdgeBetweennessCentrality_Usecase{20, true, false, true},
+                      EdgeBetweennessCentrality_Usecase{20, true, true, true}),
     ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"),
                       cugraph::test::File_Usecase("test/datasets/web-Google.mtx"),
                       cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx"))));
@@ -194,7 +201,9 @@ INSTANTIATE_TEST_SUITE_P(
   // enable correctness checks
   ::testing::Combine(
     ::testing::Values(EdgeBetweennessCentrality_Usecase{50, false, false, true},
-                      EdgeBetweennessCentrality_Usecase{50, false, true, true}),
+                      EdgeBetweennessCentrality_Usecase{50, false, true, true},
+                      EdgeBetweennessCentrality_Usecase{50, true, false, true},
+                      EdgeBetweennessCentrality_Usecase{50, true, true, true}),
     ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false))));
 
 INSTANTIATE_TEST_SUITE_P(
@@ -207,7 +216,9 @@ INSTANTIATE_TEST_SUITE_P(
   // disable correctness checks for large graphs
   ::testing::Combine(
     ::testing::Values(EdgeBetweennessCentrality_Usecase{500, false, false, false},
-                      EdgeBetweennessCentrality_Usecase{500, false, true, false}),
+                      EdgeBetweennessCentrality_Usecase{500, false, true, false},
+                      EdgeBetweennessCentrality_Usecase{500, true, false, false},
+                      EdgeBetweennessCentrality_Usecase{500, true, true, false}),
     ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false))));
 
 CUGRAPH_TEST_PROGRAM_MAIN()
diff --git a/python/cugraph/CMakeLists.txt b/python/cugraph/CMakeLists.txt
index 48815792553..9094a73fdeb 100644
--- a/python/cugraph/CMakeLists.txt
+++ b/python/cugraph/CMakeLists.txt
@@ -82,7 +82,6 @@ endif()
 
 rapids_cython_init()
 
-add_subdirectory(cugraph/centrality)
 add_subdirectory(cugraph/community)
 add_subdirectory(cugraph/components)
 add_subdirectory(cugraph/dask/comms)
diff --git a/python/cugraph/cugraph/centrality/CMakeLists.txt b/python/cugraph/cugraph/centrality/CMakeLists.txt
deleted file mode 100644
index f5036cca0a8..00000000000
--- a/python/cugraph/cugraph/centrality/CMakeLists.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-# =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
-# in compliance with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software distributed under the License
-# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
-# or implied. See the License for the specific language governing permissions and limitations under
-# the License.
-# =============================================================================
-
-set(cython_sources
-    edge_betweenness_centrality_wrapper.pyx
-)
-set(linked_libraries cugraph::cugraph)
-
-rapids_cython_create_modules(
-  CXX
-  SOURCE_FILES "${cython_sources}"
-  LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX centrality_
-  ASSOCIATED_TARGETS cugraph
-)
diff --git a/python/cugraph/cugraph/centrality/betweenness_centrality.py b/python/cugraph/cugraph/centrality/betweenness_centrality.py
index 56fa7fea9a7..63af410e06c 100644
--- a/python/cugraph/cugraph/centrality/betweenness_centrality.py
+++ b/python/cugraph/cugraph/centrality/betweenness_centrality.py
@@ -13,9 +13,9 @@
 
 from pylibcugraph import (
     betweenness_centrality as pylibcugraph_betweenness_centrality,
+    edge_betweenness_centrality as pylibcugraph_edge_betweenness_centrality,
     ResourceHandle,
 )
-from cugraph.centrality import edge_betweenness_centrality_wrapper
 
 from cugraph.utilities import (
     df_edge_score_to_dictionary,
@@ -25,7 +25,6 @@
 import cudf
 import warnings
 import numpy as np
-import random
 from typing import Union
 
 
@@ -49,25 +48,24 @@ def betweenness_centrality(
     To improve performance. rather than doing an all-pair shortest path,
     a sample of k starting vertices can be used.
 
-    CuGraph does not currently support the 'endpoints' and 'weight' parameters
-    as seen in the corresponding networkX call.
+    CuGraph does not currently support the 'weight' parameter.
 
     Parameters
     ----------
     G : cuGraph.Graph or networkx.Graph
         The graph can be either directed (Graph(directed=True)) or undirected.
-        Weights in the graph are ignored, the current implementation uses a parallel
-        variation of the Brandes Algorithm (2001) to compute exact or approximate
-        betweenness. If weights are provided in the edgelist, they will not be
-        used.
+        The current implementation uses a parallel variation of the Brandes
+        Algorithm (2001) to compute exact or approximate betweenness.
+        If weights are provided in the edgelist, they will not be used.
 
     k : int, list or cudf object or None, optional (default=None)
-        If k is not None, use k node samples to estimate betweenness.  Higher
-        values give better approximation.  If k is either a list or a cudf, use its
-        content for estimation: it contain vertex identifiers. If k is None
-        (the default), all the vertices are used to estimate betweenness.  Vertices
-        obtained through sampling or defined as a list will be used as sources for
-        traversals inside the algorithm.
+        If k is not None, use k node samples to estimate betweenness. Higher
+        values give better approximation.  If k is either a list, a cudf DataFrame,
+        or a dask_cudf DataFrame, then its contents are assumed to be vertex
+        identifiers to be used for estimation. If k is None (the default), all the
+        vertices are used to estimate betweenness. Vertices obtained through
+        sampling or defined as a list will be used as sources for traversals inside
+        the algorithm.
 
     normalized : bool, optional (default=True)
         If true, the betweenness values are normalized by
@@ -137,7 +135,6 @@ def betweenness_centrality(
 
     G, isNx = ensure_cugraph_obj_for_nx(G)
 
-    # FIXME: Should we raise an error if the graph created is weighted?
     if weight is not None:
         raise NotImplementedError(
             "weighted implementation of betweenness "
@@ -218,29 +215,28 @@ def edge_betweenness_centrality(
     To improve performance, rather than doing an all-pair shortest path,
     a sample of k starting vertices can be used.
 
-    CuGraph does not currently support the 'weight' parameter
-    as seen in the corresponding networkX call.
+    CuGraph does not currently support the 'weight' parameter.
 
     Parameters
     ----------
     G : cuGraph.Graph or networkx.Graph
         The graph can be either directed (Graph(directed=True)) or undirected.
-        Weights in the graph are ignored, the current implementation uses
-        BFS traversals. Use weight parameter if weights need to be considered
-        (currently not supported)
+        The current implementation uses BFS traversals. Use weight parameter
+        if weights need to be considered (currently not supported).
 
     k : int or list or None, optional (default=None)
-        If k is not None, use k node samples to estimate betweenness.  Higher
-        values give better approximation.
-        If k is a list, use the content of the list for estimation: the list
-        should contain vertices identifiers.
-        Vertices obtained through sampling or defined as a list will be used as
-        sources for traversals inside the algorithm.
+        If k is not None, use k node samples to estimate betweenness. Higher
+        values give better approximation.  If k is either a list, a cudf DataFrame,
+        or a dask_cudf DataFrame, then its contents are assumed to be vertex
+        identifiers to be used for estimation. If k is None (the default), all the
+        vertices are used to estimate betweenness. Vertices obtained through
+        sampling or defined as a list will be used as sources for traversals inside
+        the algorithm.
 
     normalized : bool, optional (default=True)
         If true, the betweenness values are normalized by
-        2 / (n * (n - 1)) for undirected Graphs, and
-        1 / (n * (n - 1)) for directed Graphs
+        __2 / (n * (n - 1))__ for undirected Graphs, and
+        __1 / (n * (n - 1))__ for directed Graphs
         where n is the number of nodes in G.
         Normalization will ensure that values are in [0, 1],
         this normalization scales for the highest possible value where one
@@ -278,13 +274,11 @@ def edge_betweenness_centrality(
         df['dst'] : cudf.Series
             Contains the vertex identifiers of the destination of each edge
 
-        df['edge_betweenness_centrality'] : cudf.Series
+        df['betweenness_centrality'] : cudf.Series
             Contains the betweenness centrality of edges
 
-        When using undirected graphs, 'src' and 'dst' only contains elements
-        such that 'src' < 'dst', which might differ from networkx and user's
-        input. Namely edge (1 -> 0) is transformed into (0 -> 1) but
-        contains the betweenness centrality of edge (1 -> 0).
+        df["edge_id"] : cudf.Series
+            Contains the edge ids of edges if present.
 
 
     Examples
@@ -303,16 +297,47 @@ def edge_betweenness_centrality(
         raise TypeError("result type can only be np.float32 or np.float64")
 
     G, isNx = ensure_cugraph_obj_for_nx(G)
-    vertices = _initialize_vertices(G, k, seed)
 
-    df = edge_betweenness_centrality_wrapper.edge_betweenness_centrality(
-        G, normalized, weight, vertices, result_dtype
+    if not isinstance(k, (cudf.DataFrame, cudf.Series)):
+        if isinstance(k, list):
+            vertex_dtype = G.edgelist.edgelist_df.dtypes[0]
+            k = cudf.Series(k, dtype=vertex_dtype)
+
+    if isinstance(k, (cudf.DataFrame, cudf.Series)):
+        if G.renumbered:
+            k = G.lookup_internal_vertex_id(k)
+
+    # FIXME: src, dst and edge_ids currently need to be of the same type;
+    # this restriction should be lifted.
+
+    (
+        src_vertices,
+        dst_vertices,
+        values,
+        edge_ids,
+    ) = pylibcugraph_edge_betweenness_centrality(
+        resource_handle=ResourceHandle(),
+        graph=G._plc_graph,
+        k=k,
+        random_state=seed,
+        normalized=normalized,
+        do_expensive_check=False,
     )
 
+    df = cudf.DataFrame()
+    df["src"] = src_vertices
+    df["dst"] = dst_vertices
+    df["betweenness_centrality"] = values
+    if edge_ids is not None:
+        df["edge_id"] = edge_ids
+
     if G.renumbered:
         df = G.unrenumber(df, "src")
         df = G.unrenumber(df, "dst")
 
+    if df["betweenness_centrality"].dtype != result_dtype:
+        df["betweenness_centrality"] = df["betweenness_centrality"].astype(result_dtype)
+
     if G.is_directed() is False:
         # select the lower triangle of the df based on src/dst vertex value
         lower_triangle = df["src"] >= df["dst"]
@@ -332,44 +357,3 @@ def edge_betweenness_centrality(
         return df_edge_score_to_dictionary(df, "betweenness_centrality")
     else:
         return df
-
-
-# In order to compare with pre-set sources,
-# k can either be a list or an integer or None
-#  int: Generate an random sample with k elements
-# list: k become the length of the list and vertices become the content
-# None: All the vertices are considered
-def _initialize_vertices(G, k: Union[int, list], seed: int) -> np.ndarray:
-    vertices = None
-    numpy_vertices = None
-    if k is not None:
-        if isinstance(k, int):
-            vertices = _initialize_vertices_from_indices_sampling(G, k, seed)
-        elif isinstance(k, list):
-            vertices = _initialize_vertices_from_identifiers_list(G, k)
-        numpy_vertices = np.array(vertices, dtype=np.int32)
-    else:
-        numpy_vertices = np.arange(G.number_of_vertices(), dtype=np.int32)
-    return numpy_vertices
-
-
-# NOTE: We do not renumber in case k is an int, the sampling is
-#       not operating on the valid vertices identifiers but their
-#       indices:
-# Example:
-# - vertex '2' is missing
-# - vertices '0' '1' '3' '4' exist
-# - There is a vertex at index 2 (there is not guarantee that it is
-#   vertice '3' )
-def _initialize_vertices_from_indices_sampling(G, k: int, seed: int) -> list:
-    random.seed(seed)
-    vertices = random.sample(range(G.number_of_vertices()), k)
-    return vertices
-
-
-def _initialize_vertices_from_identifiers_list(G, identifiers: list) -> np.ndarray:
-    vertices = identifiers
-    if G.renumbered:
-        vertices = G.lookup_internal_vertex_id(cudf.Series(vertices)).to_numpy()
-
-    return vertices
diff --git a/python/cugraph/cugraph/centrality/edge_betweenness_centrality.pxd b/python/cugraph/cugraph/centrality/edge_betweenness_centrality.pxd
deleted file mode 100644
index 7ec20e35b83..00000000000
--- a/python/cugraph/cugraph/centrality/edge_betweenness_centrality.pxd
+++ /dev/null
@@ -1,31 +0,0 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-# cython: language_level = 3
-
-from cugraph.structure.graph_primtypes cimport *
-from libcpp cimport bool
-
-cdef extern from "cugraph/algorithms.hpp" namespace "cugraph":
-
-    cdef void edge_betweenness_centrality[VT, ET, WT, result_t](
-        const handle_t &handle,
-        const GraphCSRView[VT, ET, WT] &graph,
-        result_t *result,
-        bool normalized,
-        const WT *weight,
-        VT k,
-        const VT *vertices) except +
diff --git a/python/cugraph/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx b/python/cugraph/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx
deleted file mode 100644
index 8c64dcbf952..00000000000
--- a/python/cugraph/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx
+++ /dev/null
@@ -1,224 +0,0 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-# cython: language_level = 3
-
-from cugraph.centrality.edge_betweenness_centrality cimport edge_betweenness_centrality as c_edge_betweenness_centrality
-from cugraph.structure import graph_primtypes_wrapper
-from cugraph.structure.graph_primtypes cimport *
-from libc.stdint cimport uintptr_t
-from libcpp cimport bool
-import cudf
-import numpy as np
-from cugraph.dask.common.mg_utils import get_client
-import cugraph.dask.comms.comms as Comms
-import dask.distributed
-
-
-def get_output_df(indices, result_dtype):
-    number_of_edges = len(indices)
-    df = cudf.DataFrame()
-    df['src'] = cudf.Series(np.zeros(number_of_edges, dtype=np.int32))
-    df['dst'] = indices.copy()
-    df['betweenness_centrality'] = cudf.Series(np.zeros(number_of_edges,
-                                               dtype=result_dtype))
-    return df
-
-
-def get_batch(sources, number_of_workers, current_worker):
-    batch_size = len(sources) // number_of_workers
-    begin =  current_worker * batch_size
-    end = (current_worker + 1) * batch_size
-    if current_worker == (number_of_workers - 1):
-        end = len(sources)
-    batch = sources[begin:end]
-    return batch
-
-
-def run_mg_work(input_data, normalized, weights, sources,
-             result_dtype, session_id):
-    result = None
-
-    number_of_workers = Comms.get_n_workers(session_id)
-    worker_idx = Comms.get_worker_id(session_id)
-    handle = Comms.get_handle(session_id)
-
-    batch = get_batch(sources, number_of_workers, worker_idx)
-
-    result = run_internal_work(handle, input_data, normalized, weights,
-                               batch, result_dtype)
-    return result
-
-
-def run_internal_work(handle, input_data, normalized, weights, batch,
-                      result_dtype):
-    cdef uintptr_t c_handle = <uintptr_t> NULL
-    cdef uintptr_t c_graph = <uintptr_t> NULL
-    cdef uintptr_t c_src_identifier = <uintptr_t> NULL
-    cdef uintptr_t c_dst_identifier = <uintptr_t> NULL
-    cdef uintptr_t c_weights = <uintptr_t> NULL
-    cdef uintptr_t c_betweenness = <uintptr_t> NULL
-    cdef uintptr_t c_batch = <uintptr_t> NULL
-
-    cdef uintptr_t c_offsets = <uintptr_t> NULL
-    cdef uintptr_t c_indices = <uintptr_t> NULL
-    cdef uintptr_t c_graph_weights = <uintptr_t> NULL
-
-    cdef GraphCSRViewDouble graph_double
-    cdef GraphCSRViewFloat graph_float
-
-    (offsets, indices, graph_weights), is_directed =  input_data
-
-    if graph_weights is not None:
-        c_graph_weights = graph_weights.__cuda_array_interface__['data'][0]
-    c_offsets = offsets.__cuda_array_interface__['data'][0]
-    c_indices = indices.__cuda_array_interface__['data'][0]
-
-    number_of_vertices = len(offsets) - 1
-    number_of_edges = len(indices)
-
-    result_df = get_output_df(indices, result_dtype)
-    c_src_identifier = result_df['src'].__cuda_array_interface__['data'][0]
-    c_dst_identifier = result_df['dst'].__cuda_array_interface__['data'][0]
-    c_betweenness = result_df['betweenness_centrality'].__cuda_array_interface__['data'][0]
-
-    number_of_sources_in_batch = len(batch)
-    if result_dtype == np.float64:
-        graph_double = GraphCSRView[int, int, double](<int*> c_offsets,
-                                                      <int*> c_indices,
-                                                      <double*> c_graph_weights,
-                                                      number_of_vertices,
-                                                      number_of_edges)
-        graph_double.prop.directed = is_directed
-        c_graph = <uintptr_t>&graph_double
-    elif result_dtype == np.float32:
-        graph_float = GraphCSRView[int, int, float](<int*>c_offsets,
-                                                    <int*>c_indices,
-                                                    <float*>c_graph_weights,
-                                                    number_of_vertices,
-                                                    number_of_edges)
-        graph_float.prop.directed = is_directed
-        c_graph = <uintptr_t>&graph_float
-    else:
-        raise ValueError("result_dtype can only be np.float64 or np.float32")
-
-    if weights is not None:
-        c_weights = weights.__cuda_array_interface__['data'][0]
-    c_batch = batch.__array_interface__['data'][0]
-    c_handle = <uintptr_t>handle.getHandle()
-
-    run_c_edge_betweenness_centrality(c_handle,
-                                      c_graph,
-                                      c_betweenness,
-                                      normalized,
-                                      c_weights,
-                                      number_of_sources_in_batch,
-                                      c_batch,
-                                      result_dtype)
-    return result_df
-
-
-cdef void run_c_edge_betweenness_centrality(uintptr_t c_handle,
-                                            uintptr_t c_graph,
-                                            uintptr_t c_betweenness,
-                                            bool normalized,
-                                            uintptr_t c_weights,
-                                            int number_of_sources_in_batch,
-                                            uintptr_t c_batch,
-                                            result_dtype):
-    if result_dtype == np.float64:
-        c_edge_betweenness_centrality[int, int, double, double]((<handle_t *> c_handle)[0],
-                                                                (<GraphCSRView[int, int, double] *> c_graph)[0],
-                                                                <double *> c_betweenness,
-                                                                normalized,
-                                                                <double *> c_weights,
-                                                                number_of_sources_in_batch,
-                                                                <int *> c_batch)
-    elif result_dtype == np.float32:
-        c_edge_betweenness_centrality[int, int, float, float]((<handle_t *> c_handle)[0],
-                                                              (<GraphCSRView[int, int, float] *> c_graph)[0],
-                                                              <float *> c_betweenness,
-                                                              normalized,
-                                                              <float *> c_weights,
-                                                              number_of_sources_in_batch,
-                                                              <int *> c_batch)
-    else:
-        raise ValueError("result_dtype can only be np.float64 or np.float32")
-
-def batch_edge_betweenness_centrality(input_graph,
-                                         normalized,
-                                         weights, vertices, result_dtype):
-    client = get_client()
-    comms = Comms.get_comms()
-    replicated_adjlists = input_graph.batch_adjlists
-    work_futures =  [client.submit(run_mg_work,
-                                   (data, input_graph.is_directed()),
-                                   normalized,
-                                   weights,
-                                   vertices,
-                                   result_dtype,
-                                   comms.sessionId,
-                                   workers=[worker]) for
-                    (worker, data) in replicated_adjlists.items()]
-    dask.distributed.wait(work_futures)
-    df = work_futures[0].result()
-    return df
-
-
-def sg_edge_betweenness_centrality(input_graph, normalized, weights,
-                                   vertices, result_dtype):
-    if not input_graph.adjlist:
-        input_graph.view_adj_list()
-
-    handle = Comms.get_default_handle()
-    adjlist = input_graph.adjlist
-    input_data = ((adjlist.offsets, adjlist.indices, adjlist.weights),
-                  input_graph.is_directed())
-    df = run_internal_work(handle, input_data, normalized, weights,
-                           vertices, result_dtype)
-    return df
-
-
-def edge_betweenness_centrality(input_graph, normalized, weights,
-                                vertices, result_dtype):
-    """
-    Call betweenness centrality
-    """
-    cdef GraphCSRViewDouble graph_double
-    cdef GraphCSRViewFloat graph_float
-
-
-    df = None
-
-    if not input_graph.adjlist:
-        input_graph.view_adj_list()
-
-    if Comms.is_initialized() and input_graph.batch_enabled == True:
-        df = batch_edge_betweenness_centrality(input_graph, normalized,
-                                                  weights, vertices,
-                                                  result_dtype)
-    else:
-        df = sg_edge_betweenness_centrality(input_graph, normalized,
-                                            weights, vertices, result_dtype)
-
-    if result_dtype == np.float64:
-        graph_double = get_graph_view[GraphCSRViewDouble](input_graph)
-        graph_double.get_source_indices(<int*>(<uintptr_t>df['src'].__cuda_array_interface__['data'][0]))
-    elif result_dtype == np.float32:
-        graph_float = get_graph_view[GraphCSRViewFloat](input_graph)
-        graph_float.get_source_indices(<int*>(<uintptr_t>df['src'].__cuda_array_interface__['data'][0]))
-
-    return df
diff --git a/python/cugraph/cugraph/dask/__init__.py b/python/cugraph/cugraph/dask/__init__.py
index f639856f929..a6958aaaf49 100644
--- a/python/cugraph/cugraph/dask/__init__.py
+++ b/python/cugraph/cugraph/dask/__init__.py
@@ -28,6 +28,7 @@
 from .centrality.eigenvector_centrality import eigenvector_centrality
 from .cores.core_number import core_number
 from .centrality.betweenness_centrality import betweenness_centrality
+from .centrality.betweenness_centrality import edge_betweenness_centrality
 from .cores.k_core import k_core
 from .link_prediction.jaccard import jaccard
 from .link_prediction.sorensen import sorensen
diff --git a/python/cugraph/cugraph/dask/centrality/__init__.py b/python/cugraph/cugraph/dask/centrality/__init__.py
index e69de29bb2d..3cbf91040d4 100644
--- a/python/cugraph/cugraph/dask/centrality/__init__.py
+++ b/python/cugraph/cugraph/dask/centrality/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from cugraph.centrality.betweenness_centrality import (
+    betweenness_centrality,
+    edge_betweenness_centrality,
+)
diff --git a/python/cugraph/cugraph/dask/centrality/betweenness_centrality.py b/python/cugraph/cugraph/dask/centrality/betweenness_centrality.py
index e048c91f34d..6aa708ea585 100644
--- a/python/cugraph/cugraph/dask/centrality/betweenness_centrality.py
+++ b/python/cugraph/cugraph/dask/centrality/betweenness_centrality.py
@@ -17,6 +17,7 @@
 from pylibcugraph import (
     ResourceHandle,
     betweenness_centrality as pylibcugraph_betweenness_centrality,
+    edge_betweenness_centrality as pylibcugraph_edge_betweenness_centrality,
 )
 import cugraph.dask.comms.comms as Comms
 from cugraph.dask.common.input_utils import get_distributed_data
@@ -28,14 +29,23 @@
 from typing import Union
 
 
-def convert_to_cudf(cp_arrays: cp.ndarray) -> cudf.DataFrame:
+def convert_to_cudf(cp_arrays: cp.ndarray, edge_bc: bool) -> cudf.DataFrame:
     """
     create a cudf DataFrame from cupy arrays
     """
-    cupy_vertices, cupy_values = cp_arrays
     df = cudf.DataFrame()
-    df["vertex"] = cupy_vertices
-    df["betweenness_centrality"] = cupy_values
+    if edge_bc:
+        cupy_src_vertices, cupy_dst_vertices, cupy_values, cupy_edge_ids = cp_arrays
+        df["src"] = cupy_src_vertices
+        df["dst"] = cupy_dst_vertices
+        df["betweenness_centrality"] = cupy_values
+        if cupy_edge_ids is not None:
+            df["edge_id"] = cupy_edge_ids
+
+    else:
+        cupy_vertices, cupy_values = cp_arrays
+        df["vertex"] = cupy_vertices
+        df["betweenness_centrality"] = cupy_values
     return df
 
 
@@ -47,18 +57,29 @@ def _call_plc_betweenness_centrality(
     normalized: bool,
     endpoints: bool,
     do_expensive_check: bool,
+    edge_bc: bool,
 ) -> cudf.DataFrame:
 
-    cp_arrays = pylibcugraph_betweenness_centrality(
-        resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
-        graph=mg_graph_x,
-        k=k,
-        random_state=random_state,
-        normalized=normalized,
-        include_endpoints=endpoints,
-        do_expensive_check=do_expensive_check,
-    )
-    return convert_to_cudf(cp_arrays)
+    if edge_bc:
+        cp_arrays = pylibcugraph_edge_betweenness_centrality(
+            resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
+            graph=mg_graph_x,
+            k=k,
+            random_state=random_state,
+            normalized=normalized,
+            do_expensive_check=do_expensive_check,
+        )
+    else:
+        cp_arrays = pylibcugraph_betweenness_centrality(
+            resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
+            graph=mg_graph_x,
+            k=k,
+            random_state=random_state,
+            normalized=normalized,
+            include_endpoints=endpoints,
+            do_expensive_check=do_expensive_check,
+        )
+    return convert_to_cudf(cp_arrays, edge_bc)
 
 
 def _mg_call_plc_betweenness_centrality(
@@ -68,8 +89,9 @@ def _mg_call_plc_betweenness_centrality(
     k: dict,
     random_state: int,
     normalized: bool,
-    endpoints: bool,
     do_expensive_check: bool,
+    endpoints: bool = False,
+    edge_bc: bool = False,
 ) -> dask_cudf.DataFrame:
 
     result = [
@@ -82,6 +104,7 @@ def _mg_call_plc_betweenness_centrality(
             normalized,
             endpoints,
             do_expensive_check,
+            edge_bc,
             workers=[w],
             allow_other_workers=False,
             pure=False,
@@ -89,6 +112,8 @@ def _mg_call_plc_betweenness_centrality(
         for i, w in enumerate(Comms.get_workers())
     ]
 
+    wait(result)
+
     ddf = dask_cudf.from_delayed(result, verify_meta=False).persist()
     wait(ddf)
     wait([r.release() for r in result])
@@ -101,6 +126,7 @@ def betweenness_centrality(
         int, list, cudf.Series, cudf.DataFrame, dask_cudf.Series, dask_cudf.DataFrame
     ] = None,
     normalized: bool = True,
+    weight: cudf.DataFrame = None,
     endpoints: bool = False,
     random_state: int = None,
 ) -> dask_cudf.DataFrame:
@@ -114,28 +140,39 @@ def betweenness_centrality(
     To improve performance. rather than doing an all-pair shortest path,
     a sample of k starting vertices can be used.
 
-    CuGraph does not currently support the 'endpoints' and 'weight' parameters
-    as seen in the corresponding networkX call.
+    CuGraph does not currently support the 'weight' parameter.
 
     Parameters
     ----------
     input_graph: cuGraph.Graph
         The graph can be either directed (Graph(directed=True)) or undirected.
-        Weights in the graph are ignored, the current implementation uses a parallel
-        variation of the Brandes Algorithm (2001) to compute exact or approximate
-        betweenness. If weights are provided in the edgelist, they will not be
-        used.
+        The current implementation uses a parallel variation of the Brandes
+        Algorithm (2001) to compute exact or approximate betweenness.
+        If weights are provided in the edgelist, they will not be used.
 
     k : int, list or (dask)cudf object or None, optional (default=None)
-        If k is not None, use k node samples to estimate betweenness.  Higher
-        values give better approximation.  If k is either a list or a (dask)cudf,
-        use its content for estimation: it contain vertex identifiers. If k is None
-        (the default), all the vertices are used to estimate betweenness.  Vertices
-        obtained through sampling or defined as a list will be used as sources for
-        traversals inside the algorithm.
+        If k is not None, use k node samples to estimate betweenness. Higher
+        values give better approximation.  If k is either a list, a cudf DataFrame,
+        or a dask_cudf DataFrame, then its contents are assumed to be vertex
+        identifiers to be used for estimation. If k is None (the default), all the
+        vertices are used to estimate betweenness. Vertices obtained through
+        sampling or defined as a list will be used as sources for traversals inside
+        the algorithm.
 
     normalized : bool, optional (default=True)
-        If True normalize the resulting betweenness centrality values
+        If True, normalize the resulting betweenness centrality values by
+        __2 / ((n - 1) * (n - 2))__ for undirected Graphs, and
+        __1 / ((n - 1) * (n - 2))__ for directed Graphs
+        where n is the number of nodes in G.
+        Normalization will ensure that values are in [0, 1],
+        this normalization scales for the highest possible value where one
+        node is crossed by every single shortest path.
+
+    weight : (dask)cudf.DataFrame, optional (default=None)
+        Specifies the weights to be used for each edge.
+        Should contain a mapping between
+        edges and weights.
+        (Not Supported)
 
     endpoints : bool, optional (default=False)
         If true, include the endpoints in the shortest path counts.
@@ -184,6 +221,12 @@ def betweenness_centrality(
         )
         warnings.warn(warning_msg, UserWarning)
 
+    if weight is not None:
+        raise NotImplementedError(
+            "weighted implementation of betweenness "
+            "centrality not currently supported"
+        )
+
     if not isinstance(k, (dask_cudf.DataFrame, dask_cudf.Series)):
         if isinstance(k, (cudf.DataFrame, cudf.Series, list)):
             if isinstance(k, list):
@@ -216,17 +259,187 @@ def betweenness_centrality(
     client = get_client()
 
     ddf = _mg_call_plc_betweenness_centrality(
-        input_graph,
-        client,
-        Comms.get_session_id(),
-        k,
-        random_state,
-        normalized,
-        endpoints,
-        do_expensive_check,
+        input_graph=input_graph,
+        client=client,
+        sID=Comms.get_session_id(),
+        k=k,
+        random_state=random_state,
+        normalized=normalized,
+        endpoints=endpoints,
+        do_expensive_check=do_expensive_check,
     )
 
     if input_graph.renumbered:
         return input_graph.unrenumber(ddf, "vertex")
 
     return ddf
+
+
+def edge_betweenness_centrality(
+    input_graph,
+    k: Union[
+        int, list, cudf.Series, cudf.DataFrame, dask_cudf.Series, dask_cudf.DataFrame
+    ] = None,
+    normalized: bool = True,
+    weight: cudf.DataFrame = None,
+    random_state: int = None,
+) -> dask_cudf.DataFrame:
+    """
+    Compute the edge betweenness centrality for all edges of the graph G.
+    Betweenness centrality is a measure of the number of shortest paths
+    that pass over an edge.  An edge with a high betweenness centrality
+    score has more paths passing over it and is therefore believed to be
+    more important.
+
+    To improve performance, rather than doing an all-pair shortest path,
+    a sample of k starting vertices can be used.
+
+    CuGraph does not currently support the 'weight' parameter.
+
+    Parameters
+    ----------
+    input_graph: cuGraph.Graph
+        The graph can be either directed (Graph(directed=True)) or undirected.
+        The current implementation uses a parallel variation of the Brandes
+        Algorithm (2001) to compute exact or approximate betweenness.
+        If weights are provided in the edgelist, they will not be used.
+
+    k : int, list or (dask)cudf object or None, optional (default=None)
+        If k is not None, use k node samples to estimate betweenness. Higher
+        values give better approximation.  If k is either a list, a cudf DataFrame,
+        or a dask_cudf DataFrame, then its contents are assumed to be vertex
+        identifiers to be used for estimation. If k is None (the default), all the
+        vertices are used to estimate betweenness. Vertices obtained through
+        sampling or defined as a list will be used as sources for traversals inside
+        the algorithm.
+
+    normalized : bool, optional (default=True)
+        If True, normalize the resulting betweenness centrality values by
+        __2 / (n * (n - 1))__ for undirected Graphs, and
+        __1 / (n * (n - 1))__ for directed Graphs
+        where n is the number of nodes in G.
+        Normalization will ensure that values are in [0, 1],
+        this normalization scales for the highest possible value where one
+        edge is crossed by every single shortest path.
+
+    weight : (dask)cudf.DataFrame, optional (default=None)
+        Specifies the weights to be used for each edge.
+        Should contain a mapping between edges and weights.
+        (Not Supported)
+
+    random_state : int, optional (default=None)
+        if k is specified and k is an integer, use random_state to initialize the
+        random number generator.
+        Using None defaults to a hash of process id, time, and hostname
+        If k is either None or list or cudf objects: random_state parameter is
+        ignored.
+
+    Returns
+    -------
+    betweenness_centrality : dask_cudf.DataFrame
+        GPU distributed data frame describing the edges, with the columns below.
+
+        ddf['src'] : dask_cudf.Series
+            Contains the vertex identifiers of the source of each edge
+
+        ddf['dst'] : dask_cudf.Series
+            Contains the vertex identifiers of the destination of each edge
+
+        ddf['betweenness_centrality'] : dask_cudf.Series
+            Contains the betweenness centrality of edges
+
+        ddf["edge_id"] : dask_cudf.Series
+            Contains the edge ids of edges if present.
+
+    Examples
+    --------
+    >>> import cugraph.dask as dcg
+    >>> import dask_cudf
+    >>> # ... Init a DASK Cluster
+    >>> #    see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html
+    >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/..
+    >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv")
+    >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv",
+    ...                          chunksize=chunksize, delimiter=" ",
+    ...                          names=["src", "dst", "value"],
+    ...                          dtype=["int32", "int32", "float32"])
+    >>> dg = cugraph.Graph(directed=True)
+    >>> dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst')
+    >>> pr = dcg.edge_betweenness_centrality(dg)
+
+    """
+
+    if input_graph.store_transposed is True:
+        warning_msg = (
+            "Betweenness centrality expects the 'store_transposed' flag "
+            "to be set to 'False' for optimal performance during "
+            "the graph creation"
+        )
+        warnings.warn(warning_msg, UserWarning)
+
+    if weight is not None:
+        raise NotImplementedError(
+            "weighted implementation of edge betweenness "
+            "centrality not currently supported"
+        )
+
+    if not isinstance(k, (dask_cudf.DataFrame, dask_cudf.Series)):
+        if isinstance(k, (cudf.DataFrame, cudf.Series, list)):
+            if isinstance(k, list):
+                k_dtype = input_graph.nodes().dtype
+                k = cudf.Series(k, dtype=k_dtype)
+
+        if isinstance(k, (cudf.Series, cudf.DataFrame)):
+            splits = cp.array_split(cp.arange(len(k)), len(Comms.get_workers()))
+            k = {w: [k.iloc[splits[i]]] for i, w in enumerate(Comms.get_workers())}
+
+    else:
+        if k is not None:
+            k = get_distributed_data(k)
+            wait(k)
+            k = k.worker_to_parts
+
+    if input_graph.renumbered:
+        if isinstance(k, dask_cudf.DataFrame):
+            tmp_col_names = k.columns
+
+        elif isinstance(k, dask_cudf.Series):
+            tmp_col_names = None
+
+        if isinstance(k, (dask_cudf.DataFrame, dask_cudf.Series)):
+            k = input_graph.lookup_internal_vertex_id(k, tmp_col_names)
+
+    # FIXME: should we add this parameter as an option?
+    do_expensive_check = False
+
+    client = get_client()
+
+    ddf = _mg_call_plc_betweenness_centrality(
+        input_graph=input_graph,
+        client=client,
+        sID=Comms.get_session_id(),
+        k=k,
+        random_state=random_state,
+        normalized=normalized,
+        do_expensive_check=do_expensive_check,
+        edge_bc=True,
+    )
+
+    if input_graph.renumbered:
+        # the edge result has "src"/"dst" columns (no "vertex"); map both back
+        ddf = input_graph.unrenumber(ddf, "src")
+        ddf = input_graph.unrenumber(ddf, "dst")
+
+    if input_graph.is_directed() is False:
+        # swap the src and dst vertices for the lower triangle only. Because
+        # this is a symmetrized graph, this operation results in a df with
+        # multiple src/dst entries.
+        ddf["src"], ddf["dst"] = ddf[["src", "dst"]].min(axis=1), ddf[
+            ["src", "dst"]
+        ].max(axis=1)
+        # overwrite the df with the sum of the values for all alike src/dst
+        # vertex pairs, resulting in half the edges of the original df from the
+        # symmetrized graph.
+        ddf = ddf.groupby(by=["src", "dst"]).sum().reset_index()
+
+    return ddf
diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py
index c0efb425b75..ae2c57f5ef3 100644
--- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py
+++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py
@@ -201,14 +201,12 @@ def __from_edgelist(
                 value_col_names = [self.edgeWeightCol]
             elif len(edge_attr) == 3:
                 weight_col, id_col, type_col = edge_attr
-                input_ddf = input_ddf.rename(
-                    columns={
-                        weight_col: self.edgeWeightCol,
-                        id_col: self.edgeIdCol,
-                        type_col: self.edgeTypeCol,
-                    }
-                )
-
+                input_ddf = input_ddf[ddf_columns + [weight_col, id_col, type_col]]
+                input_ddf.columns = ddf_columns + [
+                    self.edgeWeightCol,
+                    self.edgeIdCol,
+                    self.edgeTypeCol,
+                ]
                 value_col_names = [self.edgeWeightCol, self.edgeIdCol, self.edgeTypeCol]
             else:
                 raise ValueError("Only 1 or 3 values may be provided" "for edge_attr")
diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py
index 4b159b279c0..15011fa8dbc 100644
--- a/python/cugraph/cugraph/structure/symmetrize.py
+++ b/python/cugraph/cugraph/structure/symmetrize.py
@@ -230,6 +230,9 @@ def symmetrize(
 
     """
 
+    if "edge_id" in input_df.columns and symmetrize:
+        raise ValueError("Edge IDs are not supported on undirected graphs")
+
     csg.null_check(input_df[source_col_name])
     csg.null_check(input_df[dest_col_name])
 
diff --git a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
index 6b30d9fcb2b..dedf85a034b 100644
--- a/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
+++ b/python/cugraph/cugraph/tests/centrality/test_batch_edge_betweenness_centrality_mg.py
@@ -18,7 +18,7 @@
 
 from cugraph.dask.common.mg_utils import is_single_gpu
 
-from cugraph.experimental.datasets import karate
+from cugraph.experimental.datasets import karate, netscience
 
 # Get parameters from standard betwenness_centrality_test
 # As tests directory is not a module, we need to add it to the path
@@ -29,7 +29,6 @@
     NORMALIZED_OPTIONS,
     DEFAULT_EPSILON,
     SUBSET_SIZE_OPTIONS,
-    SUBSET_SEED_OPTIONS,
 )
 
 from test_edge_betweenness_centrality import (
@@ -40,11 +39,11 @@
 # =============================================================================
 # Parameters
 # =============================================================================
-DATASETS = [karate]
+DATASETS = [karate, netscience]
 
 # FIXME: The "preset_gpu_count" from 21.08 and below are not supported and have
 # been removed
-RESULT_DTYPE_OPTIONS = [np.float64]
+RESULT_DTYPE_OPTIONS = [np.float32, np.float64]
 
 
 # =============================================================================
@@ -54,6 +53,7 @@ def setup_function():
     gc.collect()
 
 
+# FIXME: Fails for directed=False (bc scores are twice as large) and normalized=True.
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
 @pytest.mark.parametrize(
@@ -62,16 +62,12 @@ def setup_function():
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
 @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
 @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
-@pytest.mark.parametrize("weight", [None])
-@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS)
 @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
 def test_mg_edge_betweenness_centrality(
     graph_file,
     directed,
     subset_size,
     normalized,
-    weight,
-    subset_seed,
     result_dtype,
     dask_client,
 ):
@@ -80,8 +76,8 @@ def test_mg_edge_betweenness_centrality(
         directed=directed,
         normalized=normalized,
         k=subset_size,
-        weight=weight,
-        seed=subset_seed,
+        weight=None,
+        seed=42,
         result_dtype=result_dtype,
         multi_gpu_batch=True,
     )
diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py
index 759ed01a7eb..c9e31e804d4 100644
--- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py
+++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py
@@ -313,7 +313,7 @@ def compare_scores(sorted_df, first_key, second_key, epsilon=DEFAULT_EPSILON):
 @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS)
 @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
 @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS)
-def test_betweenness_centrality_0(
+def test_betweenness_centrality(
     graph_file,
     directed,
     subset_size,
diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality.py
index 0717925216a..12e9dd4c0a5 100644
--- a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality.py
+++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality.py
@@ -45,7 +45,6 @@
 DEFAULT_EPSILON = 0.0001
 
 SUBSET_SIZE_OPTIONS = [4, None]
-SUBSET_SEED_OPTIONS = [42]
 
 # NOTE: The following is not really being exploited in the tests as the
 # datasets that are used are too small to compare, but it ensures that both
@@ -157,6 +156,14 @@ def calc_edge_betweenness_centrality(
     return sorted_df
 
 
+def _rescale_e(betweenness, num_nodes, k):
+    """Scale every score of the mapping in place by num_nodes / k; return it."""
+    for e in betweenness:
+        betweenness[e] *= num_nodes / k
+
+    return betweenness
+
+
 def _calc_bc_subset(G, Gnx, normalized, weight, k, seed, result_dtype):
     # NOTE: Networkx API does not allow passing a list of vertices
     # And the sampling is operated on Gnx.nodes() directly
@@ -180,6 +187,10 @@ def _calc_bc_subset(G, Gnx, normalized, weight, k, seed, result_dtype):
         Gnx, k=k, normalized=normalized, weight=weight, seed=seed
     )
 
+    if normalized or not Gnx.is_directed():
+        if k is not None:
+            nx_bc_dict = _rescale_e(nx_bc_dict, len(Gnx.nodes()), k)
+
     nx_df = generate_nx_result(nx_bc_dict, type(Gnx) is nx.DiGraph).rename(
         columns={"betweenness_centrality": "ref_bc"}, copy=False
     )
@@ -200,9 +211,9 @@ def _calc_bc_subset_fixed(G, Gnx, normalized, weight, k, seed, result_dtype):
     # In the fixed set we compare cu_bc against itself as we random.seed(seed)
     # on the same seed and then sample on the number of vertices themselves
     if seed is None:
-        seed = 123  # random.seed(None) uses time, but we want same sources
-    random.seed(seed)  # It will be called again in cugraph's call
-    sources = random.sample(range(G.number_of_vertices()), k)
+        seed = 123  # We want the same sources so we use the same seed when
+        # randomly selecting vertices both below and internally(plc)
+    sources = G.select_random_vertices(seed, k)
 
     if G.renumbered:
         sources_df = cudf.DataFrame({"src": sources})
@@ -316,7 +327,6 @@ def generate_upper_triangle(dataframe):
 @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
 @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
 @pytest.mark.parametrize("weight", [None])
-@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS)
 @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
 @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS)
 def test_edge_betweenness_centrality(
@@ -325,7 +335,6 @@ def test_edge_betweenness_centrality(
     subset_size,
     normalized,
     weight,
-    subset_seed,
     result_dtype,
     edgevals,
 ):
@@ -335,7 +344,7 @@ def test_edge_betweenness_centrality(
         normalized=normalized,
         k=subset_size,
         weight=weight,
-        seed=subset_seed,
+        seed=42,
         result_dtype=result_dtype,
         edgevals=edgevals,
     )
@@ -348,18 +357,15 @@ def test_edge_betweenness_centrality(
 @pytest.mark.parametrize("subset_size", [None])
 @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
 @pytest.mark.parametrize("weight", [None])
-@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS)
 @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
 @pytest.mark.parametrize("use_k_full", [True])
 @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS)
-@pytest.mark.skip(reason="Skipping large tests")
 def test_edge_betweenness_centrality_k_full(
     graph_file,
     directed,
     subset_size,
     normalized,
     weight,
-    subset_seed,
     result_dtype,
     use_k_full,
     edgevals,
@@ -372,7 +378,7 @@ def test_edge_betweenness_centrality_k_full(
         normalized=normalized,
         k=subset_size,
         weight=weight,
-        seed=subset_seed,
+        seed=42,
         result_dtype=result_dtype,
         use_k_full=use_k_full,
         edgevals=edgevals,
@@ -390,17 +396,14 @@ def test_edge_betweenness_centrality_k_full(
 @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
 @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
 @pytest.mark.parametrize("weight", [None])
-@pytest.mark.parametrize("subset_seed", [None])
 @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
 @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS)
-@pytest.mark.skip(reason="Skipping large tests")
 def test_edge_betweenness_centrality_fixed_sample(
     graph_file,
     directed,
     subset_size,
     normalized,
     weight,
-    subset_seed,
     result_dtype,
     edgevals,
 ):
@@ -414,7 +417,7 @@ def test_edge_betweenness_centrality_fixed_sample(
         k=subset_size,
         normalized=normalized,
         weight=weight,
-        seed=subset_seed,
+        seed=None,
         result_dtype=result_dtype,
         edgevals=edgevals,
     )
@@ -427,17 +430,14 @@ def test_edge_betweenness_centrality_fixed_sample(
 @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
 @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
 @pytest.mark.parametrize("weight", [[]])
-@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS)
 @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS)
 @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS)
-@pytest.mark.skip(reason="Skipping large tests")
 def test_edge_betweenness_centrality_weight_except(
     graph_file,
     directed,
     subset_size,
     normalized,
     weight,
-    subset_seed,
     result_dtype,
     edgevals,
 ):
@@ -453,7 +453,7 @@ def test_edge_betweenness_centrality_weight_except(
             k=subset_size,
             normalized=normalized,
             weight=weight,
-            seed=subset_seed,
+            seed=42,
             result_dtype=result_dtype,
             edgevals=edgevals,
         )
@@ -466,7 +466,6 @@ def test_edge_betweenness_centrality_weight_except(
 @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
 @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
 @pytest.mark.parametrize("weight", [None])
-@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS)
 @pytest.mark.parametrize("result_dtype", [str])
 @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS)
 def test_edge_betweenness_invalid_dtype(
@@ -475,7 +474,6 @@ def test_edge_betweenness_invalid_dtype(
     subset_size,
     normalized,
     weight,
-    subset_seed,
     result_dtype,
     edgevals,
 ):
@@ -488,7 +486,7 @@ def test_edge_betweenness_invalid_dtype(
             k=subset_size,
             normalized=normalized,
             weight=weight,
-            seed=subset_seed,
+            seed=42,
             result_dtype=result_dtype,
             edgevals=edgevals,
         )
@@ -499,13 +497,14 @@ def test_edge_betweenness_invalid_dtype(
 @pytest.mark.parametrize("graph_file", DATASETS_SMALL)
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
 @pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS)
-def test_edge_betweenness_centrality_nx(graph_file, directed, edgevals):
+@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
+def test_edge_betweenness_centrality_nx(graph_file, directed, edgevals, normalized):
     dataset_path = graph_file.get_path()
     Gnx = utils.generate_nx_graph_from_file(dataset_path, directed, edgevals)
     assert nx.is_directed(Gnx) == directed
 
-    nx_bc = nx.edge_betweenness_centrality(Gnx)
-    cu_bc = cugraph.edge_betweenness_centrality(Gnx)
+    nx_bc = nx.edge_betweenness_centrality(Gnx, normalized=normalized)
+    cu_bc = cugraph.edge_betweenness_centrality(Gnx, normalized=normalized)
 
     # Calculating mismatch
     networkx_bc = sorted(nx_bc.items(), key=lambda x: x[0])
@@ -519,6 +518,11 @@ def test_edge_betweenness_centrality_nx(graph_file, directed, edgevals):
             and cugraph_bc[i][0] == networkx_bc[i][0]
         ):
             err = err + 1
-            print(f"{cugraph_bc[i][1]} and {cugraph_bc[i][1]}")
+            print(
+                "type c_bc = ",
+                type(cugraph_bc[i][1]),
+                " type nx_bc = ",
+                type(networkx_bc[i][1]),
+            )
     print("Mismatches:", err)
     assert err < (0.01 * len(cugraph_bc))
diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py
new file mode 100644
index 00000000000..aa41f8e1c82
--- /dev/null
+++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py
@@ -0,0 +1,231 @@
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import pytest
+
+import dask_cudf
+from pylibcugraph.testing.utils import gen_fixture_params_product
+from cugraph.experimental.datasets import DATASETS_UNDIRECTED, email_Eu_core
+
+import cugraph
+import cugraph.dask as dcg
+
+# from cugraph.dask.common.mg_utils import is_single_gpu
+
+
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+
+
+def setup_function():
+    gc.collect()
+
+
+IS_DIRECTED = [True, False]
+INCLUDE_WEIGHTS = [False, True]
+INCLUDE_EDGE_IDS = [False, True]
+NORMALIZED_OPTIONS = [False, True]
+SUBSET_SIZE_OPTIONS = [4, None]
+
+
+# NOTE(review): email_Eu_core is expensive to test, yet it is still added below
+datasets = DATASETS_UNDIRECTED + [email_Eu_core]
+
+
+# =============================================================================
+# Pytest fixtures
+# =============================================================================
+
+
+fixture_params = gen_fixture_params_product(
+    (datasets, "graph_file"),
+    (IS_DIRECTED, "directed"),
+    (INCLUDE_WEIGHTS, "include_weights"),
+    (INCLUDE_EDGE_IDS, "include_edgeids"),
+    (NORMALIZED_OPTIONS, "normalized"),
+    (SUBSET_SIZE_OPTIONS, "subset_size"),
+)
+
+
+@pytest.fixture(scope="module", params=fixture_params)
+def input_combo(request):
+    """
+    Simply return the current combination of params as a dictionary for use in
+    tests or other parameterized fixtures.
+    """
+    parameters = dict(
+        zip(
+            (
+                "graph_file",
+                "directed",
+                "include_weights",
+                "include_edge_ids",
+                "normalized",
+                "subset_size",
+                "subset_seed",
+            ),
+            request.param,
+        )
+    )
+
+    return parameters
+
+
+@pytest.fixture(scope="module")
+def input_expected_output(input_combo):
+    """
+    This fixture returns the inputs and expected results from the edge
+    betweenness centrality algo (based on SG cuGraph edge betweenness
+    centrality), which can be used for validation. It returns None when edge
+    ids are requested for an undirected graph, which is not supported.
+    """
+    directed = input_combo["directed"]
+    normalized = input_combo["normalized"]
+    k = input_combo["subset_size"]
+    subset_seed = 42
+    edge_ids = input_combo["include_edge_ids"]
+    weight = input_combo["include_weights"]
+
+    df = input_combo["graph_file"].get_edgelist()
+    if edge_ids:
+        if not directed:
+            # Edge ids not supported for undirected graph
+            return
+        dtype = df.dtypes[0]
+        edge_id = "edge_id"
+        df["edge_id"] = df.index
+        df = df.astype(dtype)
+
+    else:
+        edge_id = None
+
+    G = cugraph.Graph(directed=directed)
+    G.from_cudf_edgelist(
+        df, source="src", destination="dst", weight="wgt", edge_id=edge_id
+    )
+    if isinstance(k, int):
+        k = G.select_random_vertices(subset_seed, k)
+
+    input_combo["k"] = k
+    # Save the results back to the input_combo dictionary to prevent redundant
+    # cuGraph runs. Other tests using the input_combo fixture will look for
+    # them, and if not present they will have to re-run the same cuGraph call.
+    sg_cugraph_edge_bc = (
+        cugraph.edge_betweenness_centrality(G, k, normalized)
+        .sort_values(["src", "dst"])
+        .reset_index(drop=True)
+    )
+
+    input_data_path = input_combo["graph_file"].get_path()
+
+    input_combo["sg_cugraph_results"] = sg_cugraph_edge_bc
+    chunksize = dcg.get_chunksize(input_data_path)
+    ddf = dask_cudf.read_csv(
+        input_data_path,
+        chunksize=chunksize,
+        delimiter=" ",
+        names=["src", "dst", "value"],
+        dtype=["int32", "int32", "float32"],
+    )
+
+    if weight:
+        weight = ddf
+    else:
+        weight = None
+
+    if edge_ids:
+        dtype = ddf.dtypes[0]
+        edge_id = "edge_id"
+        ddf = ddf.assign(idx=1)
+        ddf["edge_id"] = ddf.idx.cumsum().astype(dtype) - 1
+    else:
+        edge_id = None
+
+    dg = cugraph.Graph(directed=directed)
+
+    dg.from_dask_cudf_edgelist(
+        ddf,
+        source="src",
+        destination="dst",
+        weight="value",
+        edge_id=edge_id,
+        renumber=True,
+    )
+
+    input_combo["MGGraph"] = dg
+    input_combo["include_weights"] = weight
+
+    return input_combo
+
+
+# =============================================================================
+# Tests
+# =============================================================================
+
+
+# @pytest.mark.skipif(
+#    is_single_gpu(), reason="skipping MG testing on Single GPU system"
+# )
+@pytest.mark.mg
+def test_dask_edge_betweenness_centrality(
+    dask_client, benchmark, input_expected_output
+):
+    """Check MG edge betweenness centrality against the SG cuGraph results."""
+    if input_expected_output is not None:
+        dg = input_expected_output["MGGraph"]
+        k = input_expected_output["k"]
+        normalized = input_expected_output["normalized"]
+        weight = input_expected_output["include_weights"]
+        if weight is not None:
+            # Passing weights is expected to raise NotImplementedError.
+            with pytest.raises(NotImplementedError):
+                result_edge_bc = benchmark(
+                    dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight
+                )
+        else:
+            result_edge_bc = benchmark(
+                dcg.edge_betweenness_centrality, dg, k, normalized, weight=weight
+            )
+            result_edge_bc = (
+                result_edge_bc.compute()
+                .sort_values(["src", "dst"])
+                .reset_index(drop=True)
+                .rename(columns={"betweenness_centrality": "mg_betweenness_centrality"})
+            )
+            if len(result_edge_bc.columns) > 3:
+                result_edge_bc = result_edge_bc.rename(
+                    columns={"edge_id": "mg_edge_id"}
+                )
+
+            expected_output = input_expected_output["sg_cugraph_results"].reset_index(
+                drop=True
+            )
+            result_edge_bc["betweenness_centrality"] = expected_output[
+                "betweenness_centrality"
+            ]
+            if len(expected_output.columns) > 3:
+                result_edge_bc["edge_id"] = expected_output["edge_id"]
+                edge_id_diff = result_edge_bc.query("mg_edge_id != edge_id")
+                assert len(edge_id_diff) == 0
+
+            # Flag any edge whose MG score is off by more than 0.01 either way.
+            edge_bc_diffs1 = result_edge_bc.query(
+                "mg_betweenness_centrality - betweenness_centrality > 0.01"
+            )
+            edge_bc_diffs2 = result_edge_bc.query(
+                "betweenness_centrality - mg_betweenness_centrality > 0.01"
+            )
+            assert len(edge_bc_diffs1) == 0
+            assert len(edge_bc_diffs2) == 0
diff --git a/python/cugraph/cugraph/tests/structure/test_graph.py b/python/cugraph/cugraph/tests/structure/test_graph.py
index 02219002a7e..1b883b91e92 100644
--- a/python/cugraph/cugraph/tests/structure/test_graph.py
+++ b/python/cugraph/cugraph/tests/structure/test_graph.py
@@ -774,9 +774,12 @@ def test_create_graph_with_edge_ids(graph_file):
         edge_attr=["2", "id", "etype"],
     )
 
-    H = G.to_undirected()
     assert G.is_directed()
-    assert not H.is_directed()
+
+    # edge_ids are not supported for undirected graphs
+    with pytest.raises(ValueError):
+        G.to_undirected()
+    # assert not H.is_directed()
 
 
 @pytest.mark.sg
diff --git a/python/pylibcugraph/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/CMakeLists.txt
index 6a09c3de0da..2f7e63b5c55 100644
--- a/python/pylibcugraph/pylibcugraph/CMakeLists.txt
+++ b/python/pylibcugraph/pylibcugraph/CMakeLists.txt
@@ -25,6 +25,7 @@ set(cython_sources
     bfs.pyx
     core_number.pyx
     ecg.pyx
+    edge_betweenness_centrality.pyx
     egonet.pyx
     eigenvector_centrality.pyx
     generate_rmat_edgelist.pyx
diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py
index c39075ce3fb..6f99d128938 100644
--- a/python/pylibcugraph/pylibcugraph/__init__.py
+++ b/python/pylibcugraph/pylibcugraph/__init__.py
@@ -81,6 +81,8 @@
 
 from pylibcugraph.select_random_vertices import select_random_vertices
 
+from pylibcugraph.edge_betweenness_centrality import edge_betweenness_centrality
+
 from pylibcugraph.generate_rmat_edgelist import generate_rmat_edgelist
 
 from pylibcugraph.generate_rmat_edgelists import generate_rmat_edgelists
diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd
index 6cd02ed6f17..532df624c99 100644
--- a/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd
+++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd
@@ -212,3 +212,45 @@ cdef extern from "cugraph_c/centrality_algorithms.h":
             cugraph_centrality_result_t** result,
             cugraph_error_t** error
         )
+    
+    ###########################################################################
+    # edge betweenness centrality
+
+    ctypedef struct cugraph_edge_centrality_result_t:
+        pass
+
+    cdef cugraph_type_erased_device_array_view_t* \
+        cugraph_edge_centrality_result_get_src_vertices(
+            cugraph_edge_centrality_result_t* result
+        )
+    
+    cdef cugraph_type_erased_device_array_view_t* \
+        cugraph_edge_centrality_result_get_dst_vertices(
+            cugraph_edge_centrality_result_t* result
+        )
+
+    cdef cugraph_type_erased_device_array_view_t* \
+        cugraph_edge_centrality_result_get_edge_ids(
+            cugraph_edge_centrality_result_t* result
+        )
+    
+    cdef cugraph_type_erased_device_array_view_t* \
+        cugraph_edge_centrality_result_get_values(
+            cugraph_edge_centrality_result_t* result
+        )
+    
+    cdef void \
+        cugraph_edge_centrality_result_free(
+            cugraph_edge_centrality_result_t* result
+        )
+    
+    cdef cugraph_error_code_t \
+        cugraph_edge_betweenness_centrality(
+            const cugraph_resource_handle_t* handle,
+            cugraph_graph_t* graph,
+            const cugraph_type_erased_device_array_view_t* vertex_list,
+            bool_t normalized,
+            bool_t do_expensive_check,
+            cugraph_edge_centrality_result_t** result,
+            cugraph_error_t** error
+        )
diff --git a/python/pylibcugraph/pylibcugraph/edge_betweenness_centrality.pyx b/python/pylibcugraph/pylibcugraph/edge_betweenness_centrality.pyx
new file mode 100644
index 00000000000..c88c9fe8a67
--- /dev/null
+++ b/python/pylibcugraph/pylibcugraph/edge_betweenness_centrality.pyx
@@ -0,0 +1,197 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Have cython use python 3 syntax
+# cython: language_level = 3
+
+
+from pylibcugraph._cugraph_c.resource_handle cimport (
+    bool_t,
+    cugraph_resource_handle_t,
+)
+from pylibcugraph._cugraph_c.error cimport (
+    cugraph_error_code_t,
+    cugraph_error_t,
+)
+from pylibcugraph._cugraph_c.array cimport (
+    cugraph_type_erased_device_array_view_t,
+    cugraph_type_erased_device_array_view_free,
+)
+from pylibcugraph._cugraph_c.graph cimport (
+    cugraph_graph_t,
+)
+from pylibcugraph._cugraph_c.centrality_algorithms cimport (
+    cugraph_edge_centrality_result_t,
+    cugraph_edge_betweenness_centrality,
+    cugraph_edge_centrality_result_get_src_vertices,
+    cugraph_edge_centrality_result_get_dst_vertices,
+    cugraph_edge_centrality_result_get_values,
+    cugraph_edge_centrality_result_get_edge_ids,
+    cugraph_edge_centrality_result_get_values,
+    cugraph_edge_centrality_result_free,
+)
+from pylibcugraph.resource_handle cimport (
+    ResourceHandle,
+)
+from pylibcugraph.graphs cimport (
+    _GPUGraph,
+)
+from pylibcugraph.utils cimport (
+    assert_success,
+    copy_to_cupy_array,
+    create_cugraph_type_erased_device_array_view_from_py_obj,
+)
+from pylibcugraph.select_random_vertices import (
+    select_random_vertices
+)
+
+
+def edge_betweenness_centrality(ResourceHandle resource_handle,
+                                _GPUGraph graph,
+                                k,
+                                random_state,
+                                bool_t normalized,
+                                bool_t do_expensive_check):
+    """
+    Compute the edge betweenness centrality for all edges of the graph G.
+    Betweenness centrality is a measure of the number of shortest paths
+    that pass over an edge.  An edge with a high betweenness centrality
+    score has more paths passing over it and is therefore believed to be
+    more important.
+
+    Parameters
+    ----------
+    resource_handle : ResourceHandle
+        Handle to the underlying device resources needed for referencing data
+        and running algorithms.
+
+    graph : SGGraph or MGGraph
+        The input graph, for either Single or Multi-GPU operations.
+
+    k : int or device array type or None, optional (default=None)
+        If k is not None, use k node samples to estimate the edge betweenness.
+        Higher values give better approximation.  If k is a device array type,
+        the contents are assumed to be vertex identifiers to be used for estimation.
+        If k is None (the default), all the vertices are used to estimate the edge
+        betweenness.  Vertices obtained through sampling or defined as a list will
+        be used as sources for traversals inside the algorithm.
+
+    random_state : int, optional (default=None)
+        if k is specified and k is an integer, use random_state to initialize the
+        random number generator.
+        Using None defaults to a hash of process id, time, and hostname
+        If k is either None or list or cudf objects: random_state parameter is
+        ignored.
+    
+    normalized : bool_t
+        Normalization will ensure that values are in [0, 1].
+
+    do_expensive_check : bool_t
+        A flag to run expensive checks for input arguments if True.
+    
+    Returns
+    -------
+    A tuple of device arrays corresponding to the sources, destinations, edge
+    betweenness centrality scores and edge ids (if provided).
+
+    The edge ids entry of the tuple is None when the graph was created
+    without edge ids.
+
+    Examples
+    --------
+    >>> import pylibcugraph, cupy, numpy
+    >>> srcs = cupy.asarray([0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5],
+    ...     dtype=numpy.int32)
+    >>> dsts = cupy.asarray([1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4],
+    ...     dtype=numpy.int32)
+    >>> edge_ids = cupy.asarray(
+    ...     [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+    ...     dtype=numpy.int32)
+    >>> resource_handle = pylibcugraph.ResourceHandle()
+    >>> graph_props = pylibcugraph.GraphProperties(
+    ...     is_symmetric=False, is_multigraph=False)
+    >>> G = pylibcugraph.SGGraph(
+    ...     resource_handle, graph_props, srcs, dsts, store_transposed=False,
+    ...     renumber=False, do_expensive_check=False, edge_id_array=edge_ids)
+    >>> (srcs, dsts, values, edge_ids) = pylibcugraph.edge_betweenness_centrality(
+    ...     resource_handle, G, None, None, True, False)
+    >>> srcs
+    [0 0 1 1 1 1 2 2 2 3 3 3 4 4 5 5]
+    >>> dsts
+    [1 2 0 2 3 4 0 1 3 1 2 5 1 5 3 4]
+    >>> values
+    [0.10555556 0.06111111 0.10555556 0.06666667 0.09444445 0.14444445
+     0.06111111 0.06666667 0.09444445 0.09444445 0.09444445 0.12222222
+     0.14444445 0.07777778 0.12222222 0.07777778]
+    >>> edge_ids
+    [ 0 11  8 12  1  2  3  4  5  9 13  6 10  7 14 15]
+
+    """
+
+    if isinstance(k, int):
+        # randomly select vertices
+        
+        # 'select_random_vertices' internally creates a
+        # 'pylibcugraph.random.CuGraphRandomState'
+        vertex_list = select_random_vertices(
+            resource_handle, graph, random_state, k)
+    else:
+        # FIXME: Add CAPI check ensuring that k is a cuda array interface
+        vertex_list = k
+
+    cdef cugraph_resource_handle_t* c_resource_handle_ptr = \
+        resource_handle.c_resource_handle_ptr
+    cdef cugraph_graph_t* c_graph_ptr = graph.c_graph_ptr
+
+    cdef cugraph_edge_centrality_result_t* result_ptr
+    cdef cugraph_error_code_t error_code
+    cdef cugraph_error_t* error_ptr
+
+    cdef cugraph_type_erased_device_array_view_t* \
+        vertex_list_view_ptr = \
+            create_cugraph_type_erased_device_array_view_from_py_obj(
+                vertex_list)
+
+    error_code = cugraph_edge_betweenness_centrality(c_resource_handle_ptr,
+                                                c_graph_ptr,
+                                                vertex_list_view_ptr,
+                                                normalized,
+                                                do_expensive_check,
+                                                &result_ptr,
+                                                &error_ptr)
+    assert_success(error_code, error_ptr, "cugraph_edge_betweenness_centrality")
+
+    # Extract individual device array pointers from result and copy to cupy
+    # arrays for returning.
+    cdef cugraph_type_erased_device_array_view_t* src_ptr = \
+        cugraph_edge_centrality_result_get_src_vertices(result_ptr)
+    cdef cugraph_type_erased_device_array_view_t* dst_ptr = \
+        cugraph_edge_centrality_result_get_dst_vertices(result_ptr)
+    cdef cugraph_type_erased_device_array_view_t* values_ptr = \
+        cugraph_edge_centrality_result_get_values(result_ptr)
+    
+    if graph.edge_id_view_ptr is NULL:
+        cupy_edge_ids = None
+    else:
+        edge_ids_ptr = cugraph_edge_centrality_result_get_edge_ids(result_ptr)
+        cupy_edge_ids = copy_to_cupy_array(c_resource_handle_ptr, edge_ids_ptr)
+        
+    
+    cupy_src_vertices = copy_to_cupy_array(c_resource_handle_ptr, src_ptr)
+    cupy_dst_vertices = copy_to_cupy_array(c_resource_handle_ptr, dst_ptr)
+    cupy_values = copy_to_cupy_array(c_resource_handle_ptr, values_ptr)
+
+    cugraph_edge_centrality_result_free(result_ptr)
+    cugraph_type_erased_device_array_view_free(vertex_list_view_ptr)
+
+    return (cupy_src_vertices, cupy_dst_vertices, cupy_values, cupy_edge_ids)
diff --git a/python/pylibcugraph/pylibcugraph/graphs.pxd b/python/pylibcugraph/pylibcugraph/graphs.pxd
index e468738f529..4e52ed557ed 100644
--- a/python/pylibcugraph/pylibcugraph/graphs.pxd
+++ b/python/pylibcugraph/pylibcugraph/graphs.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -16,6 +16,7 @@
 
 from pylibcugraph._cugraph_c.graph cimport (
     cugraph_graph_t,
+    cugraph_type_erased_device_array_view_t,
 )
 
 
@@ -23,6 +24,7 @@ from pylibcugraph._cugraph_c.graph cimport (
 # This is not visible in python
 cdef class _GPUGraph:
     cdef cugraph_graph_t* c_graph_ptr
+    cdef cugraph_type_erased_device_array_view_t* edge_id_view_ptr
 
 cdef class SGGraph(_GPUGraph):
     pass
diff --git a/python/pylibcugraph/pylibcugraph/graphs.pyx b/python/pylibcugraph/pylibcugraph/graphs.pyx
index 49b9747f0b3..fb4692bf3a8 100644
--- a/python/pylibcugraph/pylibcugraph/graphs.pyx
+++ b/python/pylibcugraph/pylibcugraph/graphs.pyx
@@ -171,8 +171,8 @@ cdef class SGGraph(_GPUGraph):
                 weight_array
             )
         
-        cdef cugraph_type_erased_device_array_view_t* edge_id_view_ptr = \
-            create_cugraph_type_erased_device_array_view_from_py_obj(
+
+        self.edge_id_view_ptr = create_cugraph_type_erased_device_array_view_from_py_obj(
                 edge_id_array
             )
         
@@ -188,7 +188,7 @@ cdef class SGGraph(_GPUGraph):
                 srcs_or_offsets_view_ptr,
                 dsts_or_indices_view_ptr,
                 weights_view_ptr,
-                edge_id_view_ptr,
+                self.edge_id_view_ptr,
                 edge_type_view_ptr,
                 store_transposed,
                 renumber,
@@ -206,7 +206,7 @@ cdef class SGGraph(_GPUGraph):
                 srcs_or_offsets_view_ptr,
                 dsts_or_indices_view_ptr,
                 weights_view_ptr,
-                edge_id_view_ptr,
+                self.edge_id_view_ptr,
                 edge_type_view_ptr,
                 store_transposed,
                 renumber,
@@ -225,8 +225,8 @@ cdef class SGGraph(_GPUGraph):
         cugraph_type_erased_device_array_view_free(srcs_or_offsets_view_ptr)
         cugraph_type_erased_device_array_view_free(dsts_or_indices_view_ptr)
         cugraph_type_erased_device_array_view_free(weights_view_ptr)
-        if edge_id_view_ptr is not NULL:
-            cugraph_type_erased_device_array_view_free(edge_id_view_ptr)
+        if self.edge_id_view_ptr is not NULL:
+            cugraph_type_erased_device_array_view_free(self.edge_id_view_ptr)
         if edge_type_view_ptr is not NULL:
             cugraph_type_erased_device_array_view_free(edge_type_view_ptr)
 
@@ -341,7 +341,7 @@ cdef class MGGraph(_GPUGraph):
             create_cugraph_type_erased_device_array_view_from_py_obj(
                 weight_array
             )
-        cdef cugraph_type_erased_device_array_view_t* edge_id_view_ptr = \
+        self.edge_id_view_ptr = \
             create_cugraph_type_erased_device_array_view_from_py_obj(
                 edge_id_array
             )
@@ -356,7 +356,7 @@ cdef class MGGraph(_GPUGraph):
             srcs_view_ptr,
             dsts_view_ptr,
             weights_view_ptr,
-            edge_id_view_ptr,
+            self.edge_id_view_ptr,
             edge_type_view_ptr,
             store_transposed,
             num_edges,
@@ -370,8 +370,8 @@ cdef class MGGraph(_GPUGraph):
         cugraph_type_erased_device_array_view_free(srcs_view_ptr)
         cugraph_type_erased_device_array_view_free(dsts_view_ptr)
         cugraph_type_erased_device_array_view_free(weights_view_ptr)
-        if edge_id_view_ptr is not NULL:
-            cugraph_type_erased_device_array_view_free(edge_id_view_ptr)
+        if self.edge_id_view_ptr is not NULL:
+            cugraph_type_erased_device_array_view_free(self.edge_id_view_ptr)
         if edge_type_view_ptr is not NULL:
             cugraph_type_erased_device_array_view_free(edge_type_view_ptr)
 
diff --git a/python/pylibcugraph/pylibcugraph/tests/test_edge_betweenness_centrality.py b/python/pylibcugraph/pylibcugraph/tests/test_edge_betweenness_centrality.py
new file mode 100644
index 00000000000..fa92147842c
--- /dev/null
+++ b/python/pylibcugraph/pylibcugraph/tests/test_edge_betweenness_centrality.py
@@ -0,0 +1,145 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+import cupy as cp
+import numpy as np
+from pylibcugraph import (
+    ResourceHandle,
+    GraphProperties,
+    SGGraph,
+    edge_betweenness_centrality,
+)
+from pylibcugraph.testing import utils
+
+
+TOY = utils.RAPIDS_DATASET_ROOT_DIR_PATH / "toy_graph.csv"
+
+
+# =============================================================================
+# Test helpers
+# =============================================================================
+def _get_param_args(param_name, param_values):
+    """
+    Returns a tuple of (<param_name>, <pytest.param list>) which can be applied
+    as the args to pytest.mark.parametrize(). The pytest.param list also
+    contains param id string formed from the param name and values.
+    """
+    return (param_name, [pytest.param(v, id=f"{param_name}={v}") for v in param_values])
+
+
+def _generic_edge_betweenness_centrality_test(
+    src_arr,
+    dst_arr,
+    edge_id_arr,
+    result_score_arr,
+    result_edge_id_arr,
+    num_edges,
+    store_transposed,
+    k,
+    random_state,
+    normalized,
+):
+    """
+    Builds a graph from the input arrays and runs edge bc using the other args,
+    similar to how edge bc is tested in libcugraph.
+    """
+    resource_handle = ResourceHandle()
+    graph_props = GraphProperties(is_symmetric=False, is_multigraph=False)
+    G = SGGraph(
+        resource_handle,
+        graph_props,
+        src_arr,
+        dst_arr,
+        store_transposed=store_transposed,
+        renumber=False,
+        do_expensive_check=True,
+        edge_id_array=edge_id_arr,
+    )
+
+    (srcs, dsts, values, edge_ids) = edge_betweenness_centrality(
+        resource_handle, G, k, random_state, normalized, do_expensive_check=False
+    )
+
+    result_score_arr = result_score_arr.get()
+    result_edge_id_arr = result_edge_id_arr.get()
+    centralities = values.get()
+    edge_ids = edge_ids.get()
+    # Expected arrays are compared index-wise, i.e. in the order of the results.
+    for idx in range(num_edges):
+        expected_result_score = result_score_arr[idx]
+        actual_result_score = centralities[idx]
+
+        expected_result_edge_id = result_edge_id_arr[idx]
+        actual_result_edge_id = edge_ids[idx]
+
+        assert pytest.approx(expected_result_score, 1e-4) == actual_result_score, (
+            f"Edge {srcs[idx]} {dsts[idx]} has centrality {actual_result_score},"
+            f" should have been {expected_result_score}"
+        )
+
+        assert pytest.approx(expected_result_edge_id, 1e-4) == actual_result_edge_id, (
+            f"Edge {srcs[idx]} {dsts[idx]} has id {actual_result_edge_id},"
+            f" should have been {expected_result_edge_id}"
+        )
+
+
+def test_edge_betweenness_centrality():
+    num_edges = 16
+
+    graph_data = np.genfromtxt(TOY, delimiter=" ")
+    src = cp.asarray(graph_data[:, 0], dtype=np.int32)
+    dst = cp.asarray(graph_data[:, 1], dtype=np.int32)
+    edge_id = cp.array(
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], dtype=np.int32
+    )
+    result_score = cp.asarray(
+        [
+            0.10555556,
+            0.06111111,
+            0.10555556,
+            0.06666667,
+            0.09444445,
+            0.14444445,
+            0.06111111,
+            0.06666667,
+            0.09444445,
+            0.09444445,
+            0.09444445,
+            0.12222222,
+            0.14444445,
+            0.07777778,
+            0.12222222,
+            0.07777778,
+        ],
+        dtype=np.float32,
+    )
+    result_edge_ids = cp.asarray([0, 11, 8, 12, 1, 2, 3, 4, 5, 9, 13, 6, 10, 7, 14, 15])
+
+    store_transposed = False
+    k = None
+    random_state = None
+    normalized = True
+
+    _generic_edge_betweenness_centrality_test(
+        src,
+        dst,
+        edge_id,
+        result_score,
+        result_edge_ids,
+        num_edges,
+        store_transposed,
+        k,
+        random_state,
+        normalized,
+    )