diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index bfa50e69b1d..2702dcad0b5 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -30,6 +30,7 @@ jobs: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} date: ${{ inputs.date }} @@ -39,6 +40,7 @@ jobs: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} date: ${{ inputs.date }} @@ -61,7 +63,11 @@ jobs: build_type: branch node_type: "gpu-latest-1" arch: "amd64" + branch: ${{ inputs.branch }} + build_type: ${{ inputs.build_type || 'branch' }} container_image: "rapidsai/ci:latest" + date: ${{ inputs.date }} + node_type: "gpu-v100-latest-1" run_script: "ci/build_docs.sh" wheel-build-pylibcugraph: secrets: inherit diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 2447863ab86..12d49520d35 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -35,6 +35,7 @@ jobs: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: pull-request node_type: cpu16 conda-cpp-tests: @@ -42,18 +43,21 @@ jobs: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: pull-request conda-notebook-tests: needs: conda-python-build @@ -63,7 +67,7 @@ jobs: build_type: pull-request node_type: "gpu-latest-1" arch: "amd64" - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci:cuda11.8.0-ubuntu22.04-py3.10" run_script: "ci/test_notebooks.sh" docs-build: needs: conda-python-build @@ -73,7 +77,7 @@ jobs: build_type: pull-request node_type: "gpu-latest-1" arch: "amd64" - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci:cuda11.8.0-ubuntu22.04-py3.10" run_script: "ci/build_docs.sh" wheel-build-pylibcugraph: needs: checks @@ -97,7 +101,7 @@ jobs: package-name: pylibcugraph # On arm also need to install cupy from the specific webpage. 
test-before-arm64: "pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64" - test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets pytest -v ./python/pylibcugraph/pylibcugraph/tests" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets python -m pytest -v ./python/pylibcugraph/pylibcugraph/tests" test-smoketest: "python ci/wheel_smoke_test_pylibcugraph.py" wheel-build-cugraph: needs: wheel-tests-pylibcugraph @@ -124,6 +128,6 @@ jobs: test-before-amd64: "cd ./datasets && bash ./get_test_data.sh && cd - && RAPIDS_PY_WHEEL_NAME=pylibcugraph_cu11 rapids-download-wheels-from-s3 ./local-pylibcugraph-dep && pip install --no-deps ./local-pylibcugraph-dep/*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" # Skip dataset downloads on arm to save CI time -- arm only runs smoke tests. # On arm also need to install cupy from the specific site. - test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibcugraph_cu11 rapids-download-wheels-from-s3 ./local-pylibcugraph-dep && pip install --no-deps ./local-pylibcugraph-dep/*.whl && pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64 && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" - test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets pytest -v -m sg ./python/cugraph/cugraph/tests" + test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibcugraph_${{ '${PIP_CU_VERSION}' }} rapids-download-wheels-from-s3 ./local-pylibcugraph-dep && pip install --no-deps ./local-pylibcugraph-dep/*.whl && pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64 && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.08" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets python -m pytest -v -m sg ./python/cugraph/cugraph/tests" test-smoketest: "python ci/wheel_smoke_test_cugraph.py" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 693a45afd25..d4ba92b0dae 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -18,6 +18,7 @@ jobs: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: nightly branch: ${{ inputs.branch }} date: ${{ inputs.date }} @@ -26,6 +27,7 @@ jobs: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: nightly branch: ${{ inputs.branch }} date: ${{ inputs.date }} @@ -41,7 +43,7 @@ jobs: package-name: pylibcugraph # On arm also need to install cupy from the specific webpage. test-before-arm64: "pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64" - test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets pytest -v ./python/pylibcugraph/pylibcugraph/tests" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets python -m pytest -v ./python/pylibcugraph/pylibcugraph/tests" wheel-tests-cugraph: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@cuda-120 @@ -52,7 +54,7 @@ jobs: sha: ${{ inputs.sha }} package-name: cugraph # Always want to test against latest dask/distributed. 
- test-before-amd64: "cd ./datasets && bash ./get_test_data.sh && cd - && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" + test-before-amd64: "cd ./datasets && bash ./get_test_data.sh && cd - && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.08" # On arm also need to install cupy from the specific webpage. - test-before-arm64: "cd ./datasets && bash ./get_test_data.sh && cd - && pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64 && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" - test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets pytest -v -m sg ./python/cugraph/cugraph/tests" + test-before-arm64: "cd ./datasets && bash ./get_test_data.sh && cd - && pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64 && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.08" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets python -m pytest -v -m sg ./python/cugraph/cugraph/tests" diff --git a/benchmarks/cugraph/standalone/bulk_sampling/README.md b/benchmarks/cugraph/standalone/bulk_sampling/README.md new file mode 100644 index 00000000000..f48eea5c556 --- /dev/null +++ b/benchmarks/cugraph/standalone/bulk_sampling/README.md @@ -0,0 +1,116 @@ +# cuGraph Bulk Sampling + +## Overview +The `cugraph_bulk_sampling.py` script runs the bulk sampler for a variety of datasets, including +both generated (rmat) datasets and disk (ogbn_papers100M, etc.) datasets. It can also load +replicas of these datasets to create a larger benchmark (i.e. ogbn_papers100M x2). + +## Arguments +The script takes a variety of arguments to control sampling behavior. +Required: + --output_root + The output root directory. File/folder names are auto-generated. + For instance, if the output root directory is /home/samples, + the samples will be written to a new folder in /home/samples that + contains information about the sampling run as well as the time + of the run. + + --dataset_root + The folder where datasets are stored. Uses the format described + in the input format section. + + --datasets + Comma-separated list of datasets; can specify ogb or rmat (i.e. ogb_papers100M[2],rmat_22_16). + For ogb datasets, can provide replication factor using brackets. + Will attempt to read from dataset_root/. + +Optional: + --fanouts + Comma-separated list of fanout values (i.e. [10, 25]). + The default fanout is [10, 25]. + + --batch_sizes + Comma-separated list of batch sizes (i.e. 500, 1000). + Defaults to "512,1024" + + --seeds_per_call_opts + Comma-separated list of seeds per call. Controls the number of input seed vertices processed + in a single sampling call. + Defaults to 524288 + + --reverse_edges + Whether to reverse the edges of the input edgelist. Should be set to False for PyG and True for DGL. + Defaults to False (PyG). + + --dask_worker_devices + Comma-separated list of the GPUs to assign to dask (i.e. "0,1,2"). + Defaults to just the default GPU (0). + Changing this is strongly recommended in order to take advantage of all GPUs on the system. + + --random_seed + Seed for random number generation. 
+ Defaults to '62' + + --persist + Whether to aggressively use persist() in dask to make the ETL steps (NOT PART OF SAMPLING) faster. + Will probably make this script finish sooner at the expense of memory usage, but won't affect + sampling time. + Changing this is not recommended unless you know what you are doing. + Defaults to False. + +## Input Format +The script expects its input data in the following format: +``` + +| +|------ meta.json +|------ parquet +|------ |---------- +|------ |---------- |---------------------------- [node_label.parquet] +|------ |---------- +|------ |---------- |---------------------------- [node_label.parquet] +... +|------ |---------- +|------ |---------- |------------------------------------------ edge_index.parquet +|------ |---------- +|------ |---------- |------------------------------------------ edge_index.parquet +... + +``` + +`node_label.parquet` only needs to be present for vertex types that have labeled +nodes. It consists of two columns, "node" which contains node ids, and "label", +which contains the labeled class of the node. + +`edge_index.parquet` is required for all edge types. It has two columns, `src` +and `dst`, representing the source and destination vertices of the edges in that +edge type's COO edge index. + +`meta.json` is a json file containing metadata needed to properly process +the parquet files. It must have the following format: +``` +{ + "num_nodes": { + ": <# nodes of node type 0>, + ": <# nodes of node type 1>, + ... + }, + "num_edges": { + : <# edges of edge type 0>, + : <# edges of edge type 1>, + ... + } +} +``` + +## Output Meta +The script, in addition to the samples, will also output a file named `output_meta.json`. +This file contains various statistics about the sampling run, including the runtime, +as well as information about the dataset and system that the samples were produced from. + +This metadata file can be used to gather the results from the sampling and training stages +together. + +## Other Notes +For rmat datasets, you will need to generate your own bogus features in the training stage. +Since that is trivial, that is not done in this sampling script. 
diff --git a/benchmarks/cugraph/standalone/bulk_sampling/benchmarking_script.ipynb b/benchmarks/cugraph/standalone/bulk_sampling/benchmarking_script.ipynb new file mode 100644 index 00000000000..3ea158d1f61 --- /dev/null +++ b/benchmarks/cugraph/standalone/bulk_sampling/benchmarking_script.ipynb @@ -0,0 +1,1860 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "9b8d43d5-3005-4b0b-b418-b84af104bc3b", + "metadata": {}, + "outputs": [], + "source": [ + "!export RAPIDS_NO_INITIALIZE=\"1\"\n", + "!export CUDF_SPILL=\"1\"\n", + "!export LIBCUDF_CUFILE_POLICY=\"OFF\"\n", + "\n", + "from cugraph_bulk_sampling import start_dask_client, benchmark_cugraph_bulk_sampling, load_disk_dataset, construct_graph\n", + "from cugraph_bulk_sampling import sample_graph\n", + "import os" + ] + }, + { + "cell_type": "markdown", + "id": "f3f04da7-c937-4dab-b432-fc569522f411", + "metadata": {}, + "source": [ + "# Setup Cluster" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "26324a75-1b34-4c7b-8a26-23bac23e91b4", + "metadata": {}, + "outputs": [], + "source": [ + "dask_worker_devices='0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fc8d56ef-4036-4105-9764-1c6cbb2bdb15", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Dask client/cluster created using LocalCUDACluster\n" + ] + } + ], + "source": [ + "client, cluster = start_dask_client(dask_worker_devices=dask_worker_devices,\n", + " jit_unspill=False,\n", + " rmm_pool_size=28e9,\n", + " rmm_async=True)" + ] + }, + { + "cell_type": "markdown", + "id": "5335b115-eeb0-470d-9884-79990506ead7", + "metadata": {}, + "source": [ + "# Setup Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c9c8fb66-6bdd-45d7-8564-cc28e383d966", + "metadata": {}, + "outputs": [], + "source": [ + "dataset='ogbn_papers100M'\n", + "dataset_root=\".\"\n", + "output_root=\".\"\n", + "reverse_edges=True\n", + "add_edge_types=False\n", + "batch_size=512\n", + "seeds_per_call=524288\n", + "fanout=[25,25]\n", + "replication_factor=4\n", + "seed=123\n", + "\n", + "dataset_dir=dataset_root\n", + "output_path=output_root\n", + "persist=False\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "37ed06f6-ad06-443a-be12-61800d59d221", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading edge index for edge type paper__cites__paper\n", + "Loading node labels for node type paper (offset=0)\n", + "Number of input edges = 6,462,743,488\n", + "constructed graph\n" + ] + } + ], + "source": [ + "dask_edgelist_df, dask_label_df, node_offsets, edge_offsets, total_num_nodes = \\\n", + " load_disk_dataset(\n", + " dataset,\n", + " dataset_dir=dataset_dir,\n", + " reverse_edges=reverse_edges,\n", + " replication_factor=replication_factor,\n", + " persist=False,\n", + " add_edge_types=add_edge_types\n", + " )\n", + "num_input_edges = len(dask_edgelist_df)\n", + "print(\n", + "f\"Number of input edges = {num_input_edges:,}\"\n", + ")\n", + "\n", + "G = construct_graph(\n", + "dask_edgelist_df\n", + ")\n", + "del dask_edgelist_df\n", + "print('constructed graph')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f71cf5a3-7e4b-4497-9c14-a342cc5abbcd", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/envs/rapids/lib/python3.10/site-packages/cudf/core/index.py:3139: 
FutureWarning: cudf.StringIndex is deprecated and will be removed from cudf in a future version. Use cudf.Index with the appropriate dtype instead.\n", + " warnings.warn(\n", + "/opt/conda/envs/rapids/lib/python3.10/site-packages/cudf/core/index.py:3139: FutureWarning: cudf.StringIndex is deprecated and will be removed from cudf in a future version. Use cudf.Index with the appropriate dtype instead.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "input memory: 103403895808\n" + ] + } + ], + "source": [ + "input_memory = G.edgelist.edgelist_df.memory_usage().sum().compute()\n", + "print(f'input memory: {input_memory}')\n", + "\n", + "output_subdir = os.path.join(output_path, f'{dataset}[{replication_factor}]_b{batch_size}_f{fanout}')\n", + "os.makedirs(output_subdir, exist_ok=True)\n", + "\n", + "output_sample_path = os.path.join(output_subdir, 'samples')\n", + "os.makedirs(output_sample_path, exist_ok=True)\n", + "\n", + "batches_per_partition = 200_000 // batch_size" + ] + }, + { + "cell_type": "markdown", + "id": "3d276c5c-65d6-4191-b2a5-37b30d2cd44b", + "metadata": {}, + "source": [ + "# Benchmarking Sample Graph" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "675b580c-6a7a-4571-88dd-0d4429f9e5ff", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 9.981931209564209\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '51.1MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '58.6MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '59.0MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '149.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '82.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '98.1MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '240.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '73.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '310.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '267.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '80.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '131.8MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '205.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '288.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 
'tcp://127.0.0.1:46757': { 'current_bytes': '303.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '130.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.299846172332764\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '252.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '278.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '243.6MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '256.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '330.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '239.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '254.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '239.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '278.6MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.2GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '68.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '397.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '79.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '127.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '90.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '303.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '53.5MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.2623701095581055\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '73.1MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '179.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '253.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '366.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 
'tcp://127.0.0.1:36543': { 'current_bytes': '98.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '130.1MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '152.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '265.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '435.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.2GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '463.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '151.5MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '379.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '192.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '150.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '277.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '416.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.487639665603638\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '241.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '176.8MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '292.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '118.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '60.2MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '204.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '208.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '185.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '254.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '175.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '102.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '83.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '197.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': 
'62.0GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '142.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '262.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '227.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.208818197250366\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '261.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '360.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '240.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '225.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.1GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '428.5MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '288.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '202.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '128.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '258.3MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.2GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '203.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '278.5MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '102.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '284.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '596.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '301.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '203.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.344887971878052\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '171.8MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '225.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '245.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': 
'61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '315.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '248.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '147.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '64.5MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '290.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '301.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '152.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '276.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '240.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '252.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '625.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.5GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '117.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '192.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.830034255981445\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '202.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '256.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '224.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '101.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '223.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.6GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '115.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '59.8MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '260.1MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '82.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '194.4MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '82.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '168.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 
'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '252.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '181.7MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '277.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '157.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.967972278594971\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '355.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '65.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '224.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '339.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '208.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.6GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '92.4MB',\n", + " 'peak_bytes': '2.3GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '59.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '452.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '440.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '159.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '228.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '261.1MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '284.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '129.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '262.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '579.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.228902339935303\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '169.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '179.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 
'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '214.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '57.5MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '85.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '283.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '81.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '265.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '240.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '83.1MB',\n", + " 'peak_bytes': '2.3GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '193.2MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '117.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '181.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '408.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '309.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '237.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.624476432800293\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '159.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '181.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '134.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '214.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '281.9MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '201.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '399.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '128.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '291.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '228.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '125.5MB',\n", + " 'peak_bytes': 
'2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '383.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '313.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '152.3MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '249.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '100.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.998133420944214\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '209.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '211.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '115.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '239.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.1GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '40.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '274.8MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '190.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '290.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '61.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '496.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '242.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '164.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '276.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '341.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '295.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '131.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.723633289337158\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '241.4MB',\n", + " 'peak_bytes': 
'2.6GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '220.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '375.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '246.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.1GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '291.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '179.6MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '202.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '259.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '241.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '283.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '261.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '124.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '252.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '134.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '277.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '100.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.884088754653931\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '544.1MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '65.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '222.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '136.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '257.0MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '92.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '86.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '452.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '233.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '191.8MB',\n", + " 
'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '385.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '365.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '284.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '288.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '263.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '257.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.929041385650635\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '106.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '89.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '198.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '77.6MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '393.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '285.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '247.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '265.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '316.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '86.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '244.2MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '255.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '181.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '218.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '317.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '297.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + 
"execution_time: 5.864148378372192\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '241.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '176.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '205.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '67.0MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '250.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '197.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '201.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '252.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '359.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '280.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '309.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '371.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '256.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '197.4MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '249.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '297.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.87973165512085\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '237.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '210.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '182.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '109.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '256.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '289.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '144.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '231.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': 
'456.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '230.5MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '271.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '290.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '308.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '309.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '206.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '82.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.986347436904907\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '264.8MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '33.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '91.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '116.7MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '170.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '154.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '109.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '71.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '116.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '280.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '271.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '561.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '277.0MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '350.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '296.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '93.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') 
kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.348597764968872\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '209.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '254.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '236.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '121.5MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '68.5MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '344.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '75.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '52.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '146.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '127.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '267.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '218.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '466.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '279.0MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.5GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '262.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '211.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.334516286849976\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '355.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '252.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '40.9MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '99.9MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '211.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '295.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '556.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 
'current_bytes': '265.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '264.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '129.9MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '83.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '250.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '284.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '254.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '249.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '152.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.598327398300171\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '226.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '227.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '230.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '90.4MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '190.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.6GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '278.6MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '167.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '186.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '449.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '96.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '406.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '110.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '249.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '594.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '278.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '142.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created 
batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.590704679489136\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '257.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '256.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '339.5MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '221.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '158.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '455.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '144.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '231.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '196.0MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '159.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '225.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '726.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '134.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '191.8MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '263.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '170.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.666577577590942\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '536.6MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '256.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '19.6MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '113.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '138.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '197.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 
'tcp://127.0.0.1:40517': { 'current_bytes': '144.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '128.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '307.8MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.2GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '150.0MB',\n", + " 'peak_bytes': '2.3GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '164.7MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '213.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '246.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '106.5MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '309.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '268.1MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.439242839813232\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '106.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '256.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '222.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '81.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '66.6MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '199.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '72.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '240.5MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '232.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '150.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '256.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '200.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '466.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '312.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.5GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '134.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': 
'61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '493.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.422755718231201\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '241.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '220.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '153.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '50.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '339.5MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '92.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '190.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '265.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '236.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '274.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '201.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '122.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '144.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '602.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '510.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '78.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.785901784896851\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '258.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '224.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '181.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '217.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '167.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': 
'59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '436.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '164.6MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '205.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '204.9MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.2GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '172.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '297.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '61.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '282.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '259.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '171.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '143.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.363157033920288\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '536.5MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '224.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '250.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '225.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.1GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '220.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '181.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '252.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '240.6MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '134.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '228.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '405.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '78.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '284.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '174.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 
'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '276.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '437.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.568510055541992\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '171.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '224.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '117.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '254.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '427.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '116.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '128.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '265.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '247.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.2GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '139.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '116.4MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '590.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '207.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '182.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.5GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '133.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '214.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.794158220291138\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '235.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '225.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '227.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '239.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 
'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '230.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.6GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '212.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '93.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '195.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '231.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '386.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '256.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '86.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '659.0MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '811.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '265.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '322.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 7.261108636856079\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '355.5MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '251.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '238.9MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '336.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '166.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.6GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '236.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '228.9MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '259.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '245.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '562.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '232.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '206.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '181.0MB',\n", + " 'peak_bytes': 
'2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '177.0MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '185.8MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '224.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.985189437866211\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '66.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '256.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '168.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '413.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '141.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '324.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '368.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '452.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '154.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '181.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '118.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '280.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '236.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '275.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '183.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '167.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "6.32 s ± 0 ns per loop (mean ± std. dev. 
of 1 run, 30 loops each)\n"
+   ]
+  }
+ ],
+ "source": [
+  "%%timeit -n30 -r1\n",
+  "\n",
+  "\n",
+  "execution_time, allocation_counts = sample_graph(\n",
+  "    G,\n",
+  "    dask_label_df,\n",
+  "    output_sample_path,\n",
+  "    seed=seed,\n",
+  "    batch_size=batch_size,\n",
+  "    seeds_per_call=seeds_per_call,\n",
+  "    batches_per_partition=batches_per_partition,\n",
+  "    fanout=fanout,\n",
+  "    persist=persist,\n",
+  ")\n"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "27066cf3",
+  "metadata": {},
+  "outputs": [],
+  "source": []
+ }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10.11 ('base')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/benchmarks/cugraph/standalone/bulk_sampling/bulk_sampling.sh b/benchmarks/cugraph/standalone/bulk_sampling/bulk_sampling.sh
new file mode 100755
index 00000000000..e62cb3cda29
--- /dev/null
+++ b/benchmarks/cugraph/standalone/bulk_sampling/bulk_sampling.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Usage: bulk_sampling.sh <dataset_name> <dataset_root> <output_root> <batch_sizes> <fanouts> <reverse_edges>
+
+export RAPIDS_NO_INITIALIZE="1"
+export CUDF_SPILL="1"
+export LIBCUDF_CUFILE_POLICY=OFF
+
+
+dataset_name=$1
+dataset_root=$2
+output_root=$3
+batch_sizes=$4
+fanouts=$5
+reverse_edges=$6
+
+rm -rf "$output_root"
+mkdir -p "$output_root"
+
+# Change to 2 in Selene
+gpu_per_replica=4
+#--add_edge_ids \
+
+# Expand to 1, 4, 8 in Selene
+for i in 1 2 3 4
+do
+    for replication in 2;
+    do
+        dataset_name_with_replication="${dataset_name}[${replication}]"
+        dask_worker_devices=$(seq -s, 0 $((gpu_per_replica*replication-1)))
+        echo "Sampling dataset = $dataset_name_with_replication on devices = $dask_worker_devices"
+        python3 cugraph_bulk_sampling.py --datasets $dataset_name_with_replication \
+            --dataset_root $dataset_root \
+            --batch_sizes $batch_sizes \
+            --output_root $output_root \
+            --dask_worker_devices $dask_worker_devices \
+            --fanouts $fanouts \
+            --reverse_edges
+    done
+done
\ No newline at end of file
diff --git a/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py b/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py
new file mode 100644
index 00000000000..d2a3716da8a
--- /dev/null
+++ b/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py
@@ -0,0 +1,740 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import warnings
+import argparse
+import traceback
+
+from cugraph.testing.mg_utils import (
+    generate_edgelist_rmat,
+    # get_allocation_counts_dask_persist,
+    get_allocation_counts_dask_lazy,
+    sizeof_fmt,
+    get_peak_output_ratio_across_workers,
+    restart_client,
+    start_dask_client,
+    stop_dask_client,
+    enable_spilling,
+)
+
+from cugraph.structure.symmetrize import symmetrize
+from cugraph.experimental.gnn import BulkSampler
+
+import cugraph
+
+import json
+import re
+import os
+import gc
+from time import sleep, perf_counter
+from math import ceil
+
+import pandas as pd
+import numpy as np
+import cupy
+import cudf
+
+import dask_cudf
+import dask.dataframe as ddf
+from dask.distributed import default_client
+from cugraph.dask import get_n_workers
+
+from typing import Optional, Union, Dict
+
+
+def construct_graph(dask_dataframe):
+    """
+    Args:
+        dask_dataframe:
+            dask_cudf.DataFrame containing weighted and undirected edges with
+            self loops. Multiple edges will likely be present as well. May
+            optionally contain an 'etp' (edge type) column of dtype int32.
+    Returns:
+        G: cugraph.MultiGraph (directed, with renumbering disabled)
+    """
+    assert dask_dataframe['src'].dtype == 'int64'
+    assert dask_dataframe['dst'].dtype == 'int64'
+
+    if 'etp' in dask_dataframe.columns:
+        assert dask_dataframe['etp'].dtype == 'int32'
+
+    G = cugraph.MultiGraph(directed=True)
+    G.from_dask_cudf_edgelist(
+        dask_dataframe,
+        source="src",
+        destination="dst",
+        edge_type='etp' if 'etp' in dask_dataframe.columns else None,
+        renumber=False
+    )
+    return G
+
+
+def symmetrize_ddf(dask_dataframe):
+    source_col, dest_col = symmetrize(
+        dask_dataframe,
+        'src',
+        'dst',
+        multi=True,
+        symmetrize=True,
+    )
+
+    new_ddf = source_col.to_frame()
+    new_ddf['dst'] = dest_col
+
+    return new_ddf
+
+
+def renumber_ddf(dask_df, persist=False):
+    vertices = dask_cudf.concat([dask_df['src'], dask_df['dst']]).unique().reset_index(drop=True)
+    if persist:
+        vertices = vertices.persist()
+
+    vertices.name = 'v'
+    vertices = vertices.reset_index().set_index('v').rename(columns={'index': 'm'})
+    if persist:
+        vertices = vertices.persist()
+
+    src = dask_df.merge(vertices, left_on='src', right_on='v', how='left').m.rename('src')
+    dst = dask_df.merge(vertices, left_on='dst', right_on='v', how='left').m.rename('dst')
+    df = src.to_frame()
+    df['dst'] = dst
+
+    return df.reset_index(drop=True)
+
+
+def _make_batch_ids(bdf: cudf.DataFrame, batch_size: int, num_workers: int, partition_info: Optional[Union[dict, str]] = None):
+    # Required by dask; need to skip dummy partitions.
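+    # (map_partitions may call this function with a zero-length "dummy" frame and
+    # partition_info=None while dask validates metadata; returning an empty frame
+    # with the expected schema keeps that validation happy.)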
+    if partition_info is None:
+        # Dummy partition: return an empty frame matching the declared `meta`.
+        return cudf.DataFrame({
+            'node': cudf.Series(dtype='int64'),
+            'batch': cudf.Series(dtype='int32')
+        })
+
+    partition = partition_info['number']
+    if partition is None:
+        raise ValueError('partition_info is missing the partition number')
+
+    num_batches = int(ceil(len(bdf) / batch_size))
+
+    batch_ids = cupy.repeat(
+        cupy.arange(num_batches * partition, num_batches * (partition + 1), dtype='int32'),
+        batch_size
+    )[:len(bdf)]
+
+    bdf = bdf.reset_index(drop=True)
+    bdf['batch'] = cudf.Series(batch_ids)
+
+    return bdf
+
+
+def _replicate_df(df: cudf.DataFrame, replication_factor: int, col_item_counts: Dict[str, int], partition_info: Optional[Union[dict, str]] = None):
+    # Required by dask; need to skip dummy partitions.
+    if partition_info is None:
+        return cudf.DataFrame({
+            col: cudf.Series(dtype=df[col].dtype) for col in col_item_counts.keys()
+        })
+
+    original_df = df.copy()
+
+    if replication_factor > 1:
+        for r in range(1, replication_factor):
+            # Copy so each replica is offset from the original values rather
+            # than accumulating offsets across iterations.
+            df_replicated = original_df.copy()
+            for col, offset in col_item_counts.items():
+                df_replicated[col] += offset * r
+
+            df = cudf.concat([df, df_replicated], ignore_index=True)
+
+    return df
+
+
+@get_allocation_counts_dask_lazy(return_allocations=True, logging=True)
+def sample_graph(G, label_df, output_path, seed=42, batch_size=500, seeds_per_call=200000, batches_per_partition=100, fanout=[5, 5, 5], persist=False):
+    cupy.random.seed(seed)
+
+    sampler = BulkSampler(
+        batch_size=batch_size,
+        output_path=output_path,
+        graph=G,
+        fanout_vals=fanout,
+        with_replacement=False,
+        random_state=seed,
+        seeds_per_call=seeds_per_call,
+        batches_per_partition=batches_per_partition,
+        log_level=logging.INFO
+    )
+
+    n_workers = len(default_client().scheduler_info()['workers'])
+
+    meta = cudf.DataFrame({
+        'node': cudf.Series(dtype='int64'),
+        'batch': cudf.Series(dtype='int32')
+    })
+
+    batch_df = label_df.map_partitions(_make_batch_ids, batch_size, n_workers, meta=meta)
+    #batch_df = batch_df.sort_values(by='node')
+
+    # Should always persist the batch dataframe or performance may be suboptimal.
+    batch_df = batch_df.persist()
+
+    del label_df
+    print('created batches')
+
+    start_time = perf_counter()
+    sampler.add_batches(batch_df, start_col_name='node', batch_col_name='batch')
+    sampler.flush()
+    end_time = perf_counter()
+    print('flushed all batches')
+    return (end_time - start_time)
+
+
+def assign_offsets_pyg(node_counts: Dict[str, int], replication_factor: int = 1):
+    # cuGraph-PyG assigns offsets based on lexicographic order
+    node_offsets = {}
+    node_offsets_replicated = {}
+    count = 0
+    count_replicated = 0
+    for node_type in sorted(node_counts.keys()):
+        node_offsets[node_type] = count
+        node_offsets_replicated[node_type] = count_replicated
+
+        count += node_counts[node_type]
+        count_replicated += node_counts[node_type] * replication_factor
+
+    return node_offsets, node_offsets_replicated, count_replicated
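+
+# Worked example for assign_offsets_pyg (hypothetical counts): with
+# node_counts = {'author': 10, 'paper': 20} and replication_factor = 2,
+#     node_offsets            == {'author': 0, 'paper': 10}
+#     node_offsets_replicated == {'author': 0, 'paper': 20}
+#     total replicated node count == 60
+# because offsets are assigned in sorted (lexicographic) node type order.
+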
+def generate_rmat_dataset(dataset, seed=62, labeled_percentage=0.01, num_labels=256, reverse_edges=False, persist=False, add_edge_types=False):
+    """
+    Generates an rmat dataset.  Currently does not support heterogeneous datasets.
+
+    Parameters
+    ----------
+    dataset: The specifier of the rmat dataset (i.e. rmat_20_16)
+    seed: The seed to use for random number generation
+    labeled_percentage: The percentage of nodes that are given a training label
+    num_labels: The number of classes for the labeled nodes
+    reverse_edges: Whether to reverse the edges in the edgelist (should be True for DGL, False for PyG)
+    """
+
+    dataset = dataset.split('_')
+    scale = int(dataset[1])
+    edgefactor = int(dataset[2])
+
+    dask_edgelist_df = generate_edgelist_rmat(
+        scale=scale, edgefactor=edgefactor, seed=seed, unweighted=True, mg=True,
+    )
+    dask_edgelist_df = dask_edgelist_df.astype("int64")
+    dask_edgelist_df = dask_edgelist_df.reset_index(drop=True)
+
+    dask_edgelist_df = renumber_ddf(dask_edgelist_df).persist()
+    if persist:
+        dask_edgelist_df = dask_edgelist_df.persist()
+
+    dask_edgelist_df = symmetrize_ddf(dask_edgelist_df).persist()
+    if persist:
+        dask_edgelist_df = dask_edgelist_df.persist()
+
+    if add_edge_types:
+        dask_edgelist_df['etp'] = cupy.int32(0)  # doesn't matter what the value is, really
+
+    # generator = np.random.default_rng(seed=seed)
+    num_labeled_nodes = int(2**(scale+1) * labeled_percentage)
+    label_df = pd.DataFrame({
+        'node': np.arange(num_labeled_nodes),
+        # 'label': generator.integers(0, num_labels - 1, num_labeled_nodes).astype('float32')
+    })
+
+    n_workers = len(default_client().scheduler_info()['workers'])
+    dask_label_df = ddf.from_pandas(label_df, npartitions=n_workers*2)
+    del label_df
+    gc.collect()
+
+    dask_label_df = dask_cudf.from_dask_dataframe(dask_label_df)
+
+    node_offsets = {'paper': 0}
+    edge_offsets = {('paper', 'cites', 'paper'): 0}
+    total_num_nodes = int(dask_cudf.concat([dask_edgelist_df.src, dask_edgelist_df.dst]).nunique().compute())
+
+    if reverse_edges:
+        dask_edgelist_df = dask_edgelist_df.rename(columns={'src': 'dst', 'dst': 'src'})
+
+    return dask_edgelist_df, dask_label_df, node_offsets, edge_offsets, total_num_nodes
+
+
+def load_disk_dataset(dataset, dataset_dir='.', reverse_edges=True, replication_factor=1, persist=False, add_edge_types=False):
+    from pathlib import Path
+    path = Path(dataset_dir) / dataset
+    parquet_path = path / 'parquet'
+
+    n_workers = get_n_workers()
+
+    with open(os.path.join(path, 'meta.json')) as meta_file:
+        meta = json.load(meta_file)
+
+    node_offsets, node_offsets_replicated, total_num_nodes = \
+        assign_offsets_pyg(meta['num_nodes'], replication_factor=replication_factor)
+
+    edge_index_dict = {}
+    for edge_type in meta['num_edges'].keys():
+        print(f'Loading edge index for edge type {edge_type}')
+
+        can_edge_type = tuple(edge_type.split('__'))
+        edge_index_dict[can_edge_type] = dask_cudf.read_parquet(
+            Path(parquet_path) / edge_type / 'edge_index.parquet'
+        ).repartition(n_workers*2)
+
+        edge_index_dict[can_edge_type]['src'] += node_offsets_replicated[can_edge_type[0]]
+        edge_index_dict[can_edge_type]['dst'] += node_offsets_replicated[can_edge_type[-1]]
+
+        if persist:
+            edge_index_dict[can_edge_type] = edge_index_dict[can_edge_type].persist()
+
+        if replication_factor > 1:
+            edge_index_dict[can_edge_type] = edge_index_dict[can_edge_type].map_partitions(
+                _replicate_df,
+                replication_factor,
+                {
+                    'src': meta['num_nodes'][can_edge_type[0]],
+                    'dst': meta['num_nodes'][can_edge_type[2]],
+                },
+                meta=cudf.DataFrame({'src': cudf.Series(dtype='int64'), 'dst': cudf.Series(dtype='int64')})
+            )
+
+            if persist:
+                edge_index_dict[can_edge_type] = edge_index_dict[can_edge_type].persist()
+
+        gc.collect()
+
+        if reverse_edges:
+            edge_index_dict[can_edge_type] = edge_index_dict[can_edge_type].rename(columns={'src': 'dst', 'dst': 'src'})
+
+        if persist:
edge_index_dict[can_edge_type] = edge_index_dict[can_edge_type].persist() + + # Assign numeric edge type ids based on lexicographic order + edge_offsets = {} + edge_count = 0 + for num_edge_type, can_edge_type in enumerate(sorted(edge_index_dict.keys())): + if add_edge_types: + edge_index_dict[can_edge_type]['etp'] = cupy.int32(num_edge_type) + edge_offsets[can_edge_type] = edge_count + edge_count += len(edge_index_dict[can_edge_type]) + + all_edges_df = dask_cudf.concat( + list(edge_index_dict.values()) + ) + + if persist: + all_edges_df = all_edges_df.persist() + + del edge_index_dict + gc.collect() + + node_labels = {} + for node_type, offset in node_offsets_replicated.items(): + print(f'Loading node labels for node type {node_type} (offset={offset})') + node_label_path = os.path.join(os.path.join(parquet_path, node_type), 'node_label.parquet') + if os.path.exists(node_label_path): + node_labels[node_type] = dask_cudf.read_parquet(node_label_path).repartition(n_workers).drop('label',axis=1).persist() + node_labels[node_type]['node'] += offset + node_labels[node_type] = node_labels[node_type].persist() + + if replication_factor > 1: + node_labels[node_type] = node_labels[node_type].map_partitions( + _replicate_df, + replication_factor, + { + 'node': meta['num_nodes'][node_type] + }, + meta=cudf.DataFrame({'node':cudf.Series(dtype='int64')}) + ) + + if persist: + node_labels[node_type] = node_labels[node_type].persist() + + gc.collect() + + node_labels_df = dask_cudf.concat( + list(node_labels.values()) + ) + + if persist: + node_labels_df = node_labels_df.persist() + + del node_labels + gc.collect() + + return all_edges_df, node_labels_df, node_offsets_replicated, edge_offsets, total_num_nodes + + +def benchmark_cugraph_bulk_sampling( + dataset, + output_path, + seed, + batch_size, + seeds_per_call, + fanout, + reverse_edges=True, + dataset_dir='.', + replication_factor=1, + num_labels=256, + labeled_percentage=0.001, + persist=False, + add_edge_types=False): + """ + Entry point for the benchmark. + + Parameters + ---------- + dataset: str + The dataset to sample. Can be rmat_{scale}_{edgefactor}, or the name of an ogb dataset. + output_path: str + The output path, where samples and metadata will be stored. + seed: int + The random seed. + batch_size: int + The batch size (number of input seeds in a single sampling batch). + seeds_per_call: int + The number of input seeds in a single sampling call. + fanout: list[int] + The fanout. + reverse_edges: bool + Whether to reverse edges when constructing the graph. + dataset_dir: str + The directory where datasets are stored (only for ogb datasets) + replication_factor: int + The number of times to replicate the dataset. + num_labels: int + The number of random labels to generate (only for rmat datasets) + labeled_percentage: float + The percentage of the data that is labeled (only for rmat datasets) + Defaults to 0.001 to match papers100M + persist: bool + Whether to aggressively persist data in dask in attempt to speed up ETL. + Defaults to False. + add_edge_types: bool + Whether to add edge types to the edgelist. + Defaults to False. 
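+
+    Returns
+    -------
+    tuple of (num_input_edges, input_to_peak_ratio, output_to_peak_ratio,
+    input_memory_per_worker, peak_allocation_across_workers)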
+ """ + print(dataset) + if dataset[0:4] == 'rmat': + dask_edgelist_df, dask_label_df, node_offsets, edge_offsets, total_num_nodes = \ + generate_rmat_dataset( + dataset, + reverse_edges=reverse_edges, + seed=seed, + labeled_percentage=labeled_percentage, + num_labels=num_labels, + persist=persist, + add_edge_types=add_edge_types + ) + + else: + dask_edgelist_df, dask_label_df, node_offsets, edge_offsets, total_num_nodes = \ + load_disk_dataset( + dataset, + dataset_dir=dataset_dir, + reverse_edges=reverse_edges, + replication_factor=replication_factor, + persist=persist, + add_edge_types=add_edge_types + ) + + num_input_edges = len(dask_edgelist_df) + print( + f"Number of input edges = {num_input_edges:,}" + ) + + G = construct_graph( + dask_edgelist_df + ) + del dask_edgelist_df + print('constructed graph') + + input_memory = G.edgelist.edgelist_df.memory_usage().sum().compute() + print(f'input memory: {input_memory}') + + output_subdir = os.path.join(output_path, f'{dataset}[{replication_factor}]_b{batch_size}_f{fanout}') + os.makedirs(output_subdir) + + output_sample_path = os.path.join(output_subdir, 'samples') + os.makedirs(output_sample_path) + + batches_per_partition = 200_000 // batch_size + execution_time, allocation_counts = sample_graph( + G, + dask_label_df, + output_sample_path, + seed=seed, + batch_size=batch_size, + seeds_per_call=seeds_per_call, + batches_per_partition=batches_per_partition, + fanout=fanout, + persist=persist, + ) + + output_meta = { + 'dataset': dataset, + 'dataset_dir': dataset_dir, + 'seed': seed, + 'node_offsets': node_offsets, + 'edge_offsets': {'__'.join(k): v for k, v in edge_offsets.items()}, + 'total_num_nodes': total_num_nodes, + 'total_num_edges': num_input_edges, + 'batch_size': batch_size, + 'seeds_per_call': seeds_per_call, + 'batches_per_partition': batches_per_partition, + 'fanout': fanout, + 'replication_factor': replication_factor, + 'num_sampling_gpus': len(G._plc_graph), + 'execution_time': execution_time, + } + + with open(os.path.join(output_subdir, 'output_meta.json'), 'w') as f: + json.dump( + output_meta, + f, + indent='\t' + ) + + print('allocation counts b:') + print(allocation_counts.values()) + + ( + input_to_peak_ratio, + output_to_peak_ratio, + input_memory_per_worker, + peak_allocation_across_workers, + ) = get_memory_statistics( + allocation_counts=allocation_counts, input_memory=input_memory + ) + print(f"Number of edges in final graph = {G.number_of_edges():,}") + print("-" * 80) + return ( + num_input_edges, + input_to_peak_ratio, + output_to_peak_ratio, + input_memory_per_worker, + peak_allocation_across_workers, + ) + + +def get_memory_statistics(allocation_counts, input_memory): + """ + Get memory statistics for the benchmark. 
+ """ + output_to_peak_ratio = get_peak_output_ratio_across_workers(allocation_counts) + peak_allocation_across_workers = max( + [a["peak_bytes"] for a in allocation_counts.values()] + ) + input_memory_per_worker = input_memory / len(allocation_counts.keys()) + input_to_peak_ratio = peak_allocation_across_workers / input_memory_per_worker + print(f"Edge List Memory = {sizeof_fmt(input_memory_per_worker)}") + print(f"Peak Memory across workers = {sizeof_fmt(peak_allocation_across_workers)}") + print(f"Max Peak to output graph ratio across workers = {output_to_peak_ratio:.2f}") + print( + f"Max Peak to avg input graph ratio across workers = {input_to_peak_ratio:.2f}" + ) + return ( + input_to_peak_ratio, + output_to_peak_ratio, + input_memory_per_worker, + peak_allocation_across_workers, + ) + + +def get_args(): + parser = argparse.ArgumentParser() + + parser.add_argument( + '--output_root', + type=str, + help='The output root directory. File/folder names are auto-generated.', + required=True, + ) + + parser.add_argument( + '--dataset_root', + type=str, + help='The dataset root directory containing ogb datasets.', + required=True, + ) + + parser.add_argument( + '--datasets', + type=str, + help=( + 'Comma separated list of datasets; can specify ogb or rmat (i.e. ogb_papers100M[2],rmat_22_16).' + ' For ogb datasets, can provide replication factor using brackets.' + ), + required=True, + ) + + parser.add_argument( + '--fanouts', + type=str, + help='Comma separated list of fanouts (i.e. 10_25,5_5_5)', + required=False, + default='10_25', + ) + + parser.add_argument( + '--batch_sizes', + type=str, + help='Comma separated list of batch sizes (i.e. 500,1000)', + required=False, + default='512,1024' + ) + + parser.add_argument( + '--seeds_per_call_opts', + type=str, + help='Comma separated list of seeds per call (i.e. 1000000,2000000)', + required=False, + default='524288', + ) + + parser.add_argument( + '--reverse_edges', + action='store_true', + help='Whether to reverse the edges for DGL (defaults to False). Should be True for DGL, False for PyG.', + required=False, + default=False, + ) + + parser.add_argument( + '--dask_worker_devices', + type=str, + help='Comma separated list of dask worker devices', + required=False, + default="0" + ) + + parser.add_argument( + '--random_seed', + type=int, + help='Random seed', + required=False, + default=62 + ) + + parser.add_argument( + '--persist', + action='store_true', + help='Will add additional persist() calls to speed up ETL. Does not affect sampling runtime.', + required=False, + default=False, + ) + + parser.add_argument( + '--add_edge_types', + action='store_true', + help='Adds edge types to the edgelist. 
Required for PyG if not providing edge ids.',
+        required=False,
+        default=False,
+    )
+
+    return parser.parse_args()
+
+
+# Script entry point.
+if __name__ == "__main__":
+    logging.basicConfig()
+
+    args = get_args()
+    fanouts = [[int(f) for f in fanout.split('_')] for fanout in args.fanouts.split(',')]
+    datasets = args.datasets.split(',')
+    batch_sizes = [int(b) for b in args.batch_sizes.split(',')]
+    seeds_per_call_opts = [int(s) for s in args.seeds_per_call_opts.split(',')]
+    dask_worker_devices = [int(d) for d in args.dask_worker_devices.split(',')]
+
+    client, cluster = start_dask_client(dask_worker_devices=dask_worker_devices, jit_unspill=False, rmm_pool_size=28e9, rmm_async=True)
+    enable_spilling()
+    stats_ls = []
+    client.run(enable_spilling)
+    for dataset in datasets:
+        # Parse an optional replication factor suffix (i.e. ogbn_papers100M[4]);
+        # using capture groups also handles multi-digit factors such as [16].
+        m = re.match(r'(\w+)\[([0-9]+)\]$', dataset)
+        if m:
+            dataset = m.group(1)
+            replication_factor = int(m.group(2))
+        else:
+            replication_factor = 1
+
+        for fanout in fanouts:
+            for batch_size in batch_sizes:
+                for seeds_per_call in seeds_per_call_opts:
+                    print(f'dataset: {dataset}')
+                    print(f'batch size: {batch_size}')
+                    print(f'fanout: {fanout}')
+                    print(f'seeds_per_call: {seeds_per_call}')
+
+                    try:
+                        stats_d = {}
+                        (
+                            num_input_edges,
+                            input_to_peak_ratio,
+                            output_to_peak_ratio,
+                            input_memory_per_worker,
+                            peak_allocation_across_workers,
+                        ) = benchmark_cugraph_bulk_sampling(
+                            dataset=dataset,
+                            output_path=args.output_root,
+                            seed=args.random_seed,
+                            batch_size=batch_size,
+                            seeds_per_call=seeds_per_call,
+                            fanout=fanout,
+                            dataset_dir=args.dataset_root,
+                            reverse_edges=args.reverse_edges,
+                            replication_factor=replication_factor,
+                            persist=args.persist,
+                            add_edge_types=args.add_edge_types,
+                        )
+                        stats_d["dataset"] = dataset
+                        stats_d["num_input_edges"] = num_input_edges
+                        stats_d["batch_size"] = batch_size
+                        stats_d["fanout"] = fanout
+                        stats_d["seeds_per_call"] = seeds_per_call
+                        stats_d["input_memory_per_worker"] = sizeof_fmt(input_memory_per_worker)
+                        stats_d["peak_allocation_across_workers"] = sizeof_fmt(
+                            peak_allocation_across_workers
+                        )
+                        stats_d["input_to_peak_ratio"] = input_to_peak_ratio
+                        stats_d["output_to_peak_ratio"] = output_to_peak_ratio
+                        stats_ls.append(stats_d)
+                    except Exception as e:
+                        warnings.warn('An exception occurred!')
+                        print(e)
+                        traceback.print_exc()
+                        restart_client(client)
+                        sleep(10)
+
+        stats_df = pd.DataFrame(
+            stats_ls,
+            columns=[
+                "dataset",
+                "num_input_edges",
+                "directed",
+                "renumber",
+                "input_memory_per_worker",
+                "peak_allocation_across_workers",
+                "input_to_peak_ratio",
+                "output_to_peak_ratio",
+            ],
+        )
+        stats_df.to_csv("cugraph_sampling_stats.csv")
+        print("-" * 40 + f"dataset = {dataset} completed" + "-" * 40)
+
+    # Cleanup Dask Cluster
+    stop_dask_client(client, cluster)
diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp
index 3bb98ce4150..cf9cba2af4d 100644
--- a/cpp/include/cugraph/algorithms.hpp
+++ b/cpp/include/cugraph/algorithms.hpp
@@ -378,10 +378,11 @@ rmm::device_uvector<weight_t> betweenness_centrality(
  * @param normalized A flag indicating whether or not to normalize the result
  * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
  *
- * @return device vector containing the centralities.
+ * @return edge_property_t containing the centralities.
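+ * (One value per edge, replacing the flat device vector returned previously.)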
 */
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
-rmm::device_uvector<weight_t> edge_betweenness_centrality(
+edge_property_t<graph_view_t<vertex_t, edge_t, false, multi_gpu>, weight_t>
+edge_betweenness_centrality(
   const raft::handle_t& handle,
   graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
   std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
@@ -1181,6 +1182,9 @@ void sssp(raft::handle_t const& handle,
 /**
  * @brief Compute PageRank scores.
  *
+ * @deprecated This API is deprecated and will be replaced by the new version below,
+ * which returns metadata about the algorithm.
+ *
  * This function computes general (if @p personalization_vertices is `nullptr`) or personalized (if
  * @p personalization_vertices is not `nullptr`.) PageRank scores.
  *
@@ -1236,6 +1240,74 @@ void pagerank(raft::handle_t const& handle,
              bool has_initial_guess = false,
              bool do_expensive_check = false);
 
+/**
+ * @brief Metadata about the execution of one of the centrality algorithms
+ */
+// FIXME: This structure should be propagated to other algorithms that converge
+// (eigenvector centrality, hits and katz centrality)
+//
+struct centrality_algorithm_metadata_t {
+  size_t number_of_iterations_{};
+  bool converged_{};
+};
+
+/**
+ * @brief Compute PageRank scores.
+ *
+ * This function computes general (if @p personalization is `std::nullopt`) or personalized (if
+ * @p personalization is not `std::nullopt`) PageRank scores.
+ *
+ * @throws cugraph::logic_error on erroneous input arguments or if fails to converge before @p
+ * max_iterations.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam result_t Type of PageRank scores.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object.
+ * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p
+ * edge_weight_view.has_value() == false, edge weights are assumed to be 1.0.
+ * @param precomputed_vertex_out_weight_sums Pointer to an array storing sums of out-going edge
+ * weights for the vertices (for re-use) or `std::nullopt`. If `std::nullopt`, these values are
+ * freshly computed. Computing these values outside this function reduces the number of memory
+ * allocations/deallocations and avoids redundant computation when a user repeatedly computes
+ * PageRank scores using the same graph with different personalization vectors.
+ * @param personalization Optional tuple containing device spans of vertex identifiers and
+ * personalization values for the vertices (compute personalized PageRank) or `std::nullopt`
+ * (compute general PageRank).
+ * @param initial_pageranks Optional device span containing initial PageRank values. If
+ * specified this array will be used as the initial values and the PageRank values will be
+ * updated in place. If not specified then the initial values will be set to 1.0 divided by
+ * the number of vertices in the graph and the return value will contain an `rmm::device_uvector`
+ * containing the resulting PageRank values.
+ * @param alpha PageRank damping factor.
+ * @param epsilon Error tolerance to check convergence. Convergence is assumed if the sum of the
+ * differences in PageRank values between two consecutive iterations is less than the number of
+ * vertices in the graph multiplied by @p epsilon.
+ * @param max_iterations Maximum number of PageRank iterations.
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return tuple containing the optional PageRank results (populated if @p initial_pageranks is
+ * set to `std::nullopt`) and a metadata structure indicating how many iterations were run and
+ * whether the algorithm converged.
+ */
+template <typename vertex_t, typename edge_t, typename weight_t, typename result_t, bool multi_gpu>
+std::tuple<rmm::device_uvector<result_t>, centrality_algorithm_metadata_t> pagerank(
+  raft::handle_t const& handle,
+  graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
+  std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
+  std::optional<raft::device_span<weight_t const>> precomputed_vertex_out_weight_sums,
+  std::optional<std::tuple<raft::device_span<vertex_t const>, raft::device_span<result_t const>>>
+    personalization,
+  std::optional<raft::device_span<result_t const>> initial_pageranks,
+  result_t alpha,
+  result_t epsilon,
+  size_t max_iterations = 500,
+  bool do_expensive_check = false);
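+
+// Illustrative usage sketch (not part of this header; assumes an existing handle,
+// graph_view, and edge_weight_view, with result_t == weight_t):
+//
+//   auto [pageranks, metadata] =
+//     cugraph::pagerank<vertex_t, edge_t, weight_t, weight_t, multi_gpu>(
+//       handle, graph_view, edge_weight_view,
+//       std::nullopt /* precomputed_vertex_out_weight_sums */,
+//       std::nullopt /* personalization */,
+//       std::nullopt /* initial_pageranks */,
+//       weight_t{0.85} /* alpha */, weight_t{1e-6} /* epsilon */);
+//   if (!metadata.converged_) {
+//     // inspect metadata.number_of_iterations_ before trusting the scores
+//   }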
Convergence is assumed if the sum of the + * differences in PageRank values between two consecutive iterations is less than the number of + * vertices in the graph multiplied by @p epsilon. + * @param max_iterations Maximum number of PageRank iterations. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return tuple containing the optional pagerank results (populated if @p initial_pageranks is + * set to `std::nullopt`) and a metadata structure indicating how many iterations + * were run and whether the algorithm converged. + */ +template +std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + result_t alpha, + result_t epsilon, + size_t max_iterations = 500, + bool do_expensive_check = false); + /** * @brief Compute Eigenvector Centrality scores. * diff --git a/cpp/include/cugraph/detail/decompress_edge_partition.cuh b/cpp/include/cugraph/detail/decompress_edge_partition.cuh index 81ece768edb..cd8739114f2 100644 --- a/cpp/include/cugraph/detail/decompress_edge_partition.cuh +++ b/cpp/include/cugraph/detail/decompress_edge_partition.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -190,9 +190,12 @@ void decompress_edge_partition_to_edgelist( edge_partition_device_view_t edge_partition, std::optional> edge_partition_weight_view, + std::optional> + edge_partition_id_view, vertex_t* edgelist_majors /* [OUT] */, vertex_t* edgelist_minors /* [OUT] */, std::optional edgelist_weights /* [OUT] */, + std::optional edgelist_ids /* [OUT] */, std::optional> const& segment_offsets) { auto number_of_edges = edge_partition.number_of_edges(); @@ -203,6 +206,13 @@ void decompress_edge_partition_to_edgelist( edge_partition.indices(), edge_partition.indices() + number_of_edges, edgelist_minors); + if (edge_partition_id_view) { + assert(edgelist_ids.has_value()); + thrust::copy(handle.get_thrust_policy(), + (*edge_partition_id_view).value_first(), + (*edge_partition_id_view).value_first() + number_of_edges, + (*edgelist_ids)); + } if (edge_partition_weight_view) { assert(edgelist_weights.has_value()); thrust::copy(handle.get_thrust_policy(), diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 1c01568ae17..017b32d0470 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -350,12 +350,14 @@ void renumber_local_ext_vertices(raft::handle_t const& handle, * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object of the graph to be decompressed. * @param edge_weight_view Optional view object holding edge weights for @p graph_view. + * @param edge_id_view Optional view object holding edge ids for @p graph_view. * @param renumber_map If valid, return the renumbered edge list based on the provided @p * renumber_map * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
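+ *
+ * Example (illustrative sketch; assumes `handle`, `graph_view`, and the
+ * optional property views already exist):
+ * @code
+ * auto [srcs, dsts, weights, edge_ids] = cugraph::decompress_to_edgelist(
+ *   handle, graph_view, edge_weight_view, edge_id_view, std::nullopt);
+ * @endcode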
- * @return Tuple of edge sources, destinations, and (optional) edge weights (if @p - * edge_weight_view.has_value() is true). + * @return Tuple of edge sources, destinations, (optional) edge weights (if + * @p edge_weight_view.has_value() is true) and (optional) edge ids (if + * @p edge_id_view.has_value() is true). */ template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check = false); diff --git a/cpp/include/cugraph/utilities/device_functors.cuh b/cpp/include/cugraph/utilities/device_functors.cuh index d29e7c47d14..501e74cf47b 100644 --- a/cpp/include/cugraph/utilities/device_functors.cuh +++ b/cpp/include/cugraph/utilities/device_functors.cuh @@ -57,16 +57,28 @@ struct pack_bool_t { } }; -template +template struct indirection_t { Iterator first{}; - __device__ typename thrust::iterator_traits::value_type operator()(size_t i) const + __device__ typename thrust::iterator_traits::value_type operator()(index_t i) const { return *(first + i); } }; +template +struct indirection_if_idx_valid_t { + Iterator first{}; + index_t invalid_idx{}; + typename thrust::iterator_traits::value_type invalid_value{}; + + __device__ typename thrust::iterator_traits::value_type operator()(index_t i) const + { + return (i != invalid_idx) ? *(first + i) : invalid_value; + } +}; + template struct not_equal_t { T compare{}; diff --git a/cpp/include/cugraph_c/centrality_algorithms.h b/cpp/include/cugraph_c/centrality_algorithms.h index 5fa3520a9cb..0ac0e58540f 100644 --- a/cpp/include/cugraph_c/centrality_algorithms.h +++ b/cpp/include/cugraph_c/centrality_algorithms.h @@ -56,6 +56,22 @@ cugraph_type_erased_device_array_view_t* cugraph_centrality_result_get_vertices( cugraph_type_erased_device_array_view_t* cugraph_centrality_result_get_values( cugraph_centrality_result_t* result); +/** + * @brief Get the number of iterations executed from the algorithm metadata + * + * @param [in] result The result from a centrality algorithm + * @return the number of iterations + */ +size_t cugraph_centrality_result_get_num_iterations(cugraph_centrality_result_t* result); + +/** + * @brief Returns true if the centrality algorithm converged + * + * @param [in] result The result from a centrality algorithm + * @return True if the centrality algorithm converged, false otherwise + */ +bool_t cugraph_centrality_result_converged(cugraph_centrality_result_t* result); + /** * @brief Free centrality result * @@ -114,9 +130,68 @@ cugraph_error_code_t cugraph_pagerank( cugraph_centrality_result_t** result, cugraph_error_t** error); +/** + * @brief Compute pagerank + * + * @note This function supersedes the deprecated cugraph_pagerank and will + * eventually be renamed to cugraph_pagerank once that version is dropped. + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph + * @param [in] precomputed_vertex_out_weight_vertices + * Optionally send in precomputed sum of vertex out weights + * (a performance optimization). This defines the vertices. + * Set to NULL if no value is passed. + * @param [in] precomputed_vertex_out_weight_sums + * Optionally send in precomputed sum of vertex out weights + * (a performance optimization). Set to NULL if + * no value is passed.
+ * @param [in] initial_guess_vertices + * Optionally send in an initial guess of the pagerank values + * (a performance optimization). This defines the vertices. + * Set to NULL if no value is passed. If NULL, initial PageRank + * values are set to 1.0 divided by the number of vertices in + * the graph. + * @param [in] initial_guess_values + * Optionally send in an initial guess of the pagerank values + * (a performance optimization). Set to NULL if + * no value is passed. If NULL, initial PageRank values are set + * to 1.0 divided by the number of vertices in the graph. + * @param [in] alpha PageRank damping factor. + * @param [in] epsilon Error tolerance to check convergence. Convergence is assumed + * if the sum of the differences in PageRank values between two + * consecutive iterations is less than the number of vertices + * in the graph multiplied by @p epsilon. + * @param [in] max_iterations Maximum number of PageRank iterations. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * `true`). + * @param [out] result Opaque pointer to pagerank results + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_pagerank_allow_nonconvergence( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, + double alpha, + double epsilon, + size_t max_iterations, + bool_t do_expensive_check, + cugraph_centrality_result_t** result, + cugraph_error_t** error); + /** * @brief Compute personalized pagerank * + * @deprecated This version of personalized pagerank should be dropped in favor + * of the cugraph_personalized_pagerank_allow_nonconvergence version. + * Eventually that version will be renamed to this version. + * * @param [in] handle Handle for accessing resources * @param [in] graph Pointer to graph * @param [in] precomputed_vertex_out_weight_vertices @@ -171,6 +246,63 @@ cugraph_error_code_t cugraph_personalized_pagerank( cugraph_centrality_result_t** result, cugraph_error_t** error); +/** + * @brief Compute personalized pagerank + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph + * @param [in] precomputed_vertex_out_weight_vertices + * Optionally send in precomputed sum of vertex out weights + * (a performance optimization). This defines the vertices. + * Set to NULL if no value is passed. + * @param [in] precomputed_vertex_out_weight_sums + * Optionally send in precomputed sum of vertex out weights + * (a performance optimization). Set to NULL if + * no value is passed. + * @param [in] initial_guess_vertices + * Optionally send in an initial guess of the pagerank values + * (a performance optimization). This defines the vertices. + * Set to NULL if no value is passed. If NULL, initial PageRank + * values are set to 1.0 divided by the number of vertices in + * the graph. + * @param [in] initial_guess_values + * Optionally send in an initial guess of the pagerank values + * (a performance optimization). Set to NULL if + * no value is passed. 
If NULL, initial PageRank values are set + * to 1.0 divided by the number of vertices in the graph. + * @param [in] personalization_vertices Pointer to an array storing personalization vertex + * identifiers (compute personalized PageRank). + * @param [in] personalization_values Pointer to an array storing personalization values for the + * vertices in the personalization set. + * @param [in] alpha PageRank damping factor. + * @param [in] epsilon Error tolerance to check convergence. Convergence is assumed + * if the sum of the differences in PageRank values between two + * consecutive iterations is less than the number of vertices + * in the graph multiplied by @p epsilon. + * @param [in] max_iterations Maximum number of PageRank iterations. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * `true`). + * @param [out] result Opaque pointer to pagerank results + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_personalized_pagerank_allow_nonconvergence( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, + const cugraph_type_erased_device_array_view_t* personalization_vertices, + const cugraph_type_erased_device_array_view_t* personalization_values, + double alpha, + double epsilon, + size_t max_iterations, + bool_t do_expensive_check, + cugraph_centrality_result_t** result, + cugraph_error_t** error); + /** * @brief Compute eigenvector centrality * @@ -294,6 +426,15 @@ cugraph_type_erased_device_array_view_t* cugraph_edge_centrality_result_get_src_ cugraph_type_erased_device_array_view_t* cugraph_edge_centrality_result_get_dst_vertices( cugraph_edge_centrality_result_t* result); +/** + * @brief Get the edge ids from an edge centrality result + * + * @param [in] result The result from an edge centrality algorithm + * @return type erased array of edge ids + */ +cugraph_type_erased_device_array_view_t* cugraph_edge_centrality_result_get_edge_ids( + cugraph_edge_centrality_result_t* result); + /** * @brief Get the centrality values from an edge centrality algorithm result * diff --git a/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh b/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh index f097f9c43a2..ab14ff6c685 100644 --- a/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh +++ b/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh @@ -27,7 +27,8 @@ #include #include -#include +#include +#include #include #include @@ -118,7 +119,7 @@ union pair_packer()>> { */ template , + typename Hasher = cudf::hashing::detail::default_hash, typename Equality = equal_to, typename Allocator = default_allocator>> class concurrent_unordered_map { diff --git a/cpp/src/c_api/betweenness_centrality.cpp b/cpp/src/c_api/betweenness_centrality.cpp index 0387b050262..3cf3e92e960 100644 --- a/cpp/src/c_api/betweenness_centrality.cpp +++ b/cpp/src/c_api/betweenness_centrality.cpp @@ -144,7 +144,7 @@ struct edge_betweenness_centrality_functor : public cugraph::c_api::abstract_fun cugraph::c_api::cugraph_type_erased_device_array_view_t const* 
vertex_list_{}; bool_t normalized_{}; bool do_expensive_check_{}; - cugraph::c_api::cugraph_centrality_result_t* result_{}; + cugraph::c_api::cugraph_edge_centrality_result_t* result_{}; edge_betweenness_centrality_functor(cugraph_resource_handle_t const* handle, cugraph_graph_t* graph, @@ -190,6 +190,10 @@ struct edge_betweenness_centrality_functor : public cugraph::c_api::abstract_fun cugraph::edge_property_t, weight_t>*>(graph_->edge_weights_); + auto edge_ids = reinterpret_cast< + cugraph::edge_property_t, + edge_t>*>(graph_->edge_ids_); + auto number_map = reinterpret_cast*>(graph_->number_map_); rmm::device_uvector local_vertices(0, handle_.get_stream()); @@ -230,14 +234,24 @@ struct edge_betweenness_centrality_functor : public cugraph::c_api::abstract_fun normalized_, do_expensive_check_); - CUGRAPH_FAIL("Need to clean up return type"); + auto [src_ids, dst_ids, output_centralities, output_edge_ids] = + cugraph::decompress_to_edgelist( + handle_, + graph_view, + std::make_optional(centralities.view()), + (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, + (number_map != nullptr) ? std::make_optional(raft::device_span{ + number_map->data(), number_map->size()}) + : std::nullopt); -#if 0 result_ = new cugraph::c_api::cugraph_edge_centrality_result_t{ new cugraph::c_api::cugraph_type_erased_device_array_t(src_ids, graph_->vertex_type_), new cugraph::c_api::cugraph_type_erased_device_array_t(dst_ids, graph_->vertex_type_), - new cugraph::c_api::cugraph_type_erased_device_array_t(centralities, graph_->weight_type_)}; -#endif + output_edge_ids ? new cugraph::c_api::cugraph_type_erased_device_array_t(*output_edge_ids, + graph_->edge_type_) + : nullptr, + new cugraph::c_api::cugraph_type_erased_device_array_t(*output_centralities, + graph_->weight_type_)}; } } }; diff --git a/cpp/src/c_api/centrality_result.cpp b/cpp/src/c_api/centrality_result.cpp index c3ded9fbd89..08e7c0341f2 100644 --- a/cpp/src/c_api/centrality_result.cpp +++ b/cpp/src/c_api/centrality_result.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,6 +34,18 @@ extern "C" cugraph_type_erased_device_array_view_t* cugraph_centrality_result_ge internal_pointer->values_->view()); } +size_t cugraph_centrality_result_get_num_iterations(cugraph_centrality_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + return internal_pointer->num_iterations_; +} + +bool_t cugraph_centrality_result_converged(cugraph_centrality_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + return internal_pointer->converged_ ? bool_t::TRUE : bool_t::FALSE; +} + extern "C" void cugraph_centrality_result_free(cugraph_centrality_result_t* result) { auto internal_pointer = reinterpret_cast(result); diff --git a/cpp/src/c_api/centrality_result.hpp b/cpp/src/c_api/centrality_result.hpp index e39db686152..e0acde9cce3 100644 --- a/cpp/src/c_api/centrality_result.hpp +++ b/cpp/src/c_api/centrality_result.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,11 +24,14 @@ namespace c_api { struct cugraph_centrality_result_t { cugraph_type_erased_device_array_t* vertex_ids_{}; cugraph_type_erased_device_array_t* values_{}; + size_t num_iterations_{0}; + bool converged_{false}; }; struct cugraph_edge_centrality_result_t { cugraph_type_erased_device_array_t* src_ids_{}; cugraph_type_erased_device_array_t* dst_ids_{}; + cugraph_type_erased_device_array_t* edge_ids_{}; cugraph_type_erased_device_array_t* values_{}; }; diff --git a/cpp/src/c_api/pagerank.cpp b/cpp/src/c_api/pagerank.cpp index 2565a1aebe2..50eda152c67 100644 --- a/cpp/src/c_api/pagerank.cpp +++ b/cpp/src/c_api/pagerank.cpp @@ -120,9 +120,7 @@ struct pagerank_functor : public cugraph::c_api::abstract_functor { auto number_map = reinterpret_cast*>(graph_->number_map_); - rmm::device_uvector pageranks(graph_view.local_vertex_partition_range_size(), - handle_.get_stream()); - + rmm::device_uvector initial_pageranks(0, handle_.get_stream()); rmm::device_uvector personalization_vertices(0, handle_.get_stream()); rmm::device_uvector personalization_values(0, handle_.get_stream()); @@ -201,7 +199,7 @@ struct pagerank_functor : public cugraph::c_api::abstract_functor { initial_guess_values.size(), handle_.get_stream()); - pageranks = cugraph::detail:: + initial_pageranks = cugraph::detail:: collect_local_vertex_values_from_ext_vertex_value_pairs( handle_, std::move(initial_guess_vertices), @@ -213,25 +211,30 @@ struct pagerank_functor : public cugraph::c_api::abstract_functor { do_expensive_check_); } - cugraph::pagerank( - handle_, - graph_view, - (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt, - precomputed_vertex_out_weight_sums_ - ? std::make_optional(precomputed_vertex_out_weight_sums.data()) - : std::nullopt, - personalization_vertices_ ? std::make_optional(personalization_vertices.data()) - : std::nullopt, - personalization_values_ ? std::make_optional(personalization_values.data()) : std::nullopt, - personalization_vertices_ - ? std::make_optional(static_cast(personalization_vertices.size())) - : std::nullopt, - pageranks.data(), - static_cast(alpha_), - static_cast(epsilon_), - max_iterations_, - initial_guess_values_ != nullptr, - do_expensive_check_); + auto [pageranks, metadata] = + cugraph::pagerank( + handle_, + graph_view, + (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt, + precomputed_vertex_out_weight_sums_ + ? std::make_optional( + raft::device_span{precomputed_vertex_out_weight_sums.data(), + precomputed_vertex_out_weight_sums.size()}) + : std::nullopt, + personalization_vertices_ + ? std::make_optional( + std::make_tuple(raft::device_span{personalization_vertices.data(), + personalization_vertices.size()}, + raft::device_span{personalization_values.data(), + personalization_values.size()})) + : std::nullopt, + initial_guess_values_ != nullptr ? 
std::make_optional(raft::device_span{ + initial_pageranks.data(), initial_pageranks.size()}) + : std::nullopt, + static_cast(alpha_), + static_cast(epsilon_), + max_iterations_, + do_expensive_check_); rmm::device_uvector vertex_ids(graph_view.local_vertex_partition_range_size(), handle_.get_stream()); @@ -239,7 +242,9 @@ struct pagerank_functor : public cugraph::c_api::abstract_functor { result_ = new cugraph::c_api::cugraph_centrality_result_t{ new cugraph::c_api::cugraph_type_erased_device_array_t(vertex_ids, graph_->vertex_type_), - new cugraph::c_api::cugraph_type_erased_device_array_t(pageranks, graph_->weight_type_)}; + new cugraph::c_api::cugraph_type_erased_device_array_t(pageranks, graph_->weight_type_), + metadata.number_of_iterations_, + metadata.converged_}; } } }; @@ -305,6 +310,75 @@ extern "C" cugraph_error_code_t cugraph_pagerank( max_iterations, do_expensive_check); + auto return_value = cugraph::c_api::run_algorithm(graph, functor, result, error); + + CAPI_EXPECTS(cugraph_centrality_result_converged(*result) == bool_t::TRUE, + CUGRAPH_UNKNOWN_ERROR, + "PageRank failed to converge.", + *error); + + return return_value; +} + +extern "C" cugraph_error_code_t cugraph_pagerank_allow_nonconvergence( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, + double alpha, + double epsilon, + size_t max_iterations, + bool_t do_expensive_check, + cugraph_centrality_result_t** result, + cugraph_error_t** error) +{ + if (precomputed_vertex_out_weight_vertices != nullptr) { + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + precomputed_vertex_out_weight_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and precomputed_vertex_out_weight_vertices must match", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->weight_type_ == + reinterpret_cast( + precomputed_vertex_out_weight_sums) + ->type_, + CUGRAPH_INVALID_INPUT, + "weight type of graph and precomputed_vertex_out_weight_sums must match", + *error); + } + if (initial_guess_vertices != nullptr) { + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + initial_guess_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and initial_guess_vertices must match", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->weight_type_ == + reinterpret_cast( + initial_guess_values) + ->type_, + CUGRAPH_INVALID_INPUT, + "weight type of graph and initial_guess_values must match", + *error); + } + pagerank_functor functor(handle, + graph, + precomputed_vertex_out_weight_vertices, + precomputed_vertex_out_weight_sums, + initial_guess_vertices, + initial_guess_values, + nullptr, + nullptr, + alpha, + epsilon, + max_iterations, + do_expensive_check); + return cugraph::c_api::run_algorithm(graph, functor, result, error); } @@ -373,6 +447,94 @@ extern "C" cugraph_error_code_t cugraph_personalized_pagerank( *error); } + pagerank_functor functor(handle, + graph, + precomputed_vertex_out_weight_vertices, + precomputed_vertex_out_weight_sums, + initial_guess_vertices, + initial_guess_values, + personalization_vertices, + personalization_values, + alpha, + epsilon, + max_iterations, + do_expensive_check); + + auto return_value =
cugraph::c_api::run_algorithm(graph, functor, result, error); + + CAPI_EXPECTS(cugraph_centrality_result_converged(*result) == bool_t::TRUE, + CUGRAPH_UNKNOWN_ERROR, + "PageRank failed to converge.", + *error); + + return return_value; +} + +extern "C" cugraph_error_code_t cugraph_personalized_pagerank_allow_nonconvergence( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, + const cugraph_type_erased_device_array_view_t* personalization_vertices, + const cugraph_type_erased_device_array_view_t* personalization_values, + double alpha, + double epsilon, + size_t max_iterations, + bool_t do_expensive_check, + cugraph_centrality_result_t** result, + cugraph_error_t** error) +{ + if (precomputed_vertex_out_weight_vertices != nullptr) { + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + precomputed_vertex_out_weight_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and precomputed_vertex_out_weight_vertices must match", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->weight_type_ == + reinterpret_cast( + precomputed_vertex_out_weight_sums) + ->type_, + CUGRAPH_INVALID_INPUT, + "weight type of graph and precomputed_vertex_out_weight_sums must match", + *error); + } + if (initial_guess_vertices != nullptr) { + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + initial_guess_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and initial_guess_vertices must match", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->weight_type_ == + reinterpret_cast( + initial_guess_values) + ->type_, + CUGRAPH_INVALID_INPUT, + "weight type of graph and initial_guess_values must match", + *error); + } + if (personalization_vertices != nullptr) { + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + personalization_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and personalization_vertices must match", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->weight_type_ == + reinterpret_cast( + personalization_values) + ->type_, + CUGRAPH_INVALID_INPUT, + "weight type of graph and personalization_values must match", + *error); + } + pagerank_functor functor(handle, graph, precomputed_vertex_out_weight_vertices, diff --git a/cpp/src/centrality/betweenness_centrality_impl.cuh b/cpp/src/centrality/betweenness_centrality_impl.cuh index 5631fadde96..0a87531d6ca 100644 --- a/cpp/src/centrality/betweenness_centrality_impl.cuh +++ b/cpp/src/centrality/betweenness_centrality_impl.cuh @@ -16,8 +16,12 @@ #pragma once #include +#include +#include #include +#include #include +#include #include #include #include @@ -55,6 +59,24 @@ struct brandes_e_op_t { } }; +template +struct extract_edge_e_op_t { + vertex_t d{}; + + template + __device__ thrust::optional> operator()( + vertex_t src, + vertex_t dst, + thrust::tuple src_props, + thrust::tuple dst_props, + weight_t edge_centrality) + { + return ((thrust::get<0>(dst_props) == d) && (thrust::get<0>(src_props) == (d - 1))) + ?
thrust::optional>{thrust::make_tuple(src, dst)} + : thrust::nullopt; + } +}; + } // namespace namespace cugraph { @@ -77,16 +99,16 @@ std::tuple, rmm::device_uvector> brandes_b constexpr int bucket_idx_cur{0}; constexpr int bucket_idx_next{1}; - rmm::device_uvector sigma(graph_view.local_vertex_partition_range_size(), - handle.get_stream()); - rmm::device_uvector distance(graph_view.local_vertex_partition_range_size(), - handle.get_stream()); - detail::scalar_fill(handle, distance.data(), distance.size(), invalid_distance); - detail::scalar_fill(handle, sigma.data(), sigma.size(), edge_t{0}); + rmm::device_uvector sigmas(graph_view.local_vertex_partition_range_size(), + handle.get_stream()); + rmm::device_uvector distances(graph_view.local_vertex_partition_range_size(), + handle.get_stream()); + detail::scalar_fill(handle, distances.data(), distances.size(), invalid_distance); + detail::scalar_fill(handle, sigmas.data(), sigmas.size(), edge_t{0}); - edge_src_property_t, edge_t> src_sigma( + edge_src_property_t, edge_t> src_sigmas( handle, graph_view); - edge_dst_property_t, vertex_t> dst_distance( + edge_dst_property_t, vertex_t> dst_distances( handle, graph_view); auto vertex_partition = @@ -97,7 +119,7 @@ std::tuple, rmm::device_uvector> brandes_b handle.get_thrust_policy(), vertex_frontier.bucket(bucket_idx_cur).begin(), vertex_frontier.bucket(bucket_idx_cur).end(), - [d_sigma = sigma.begin(), d_distance = distance.begin(), vertex_partition] __device__( + [d_sigma = sigmas.begin(), d_distance = distances.begin(), vertex_partition] __device__( auto v) { auto offset = vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); d_distance[offset] = 0; @@ -108,15 +130,15 @@ std::tuple, rmm::device_uvector> brandes_b edge_t hop{0}; while (true) { - update_edge_src_property(handle, graph_view, sigma.begin(), src_sigma); - update_edge_dst_property(handle, graph_view, distance.begin(), dst_distance); + update_edge_src_property(handle, graph_view, sigmas.begin(), src_sigmas); + update_edge_dst_property(handle, graph_view, distances.begin(), dst_distances); auto [new_frontier, new_sigma] = transform_reduce_v_frontier_outgoing_e_by_dst(handle, graph_view, vertex_frontier.bucket(bucket_idx_cur), - src_sigma.view(), - dst_distance.view(), + src_sigmas.view(), + dst_distances.view(), cugraph::edge_dummy_property_t{}.view(), brandes_e_op_t{}, reduce_op::plus()); @@ -127,8 +149,8 @@ std::tuple, rmm::device_uvector> brandes_b std::move(new_sigma), vertex_frontier, std::vector{bucket_idx_next}, - thrust::make_zip_iterator(distance.begin(), sigma.begin()), - thrust::make_zip_iterator(distance.begin(), sigma.begin()), + thrust::make_zip_iterator(distances.begin(), sigmas.begin()), + thrust::make_zip_iterator(distances.begin(), sigmas.begin()), [hop] __device__(auto v, auto old_values, auto v_sigma) { return thrust::make_tuple( thrust::make_optional(bucket_idx_next), @@ -143,7 +165,7 @@ std::tuple, rmm::device_uvector> brandes_b ++hop; } - return std::make_tuple(std::move(distance), std::move(sigma)); + return std::make_tuple(std::move(distances), std::move(sigmas)); } template @@ -152,8 +174,8 @@ void accumulate_vertex_results( graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span centralities, - rmm::device_uvector&& distance, - rmm::device_uvector&& sigma, + rmm::device_uvector&& distances, + rmm::device_uvector&& sigmas, bool with_endpoints, bool do_expensive_check) { @@ -162,26 +184,26 @@ void accumulate_vertex_results( vertex_t diameter = transform_reduce_v( 
handle, graph_view, - distance.begin(), + distances.begin(), [] __device__(auto, auto d) { return (d == invalid_distance) ? vertex_t{0} : d; }, vertex_t{0}, reduce_op::maximum{}, do_expensive_check); - rmm::device_uvector delta(sigma.size(), handle.get_stream()); - detail::scalar_fill(handle, delta.data(), delta.size(), weight_t{0}); + rmm::device_uvector deltas(sigmas.size(), handle.get_stream()); + detail::scalar_fill(handle, deltas.data(), deltas.size(), weight_t{0}); if (with_endpoints) { vertex_t count = count_if_v( handle, graph_view, - distance.begin(), + distances.begin(), [] __device__(auto, auto d) { return (d != invalid_distance); }, do_expensive_check); thrust::transform(handle.get_thrust_policy(), - distance.begin(), - distance.end(), + distances.begin(), + distances.end(), centralities.begin(), centralities.begin(), [count] __device__(auto d, auto centrality) { @@ -205,12 +227,12 @@ void accumulate_vertex_results( update_edge_src_property( handle, graph_view, - thrust::make_zip_iterator(distance.begin(), sigma.begin(), delta.begin()), + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), src_properties); update_edge_dst_property( handle, graph_view, - thrust::make_zip_iterator(distance.begin(), sigma.begin(), delta.begin()), + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), dst_properties); // FIXME: To do this efficiently, I need a version of @@ -243,29 +265,167 @@ void accumulate_vertex_results( }, weight_t{0}, reduce_op::plus{}, - delta.begin(), + deltas.begin(), do_expensive_check); update_edge_src_property( handle, graph_view, - thrust::make_zip_iterator(distance.begin(), sigma.begin(), delta.begin()), + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), src_properties); update_edge_dst_property( handle, graph_view, - thrust::make_zip_iterator(distance.begin(), sigma.begin(), delta.begin()), + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), dst_properties); thrust::transform(handle.get_thrust_policy(), centralities.begin(), centralities.end(), - delta.begin(), + deltas.begin(), centralities.begin(), thrust::plus()); } } +template +void accumulate_edge_results( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + edge_property_view_t centralities_view, + rmm::device_uvector&& distances, + rmm::device_uvector&& sigmas, + bool do_expensive_check) +{ + constexpr vertex_t invalid_distance = std::numeric_limits::max(); + + vertex_t diameter = transform_reduce_v( + handle, + graph_view, + distances.begin(), + [] __device__(auto, auto d) { return (d == invalid_distance) ? vertex_t{0} : d; }, + vertex_t{0}, + reduce_op::maximum{}, + do_expensive_check); + + rmm::device_uvector deltas(sigmas.size(), handle.get_stream()); + detail::scalar_fill(handle, deltas.data(), deltas.size(), weight_t{0}); + + edge_src_property_t, + thrust::tuple> + src_properties(handle, graph_view); + edge_dst_property_t, + thrust::tuple> + dst_properties(handle, graph_view); + + update_edge_src_property( + handle, + graph_view, + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), + src_properties); + update_edge_dst_property( + handle, + graph_view, + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), + dst_properties); + + // + // For now this will do an O(E) pass over all edges over the diameter + // of the graph.
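+ // (Concretely, the transform_e pass below applies Brandes' accumulation
+ // rule: an edge (v, w) with distance(w) == distance(v) + 1 accumulates
+ // (sigma_v / sigma_w) * (1 + delta_w) for the current source.)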
+ // + // Based on Brandes algorithm, we want to follow back pointers in non-increasing + // distance from S to compute delta + // + for (vertex_t d = diameter; d > 0; --d) { + // + // Populate edge_list with edges where `thrust::get<0>(dst_props) == d` + // and `thrust::get<0>(src_props) == (d-1)` + // + cugraph::edge_bucket_t edge_list(handle); + + { + auto [src, dst] = extract_transform_e(handle, + graph_view, + src_properties.view(), + dst_properties.view(), + centralities_view, + extract_edge_e_op_t{d}, + do_expensive_check); + + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(src.begin(), dst.begin()), + thrust::make_zip_iterator(src.end(), dst.end())); + + // Eliminate duplicates in case of a multi-graph + auto new_edgelist_end = thrust::unique(handle.get_thrust_policy(), + thrust::make_zip_iterator(src.begin(), dst.begin()), + thrust::make_zip_iterator(src.end(), dst.end())); + + src.resize( + thrust::distance(thrust::make_zip_iterator(src.begin(), dst.begin()), new_edgelist_end), + handle.get_stream()); + dst.resize(src.size(), handle.get_stream()); + + edge_list.insert(src.begin(), src.end(), dst.begin()); + } + + transform_e( + handle, + graph_view, + edge_list, + src_properties.view(), + dst_properties.view(), + centralities_view, + [d] __device__(auto src, auto dst, auto src_props, auto dst_props, auto edge_centrality) { + if ((thrust::get<0>(dst_props) == d) && (thrust::get<0>(src_props) == (d - 1))) { + auto sigma_v = static_cast(thrust::get<1>(src_props)); + auto sigma_w = static_cast(thrust::get<1>(dst_props)); + auto delta_w = thrust::get<2>(dst_props); + + return edge_centrality + (sigma_v / sigma_w) * (1 + delta_w); + } else { + return edge_centrality; + } + }, + centralities_view, + do_expensive_check); + + per_v_transform_reduce_outgoing_e( + handle, + graph_view, + src_properties.view(), + dst_properties.view(), + cugraph::edge_dummy_property_t{}.view(), + [d] __device__(auto, auto, auto src_props, auto dst_props, auto) { + if ((thrust::get<0>(dst_props) == d) && (thrust::get<0>(src_props) == (d - 1))) { + auto sigma_v = static_cast(thrust::get<1>(src_props)); + auto sigma_w = static_cast(thrust::get<1>(dst_props)); + auto delta_w = thrust::get<2>(dst_props); + + return (sigma_v / sigma_w) * (1 + delta_w); + } else { + return weight_t{0}; + } + }, + weight_t{0}, + reduce_op::plus{}, + deltas.begin(), + do_expensive_check); + + update_edge_src_property( + handle, + graph_view, + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), + src_properties); + update_edge_dst_property( + handle, + graph_view, + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), + dst_properties); + } +} + template betweenness_centrality( // // Betweenness Centrality algorithm based on the Brandes Algorithm (2001) // - if (do_expensive_check) {} + if (do_expensive_check) { + auto vertex_partition = + vertex_partition_device_view_t(graph_view.local_vertex_partition_view()); + auto num_invalid_vertices = + thrust::count_if(handle.get_thrust_policy(), + vertices_begin, + vertices_end, + [vertex_partition] __device__(auto val) { + return !(vertex_partition.is_valid_vertex(val) && + vertex_partition.in_local_vertex_partition_range_nocheck(val)); + }); + if constexpr (multi_gpu) { + num_invalid_vertices = host_scalar_allreduce( + handle.get_comms(), num_invalid_vertices, raft::comms::op_t::SUM, handle.get_stream()); + } + CUGRAPH_EXPECTS(num_invalid_vertices == 0, + "Invalid input argument: sources have invalid vertex IDs."); +
} rmm::device_uvector centralities(graph_view.local_vertex_partition_range_size(), handle.get_stream()); @@ -333,14 +510,14 @@ rmm::device_uvector betweenness_centrality( // FIXME: This has an inefficiency in early iterations, as it doesn't have enough work to // keep the GPUs busy. But we can't run too many at once or we will run out of // memory. Need to investigate options to improve this performance - auto [distance, sigma] = + auto [distances, sigmas] = brandes_bfs(handle, graph_view, edge_weight_view, vertex_frontier, do_expensive_check); accumulate_vertex_results(handle, graph_view, edge_weight_view, raft::device_span{centralities.data(), centralities.size()}, - std::move(distance), - std::move(sigma), + std::move(distances), + std::move(sigmas), include_endpoints, do_expensive_check); } @@ -379,7 +556,8 @@ template -rmm::device_uvector edge_betweenness_centrality( +edge_property_t, weight_t> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -388,10 +566,88 @@ rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check) { - CUGRAPH_FAIL("Not Implemented"); - // Edge betweenness is computed like vertex betweenness, but you accumulate - // centrality on each edge. We need to adapt this to support edge properties - // properly. + // + // Betweenness Centrality algorithm based on the Brandes Algorithm (2001) + // + if (do_expensive_check) { + auto vertex_partition = + vertex_partition_device_view_t(graph_view.local_vertex_partition_view()); + auto num_invalid_vertices = + thrust::count_if(handle.get_thrust_policy(), + vertices_begin, + vertices_end, + [vertex_partition] __device__(auto val) { + return !(vertex_partition.is_valid_vertex(val) && + vertex_partition.in_local_vertex_partition_range_nocheck(val)); + }); + if constexpr (multi_gpu) { + num_invalid_vertices = host_scalar_allreduce( + handle.get_comms(), num_invalid_vertices, raft::comms::op_t::SUM, handle.get_stream()); + } + CUGRAPH_EXPECTS(num_invalid_vertices == 0, + "Invalid input argument: sources have invalid vertex IDs."); + } + + edge_property_t, weight_t> centralities( + handle, graph_view); + + fill_edge_property(handle, graph_view, weight_t{0}, centralities, do_expensive_check); + + size_t num_sources = thrust::distance(vertices_begin, vertices_end); + std::vector source_offsets{{0, num_sources}}; + int my_rank = 0; + + if constexpr (multi_gpu) { + auto source_counts = + host_scalar_allgather(handle.get_comms(), num_sources, handle.get_stream()); + + num_sources = std::accumulate(source_counts.begin(), source_counts.end(), 0); + source_offsets.resize(source_counts.size() + 1); + source_offsets[0] = 0; + std::inclusive_scan(source_counts.begin(), source_counts.end(), source_offsets.begin() + 1); + my_rank = handle.get_comms().get_rank(); + } + + // + // FIXME: This could be more efficient using something akin to the + // technique in WCC. Take the entire set of sources, insert them into + // a tagged frontier (tagging each source with itself). Then we can + // expand from multiple sources concurrently. The challenge is managing + // the memory explosion. 
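+ // (Each pass below is single-source: rank r seeds the frontier only when
+ // source_idx falls in [source_offsets[r], source_offsets[r + 1]), so every
+ // rank participates in every BFS even though only one rank owns the source.)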
+ // + for (size_t source_idx = 0; source_idx < num_sources; ++source_idx) { + // + // BFS + // + constexpr size_t bucket_idx_cur = 0; + constexpr size_t num_buckets = 2; + + vertex_frontier_t vertex_frontier(handle, num_buckets); + + if ((source_idx >= source_offsets[my_rank]) && (source_idx < source_offsets[my_rank + 1])) { + vertex_frontier.bucket(bucket_idx_cur) + .insert(vertices_begin + (source_idx - source_offsets[my_rank]), + vertices_begin + (source_idx - source_offsets[my_rank]) + 1); + } + + // + // Now we need to do modified BFS + // + // FIXME: This has an inefficiency in early iterations, as it doesn't have enough work to + // keep the GPUs busy. But we can't run too many at once or we will run out of + // memory. Need to investigate options to improve this performance + auto [distances, sigmas] = + brandes_bfs(handle, graph_view, edge_weight_view, vertex_frontier, do_expensive_check); + accumulate_edge_results(handle, + graph_view, + edge_weight_view, + centralities.mutable_view(), + std::move(distances), + std::move(sigmas), + do_expensive_check); + } + + return centralities; } } // namespace detail @@ -431,7 +687,8 @@ rmm::device_uvector betweenness_centrality( } template -rmm::device_uvector edge_betweenness_centrality( +edge_property_t, weight_t> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, diff --git a/cpp/src/centrality/betweenness_centrality_mg.cu b/cpp/src/centrality/betweenness_centrality_mg.cu index 7bb1f4db6d1..2df843c95c9 100644 --- a/cpp/src/centrality/betweenness_centrality_mg.cu +++ b/cpp/src/centrality/betweenness_centrality_mg.cu @@ -73,7 +73,8 @@ template rmm::device_uvector betweenness_centrality( bool const include_endpoints, bool do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, float> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -81,7 +82,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, float> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -89,7 +91,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, float> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -97,7 +100,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, double> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -105,7 +109,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, double> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -113,7 +118,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); 
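+// (The specialization below tells cuco that the pair type in question may be
+// compared bitwise, just as CUCO_DECLARE_BITWISE_COMPARABLE does for float and
+// double above.)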
-template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, double> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, diff --git a/cpp/src/centrality/betweenness_centrality_sg.cu b/cpp/src/centrality/betweenness_centrality_sg.cu index 1d10b720d09..191857ff5dd 100644 --- a/cpp/src/centrality/betweenness_centrality_sg.cu +++ b/cpp/src/centrality/betweenness_centrality_sg.cu @@ -73,7 +73,8 @@ template rmm::device_uvector betweenness_centrality( bool const include_endpoints, bool do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, float> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -81,7 +82,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, float> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -89,7 +91,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, float> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -97,7 +100,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, double> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -105,7 +109,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, double> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -113,7 +118,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, double> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh index 62ede6eaafb..b388ba53e81 100644 --- a/cpp/src/community/detail/common_methods.cuh +++ b/cpp/src/community/detail/common_methods.cuh @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -42,6 +43,11 @@ CUCO_DECLARE_BITWISE_COMPARABLE(float) CUCO_DECLARE_BITWISE_COMPARABLE(double) +// FIXME: a temporary workaround for a compiler error, should be deleted once cuco gets patched. 
+namespace cuco { +template <> +struct is_bitwise_comparable> : std::true_type {}; +} // namespace cuco namespace cugraph { namespace detail { diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index bbd720131de..e811aafc776 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -48,6 +48,11 @@ CUCO_DECLARE_BITWISE_COMPARABLE(float) CUCO_DECLARE_BITWISE_COMPARABLE(double) +// FIXME: a temporary workaround for a compiler error, should be deleted once cuco gets patched. +namespace cuco { +template <> +struct is_bitwise_comparable> : std::true_type {}; +} // namespace cuco namespace cugraph { namespace detail { diff --git a/cpp/src/detail/graph_partition_utils.cuh b/cpp/src/detail/graph_partition_utils.cuh index 88e9623e043..67574719b45 100644 --- a/cpp/src/detail/graph_partition_utils.cuh +++ b/cpp/src/detail/graph_partition_utils.cuh @@ -19,7 +19,7 @@ #include -#include +#include #include #include #include diff --git a/cpp/src/link_analysis/pagerank_impl.cuh b/cpp/src/link_analysis/pagerank_impl.cuh index 49d1a3eabb9..3a84cdedfda 100644 --- a/cpp/src/link_analysis/pagerank_impl.cuh +++ b/cpp/src/link_analysis/pagerank_impl.cuh @@ -46,22 +46,19 @@ namespace cugraph { namespace detail { -// FIXME: personalization_vector_size is confusing in OPG (local or aggregate?) template -void pagerank( +centrality_algorithm_metadata_t pagerank( raft::handle_t const& handle, GraphViewType const& pull_graph_view, std::optional> edge_weight_view, - std::optional precomputed_vertex_out_weight_sums, - std::optional personalization_vertices, - std::optional personalization_values, - std::optional personalization_vector_size, - result_t* pageranks, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, + raft::device_span>> personalization, + raft::device_span pageranks, result_t alpha, result_t epsilon, size_t max_iterations, - bool has_initial_guess, bool do_expensive_check) { using vertex_t = typename GraphViewType::vertex_type; @@ -75,27 +72,26 @@ void pagerank( "GraphViewType should support the pull model."); auto const num_vertices = pull_graph_view.number_of_vertices(); - if (num_vertices == 0) { return; } + if (num_vertices == 0) { return centrality_algorithm_metadata_t{0, true}; } auto aggregate_personalization_vector_size = - personalization_vertices ? GraphViewType::is_multi_gpu - ? host_scalar_allreduce(handle.get_comms(), - *personalization_vector_size, - raft::comms::op_t::SUM, - handle.get_stream()) - : *personalization_vector_size - : vertex_t{0}; + personalization ? GraphViewType::is_multi_gpu + ? host_scalar_allreduce(handle.get_comms(), + std::get<0>(*personalization).size(), + raft::comms::op_t::SUM, + handle.get_stream()) + : std::get<0>(*personalization).size() + : vertex_t{0}; // 1. 
check input arguments - CUGRAPH_EXPECTS((personalization_vertices.has_value() == false) || - (personalization_values.has_value() && personalization_vector_size.has_value()), - "Invalid input argument: if personalization_vertices.has_value() is true, " - "personalization_values.has_value() and personalization_vector_size.has_value() " - "should be true as well."); + CUGRAPH_EXPECTS((personalization.has_value() == false) || + (std::get<0>(*personalization).size() == std::get<1>(*personalization).size()), + "Invalid input argument: if personalization.has_value() is true, the size of " + "vertices and values should match"); CUGRAPH_EXPECTS( - (personalization_vertices.has_value() == false) || (aggregate_personalization_vector_size > 0), - "Invalid input argument: if personalization_vertices.has_value() is true, the input " + (personalization.has_value() == false) || (aggregate_personalization_vector_size > 0), + "Invalid input argument: if personalization.has_value() is true, the input " "personalization vector size should not be 0."); CUGRAPH_EXPECTS((alpha >= 0.0) && (alpha <= 1.0), "Invalid input argument: alpha should be in [0.0, 1.0]."); @@ -106,7 +102,7 @@ auto num_negative_precomputed_vertex_out_weight_sums = count_if_v(handle, pull_graph_view, - *precomputed_vertex_out_weight_sums, + precomputed_vertex_out_weight_sums->data(), [] __device__(auto, auto val) { return val < result_t{0.0}; }); CUGRAPH_EXPECTS( num_negative_precomputed_vertex_out_weight_sums == 0, @@ -126,17 +122,10 @@ "Invalid input argument: input edge weights should have non-negative values."); } - if (has_initial_guess) { - auto num_negative_values = count_if_v( - handle, pull_graph_view, pageranks, [] __device__(auto, auto val) { return val < 0.0; }); - CUGRAPH_EXPECTS(num_negative_values == 0, - "Invalid input argument: initial guess values should be non-negative."); - } - if constexpr (GraphViewType::is_multi_gpu) { auto num_gpus_with_valid_personalization_vector = host_scalar_allreduce(handle.get_comms(), - personalization_vertices ? int{1} : int{0}, + personalization ?
int{1} : int{0}, raft::comms::op_t::SUM, handle.get_stream()); CUGRAPH_EXPECTS( @@ -151,8 +140,8 @@ pull_graph_view.local_vertex_partition_view()); auto num_invalid_vertices = thrust::count_if(handle.get_thrust_policy(), - *personalization_vertices, - *personalization_vertices + *personalization_vector_size, + std::get<0>(*personalization).begin(), + std::get<0>(*personalization).end(), [vertex_partition] __device__(auto val) { return !(vertex_partition.is_valid_vertex(val) && vertex_partition.in_local_vertex_partition_range_nocheck(val)); @@ -163,17 +152,36 @@ } CUGRAPH_EXPECTS(num_invalid_vertices == 0, "Invalid input argument: personalization vertices have invalid vertex IDs."); - auto num_negative_values = - thrust::count_if(handle.get_thrust_policy(), - *personalization_values, - *personalization_values + *personalization_vector_size, - [] __device__(auto val) { return val < 0.0; }); + auto num_negative_values = thrust::count_if(handle.get_thrust_policy(), + std::get<1>(*personalization).begin(), + std::get<1>(*personalization).end(), + [] __device__(auto val) { return val < 0.0; }); if constexpr (GraphViewType::is_multi_gpu) { num_negative_values = host_scalar_allreduce( handle.get_comms(), num_negative_values, raft::comms::op_t::SUM, handle.get_stream()); } CUGRAPH_EXPECTS(num_negative_values == 0, "Invalid input argument: personalization values should be non-negative."); + + rmm::device_uvector check_for_duplicates(std::get<0>(*personalization).size(), + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + std::get<0>(*personalization).begin(), + std::get<0>(*personalization).end(), + check_for_duplicates.begin()); + + thrust::sort( + handle.get_thrust_policy(), check_for_duplicates.begin(), check_for_duplicates.end()); + + auto num_uniques = + thrust::count_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(check_for_duplicates.size()), + detail::is_first_in_run_t{check_for_duplicates.data()}); + + CUGRAPH_EXPECTS( + static_cast(num_uniques) == check_for_duplicates.size(), + "Invalid input argument: personalization vertices must not contain duplicate entries."); } } @@ -196,35 +204,16 @@ } } auto vertex_out_weight_sums = precomputed_vertex_out_weight_sums - ? *precomputed_vertex_out_weight_sums + ? (*precomputed_vertex_out_weight_sums).data() : (*tmp_vertex_out_weight_sums).data(); - // 3. initialize pagerank values - - if (has_initial_guess) { - auto sum = reduce_v(handle, pull_graph_view, pageranks, result_t{0.0}); - CUGRAPH_EXPECTS(sum > 0.0, - "Invalid input argument: sum of the PageRank initial " - "guess values should be positive."); - thrust::transform(handle.get_thrust_policy(), - pageranks, - pageranks + pull_graph_view.local_vertex_partition_range_size(), - pageranks, - [sum] __device__(auto val) { return val / sum; }); - } else { - thrust::fill(handle.get_thrust_policy(), - pageranks, - pageranks + pull_graph_view.local_vertex_partition_range_size(), - result_t{1.0} / static_cast(num_vertices)); - } - - // 4. sum the personalization values + // 3.
sum the personalization values result_t personalization_sum{0.0}; if (aggregate_personalization_vector_size > 0) { personalization_sum = thrust::reduce(handle.get_thrust_policy(), - *personalization_values, - *personalization_values + *personalization_vector_size, + std::get<1>(*personalization).begin(), + std::get<1>(*personalization).end(), result_t{0.0}); if constexpr (GraphViewType::is_multi_gpu) { personalization_sum = host_scalar_allreduce( @@ -243,18 +232,13 @@ void pagerank( edge_src_property_t edge_src_pageranks(handle, pull_graph_view); size_t iter{0}; while (true) { - thrust::copy(handle.get_thrust_policy(), - pageranks, - pageranks + pull_graph_view.local_vertex_partition_range_size(), - old_pageranks.data()); - - auto vertex_val_first = - thrust::make_zip_iterator(thrust::make_tuple(pageranks, vertex_out_weight_sums)); + thrust::copy( + handle.get_thrust_policy(), pageranks.begin(), pageranks.end(), old_pageranks.data()); auto dangling_sum = transform_reduce_v( handle, pull_graph_view, - vertex_val_first, + thrust::make_zip_iterator(pageranks.begin(), vertex_out_weight_sums), [] __device__(auto, auto val) { auto const pagerank = thrust::get<0>(val); auto const out_weight_sum = thrust::get<1>(val); @@ -262,19 +246,21 @@ void pagerank( }, result_t{0.0}); - thrust::transform(handle.get_thrust_policy(), - vertex_val_first, - vertex_val_first + pull_graph_view.local_vertex_partition_range_size(), - pageranks, - [] __device__(auto val) { - auto const pagerank = thrust::get<0>(val); - auto const out_weight_sum = thrust::get<1>(val); - auto const divisor = - out_weight_sum == result_t{0.0} ? result_t{1.0} : out_weight_sum; - return pagerank / divisor; - }); + thrust::transform( + handle.get_thrust_policy(), + thrust::make_zip_iterator(pageranks.begin(), vertex_out_weight_sums), + thrust::make_zip_iterator( + pageranks.end(), + vertex_out_weight_sums + pull_graph_view.local_vertex_partition_range_size()), + pageranks.begin(), + [] __device__(auto val) { + auto const pagerank = thrust::get<0>(val); + auto const out_weight_sum = thrust::get<1>(val); + auto const divisor = out_weight_sum == result_t{0.0} ? result_t{1.0} : out_weight_sum; + return pagerank / divisor; + }); - update_edge_src_property(handle, pull_graph_view, pageranks, edge_src_pageranks); + update_edge_src_property(handle, pull_graph_view, pageranks.data(), edge_src_pageranks); auto unvarying_part = aggregate_personalization_vector_size == 0 ? 
@@ -243,18 +232,13 @@ void pagerank(
   edge_src_property_t<GraphViewType, result_t> edge_src_pageranks(handle, pull_graph_view);
   size_t iter{0};
   while (true) {
-    thrust::copy(handle.get_thrust_policy(),
-                 pageranks,
-                 pageranks + pull_graph_view.local_vertex_partition_range_size(),
-                 old_pageranks.data());
-
-    auto vertex_val_first =
-      thrust::make_zip_iterator(thrust::make_tuple(pageranks, vertex_out_weight_sums));
+    thrust::copy(
+      handle.get_thrust_policy(), pageranks.begin(), pageranks.end(), old_pageranks.data());
 
     auto dangling_sum = transform_reduce_v(
       handle,
       pull_graph_view,
-      vertex_val_first,
+      thrust::make_zip_iterator(pageranks.begin(), vertex_out_weight_sums),
       [] __device__(auto, auto val) {
         auto const pagerank       = thrust::get<0>(val);
         auto const out_weight_sum = thrust::get<1>(val);
@@ -262,19 +246,21 @@ void pagerank(
       },
       result_t{0.0});
 
-    thrust::transform(handle.get_thrust_policy(),
-                      vertex_val_first,
-                      vertex_val_first + pull_graph_view.local_vertex_partition_range_size(),
-                      pageranks,
-                      [] __device__(auto val) {
-                        auto const pagerank       = thrust::get<0>(val);
-                        auto const out_weight_sum = thrust::get<1>(val);
-                        auto const divisor =
-                          out_weight_sum == result_t{0.0} ? result_t{1.0} : out_weight_sum;
-                        return pagerank / divisor;
-                      });
+    thrust::transform(
+      handle.get_thrust_policy(),
+      thrust::make_zip_iterator(pageranks.begin(), vertex_out_weight_sums),
+      thrust::make_zip_iterator(
+        pageranks.end(),
+        vertex_out_weight_sums + pull_graph_view.local_vertex_partition_range_size()),
+      pageranks.begin(),
+      [] __device__(auto val) {
+        auto const pagerank       = thrust::get<0>(val);
+        auto const out_weight_sum = thrust::get<1>(val);
+        auto const divisor = out_weight_sum == result_t{0.0} ? result_t{1.0} : out_weight_sum;
+        return pagerank / divisor;
+      });
 
-    update_edge_src_property(handle, pull_graph_view, pageranks, edge_src_pageranks);
+    update_edge_src_property(handle, pull_graph_view, pageranks.data(), edge_src_pageranks);
 
     auto unvarying_part = aggregate_personalization_vector_size == 0
                             ? (dangling_sum * alpha + static_cast<result_t>(1.0 - alpha)) /
@@ -293,7 +279,7 @@ void pagerank(
         },
         unvarying_part,
         reduce_op::plus<result_t>{},
-        pageranks);
+        pageranks.begin());
     } else {
       per_v_transform_reduce_incoming_e(
         handle,
@@ -306,20 +292,23 @@ void pagerank(
         },
         unvarying_part,
         reduce_op::plus<result_t>{},
-        pageranks);
+        pageranks.begin());
     }
     if (aggregate_personalization_vector_size > 0) {
       auto vertex_partition =
         vertex_partition_device_view_t<vertex_t, GraphViewType::is_multi_gpu>(
           pull_graph_view.local_vertex_partition_view());
-      auto val_first = thrust::make_zip_iterator(
-        thrust::make_tuple(*personalization_vertices, *personalization_values));
       thrust::for_each(
         handle.get_thrust_policy(),
-        val_first,
-        val_first + *personalization_vector_size,
-        [vertex_partition, pageranks, dangling_sum, personalization_sum, alpha] __device__(
-          auto val) {
+        thrust::make_zip_iterator(thrust::make_tuple(std::get<0>(*personalization).begin(),
+                                                     std::get<1>(*personalization).begin())),
+        thrust::make_zip_iterator(thrust::make_tuple(std::get<0>(*personalization).end(),
+                                                     std::get<1>(*personalization).end())),
+        [vertex_partition,
+         pageranks = pageranks.data(),
+         dangling_sum,
+         personalization_sum,
+         alpha] __device__(auto val) {
           auto v     = thrust::get<0>(val);
           auto value = thrust::get<1>(val);
           *(pageranks + vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)) +=
@@ -331,7 +320,7 @@ void pagerank(
     auto diff_sum = transform_reduce_v(
       handle,
       pull_graph_view,
-      thrust::make_zip_iterator(thrust::make_tuple(pageranks, old_pageranks.data())),
+      thrust::make_zip_iterator(thrust::make_tuple(pageranks.begin(), old_pageranks.begin())),
       [] __device__(auto, auto val) { return std::abs(thrust::get<0>(val) - thrust::get<1>(val)); },
       result_t{0.0});
@@ -340,9 +329,11 @@ void pagerank(
     if (diff_sum < epsilon) {
       break;
     } else if (iter >= max_iterations) {
-      CUGRAPH_FAIL("PageRank failed to converge.");
+      break;
     }
   }
+
+  return centrality_algorithm_metadata_t{iter, (iter < max_iterations)};
 }
 }  // namespace detail
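The behavioral change in the last hunk above: hitting `max_iterations` no longer throws via `CUGRAPH_FAIL`; the loop breaks and the caller receives a `centrality_algorithm_metadata_t` recording how many iterations ran and whether `epsilon` was reached. A compilable host-side sketch of that control flow — the iteration-count member name is an assumption (the diff only names `converged_`), and `run_one_iteration` is a hypothetical stand-in for one power-iteration step:

```cpp
#include <cstddef>
#include <functional>

// Mirrors the shape used in the diff: an iteration count (member name assumed
// here) plus the converged_ flag the wrapper checks.
struct centrality_algorithm_metadata_t {
  std::size_t iteration_count_{};
  bool converged_{};
};

// run_one_iteration performs one power-iteration step and returns the L1
// difference between the new and old PageRank vectors.
centrality_algorithm_metadata_t iterate_until_converged(
  std::function<double()> const& run_one_iteration, double epsilon, std::size_t max_iterations)
{
  std::size_t iter{0};
  while (true) {
    auto diff_sum = run_one_iteration();
    iter++;
    if (diff_sum < epsilon) {
      break;  // converged
    } else if (iter >= max_iterations) {
      break;  // out of budget: report failure to the caller instead of throwing
    }
  }
  return centrality_algorithm_metadata_t{iter, (iter < max_iterations)};
}
```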
@@ -364,19 +355,102 @@ void pagerank(raft::handle_t const& handle,
 {
   CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");
 
-  detail::pagerank(handle,
-                   graph_view,
-                   edge_weight_view,
-                   precomputed_vertex_out_weight_sums,
-                   personalization_vertices,
-                   personalization_values,
-                   personalization_vector_size,
-                   pageranks,
-                   alpha,
-                   epsilon,
-                   max_iterations,
-                   has_initial_guess,
-                   do_expensive_check);
+  CUGRAPH_EXPECTS(
+    (personalization_vertices.has_value() == false) ||
+      (personalization_values.has_value() && personalization_vector_size.has_value()),
+    "Invalid input argument: if personalization_vertices.has_value() is true, "
+    "personalization_values.has_value() and personalization_vector_size.has_value() should be "
+    "true as well.");
+
+  // initialize pagerank values
+  if (has_initial_guess) {
+    if (do_expensive_check) {
+      auto num_negative_values = count_if_v(
+        handle, graph_view, pageranks, [] __device__(auto, auto val) { return val < 0.0; });
+      CUGRAPH_EXPECTS(num_negative_values == 0,
+                      "Invalid input argument: initial guess values should be non-negative.");
+    }
+
+    auto sum = reduce_v(handle, graph_view, pageranks, result_t{0.0});
+    CUGRAPH_EXPECTS(sum > 0.0,
+                    "Invalid input argument: sum of the PageRank initial "
+                    "guess values should be positive.");
+    thrust::transform(handle.get_thrust_policy(),
+                      pageranks,
+                      pageranks + graph_view.local_vertex_partition_range_size(),
+                      pageranks,
+                      [sum] __device__(auto val) { return val / sum; });
+  } else {
+    thrust::fill(handle.get_thrust_policy(),
+                 pageranks,
+                 pageranks + graph_view.local_vertex_partition_range_size(),
+                 result_t{1.0} / static_cast<result_t>(graph_view.number_of_vertices()));
+  }
+
+  auto metadata = detail::pagerank(
+    handle,
+    graph_view,
+    edge_weight_view,
+    precomputed_vertex_out_weight_sums
+      ? std::make_optional(raft::device_span<weight_t const>{
+          *precomputed_vertex_out_weight_sums,
+          static_cast<size_t>(graph_view.local_vertex_partition_range_size())})
+      : std::nullopt,
+    personalization_vertices
+      ? std::make_optional(std::make_tuple(
+          raft::device_span<vertex_t const>{*personalization_vertices,
+                                            static_cast<size_t>(*personalization_vector_size)},
+          raft::device_span<result_t const>{*personalization_values,
+                                            static_cast<size_t>(*personalization_vector_size)}))
+      : std::nullopt,
+    raft::device_span<result_t>{
+      pageranks, static_cast<size_t>(graph_view.local_vertex_partition_range_size())},
+    alpha,
+    epsilon,
+    max_iterations,
+    do_expensive_check);
+
+  CUGRAPH_EXPECTS(metadata.converged_, "PageRank failed to converge.");
+}
+
+template <typename vertex_t, typename edge_t, typename weight_t, typename result_t, bool multi_gpu>
+std::tuple<rmm::device_uvector<result_t>, centrality_algorithm_metadata_t> pagerank(
+  raft::handle_t const& handle,
+  graph_view_t<vertex_t, edge_t, true, multi_gpu> const& graph_view,
+  std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
+  std::optional<raft::device_span<weight_t const>> precomputed_vertex_out_weight_sums,
+  std::optional<std::tuple<raft::device_span<vertex_t const>, raft::device_span<result_t const>>>
+    personalization,
+  std::optional<raft::device_span<result_t const>> initial_pageranks,
+  result_t alpha,
+  result_t epsilon,
+  size_t max_iterations,
+  bool do_expensive_check)
+{
+  CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");
+
+  rmm::device_uvector<result_t> local_pageranks(graph_view.local_vertex_partition_range_size(),
+                                                handle.get_stream());
+
+  if (!initial_pageranks) {
+    thrust::fill(handle.get_thrust_policy(),
+                 local_pageranks.begin(),
+                 local_pageranks.end(),
+                 result_t{1.0} / graph_view.number_of_vertices());
+  } else {
+    thrust::copy(handle.get_thrust_policy(),
+                 initial_pageranks->begin(),
+                 initial_pageranks->end(),
+                 local_pageranks.begin());
+  }
+
+  auto metadata =
+    detail::pagerank(handle,
+                     graph_view,
+                     edge_weight_view,
+                     precomputed_vertex_out_weight_sums,
+                     personalization,
+                     raft::device_span<result_t>{local_pageranks.data(), local_pageranks.size()},
+                     alpha,
+                     epsilon,
+                     max_iterations,
+                     do_expensive_check);
+
+  return std::make_tuple(std::move(local_pageranks), metadata);
+}
 }  // namespace cugraph
diff --git a/cpp/src/link_analysis/pagerank_mg.cu b/cpp/src/link_analysis/pagerank_mg.cu
index d6dd5f60544..dc9892f69a8 100644
--- a/cpp/src/link_analysis/pagerank_mg.cu
+++ b/cpp/src/link_analysis/pagerank_mg.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
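The pagerank_mg.cu and pagerank_sg.cu hunks that follow add explicit instantiations of the new overload for each supported (vertex_t, edge_t) pair crossed with float and double results, so the template is compiled once inside the library rather than in every translation unit that includes the header. A toy illustration of the pattern (`uniform_scores` is a made-up function used only for the example):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Stand-in for the templated algorithm defined in a header/.cuh file.
template <typename vertex_t, typename result_t>
std::vector<result_t> uniform_scores(vertex_t num_vertices)
{
  return std::vector<result_t>(static_cast<std::size_t>(num_vertices),
                               result_t{1} / static_cast<result_t>(num_vertices));
}

// The .cu translation unit then pins down every shipped combination, mirroring
// the six pagerank instantiations (int32/int64 vertex and edge types, each
// with float and double results):
template std::vector<float> uniform_scores<std::int32_t, float>(std::int32_t);
template std::vector<double> uniform_scores<std::int32_t, double>(std::int32_t);
template std::vector<float> uniform_scores<std::int64_t, float>(std::int64_t);
template std::vector<double> uniform_scores<std::int64_t, double>(std::int64_t);
```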
@@ -102,4 +102,82 @@ template void pagerank(raft::handle_t const& handle, bool has_initial_guess, bool do_expensive_check); +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_analysis/pagerank_sg.cu b/cpp/src/link_analysis/pagerank_sg.cu index 3dc0adc45df..51d123fe337 100644 --- a/cpp/src/link_analysis/pagerank_sg.cu +++ b/cpp/src/link_analysis/pagerank_sg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
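A design note on the signature being instantiated here: folding the old `personalization_vertices` / `personalization_values` / `personalization_vector_size` triple into one `std::optional` of a tuple of spans makes the "all three present or all three absent" invariant structural, instead of a runtime `CUGRAPH_EXPECTS` check. A sketch of the idea, with C++20 `std::span` standing in for `raft::device_span`:

```cpp
#include <cstddef>
#include <optional>
#include <span>
#include <tuple>

// One optional tuple of parallel spans: either both columns exist (one value
// per vertex) or neither does; no half-specified state is expressible.
using personalization_t =
  std::optional<std::tuple<std::span<int const>, std::span<double const>>>;

double personalization_mass(personalization_t const& personalization)
{
  if (!personalization) { return 0.0; }  // absent: uniform teleport, nothing to sum
  auto const& [vertices, values] = *personalization;
  (void)vertices;  // same length as values by construction
  double sum{0.0};
  for (auto v : values) { sum += v; }
  return sum;
}
```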
@@ -102,4 +102,82 @@ template void pagerank(raft::handle_t const& handle, bool has_initial_guess, bool do_expensive_check); +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 2d0d0a876e6..98453d46c3f 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -974,7 +974,7 @@ nbr_intersection(raft::handle_t const& handle, .get_stream()); // initially store minimum degrees (upper bound for intersection sizes) if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { auto second_element_to_idx_map = - detail::kv_cuco_store_device_view_t((*major_to_idx_map_ptr)->view()); + detail::kv_cuco_store_find_device_view_t((*major_to_idx_map_ptr)->view()); thrust::transform( handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer), @@ -1005,7 +1005,7 @@ nbr_intersection(raft::handle_t const& handle, handle.get_stream()); if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { auto second_element_to_idx_map = - detail::kv_cuco_store_device_view_t((*major_to_idx_map_ptr)->view()); + 
detail::kv_cuco_store_find_device_view_t((*major_to_idx_map_ptr)->view()); thrust::tabulate( handle.get_thrust_policy(), rx_v_pair_nbr_intersection_sizes.begin(), diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh index 8395fc55833..f20865c92dc 100644 --- a/cpp/src/prims/kv_store.cuh +++ b/cpp/src/prims/kv_store.cuh @@ -16,11 +16,14 @@ #pragma once #include +#include #include +#include #include #include +#include #include #include #include @@ -29,6 +32,7 @@ #include #include #include +#include #include #include @@ -45,7 +49,7 @@ namespace cugraph { namespace detail { template -struct binary_search_find_op_t { +struct kv_binary_search_find_op_t { using key_type = typename thrust::iterator_traits::value_type; using value_type = typename thrust::iterator_traits::value_type; @@ -67,7 +71,7 @@ struct binary_search_find_op_t { }; template -struct binary_search_contains_op_t { +struct kv_binary_search_contains_op_t { using key_type = typename thrust::iterator_traits::value_type; KeyIterator store_key_first{}; @@ -79,6 +83,105 @@ struct binary_search_contains_op_t { } }; +template +struct kv_cuco_insert_and_increment_t { + using key_type = typename thrust::iterator_traits::value_type; + using cuco_store_type = cuco::experimental::static_map< + key_type, + size_t, + cuco::experimental::extent, + cuda::thread_scope_device, + thrust::equal_to, + cuco::experimental::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>>; + + typename cuco_store_type::ref_type device_ref{}; + KeyIterator key_first{}; + size_t* counter{nullptr}; + size_t invalid_idx{}; + + __device__ size_t operator()(size_t i) + { + auto pair = thrust::make_tuple(*(key_first + i), size_t{0} /* dummy */); + auto [iter, inserted] = device_ref.insert_and_find(pair); + if (inserted) { + cuda::atomic_ref atomic_counter(*counter); + auto idx = atomic_counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed); + using ref_type = typename cuco_store_type::ref_type; + cuda::atomic_ref ref((*iter).second); + ref.store(idx, cuda::std::memory_order_relaxed); + return idx; + } else { + return invalid_idx; + } + } +}; + +template +struct kv_cuco_insert_if_and_increment_t { + using key_type = typename thrust::iterator_traits::value_type; + using cuco_store_type = cuco::experimental::static_map< + key_type, + size_t, + cuco::experimental::extent, + cuda::thread_scope_device, + thrust::equal_to, + cuco::experimental::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>>; + + typename cuco_store_type::ref_type device_ref{}; + KeyIterator key_first{}; + StencilIterator stencil_first{}; + PredOp pred_op{}; + size_t* counter{nullptr}; + size_t invalid_idx{}; + + __device__ size_t operator()(size_t i) + { + if (pred_op(*(stencil_first + i)) == false) { return invalid_idx; } + + auto pair = thrust::make_tuple(*(key_first + i), size_t{0} /* dummy */); + auto [iter, inserted] = device_ref.insert_and_find(pair); + if (inserted) { + cuda::atomic_ref atomic_counter(*counter); + auto idx = atomic_counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed); + using ref_type = typename cuco_store_type::ref_type; + cuda::atomic_ref ref((*iter).second); + ref.store(idx, cuda::std::memory_order_relaxed); + return idx; + } else { + return invalid_idx; + } + } +}; + +template +struct kv_cuco_insert_and_assign_t { + using cuco_store_type = cuco::experimental::static_map< + key_t, + std::conditional_t, value_t, size_t>, + cuco::experimental::extent, + 
cuda::thread_scope_device, + thrust::equal_to, + cuco::experimental::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>>; + + typename cuco_store_type::ref_type device_ref{}; + + __device__ void operator()(thrust::tuple pair) + { + auto [iter, inserted] = device_ref.insert_and_find(pair); + if (!inserted) { + using ref_type = typename cuco_store_type::ref_type; + cuda::atomic_ref ref((*iter).second); + ref.store(thrust::get<1>(pair), cuda::std::memory_order_relaxed); + } + } +}; + template struct kv_binary_search_store_device_view_t { using key_type = typename ViewType::key_type; @@ -112,18 +215,19 @@ struct kv_binary_search_store_device_view_t { }; template -struct kv_cuco_store_device_view_t { - using key_type = typename ViewType::key_type; - using value_type = typename ViewType::value_type; - using cuco_store_device_view_type = typename ViewType::cuco_store_type::device_view; +struct kv_cuco_store_find_device_view_t { + using key_type = typename ViewType::key_type; + using value_type = typename ViewType::value_type; + using cuco_store_device_ref_type = + typename ViewType::cuco_store_type::ref_type; static_assert(!ViewType::binary_search); - __host__ kv_cuco_store_device_view_t(ViewType view) - : cuco_store_device_view(view.cuco_store_device_view()) + __host__ kv_cuco_store_find_device_view_t(ViewType view) + : cuco_store_device_ref(view.cuco_store_find_device_ref()) { if constexpr (std::is_arithmetic_v) { - invalid_value = cuco_store_device_view.get_empty_value_sentinel(); + invalid_value = cuco_store_device_ref.empty_value_sentinel(); } else { store_value_first = view.store_value_first(); invalid_value = view.invalid_value(); @@ -132,11 +236,11 @@ struct kv_cuco_store_device_view_t { __device__ value_type find(key_type key) const { - auto found = cuco_store_device_view.find(key); - if (found == cuco_store_device_view.end()) { + auto found = cuco_store_device_ref.find(key); + if (found == cuco_store_device_ref.end()) { return invalid_value; } else { - auto val = found->second.load(cuda::std::memory_order_relaxed); + auto val = (*found).second; if constexpr (std::is_arithmetic_v) { return val; } else { @@ -145,7 +249,7 @@ struct kv_cuco_store_device_view_t { } } - cuco_store_device_view_type cuco_store_device_view{}; + cuco_store_device_ref_type cuco_store_device_ref{}; std::conditional_t, typename ViewType::value_iterator, std::byte /* dummy */> @@ -185,7 +289,7 @@ class kv_binary_search_store_view_t { key_first, key_last, value_first, - binary_search_find_op_t{ + kv_binary_search_find_op_t{ store_key_first_, store_key_last_, store_value_first_, invalid_value_}); } @@ -195,11 +299,12 @@ class kv_binary_search_store_view_t { ResultValueIterator value_first, rmm::cuda_stream_view stream) const { - thrust::transform(rmm::exec_policy(stream), - key_first, - key_last, - value_first, - binary_search_contains_op_t{store_key_first_, store_key_last_}); + thrust::transform( + rmm::exec_policy(stream), + key_first, + key_last, + value_first, + kv_binary_search_contains_op_t{store_key_first_, store_key_last_}); } KeyIterator store_key_first() const { return store_key_first_; } @@ -227,31 +332,29 @@ class kv_cuco_store_view_t { static constexpr bool binary_search = false; - using cuco_store_type = - cuco::static_map, value_type, size_t>, - cuda::thread_scope_device, - rmm::mr::stream_allocator_adaptor>>; + using cuco_store_type = cuco::experimental::static_map< + key_t, + std::conditional_t, value_type, size_t>, + cuco::experimental::extent, + 
cuda::thread_scope_device, + thrust::equal_to, + cuco::experimental::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>>; - // FIXME: const_cast as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/242 (cuco find() is not a const function) template kv_cuco_store_view_t(cuco_store_type const* store, std::enable_if_t, int32_t> = 0) - : cuco_store_(const_cast(store)) + : cuco_store_(store) { } - // FIXME: const_cast as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/242 (cuco find() is not a const function) template kv_cuco_store_view_t(cuco_store_type const* store, ValueIterator value_first, type invalid_value, std::enable_if_t, int32_t> = 0) - : cuco_store_(const_cast(store)), - store_value_first_(value_first), - invalid_value_(invalid_value) + : cuco_store_(store), store_value_first_(value_first), invalid_value_(invalid_value) { } @@ -262,34 +365,17 @@ class kv_cuco_store_view_t { rmm::cuda_stream_view stream) const { if constexpr (std::is_arithmetic_v) { - cuco_store_->find(key_first, - key_last, - value_first, - cuco::detail::MurmurHash3_32{}, - thrust::equal_to{}, - stream); + cuco_store_->find(key_first, key_last, value_first, stream.value()); } else { rmm::device_uvector indices(thrust::distance(key_first, key_last), stream); - cuco_store_->find(key_first, - key_last, - indices.begin(), - cuco::detail::MurmurHash3_32{}, - thrust::equal_to{}, - stream); - auto invalid_idx = cuco_store_->get_empty_value_sentinel(); - thrust::transform(rmm::exec_policy(stream), - indices.begin(), - indices.end(), - value_first, - [store_value_first = store_value_first_, - invalid_value = invalid_value_, - invalid_idx] __device__(auto idx) { - if (idx != invalid_idx) { - return *(store_value_first + idx); - } else { - return invalid_value; - } - }); + auto invalid_idx = cuco_store_->empty_value_sentinel(); + cuco_store_->find(key_first, key_last, indices.begin(), stream.value()); + thrust::transform( + rmm::exec_policy(stream), + indices.begin(), + indices.end(), + value_first, + indirection_if_idx_valid_t{store_value_first_, invalid_idx, invalid_value_}); } } @@ -299,15 +385,10 @@ class kv_cuco_store_view_t { ResultValueIterator value_first, rmm::cuda_stream_view stream) const { - cuco_store_->contains(key_first, - key_last, - value_first, - cuco::detail::MurmurHash3_32{}, - thrust::equal_to{}, - stream); + cuco_store_->contains(key_first, key_last, value_first, stream.value()); } - auto cuco_store_device_view() const { return cuco_store_->get_device_view(); } + auto cuco_store_find_device_ref() const { return cuco_store_->ref(cuco::experimental::find); } template std::enable_if_t, ValueIterator> store_value_first() const @@ -315,21 +396,19 @@ class kv_cuco_store_view_t { return store_value_first_; } - key_t invalid_key() const { return cuco_store_->get_empty_key_sentinel(); } + key_t invalid_key() const { return cuco_store_->empty_key_sentinel(); } value_type invalid_value() const { if constexpr (std::is_arithmetic_v) { - return cuco_store_->get_empty_value_sentinel(); + return cuco_store_->empty_value_sentinel(); } else { return invalid_value_; } } private: - // FIXME: cuco_store should be a const pointer but we can't due to - // https://github.com/NVIDIA/cuCollections/issues/242 (cuco find() is not a const function) - cuco_store_type* cuco_store_{}; + cuco_store_type const* cuco_store_{}; std::conditional_t, ValueIterator, std::byte /* dummy */> store_value_first_{}; @@ -395,6 +474,29 @@ class 
kv_binary_search_store_t { } } + auto retrieve_all(rmm::cuda_stream_view stream) + { + rmm::device_uvector tmp_store_keys(store_keys_.size(), stream); + auto tmp_store_values = + allocate_dataframe_buffer(size_dataframe_buffer(store_values_), stream); + thrust::copy( + rmm::exec_policy(stream), store_keys_.begin(), store_keys_.end(), tmp_store_keys.begin()); + thrust::copy(rmm::exec_policy(stream), + get_dataframe_buffer_begin(store_values_), + get_dataframe_buffer_end(store_values_), + get_dataframe_buffer_begin(tmp_store_values)); + return std::make_tuple(std::move(tmp_store_keys), std::move(tmp_store_values)); + } + + auto release(rmm::cuda_stream_view stream) + { + auto tmp_store_keys = std::move(store_keys_); + auto tmp_store_values = std::move(store_values_); + store_keys_ = rmm::device_uvector(0, stream); + store_values_ = allocate_dataframe_buffer(0, stream); + return std::make_tuple(std::move(tmp_store_keys), std::move(tmp_store_values)); + } + key_t const* store_key_first() const { return store_keys_.cbegin(); } key_t const* store_key_last() const { return store_keys_.cend(); } @@ -403,6 +505,10 @@ class kv_binary_search_store_t { value_t invalid_value() const { return invalid_value_; } + size_t size() const { return store_keys_.size(); } + + size_t capacity() const { return store_keys_.size(); } + private: rmm::device_uvector store_keys_; decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{})) store_values_; @@ -421,14 +527,28 @@ class kv_cuco_store_t { std::invoke_result_t), value_buffer_type&>; - using cuco_store_type = - cuco::static_map, value_t, size_t>, - cuda::thread_scope_device, - rmm::mr::stream_allocator_adaptor>>; + using cuco_store_type = cuco::experimental::static_map< + key_t, + std::conditional_t, value_t, size_t>, + cuco::experimental::extent, + cuda::thread_scope_device, + thrust::equal_to, + cuco::experimental::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>>; kv_cuco_store_t(rmm::cuda_stream_view stream) {} + kv_cuco_store_t(size_t capacity, + key_t invalid_key, + value_t invalid_value, + rmm::cuda_stream_view stream) + { + allocate(capacity, invalid_key, invalid_value, stream); + capacity_ = capacity; + size_ = 0; + } + template kv_cuco_store_t(KeyIterator key_first, KeyIterator key_last, @@ -437,51 +557,228 @@ class kv_cuco_store_t { value_t invalid_value, rmm::cuda_stream_view stream) { - double constexpr load_factor = 0.7; - auto num_keys = static_cast(thrust::distance(key_first, key_last)); - auto cuco_size = std::max( - static_cast(static_cast(num_keys) / load_factor), - static_cast(num_keys) + 1); // cuco::static_map requires at least one empty slot - auto stream_adapter = rmm::mr::make_stream_allocator_adaptor( - rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()), stream); + auto num_keys = static_cast(thrust::distance(key_first, key_last)); + allocate(num_keys, invalid_key, invalid_value, stream); + if constexpr (!std::is_arithmetic_v) { invalid_value_ = invalid_value; } + capacity_ = num_keys; + size_ = 0; + + insert(key_first, key_last, value_first, stream); + } + + template + void insert(KeyIterator key_first, + KeyIterator key_last, + ValueIterator value_first, + rmm::cuda_stream_view stream) + { + auto num_keys = static_cast(thrust::distance(key_first, key_last)); + if (num_keys == 0) return; + if constexpr (std::is_arithmetic_v) { - cuco_store_ = - std::make_unique(cuco_size, - cuco::sentinel::empty_key{invalid_key}, - cuco::sentinel::empty_value{invalid_value}, - 
stream_adapter, - stream); + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(key_first, value_first)); + size_ += cuco_store_->insert(pair_first, pair_first + num_keys, stream.value()); + } else { + auto old_store_value_size = size_dataframe_buffer(store_values_); + // FIXME: we can use cuda::atomic instead but currently on a system with x86 + GPU, this + // requires placing the atomic variable on managed memory and this adds additional + // complication. + rmm::device_scalar counter(old_store_value_size, stream); + auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find); + rmm::device_uvector store_value_offsets(num_keys, stream); + thrust::tabulate( + rmm::exec_policy(stream), + store_value_offsets.begin(), + store_value_offsets.end(), + kv_cuco_insert_and_increment_t{ + mutable_device_ref, key_first, counter.data(), std::numeric_limits::max()}); + size_ += counter.value(stream); + resize_dataframe_buffer(store_values_, size_, stream); + thrust::scatter_if(rmm::exec_policy(stream), + value_first, + value_first + num_keys, + store_value_offsets.begin() /* map */, + store_value_offsets.begin() /* stencil */, + get_dataframe_buffer_begin(store_values_), + not_equal_t{std::numeric_limits::max()}); + } + } + template + void insert_if(KeyIterator key_first, + KeyIterator key_last, + ValueIterator value_first, + StencilIterator stencil_first, + PredOp pred_op, + rmm::cuda_stream_view stream) + { + auto num_keys = static_cast(thrust::distance(key_first, key_last)); + if (num_keys == 0) return; + + if constexpr (std::is_arithmetic_v) { auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(key_first, value_first)); - cuco_store_->insert(pair_first, - pair_first + num_keys, - cuco::detail::MurmurHash3_32{}, - thrust::equal_to{}, - stream); + size_ += cuco_store_->insert_if( + pair_first, pair_first + num_keys, stencil_first, pred_op, stream.value()); } else { - cuco_store_ = std::make_unique( - cuco_size, - cuco::sentinel::empty_key{invalid_key}, - cuco::sentinel::empty_value{std::numeric_limits::max()}, - stream_adapter, - stream); - store_values_ = allocate_dataframe_buffer(num_keys, stream); - invalid_value_ = invalid_value; + auto old_store_value_size = size_dataframe_buffer(store_values_); + // FIXME: we can use cuda::atomic instead but currently on a system with x86 + GPU, this + // requires placing the atomic variable on managed memory and this adds additional + // complication. 
+ rmm::device_scalar counter(old_store_value_size, stream); + auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find); + rmm::device_uvector store_value_offsets(num_keys, stream); + thrust::tabulate(rmm::exec_policy(stream), + store_value_offsets.begin(), + store_value_offsets.end(), + kv_cuco_insert_if_and_increment_t{ + mutable_device_ref, + key_first, + stencil_first, + pred_op, + counter.data(), + std::numeric_limits::max()}); + size_ += counter.value(stream); + resize_dataframe_buffer(store_values_, size_, stream); + thrust::scatter_if(rmm::exec_policy(stream), + value_first, + value_first + num_keys, + store_value_offsets.begin() /* map */, + store_value_offsets.begin() /* stencil */, + get_dataframe_buffer_begin(store_values_), + not_equal_t{std::numeric_limits::max()}); + } + } + + template + void insert_and_assign(KeyIterator key_first, + KeyIterator key_last, + ValueIterator value_first, + rmm::cuda_stream_view stream) + { + auto num_keys = static_cast(thrust::distance(key_first, key_last)); + if (num_keys == 0) return; + if constexpr (std::is_arithmetic_v) { + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(key_first, value_first)); + // FIXME: a temporary solution till insert_and_assign is added to + // cuco::experimental::static_map + auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find); + thrust::for_each(rmm::exec_policy(stream), + pair_first, + pair_first + num_keys, + detail::kv_cuco_insert_and_assign_t{mutable_device_ref}); + // FIXME: this is an upper bound of size_, as some inserts may fail due to existing keys + size_ += num_keys; + } else { + auto old_store_value_size = size_dataframe_buffer(store_values_); + // FIXME: we can use cuda::atomic instead but currently on a system with x86 + GPU, this + // requires placing the atomic variable on managed memory and this adds additional + // complication. 
+ rmm::device_scalar counter(old_store_value_size, stream); + auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find); + rmm::device_uvector store_value_offsets(num_keys, stream); + thrust::tabulate( + rmm::exec_policy(stream), + store_value_offsets.begin(), + store_value_offsets.end(), + kv_cuco_insert_and_increment_t{ + mutable_device_ref, key_first, counter.data(), std::numeric_limits::max()}); + size_ += counter.value(stream); + resize_dataframe_buffer(store_values_, size_, stream); + thrust::scatter_if(rmm::exec_policy(stream), + value_first, + value_first + num_keys, + store_value_offsets.begin() /* map */, + store_value_offsets.begin() /* stencil */, + get_dataframe_buffer_begin(store_values_), + not_equal_t{std::numeric_limits::max()}); + + // now perform assigns (for k,v pairs that failed to insert) + + rmm::device_uvector kv_indices(num_keys, stream); + thrust::sequence(rmm::exec_policy(), kv_indices.begin(), kv_indices.end(), size_t{0}); auto pair_first = thrust::make_zip_iterator( - thrust::make_tuple(key_first, thrust::make_counting_iterator(size_t{0}))); - cuco_store_->insert(pair_first, - pair_first + num_keys, - cuco::detail::MurmurHash3_32{}, - thrust::equal_to{}, - stream); - thrust::copy(rmm::exec_policy(stream), - value_first, - value_first + num_keys, - get_dataframe_buffer_begin(store_values_)); + thrust::make_tuple(store_value_offsets.begin(), kv_indices.begin())); + kv_indices.resize( + thrust::distance( + pair_first, + thrust::remove_if(rmm::exec_policy(stream), + pair_first, + pair_first + num_keys, + [invalid_idx = std::numeric_limits::max()] __device__( + auto pair) { return thrust::get<0>(pair) != invalid_idx; })), + stream); + store_value_offsets.resize(0, stream); + store_value_offsets.shrink_to_fit(stream); + + thrust::sort(rmm::exec_policy(stream), + kv_indices.begin(), + kv_indices.end(), + [key_first] __device__(auto lhs, auto rhs) { + return *(key_first + lhs) < *(key_first + rhs); + }); + kv_indices.resize(thrust::distance(kv_indices.begin(), + thrust::unique(rmm::exec_policy(stream), + kv_indices.begin(), + kv_indices.end(), + [key_first] __device__(auto lhs, auto rhs) { + return *(key_first + lhs) == + *(key_first + rhs); + })), + stream); + + thrust::for_each( + rmm::exec_policy(stream), + kv_indices.begin(), + kv_indices.end(), + [key_first, + value_first, + store_value_first = get_dataframe_buffer_begin(store_values_), + device_ref = cuco_store_->ref(cuco::experimental::find)] __device__(auto kv_idx) { + size_t store_value_offset{}; + auto found = device_ref.find(*(key_first + kv_idx)); + assert(found != device_ref.end()); + store_value_offset = (*found).second; + *(store_value_first + store_value_offset) = *(value_first + kv_idx); + }); } } + auto retrieve_all(rmm::cuda_stream_view stream) + { + rmm::device_uvector keys(size_, stream); + auto values = allocate_dataframe_buffer(0, stream); + if constexpr (std::is_arithmetic_v) { + values.resize(size_, stream); + auto pair_last = cuco_store_->retrieve_all(keys.begin(), values.begin(), stream.value()); + // FIXME: this resize (& shrink_to_fit) shouldn't be necessary if size_ is exact + keys.resize(thrust::distance(keys.begin(), std::get<0>(pair_last)), stream); + values.resize(keys.size(), stream); + } else { + rmm::device_uvector indices(size_, stream); + auto pair_last = cuco_store_->retrieve_all(keys.begin(), indices.begin(), stream.value()); + // FIXME: this resize (& shrink_to_fit) shouldn't be necessary if size_ is exact + keys.resize(thrust::distance(keys.begin(), 
std::get<0>(pair_last)), stream); + indices.resize(keys.size(), stream); + resize_dataframe_buffer(values, keys.size(), stream); + thrust::gather(rmm::exec_policy(stream), + indices.begin(), + indices.end(), + get_dataframe_buffer_begin(store_values_), + get_dataframe_buffer_begin(values)); + } + return std::make_tuple(std::move(keys), std::move(values)); + } + + auto release(rmm::cuda_stream_view stream) + { + auto [retrieved_keys, retrieved_values] = retrieve_all(stream); + allocate(0, invalid_key(), invalid_value(), stream); + capacity_ = 0; + size_ = 0; + return std::make_tuple(std::move(retrieved_keys), std::move(retrieved_values)); + } + cuco_store_type const* cuco_store_ptr() const { return cuco_store_.get(); } template @@ -490,18 +787,60 @@ class kv_cuco_store_t { return get_dataframe_buffer_cbegin(store_values_); } - key_t invalid_key() const { return cuco_store_.get_empty_key_sentinel(); } + key_t invalid_key() const { return cuco_store_->empty_key_sentinel(); } value_t invalid_value() const { if constexpr (std::is_arithmetic_v) { - return cuco_store_.get_empty_value_sentinel(); + return cuco_store_->empty_value_sentinel(); } else { return invalid_value_; } } + // FIXME: currently this returns an upper-bound + size_t size() const { return size_; } + + size_t capacity() const { return capacity_; } + private: + void allocate(size_t num_keys, + key_t invalid_key, + value_t invalid_value, + rmm::cuda_stream_view stream) + { + double constexpr load_factor = 0.7; + auto cuco_size = std::max( + static_cast(static_cast(num_keys) / load_factor), + static_cast(num_keys) + 1); // cuco::static_map requires at least one empty slot + + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor( + rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()), stream); + if constexpr (std::is_arithmetic_v) { + cuco_store_ = std::make_unique( + cuco_size, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{invalid_value}, + thrust::equal_to{}, + cuco::experimental::linear_probing<1, // CG size + cuco::murmurhash3_32>{}, + stream_adapter, + stream.value()); + } else { + cuco_store_ = std::make_unique( + cuco_size, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{std::numeric_limits::max()}, + thrust::equal_to{}, + cuco::experimental::linear_probing<1, // CG size + cuco::murmurhash3_32>{}, + stream_adapter, + stream); + store_values_ = allocate_dataframe_buffer(0, stream); + reserve_dataframe_buffer(store_values_, num_keys, stream); + } + } + std::unique_ptr cuco_store_{nullptr}; std::conditional_t, decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{})), @@ -510,6 +849,12 @@ class kv_cuco_store_t { std::conditional_t, value_t, std::byte /* dummy */> invalid_value_{}; + size_t capacity_{0}; + size_t size_{ + 0}; // caching as cuco_store_->size() is expensive (this scans the entire slots to handle + // user inserts through a device reference (and currently this is an upper bound (this + // will become exact once we fully switch to cuco::experimental::static_map and use the + // static_map class's insert_and_assign function; this function will be added soon) }; } // namespace detail @@ -528,6 +873,23 @@ class kv_store_t { kv_store_t(rmm::cuda_stream_view stream) : store_(stream) {} + /* when use_binary_search = false */ + template + kv_store_t( + size_t capacity /* one can expect good performance till the capacity, the actual underlying + capacity can be larger (for performance & correctness reasons) */ + , + key_t invalid_key /* invalid 
key shouldn't appear in any *iter in [key_first, key_last) */, + value_t invalid_value /* invalid_value shouldn't appear in any *iter in [value_first, + value_first + thrust::distance(key_first, key_last)), invalid_value is + returned when match fails for the given key */ + , + rmm::cuda_stream_view stream, + std::enable_if_t = 0) + : store_(capacity, invalid_key, invalid_value, stream) + { + } + /* when use_binary_search = true */ template kv_store_t( @@ -576,6 +938,47 @@ class kv_store_t { { } + /* when use binary_search = false, this requires that the capacity is large enough */ + template + std::enable_if_t insert(KeyIterator key_first, + KeyIterator key_last, + ValueIterator value_first, + rmm::cuda_stream_view stream) + { + store_.insert(key_first, key_last, value_first, stream); + } + + /* when use binary_search = false, this requires that the capacity is large enough */ + template + std::enable_if_t insert_if(KeyIterator key_first, + KeyIterator key_last, + ValueIterator value_first, + StencilIterator stencil_first, + PredOp pred_op, + rmm::cuda_stream_view stream) + { + store_.insert_if(key_first, key_last, value_first, stencil_first, pred_op, stream); + } + + /* when use binary_search = false, this requires that the capacity is large enough */ + template + std::enable_if_t insert_and_assign(KeyIterator key_first, + KeyIterator key_last, + ValueIterator value_first, + rmm::cuda_stream_view stream) + { + store_.insert_and_assign(key_first, key_last, value_first, stream); + } + + auto retrieve_all(rmm::cuda_stream_view stream) const { return store_.retrieve_all(stream); } + + // kv_store_t becomes empty after release + auto release(rmm::cuda_stream_view stream) { return store_.release(stream); } + auto view() const { if constexpr (use_binary_search) { @@ -593,6 +996,18 @@ class kv_store_t { } } + template + std::enable_if_t invalid_key() const + { + return store_.invalid_key(); + } + + value_t invalid_value() const { return store_.invalid_value(); } + + size_t size() const { return store_.size(); } + + size_t capacity() const { return store_.capacity(); } + private: std::conditional_t, diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index f30de0750e3..d69bb8af25e 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -346,7 +346,7 @@ void per_v_pair_transform_dst_nbr_intersection( // partition? This may provide additional performance improvement opportunities??? 
auto chunk_vertex_pair_first = thrust::make_transform_iterator( chunk_vertex_pair_index_first, - detail::indirection_t{vertex_pair_first}); + detail::indirection_t{vertex_pair_first}); auto [intersection_offsets, intersection_indices] = detail::nbr_intersection(handle, graph_view, diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index 69cce08d352..d7c094a2361 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -399,11 +399,12 @@ rmm::device_uvector get_sampling_index_without_replacement( if (retry_segment_indices) { retry_degrees = rmm::device_uvector((*retry_segment_indices).size(), handle.get_stream()); - thrust::transform(handle.get_thrust_policy(), - (*retry_segment_indices).begin(), - (*retry_segment_indices).end(), - (*retry_degrees).begin(), - indirection_t{segment_degree_first}); + thrust::transform( + handle.get_thrust_policy(), + (*retry_segment_indices).begin(), + (*retry_segment_indices).end(), + (*retry_degrees).begin(), + indirection_t{segment_degree_first}); retry_sample_nbr_indices = rmm::device_uvector( (*retry_segment_indices).size() * high_partition_over_sampling_K, handle.get_stream()); retry_sample_indices = rmm::device_uvector( diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index a4d34443413..2e19adc34c4 100644 --- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -756,7 +756,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( : thrust::nullopt; std::conditional_t, - detail::kv_cuco_store_device_view_t> + detail::kv_cuco_store_find_device_view_t> dst_key_value_map_device_view( GraphViewType::is_multi_gpu ? 
multi_gpu_minor_key_value_map_ptr->view() : kv_store_view); thrust::transform(handle.get_thrust_policy(), diff --git a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh index 4823c1febf4..b5cfdf4b16b 100644 --- a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh +++ b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh @@ -293,9 +293,11 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( GraphViewType::is_multi_gpu>(handle, edge_partition, std::nullopt, + std::nullopt, majors.data(), minors.data(), std::nullopt, + std::nullopt, segment_offsets); auto vertex_pair_first = diff --git a/cpp/src/sampling/neighborhood.cu b/cpp/src/sampling/neighborhood.cu index 0c0beb8d8b0..2f7b203a319 100644 --- a/cpp/src/sampling/neighborhood.cu +++ b/cpp/src/sampling/neighborhood.cu @@ -22,6 +22,8 @@ #include +#include + namespace cugraph { template @@ -34,14 +36,19 @@ sample_neighbors_adjacency_list(raft::handle_t const& handle, size_t sampling_size, ops::graph::SamplingAlgoT sampling_algo) { - const auto [ops_graph, max_degree] = detail::get_graph_and_max_degree(graph_view); - return ops::graph::uniform_sample_csr(rng_state, + using base_vertex_t = std::decay_t; + using base_edge_t = std::decay_t; + static_assert(std::is_same_v, + "cugraph-ops sampling not yet implemented for different node and edge types"); + + const auto ops_graph = detail::get_graph(graph_view); + return ops::graph::uniform_sample_csc(rng_state, ops_graph, ptr_d_start, num_start_vertices, sampling_size, sampling_algo, - max_degree, + ops_graph.dst_max_in_degree, handle.get_stream()); } @@ -55,14 +62,19 @@ std::tuple, rmm::device_uvector> sample_ size_t sampling_size, ops::graph::SamplingAlgoT sampling_algo) { - const auto [ops_graph, max_degree] = detail::get_graph_and_max_degree(graph_view); + using base_vertex_t = std::decay_t; + using base_edge_t = std::decay_t; + static_assert(std::is_same_v, + "cugraph-ops sampling not yet implemented for different node and edge types"); + + const auto ops_graph = detail::get_graph(graph_view); return ops::graph::uniform_sample_coo(rng_state, ops_graph, ptr_d_start, num_start_vertices, sampling_size, sampling_algo, - max_degree, + ops_graph.dst_max_in_degree, handle.get_stream()); } diff --git a/cpp/src/structure/coarsen_graph_impl.cuh b/cpp/src/structure/coarsen_graph_impl.cuh index 6dacbee2fb1..b8dc28d563e 100644 --- a/cpp/src/structure/coarsen_graph_impl.cuh +++ b/cpp/src/structure/coarsen_graph_impl.cuh @@ -168,9 +168,12 @@ decompress_edge_partition_to_relabeled_and_grouped_and_coarsened_edgelist( handle, edge_partition, edge_partition_weight_view, + std::optional>{ + std::nullopt}, edgelist_majors.data(), edgelist_minors.data(), edgelist_weights ? 
std::optional{(*edgelist_weights).data()} : std::nullopt, + std::optional{std::nullopt}, segment_offsets); auto pair_first = diff --git a/cpp/src/structure/decompress_to_edgelist_impl.cuh b/cpp/src/structure/decompress_to_edgelist_impl.cuh index fb0ffdb96c1..d653307c620 100644 --- a/cpp/src/structure/decompress_to_edgelist_impl.cuh +++ b/cpp/src/structure/decompress_to_edgelist_impl.cuh @@ -52,11 +52,13 @@ template , rmm::device_uvector, - std::optional>>> + std::optional>, + std::optional>>> decompress_to_edgelist_impl( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check) { @@ -86,6 +88,9 @@ decompress_to_edgelist_impl( rmm::device_uvector edgelist_majors(number_of_local_edges, handle.get_stream()); rmm::device_uvector edgelist_minors(edgelist_majors.size(), handle.get_stream()); + auto edgelist_ids = edge_id_view ? std::make_optional>( + edgelist_majors.size(), handle.get_stream()) + : std::nullopt; auto edgelist_weights = edge_weight_view ? std::make_optional>( edgelist_majors.size(), handle.get_stream()) : std::nullopt; @@ -101,10 +106,15 @@ decompress_to_edgelist_impl( detail::edge_partition_edge_property_device_view_t>( (*edge_weight_view), i) : std::nullopt, + edge_id_view ? std::make_optional< + detail::edge_partition_edge_property_device_view_t>( + (*edge_id_view), i) + : std::nullopt, edgelist_majors.data() + cur_size, edgelist_minors.data() + cur_size, edgelist_weights ? std::optional{(*edgelist_weights).data() + cur_size} : std::nullopt, + edgelist_ids ? std::optional{(*edgelist_ids).data() + cur_size} : std::nullopt, graph_view.local_edge_partition_segment_offsets(i)); cur_size += edgelist_edge_counts[i]; } @@ -131,16 +141,34 @@ decompress_to_edgelist_impl( major_ptrs[i] = edgelist_majors.data() + cur_size; minor_ptrs[i] = edgelist_minors.data() + cur_size; if (edgelist_weights) { - thrust::sort_by_key(handle.get_thrust_policy(), - minor_ptrs[i], - minor_ptrs[i] + edgelist_edge_counts[i], - thrust::make_zip_iterator(thrust::make_tuple( - major_ptrs[i], (*edgelist_weights).data() + cur_size))); + if (edgelist_ids) { + thrust::sort_by_key( + handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + thrust::make_zip_iterator(thrust::make_tuple(major_ptrs[i], + (*edgelist_ids).data() + cur_size, + (*edgelist_weights).data() + cur_size))); + } else { + thrust::sort_by_key(handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + thrust::make_zip_iterator(thrust::make_tuple( + major_ptrs[i], (*edgelist_weights).data() + cur_size))); + } } else { - thrust::sort_by_key(handle.get_thrust_policy(), - minor_ptrs[i], - minor_ptrs[i] + edgelist_edge_counts[i], - major_ptrs[i]); + if (edgelist_ids) { + thrust::sort_by_key(handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + thrust::make_zip_iterator(thrust::make_tuple( + major_ptrs[i], (*edgelist_ids).data() + cur_size))); + } else { + thrust::sort_by_key(handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + major_ptrs[i]); + } } rmm::device_uvector d_segment_offsets(d_thresholds.size(), handle.get_stream()); thrust::lower_bound(handle.get_thrust_policy(), @@ -172,7 +200,8 @@ decompress_to_edgelist_impl( return std::make_tuple(store_transposed ? std::move(edgelist_minors) : std::move(edgelist_majors), store_transposed ? 
std::move(edgelist_majors) : std::move(edgelist_minors), - std::move(edgelist_weights)); + std::move(edgelist_weights), + std::move(edgelist_ids)); } template , rmm::device_uvector, - std::optional>>> + std::optional>, + std::optional>>> decompress_to_edgelist_impl( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check) { @@ -206,6 +237,9 @@ decompress_to_edgelist_impl( auto edgelist_weights = edge_weight_view ? std::make_optional>( edgelist_majors.size(), handle.get_stream()) : std::nullopt; + auto edgelist_ids = edge_id_view ? std::make_optional>( + edgelist_majors.size(), handle.get_stream()) + : std::nullopt; detail::decompress_edge_partition_to_edgelist( handle, edge_partition_device_view_t( @@ -215,9 +249,14 @@ decompress_to_edgelist_impl( detail::edge_partition_edge_property_device_view_t>( (*edge_weight_view), 0) : std::nullopt, + edge_id_view ? std::make_optional< + detail::edge_partition_edge_property_device_view_t>( + (*edge_id_view), 0) + : std::nullopt, edgelist_majors.data(), edgelist_minors.data(), edgelist_weights ? std::optional{(*edgelist_weights).data()} : std::nullopt, + edgelist_ids ? std::optional{(*edgelist_ids).data()} : std::nullopt, graph_view.local_edge_partition_segment_offsets()); if (renumber_map) { @@ -232,7 +271,8 @@ decompress_to_edgelist_impl( return std::make_tuple(store_transposed ? std::move(edgelist_minors) : std::move(edgelist_majors), store_transposed ? std::move(edgelist_majors) : std::move(edgelist_minors), - std::move(edgelist_weights)); + std::move(edgelist_weights), + std::move(edgelist_ids)); } } // namespace @@ -244,18 +284,20 @@ template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check) { CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); return decompress_to_edgelist_impl( - handle, graph_view, edge_weight_view, renumber_map, do_expensive_check); + handle, graph_view, edge_weight_view, edge_id_view, renumber_map, do_expensive_check); } } // namespace cugraph diff --git a/cpp/src/structure/decompress_to_edgelist_mg.cu b/cpp/src/structure/decompress_to_edgelist_mg.cu index 9f03570504b..fbe56ca9b04 100644 --- a/cpp/src/structure/decompress_to_edgelist_mg.cu +++ b/cpp/src/structure/decompress_to_edgelist_mg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
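The decompress_to_edgelist changes above thread an optional edge-ID column through the return tuple, which is why the sort in decompress_to_edgelist_impl now branches into four cases and zips majors, IDs, and weights together: every column that is present must be permuted by the same order as the minor-vertex keys. A host-side sketch of that "sort one key column, gather the rest through the permutation" step (names are illustrative; the `thrust::sort_by_key` + zip-iterator calls above fuse these steps on the GPU):

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <numeric>
#include <optional>
#include <vector>

void sort_edgelist_by_minor(std::vector<int>& minors,
                            std::vector<int>& majors,
                            std::optional<std::vector<std::int64_t>>& edge_ids,
                            std::optional<std::vector<float>>& weights)
{
  // sort a permutation by the key column
  std::vector<std::size_t> perm(minors.size());
  std::iota(perm.begin(), perm.end(), std::size_t{0});
  std::sort(perm.begin(), perm.end(),
            [&](std::size_t a, std::size_t b) { return minors[a] < minors[b]; });

  // apply the permutation to a column in place
  auto gather = [&](auto& column) {
    auto tmp = column;
    for (std::size_t i = 0; i < perm.size(); ++i) { column[i] = tmp[perm[i]]; }
  };
  gather(minors);
  gather(majors);
  if (edge_ids) { gather(*edge_ids); }  // permute only the columns that exist
  if (weights) { gather(*weights); }
}
```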
@@ -21,121 +21,145 @@ namespace cugraph { template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> 
edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); diff --git a/cpp/src/structure/decompress_to_edgelist_sg.cu b/cpp/src/structure/decompress_to_edgelist_sg.cu index 296f39fdfd2..5b8e410e087 100644 --- a/cpp/src/structure/decompress_to_edgelist_sg.cu +++ b/cpp/src/structure/decompress_to_edgelist_sg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,121 +21,145 @@ namespace cugraph { template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, 
std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); diff --git a/cpp/src/structure/relabel_impl.cuh b/cpp/src/structure/relabel_impl.cuh index c4c34733a4d..192120e6b4c 100644 --- a/cpp/src/structure/relabel_impl.cuh +++ b/cpp/src/structure/relabel_impl.cuh @@ -142,7 +142,7 @@ void relabel(raft::handle_t const& handle, handle.get_stream()); if (skip_missing_labels) { - auto device_view = detail::kv_cuco_store_device_view_t(relabel_map_view); + auto device_view = detail::kv_cuco_store_find_device_view_t(relabel_map_view); thrust::transform( handle.get_thrust_policy(), rx_unique_old_labels.begin(), @@ -187,7 +187,7 @@ void relabel(raft::handle_t const& handle, handle.get_stream()); auto relabel_map_view = relabel_map.view(); if (skip_missing_labels) { - auto device_view = detail::kv_cuco_store_device_view_t(relabel_map_view); + auto device_view = detail::kv_cuco_store_find_device_view_t(relabel_map_view); thrust::transform( handle.get_thrust_policy(), labels, diff --git a/cpp/src/structure/symmetrize_graph_impl.cuh b/cpp/src/structure/symmetrize_graph_impl.cuh index 4afa4122a06..3334e089ba3 100644 --- a/cpp/src/structure/symmetrize_graph_impl.cuh +++ b/cpp/src/structure/symmetrize_graph_impl.cuh @@ -73,12 +73,17 @@ symmetrize_graph_impl( auto is_multigraph = graph.is_multigraph(); - auto [edgelist_srcs, edgelist_dsts, edgelist_weights] = decompress_to_edgelist( + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::optional> edgelist_weights{std::nullopt}; + + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( handle, graph_view, edge_weights ? std::optional>{(*edge_weights).view()} : std::nullopt, + std::optional>{std::nullopt}, std::make_optional>((*renumber_map).data(), (*renumber_map).size())); graph = graph_t(handle); @@ -158,12 +163,17 @@ symmetrize_graph_impl( auto is_multigraph = graph.is_multigraph(); bool renumber = renumber_map.has_value(); - auto [edgelist_srcs, edgelist_dsts, edgelist_weights] = decompress_to_edgelist( + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::optional> edgelist_weights{std::nullopt}; + + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( handle, graph_view, edge_weights ? std::optional>{(*edge_weights).view()} : std::nullopt, + std::optional>{std::nullopt}, renumber_map ? 
std::make_optional>((*renumber_map).data(), (*renumber_map).size()) : std::nullopt); diff --git a/cpp/src/structure/transpose_graph_impl.cuh b/cpp/src/structure/transpose_graph_impl.cuh index c2609362b0b..5b418a15478 100644 --- a/cpp/src/structure/transpose_graph_impl.cuh +++ b/cpp/src/structure/transpose_graph_impl.cuh @@ -74,12 +74,17 @@ transpose_graph_impl( auto is_multigraph = graph.is_multigraph(); - auto [edgelist_srcs, edgelist_dsts, edgelist_weights] = decompress_to_edgelist( + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::optional> edgelist_weights{std::nullopt}; + + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( handle, graph_view, edge_weights ? std::optional>{(*edge_weights).view()} : std::nullopt, + std::optional>{std::nullopt}, std::make_optional>((*renumber_map).data(), (*renumber_map).size())); graph = graph_t(handle); @@ -165,12 +170,17 @@ transpose_graph_impl( auto is_multigraph = graph.is_multigraph(); bool renumber = renumber_map.has_value(); - auto [edgelist_srcs, edgelist_dsts, edgelist_weights] = decompress_to_edgelist( + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::optional> edgelist_weights{std::nullopt}; + + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( handle, graph_view, edge_weights ? std::optional>{(*edge_weights).view()} : std::nullopt, + std::optional>{std::nullopt}, renumber_map ? std::make_optional>((*renumber_map).data(), (*renumber_map).size()) : std::nullopt); diff --git a/cpp/src/structure/transpose_graph_storage_impl.cuh b/cpp/src/structure/transpose_graph_storage_impl.cuh index b34d2f67dcd..980c9b10c53 100644 --- a/cpp/src/structure/transpose_graph_storage_impl.cuh +++ b/cpp/src/structure/transpose_graph_storage_impl.cuh @@ -74,12 +74,17 @@ transpose_graph_storage_impl( // FIXME: if is_symmetric is true we can do this more efficiently, // since the graph contents should be exactly the same - auto [edgelist_srcs, edgelist_dsts, edgelist_weights] = decompress_to_edgelist( + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::optional> edgelist_weights{std::nullopt}; + + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( handle, graph_view, edge_weights ? std::optional>{(*edge_weights).view()} : std::nullopt, + std::optional>{std::nullopt}, std::make_optional>((*renumber_map).data(), (*renumber_map).size())); graph = graph_t(handle); @@ -170,12 +175,17 @@ transpose_graph_storage_impl( // FIXME: if is_symmetric is true we can do this more efficiently, // since the graph contents should be exactly the same - auto [edgelist_srcs, edgelist_dsts, edgelist_weights] = decompress_to_edgelist( + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::optional> edgelist_weights{std::nullopt}; + + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( handle, graph_view, edge_weights ? std::optional>{(*edge_weights).view()} : std::nullopt, + std::optional>{std::nullopt}, renumber_map ? 
std::make_optional>((*renumber_map).data(), (*renumber_map).size()) : std::nullopt); diff --git a/cpp/src/utilities/cugraph_ops_utils.hpp b/cpp/src/utilities/cugraph_ops_utils.hpp index 1dbe930e4c9..9aea4183866 100644 --- a/cpp/src/utilities/cugraph_ops_utils.hpp +++ b/cpp/src/utilities/cugraph_ops_utils.hpp @@ -20,18 +20,20 @@ #include -#include - namespace cugraph { namespace detail { template -ops::graph::fg_csr get_graph( +ops::graph::csc get_graph( graph_view_t const& gview) { - ops::graph::fg_csr graph; - graph.n_nodes = gview.number_of_vertices(); - graph.n_indices = gview.number_of_edges(); + ops::graph::csc graph; + graph.n_src_nodes = gview.number_of_vertices(); + graph.n_dst_nodes = gview.number_of_vertices(); + graph.n_indices = gview.number_of_edges(); + // FIXME this is sufficient for now, but if there is a fast (cached) way + // of getting max degree, use that instead + graph.dst_max_in_degree = std::numeric_limits::max(); // FIXME: this is evil and is just temporary until we have a matching type in cugraph-ops // or we change the type accepted by the functions calling into cugraph-ops graph.offsets = const_cast(gview.local_edge_partition_view().offsets().data()); @@ -39,15 +41,5 @@ ops::graph::fg_csr get_graph( return graph; } -template -std::tuple, NodeTypeT> get_graph_and_max_degree( - graph_view_t const& gview) -{ - // FIXME this is sufficient for now, but if there is a fast (cached) way - // of getting max degree, use that instead - auto max_degree = std::numeric_limits::max(); - return std::make_tuple(get_graph(gview), max_degree); -} - } // namespace detail } // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 7d4a2181af1..3bcd5546455 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -606,6 +606,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureCTestMG(MG_CAPI_KATZ_TEST c_api/mg_katz_test.c c_api/mg_test_utils.cpp) ConfigureCTestMG(MG_CAPI_EIGENVECTOR_CENTRALITY_TEST c_api/mg_eigenvector_centrality_test.c c_api/mg_test_utils.cpp) ConfigureCTestMG(MG_CAPI_BETWEENNESS_CENTRALITY_TEST c_api/mg_betweenness_centrality_test.c c_api/mg_test_utils.cpp) + ConfigureCTestMG(MG_CAPI_EDGE_BETWEENNESS_CENTRALITY_TEST c_api/mg_edge_betweenness_centrality_test.c c_api/mg_test_utils.cpp) ConfigureCTestMG(MG_CAPI_HITS_TEST c_api/mg_hits_test.c c_api/mg_test_utils.cpp) ConfigureCTestMG(MG_CAPI_UNIFORM_NEIGHBOR_SAMPLE_TEST c_api/mg_uniform_neighbor_sample_test.c c_api/mg_test_utils.cpp) ConfigureCTestMG(MG_CAPI_RANDOM_WALKS_TEST c_api/mg_random_walks_test.c c_api/mg_test_utils.cpp) @@ -654,6 +655,7 @@ ConfigureCTest(CAPI_PAGERANK_TEST c_api/pagerank_test.c) ConfigureCTest(CAPI_KATZ_TEST c_api/katz_test.c) ConfigureCTest(CAPI_EIGENVECTOR_CENTRALITY_TEST c_api/eigenvector_centrality_test.c) ConfigureCTest(CAPI_BETWEENNESS_CENTRALITY_TEST c_api/betweenness_centrality_test.c) +ConfigureCTest(CAPI_EDGE_BETWEENNESS_CENTRALITY_TEST c_api/edge_betweenness_centrality_test.c) ConfigureCTest(CAPI_HITS_TEST c_api/hits_test.c) ConfigureCTest(CAPI_BFS_TEST c_api/bfs_test.c) ConfigureCTest(CAPI_SSSP_TEST c_api/sssp_test.c) diff --git a/cpp/tests/c_api/edge_betweenness_centrality.c b/cpp/tests/c_api/edge_betweenness_centrality_test.c similarity index 51% rename from cpp/tests/c_api/edge_betweenness_centrality.c rename to cpp/tests/c_api/edge_betweenness_centrality_test.c index 7a56f90eac7..ab119288fab 100644 --- a/cpp/tests/c_api/edge_betweenness_centrality.c +++ b/cpp/tests/c_api/edge_betweenness_centrality_test.c @@ -29,9 +29,11 @@ typedef float 
weight_t; int generic_edge_betweenness_centrality_test(vertex_t* h_src, vertex_t* h_dst, weight_t* h_wgt, + vertex_t* h_seeds, weight_t* h_result, size_t num_vertices, size_t num_edges, + size_t num_seeds, bool_t store_transposed, size_t num_vertices_to_sample) { @@ -40,64 +42,102 @@ int generic_edge_betweenness_centrality_test(vertex_t* h_src, cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; cugraph_error_t* ret_error; - cugraph_resource_handle_t* p_handle = NULL; - cugraph_graph_t* p_graph = NULL; - cugraph_centrality_result_t* p_result = NULL; - cugraph_rng_state_t* rng_state = NULL; + cugraph_resource_handle_t* handle = NULL; + cugraph_graph_t* graph = NULL; + cugraph_edge_centrality_result_t* result = NULL; + cugraph_rng_state_t* rng_state = NULL; + cugraph_type_erased_device_array_t* seeds = NULL; + cugraph_type_erased_device_array_view_t* seeds_view = NULL; - p_handle = cugraph_create_resource_handle(NULL); - TEST_ASSERT(test_ret_value, p_handle != NULL, "resource handle creation failed."); + handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed."); - ret_code = cugraph_rng_state_create(p_handle, 0, &rng_state, &ret_error); + ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "failed to create rng_state."); - ret_code = create_test_graph(p_handle, + ret_code = create_test_graph(handle, h_src, h_dst, h_wgt, num_edges, - rng_state, store_transposed, FALSE, FALSE, - &p_graph, + &graph, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + if (h_seeds == NULL) { + ret_code = cugraph_select_random_vertices( + handle, graph, rng_state, num_vertices_to_sample, &seeds, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "select random seeds failed."); + + seeds_view = cugraph_type_erased_device_array_view(seeds); + } else { + ret_code = + cugraph_type_erased_device_array_create(handle, num_seeds, INT32, &seeds, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "seeds create failed."); + + seeds_view = cugraph_type_erased_device_array_view(seeds); + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, seeds_view, (byte_t*)h_seeds, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "seeds copy_from_host failed."); + } + ret_code = cugraph_edge_betweenness_centrality( - p_handle, p_graph, num_vertices_to_sample, NULL, FALSE, FALSE, &p_result, &ret_error); + handle, graph, seeds_view, FALSE, FALSE, &result, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); TEST_ASSERT( test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_edge_betweenness_centrality failed."); - cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* srcs; + cugraph_type_erased_device_array_view_t* dsts; cugraph_type_erased_device_array_view_t* centralities; - vertices = cugraph_centrality_result_get_vertices(p_result); - centralities = cugraph_centrality_result_get_values(p_result); + srcs = cugraph_edge_centrality_result_get_src_vertices(result); + dsts = cugraph_edge_centrality_result_get_dst_vertices(result); + centralities = cugraph_edge_centrality_result_get_values(result); + + size_t num_local_edges = cugraph_type_erased_device_array_view_size(srcs); - vertex_t 
h_vertices[num_vertices]; - weight_t h_centralities[num_vertices]; + vertex_t h_cugraph_src[num_local_edges]; + vertex_t h_cugraph_dst[num_local_edges]; + weight_t h_centralities[num_local_edges]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_cugraph_src, srcs , &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); ret_code = cugraph_type_erased_device_array_view_copy_to_host( - p_handle, (byte_t*)h_vertices, vertices, &ret_error); + handle, (byte_t*)h_cugraph_dst, dsts, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); ret_code = cugraph_type_erased_device_array_view_copy_to_host( - p_handle, (byte_t*)h_centralities, centralities, &ret_error); + handle, (byte_t*)h_centralities, centralities, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); - for (int i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) { + weight_t M[num_vertices][num_vertices]; + + for (int i = 0; i < num_vertices; ++i) + for (int j = 0; j < num_vertices; ++j) { + M[i][j] = 0.0; + } + + for (int i = 0; i < num_edges; ++i) { + M[h_src[i]][h_dst[i]] = h_result[i]; + } + + for (int i = 0; (i < num_local_edges) && (test_ret_value == 0); ++i) { TEST_ASSERT(test_ret_value, - nearlyEqual(h_result[h_vertices[i]], h_centralities[i], 0.001), - "centralities results don't match"); + nearlyEqual(M[h_cugraph_src[i]][h_cugraph_dst[i]], h_centralities[i], 0.001), + "betweenness centrality results don't match"); } - cugraph_centrality_result_free(p_result); - cugraph_sg_graph_free(p_graph); - cugraph_free_resource_handle(p_handle); + cugraph_edge_centrality_result_free(result); + cugraph_sg_graph_free(graph); + cugraph_free_resource_handle(handle); cugraph_error_free(ret_error); return test_ret_value; @@ -112,14 +152,14 @@ int test_edge_betweenness_centrality() vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; weight_t h_wgt[] = { 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - weight_t h_result[] = {0.236325, 0.292055, 0.458457, 0.60533, 0.190498, 0.495942}; + weight_t h_result[] = { 0, 2, 3, 1.83333, 2, 2, 3, 2, 3.16667, 2.83333, 4.33333, 0, 2, 2.83333, 3.66667, 2.33333 }; double epsilon = 1e-6; size_t max_iterations = 200; // Eigenvector centrality wants store_transposed = TRUE return generic_edge_betweenness_centrality_test( - h_src, h_dst, h_wgt, h_result, num_vertices, num_edges, TRUE, 5); + h_src, h_dst, h_wgt, NULL, h_result, num_vertices, num_edges, 0, TRUE, 5); } /******************************************************************************/ diff --git a/cpp/tests/c_api/mg_edge_betweenness_centrality.c b/cpp/tests/c_api/mg_edge_betweenness_centrality_test.c similarity index 54% rename from cpp/tests/c_api/mg_edge_betweenness_centrality.c rename to cpp/tests/c_api/mg_edge_betweenness_centrality_test.c index 17ce717dcfe..13f0085be84 100644 --- a/cpp/tests/c_api/mg_edge_betweenness_centrality.c +++ b/cpp/tests/c_api/mg_edge_betweenness_centrality_test.c @@ -29,9 +29,11 @@ int generic_edge_betweenness_centrality_test(const cugraph_resource_handle_t* ha vertex_t* h_src, vertex_t* h_dst, weight_t* h_wgt, + vertex_t* h_seeds, weight_t* h_result, size_t num_vertices, size_t num_edges, + size_t num_seeds, bool_t store_transposed, size_t num_vertices_to_sample) { @@ -40,16 +42,43 @@ int generic_edge_betweenness_centrality_test(const cugraph_resource_handle_t* ha cugraph_error_code_t 
ret_code = CUGRAPH_SUCCESS; cugraph_error_t* ret_error; - cugraph_graph_t* p_graph = NULL; - cugraph_centrality_result_t* p_result = NULL; + cugraph_graph_t* graph = NULL; + cugraph_edge_centrality_result_t* result = NULL; + cugraph_rng_state_t* rng_state = NULL; + cugraph_type_erased_device_array_t* seeds = NULL; + cugraph_type_erased_device_array_view_t* seeds_view = NULL; ret_code = create_mg_test_graph( - handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, &p_graph, &ret_error); + handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, &graph, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_mg_test_graph failed."); + int rank = cugraph_resource_handle_get_rank(handle); + + ret_code = cugraph_rng_state_create(handle, rank, &rng_state, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "failed to create rng_state."); + + if (h_seeds == NULL) { + ret_code = cugraph_select_random_vertices( + handle, graph, rng_state, num_vertices_to_sample, &seeds, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "select random seeds failed."); + + seeds_view = cugraph_type_erased_device_array_view(seeds); + } else { + if (rank > 0) num_seeds = 0; + + ret_code = + cugraph_type_erased_device_array_create(handle, num_seeds, INT32, &seeds, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "seeds create failed."); + + seeds_view = cugraph_type_erased_device_array_view(seeds); + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, seeds_view, (byte_t*)h_seeds, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "seeds copy_from_host failed."); + } + ret_code = cugraph_edge_betweenness_centrality( - handle, p_graph, num_vertices_to_sample, NULL, FALSE, FALSE, &p_result, &ret_error); + handle, graph, seeds_view, FALSE, FALSE, &result, &ret_error); TEST_ASSERT( test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_edge_betweenness_centrality failed."); @@ -57,33 +86,51 @@ int generic_edge_betweenness_centrality_test(const cugraph_resource_handle_t* ha // the returned values with the expected results for the entire // graph. Each GPU will have a subset of the total vertices, so // they will do a subset of the comparisons. 
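Both the SG and MG versions of this test now validate per-edge centralities the same way: the C API returns edges in an arbitrary (src, dst) order, so the expected value is looked up through a dense matrix keyed by endpoint pair rather than by array index. A minimal host-side sketch of that pattern, with hypothetical names and the assumption that vertex ids are dense in [0, num_vertices):

#include <cmath>
#include <cstddef>
#include <vector>

// Build a dense expected-value matrix M[src][dst] from the reference edge
// list, then check each (src, dst, value) triple the library returned.
bool validate_edge_centralities(std::vector<int> const& ref_src,
                                std::vector<int> const& ref_dst,
                                std::vector<float> const& ref_val,  // per input edge
                                std::vector<int> const& out_src,
                                std::vector<int> const& out_dst,
                                std::vector<float> const& out_val,  // per returned edge
                                std::size_t num_vertices,
                                float tol = 0.001f)
{
  std::vector<std::vector<float>> M(num_vertices, std::vector<float>(num_vertices, 0.0f));
  for (std::size_t i = 0; i < ref_src.size(); ++i) { M[ref_src[i]][ref_dst[i]] = ref_val[i]; }

  for (std::size_t i = 0; i < out_src.size(); ++i) {
    if (std::fabs(M[out_src[i]][out_dst[i]] - out_val[i]) > tol) { return false; }
  }
  return true;
}

A dense matrix is fine for these small fixtures; for larger graphs a hash map keyed on the (src, dst) pair would be the natural replacement.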
- cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* srcs; + cugraph_type_erased_device_array_view_t* dsts; cugraph_type_erased_device_array_view_t* centralities; - vertices = cugraph_centrality_result_get_vertices(p_result); - centralities = cugraph_centrality_result_get_values(p_result); + srcs = cugraph_edge_centrality_result_get_src_vertices(result); + dsts = cugraph_edge_centrality_result_get_dst_vertices(result); + centralities = cugraph_edge_centrality_result_get_values(result); + + size_t num_local_edges = cugraph_type_erased_device_array_view_size(srcs); - vertex_t h_vertices[num_vertices]; - weight_t h_centralities[num_vertices]; + vertex_t h_cugraph_src[num_local_edges]; + vertex_t h_cugraph_dst[num_local_edges]; + weight_t h_centralities[num_local_edges]; ret_code = cugraph_type_erased_device_array_view_copy_to_host( - handle, (byte_t*)h_vertices, vertices, &ret_error); + handle, (byte_t*)h_cugraph_src, srcs , &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_cugraph_dst, dsts, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); ret_code = cugraph_type_erased_device_array_view_copy_to_host( handle, (byte_t*)h_centralities, centralities, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); - size_t num_local_vertices = cugraph_type_erased_device_array_view_size(vertices); + weight_t M[num_vertices][num_vertices]; + + for (int i = 0; i < num_vertices; ++i) + for (int j = 0; j < num_vertices; ++j) { + M[i][j] = 0.0; + } - for (int i = 0; (i < num_local_vertices) && (test_ret_value == 0); ++i) { + for (int i = 0; i < num_edges; ++i) { + M[h_src[i]][h_dst[i]] = h_result[i]; + } + + for (int i = 0; (i < num_local_edges) && (test_ret_value == 0); ++i) { TEST_ASSERT(test_ret_value, - nearlyEqual(h_result[h_vertices[i]], h_centralities[i], 0.001), + nearlyEqual(M[h_cugraph_src[i]][h_cugraph_dst[i]], h_centralities[i], 0.001), "betweenness centrality results don't match"); } - cugraph_centrality_result_free(p_result); - cugraph_mg_graph_free(p_graph); + cugraph_edge_centrality_result_free(result); + cugraph_mg_graph_free(graph); cugraph_error_free(ret_error); return test_ret_value; @@ -98,14 +145,16 @@ int test_edge_betweenness_centrality(const cugraph_resource_handle_t* handle) vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; weight_t h_wgt[] = { 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - weight_t h_result[] = {0.236374, 0.292046, 0.458369, 0.605472, 0.190544, 0.495814}; + weight_t h_result[] = { 3.16667, 2.83333, 4.33333, 1.83333, 2, 2.83333, 3.66667, 2.33333, + 3.16667, 2.83333, 4.33333, 1.83333, 2, 2.83333, 3.66667, 2.33333 }; + double epsilon = 1e-6; size_t max_iterations = 200; // Eigenvector centrality wants store_transposed = TRUE return generic_edge_betweenness_centrality_test( - handle, h_src, h_dst, h_wgt, h_result, num_vertices, num_edges, TRUE, 5); + handle, h_src, h_dst, h_wgt, NULL, h_result, num_vertices, num_edges, 0, TRUE, 6); } /******************************************************************************/ diff --git a/cpp/tests/c_api/mg_pagerank_test.c b/cpp/tests/c_api/mg_pagerank_test.c index 09925b9ac4e..9c142236808 100644 --- a/cpp/tests/c_api/mg_pagerank_test.c +++ b/cpp/tests/c_api/mg_pagerank_test.c @@ -100,6 +100,81 @@ int 
generic_pagerank_test(const cugraph_resource_handle_t* handle, return test_ret_value; } +int generic_pagerank_nonconverging_test(const cugraph_resource_handle_t* handle, + vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + weight_t* h_result, + size_t num_vertices, + size_t num_edges, + bool_t store_transposed, + double alpha, + double epsilon, + size_t max_iterations) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_graph_t* p_graph = NULL; + cugraph_centrality_result_t* p_result = NULL; + + ret_code = create_mg_test_graph( + handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, &p_graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_mg_test_graph failed."); + + ret_code = cugraph_pagerank_allow_nonconvergence(handle, + p_graph, + NULL, + NULL, + NULL, + NULL, + alpha, + epsilon, + max_iterations, + FALSE, + &p_result, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed."); + + // NOTE: Because we get back vertex ids and pageranks, we can simply compare + // the returned values with the expected results for the entire + // graph. Each GPU will have a subset of the total vertices, so + // they will do a subset of the comparisons. + cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* pageranks; + + vertices = cugraph_centrality_result_get_vertices(p_result); + pageranks = cugraph_centrality_result_get_values(p_result); + + size_t num_local_vertices = cugraph_type_erased_device_array_view_size(vertices); + + vertex_t h_vertices[num_local_vertices]; + weight_t h_pageranks[num_local_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_vertices, vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_pageranks, pageranks, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int i = 0; (i < num_local_vertices) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_result[h_vertices[i]], h_pageranks[i], 0.001), + "pagerank results don't match"); + } + + cugraph_centrality_result_free(p_result); + cugraph_mg_graph_free(p_graph); + cugraph_error_free(ret_error); + + return test_ret_value; +} + int generic_personalized_pagerank_test(const cugraph_resource_handle_t* handle, vertex_t* h_src, vertex_t* h_dst, @@ -209,6 +284,115 @@ int generic_personalized_pagerank_test(const cugraph_resource_handle_t* handle, return test_ret_value; } +int generic_personalized_pagerank_nonconverging_test(const cugraph_resource_handle_t* handle, + vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + weight_t* h_result, + vertex_t* h_personalization_vertices, + weight_t* h_personalization_values, + size_t num_vertices, + size_t num_edges, + size_t num_personalization_vertices, + bool_t store_transposed, + double alpha, + double epsilon, + size_t max_iterations) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_graph_t* p_graph = NULL; + cugraph_centrality_result_t* p_result = NULL; + cugraph_type_erased_device_array_t* personalization_vertices = NULL; + cugraph_type_erased_device_array_t* personalization_values = NULL; + cugraph_type_erased_device_array_view_t* 
personalization_vertices_view = NULL; + cugraph_type_erased_device_array_view_t* personalization_values_view = NULL; + + data_type_id_t vertex_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + + ret_code = create_mg_test_graph( + handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, &p_graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + if (cugraph_resource_handle_get_rank(handle) != 0) { num_personalization_vertices = 0; } + + ret_code = cugraph_type_erased_device_array_create( + handle, num_personalization_vertices, vertex_tid, &personalization_vertices, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_vertices create failed."); + + ret_code = cugraph_type_erased_device_array_create( + handle, num_personalization_vertices, weight_tid, &personalization_values, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_values create failed."); + + personalization_vertices_view = cugraph_type_erased_device_array_view(personalization_vertices); + personalization_values_view = cugraph_type_erased_device_array_view(personalization_values); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, personalization_vertices_view, (byte_t*)h_personalization_vertices, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_vertices copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, personalization_values_view, (byte_t*)h_personalization_values, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_values copy_from_host failed."); + + ret_code = cugraph_personalized_pagerank_allow_nonconvergence(handle, + p_graph, + NULL, + NULL, + NULL, + NULL, + personalization_vertices_view, + personalization_values_view, + alpha, + epsilon, + max_iterations, + FALSE, + &p_result, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_personalized_pagerank failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_personalized_pagerank failed."); + + cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* pageranks; + + vertices = cugraph_centrality_result_get_vertices(p_result); + pageranks = cugraph_centrality_result_get_values(p_result); + + size_t num_local_vertices = cugraph_type_erased_device_array_view_size(vertices); + + vertex_t h_vertices[num_local_vertices]; + weight_t h_pageranks[num_local_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_vertices, vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_pageranks, pageranks, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int i = 0; (i < num_local_vertices) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_result[h_vertices[i]], h_pageranks[i], 0.001), + "pagerank results don't match"); + } + + cugraph_centrality_result_free(p_result); + cugraph_mg_graph_free(p_graph); + cugraph_error_free(ret_error); + + return test_ret_value; +} + int test_pagerank(const cugraph_resource_handle_t* handle) { size_t num_edges = 8; @@ -323,6 
+507,34 @@ int test_pagerank_4_with_transpose(const cugraph_resource_handle_t* handle) max_iterations); } +int test_pagerank_non_convergence(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 8; + size_t num_vertices = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + weight_t h_result[] = {0.0776471, 0.167637, 0.0639699, 0.220202, 0.140046, 0.330498}; + + double alpha = 0.95; + double epsilon = 0.0001; + size_t max_iterations = 2; + + // Pagerank wants store_transposed = TRUE + return generic_pagerank_nonconverging_test(handle, + h_src, + h_dst, + h_wgt, + h_result, + num_vertices, + num_edges, + TRUE, + alpha, + epsilon, + max_iterations); +} + int test_personalized_pagerank(const cugraph_resource_handle_t* handle) { size_t num_edges = 3; @@ -356,6 +568,40 @@ int test_personalized_pagerank(const cugraph_resource_handle_t* handle) max_iterations); } +int test_personalized_pagerank_non_convergence(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 3; + size_t num_vertices = 4; + + vertex_t h_src[] = {0, 1, 2}; + vertex_t h_dst[] = {1, 2, 3}; + weight_t h_wgt[] = {1.f, 1.f, 1.f}; + weight_t h_result[] = { 0.03625, 0.285, 0.32125, 0.3575 }; + + + vertex_t h_personalized_vertices[] = {0, 1, 2, 3}; + weight_t h_personalized_values[] = {0.1, 0.2, 0.3, 0.4}; + + double alpha = 0.85; + double epsilon = 1.0e-6; + size_t max_iterations = 1; + + return generic_personalized_pagerank_nonconverging_test(handle, + h_src, + h_dst, + h_wgt, + h_result, + h_personalized_vertices, + h_personalized_values, + num_vertices, + num_edges, + num_vertices, + FALSE, + alpha, + epsilon, + max_iterations); +} + /******************************************************************************/ int main(int argc, char** argv) @@ -368,7 +614,9 @@ int main(int argc, char** argv) result |= RUN_MG_TEST(test_pagerank_with_transpose, handle); result |= RUN_MG_TEST(test_pagerank_4, handle); result |= RUN_MG_TEST(test_pagerank_4_with_transpose, handle); + result |= RUN_MG_TEST(test_pagerank_non_convergence, handle); result |= RUN_MG_TEST(test_personalized_pagerank, handle); + result |= RUN_MG_TEST(test_personalized_pagerank_non_convergence, handle); cugraph_free_resource_handle(handle); free_mg_raft_handle(raft_handle); diff --git a/cpp/tests/c_api/pagerank_test.c b/cpp/tests/c_api/pagerank_test.c index 048750da06c..e12021cd16d 100644 --- a/cpp/tests/c_api/pagerank_test.c +++ b/cpp/tests/c_api/pagerank_test.c @@ -67,6 +67,82 @@ int generic_pagerank_test(vertex_t* h_src, &p_result, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* pageranks; + + vertices = cugraph_centrality_result_get_vertices(p_result); + pageranks = cugraph_centrality_result_get_values(p_result); + + vertex_t h_vertices[num_vertices]; + weight_t h_pageranks[num_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_vertices, vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_pageranks, pageranks, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + 
for (int i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_result[h_vertices[i]], h_pageranks[i], 0.001), + "pagerank results don't match"); + } + + cugraph_centrality_result_free(p_result); + cugraph_sg_graph_free(p_graph); + cugraph_free_resource_handle(p_handle); + cugraph_error_free(ret_error); + + return test_ret_value; +} + +int generic_pagerank_nonconverging_test(vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + weight_t* h_result, + size_t num_vertices, + size_t num_edges, + bool_t store_transposed, + double alpha, + double epsilon, + size_t max_iterations) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_resource_handle_t* p_handle = NULL; + cugraph_graph_t* p_graph = NULL; + cugraph_centrality_result_t* p_result = NULL; + + p_handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, p_handle != NULL, "resource handle creation failed."); + + ret_code = create_test_graph( + p_handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, FALSE, &p_graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = cugraph_pagerank_allow_nonconvergence(p_handle, + p_graph, + NULL, + NULL, + NULL, + NULL, + alpha, + epsilon, + max_iterations, + FALSE, + &p_result, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); cugraph_type_erased_device_array_view_t* vertices; cugraph_type_erased_device_array_view_t* pageranks; @@ -208,6 +284,115 @@ int generic_personalized_pagerank_test(vertex_t* h_src, return test_ret_value; } +int generic_personalized_pagerank_nonconverging_test(vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + weight_t* h_result, + vertex_t* h_personalization_vertices, + weight_t* h_personalization_values, + size_t num_vertices, + size_t num_edges, + size_t num_personalization_vertices, + bool_t store_transposed, + double alpha, + double epsilon, + size_t max_iterations) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_resource_handle_t* p_handle = NULL; + cugraph_graph_t* p_graph = NULL; + cugraph_centrality_result_t* p_result = NULL; + cugraph_type_erased_device_array_t* personalization_vertices = NULL; + cugraph_type_erased_device_array_t* personalization_values = NULL; + cugraph_type_erased_device_array_view_t* personalization_vertices_view = NULL; + cugraph_type_erased_device_array_view_t* personalization_values_view = NULL; + + data_type_id_t vertex_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + + p_handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, p_handle != NULL, "resource handle creation failed."); + + ret_code = create_test_graph( + p_handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, FALSE, &p_graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = cugraph_type_erased_device_array_create( + p_handle, num_personalization_vertices, vertex_tid, &personalization_vertices, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, 
"personalization_vertices create failed."); + + ret_code = cugraph_type_erased_device_array_create( + p_handle, num_personalization_vertices, weight_tid, &personalization_values, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_values create failed."); + + personalization_vertices_view = cugraph_type_erased_device_array_view(personalization_vertices); + personalization_values_view = cugraph_type_erased_device_array_view(personalization_values); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + p_handle, personalization_vertices_view, (byte_t*)h_personalization_vertices, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_vertices copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + p_handle, personalization_values_view, (byte_t*)h_personalization_values, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_values copy_from_host failed."); + + ret_code = cugraph_personalized_pagerank_allow_nonconvergence(p_handle, + p_graph, + NULL, + NULL, + NULL, + NULL, + personalization_vertices_view, + personalization_values_view, + alpha, + epsilon, + max_iterations, + FALSE, + &p_result, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_personalized_pagerank failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_personalized_pagerank failed."); + + cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* pageranks; + + vertices = cugraph_centrality_result_get_vertices(p_result); + pageranks = cugraph_centrality_result_get_values(p_result); + + vertex_t h_vertices[num_vertices]; + weight_t h_pageranks[num_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_vertices, vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_pageranks, pageranks, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_result[h_vertices[i]], h_pageranks[i], 0.001), + "pagerank results don't match"); + } + + cugraph_centrality_result_free(p_result); + cugraph_sg_graph_free(p_graph); + cugraph_free_resource_handle(p_handle); + cugraph_error_free(ret_error); + + return test_ret_value; +} + int test_pagerank() { size_t num_edges = 8; @@ -286,6 +471,25 @@ int test_pagerank_4_with_transpose() h_src, h_dst, h_wgt, h_result, num_vertices, num_edges, TRUE, alpha, epsilon, max_iterations); } +int test_pagerank_non_convergence() +{ + size_t num_edges = 8; + size_t num_vertices = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + weight_t h_result[] = {0.0776471, 0.167637, 0.0639699, 0.220202, 0.140046, 0.330498}; + + double alpha = 0.95; + double epsilon = 0.0001; + size_t max_iterations = 2; + + // Pagerank wants store_transposed = TRUE + return generic_pagerank_nonconverging_test( + h_src, h_dst, h_wgt, h_result, num_vertices, num_edges, TRUE, alpha, epsilon, max_iterations); +} + int test_personalized_pagerank() { size_t num_edges = 3; @@ -318,6 +522,39 @@ int test_personalized_pagerank() max_iterations); 
} +int test_personalized_pagerank_non_convergence() +{ + size_t num_edges = 3; + size_t num_vertices = 4; + + vertex_t h_src[] = {0, 1, 2}; + vertex_t h_dst[] = {1, 2, 3}; + weight_t h_wgt[] = {1.f, 1.f, 1.f}; + weight_t h_result[] = { 0.03625, 0.285, 0.32125, 0.3575 }; + + + vertex_t h_personalized_vertices[] = {0, 1, 2, 3}; + weight_t h_personalized_values[] = {0.1, 0.2, 0.3, 0.4}; + + double alpha = 0.85; + double epsilon = 1.0e-6; + size_t max_iterations = 1; + + return generic_personalized_pagerank_nonconverging_test(h_src, + h_dst, + h_wgt, + h_result, + h_personalized_vertices, + h_personalized_values, + num_vertices, + num_edges, + num_vertices, + FALSE, + alpha, + epsilon, + max_iterations); +} + /******************************************************************************/ int main(int argc, char** argv) @@ -327,6 +564,8 @@ int main(int argc, char** argv) result |= RUN_TEST(test_pagerank_with_transpose); result |= RUN_TEST(test_pagerank_4); result |= RUN_TEST(test_pagerank_4_with_transpose); + result |= RUN_TEST(test_pagerank_non_convergence); result |= RUN_TEST(test_personalized_pagerank); + result |= RUN_TEST(test_personalized_pagerank_non_convergence); return result; } diff --git a/cpp/tests/centrality/betweenness_centrality_reference.hpp b/cpp/tests/centrality/betweenness_centrality_reference.hpp index 9a86de934c3..3c60020265a 100644 --- a/cpp/tests/centrality/betweenness_centrality_reference.hpp +++ b/cpp/tests/centrality/betweenness_centrality_reference.hpp @@ -33,7 +33,7 @@ void ref_bfs(std::vector const& offsets, std::queue& Q, std::stack& S, std::vector& dist, - std::vector>& pred, + std::vector>>& pred, std::vector& sigmas, vertex_t source) { @@ -61,7 +61,7 @@ void ref_bfs(std::vector const& offsets, // Edge(v, w) on a shortest path? 
if (dist[nbr] == dist[v] + 1) { sigmas[nbr] += sigmas[v]; - pred[nbr].push_back(v); + pred[nbr].push_back(std::make_pair(v, nbr_idx)); } } } @@ -70,7 +70,7 @@ void ref_bfs(std::vector const& offsets, template void ref_accumulation(std::vector& result, std::stack& S, - std::vector>& pred, + std::vector>>& pred, std::vector& sigmas, std::vector& deltas, vertex_t source) @@ -80,8 +80,8 @@ void ref_accumulation(std::vector& result, while (!S.empty()) { vertex_t w = S.top(); S.pop(); - for (vertex_t v : pred[w]) { - deltas[v] += (sigmas[v] / sigmas[w]) * (1.0 + deltas[w]); + for (auto v : pred[w]) { + deltas[v.first] += (sigmas[v.first] / sigmas[w]) * (1.0 + deltas[w]); } if (w != source) { result[w] += deltas[w]; } } @@ -90,7 +90,7 @@ void ref_accumulation(std::vector& result, template void ref_endpoints_accumulation(std::vector& result, std::stack& S, - std::vector>& pred, + std::vector>>& pred, std::vector& sigmas, std::vector& deltas, vertex_t source) @@ -101,17 +101,19 @@ void ref_endpoints_accumulation(std::vector& result, while (!S.empty()) { vertex_t w = S.top(); S.pop(); - for (vertex_t v : pred[w]) { - deltas[v] += (sigmas[v] / sigmas[w]) * (1.0 + deltas[w]); + for (auto v : pred[w]) { + deltas[v.first] += (sigmas[v.first] / sigmas[w]) * (1.0 + deltas[w]); } if (w != source) { result[w] += deltas[w] + 1; } } } -template +template void ref_edge_accumulation(std::vector& result, + std::vector const& offsets, + std::vector const& indices, std::stack& S, - std::vector>& pred, + std::vector>>& pred, std::vector& sigmas, std::vector& deltas, vertex_t source) @@ -120,10 +122,12 @@ void ref_edge_accumulation(std::vector& result, while (!S.empty()) { vertex_t w = S.top(); S.pop(); - for (vertex_t v : pred[w]) { - deltas[v] += (sigmas[v] / sigmas[w]) * (1.0 + deltas[w]); + for (auto v : pred[w]) { + double coefficient = (sigmas[v.first] / sigmas[w]) * (1.0 + deltas[w]); + + deltas[v.first] += coefficient; + result[v.second] += coefficient; } - if (w != source) { result[w] += deltas[w]; } } } @@ -181,7 +185,7 @@ std::vector betweenness_centrality_reference( std::stack S; std::vector dist(result.size()); - std::vector> pred(result.size()); + std::vector>> pred(result.size()); std::vector sigmas(result.size()); std::vector deltas(result.size()); @@ -220,14 +224,14 @@ std::vector edge_betweenness_centrality_reference( std::stack S; std::vector dist(offsets.size() - 1); - std::vector> pred(offsets.size() - 1); + std::vector>> pred(result.size()); std::vector sigmas(offsets.size() - 1); std::vector deltas(offsets.size() - 1); for (vertex_t s : seeds) { ref_bfs(offsets, indices, Q, S, dist, pred, sigmas, s); - ref_edge_accumulation(result, S, pred, sigmas, deltas, s); + ref_edge_accumulation(result, offsets, indices, S, pred, sigmas, deltas, s); } } return result; diff --git a/cpp/tests/centrality/edge_betweenness_centrality_test.cpp b/cpp/tests/centrality/edge_betweenness_centrality_test.cpp index bb223067e1c..e4d22ff069c 100644 --- a/cpp/tests/centrality/edge_betweenness_centrality_test.cpp +++ b/cpp/tests/centrality/edge_betweenness_centrality_test.cpp @@ -99,7 +99,6 @@ class Tests_EdgeBetweennessCentrality hr_timer.start("Edge betweenness centrality"); } -#if 0 auto d_centralities = cugraph::edge_betweenness_centrality( handle, graph_view, @@ -108,17 +107,6 @@ class Tests_EdgeBetweennessCentrality raft::device_span{d_seeds.data(), d_seeds.size()}), betweenness_usecase.normalized, do_expensive_check); -#else - EXPECT_THROW(cugraph::edge_betweenness_centrality( - handle, - graph_view, - 
edge_weight_view, - std::make_optional>( - raft::device_span{d_seeds.data(), d_seeds.size()}), - betweenness_usecase.normalized, - do_expensive_check), - cugraph::logic_error); -#endif if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -127,24 +115,34 @@ class Tests_EdgeBetweennessCentrality } if (betweenness_usecase.check_correctness) { -#if 0 - auto [h_offsets, h_indices, h_wgt] = cugraph::test::graph_to_host_csr(handle, graph_view, edge_weight_view); + // Compute reference edge betweenness result + auto [h_offsets, h_indices, h_wgt] = + cugraph::test::graph_to_host_csr(handle, graph_view, edge_weight_view); - auto h_seeds = cugraph::test::to_host(handle, d_seeds); + auto h_seeds = cugraph::test::to_host(handle, d_seeds); auto h_reference_centralities = - betweenness_centrality_reference(h_offsets, h_indices, h_wgt, h_seeds, betweenness_usecase.include_endpoints); + edge_betweenness_centrality_reference(h_offsets, h_indices, h_wgt, h_seeds); + + rmm::device_uvector d_reference_src_vertex_ids(0, handle.get_stream()); + rmm::device_uvector d_reference_dst_vertex_ids(0, handle.get_stream()); + + std::tie(d_reference_src_vertex_ids, d_reference_dst_vertex_ids, std::ignore) = + cugraph::test::graph_to_device_coo(handle, graph_view, edge_weight_view); auto d_reference_centralities = cugraph::test::to_device(handle, h_reference_centralities); - // Need to get edges in order... + auto [d_cugraph_src_vertex_ids, d_cugraph_dst_vertex_ids, d_cugraph_results] = + cugraph::test::graph_to_device_coo( + handle, graph_view, std::make_optional(d_centralities.view())); cugraph::test::edge_betweenness_centrality_validate(handle, - d_renumber_map_labels, - d_centralities, - std::nullopt, + d_cugraph_src_vertex_ids, + d_cugraph_dst_vertex_ids, + *d_cugraph_results, + d_reference_src_vertex_ids, + d_reference_dst_vertex_ids, d_reference_centralities); -#endif } } }; @@ -188,7 +186,6 @@ INSTANTIATE_TEST_SUITE_P( EdgeBetweennessCentrality_Usecase{20, false, true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), - cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); INSTANTIATE_TEST_SUITE_P( diff --git a/cpp/tests/centrality/eigenvector_centrality_test.cpp b/cpp/tests/centrality/eigenvector_centrality_test.cpp index f3408d9b131..7cafcfbde85 100644 --- a/cpp/tests/centrality/eigenvector_centrality_test.cpp +++ b/cpp/tests/centrality/eigenvector_centrality_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
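The next several hunks repeat a single call-site migration: decompress_to_edgelist grew an optional edge-id view parameter and now returns a four-element tuple, and because rmm::device_uvector is move-only the callers switch from structured bindings to pre-declared vectors plus std::tie with std::ignore. A condensed sketch of the new pattern wrapped as a helper; header paths and the exact template parameters of the edge-id view are assumptions based on the call sites in this PR:

#include <cugraph/edge_property.hpp>
#include <cugraph/graph_functions.hpp>
#include <cugraph/graph_view.hpp>
#include <raft/core/device_span.hpp>
#include <raft/core/handle.hpp>
#include <rmm/device_uvector.hpp>

#include <optional>
#include <tuple>

// Decompress a graph view to COO, ignoring the new edge-id output. Callers
// that do not carry edge ids pass std::nullopt for edge_id_view and drop the
// fourth tuple element with std::ignore, exactly as the hunks above do.
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
std::tuple<rmm::device_uvector<vertex_t>,
           rmm::device_uvector<vertex_t>,
           std::optional<rmm::device_uvector<weight_t>>>
edgelist_without_ids(
  raft::handle_t const& handle,
  cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
  std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>> edge_weight_view)
{
  rmm::device_uvector<vertex_t> srcs(0, handle.get_stream());
  rmm::device_uvector<vertex_t> dsts(0, handle.get_stream());
  std::optional<rmm::device_uvector<weight_t>> wgts{std::nullopt};

  std::tie(srcs, dsts, wgts, std::ignore) = cugraph::decompress_to_edgelist(
    handle,
    graph_view,
    edge_weight_view,
    std::optional<cugraph::edge_property_view_t<edge_t, edge_t const*>>{std::nullopt},  // edge ids
    std::optional<raft::device_span<vertex_t const>>{std::nullopt});                    // renumber map

  return std::make_tuple(std::move(srcs), std::move(dsts), std::move(wgts));
}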
@@ -167,10 +167,15 @@ class Tests_EigenvectorCentrality } if (eigenvector_usecase.check_correctness) { - auto [dst_v, src_v, opt_wgt_v] = cugraph::decompress_to_edgelist( + rmm::device_uvector dst_v(0, handle.get_stream()); + rmm::device_uvector src_v(0, handle.get_stream()); + std::optional> opt_wgt_v{std::nullopt}; + + std::tie(dst_v, src_v, opt_wgt_v, std::ignore) = cugraph::decompress_to_edgelist( handle, graph_view, edge_weight_view, + std::optional>{std::nullopt}, std::optional>{std::nullopt}); auto h_src = cugraph::test::to_host(handle, src_v); diff --git a/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp b/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp index a1e73b6147b..ebc49e4a3e4 100644 --- a/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp +++ b/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp @@ -57,6 +57,8 @@ class Tests_MGEdgeBetweennessCentrality template void run_current_test(std::tuple const& param) { + constexpr bool do_expensive_check = false; + auto [betweenness_usecase, input_usecase] = param; HighResTimer hr_timer{}; @@ -83,7 +85,7 @@ class Tests_MGEdgeBetweennessCentrality mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt; raft::random::RngState rng_state(handle_->get_comms().get_rank()); - auto d_seeds = cugraph::select_random_vertices( + auto d_mg_seeds = cugraph::select_random_vertices( *handle_, mg_graph_view, std::optional>{std::nullopt}, @@ -98,24 +100,13 @@ class Tests_MGEdgeBetweennessCentrality hr_timer.start("MG edge betweenness centrality"); } -#if 0 auto d_centralities = cugraph::edge_betweenness_centrality( *handle_, mg_graph_view, mg_edge_weight_view, std::make_optional>( - raft::device_span{d_seeds.data(), d_seeds.size()}), + raft::device_span{d_mg_seeds.data(), d_mg_seeds.size()}), betweenness_usecase.normalized); -#else - EXPECT_THROW(cugraph::edge_betweenness_centrality( - *handle_, - mg_graph_view, - mg_edge_weight_view, - std::make_optional>( - raft::device_span{d_seeds.data(), d_seeds.size()}), - betweenness_usecase.normalized), - cugraph::logic_error); -#endif if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -125,22 +116,52 @@ class Tests_MGEdgeBetweennessCentrality } if (betweenness_usecase.check_correctness) { -#if 0 - d_centralities = cugraph::test::device_gatherv( - *handle_, raft::device_span(d_centralities.data(), d_centralities.size())); - d_seeds = cugraph::test::device_gatherv( - *handle_, raft::device_span(d_seeds.data(), d_seeds.size())); - - auto [h_src, h_dst, h_wgt] = cugraph::test::graph_to_host_coo(*handle_, graph_view); - - if (h_src.size() > 0) { - auto h_centralities = cugraph::test::to_host(*handle_, d_centralities); - auto h_seeds = cugraph::test::to_host(*handle_, d_seeds); - - cugraph::test::edge_betweenness_centrality_validate( - h_src, h_dst, h_wgt, h_centralities, h_seeds); + // Extract MG results + auto [d_cugraph_src_vertex_ids, d_cugraph_dst_vertex_ids, d_cugraph_results] = + cugraph::test::graph_to_device_coo( + *handle_, mg_graph_view, std::make_optional(d_centralities.view())); + + // Create SG graph so we can generate SG results + cugraph::graph_t sg_graph(*handle_); + std::optional< + cugraph::edge_property_t, weight_t>> + sg_edge_weights{std::nullopt}; + std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + false); + + auto d_mg_aggregate_seeds = 
cugraph::test::device_gatherv( + *handle_, raft::device_span{d_mg_seeds.data(), d_mg_seeds.size()}); + + if (handle_->get_comms().get_rank() == 0) { + auto sg_edge_weights_view = + sg_edge_weights ? std::make_optional(sg_edge_weights->view()) : std::nullopt; + + // Generate SG results and compare + auto d_sg_centralities = cugraph::edge_betweenness_centrality( + *handle_, + sg_graph.view(), + sg_edge_weights_view, + std::make_optional>(raft::device_span{ + d_mg_aggregate_seeds.data(), d_mg_aggregate_seeds.size()}), + betweenness_usecase.normalized, + do_expensive_check); + + auto [d_sg_src_vertex_ids, d_sg_dst_vertex_ids, d_sg_reference_centralities] = + cugraph::test::graph_to_device_coo( + *handle_, sg_graph.view(), std::make_optional(d_sg_centralities.view())); + + cugraph::test::edge_betweenness_centrality_validate(*handle_, + d_cugraph_src_vertex_ids, + d_cugraph_dst_vertex_ids, + *d_cugraph_results, + d_sg_src_vertex_ids, + d_sg_dst_vertex_ids, + *d_sg_reference_centralities); } -#endif } } diff --git a/cpp/tests/community/egonet_validate.cu b/cpp/tests/community/egonet_validate.cu index 44b74090ec4..5fc94c5c07d 100644 --- a/cpp/tests/community/egonet_validate.cu +++ b/cpp/tests/community/egonet_validate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,11 +44,16 @@ egonet_reference( int radius) { #if 1 - auto [d_coo_src, d_coo_dst, d_coo_wgt] = - cugraph::decompress_to_edgelist(handle, - graph_view, - edge_weight_view, - std::optional>{std::nullopt}); + rmm::device_uvector d_coo_src(0, handle.get_stream()); + rmm::device_uvector d_coo_dst(0, handle.get_stream()); + std::optional> d_coo_wgt{std::nullopt}; + + std::tie(d_coo_src, d_coo_dst, d_coo_wgt, std::ignore) = cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); #else // FIXME: This should be faster (smaller list of edges to operate on), but uniform_nbr_sample // doesn't preserve multi-edges (which is probably a bug) diff --git a/cpp/tests/cores/k_core_validate.cu b/cpp/tests/cores/k_core_validate.cu index 687349dbbd7..b264ed53540 100644 --- a/cpp/tests/cores/k_core_validate.cu +++ b/cpp/tests/cores/k_core_validate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
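The block above is the template for re-enabling the other disabled MG comparisons as well: pull the MG result down to COO form, rebuild the distributed graph as a single-GPU graph, gather the per-rank seed lists, and let rank 0 rerun the algorithm and validate edge by edge. A condensed sketch of that flow, using the test utilities named in the hunk (graph_to_device_coo, mg_graph_to_sg_graph, device_gatherv); handle_, mg_graph_view, mg_edge_weight_view, d_mg_seeds, d_centralities, and normalized come from the surrounding fixture, and the graph template parameters are illustrative:

// 1. MG results as (src, dst, value) COO, still distributed.
auto [d_mg_srcs, d_mg_dsts, d_mg_vals] = cugraph::test::graph_to_device_coo(
  *handle_, mg_graph_view, std::make_optional(d_centralities.view()));

// 2. Rebuild the graph single-GPU and gather the seeds used by every rank.
cugraph::graph_t<vertex_t, edge_t, false, false> sg_graph(*handle_);
std::optional<cugraph::edge_property_t<cugraph::graph_view_t<vertex_t, edge_t, false, false>,
                                       weight_t>>
  sg_edge_weights{std::nullopt};
std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph(
  *handle_,
  mg_graph_view,
  mg_edge_weight_view,
  std::optional<raft::device_span<vertex_t const>>{std::nullopt},
  false);

auto d_sg_seeds = cugraph::test::device_gatherv(
  *handle_, raft::device_span<vertex_t const>{d_mg_seeds.data(), d_mg_seeds.size()});

// 3. Rank 0 reruns the algorithm on the SG graph; results are then compared
//    per edge (see edge_betweenness_centrality_validate in the hunk above).
if (handle_->get_comms().get_rank() == 0) {
  auto d_sg_centralities = cugraph::edge_betweenness_centrality(
    *handle_,
    sg_graph.view(),
    sg_edge_weights ? std::make_optional(sg_edge_weights->view()) : std::nullopt,
    std::make_optional(raft::device_span<vertex_t const>{d_sg_seeds.data(), d_sg_seeds.size()}),
    normalized,
    false /* do_expensive_check */);
}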
@@ -61,12 +61,17 @@ void check_correctness( EXPECT_EQ(error_count, 0) << "destination error count is non-zero"; - auto [graph_src, graph_dst, graph_wgt] = - cugraph::decompress_to_edgelist(handle, - graph_view, - edge_weight_view, - std::optional>{std::nullopt}, - false); + rmm::device_uvector graph_src(0, handle.get_stream()); + rmm::device_uvector graph_dst(0, handle.get_stream()); + std::optional> graph_wgt{std::nullopt}; + + std::tie(graph_src, graph_dst, graph_wgt, std::ignore) = cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + false); // Now we'll count how many edges should be in the subgraph auto expected_edge_count = diff --git a/cpp/tests/link_analysis/mg_pagerank_test.cpp b/cpp/tests/link_analysis/mg_pagerank_test.cpp index b3d9e0271d0..922a6ff2781 100644 --- a/cpp/tests/link_analysis/mg_pagerank_test.cpp +++ b/cpp/tests/link_analysis/mg_pagerank_test.cpp @@ -120,30 +120,25 @@ class Tests_MGPageRank result_t constexpr alpha{0.85}; result_t constexpr epsilon{1e-6}; - rmm::device_uvector d_mg_pageranks(mg_graph_view.local_vertex_partition_range_size(), - handle_->get_stream()); - if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); hr_timer.start("MG PageRank"); } - cugraph::pagerank( + auto [d_mg_pageranks, metadata] = cugraph::pagerank( *handle_, mg_graph_view, mg_edge_weight_view, std::nullopt, d_mg_personalization_vertices - ? std::optional{(*d_mg_personalization_vertices).data()} - : std::nullopt, - d_mg_personalization_values - ? std::optional{(*d_mg_personalization_values).data()} - : std::nullopt, - d_mg_personalization_vertices - ? std::optional{static_cast((*d_mg_personalization_vertices).size())} + ? std::make_optional(std::make_tuple( + raft::device_span{d_mg_personalization_vertices->data(), + d_mg_personalization_vertices->size()}, + raft::device_span{d_mg_personalization_values->data(), + d_mg_personalization_values->size()})) : std::nullopt, - d_mg_pageranks.data(), + std::optional>{std::nullopt}, alpha, epsilon, std::numeric_limits::max(), @@ -211,25 +206,19 @@ class Tests_MGPageRank ASSERT_EQ(mg_graph_view.number_of_vertices(), sg_graph_view.number_of_vertices()); - rmm::device_uvector d_sg_pageranks(sg_graph_view.number_of_vertices(), - handle_->get_stream()); - - cugraph::pagerank( + auto [d_sg_pageranks, sg_metadata] = cugraph::pagerank( *handle_, sg_graph_view, sg_edge_weight_view, std::nullopt, d_mg_aggregate_personalization_vertices - ? std::optional{(*d_mg_aggregate_personalization_vertices).data()} - : std::nullopt, - d_mg_aggregate_personalization_values - ? std::optional{(*d_mg_aggregate_personalization_values).data()} - : std::nullopt, - d_mg_aggregate_personalization_vertices - ? std::optional{static_cast( - (*d_mg_aggregate_personalization_vertices).size())} + ? 
std::make_optional(std::make_tuple( + raft::device_span{d_mg_aggregate_personalization_vertices->data(), + d_mg_aggregate_personalization_vertices->size()}, + raft::device_span{d_mg_aggregate_personalization_values->data(), + d_mg_aggregate_personalization_values->size()})) : std::nullopt, - d_sg_pageranks.data(), + std::optional>{std::nullopt}, alpha, epsilon, std::numeric_limits::max(), // max_iterations diff --git a/cpp/tests/link_analysis/pagerank_test.cpp b/cpp/tests/link_analysis/pagerank_test.cpp index adb4ea2fa54..0354b69b8a8 100644 --- a/cpp/tests/link_analysis/pagerank_test.cpp +++ b/cpp/tests/link_analysis/pagerank_test.cpp @@ -206,30 +206,27 @@ class Tests_PageRank result_t constexpr alpha{0.85}; result_t constexpr epsilon{1e-6}; - rmm::device_uvector d_pageranks(graph_view.number_of_vertices(), handle.get_stream()); - if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement hr_timer.start("PageRank"); } - cugraph::pagerank( + auto [d_pageranks, metadata] = cugraph::pagerank( handle, graph_view, edge_weight_view, std::nullopt, d_personalization_vertices - ? std::optional{(*d_personalization_vertices).data()} + ? std::make_optional( + std::make_tuple(raft::device_span{d_personalization_vertices->data(), + d_personalization_vertices->size()}, + raft::device_span{d_personalization_values->data(), + d_personalization_values->size()})) : std::nullopt, - d_personalization_values ? std::optional{(*d_personalization_values).data()} - : std::nullopt, - d_personalization_vertices ? std::optional{(*d_personalization_vertices).size()} - : std::nullopt, - d_pageranks.data(), + std::optional>{std::nullopt}, alpha, epsilon, std::numeric_limits::max(), - false, false); if (cugraph::test::g_perf) { diff --git a/cpp/tests/prims/mg_count_if_e.cu b/cpp/tests/prims/mg_count_if_e.cu index bebb21bd720..449aa728d87 100644 --- a/cpp/tests/prims/mg_count_if_e.cu +++ b/cpp/tests/prims/mg_count_if_e.cu @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_count_if_v.cu b/cpp/tests/prims/mg_count_if_v.cu index f90f788cfae..3d745708401 100644 --- a/cpp/tests/prims/mg_count_if_v.cu +++ b/cpp/tests/prims/mg_count_if_v.cu @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_extract_transform_e.cu b/cpp/tests/prims/mg_extract_transform_e.cu index 1c85b55e4be..b71fe5ddb5e 100644 --- a/cpp/tests/prims/mg_extract_transform_e.cu +++ b/cpp/tests/prims/mg_extract_transform_e.cu @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu index 3cd6bd243e1..4d9435dd344 100644 --- a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu +++ b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu index 97d52c04114..677d6ce5022 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_reduce_v.cu b/cpp/tests/prims/mg_reduce_v.cu index 7080eb12da6..b6f8da48ef4 100644 --- 
a/cpp/tests/prims/mg_reduce_v.cu +++ b/cpp/tests/prims/mg_reduce_v.cu @@ -30,7 +30,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_transform_e.cu b/cpp/tests/prims/mg_transform_e.cu index 47def15fffc..127eddd43c7 100644 --- a/cpp/tests/prims/mg_transform_e.cu +++ b/cpp/tests/prims/mg_transform_e.cu @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include @@ -117,10 +117,11 @@ class Tests_MGTransformE { rmm::device_uvector srcs(0, handle_->get_stream()); rmm::device_uvector dsts(0, handle_->get_stream()); - std::tie(srcs, dsts, std::ignore) = cugraph::decompress_to_edgelist( + std::tie(srcs, dsts, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::optional>{std::nullopt}); auto edge_first = thrust::make_zip_iterator( thrust::make_tuple(store_transposed ? dsts.begin() : srcs.begin(), diff --git a/cpp/tests/prims/mg_transform_reduce_e.cu b/cpp/tests/prims/mg_transform_reduce_e.cu index 8dba488f23d..79aa3da54df 100644 --- a/cpp/tests/prims/mg_transform_reduce_e.cu +++ b/cpp/tests/prims/mg_transform_reduce_e.cu @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_transform_reduce_v.cu b/cpp/tests/prims/mg_transform_reduce_v.cu index 3ea7636a718..c9fc138ae1b 100644 --- a/cpp/tests/prims/mg_transform_reduce_v.cu +++ b/cpp/tests/prims/mg_transform_reduce_v.cu @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu index 2eb270973f2..d0b97065da7 100644 --- a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu +++ b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/property_generator.cuh b/cpp/tests/prims/property_generator.cuh index 24a21c1cb01..e7264cd276f 100644 --- a/cpp/tests/prims/property_generator.cuh +++ b/cpp/tests/prims/property_generator.cuh @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/sampling/detail/nbr_sampling_utils.cuh b/cpp/tests/sampling/detail/nbr_sampling_utils.cuh index 00c14009e86..8221073f556 100644 --- a/cpp/tests/sampling/detail/nbr_sampling_utils.cuh +++ b/cpp/tests/sampling/detail/nbr_sampling_utils.cuh @@ -31,7 +31,7 @@ #include #include -#include +#include #include diff --git a/cpp/tests/sampling/random_walks_check.cuh b/cpp/tests/sampling/random_walks_check.cuh index 4cd74f01bcb..f73891a1537 100644 --- a/cpp/tests/sampling/random_walks_check.cuh +++ b/cpp/tests/sampling/random_walks_check.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,11 +37,16 @@ void random_walks_validate( std::optional>&& d_weights, size_t max_length) { - auto [d_src, d_dst, d_wgt] = - cugraph::decompress_to_edgelist(handle, - graph_view, - edge_weight_view, - std::optional>{std::nullopt}); + rmm::device_uvector d_src(0, handle.get_stream()); + rmm::device_uvector d_dst(0, handle.get_stream()); + std::optional> d_wgt{std::nullopt}; + + std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); if constexpr (multi_gpu) { d_src = cugraph::test::device_gatherv( diff --git a/cpp/tests/structure/mg_symmetrize_test.cpp b/cpp/tests/structure/mg_symmetrize_test.cpp index cdacff91403..f2d37170f76 100644 --- a/cpp/tests/structure/mg_symmetrize_test.cpp +++ b/cpp/tests/structure/mg_symmetrize_test.cpp @@ -123,11 +123,15 @@ class Tests_MGSymmetrize if (symmetrize_usecase.check_correctness) { // 4-1. decompress MG results + rmm::device_uvector d_mg_srcs(0, handle_->get_stream()); + rmm::device_uvector d_mg_dsts(0, handle_->get_stream()); + std::optional> d_mg_weights{std::nullopt}; - auto [d_mg_srcs, d_mg_dsts, d_mg_weights] = cugraph::decompress_to_edgelist( + std::tie(d_mg_srcs, d_mg_dsts, d_mg_weights, std::ignore) = cugraph::decompress_to_edgelist( *handle_, mg_graph.view(), mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, mg_renumber_map ? std::make_optional>( (*mg_renumber_map).data(), (*mg_renumber_map).size()) : std::nullopt); @@ -157,11 +161,15 @@ class Tests_MGSymmetrize ASSERT_FALSE(d_sg_renumber_map_labels.has_value()); // 4-4. decompress SG results + rmm::device_uvector d_sg_srcs(0, handle_->get_stream()); + rmm::device_uvector d_sg_dsts(0, handle_->get_stream()); + std::optional> d_sg_weights{std::nullopt}; - auto [d_sg_srcs, d_sg_dsts, d_sg_weights] = cugraph::decompress_to_edgelist( + std::tie(d_sg_srcs, d_sg_dsts, d_sg_weights, std::ignore) = cugraph::decompress_to_edgelist( *handle_, sg_graph.view(), sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, std::optional>{std::nullopt}); // 4-5. compare diff --git a/cpp/tests/structure/mg_transpose_storage_test.cpp b/cpp/tests/structure/mg_transpose_storage_test.cpp index b6033a7ab53..1adce8d102e 100644 --- a/cpp/tests/structure/mg_transpose_storage_test.cpp +++ b/cpp/tests/structure/mg_transpose_storage_test.cpp @@ -131,13 +131,17 @@ class Tests_MGTransposeStorage if (transpose_storage_usecase.check_correctness) { // 3-1. decompress MG results + rmm::device_uvector d_mg_srcs(0, handle_->get_stream()); + rmm::device_uvector d_mg_dsts(0, handle_->get_stream()); + std::optional> d_mg_weights{std::nullopt}; - auto [d_mg_srcs, d_mg_dsts, d_mg_weights] = cugraph::decompress_to_edgelist( + std::tie(d_mg_srcs, d_mg_dsts, d_mg_weights, std::ignore) = cugraph::decompress_to_edgelist( *handle_, mg_storage_transposed_graph.view(), mg_storage_transposed_edge_weights ? std::make_optional((*mg_storage_transposed_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, mg_renumber_map ? std::make_optional>( (*mg_renumber_map).data(), (*mg_renumber_map).size()) : std::nullopt); @@ -156,11 +160,15 @@ class Tests_MGTransposeStorage if (handle_->get_comms().get_rank() == int{0}) { // 3-3. 
decompress SG results + rmm::device_uvector d_sg_srcs(0, handle_->get_stream()); + rmm::device_uvector d_sg_dsts(0, handle_->get_stream()); + std::optional> d_sg_weights{std::nullopt}; - auto [d_sg_srcs, d_sg_dsts, d_sg_weights] = cugraph::decompress_to_edgelist( + std::tie(d_sg_srcs, d_sg_dsts, d_sg_weights, std::ignore) = cugraph::decompress_to_edgelist( *handle_, sg_graph.view(), sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, std::optional>{std::nullopt}); // 3-4. compare diff --git a/cpp/tests/structure/mg_transpose_test.cpp b/cpp/tests/structure/mg_transpose_test.cpp index 3558e0f7d97..03a31e14ca9 100644 --- a/cpp/tests/structure/mg_transpose_test.cpp +++ b/cpp/tests/structure/mg_transpose_test.cpp @@ -121,11 +121,15 @@ class Tests_MGTranspose if (transpose_usecase.check_correctness) { // 4-1. decompress MG results + rmm::device_uvector d_mg_srcs(0, handle_->get_stream()); + rmm::device_uvector d_mg_dsts(0, handle_->get_stream()); + std::optional> d_mg_weights{std::nullopt}; - auto [d_mg_srcs, d_mg_dsts, d_mg_weights] = cugraph::decompress_to_edgelist( + std::tie(d_mg_srcs, d_mg_dsts, d_mg_weights, std::ignore) = cugraph::decompress_to_edgelist( *handle_, mg_graph.view(), mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, mg_renumber_map ? std::make_optional>( (*mg_renumber_map).data(), (*mg_renumber_map).size()) : std::nullopt); @@ -152,11 +156,15 @@ class Tests_MGTranspose std::optional>{std::nullopt}); // 4-4. decompress SG results + rmm::device_uvector d_sg_srcs(0, handle_->get_stream()); + rmm::device_uvector d_sg_dsts(0, handle_->get_stream()); + std::optional> d_sg_weights{std::nullopt}; - auto [d_sg_srcs, d_sg_dsts, d_sg_weights] = cugraph::decompress_to_edgelist( + std::tie(d_sg_srcs, d_sg_dsts, d_sg_weights, std::ignore) = cugraph::decompress_to_edgelist( *handle_, sg_graph.view(), sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, std::optional>{std::nullopt}); // 4-5. compare diff --git a/cpp/tests/structure/symmetrize_test.cpp b/cpp/tests/structure/symmetrize_test.cpp index 9673b29e389..89ff9ed139a 100644 --- a/cpp/tests/structure/symmetrize_test.cpp +++ b/cpp/tests/structure/symmetrize_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -211,14 +211,18 @@ class Tests_Symmetrize rmm::device_uvector d_org_srcs(0, handle.get_stream()); rmm::device_uvector d_org_dsts(0, handle.get_stream()); std::optional> d_org_weights{std::nullopt}; + if (symmetrize_usecase.check_correctness) { - std::tie(d_org_srcs, d_org_dsts, d_org_weights) = cugraph::decompress_to_edgelist( - handle, - graph.view(), - edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, - d_renumber_map_labels ? std::make_optional>( - (*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) - : std::nullopt); + std::tie(d_org_srcs, d_org_dsts, d_org_weights, std::ignore) = + cugraph::decompress_to_edgelist( + handle, + graph.view(), + edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + d_renumber_map_labels + ? 
std::make_optional>((*d_renumber_map_labels).data(), + (*d_renumber_map_labels).size()) + : std::nullopt); } if (cugraph::test::g_perf) { @@ -240,13 +244,20 @@ class Tests_Symmetrize } if (symmetrize_usecase.check_correctness) { - auto [d_symm_srcs, d_symm_dsts, d_symm_weights] = cugraph::decompress_to_edgelist( - handle, - graph.view(), - edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, - d_renumber_map_labels ? std::make_optional>( - (*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) - : std::nullopt); + rmm::device_uvector d_symm_srcs(0, handle.get_stream()); + rmm::device_uvector d_symm_dsts(0, handle.get_stream()); + std::optional> d_symm_weights{std::nullopt}; + + std::tie(d_symm_srcs, d_symm_dsts, d_symm_weights, std::ignore) = + cugraph::decompress_to_edgelist( + handle, + graph.view(), + edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + d_renumber_map_labels + ? std::make_optional>((*d_renumber_map_labels).data(), + (*d_renumber_map_labels).size()) + : std::nullopt); auto h_org_srcs = cugraph::test::to_host(handle, d_org_srcs); auto h_org_dsts = cugraph::test::to_host(handle, d_org_dsts); diff --git a/cpp/tests/structure/transpose_storage_test.cpp b/cpp/tests/structure/transpose_storage_test.cpp index 8c94e62d68b..a713abf7dae 100644 --- a/cpp/tests/structure/transpose_storage_test.cpp +++ b/cpp/tests/structure/transpose_storage_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -77,13 +77,16 @@ class Tests_TransposeStorage rmm::device_uvector d_org_dsts(0, handle.get_stream()); std::optional> d_org_weights{std::nullopt}; if (transpose_storage_usecase.check_correctness) { - std::tie(d_org_srcs, d_org_dsts, d_org_weights) = cugraph::decompress_to_edgelist( - handle, - graph.view(), - edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, - d_renumber_map_labels ? std::make_optional>( - (*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) - : std::nullopt); + std::tie(d_org_srcs, d_org_dsts, d_org_weights, std::ignore) = + cugraph::decompress_to_edgelist( + handle, + graph.view(), + edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + d_renumber_map_labels + ? std::make_optional>((*d_renumber_map_labels).data(), + (*d_renumber_map_labels).size()) + : std::nullopt); } if (cugraph::test::g_perf) { @@ -107,13 +110,21 @@ class Tests_TransposeStorage } if (transpose_storage_usecase.check_correctness) { - auto [d_storage_transposed_srcs, d_storage_transposed_dsts, d_storage_transposed_weights] = + rmm::device_uvector d_storage_transposed_srcs(0, handle.get_stream()); + rmm::device_uvector d_storage_transposed_dsts(0, handle.get_stream()); + std::optional> d_storage_transposed_weights{std::nullopt}; + + std::tie(d_storage_transposed_srcs, + d_storage_transposed_dsts, + d_storage_transposed_weights, + std::ignore) = cugraph::decompress_to_edgelist( handle, storage_transposed_graph.view(), storage_transposed_edge_weights ? std::make_optional((*storage_transposed_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, d_renumber_map_labels ? 
std::make_optional>((*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) diff --git a/cpp/tests/structure/transpose_test.cpp b/cpp/tests/structure/transpose_test.cpp index 39ae7d7635e..1cbefa21fcc 100644 --- a/cpp/tests/structure/transpose_test.cpp +++ b/cpp/tests/structure/transpose_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -77,13 +77,16 @@ class Tests_Transpose rmm::device_uvector d_org_dsts(0, handle.get_stream()); std::optional> d_org_weights{std::nullopt}; if (transpose_usecase.check_correctness) { - std::tie(d_org_srcs, d_org_dsts, d_org_weights) = cugraph::decompress_to_edgelist( - handle, - graph.view(), - edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, - d_renumber_map_labels ? std::make_optional>( - (*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) - : std::nullopt); + std::tie(d_org_srcs, d_org_dsts, d_org_weights, std::ignore) = + cugraph::decompress_to_edgelist( + handle, + graph.view(), + edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + d_renumber_map_labels + ? std::make_optional>((*d_renumber_map_labels).data(), + (*d_renumber_map_labels).size()) + : std::nullopt); } if (cugraph::test::g_perf) { @@ -101,11 +104,16 @@ class Tests_Transpose } if (transpose_usecase.check_correctness) { - auto [d_transposed_srcs, d_transposed_dsts, d_transposed_weights] = + rmm::device_uvector d_transposed_srcs(0, handle.get_stream()); + rmm::device_uvector d_transposed_dsts(0, handle.get_stream()); + std::optional> d_transposed_weights{std::nullopt}; + + std::tie(d_transposed_srcs, d_transposed_dsts, d_transposed_weights, std::ignore) = cugraph::decompress_to_edgelist( handle, graph.view(), edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, d_renumber_map_labels ? 
std::make_optional>((*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 615522a863b..1fa869ac2df 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -480,6 +480,20 @@ graph_to_host_coo( cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view); +// If multi-GPU, only the rank 0 GPU holds the valid data +template +std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + // If multi-GPU, only the rank 0 GPU holds the valid data template const& graph_view, std::optional> edge_weight_view) { - auto [d_src, d_dst, d_wgt] = - cugraph::decompress_to_edgelist(handle, - graph_view, - edge_weight_view, - std::optional>{std::nullopt}); + rmm::device_uvector d_src(0, handle.get_stream()); + rmm::device_uvector d_dst(0, handle.get_stream()); + std::optional> d_wgt{std::nullopt}; + + std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); if constexpr (is_multi_gpu) { d_src = cugraph::test::device_gatherv( @@ -89,6 +94,53 @@ graph_to_host_coo( return std::make_tuple(std::move(h_src), std::move(h_dst), std::move(h_wgt)); } +template +std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view) +{ + rmm::device_uvector d_src(0, handle.get_stream()); + rmm::device_uvector d_dst(0, handle.get_stream()); + std::optional> d_wgt{std::nullopt}; + + std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); + + if constexpr (is_multi_gpu) { + d_src = cugraph::test::device_gatherv( + handle, raft::device_span{d_src.data(), d_src.size()}); + d_dst = cugraph::test::device_gatherv( + handle, raft::device_span{d_dst.data(), d_dst.size()}); + if (d_wgt) + *d_wgt = cugraph::test::device_gatherv( + handle, raft::device_span{d_wgt->data(), d_wgt->size()}); + if (handle.get_comms().get_rank() != 0) { + d_src.resize(0, handle.get_stream()); + d_src.shrink_to_fit(handle.get_stream()); + d_dst.resize(0, handle.get_stream()); + d_dst.shrink_to_fit(handle.get_stream()); + if (d_wgt) { + (*d_wgt).resize(0, handle.get_stream()); + (*d_wgt).shrink_to_fit(handle.get_stream()); + } + } + } + + return std::make_tuple(std::move(d_src), std::move(d_dst), std::move(d_wgt)); +} + template const& graph_view, std::optional> edge_weight_view) { - auto [d_src, d_dst, d_wgt] = - cugraph::decompress_to_edgelist(handle, - graph_view, - edge_weight_view, - std::optional>{std::nullopt}); + rmm::device_uvector d_src(0, handle.get_stream()); + rmm::device_uvector d_dst(0, handle.get_stream()); + std::optional> d_wgt{std::nullopt}; + + std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); if constexpr (is_multi_gpu) { d_src = cugraph::test::device_gatherv( @@ -184,8 +241,16 @@ mg_graph_to_sg_graph( std::optional> number_map, bool renumber) { - auto [d_src, d_dst, d_wgt] = - cugraph::decompress_to_edgelist(handle, graph_view, 
edge_weight_view, number_map); + rmm::device_uvector d_src(0, handle.get_stream()); + rmm::device_uvector d_dst(0, handle.get_stream()); + std::optional> d_wgt{std::nullopt}; + + std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + number_map); d_src = cugraph::test::device_gatherv( handle, raft::device_span{d_src.data(), d_src.size()}); diff --git a/cpp/tests/utilities/test_utilities_mg.cu b/cpp/tests/utilities/test_utilities_mg.cu index b572f7df23a..7366a8376a4 100644 --- a/cpp/tests/utilities/test_utilities_mg.cu +++ b/cpp/tests/utilities/test_utilities_mg.cu @@ -90,6 +90,102 @@ graph_to_host_coo( cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view); +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + template std::tuple, std::vector, std::optional>> graph_to_host_csr( raft::handle_t const& handle, diff --git a/cpp/tests/utilities/test_utilities_sg.cu b/cpp/tests/utilities/test_utilities_sg.cu index a5a4fecb4e5..aceff526f21 100644 --- a/cpp/tests/utilities/test_utilities_sg.cu +++ b/cpp/tests/utilities/test_utilities_sg.cu @@ -90,6 +90,102 @@ graph_to_host_coo( cugraph::graph_view_t 
const& graph_view, std::optional> edge_weight_view); +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + template std::tuple, std::vector, std::optional>> graph_to_host_csr( raft::handle_t const& handle, diff --git a/docs/cugraph/source/conf.py b/docs/cugraph/source/conf.py index 394acf0e950..b64901772dc 100644 --- a/docs/cugraph/source/conf.py +++ b/docs/cugraph/source/conf.py @@ -204,6 +204,5 @@ def setup(app): # The following is used by sphinx.ext.linkcode to provide links to github linkcode_resolve = make_linkcode_resolve( - 'cugraph', 'https://github.com/rapidsai/' - 'cugraph/blob/{revision}/python/' - '{package}/{path}#L{lineno}') + "https://github.com/rapidsai/cugraph/blob/{revision}/python/{path}#L{lineno}" +) \ No newline at end of file diff --git a/docs/cugraph/source/sphinxext/github_link.py b/docs/cugraph/source/sphinxext/github_link.py index fa8fe3f5fe3..cc28dc6e897 100644 --- a/docs/cugraph/source/sphinxext/github_link.py +++ b/docs/cugraph/source/sphinxext/github_link.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,10 +16,8 @@ # license in /thirdparty/LICENSES/LICENSE.scikit_learn import inspect -import os import re import subprocess -import sys from functools import partial from operator import attrgetter @@ -56,7 +54,7 @@ def _get_git_revision(): return revision.decode('utf-8') -def _linkcode_resolve(domain, info, package, url_fmt, revision): +def _linkcode_resolve(domain, info, url_fmt, revision): """Determine a link to online source for a class/method/function This is called by sphinx.ext.linkcode @@ -73,7 +71,7 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): if revision is None: return - if domain not in ('py', 'pyx'): + if domain != 'py': return if not info.get('module') or not info.get('fullname'): return @@ -89,41 +87,29 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): fn: str = None lineno: str = None - try: - fn = inspect.getsourcefile(obj) - except Exception: - fn = None - if not fn: - try: - fn = inspect.getsourcefile(sys.modules[obj.__module__]) - except Exception: - fn = None - - if not fn: - # Possibly Cython code. Search docstring for source - m = source_regex.search(obj.__doc__) - - if (m is not None): - source_file = m.group(1) - lineno = m.group(2) - - # fn is expected to be the absolute path. - fn = os.path.relpath(source_file, start=package) - print("{}:{}".format( - os.path.abspath(os.path.join("..", "python", "cuml", fn)), - lineno)) - else: - return - else: - # Test if we are absolute or not (pyx are relative) - if (not os.path.isabs(fn)): - # Should be relative to docs right now - fn = os.path.abspath(os.path.join("..", "python", fn)) - - # Convert to relative from module root - fn = os.path.relpath(fn, - start=os.path.dirname( - __import__(package).__file__)) + obj_module = inspect.getmodule(obj) + if not obj_module: + print(f"could not infer source code link for: {info}") + return + module_name = obj_module.__name__.split('.')[0] + + module_dir_dict = { + "cugraph_dgl": "cugraph-dgl", + "cugraph_pyg": "cugraph-pyg", + "cugraph_service_client": "cugraph-service/client", + "cugraph_service_server": "cugraph-service/server", + "cugraph": "cugraph", + "pylibcugraph": "pylibcugraph", + } + module_dir = module_dir_dict.get(module_name) + if not module_dir: + print(f"no source path directory set for {module_name}") + return + + obj_path = "/".join(obj_module.__name__.split(".")[1:]) + obj_file_ext = obj_module.__file__.split('.')[-1] + source_ext = "pyx" if obj_file_ext == "so" else "py" + fn = f"{module_dir}/{module_name}/{obj_path}.{source_ext}" # Get the line number if we need it. 
(Can work without it) if (lineno is None): @@ -137,18 +123,15 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): else: lineno = '' return url_fmt.format(revision=revision, - package=package, path=fn, lineno=lineno) -def make_linkcode_resolve(package, url_fmt): +def make_linkcode_resolve(url_fmt): """Returns a linkcode_resolve function for the given URL format revision is a git commit reference (hash or name) - package is the name of the root module of the package - url_fmt is along the lines of ('https://github.com/USER/PROJECT/' 'blob/{revision}/{package}/' '{path}#L{lineno}') @@ -156,5 +139,4 @@ def make_linkcode_resolve(package, url_fmt): revision = _get_git_revision() return partial(_linkcode_resolve, revision=revision, - package=package, url_fmt=url_fmt) diff --git a/mg_utils/run-dask-process.sh b/mg_utils/run-dask-process.sh index e5fa8fab332..b88abb685ec 100755 --- a/mg_utils/run-dask-process.sh +++ b/mg_utils/run-dask-process.sh @@ -102,6 +102,7 @@ function buildTcpArgs { " WORKER_ARGS="--rmm-pool-size=$WORKER_RMM_POOL_SIZE + --rmm-async --local-directory=/tmp/$LOGNAME --scheduler-file=$SCHEDULER_FILE --memory-limit=$DASK_HOST_MEMORY_LIMIT diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py index e70f2d0c6d1..7825febc24b 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py @@ -19,8 +19,8 @@ from cugraph_dgl.nn.conv.base import BaseConv from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch import BipartiteCSC, SampledCSC, StaticCSC -from pylibcugraphops.pytorch.operators import mha_gat_n2n, mha_gat_n2n_bipartite +from pylibcugraphops.pytorch import CSC +from pylibcugraphops.pytorch.operators import mha_gat_n2n dgl = import_optional("dgl") torch = import_optional("torch") @@ -173,9 +173,20 @@ def forward( :math:`H` is the number of heads, and :math:`D_{out}` is size of output feature. """ + if max_in_degree is None: + max_in_degree = -1 + bipartite = not isinstance(nfeat, torch.Tensor) offsets, indices, _ = g.adj_tensors("csc") + graph = CSC( + offsets=offsets, + indices=indices, + num_src_nodes=g.num_src_nodes(), + dst_max_in_degree=max_in_degree, + is_bipartite=bipartite, + ) + if efeat is not None: if self.fc_edge is None: raise RuntimeError( @@ -191,23 +202,8 @@ def forward( f"integers to allow bipartite node features, but got " f"{self.in_feats}." ) - _graph = BipartiteCSC( - offsets=offsets, indices=indices, num_src_nodes=g.num_src_nodes() - ) nfeat_src = self.fc_src(nfeat[0]) nfeat_dst = self.fc_dst(nfeat[1]) - - out = mha_gat_n2n_bipartite( - src_feat=nfeat_src, - dst_feat=nfeat_dst, - attn_weights=self.attn_weights, - graph=_graph, - num_heads=self.num_heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=efeat, - ) else: if not hasattr(self, "fc"): raise RuntimeError( @@ -215,36 +211,17 @@ def forward( f"integer, but got {self.in_feats}." 
) nfeat = self.fc(nfeat) - # Sampled primitive does not support edge features - if g.is_block and efeat is None: - if max_in_degree is None: - max_in_degree = g.in_degrees().max().item() - - if max_in_degree < self.MAX_IN_DEGREE_MFG: - _graph = SampledCSC( - offsets=offsets, - indices=indices, - max_num_neighbors=max_in_degree, - num_src_nodes=g.num_src_nodes(), - ) - else: - offsets = self.pad_offsets(offsets, g.num_src_nodes() + 1) - _graph = StaticCSC(offsets=offsets, indices=indices) - else: - if g.is_block: - offsets = self.pad_offsets(offsets, g.num_src_nodes() + 1) - _graph = StaticCSC(offsets=offsets, indices=indices) - - out = mha_gat_n2n( - feat=nfeat, - attn_weights=self.attn_weights, - graph=_graph, - num_heads=self.num_heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=efeat, - )[: g.num_dst_nodes()] + + out = mha_gat_n2n( + (nfeat_src, nfeat_dst) if bipartite else nfeat, + self.attn_weights, + graph, + num_heads=self.num_heads, + activation="LeakyReLU", + negative_slope=self.negative_slope, + concat_heads=self.concat, + edge_feat=efeat, + )[: g.num_dst_nodes()] if self.concat: out = out.view(-1, self.num_heads, self.out_feats) diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py index 1898f5159b1..141adc86069 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py @@ -15,7 +15,7 @@ from cugraph_dgl.nn.conv.base import BaseConv from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch import BipartiteCSC, StaticCSC +from pylibcugraphops.pytorch import CSC from pylibcugraphops.pytorch.operators import mha_simple_n2n dgl = import_optional("dgl") @@ -132,31 +132,34 @@ def forward( efeat: torch.Tensor, optional Edge feature tensor. Default: ``None``. """ - bipartite = not isinstance(nfeat, torch.Tensor) offsets, indices, _ = g.adj_tensors("csc") - - if bipartite: - src_feats, dst_feats = nfeat - _graph = BipartiteCSC( - offsets=offsets, indices=indices, num_src_nodes=g.num_src_nodes() - ) - else: - src_feats = dst_feats = nfeat - if g.is_block: - offsets = self.pad_offsets(offsets, g.num_src_nodes() + 1) - _graph = StaticCSC(offsets=offsets, indices=indices) - - query = self.lin_query(dst_feats) - key = self.lin_key(src_feats) - value = self.lin_value(src_feats) - if self.lin_edge is not None: + graph = CSC( + offsets=offsets, + indices=indices, + num_src_nodes=g.num_src_nodes(), + is_bipartite=True, + ) + + if isinstance(nfeat, torch.Tensor): + nfeat = (nfeat, nfeat) + + query = self.lin_query(nfeat[1][: g.num_dst_nodes()]) + key = self.lin_key(nfeat[0]) + value = self.lin_value(nfeat[0]) + + if efeat is not None: + if self.lin_edge is None: + raise RuntimeError( + f"{self.__class__.__name__}.edge_feats must be set to allow " + f"edge features." 
+ ) efeat = self.lin_edge(efeat) out = mha_simple_n2n( key_emb=key, query_emb=query, value_emb=value, - graph=_graph, + graph=graph, num_heads=self.num_heads, concat_heads=self.concat, edge_emb=efeat, @@ -165,7 +168,7 @@ def forward( )[: g.num_dst_nodes()] if self.root_weight: - res = self.lin_skip(dst_feats[: g.num_dst_nodes()]) + res = self.lin_skip(nfeat[1][: g.num_dst_nodes()]) if self.lin_beta is not None: beta = self.lin_beta(torch.cat([out, res, out - res], dim=-1)) beta = beta.sigmoid() diff --git a/python/cugraph-dgl/tests/nn/test_transformerconv.py b/python/cugraph-dgl/tests/nn/test_transformerconv.py index 64af795231c..00476b9f0bb 100644 --- a/python/cugraph-dgl/tests/nn/test_transformerconv.py +++ b/python/cugraph-dgl/tests/nn/test_transformerconv.py @@ -26,14 +26,14 @@ @pytest.mark.parametrize("beta", [False, True]) -@pytest.mark.parametrize("bipartite", [False, True]) +@pytest.mark.parametrize("bipartite_node_feats", [False, True]) @pytest.mark.parametrize("concat", [False, True]) @pytest.mark.parametrize("idtype_int", [False, True]) @pytest.mark.parametrize("num_heads", [1, 2, 3, 4]) @pytest.mark.parametrize("to_block", [False, True]) @pytest.mark.parametrize("use_edge_feats", [False, True]) def test_TransformerConv( - beta, bipartite, concat, idtype_int, num_heads, to_block, use_edge_feats + beta, bipartite_node_feats, concat, idtype_int, num_heads, to_block, use_edge_feats ): device = "cuda" g = create_graph1().to(device) @@ -44,7 +44,7 @@ def test_TransformerConv( if to_block: g = dgl.to_block(g) - if bipartite: + if bipartite_node_feats: in_node_feats = (5, 3) nfeat = ( torch.rand(g.num_src_nodes(), in_node_feats[0], device=device), diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py index bec50792131..207efcdace4 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py @@ -12,7 +12,7 @@ # limitations under the License. import warnings -from typing import Any, Optional, Tuple, Union +from typing import Optional, Tuple, Union from cugraph.utilities.utils import import_optional @@ -20,13 +20,7 @@ torch_geometric = import_optional("torch_geometric") try: # pragma: no cover - from pylibcugraphops.pytorch import ( - BipartiteCSC, - SampledCSC, - SampledHeteroCSC, - StaticCSC, - StaticHeteroCSC, - ) + from pylibcugraphops.pytorch import CSC, HeteroCSC HAS_PYLIBCUGRAPHOPS = True except ImportError: @@ -94,7 +88,7 @@ def get_cugraph( csc: Tuple[torch.Tensor, torch.Tensor, int], bipartite: bool = False, max_num_neighbors: Optional[int] = None, - ) -> Any: + ) -> CSC: r"""Constructs a :obj:`cugraph-ops` graph object from CSC representation. Supports both bipartite and non-bipartite graphs. 
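The hunks in this file, like those in the cugraph-dgl and cugraph-pyg convolution layers above, replace the separate BipartiteCSC, SampledCSC, and StaticCSC graph types with the single CSC class. A minimal sketch of the consolidated constructor, using only the keyword arguments that appear in this diff; the tensor contents, sizes, and device placement are illustrative assumptions, not values from the PR:

import torch
from pylibcugraphops.pytorch import CSC

# 3 destination nodes (offsets has length 4), 3 source nodes, 5 edges
offsets = torch.tensor([0, 2, 3, 5], dtype=torch.int64, device="cuda")
indices = torch.tensor([0, 1, 2, 0, 2], dtype=torch.int64, device="cuda")

graph = CSC(
    offsets=offsets,
    indices=indices,
    num_src_nodes=3,
    dst_max_in_degree=-1,  # -1 mirrors the "no max in-degree given" default used above
    is_bipartite=False,
)

The HeteroCSC construction in get_typed_cugraph below follows the same pattern, adding edge_types and num_edge_types arguments.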
@@ -119,16 +113,16 @@ def get_cugraph( f"based processing (got CPU tensor)" ) - if bipartite: - return BipartiteCSC(colptr, row, num_src_nodes) + if max_num_neighbors is None: + max_num_neighbors = -1 - if num_src_nodes != colptr.numel() - 1: - if max_num_neighbors is None: - max_num_neighbors = int((colptr[1:] - colptr[:-1]).max()) - - return SampledCSC(colptr, row, max_num_neighbors, num_src_nodes) - - return StaticCSC(colptr, row) + return CSC( + offsets=colptr, + indices=row, + num_src_nodes=num_src_nodes, + dst_max_in_degree=max_num_neighbors, + is_bipartite=bipartite, + ) def get_typed_cugraph( self, @@ -137,7 +131,7 @@ def get_typed_cugraph( num_edge_types: Optional[int] = None, bipartite: bool = False, max_num_neighbors: Optional[int] = None, - ) -> Any: + ) -> HeteroCSC: r"""Constructs a typed :obj:`cugraph` graph object from a CSC representation where each edge corresponds to a given edge type. Supports both bipartite and non-bipartite graphs. @@ -162,21 +156,21 @@ def get_typed_cugraph( if num_edge_types is None: num_edge_types = int(edge_type.max()) + 1 + if max_num_neighbors is None: + max_num_neighbors = -1 + row, colptr, num_src_nodes = csc edge_type = edge_type.int() - if bipartite: - raise NotImplementedError - - if num_src_nodes != colptr.numel() - 1: - if max_num_neighbors is None: - max_num_neighbors = int((colptr[1:] - colptr[:-1]).max()) - - return SampledHeteroCSC( - colptr, row, edge_type, max_num_neighbors, num_src_nodes, num_edge_types - ) - - return StaticHeteroCSC(colptr, row, edge_type, num_edge_types) + return HeteroCSC( + offsets=colptr, + indices=row, + edge_types=edge_type, + num_src_nodes=num_src_nodes, + num_edge_types=num_edge_types, + dst_max_in_degree=max_num_neighbors, + is_bipartite=bipartite, + ) def forward( self, diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py index 4bf37cf3e72..23b7d50ba96 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py @@ -12,7 +12,7 @@ # limitations under the License. from typing import Optional, Tuple, Union -from pylibcugraphops.pytorch.operators import mha_gat_n2n, mha_gat_n2n_bipartite +from pylibcugraphops.pytorch.operators import mha_gat_n2n from cugraph.utilities.utils import import_optional @@ -203,19 +203,6 @@ def forward( ) x_src = self.lin_src(x[0]) x_dst = self.lin_dst(x[1]) - - out = mha_gat_n2n_bipartite( - x_src, - x_dst, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - ) - else: if not hasattr(self, "lin"): raise RuntimeError( @@ -224,16 +211,16 @@ def forward( ) x = self.lin(x) - out = mha_gat_n2n( - x, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - ) + out = mha_gat_n2n( + (x_src, x_dst) if bipartite else x, + self.att, + graph, + num_heads=self.heads, + activation="LeakyReLU", + negative_slope=self.negative_slope, + concat_heads=self.concat, + edge_feat=edge_attr, + ) if self.bias is not None: out = out + self.bias diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py index 66d962b3f86..d4c947b952a 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py @@ -12,7 +12,7 @@ # limitations under the License. 
from typing import Optional, Tuple, Union -from pylibcugraphops.pytorch.operators import mha_gat_v2_n2n, mha_gat_v2_n2n_bipartite +from pylibcugraphops.pytorch.operators import mha_gat_v2_n2n from cugraph.utilities.utils import import_optional @@ -187,8 +187,8 @@ def forward( representation to the desired format. edge_attr: (torch.Tensor, optional) The edge features. """ - bipartite = not isinstance(x, torch.Tensor) - graph = self.get_cugraph(csc, bipartite=bipartite or not self.share_weights) + bipartite = not isinstance(x, torch.Tensor) or not self.share_weights + graph = self.get_cugraph(csc, bipartite=bipartite) if edge_attr is not None: if self.lin_edge is None: @@ -200,38 +200,24 @@ def forward( edge_attr = edge_attr.view(-1, 1) edge_attr = self.lin_edge(edge_attr) - if not bipartite and self.share_weights: + if bipartite: + if isinstance(x, torch.Tensor): + x = (x, x) + x_src = self.lin_src(x[0]) + x_dst = self.lin_dst(x[1]) + else: x = self.lin_src(x) - out = mha_gat_v2_n2n( - x, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - ) - else: - if bipartite: - x_src = self.lin_src(x[0]) - x_dst = self.lin_dst(x[1]) - else: - x_src = self.lin_src(x) - x_dst = self.lin_dst(x) - - out = mha_gat_v2_n2n_bipartite( - x_src, - x_dst, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - ) + out = mha_gat_v2_n2n( + (x_src, x_dst) if bipartite else x, + self.att, + graph, + num_heads=self.heads, + activation="LeakyReLU", + negative_slope=self.negative_slope, + concat_heads=self.concat, + edge_feat=edge_attr, + ) if self.bias is not None: out = out + self.bias diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py index aeb51c028ae..f67756eb3fe 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py @@ -12,7 +12,7 @@ # limitations under the License. from typing import Optional, Tuple, Union -from pylibcugraphops.pytorch.operators import mha_simple_n2n as TransformerConvAgg +from pylibcugraphops.pytorch.operators import mha_simple_n2n from cugraph.utilities.utils import import_optional @@ -168,10 +168,10 @@ def forward( representation to the desired format. edge_attr: (torch.Tensor, optional) The edge features. """ - bipartite = not isinstance(x, torch.Tensor) + bipartite = True graph = self.get_cugraph(csc, bipartite=bipartite) - if not bipartite: + if isinstance(x, torch.Tensor): x = (x, x) query = self.lin_query(x[1]) @@ -186,7 +186,7 @@ def forward( ) edge_attr = self.lin_edge(edge_attr) - out = TransformerConvAgg( + out = mha_simple_n2n( key, query, value, diff --git a/python/cugraph/cugraph/__init__.py b/python/cugraph/cugraph/__init__.py index 8ed49ccdd1b..3b9c4e007e2 100644 --- a/python/cugraph/cugraph/__init__.py +++ b/python/cugraph/cugraph/__init__.py @@ -118,5 +118,6 @@ from cugraph import gnn +from cugraph import exceptions __version__ = "23.08.00" diff --git a/python/cugraph/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/cugraph/dask/link_analysis/pagerank.py index 4aba5725c1b..2dfd25fa522 100644 --- a/python/cugraph/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/cugraph/dask/link_analysis/pagerank.py @@ -13,31 +13,41 @@ # limitations under the License. 
# +import warnings + +import dask from dask.distributed import wait, default_client -import cugraph.dask.comms.comms as Comms import dask_cudf import cudf import numpy as np -import warnings -from cugraph.dask.common.input_utils import get_distributed_data - from pylibcugraph import ( + pagerank as plc_pagerank, + personalized_pagerank as plc_p_pagerank, + exceptions as plc_exceptions, ResourceHandle, - pagerank as pylibcugraph_pagerank, - personalized_pagerank as pylibcugraph_p_pagerank, ) +import cugraph.dask.comms.comms as Comms +from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.exceptions import FailedToConvergeError + -def convert_to_cudf(cp_arrays): +def convert_to_return_tuple(plc_pr_retval): """ - Creates a cudf DataFrame from cupy arrays from pylibcugraph wrapper + Using the PLC pagerank return tuple, creates a cudf DataFrame from the cupy + arrays and extracts the (optional) bool. """ - cupy_vertices, cupy_pagerank = cp_arrays + if len(plc_pr_retval) == 3: + cupy_vertices, cupy_pagerank, converged = plc_pr_retval + else: + cupy_vertices, cupy_pagerank = plc_pr_retval + converged = True + df = cudf.DataFrame() df["vertex"] = cupy_vertices df["pagerank"] = cupy_pagerank - return df + return (df, converged) # FIXME: Move this function to the utility module so that it can be @@ -99,20 +109,26 @@ def _call_plc_pagerank( epsilon, max_iterations, do_expensive_check, + fail_on_nonconvergence, ): - - return pylibcugraph_pagerank( - resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), - graph=mg_graph_x, - precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices, - precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, - initial_guess_vertices=initial_guess_vertices, - initial_guess_values=initial_guess_values, - alpha=alpha, - epsilon=epsilon, - max_iterations=max_iterations, - do_expensive_check=do_expensive_check, - ) + try: + return plc_pagerank( + resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), + graph=mg_graph_x, + precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices, + precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, + initial_guess_vertices=initial_guess_vertices, + initial_guess_values=initial_guess_values, + alpha=alpha, + epsilon=epsilon, + max_iterations=max_iterations, + do_expensive_check=do_expensive_check, + fail_on_nonconvergence=fail_on_nonconvergence, + ) + # Re-raise this as a cugraph exception so users trying to catch this do not + # have to know to import another package. 
+ except plc_exceptions.FailedToConvergeError as exc: + raise FailedToConvergeError from exc def _call_plc_personalized_pagerank( @@ -127,23 +143,30 @@ epsilon, max_iterations, do_expensive_check, + fail_on_nonconvergence, ): personalization_vertices = data_personalization["vertex"] personalization_values = data_personalization["values"] - return pylibcugraph_p_pagerank( - resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), - graph=mg_graph_x, - precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices, - precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, - personalization_vertices=personalization_vertices, - personalization_values=personalization_values, - initial_guess_vertices=initial_guess_vertices, - initial_guess_values=initial_guess_values, - alpha=alpha, - epsilon=epsilon, - max_iterations=max_iterations, - do_expensive_check=do_expensive_check, - ) + try: + return plc_p_pagerank( + resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), + graph=mg_graph_x, + precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices, + precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, + personalization_vertices=personalization_vertices, + personalization_values=personalization_values, + initial_guess_vertices=initial_guess_vertices, + initial_guess_values=initial_guess_values, + alpha=alpha, + epsilon=epsilon, + max_iterations=max_iterations, + do_expensive_check=do_expensive_check, + fail_on_nonconvergence=fail_on_nonconvergence, + ) + # Re-raise this as a cugraph exception so users trying to catch this do not + # have to know to import another package. + except plc_exceptions.FailedToConvergeError as exc: + raise FailedToConvergeError from exc def pagerank( @@ -154,6 +177,7 @@ max_iter=100, tol=1.0e-5, nstart=None, + fail_on_nonconvergence=True, ): """ Find the PageRank values for each vertex in a graph using multiple GPUs. @@ -222,8 +246,18 @@ nstart['values'] : cudf.Series Pagerank values for vertices + fail_on_nonconvergence : bool (default=True) + If the solver does not reach convergence, raise an exception if + fail_on_nonconvergence is True. If fail_on_nonconvergence is False, + the return value is a tuple of (pagerank, converged) where pagerank is + a dask_cudf.DataFrame as described below, and converged is a boolean + indicating if the solver converged (True) or not (False). + Returns ------- + The return value varies based on the value of the fail_on_nonconvergence + parameter. If fail_on_nonconvergence is True: + PageRank : dask_cudf.DataFrame GPU data frame containing two dask_cudf.Series of size V: the vertex identifiers and the corresponding PageRank values. @@ -244,6 +278,12 @@ ddf['pagerank'] : dask_cudf.Series Contains the PageRank score + If fail_on_nonconvergence is False: + + (PageRank, converged) : tuple of (dask_cudf.DataFrame, bool) + PageRank is the GPU dataframe described above, and converged is a bool + indicating if the solver converged (True) or not (False). 
+ Examples -------- >>> import cugraph.dask as dcg @@ -328,6 +368,7 @@ tol, max_iter, do_expensive_check, + fail_on_nonconvergence, workers=[w], allow_other_workers=False, ) @@ -347,6 +388,7 @@ tol, max_iter, do_expensive_check, + fail_on_nonconvergence, workers=[w], allow_other_workers=False, ) @@ -355,17 +397,35 @@ wait(result) - cudf_result = [client.submit(convert_to_cudf, cp_arrays) for cp_arrays in result] + vertex_dtype = input_graph.edgelist.edgelist_df.dtypes[0] + + # Have each worker convert its tuple of arrays and bool from PLC to a cudf + # DataFrame and bool. This will be a list of futures. + result_tuples = [ + client.submit(convert_to_return_tuple, cp_arrays) for cp_arrays in result + ] - wait(cudf_result) + # Convert the futures to dask delayed objects so the tuples can be + # split. nout=2 is passed since each tuple/iterable has a fixed length of 2. + result_tuples = [dask.delayed(r, nout=2) for r in result_tuples] + + # Create the ddf and get the converged bool from the delayed objs. Use a + # meta DataFrame to pass the expected dtypes for the DataFrame to prevent + # another compute from determining them automatically. + meta = cudf.DataFrame(columns=["vertex", "pagerank"]) + meta = meta.astype({"pagerank": "float64", "vertex": vertex_dtype}) + ddf = dask_cudf.from_delayed([t[0] for t in result_tuples], meta=meta).persist() + converged = all(dask.compute(*[t[1] for t in result_tuples])) - ddf = dask_cudf.from_delayed(cudf_result).persist() wait(ddf) # Wait until the inactive futures are released wait([(r.release(), c_r.release()) for r, c_r in zip(result, result_tuples)]) if input_graph.renumbered: ddf = input_graph.unrenumber(ddf, "vertex") - return ddf + if fail_on_nonconvergence: + return ddf + else: + return (ddf, converged) diff --git a/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py b/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py index 7d8972a7385..d74a8df14eb 100644 --- a/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py @@ -14,10 +14,11 @@ from __future__ import annotations +import warnings + import numpy from dask import delayed -from dask.distributed import wait, Lock, get_client -from cugraph.dask.common.input_utils import get_distributed_data +from dask.distributed import Lock, get_client, wait import dask_cudf import cudf @@ -26,12 +27,20 @@ from pylibcugraph import ResourceHandle from pylibcugraph import uniform_neighbor_sample as pylibcugraph_uniform_neighbor_sample +from pylibcugraph.utilities.api_tools import deprecated_warning_wrapper from cugraph.dask.comms import comms as Comms +from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.dask import get_n_workers from typing import Sequence, List, Union, Tuple from typing import TYPE_CHECKING +from cugraph.dask.common.part_utils import ( + get_persisted_df_worker_map, + persist_dask_df_equal_parts_per_worker, +) + if TYPE_CHECKING: from cugraph import Graph @@ -150,7 +159,63 @@ def convert_to_cudf(cp_arrays, weight_t, with_edge_properties, return_offsets=Fa return df +def __get_label_to_output_comm_rank(min_batch_id, max_batch_id, n_workers): + num_batches = max_batch_id - min_batch_id + 1 + num_batches = int(num_batches) + z = cp.zeros(num_batches, dtype="int32") + s = cp.array_split(cp.arange(num_batches), n_workers) + for i, t in enumerate(s): + z[t] = 
i + + return z + + def _call_plc_uniform_neighbor_sample( + sID, + mg_graph_x, + st_x, + keep_batches_together, + n_workers, + min_batch_id, + max_batch_id, + fanout_vals, + with_replacement, + weight_t, + with_edge_properties, + random_state=None, + return_offsets=False, +): + st_x = st_x[0] + start_list_x = st_x[start_col_name] + batch_id_list_x = st_x[batch_col_name] if batch_col_name in st_x else None + + label_list = None + label_to_output_comm_rank = None + if keep_batches_together: + label_list = cp.arange(min_batch_id, max_batch_id + 1, dtype="int32") + label_to_output_comm_rank = __get_label_to_output_comm_rank( + min_batch_id, max_batch_id, n_workers + ) + + cp_arrays = pylibcugraph_uniform_neighbor_sample( + resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), + input_graph=mg_graph_x, + start_list=start_list_x, + label_list=label_list, + label_to_output_comm_rank=label_to_output_comm_rank, + h_fan_out=fanout_vals, + with_replacement=with_replacement, + do_expensive_check=False, + with_edge_properties=with_edge_properties, + batch_id_list=batch_id_list_x, + random_state=random_state, + ) + return convert_to_cudf( + cp_arrays, weight_t, with_edge_properties, return_offsets=return_offsets + ) + + +def _call_plc_uniform_neighbor_sample_legacy( sID, mg_graph_x, st_x, @@ -183,7 +248,7 @@ def _call_plc_uniform_neighbor_sample( ) -def _mg_call_plc_uniform_neighbor_sample( +def _mg_call_plc_uniform_neighbor_sample_legacy( client, session_id, input_graph, @@ -200,7 +265,7 @@ def _mg_call_plc_uniform_neighbor_sample( ): result = [ client.submit( - _call_plc_uniform_neighbor_sample, + _call_plc_uniform_neighbor_sample_legacy, session_id, input_graph._plc_graph[w], ddf[w][0], @@ -247,7 +312,92 @@ def _mg_call_plc_uniform_neighbor_sample( return ddf -def uniform_neighbor_sample( +def _mg_call_plc_uniform_neighbor_sample( + client, + session_id, + input_graph, + ddf, + keep_batches_together, + min_batch_id, + max_batch_id, + fanout_vals, + with_replacement, + weight_t, + indices_t, + with_edge_properties, + random_state, + return_offsets=False, +): + n_workers = None + if keep_batches_together: + n_workers = get_n_workers() + + if hasattr(min_batch_id, "compute"): + min_batch_id = min_batch_id.compute() + if hasattr(max_batch_id, "compute"): + max_batch_id = max_batch_id.compute() + + result = [ + client.submit( + _call_plc_uniform_neighbor_sample, + session_id, + input_graph._plc_graph[w], + starts, + keep_batches_together, + n_workers, + min_batch_id, + max_batch_id, + fanout_vals, + with_replacement, + weight_t=weight_t, + with_edge_properties=with_edge_properties, + # FIXME accept and properly transmute a numpy/cupy random state. 
+ random_state=hash((random_state, w)), + return_offsets=return_offsets, + allow_other_workers=False, + pure=False, + ) + for w, starts in ddf.items() + ] + del ddf + + empty_df = ( + create_empty_df_with_edge_props( + indices_t, weight_t, return_offsets=return_offsets + ) + if with_edge_properties + else create_empty_df(indices_t, weight_t) + ) + + wait(result) + + if return_offsets: + result_split = [delayed(lambda x: x, nout=2)(r) for r in result] + ddf = dask_cudf.from_delayed( + [r[0] for r in result_split], meta=empty_df[0], verify_meta=False + ).persist() + ddf_offsets = dask_cudf.from_delayed( + [r[1] for r in result_split], meta=empty_df[1], verify_meta=False + ).persist() + + wait([ddf, ddf_offsets]) + wait([r.release() for r in result_split]) + wait([r.release() for r in result]) + + del result + + return ddf, ddf_offsets + else: + ddf = dask_cudf.from_delayed(result, meta=empty_df, verify_meta=False).persist() + + wait(ddf) + wait([r.release() for r in result]) + del result + + return ddf + + +def _uniform_neighbor_sample_legacy( input_graph: Graph, start_list: Sequence, fanout_vals: List[int], @@ -259,6 +409,162 @@ def uniform_neighbor_sample( random_state: int = None, return_offsets: bool = False, _multiple_clients: bool = False, +) -> Union[dask_cudf.DataFrame, Tuple[dask_cudf.DataFrame, dask_cudf.DataFrame]]: + warnings.warn( + "The batch_id_list, label_list, and label_to_output_comm_rank " + "parameters are deprecated. Consider using with_batch_ids, " + "keep_batches_together, min_batch_id, and max_batch_id instead." + ) + + if isinstance(start_list, int): + start_list = [start_list] + + if isinstance(start_list, list): + start_list = cudf.Series( + start_list, + dtype=input_graph.edgelist.edgelist_df[ + input_graph.renumber_map.renumbered_src_col_name + ].dtype, + ) + + elif with_edge_properties and batch_id_list is None: + batch_id_list = cudf.Series(cp.zeros(len(start_list), dtype="int32")) + + # fanout_vals must be a host array! + # FIXME: ensure other sequence types (eg. cudf Series) can be handled. 
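# The return_offsets branch above and the MG pagerank change earlier in this
# diff both split a per-worker future of a fixed-length tuple with
# dask.delayed(..., nout=2). A minimal, self-contained sketch of that pattern;
# the `work` function and its data are hypothetical, not part of this diff:

import dask
from dask import delayed

def work():
    # Stand-in for a worker task that returns a (data, flag) 2-tuple.
    return ([1, 2, 3], True)

pair = delayed(work, nout=2)()   # nout=2 makes the Delayed unpackable
data, flag = pair                # each element is itself a Delayed
assert dask.compute(data, flag) == ([1, 2, 3], True)

# This is why the diff wraps each future before handing the first tuple
# element to dask_cudf.from_delayed and computing the second one separately.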
+    if isinstance(fanout_vals, list):
+        fanout_vals = numpy.asarray(fanout_vals, dtype="int32")
+    else:
+        raise TypeError("fanout_vals must be a list, " f"got: {type(fanout_vals)}")
+
+    if "value" in input_graph.edgelist.edgelist_df:
+        weight_t = input_graph.edgelist.edgelist_df["value"].dtype
+    else:
+        weight_t = "float32"
+
+    if "_SRC_" in input_graph.edgelist.edgelist_df:
+        indices_t = input_graph.edgelist.edgelist_df["_SRC_"].dtype
+    elif src_n in input_graph.edgelist.edgelist_df:
+        indices_t = input_graph.edgelist.edgelist_df[src_n].dtype
+    else:
+        indices_t = numpy.int32
+
+    start_list = start_list.rename(start_col_name)
+    if batch_id_list is not None:
+        batch_id_list = batch_id_list.rename(batch_col_name)
+        if hasattr(start_list, "compute"):
+            # mg input
+            start_list = start_list.to_frame()
+            batch_id_list = batch_id_list.to_frame()
+            ddf = start_list.merge(
+                batch_id_list,
+                how="left",
+                left_index=True,
+                right_index=True,
+            )
+        else:
+            # sg input
+            ddf = cudf.concat(
+                [
+                    start_list,
+                    batch_id_list,
+                ],
+                axis=1,
+            )
+    else:
+        ddf = start_list.to_frame()
+
+    if input_graph.renumbered:
+        ddf = input_graph.lookup_internal_vertex_id(ddf, column_name=start_col_name)
+
+    if hasattr(ddf, "compute"):
+        ddf = get_distributed_data(ddf)
+        wait(ddf)
+        ddf = ddf.worker_to_parts
+    else:
+        splits = cp.array_split(cp.arange(len(ddf)), len(Comms.get_workers()))
+        ddf = {w: [ddf.iloc[splits[i]]] for i, w in enumerate(Comms.get_workers())}
+
+    client = get_client()
+    session_id = Comms.get_session_id()
+    if _multiple_clients:
+        # Distributed centralized lock to allow
+        # two disconnected processes (clients) to coordinate a lock
+        # https://docs.dask.org/en/stable/futures.html?highlight=lock#distributed.Lock
+        lock = Lock("plc_graph_access")
+        if lock.acquire(timeout=100):
+            try:
+                ddf = _mg_call_plc_uniform_neighbor_sample_legacy(
+                    client=client,
+                    session_id=session_id,
+                    input_graph=input_graph,
+                    ddf=ddf,
+                    label_list=label_list,
+                    label_to_output_comm_rank=label_to_output_comm_rank,
+                    fanout_vals=fanout_vals,
+                    with_replacement=with_replacement,
+                    weight_t=weight_t,
+                    indices_t=indices_t,
+                    with_edge_properties=with_edge_properties,
+                    random_state=random_state,
+                    return_offsets=return_offsets,
+                )
+            finally:
+                lock.release()
+        else:
+            raise RuntimeError(
+                "Failed to acquire lock(plc_graph_access) while trying to sample"
+            )
+    else:
+        ddf = _mg_call_plc_uniform_neighbor_sample_legacy(
+            client=client,
+            session_id=session_id,
+            input_graph=input_graph,
+            ddf=ddf,
+            label_list=label_list,
+            label_to_output_comm_rank=label_to_output_comm_rank,
+            fanout_vals=fanout_vals,
+            with_replacement=with_replacement,
+            weight_t=weight_t,
+            indices_t=indices_t,
+            with_edge_properties=with_edge_properties,
+            random_state=random_state,
+            return_offsets=return_offsets,
+        )
+
+    if return_offsets:
+        ddf, offsets_ddf = ddf
+    if input_graph.renumbered:
+        ddf = input_graph.unrenumber(ddf, "sources", preserve_order=True)
+        ddf = input_graph.unrenumber(ddf, "destinations", preserve_order=True)
+
+    if return_offsets:
+        return ddf, offsets_ddf
+
+    return ddf
+
+
+uniform_neighbor_sample_legacy = deprecated_warning_wrapper(
+    _uniform_neighbor_sample_legacy
+)
+
+
+def uniform_neighbor_sample(
+    input_graph: Graph,
+    start_list: Sequence,
+    fanout_vals: List[int],
+    with_replacement: bool = True,
+    with_edge_properties: bool = False,
+    batch_id_list: Sequence = None,  # deprecated
+    label_list: Sequence = None,  # deprecated
+    label_to_output_comm_rank: bool = None,  # deprecated
+    with_batch_ids: bool = False,
+    keep_batches_together=False,
+    min_batch_id=None,
+    max_batch_id=None,
+    random_state: int = None,
+    return_offsets: bool = False,
+    _multiple_clients: bool = False,
 ) -> Union[dask_cudf.DataFrame, Tuple[dask_cudf.DataFrame, dask_cudf.DataFrame]]:
     """
     Does neighborhood sampling, which samples nodes from a graph based on the
@@ -285,20 +591,36 @@
         edge type, batch id, hop id) with the sampled edges.

     batch_id_list: cudf.Series or dask_cudf.Series (int32), optional (default=None)
+        Deprecated.
         List of batch ids that will be returned with the sampled edges if
         with_edge_properties is set to True.

     label_list: cudf.Series or dask_cudf.Series (int32), optional (default=None)
+        Deprecated.
         List of unique batch id labels.  Used along with
         label_to_output_comm_rank to assign batch ids to GPUs.

     label_to_output_comm_rank: cudf.Series or dask_cudf.Series (int32), optional (default=None)
+        Deprecated.
         List of output GPUs (by rank) corresponding to batch id labels in the
         label list.  Used to assign each batch id to a GPU.  Must be in
         ascending order (i.e. [0, 0, 1, 2]).

+    with_batch_ids: bool, optional (default=False)
+        Flag to specify whether batch ids are present in the start_list.
+
+    keep_batches_together: bool, optional (default=False)
+        If True, will ensure that the returned samples for each batch are on
+        the same partition.
+
+    min_batch_id: int, optional (default=None)
+        Required for the keep_batches_together option.  The minimum batch id.
+
+    max_batch_id: int, optional (default=None)
+        Required for the keep_batches_together option.  The maximum batch id.
+
     random_state: int, optional
         Random seed to use when making sampling calls.

@@ -363,6 +685,25 @@
         Contains the offsets of each batch in the sampling result
     """
+    if (
+        batch_id_list is not None
+        or label_list is not None
+        or label_to_output_comm_rank is not None
+    ):
+        return uniform_neighbor_sample_legacy(
+            input_graph,
+            start_list,
+            fanout_vals,
+            with_replacement=with_replacement,
+            with_edge_properties=with_edge_properties,
+            batch_id_list=batch_id_list,
+            label_list=label_list,
+            label_to_output_comm_rank=label_to_output_comm_rank,
+            random_state=random_state,
+            return_offsets=return_offsets,
+            _multiple_clients=_multiple_clients,
+        )
+
     if isinstance(start_list, int):
         start_list = [start_list]

@@ -373,9 +714,21 @@
                 input_graph.renumber_map.renumbered_src_col_name
             ].dtype,
         )
+    elif with_edge_properties and not with_batch_ids:
+        if isinstance(start_list, (cudf.DataFrame, dask_cudf.DataFrame)):
+            raise ValueError("expected 1d input for start list without batch ids")

-    elif with_edge_properties and batch_id_list is None:
-        batch_id_list = cudf.Series(cp.zeros(len(start_list), dtype="int32"))
+        start_list = start_list.to_frame()
+        start_list[batch_id_n] = cudf.Series(cp.zeros(len(start_list), dtype="int32"))
+
+    if keep_batches_together and min_batch_id is None:
+        raise ValueError(
+            "must provide min_batch_id if using keep_batches_together option"
+        )
+    if keep_batches_together and max_batch_id is None:
+        raise ValueError(
+            "must provide max_batch_id if using keep_batches_together option"
+        )

     # fanout_vals must be a host array!
     # FIXME: ensure other sequence types (eg. cudf Series) can be handled.
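# A standalone illustration of how the keep_batches_together option documented
# above maps batch ids onto workers. It mirrors the __get_label_to_output_comm_rank
# helper added earlier in this diff, rewritten with numpy for clarity; the
# function name and inputs here are illustrative only:

import numpy as np

def label_to_output_rank(min_batch_id, max_batch_id, n_workers):
    # Batch i (offset from min_batch_id) goes to the worker whose contiguous
    # slice of the batch-id range contains i.
    num_batches = int(max_batch_id - min_batch_id + 1)
    ranks = np.zeros(num_batches, dtype="int32")
    for rank, idx in enumerate(np.array_split(np.arange(num_batches), n_workers)):
        ranks[idx] = rank
    return ranks

# Five batches spread over two workers:
assert label_to_output_rank(0, 4, 2).tolist() == [0, 0, 0, 1, 1]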
@@ -396,44 +749,30 @@ def uniform_neighbor_sample( else: indices_t = numpy.int32 - start_list = start_list.rename(start_col_name) - if batch_id_list is not None: - batch_id_list = batch_id_list.rename(batch_col_name) - if hasattr(start_list, "compute"): - # mg input - start_list = start_list.to_frame() - batch_id_list = batch_id_list.to_frame() - ddf = start_list.merge( - batch_id_list, - how="left", - left_index=True, - right_index=True, - ) - else: - # sg input - ddf = cudf.concat( - [ - start_list, - batch_id_list, - ], - axis=1, - ) - else: + if isinstance(start_list, (cudf.Series, dask_cudf.Series)): + start_list = start_list.rename(start_col_name) ddf = start_list.to_frame() + else: + ddf = start_list + columns = ddf.columns + ddf = ddf.rename( + columns={columns[0]: start_col_name, columns[-1]: batch_col_name} + ) if input_graph.renumbered: ddf = input_graph.lookup_internal_vertex_id(ddf, column_name=start_col_name) - if hasattr(ddf, "compute"): - ddf = get_distributed_data(ddf) - wait(ddf) - ddf = ddf.worker_to_parts - else: - splits = cp.array_split(cp.arange(len(ddf)), len(Comms.get_workers())) - ddf = {w: [ddf.iloc[splits[i]]] for i, w in enumerate(Comms.get_workers())} - client = get_client() session_id = Comms.get_session_id() + n_workers = get_n_workers() + + if isinstance(ddf, cudf.DataFrame): + ddf = dask_cudf.from_cudf(ddf, npartitions=n_workers) + + ddf = ddf.repartition(npartitions=n_workers) + ddf = persist_dask_df_equal_parts_per_worker(ddf, client) + ddf = get_persisted_df_worker_map(ddf, client) + if _multiple_clients: # Distributed centralized lock to allow # two disconnected processes (clients) to coordinate a lock @@ -446,8 +785,9 @@ def uniform_neighbor_sample( session_id=session_id, input_graph=input_graph, ddf=ddf, - label_list=label_list, - label_to_output_comm_rank=label_to_output_comm_rank, + keep_batches_together=keep_batches_together, + min_batch_id=min_batch_id, + max_batch_id=max_batch_id, fanout_vals=fanout_vals, with_replacement=with_replacement, weight_t=weight_t, @@ -468,8 +808,9 @@ def uniform_neighbor_sample( session_id=session_id, input_graph=input_graph, ddf=ddf, - label_list=label_list, - label_to_output_comm_rank=label_to_output_comm_rank, + keep_batches_together=keep_batches_together, + min_batch_id=min_batch_id, + max_batch_id=max_batch_id, fanout_vals=fanout_vals, with_replacement=with_replacement, weight_t=weight_t, diff --git a/python/cugraph/cugraph/exceptions.py b/python/cugraph/cugraph/exceptions.py new file mode 100644 index 00000000000..64280603112 --- /dev/null +++ b/python/cugraph/cugraph/exceptions.py @@ -0,0 +1,26 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Exception classes for cugraph. +""" + + +class FailedToConvergeError(Exception): + """ + Raised when an algorithm fails to converge within a predetermined set of + constraints which vary based on the algorithm, and may or may not be + user-configurable. 
+ """ + + pass diff --git a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py index 0257a56ba08..a2b0a367d1d 100644 --- a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py +++ b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py @@ -15,16 +15,21 @@ from typing import Union -import cupy import cudf import dask_cudf -import cugraph.dask as dask_cugraph + +from dask.distributed import wait +from dask.distributed import futures_of import cugraph import pylibcugraph from cugraph.gnn.data_loading.bulk_sampler_io import write_samples +import warnings +import logging +import time + class EXPERIMENTAL__BulkSampler: start_col_name = "_START_" @@ -36,7 +41,8 @@ def __init__( output_path: str, graph, seeds_per_call: int = 200_000, - batches_per_partition=100, + batches_per_partition: int = 100, + log_level: int = None, **kwargs, ): """ @@ -55,13 +61,19 @@ def __init__( a single sampling call. batches_per_partition: int (optional, default=100) The number of batches outputted to a single parquet partition. + log_level: int (optional, default=None) + Whether to enable logging for this sampler. Supports 3 levels + of logging if enabled (INFO, WARNING, ERROR). If not provided, + defaults to WARNING. kwargs: kwargs Keyword arguments to be passed to the sampler (i.e. fanout). """ + self.__logger = logging.getLogger(__name__) + self.__logger.setLevel(log_level or logging.WARNING) + max_batches_per_partition = seeds_per_call // batch_size if batches_per_partition > max_batches_per_partition: - import warnings warnings.warn( f"batches_per_partition ({batches_per_partition}) is >" @@ -140,7 +152,7 @@ def add_batches( ... start_col_name="start_vid", ... batch_col_name="start_batch") """ - df = df.rename( + df = df[[start_col_name, batch_col_name]].rename( columns={ start_col_name: self.start_col_name, batch_col_name: self.batch_col_name, @@ -163,6 +175,11 @@ def add_batches( ) if self.size >= self.seeds_per_call: + self.__logger.info( + f"Number of input seeds ({self.size})" + f" is >= seeds per call ({self.seeds_per_call})." + " Calling flush() to compute and write minibatches." 
+ ) self.flush() def flush(self) -> None: @@ -171,12 +188,16 @@ def flush(self) -> None: """ if self.size == 0: return - self.__batches.reset_index(drop=True) + + start_time_calc_batches = time.perf_counter() + if isinstance(self.__batches, dask_cudf.DataFrame): + self.__batches = self.__batches.persist() min_batch_id = self.__batches[self.batch_col_name].min() if isinstance(self.__batches, dask_cudf.DataFrame): - min_batch_id = min_batch_id.compute() - min_batch_id = int(min_batch_id) + min_batch_id = min_batch_id.persist() + else: + min_batch_id = int(min_batch_id) partition_size = self.batches_per_partition * self.batch_size partitions_per_call = ( @@ -185,7 +206,19 @@ def flush(self) -> None: npartitions = partitions_per_call max_batch_id = min_batch_id + npartitions * self.batches_per_partition - 1 + if isinstance(self.__batches, dask_cudf.DataFrame): + max_batch_id = max_batch_id.persist() + batch_id_filter = self.__batches[self.batch_col_name] <= max_batch_id + if isinstance(batch_id_filter, dask_cudf.Series): + batch_id_filter = batch_id_filter.persist() + + end_time_calc_batches = time.perf_counter() + self.__logger.info( + f"Calculated batches to sample; min = {min_batch_id}" + f" and max = {max_batch_id};" + f" took {end_time_calc_batches - start_time_calc_batches:.4f} s" + ) if isinstance(self.__graph._plc_graph, pylibcugraph.graphs.SGGraph): sample_fn = cugraph.uniform_neighbor_sample @@ -194,28 +227,62 @@ def flush(self) -> None: self.__sample_call_args.update( { "_multiple_clients": True, - "label_to_output_comm_rank": self.__get_label_to_output_comm_rank( - min_batch_id, max_batch_id - ), - "label_list": cupy.arange( - min_batch_id, max_batch_id + 1, dtype="int32" - ), + "keep_batches_together": True, + "min_batch_id": min_batch_id, + "max_batch_id": max_batch_id, } ) + start_time_sample_call = time.perf_counter() + + # Call uniform neighbor sample samples, offsets = sample_fn( self.__graph, **self.__sample_call_args, - start_list=self.__batches[self.start_col_name][batch_id_filter], - batch_id_list=self.__batches[self.batch_col_name][batch_id_filter], + start_list=self.__batches[[self.start_col_name, self.batch_col_name]][ + batch_id_filter + ], + with_batch_ids=True, with_edge_properties=True, return_offsets=True, ) + end_time_sample_call = time.perf_counter() + sample_runtime = end_time_sample_call - start_time_sample_call + + self.__logger.info( + f"Called uniform neighbor sample, took {sample_runtime:.4f} s" + ) + + # Filter batches to remove those already processed self.__batches = self.__batches[~batch_id_filter] + del batch_id_filter + if isinstance(self.__batches, dask_cudf.DataFrame): + self.__batches = self.__batches.persist() + + start_time_write = time.perf_counter() + + # Write batches to parquet self.__write(samples, offsets) + if isinstance(self.__batches, dask_cudf.DataFrame): + wait( + [f.release() for f in futures_of(samples)] + + [f.release() for f in futures_of(offsets)] + ) + + del samples + del offsets + + end_time_write = time.perf_counter() + write_runtime = end_time_write - start_time_write + self.__logger.info(f"Wrote samples to parquet, took {write_runtime} seconds") - if self.size > 0: + current_size = self.size + if current_size > 0: + self.__logger.info( + f"There are still {current_size} samples remaining, " + "calling flush() again..." 
+ ) self.flush() def __write( @@ -227,13 +294,3 @@ def __write( write_samples( samples, offsets, self.__batches_per_partition, self.__output_path ) - - def __get_label_to_output_comm_rank(self, min_batch_id, max_batch_id): - num_workers = dask_cugraph.get_n_workers() - num_batches = max_batch_id - min_batch_id + 1 - z = cupy.zeros(num_batches, dtype="int32") - s = cupy.array_split(cupy.arange(num_batches), num_workers) - for i, t in enumerate(s): - z[t] = i - - return cudf.Series(z) diff --git a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py index d7f1c136484..44c1185bbf1 100644 --- a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py +++ b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py @@ -24,7 +24,7 @@ def _write_samples_to_parquet( batches_per_partition: int, output_path: str, partition_info: Optional[Union[dict, str]] = None, -) -> None: +) -> cudf.Series: """ Writes the samples to parquet. results: cudf.DataFrame @@ -40,11 +40,13 @@ def _write_samples_to_parquet( Either a dictionary containing partition data from dask, the string 'sg' indicating that this is a single GPU write, or None indicating that this function should perform a no-op (required by dask). + + Returns an empty cudf series. """ # Required by dask; need to skip dummy partitions. if partition_info is None or len(results) == 0: - return + return cudf.Series(dtype="int64") if partition_info != "sg" and (not isinstance(partition_info, dict)): raise ValueError("Invalid value of partition_info") @@ -71,6 +73,8 @@ def _write_samples_to_parquet( ).values results_p.to_parquet(full_output_path, compression=None, index=False) + return cudf.Series(dtype="int64") + def write_samples( results: cudf.DataFrame, @@ -97,7 +101,9 @@ def write_samples( batches_per_partition, output_path, align_dataframes=False, + meta=cudf.Series(dtype="int64"), ).compute() + else: _write_samples_to_parquet( results, offsets, batches_per_partition, output_path, partition_info="sg" diff --git a/python/cugraph/cugraph/link_analysis/pagerank.py b/python/cugraph/cugraph/link_analysis/pagerank.py index 6696512dcf0..d2b827fa7c8 100644 --- a/python/cugraph/cugraph/link_analysis/pagerank.py +++ b/python/cugraph/cugraph/link_analysis/pagerank.py @@ -11,20 +11,24 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.utilities import ( - ensure_cugraph_obj_for_nx, - df_score_to_dictionary, -) +import warnings + import cudf import numpy as np -import warnings from pylibcugraph import ( - pagerank as pylibcugraph_pagerank, - personalized_pagerank as pylibcugraph_p_pagerank, + pagerank as plc_pagerank, + personalized_pagerank as plc_p_pagerank, + exceptions as plc_exceptions, ResourceHandle, ) +from cugraph.utilities import ( + ensure_cugraph_obj_for_nx, + df_score_to_dictionary, +) +from cugraph.exceptions import FailedToConvergeError + def renumber_vertices(input_graph, input_df): if len(input_graph.renumber_map.implementation.col_names) > 1: @@ -86,9 +90,9 @@ def pagerank( nstart=None, weight=None, dangling=None, + fail_on_nonconvergence=True, ): - """ - Find the PageRank score for every vertex in a graph. cuGraph computes an + """Find the PageRank score for every vertex in a graph. cuGraph computes an approximation of the Pagerank eigenvector using the power method. 
The number of iterations depends on the properties of the network itself; it
    increases when the tolerance decreases and/or alpha increases toward the
@@ -163,8 +167,18 @@
     dangling : dict, optional (default=None)
         This parameter is here for NetworkX compatibility and ignored.

+    fail_on_nonconvergence : bool (default=True)
+        If the solver does not reach convergence, raise an exception if
+        fail_on_nonconvergence is True. If fail_on_nonconvergence is False,
+        the return value is a tuple of (pagerank, converged) where pagerank is
+        a cudf.DataFrame as described below, and converged is a boolean
+        indicating if the solver converged (True) or not (False).
+
     Returns
     -------
+    The return value varies based on the value of the fail_on_nonconvergence
+    parameter. If fail_on_nonconvergence is True:
+
     PageRank : cudf.DataFrame
         GPU data frame containing two cudf.Series of size V: the vertex
         identifiers and the corresponding PageRank values.
@@ -185,6 +199,12 @@
         df['pagerank'] : cudf.Series
             Contains the PageRank score

+    If fail_on_nonconvergence is False:
+
+    (PageRank, converged) : tuple of (cudf.DataFrame, bool)
+        PageRank is the GPU dataframe described above; converged is a bool
+        indicating if the solver converged (True) or not (False).
+
     Examples
     --------
     >>> from cugraph.experimental.datasets import karate
@@ -226,47 +246,55 @@
         pre_vtx_o_wgt_vertices = precomputed_vertex_out_weight["vertex"]
         pre_vtx_o_wgt_sums = precomputed_vertex_out_weight["sums"]

-    if personalization is not None:
-        if not isinstance(personalization, cudf.DataFrame):
-            raise NotImplementedError(
-                "personalization other than a cudf dataframe " "currently not supported"
+    try:
+        if personalization is not None:
+            if not isinstance(personalization, cudf.DataFrame):
+                raise NotImplementedError(
+                    "personalization other than a cudf dataframe currently not "
+                    "supported"
+                )
+            if G.renumbered is True:
+                personalization = renumber_vertices(G, personalization)
+
+            personalization = ensure_valid_dtype(G, personalization, "personalization")
+
+            result_tuple = plc_p_pagerank(
+                resource_handle=ResourceHandle(),
+                graph=G._plc_graph,
+                precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices,
+                precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums,
+                personalization_vertices=personalization["vertex"],
+                personalization_values=personalization["values"],
+                initial_guess_vertices=initial_guess_vertices,
+                initial_guess_values=initial_guess_values,
+                alpha=alpha,
+                epsilon=tol,
+                max_iterations=max_iter,
+                do_expensive_check=do_expensive_check,
+                fail_on_nonconvergence=fail_on_nonconvergence,
             )
-        if G.renumbered is True:
-            personalization = renumber_vertices(G, personalization)
-
-        personalization = ensure_valid_dtype(G, personalization, "personalization")
-
-        vertex, pagerank_values = pylibcugraph_p_pagerank(
-            resource_handle=ResourceHandle(),
-            graph=G._plc_graph,
-            precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices,
-            precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums,
-            personalization_vertices=personalization["vertex"],
-            personalization_values=personalization["values"],
-            initial_guess_vertices=initial_guess_vertices,
-            initial_guess_values=initial_guess_values,
-            alpha=alpha,
-            epsilon=tol,
-            max_iterations=max_iter,
-            do_expensive_check=do_expensive_check,
-        )
-    else:
-        vertex, pagerank_values = pylibcugraph_pagerank(
-            resource_handle=ResourceHandle(),
-            graph=G._plc_graph,
-            precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices,
-
precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, - initial_guess_vertices=initial_guess_vertices, - initial_guess_values=initial_guess_values, - alpha=alpha, - epsilon=tol, - max_iterations=max_iter, - do_expensive_check=do_expensive_check, - ) + else: + result_tuple = plc_pagerank( + resource_handle=ResourceHandle(), + graph=G._plc_graph, + precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices, + precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, + initial_guess_vertices=initial_guess_vertices, + initial_guess_values=initial_guess_values, + alpha=alpha, + epsilon=tol, + max_iterations=max_iter, + do_expensive_check=do_expensive_check, + fail_on_nonconvergence=fail_on_nonconvergence, + ) + # Re-raise this as a cugraph exception so users trying to catch this do not + # have to know to import another package. + except plc_exceptions.FailedToConvergeError as exc: + raise FailedToConvergeError from exc df = cudf.DataFrame() - df["vertex"] = vertex - df["pagerank"] = pagerank_values + df["vertex"] = result_tuple[0] + df["pagerank"] = result_tuple[1] if G.renumbered: df = G.unrenumber(df, "vertex") @@ -274,4 +302,7 @@ def pagerank( if isNx is True: df = df_score_to_dictionary(df, "pagerank") - return df + if fail_on_nonconvergence: + return df + else: + return (df, result_tuple[2]) diff --git a/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py b/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py index d6acaa550eb..d239f92d485 100644 --- a/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py @@ -15,6 +15,7 @@ from pylibcugraph import ResourceHandle from pylibcugraph import uniform_neighbor_sample as pylibcugraph_uniform_neighbor_sample +from pylibcugraph.utilities.api_tools import deprecated_warning_wrapper import numpy @@ -29,6 +30,10 @@ from cugraph import Graph +start_col_name = "_START_" +batch_col_name = "_BATCH_" + + # FIXME: Move this function to the utility module so that it can be # shared by other algos def ensure_valid_dtype(input_graph, start_list): @@ -50,7 +55,7 @@ def ensure_valid_dtype(input_graph, start_list): return start_list -def uniform_neighbor_sample( +def _uniform_neighbor_sample_legacy( G: Graph, start_list: Sequence, fanout_vals: List[int], @@ -60,6 +65,135 @@ def uniform_neighbor_sample( random_state: int = None, return_offsets: bool = False, ) -> Union[cudf.DataFrame, Tuple[cudf.DataFrame, cudf.DataFrame]]: + + warnings.warn( + "The batch_id_list parameter is deprecated. " + "Consider passing a DataFrame where the last column " + "is the batch ids and setting with_batch_ids=True" + ) + + if isinstance(start_list, int): + start_list = [start_list] + + if isinstance(start_list, list): + start_list = cudf.Series( + start_list, dtype=G.edgelist.edgelist_df[G.srcCol].dtype + ) + + if with_edge_properties and batch_id_list is None: + batch_id_list = cp.zeros(len(start_list), dtype="int32") + + # fanout_vals must be a host array! + # FIXME: ensure other sequence types (eg. cudf Series) can be handled. 
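# A hedged sketch of the conversion the FIXME above asks for, extended to
# accept cudf Series in addition to lists. The helper name is illustrative
# and not part of this diff:

import numpy
import cudf

def as_host_fanout(fanout_vals):
    # h_fan_out must be a host (CPU) array before it reaches pylibcugraph.
    if isinstance(fanout_vals, list):
        return numpy.asarray(fanout_vals, dtype="int32")
    if isinstance(fanout_vals, cudf.Series):
        # Device-to-host copy, then cast to the expected int32 dtype.
        return fanout_vals.to_numpy().astype("int32")
    raise TypeError(f"fanout_vals must be a list or cudf.Series, got: {type(fanout_vals)}")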
+ if isinstance(fanout_vals, list): + fanout_vals = numpy.asarray(fanout_vals, dtype="int32") + else: + raise TypeError("fanout_vals must be a list, " f"got: {type(fanout_vals)}") + + if "weights" in G.edgelist.edgelist_df: + weight_t = G.edgelist.edgelist_df["weights"].dtype + else: + weight_t = "float32" + + start_list = ensure_valid_dtype(G, start_list) + + if G.renumbered is True: + if isinstance(start_list, cudf.DataFrame): + start_list = G.lookup_internal_vertex_id(start_list, start_list.columns) + else: + start_list = G.lookup_internal_vertex_id(start_list) + + sampling_result = pylibcugraph_uniform_neighbor_sample( + resource_handle=ResourceHandle(), + input_graph=G._plc_graph, + start_list=start_list, + h_fan_out=fanout_vals, + with_replacement=with_replacement, + do_expensive_check=False, + with_edge_properties=with_edge_properties, + batch_id_list=batch_id_list, + random_state=random_state, + ) + + df = cudf.DataFrame() + + if with_edge_properties: + ( + sources, + destinations, + weights, + edge_ids, + edge_types, + batch_ids, + offsets, + hop_ids, + ) = sampling_result + + df["sources"] = sources + df["destinations"] = destinations + df["weight"] = weights + df["edge_id"] = edge_ids + df["edge_type"] = edge_types + df["hop_id"] = hop_ids + + if return_offsets: + offsets_df = cudf.DataFrame( + { + "batch_id": batch_ids, + "offsets": offsets[:-1], + } + ) + + else: + if len(batch_ids) > 0: + batch_ids = cudf.Series(batch_ids).repeat(cp.diff(offsets)) + batch_ids.reset_index(drop=True, inplace=True) + + df["batch_id"] = batch_ids + + else: + sources, destinations, indices = sampling_result + + df["sources"] = sources + df["destinations"] = destinations + + if indices is None: + df["indices"] = None + else: + df["indices"] = indices + if weight_t == "int32": + df["indices"] = indices.astype("int32") + elif weight_t == "int64": + df["indices"] = indices.astype("int64") + else: + df["indices"] = indices + + if G.renumbered: + df = G.unrenumber(df, "sources", preserve_order=True) + df = G.unrenumber(df, "destinations", preserve_order=True) + + if return_offsets: + return df, offsets_df + + return df + + +uniform_neighbor_sample_legacy = deprecated_warning_wrapper( + _uniform_neighbor_sample_legacy +) + + +def uniform_neighbor_sample( + G: Graph, + start_list: Sequence, + fanout_vals: List[int], + with_replacement: bool = True, + with_edge_properties: bool = False, + batch_id_list: Sequence = None, # deprecated + with_batch_ids: bool = False, + random_state: int = None, + return_offsets: bool = False, +) -> Union[cudf.DataFrame, Tuple[cudf.DataFrame, cudf.DataFrame]]: """ Does neighborhood sampling, which samples nodes from a graph based on the current node's neighbors, with a corresponding fanout value at each hop. @@ -85,9 +219,14 @@ def uniform_neighbor_sample( edge type, batch id, hop id) with the sampled edges. batch_id_list: list (int32) + Deprecated. List of batch ids that will be returned with the sampled edges if with_edge_properties is set to True. + with_batch_ids: bool, optional (default=False) + Flag to specify whether batch ids are present in the start_list + Assumes they are the last column in the start_list dataframe + random_state: int, optional Random seed to use when making sampling calls. 
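# A hedged usage sketch of the new SG call signature documented above, next to
# the deprecated form it replaces. `G` is assumed to be an already-constructed
# cugraph.Graph; the vertex and batch values are illustrative only.

import cudf
import cugraph  # uniform_neighbor_sample is exposed at the package level

starts = cudf.DataFrame(
    {
        "start": cudf.Series([0, 1, 2], dtype="int64"),
        "batch": cudf.Series([0, 0, 1], dtype="int32"),  # last column = batch ids
    }
)

# New style: batch ids travel inside start_list itself.
result = cugraph.uniform_neighbor_sample(
    G,
    start_list=starts,
    fanout_vals=[2, 2],
    with_edge_properties=True,
    with_batch_ids=True,
)

# Deprecated style, still dispatched to the legacy path for now:
# cugraph.uniform_neighbor_sample(G, starts["start"], [2, 2],
#                                 with_edge_properties=True,
#                                 batch_id_list=starts["batch"])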
@@ -148,6 +287,18 @@ def uniform_neighbor_sample( Contains the offsets of each batch in the sampling result """ + if batch_id_list is not None: + return uniform_neighbor_sample_legacy( + G, + start_list, + fanout_vals, + with_replacement=with_replacement, + with_edge_properties=with_edge_properties, + batch_id_list=batch_id_list, + random_state=random_state, + return_offsets=return_offsets, + ) + if isinstance(start_list, int): start_list = [start_list] @@ -156,8 +307,13 @@ def uniform_neighbor_sample( start_list, dtype=G.edgelist.edgelist_df[G.srcCol].dtype ) - if with_edge_properties and batch_id_list is None: - batch_id_list = cp.zeros(len(start_list), dtype="int32") + if with_edge_properties and not with_batch_ids: + if isinstance(start_list, cudf.Series): + start_list = start_list.to_frame() + + start_list[batch_col_name] = cudf.Series( + cp.zeros(len(start_list), dtype="int32") + ) # fanout_vals must be a host array! # FIXME: ensure other sequence types (eg. cudf Series) can be handled. @@ -173,21 +329,37 @@ def uniform_neighbor_sample( start_list = ensure_valid_dtype(G, start_list) - if G.renumbered is True: - if isinstance(start_list, cudf.DataFrame): - start_list = G.lookup_internal_vertex_id(start_list, start_list.columns) + if isinstance(start_list, cudf.Series): + start_list = start_list.rename(start_col_name) + start_list = start_list.to_frame() + + if G.renumbered: + start_list = G.lookup_internal_vertex_id(start_list, start_col_name) + else: + columns = start_list.columns + + if with_batch_ids: + if G.renumbered: + start_list = G.lookup_internal_vertex_id(start_list, columns[:-1]) + start_list = start_list.rename( + columns={columns[0]: start_col_name, columns[-1]: batch_col_name} + ) else: - start_list = G.lookup_internal_vertex_id(start_list) + if G.renumbered: + start_list = G.lookup_internal_vertex_id(start_list, columns) + start_list = start_list.rename(columns={columns[0]: start_col_name}) sampling_result = pylibcugraph_uniform_neighbor_sample( resource_handle=ResourceHandle(), input_graph=G._plc_graph, - start_list=start_list, + start_list=start_list[start_col_name], + batch_id_list=start_list[batch_col_name] + if batch_col_name in start_list + else None, h_fan_out=fanout_vals, with_replacement=with_replacement, do_expensive_check=False, with_edge_properties=with_edge_properties, - batch_id_list=batch_id_list, random_state=random_state, ) diff --git a/python/cugraph/cugraph/testing/mg_utils.py b/python/cugraph/cugraph/testing/mg_utils.py index 1e1a481e4d6..bd165ba3db5 100644 --- a/python/cugraph/cugraph/testing/mg_utils.py +++ b/python/cugraph/cugraph/testing/mg_utils.py @@ -29,6 +29,7 @@ def start_dask_client( protocol=None, + rmm_async=False, rmm_pool_size=None, dask_worker_devices=None, jit_unspill=False, @@ -137,6 +138,7 @@ def start_dask_client( local_directory=local_directory, protocol=protocol, rmm_pool_size=rmm_pool_size, + rmm_async=rmm_async, CUDA_VISIBLE_DEVICES=dask_worker_devices, jit_unspill=jit_unspill, device_memory_limit=device_memory_limit, @@ -287,6 +289,15 @@ def persist_dask_object(arg): # Function to convert bytes into human readable format def sizeof_fmt(num, suffix="B"): + if isinstance(num, str): + if num[-2:] == "GB": + return num[:-2] + "G" + elif num[-2:] == "MB": + return num[:-2] + "M" + elif num[-2:] == "KB": + return num[:-2] + "K" + else: + raise ValueError("unknown unit") for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]: if abs(num) < 1024.0: return "%3.1f%s%s" % (num, unit, suffix) diff --git 
a/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py b/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py index ba136963b60..b7487ae329c 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py @@ -432,3 +432,49 @@ def test_pagerank_transposed_false(): with pytest.warns(UserWarning, match=warning_msg): cugraph.pagerank(G) + + +@pytest.mark.sg +def test_pagerank_non_convergence(): + G = karate.get_graph(create_using=cugraph.Graph(directed=True)) + + # Not enough allowed iterations, should not converge + with pytest.raises(cugraph.exceptions.FailedToConvergeError): + df = cugraph.pagerank(G, max_iter=1, fail_on_nonconvergence=True) + + # Not enough allowed iterations, should not converge but do not consider + # that an error + (df, converged) = cugraph.pagerank(G, max_iter=1, fail_on_nonconvergence=False) + assert type(df) is cudf.DataFrame + assert type(converged) is bool + assert converged is False + + # The default max_iter value should allow convergence for this graph + (df, converged) = cugraph.pagerank(G, fail_on_nonconvergence=False) + assert type(df) is cudf.DataFrame + assert type(converged) is bool + assert converged is True + + # Test personalized pagerank the same way + personalization = cudf.DataFrame() + personalization["vertex"] = [17, 26] + personalization["values"] = [0.5, 0.75] + + with pytest.raises(cugraph.exceptions.FailedToConvergeError): + df = cugraph.pagerank( + G, max_iter=1, personalization=personalization, fail_on_nonconvergence=True + ) + + (df, converged) = cugraph.pagerank( + G, max_iter=1, personalization=personalization, fail_on_nonconvergence=False + ) + assert type(df) is cudf.DataFrame + assert type(converged) is bool + assert converged is False + + (df, converged) = cugraph.pagerank( + G, personalization=personalization, fail_on_nonconvergence=False + ) + assert type(df) is cudf.DataFrame + assert type(converged) is bool + assert converged is True diff --git a/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py b/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py index 941974eea4f..14a512c59e5 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py @@ -48,6 +48,25 @@ def personalize(vertices, personalization_perc): return cu_personalization, personalization +def create_distributed_karate_graph(store_transposed=True): + input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() + + chunksize = dcg.get_chunksize(input_data_path) + + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + dg = cugraph.Graph(directed=True) + dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=store_transposed) + + return dg + + # ============================================================================= # Parameters # ============================================================================= @@ -197,20 +216,7 @@ def test_pagerank_invalid_personalization_dtype(dask_client): @pytest.mark.mg def test_dask_pagerank_transposed_false(dask_client): - input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() - - chunksize = dcg.get_chunksize(input_data_path) - - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) 
- - dg = cugraph.Graph(directed=True) - dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False) + dg = create_distributed_karate_graph(store_transposed=False) warning_msg = ( "Pagerank expects the 'store_transposed' " @@ -220,3 +226,49 @@ def test_dask_pagerank_transposed_false(dask_client): with pytest.warns(UserWarning, match=warning_msg): dcg.pagerank(dg) + + +@pytest.mark.mg +def test_pagerank_non_convergence(dask_client): + dg = create_distributed_karate_graph() + + # Not enough allowed iterations, should not converge + with pytest.raises(cugraph.exceptions.FailedToConvergeError): + ddf = dcg.pagerank(dg, max_iter=1, fail_on_nonconvergence=True) + + # Not enough allowed iterations, should not converge but do not consider + # that an error + (ddf, converged) = dcg.pagerank(dg, max_iter=1, fail_on_nonconvergence=False) + assert type(ddf) is dask_cudf.DataFrame + assert type(converged) is bool + assert converged is False + + # The default max_iter value should allow convergence for this graph + (ddf, converged) = dcg.pagerank(dg, fail_on_nonconvergence=False) + assert type(ddf) is dask_cudf.DataFrame + assert type(converged) is bool + assert converged is True + + # Test personalized pagerank the same way + personalization = cudf.DataFrame() + personalization["vertex"] = [17, 26] + personalization["values"] = [0.5, 0.75] + + with pytest.raises(cugraph.exceptions.FailedToConvergeError): + df = dcg.pagerank( + dg, max_iter=1, personalization=personalization, fail_on_nonconvergence=True + ) + + (df, converged) = dcg.pagerank( + dg, max_iter=1, personalization=personalization, fail_on_nonconvergence=False + ) + assert type(df) is dask_cudf.DataFrame + assert type(converged) is bool + assert converged is False + + (df, converged) = dcg.pagerank( + dg, personalization=personalization, fail_on_nonconvergence=False + ) + assert type(df) is dask_cudf.DataFrame + assert type(converged) is bool + assert converged is True diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py index 5d2f050bce9..39d2fbea7dd 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py @@ -285,7 +285,7 @@ def test_uniform_neighbor_sample_unweighted(simple_unweighted_input_expected_out sampling_results = uniform_neighbor_sample( test_data["Graph"], - test_data["start_list"], + test_data["start_list"].astype("int64"), test_data["fanout_vals"], test_data["with_replacement"], ) @@ -330,11 +330,11 @@ def test_uniform_neighbor_sample_edge_properties(return_offsets): sampling_results = uniform_neighbor_sample( G, - start_list=start_df["seed"], + start_list=start_df, fanout_vals=[2, 2], with_replacement=False, with_edge_properties=True, - batch_id_list=start_df["batch"], + with_batch_ids=True, return_offsets=return_offsets, ) if return_offsets: @@ -389,11 +389,16 @@ def test_uniform_neighbor_sample_edge_properties_self_loops(): sampling_results = cugraph.uniform_neighbor_sample( G, - start_list=cudf.Series([0, 1, 2]), - batch_id_list=cudf.Series([1, 1, 1], dtype="int32"), + start_list=cudf.DataFrame( + { + "start": cudf.Series([0, 1, 2]), + "batch": cudf.Series([1, 1, 1], dtype="int32"), + } + ), fanout_vals=[2, 2], with_replacement=False, with_edge_properties=True, + with_batch_ids=True, random_state=80, ) @@ -460,11 +465,16 @@ def test_uniform_neighbor_sample_hop_id_order_multi_batch(): sampling_results = 
cugraph.uniform_neighbor_sample( G, - cudf.Series([0, 1], dtype="int64"), + start_list=cudf.DataFrame( + { + "start": cudf.Series([0, 1], dtype="int64"), + "batch": cudf.Series([0, 1], dtype="int32"), + } + ), fanout_vals=[2, 2, 2], - batch_id_list=cudf.Series([0, 1], dtype="int32"), with_replacement=False, with_edge_properties=True, + with_batch_ids=True, ) for b in range(2): @@ -502,11 +512,16 @@ def test_uniform_neighbor_sample_empty_start_list(): sampling_results = cugraph.uniform_neighbor_sample( G, - start_list=cudf.Series([], dtype="int64"), - batch_id_list=cudf.Series([], dtype="int32"), + start_list=cudf.DataFrame( + { + "start_list": cudf.Series(dtype="int64"), + "batch_id_list": cudf.Series(dtype="int32"), + } + ), fanout_vals=[2, 2], with_replacement=False, with_edge_properties=True, + with_batch_ids=True, random_state=32, ) diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py index 033b96487c4..4da3f3cf950 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py @@ -327,7 +327,8 @@ def test_mg_uniform_neighbor_sample_ensure_no_duplicates(dask_client): @pytest.mark.cugraph_ops @pytest.mark.parametrize("return_offsets", [True, False]) def test_uniform_neighbor_sample_edge_properties(dask_client, return_offsets): - if len(dask_client.scheduler_info()["workers"]) <= 1: + n_workers = len(dask_client.scheduler_info()["workers"]) + if n_workers <= 1: pytest.skip("Test only valid for MG environments") edgelist_df = dask_cudf.from_cudf( cudf.DataFrame( @@ -352,43 +353,58 @@ def test_uniform_neighbor_sample_edge_properties(dask_client, return_offsets): edge_attr=["w", "eid", "etp"], ) - dest_rank = [0, 1] sampling_results = cugraph.dask.uniform_neighbor_sample( G, - start_list=cudf.Series([0, 4], dtype="int64"), + start_list=cudf.DataFrame( + { + "start": cudf.Series([0, 4], dtype="int64"), + "batch": cudf.Series([0, 1], dtype="int32"), + } + ), fanout_vals=[-1, -1], with_replacement=False, with_edge_properties=True, - batch_id_list=cudf.Series([0, 1], dtype="int32"), - label_list=cudf.Series([0, 1], dtype="int32") if return_offsets else None, - label_to_output_comm_rank=cudf.Series(dest_rank, dtype="int32") - if return_offsets - else None, + with_batch_ids=True, + keep_batches_together=True, + min_batch_id=0, + max_batch_id=1, return_offsets=return_offsets, ) if return_offsets: sampling_results, sampling_offsets = sampling_results - df_p0 = sampling_results.get_partition(0).compute() - assert sorted(df_p0.sources.values_host.tolist()) == ( - [0, 0, 0, 1, 1, 2, 2, 2, 4, 4] - ) - assert sorted(df_p0.destinations.values_host.tolist()) == ( - [1, 1, 1, 2, 2, 3, 3, 4, 4, 4] - ) - - df_p1 = sampling_results.get_partition(1).compute() - assert sorted(df_p1.sources.values_host.tolist()) == ([1, 1, 3, 3, 4, 4]) - assert sorted(df_p1.destinations.values_host.tolist()) == ([1, 2, 2, 3, 3, 4]) - - offsets_p0 = sampling_offsets.get_partition(0).compute() - assert offsets_p0.batch_id.values_host.tolist() == [0] - assert offsets_p0.offsets.values_host.tolist() == [0] - - offsets_p1 = sampling_offsets.get_partition(1).compute() - assert offsets_p1.batch_id.values_host.tolist() == [1] - assert offsets_p1.offsets.values_host.tolist() == [0] + batches_found = {0: 0, 1: 0} + for i in range(n_workers): + dfp = sampling_results.get_partition(i).compute() + if len(dfp) > 0: + offsets_p = 
sampling_offsets.get_partition(i).compute()
+                assert len(offsets_p) > 0
+
+                if offsets_p.batch_id.iloc[0] == 1:
+                    batches_found[1] += 1
+
+                    assert offsets_p.batch_id.values_host.tolist() == [1]
+                    assert offsets_p.offsets.values_host.tolist() == [0]
+
+                    assert sorted(dfp.sources.values_host.tolist()) == (
+                        [1, 1, 3, 3, 4, 4]
+                    )
+                    assert sorted(dfp.destinations.values_host.tolist()) == (
+                        [1, 2, 2, 3, 3, 4]
+                    )
+                elif offsets_p.batch_id.iloc[0] == 0:
+                    batches_found[0] += 1
+
+                    assert offsets_p.batch_id.values_host.tolist() == [0]
+                    assert offsets_p.offsets.values_host.tolist() == [0]
+
+                    assert sorted(dfp.sources.values_host.tolist()) == (
+                        [0, 0, 0, 1, 1, 2, 2, 2, 4, 4]
+                    )
+                    assert sorted(dfp.destinations.values_host.tolist()) == (
+                        [1, 1, 1, 2, 2, 3, 3, 4, 4, 4]
+                    )

     mdf = cudf.merge(
         sampling_results.compute(),
@@ -446,13 +462,19 @@ def test_uniform_neighbor_sample_edge_properties_self_loops(dask_client):
     sampling_results = cugraph.dask.uniform_neighbor_sample(
         G,
-        start_list=dask_cudf.from_cudf(cudf.Series([0, 1, 2]), npartitions=2),
-        batch_id_list=dask_cudf.from_cudf(
-            cudf.Series([1, 1, 1], dtype="int32"), npartitions=2
+        start_list=dask_cudf.from_cudf(
+            cudf.DataFrame(
+                {
+                    "start": cudf.Series([0, 1, 2], dtype="int64"),
+                    "batch": cudf.Series([1, 1, 1], dtype="int32"),
+                }
+            ),
+            npartitions=2,
         ),
         fanout_vals=[2, 2],
         with_replacement=False,
         with_edge_properties=True,
+        with_batch_ids=True,
     ).compute()

     assert sorted(sampling_results.sources.values_host.tolist()) == [0, 0, 1, 1, 2, 2]
@@ -526,23 +548,32 @@ def test_uniform_neighbor_sample_hop_id_order_multi_batch():
     sampling_results = cugraph.dask.uniform_neighbor_sample(
         G,
-        cudf.Series([0, 1], dtype="int64"),
+        dask_cudf.from_cudf(
+            cudf.DataFrame(
+                {
+                    "start": cudf.Series([0, 1], dtype="int64"),
+                    "batch": cudf.Series([0, 1], dtype="int32"),
+                }
+            ),
+            npartitions=2,
+        ),
         fanout_vals=[2, 2, 2],
-        batch_id_list=cudf.Series([0, 1], dtype="int32"),
         with_replacement=False,
         with_edge_properties=True,
+        with_batch_ids=True,
     )

     for p in range(sampling_results.npartitions):
         sampling_results_p = sampling_results.get_partition(p)
-        for b in range(2):
-            sampling_results_pb = sampling_results_p[
-                sampling_results_p.batch_id == b
-            ].compute()
-            assert (
-                sorted(sampling_results_pb.hop_id.values_host.tolist())
-                == sampling_results_pb.hop_id.values_host.tolist()
-            )
+        if len(sampling_results_p) > 0:
+            for b in range(2):
+                sampling_results_pb = sampling_results_p[
+                    sampling_results_p.batch_id == b
+                ].compute()
+                assert (
+                    sorted(sampling_results_pb.hop_id.values_host.tolist())
+                    == sampling_results_pb.hop_id.values_host.tolist()
+                )


 @pytest.mark.mg
@@ -577,11 +608,19 @@ def test_uniform_neighbor_edge_properties_sample_small_start_list(
     cugraph.dask.uniform_neighbor_sample(
         G,
-        start_list=cudf.Series([0]),
+        start_list=dask_cudf.from_cudf(
+            cudf.DataFrame(
+                {
+                    "start": cudf.Series([0]),
+                    "batch": cudf.Series([10], dtype="int32"),
+                }
+            ),
+            npartitions=1,
+        ),
         fanout_vals=[10, 25],
         with_replacement=with_replacement,
         with_edge_properties=True,
-        batch_id_list=cudf.Series([10], dtype="int32"),
+        with_batch_ids=True,
     )


@@ -610,11 +649,16 @@ def test_uniform_neighbor_sample_without_dask_inputs(dask_client):
     sampling_results = cugraph.dask.uniform_neighbor_sample(
         G,
-        start_list=cudf.Series([0, 1, 2]),
-        batch_id_list=cudf.Series([1, 1, 1], dtype="int32"),
+        start_list=cudf.DataFrame(
+            {
+                "start": cudf.Series([0, 1, 2]),
+                "batch": cudf.Series([1, 1, 1], dtype="int32"),
+            }
+        ),
         fanout_vals=[2, 2],
         with_replacement=False,
with_edge_properties=True, + with_batch_ids=True, ).compute() assert sorted(sampling_results.sources.values_host.tolist()) == [0, 0, 1, 1, 2, 2] @@ -664,24 +708,24 @@ def test_uniform_neighbor_sample_batched(dask_client, dataset, input_df, max_bat input_vertices = dask_cudf.concat([df.src, df.dst]).unique().compute() assert isinstance(input_vertices, cudf.Series) + input_vertices.name = "start" input_vertices.index = cupy.random.permutation(len(input_vertices)) + input_vertices = input_vertices.to_frame().reset_index(drop=True) - input_batch = cudf.Series( + input_vertices["batch"] = cudf.Series( cupy.random.randint(0, max_batches, len(input_vertices)), dtype="int32" ) - input_batch.index = cupy.random.permutation(len(input_vertices)) if input_df == dask_cudf.DataFrame: - input_batch = dask_cudf.from_cudf(input_batch, npartitions=num_workers) input_vertices = dask_cudf.from_cudf(input_vertices, npartitions=num_workers) sampling_results = cugraph.dask.uniform_neighbor_sample( G, start_list=input_vertices, - batch_id_list=input_batch, fanout_vals=[5, 5], with_replacement=False, with_edge_properties=True, + with_batch_ids=True, ) for batch_id in range(max_batches): @@ -693,7 +737,7 @@ def test_uniform_neighbor_sample_batched(dask_client, dataset, input_df, max_bat .compute() ) - input_starts_per_batch = len(input_batch[input_batch == batch_id]) + input_starts_per_batch = len(input_vertices[input_vertices.batch == batch_id]) # Should be <= to account for starts without outgoing edges assert output_starts_per_batch <= input_starts_per_batch diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py index e0d7b6797d4..5c03d8f98cc 100644 --- a/python/pylibcugraph/pylibcugraph/__init__.py +++ b/python/pylibcugraph/pylibcugraph/__init__.py @@ -81,4 +81,6 @@ from pylibcugraph.select_random_vertices import select_random_vertices +from pylibcugraph import exceptions + __version__ = "23.08.00" diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd index 06838256f30..6cd02ed6f17 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd @@ -47,6 +47,16 @@ cdef extern from "cugraph_c/centrality_algorithms.h": cugraph_centrality_result_t* result ) + cdef size_t \ + cugraph_centrality_result_get_num_iterations( + cugraph_centrality_result_t* result + ) + + cdef bool_t \ + cugraph_centrality_result_converged( + cugraph_centrality_result_t* result + ) + cdef void \ cugraph_centrality_result_free( cugraph_centrality_result_t* result @@ -68,6 +78,22 @@ cdef extern from "cugraph_c/centrality_algorithms.h": cugraph_error_t** error ) + cdef cugraph_error_code_t \ + cugraph_pagerank_allow_nonconvergence( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, + double alpha, + double epsilon, + size_t max_iterations, + bool_t do_expensive_check, + cugraph_centrality_result_t** result, + cugraph_error_t** error + ) + cdef cugraph_error_code_t \ cugraph_personalized_pagerank( const cugraph_resource_handle_t* handle, @@ -86,6 +112,24 @@ cdef extern 
from "cugraph_c/centrality_algorithms.h": cugraph_error_t** error ) + cdef cugraph_error_code_t \ + cugraph_personalized_pagerank_allow_nonconvergence( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, + const cugraph_type_erased_device_array_view_t* personalization_vertices, + const cugraph_type_erased_device_array_view_t* personalization_values, + double alpha, + double epsilon, + size_t max_iterations, + bool_t do_expensive_check, + cugraph_centrality_result_t** result, + cugraph_error_t** error + ) + ########################################################################### # eigenvector centrality cdef cugraph_error_code_t \ @@ -167,4 +211,4 @@ cdef extern from "cugraph_c/centrality_algorithms.h": bool_t do_expensive_check, cugraph_centrality_result_t** result, cugraph_error_t** error - ) \ No newline at end of file + ) diff --git a/python/pylibcugraph/pylibcugraph/exceptions.py b/python/pylibcugraph/pylibcugraph/exceptions.py new file mode 100644 index 00000000000..54b58d840b3 --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/exceptions.py @@ -0,0 +1,26 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Exception classes for pylibcugraph. +""" + + +class FailedToConvergeError(Exception): + """ + Raised when an algorithm fails to converge within a predetermined set of + constraints which vary based on the algorithm, and may or may not be + user-configurable. + """ + + pass diff --git a/python/pylibcugraph/pylibcugraph/pagerank.pyx b/python/pylibcugraph/pylibcugraph/pagerank.pyx index 7d8f7807ead..a5022072b4c 100644 --- a/python/pylibcugraph/pylibcugraph/pagerank.pyx +++ b/python/pylibcugraph/pylibcugraph/pagerank.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/python/pylibcugraph/pylibcugraph/pagerank.pyx b/python/pylibcugraph/pylibcugraph/pagerank.pyx
index 7d8f7807ead..a5022072b4c 100644
--- a/python/pylibcugraph/pylibcugraph/pagerank.pyx
+++ b/python/pylibcugraph/pylibcugraph/pagerank.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -35,7 +35,8 @@ from pylibcugraph._cugraph_c.graph cimport (
 )
 from pylibcugraph._cugraph_c.centrality_algorithms cimport (
     cugraph_centrality_result_t,
-    cugraph_pagerank,
+    cugraph_pagerank_allow_nonconvergence,
+    cugraph_centrality_result_converged,
     cugraph_centrality_result_get_vertices,
     cugraph_centrality_result_get_values,
     cugraph_centrality_result_free,
@@ -53,6 +54,7 @@ from pylibcugraph.utils cimport (
     get_c_type_from_numpy_type,
     create_cugraph_type_erased_device_array_view_from_py_obj,
 )
+from pylibcugraph.exceptions import FailedToConvergeError


 def pagerank(ResourceHandle resource_handle,
@@ -64,7 +66,8 @@ def pagerank(ResourceHandle resource_handle,
              double alpha,
              double epsilon,
              size_t max_iterations,
-             bool_t do_expensive_check):
+             bool_t do_expensive_check,
+             fail_on_nonconvergence=True):
     """
     Find the PageRank score for every vertex in a graph by computing an
     approximation of the Pagerank eigenvector using the power method. The
@@ -123,13 +126,29 @@ def pagerank(ResourceHandle resource_handle,
         If True, performs more extensive tests on the inputs to ensure
         validitity, at the expense of increased run time.

+    fail_on_nonconvergence : bool (default=True)
+        If the solver does not reach convergence, raise an exception if
+        fail_on_nonconvergence is True. If fail_on_nonconvergence is False,
+        the return value is a tuple of (vertices, pageranks, converged),
+        where vertices and pageranks are the device arrays described below
+        and converged is a boolean indicating if the solver converged (True)
+        or not (False).
+
     Returns
     -------
-    A tuple of device arrays, where the first item in the tuple is a device
-    array containing the vertex identifiers, and the second item is a device
-    array containing the pagerank values for the corresponding vertices. For
-    example, the vertex identifier at the ith element of the vertex array has
-    the pagerank value of the ith element in the pagerank array.
+    The return value varies based on the value of the fail_on_nonconvergence
+    parameter. If fail_on_nonconvergence is True:
+
+    A tuple of device arrays, where the first item in the tuple is a device
+    array containing the vertex identifiers, and the second item is a device
+    array containing the pagerank values for the corresponding vertices. For
+    example, the vertex identifier at the ith element of the vertex array
+    has the pagerank value of the ith element in the pagerank array.
+
+    If fail_on_nonconvergence is False:
+
+    A three-tuple where the first two items are the device arrays described
+    above, and the third is a bool indicating if the solver converged (True)
+    or not (False).
     Examples
     --------
@@ -195,30 +214,35 @@ def pagerank(ResourceHandle resource_handle,
     cdef cugraph_centrality_result_t* result_ptr
     cdef cugraph_error_code_t error_code
     cdef cugraph_error_t* error_ptr
+    cdef bool_t converged
+    cdef cugraph_type_erased_device_array_view_t* vertices_ptr
+    cdef cugraph_type_erased_device_array_view_t* pageranks_ptr
+
+    error_code = cugraph_pagerank_allow_nonconvergence(
+        c_resource_handle_ptr,
+        c_graph_ptr,
+        precomputed_vertex_out_weight_vertices_view_ptr,
+        precomputed_vertex_out_weight_sums_view_ptr,
+        initial_guess_vertices_view_ptr,
+        initial_guess_values_view_ptr,
+        alpha,
+        epsilon,
+        max_iterations,
+        do_expensive_check,
+        &result_ptr,
+        &error_ptr)
+    assert_success(error_code, error_ptr, "cugraph_pagerank_allow_nonconvergence")
+
+    converged = cugraph_centrality_result_converged(result_ptr)

-    error_code = cugraph_pagerank(c_resource_handle_ptr,
-                                  c_graph_ptr,
-                                  precomputed_vertex_out_weight_vertices_view_ptr,
-                                  precomputed_vertex_out_weight_sums_view_ptr,
-                                  initial_guess_vertices_view_ptr,
-                                  initial_guess_values_view_ptr,
-                                  alpha,
-                                  epsilon,
-                                  max_iterations,
-                                  do_expensive_check,
-                                  &result_ptr,
-                                  &error_ptr)
-    assert_success(error_code, error_ptr, "cugraph_pagerank")
-
-    # Extract individual device array pointers from result and copy to cupy
-    # arrays for returning.
-    cdef cugraph_type_erased_device_array_view_t* vertices_ptr = \
-        cugraph_centrality_result_get_vertices(result_ptr)
-    cdef cugraph_type_erased_device_array_view_t* pageranks_ptr = \
-        cugraph_centrality_result_get_values(result_ptr)
-
-    cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr)
-    cupy_pageranks = copy_to_cupy_array(c_resource_handle_ptr, pageranks_ptr)
+    # Only extract results if necessary
+    if (fail_on_nonconvergence is False) or (converged is True):
+        # Extract individual device array pointers from result and copy to cupy
+        # arrays for returning.
+        vertices_ptr = cugraph_centrality_result_get_vertices(result_ptr)
+        pageranks_ptr = cugraph_centrality_result_get_values(result_ptr)
+        cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr)
+        cupy_pageranks = copy_to_cupy_array(c_resource_handle_ptr, pageranks_ptr)

     # Free all pointers
     cugraph_centrality_result_free(result_ptr)
@@ -231,4 +255,10 @@ def pagerank(ResourceHandle resource_handle,
     if precomputed_vertex_out_weight_sums is not None:
         cugraph_type_erased_device_array_view_free(precomputed_vertex_out_weight_sums_view_ptr)

-    return (cupy_vertices, cupy_pageranks)
+    if fail_on_nonconvergence is False:
+        return (cupy_vertices, cupy_pageranks, bool(converged))
+    else:
+        if converged is True:
+            return (cupy_vertices, cupy_pageranks)
+        else:
+            raise FailedToConvergeError
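To make the new dual return shape concrete, a hedged usage sketch of the updated pagerank
entry point with fail_on_nonconvergence=False; resource_handle and graph are assumed to have
been created elsewhere (e.g. a pylibcugraph.ResourceHandle and an SGGraph), and the None
placeholders cover the optional precomputed/initial-guess device arrays:

import pylibcugraph

vertices, pageranks, converged = pylibcugraph.pagerank(
    resource_handle,
    graph,
    None,  # precomputed_vertex_out_weight_vertices
    None,  # precomputed_vertex_out_weight_sums
    None,  # initial_guess_vertices
    None,  # initial_guess_values
    alpha=0.85,
    epsilon=1.0e-6,
    max_iterations=500,
    do_expensive_check=False,
    fail_on_nonconvergence=False,
)
if not converged:
    # No exception is raised on this path; the caller inspects the flag and
    # still receives the best-effort scores computed after max_iterations.
    print("pagerank stopped before reaching epsilon")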
diff --git a/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx b/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx
index 89b57f139a1..e60e7fa2c3e 100644
--- a/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx
+++ b/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -35,7 +35,8 @@ from pylibcugraph._cugraph_c.graph cimport (
 )
 from pylibcugraph._cugraph_c.centrality_algorithms cimport (
     cugraph_centrality_result_t,
-    cugraph_personalized_pagerank,
+    cugraph_personalized_pagerank_allow_nonconvergence,
+    cugraph_centrality_result_converged,
     cugraph_centrality_result_get_vertices,
     cugraph_centrality_result_get_values,
     cugraph_centrality_result_free,
@@ -53,6 +54,7 @@ from pylibcugraph.utils cimport (
     get_c_type_from_numpy_type,
     create_cugraph_type_erased_device_array_view_from_py_obj,
 )
+from pylibcugraph.exceptions import FailedToConvergeError


 def personalized_pagerank(ResourceHandle resource_handle,
@@ -66,7 +68,8 @@ def personalized_pagerank(ResourceHandle resource_handle,
                           double alpha,
                           double epsilon,
                           size_t max_iterations,
-                          bool_t do_expensive_check):
+                          bool_t do_expensive_check,
+                          fail_on_nonconvergence=True):
     """
     Find the PageRank score for every vertex in a graph by computing an
     approximation of the Pagerank eigenvector using the power method. The
@@ -85,27 +88,21 @@ def personalized_pagerank(ResourceHandle resource_handle,

     precomputed_vertex_out_weight_vertices: device array type
         Subset of vertices of graph for precomputed_vertex_out_weight
-        (a performance optimization)

     precomputed_vertex_out_weight_sums : device array type
         Corresponding precomputed sum of outgoing vertices weight
-        (a performance optimization)
-
+
     initial_guess_vertices : device array type
         Subset of vertices of graph for initial guess for pagerank values
-        (a performance optimization)
-
+
     initial_guess_values : device array type
         Pagerank values for vertices
-        (a performance optimization)
-
+
     personalization_vertices : device array type
         Subset of vertices of graph for personalization
-        (a performance optimization)
-
+
     personalization_values : device array type
         Personalization values for vertices
-        (a performance optimization)

     alpha : double
         The damping factor alpha represents the probability to follow an
@@ -133,13 +130,29 @@ def personalized_pagerank(ResourceHandle resource_handle,
         If True, performs more extensive tests on the inputs to ensure
         validitity, at the expense of increased run time.

+    fail_on_nonconvergence : bool (default=True)
+        If the solver does not reach convergence, raise an exception if
+        fail_on_nonconvergence is True. If fail_on_nonconvergence is False,
+        the return value is a tuple of (vertices, pageranks, converged),
+        where vertices and pageranks are the device arrays described below
+        and converged is a boolean indicating if the solver converged (True)
+        or not (False).
+
     Returns
     -------
-    A tuple of device arrays, where the first item in the tuple is a device
-    array containing the vertex identifiers, and the second item is a device
-    array containing the pagerank values for the corresponding vertices. For
-    example, the vertex identifier at the ith element of the vertex array has
-    the pagerank value of the ith element in the pagerank array.
+    The return value varies based on the value of the fail_on_nonconvergence
+    parameter. If fail_on_nonconvergence is True:
+
+    A tuple of device arrays, where the first item in the tuple is a device
+    array containing the vertex identifiers, and the second item is a device
+    array containing the pagerank values for the corresponding vertices. For
+    example, the vertex identifier at the ith element of the vertex array has
+    the pagerank value of the ith element in the pagerank array.
+
+    If fail_on_nonconvergence is False:
+
+    A three-tuple where the first two items are the device arrays described
+    above, and the third is a bool indicating if the solver converged (True)
+    or not (False).

     Examples
     --------
@@ -207,12 +220,12 @@ def personalized_pagerank(ResourceHandle resource_handle,
         precomputed_vertex_out_weight_sums_view_ptr = \
             create_cugraph_type_erased_device_array_view_from_py_obj(
                 precomputed_vertex_out_weight_sums)
-
+
     cdef cugraph_type_erased_device_array_view_t* \
         personalization_vertices_view_ptr = \
             create_cugraph_type_erased_device_array_view_from_py_obj(
                 personalization_vertices)
-
+
     cdef cugraph_type_erased_device_array_view_t* \
         personalization_values_view_ptr = \
             create_cugraph_type_erased_device_array_view_from_py_obj(
@@ -221,32 +234,38 @@ def personalized_pagerank(ResourceHandle resource_handle,
     cdef cugraph_centrality_result_t* result_ptr
     cdef cugraph_error_code_t error_code
     cdef cugraph_error_t* error_ptr
+    cdef bool_t converged
+    cdef cugraph_type_erased_device_array_view_t* vertices_ptr
+    cdef cugraph_type_erased_device_array_view_t* pageranks_ptr
+
+    error_code = cugraph_personalized_pagerank_allow_nonconvergence(
+        c_resource_handle_ptr,
+        c_graph_ptr,
+        precomputed_vertex_out_weight_vertices_view_ptr,
+        precomputed_vertex_out_weight_sums_view_ptr,
+        initial_guess_vertices_view_ptr,
+        initial_guess_values_view_ptr,
+        personalization_vertices_view_ptr,
+        personalization_values_view_ptr,
+        alpha,
+        epsilon,
+        max_iterations,
+        do_expensive_check,
+        &result_ptr,
+        &error_ptr)
+    assert_success(
+        error_code, error_ptr, "cugraph_personalized_pagerank_allow_nonconvergence")
+
+    converged = cugraph_centrality_result_converged(result_ptr)

-    error_code = cugraph_personalized_pagerank(c_resource_handle_ptr,
-                                               c_graph_ptr,
-                                               precomputed_vertex_out_weight_vertices_view_ptr,
-                                               precomputed_vertex_out_weight_sums_view_ptr,
-                                               initial_guess_vertices_view_ptr,
-                                               initial_guess_values_view_ptr,
-                                               personalization_vertices_view_ptr,
-                                               personalization_values_view_ptr,
-                                               alpha,
-                                               epsilon,
-                                               max_iterations,
-                                               do_expensive_check,
-                                               &result_ptr,
-                                               &error_ptr)
-    assert_success(error_code, error_ptr, "cugraph_personalized_pagerank")
-
-    # Extract individual device array pointers from result and copy to cupy
-    # arrays for returning.
-    cdef cugraph_type_erased_device_array_view_t* vertices_ptr = \
-        cugraph_centrality_result_get_vertices(result_ptr)
-    cdef cugraph_type_erased_device_array_view_t* pageranks_ptr = \
-        cugraph_centrality_result_get_values(result_ptr)
-
-    cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr)
-    cupy_pageranks = copy_to_cupy_array(c_resource_handle_ptr, pageranks_ptr)
+    # Only extract results if necessary
+    if (fail_on_nonconvergence is False) or (converged is True):
+        # Extract individual device array pointers from result and copy to cupy
+        # arrays for returning.
+        vertices_ptr = cugraph_centrality_result_get_vertices(result_ptr)
+        pageranks_ptr = cugraph_centrality_result_get_values(result_ptr)
+        cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr)
+        cupy_pageranks = copy_to_cupy_array(c_resource_handle_ptr, pageranks_ptr)

     # Free all pointers
     cugraph_centrality_result_free(result_ptr)
@@ -263,4 +282,10 @@ def personalized_pagerank(ResourceHandle resource_handle,
     if personalization_values is not None:
         cugraph_type_erased_device_array_view_free(personalization_values_view_ptr)

-    return (cupy_vertices, cupy_pageranks)
+    if fail_on_nonconvergence is False:
+        return (cupy_vertices, cupy_pageranks, bool(converged))
+    else:
+        if converged is True:
+            return (cupy_vertices, cupy_pageranks)
+        else:
+            raise FailedToConvergeError
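personalized_pagerank follows the same two-mode contract, with two extra device arrays for
the personalization set. A minimal sketch under the same assumptions as the pagerank example
above (resource_handle and graph prepared elsewhere); the vertex ids, dtypes, and
personalization weights are illustrative placeholders:

import cupy
import pylibcugraph

# Hypothetical personalization: bias the ranking toward vertices 0 and 5.
personalization_vertices = cupy.asarray([0, 5], dtype="int32")
personalization_values = cupy.asarray([0.5, 0.5], dtype="float32")

vertices, pageranks, converged = pylibcugraph.personalized_pagerank(
    resource_handle,
    graph,
    None,  # precomputed_vertex_out_weight_vertices
    None,  # precomputed_vertex_out_weight_sums
    None,  # initial_guess_vertices
    None,  # initial_guess_values
    personalization_vertices,
    personalization_values,
    alpha=0.85,
    epsilon=1.0e-6,
    max_iterations=500,
    do_expensive_check=False,
    fail_on_nonconvergence=False,
)
# With the default fail_on_nonconvergence=True, the same call returns only
# (vertices, pageranks) and raises FailedToConvergeError instead of ever
# returning converged=False.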