From bb1d078f3053f8c259b60da73c83b58e4d0799f0 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 6 Feb 2024 14:56:42 +0100 Subject: [PATCH 01/45] Removing Python from build to get CI passing (#31) Authors: - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Ben Frederickson (https://github.com/benfred) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cuvs/pull/31 --- README.md | 40 ++++++++++- build.sh | 10 +-- ci/build_docs.sh | 8 +-- ci/build_python.sh | 49 ++++--------- ci/build_wheel_cuvs.sh | 4 +- ci/test_python.sh | 34 +++++----- ci/test_wheel_cuvs.sh | 24 +++---- ..._template.sh => build_libcuvs_examples.sh} | 4 +- conda/recipes/libcuvs/meta.yaml | 6 +- cpp/{template => examples}/CMakeLists.txt | 0 cpp/{template => examples}/README.md | 8 +-- cpp/{template => examples}/build.sh | 2 +- .../cmake/thirdparty/fetch_rapids.cmake | 2 +- .../cmake/thirdparty/get_cuvs.cmake | 0 .../src/cagra_example.cu | 0 cpp/{template => examples}/src/common.cuh | 2 +- cpp/include/cuvs/neighbors/cagra_c.h | 68 +++++++++---------- cpp/src/neighbors/cagra_c.cpp | 50 +++++++------- cpp/test/CMakeLists.txt | 2 +- cpp/test/neighbors/ann_cagra_c.cu | 10 +-- cpp/test/neighbors/c_api.c | 6 +- docs/source/index.rst | 10 +-- 22 files changed, 174 insertions(+), 165 deletions(-) rename conda/recipes/libcuvs/{build_libcuvs_template.sh => build_libcuvs_examples.sh} (61%) rename cpp/{template => examples}/CMakeLists.txt (100%) rename cpp/{template => examples}/README.md (82%) rename cpp/{template => examples}/build.sh (93%) rename cpp/{template => examples}/cmake/thirdparty/fetch_rapids.cmake (95%) rename cpp/{template => examples}/cmake/thirdparty/get_cuvs.cmake (100%) rename cpp/{template => examples}/src/cagra_example.cu (100%) rename cpp/{template => examples}/src/common.cuh (98%) diff --git a/README.md b/README.md index e86eafda2..428ab04a9 100755 --- a/README.md +++ b/README.md @@ -1,2 +1,40 @@ -#
 cuVS: Vector Search on the GPU
+#
 cuVS: Vector Search and Clustering on the GPU
+ +### NOTE: cuVS is currently being + +## Contents +
+ +1. [Useful Resources](#useful-resources) +2. [What is cuVS?](#what-is-cuvs) +3. [Getting Started](#getting-started) +4. [Installing cuVS](#installing) +5. [Contributing](#contributing) +6. [References](#references) + +
+ +## Useful Resources + +- [cuVS Reference Documentation](https://docs.rapids.ai/api/cuvs/stable/): API Documentation. +- [cuVS Getting Started](./docs/source/quick_start.md): Getting started with RAFT. +- [Build and Install cuVS](./docs/source/build.md): Instructions for installing and building cuVS. +- [Example Notebooks](./notebooks): Example jupyer notebooks +- [RAPIDS Community](https://rapids.ai/community.html): Get help, contribute, and collaborate. +- [GitHub repository](https://github.com/rapidsai/cuvs): Download the cuVS source code. +- [Issue tracker](https://github.com/rapidsai/cuvs/issues): Report issues or request features. + +## What is cuVS? + +cuVS contains many algorithms for running approximate nearest neighbors and clustering on the GPU. + +## Getting Started + + + +## Installing cuVS + +## Contributing + +## References diff --git a/build.sh b/build.sh index 6dd250c51..0d035171e 100755 --- a/build.sh +++ b/build.sh @@ -18,7 +18,7 @@ ARGS=$* # scripts, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libcuvs python docs tests template clean --uninstall -v -g -n --compile-static-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h" +VALIDARGS="clean libcuvs python docs tests examples clean --uninstall -v -g -n --compile-static-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h" HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--build-metrics=] where is: clean - remove all existing build artifacts and configuration (start over) @@ -27,7 +27,7 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool= is: -v - verbose build mode @@ -433,10 +433,10 @@ if hasArg docs; then fi ################################################################################ -# Initiate build for example CUVS application template (if needed) +# Initiate build for c++ examples (if needed) -if hasArg template; then - pushd ${REPODIR}/cpp/template +if hasArg examples; then + pushd ${REPODIR}/cpp/examples ./build.sh popd fi diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 1193285da..0706b1fca 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -18,15 +18,11 @@ rapids-print-env rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) -PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) +#PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ - --channel "${PYTHON_CHANNEL}" \ - libcuvs \ - libcuvs-headers \ - cuvs \ - raft-dask + libcuvs export RAPIDS_VERSION_NUMBER="24.02" export RAPIDS_DOCS_DIR="$(mktemp -d)" diff --git a/ci/build_python.sh b/ci/build_python.sh index a54c3dcaa..2f661c853 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -15,43 +15,22 @@ rapids-print-env rapids-logger "Begin py build" -CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) +#CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) -version=$(rapids-generate-version) -git_commit=$(git rev-parse HEAD) -export RAPIDS_PACKAGE_VERSION=${version} -echo "${version}" > VERSION +#version=$(rapids-generate-version) +#git_commit=$(git rev-parse HEAD) +#export RAPIDS_PACKAGE_VERSION=${version} +#echo "${version}" > VERSION -package_dir="python" -for package_name in cuvs raft-dask; do - underscore_package_name=$(echo "${package_name}" | tr "-" "_") - sed -i "/^__git_commit__/ s/= .*/= \"${git_commit}\"/g" "${package_dir}/${package_name}/${underscore_package_name}/_version.py" -done 
+#package_dir="python" +#for package_name in cuvs; do +# underscore_package_name=$(echo "${package_name}" | tr "-" "_") +# sed -i "/^__git_commit__/ s/= .*/= \"${git_commit}\"/g" "${package_dir}/${package_name}/${underscore_package_name}/_version.py" +#done # TODO: Remove `--no-test` flags once importing on a CPU # node works correctly -rapids-conda-retry mambabuild \ - --no-test \ - --channel "${CPP_CHANNEL}" \ - conda/recipes/cuvs - - -# Build ann-bench for each cuda and python version -rapids-conda-retry mambabuild \ ---no-test \ ---channel "${CPP_CHANNEL}" \ ---channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ -conda/recipes/cuda-ann-bench - -# Build ann-bench-cpu only in CUDA 11 jobs since it only depends on python -# version -RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" -if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then - rapids-conda-retry mambabuild \ - --no-test \ - --channel "${CPP_CHANNEL}" \ - --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ - conda/recipes/cuda-ann-bench-cpu -fi - -rapids-upload-conda-to-s3 python +#rapids-conda-retry mambabuild \ +# --no-test \ +# --channel "${CPP_CHANNEL}" \ +# conda/recipes/cuvs diff --git a/ci/build_wheel_cuvs.sh b/ci/build_wheel_cuvs.sh index 9d2f96996..b4765be38 100755 --- a/ci/build_wheel_cuvs.sh +++ b/ci/build_wheel_cuvs.sh @@ -1,9 +1,9 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. set -euo pipefail # Set up skbuild options. Enable sccache in skbuild config options export SKBUILD_CONFIGURE_OPTIONS="-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF" -ci/build_wheel.sh cuvs python/cuvs +#ci/build_wheel.sh cuvs python/cuvs diff --git a/ci/test_python.sh b/ci/test_python.sh index a65469928..e70c4555d 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
set -euo pipefail @@ -20,7 +20,7 @@ set -u rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) -PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) +#PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} @@ -28,10 +28,10 @@ mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" rapids-print-env -rapids-mamba-retry install \ - --channel "${CPP_CHANNEL}" \ - --channel "${PYTHON_CHANNEL}" \ - libcuvs cuvs +#rapids-mamba-retry install \ +# --channel "${CPP_CHANNEL}" \ +## --channel "${PYTHON_CHANNEL}" \ +# libcuvs #cuvs rapids-logger "Check GPU usage" nvidia-smi @@ -40,17 +40,17 @@ EXITCODE=0 trap "EXITCODE=1" ERR set +e -rapids-logger "pytest cuvs" -pushd python/cuvs/cuvs -pytest \ - --cache-clear \ - --junitxml="${RAPIDS_TESTS_DIR}/junit-cuvs.xml" \ - --cov-config=../.coveragerc \ - --cov=cuvs \ - --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cuvs-coverage.xml" \ - --cov-report=term \ - test -popd +#rapids-logger "pytest cuvs" +#pushd python/cuvs/cuvs +#pytest \ +# --cache-clear \ +# --junitxml="${RAPIDS_TESTS_DIR}/junit-cuvs.xml" \ +# --cov-config=../.coveragerc \ +# --cov=cuvs \ +# --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cuvs-coverage.xml" \ +# --cov-report=term \ +# test +#popd rapids-logger "Test script exiting with value: $EXITCODE" exit ${EXITCODE} diff --git a/ci/test_wheel_cuvs.sh b/ci/test_wheel_cuvs.sh index 6b213d399..52cfa7ae1 100755 --- a/ci/test_wheel_cuvs.sh +++ b/ci/test_wheel_cuvs.sh @@ -1,18 +1,18 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. set -euo pipefail mkdir -p ./dist -RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +#RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +#RAPIDS_PY_WHEEL_NAME="cuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist -# echo to expand wildcard before adding `[extra]` requires for pip -python -m pip install $(echo ./dist/cuvs*.whl)[test] - -# Run smoke tests for aarch64 pull requests -if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then - python ./ci/wheel_smoke_test_cuvs.py -else - python -m pytest ./python/cuvs/cuvs/test -fi +## echo to expand wildcard before adding `[extra]` requires for pip +#python -m pip install $(echo ./dist/cuvs*.whl)[test] +# +## Run smoke tests for aarch64 pull requests +#if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then +# python ./ci/wheel_smoke_test_cuvs.py +#else +# python -m pytest ./python/cuvs/cuvs/test +#fi diff --git a/conda/recipes/libcuvs/build_libcuvs_template.sh b/conda/recipes/libcuvs/build_libcuvs_examples.sh similarity index 61% rename from conda/recipes/libcuvs/build_libcuvs_template.sh rename to conda/recipes/libcuvs/build_libcuvs_examples.sh index bd7719af7..6286a530e 100644 --- a/conda/recipes/libcuvs/build_libcuvs_template.sh +++ b/conda/recipes/libcuvs/build_libcuvs_examples.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
# Just building template so we verify it uses libraft.so and fail if it doesn't build -./build.sh template +./build.sh examples diff --git a/conda/recipes/libcuvs/meta.yaml b/conda/recipes/libcuvs/meta.yaml index 4e437f3e6..4b6ff87e9 100644 --- a/conda/recipes/libcuvs/meta.yaml +++ b/conda/recipes/libcuvs/meta.yaml @@ -195,9 +195,9 @@ outputs: home: https://rapids.ai/ license: Apache-2.0 summary: libcuvs tests - - name: libcuvs-template + - name: libcuvs-examples version: {{ version }} - script: build_libcuvs_template.sh + script: build_libcuvs_examples.sh build: script_env: *script_env number: {{ GIT_DESCRIBE_NUMBER }} @@ -241,4 +241,4 @@ outputs: about: home: https://rapids.ai/ license: Apache-2.0 - summary: libcuvs template + summary: libcuvs examples diff --git a/cpp/template/CMakeLists.txt b/cpp/examples/CMakeLists.txt similarity index 100% rename from cpp/template/CMakeLists.txt rename to cpp/examples/CMakeLists.txt diff --git a/cpp/template/README.md b/cpp/examples/README.md similarity index 82% rename from cpp/template/README.md rename to cpp/examples/README.md index 5393c0229..125c6dba2 100644 --- a/cpp/template/README.md +++ b/cpp/examples/README.md @@ -1,14 +1,14 @@ -# Example CUVS Project Template +# cuVS C++ Examples This template project provides a drop-in sample to either start building a new application with, or using CUVS in an existing CMake project. -First, please refer to our [installation docs](https://docs.rapids.ai/api/cuvs/stable/build.html#cuda-gpu-requirements) for the minimum requirements to use CUVS. +First, please refer to our [installation docs](https://docs.rapids.ai/api/cuvs/stable/build.html#cuda-gpu-requirements) for the minimum requirements to use cuVS. Once the minimum requirements are satisfied, this example template application can be built with the provided `build.sh` script. This is a bash script that calls the appropriate CMake commands, so you can look into it to see the typical CMake based build workflow. -This directory (`CUVS_SOURCE/cpp/template`) can be copied directly in order to build a new application with CUVS. +This directory (`CUVS_SOURCE/cpp/examples`) can be copied directly in order to build a new application with CUVS. -CUVS can be integrated into an existing CMake project by copying the contents in the `configure rapids-cmake` and `configure cuvs` sections of the provided `CMakeLists.txt` into your project, along with `cmake/thirdparty/get_cuvs.cmake`. +cuVS can be integrated into an existing CMake project by copying the contents in the `configure rapids-cmake` and `configure cuvs` sections of the provided `CMakeLists.txt` into your project, along with `cmake/thirdparty/get_cuvs.cmake`. Make sure to link against the appropriate Cmake targets. Use `cuvs::cuvs` to utilize the shared library. diff --git a/cpp/template/build.sh b/cpp/examples/build.sh similarity index 93% rename from cpp/template/build.sh rename to cpp/examples/build.sh index 25ccb3461..7a948d9a8 100755 --- a/cpp/template/build.sh +++ b/cpp/examples/build.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
# cuvs empty project template build script diff --git a/cpp/template/cmake/thirdparty/fetch_rapids.cmake b/cpp/examples/cmake/thirdparty/fetch_rapids.cmake similarity index 95% rename from cpp/template/cmake/thirdparty/fetch_rapids.cmake rename to cpp/examples/cmake/thirdparty/fetch_rapids.cmake index 15b6c43a6..4da917e26 100644 --- a/cpp/template/cmake/thirdparty/fetch_rapids.cmake +++ b/cpp/examples/cmake/thirdparty/fetch_rapids.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at diff --git a/cpp/template/cmake/thirdparty/get_cuvs.cmake b/cpp/examples/cmake/thirdparty/get_cuvs.cmake similarity index 100% rename from cpp/template/cmake/thirdparty/get_cuvs.cmake rename to cpp/examples/cmake/thirdparty/get_cuvs.cmake diff --git a/cpp/template/src/cagra_example.cu b/cpp/examples/src/cagra_example.cu similarity index 100% rename from cpp/template/src/cagra_example.cu rename to cpp/examples/src/cagra_example.cu diff --git a/cpp/template/src/common.cuh b/cpp/examples/src/common.cuh similarity index 98% rename from cpp/template/src/common.cuh rename to cpp/examples/src/common.cuh index 0b72d3bf3..757123cea 100644 --- a/cpp/template/src/common.cuh +++ b/cpp/examples/src/common.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/cuvs/neighbors/cagra_c.h b/cpp/include/cuvs/neighbors/cagra_c.h index 59861b502..c50d7475b 100644 --- a/cpp/include/cuvs/neighbors/cagra_c.h +++ b/cpp/include/cuvs/neighbors/cagra_c.h @@ -33,7 +33,7 @@ extern "C" { * @brief Enum to denote which ANN algorithm is used to build CAGRA graph * */ -enum cagraGraphBuildAlgo { +enum cuvsCagraGraphBuildAlgo { /* Use IVF-PQ to build all-neighbors knn graph */ IVF_PQ, /* Experimental, use NN-Descent to build all-neighbors knn graph */ @@ -44,18 +44,18 @@ enum cagraGraphBuildAlgo { * @brief Supplemental parameters to build CAGRA Index * */ -struct cagraIndexParams { +struct cuvsCagraIndexParams { /** Degree of input graph for pruning. */ size_t intermediate_graph_degree; /** Degree of output graph. */ size_t graph_degree; /** ANN algorithm to build knn graph. */ - enum cagraGraphBuildAlgo build_algo; + enum cuvsCagraGraphBuildAlgo build_algo; /** Number of Iterations to run if building with NN_DESCENT */ size_t nn_descent_niter; }; -typedef struct cagraIndexParams* cuvsCagraIndexParams_t; +typedef struct cuvsCagraIndexParams* cuvsCagraIndexParams_t; /** * @brief Allocate CAGRA Index params, and populate with default values @@ -77,7 +77,7 @@ cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t index); * @brief Enum to denote algorithm used to search CAGRA Index * */ -enum cagraSearchAlgo { +enum cuvsCagraSearchAlgo { /** For large batch sizes. */ SINGLE_CTA, /** For small batch sizes. 
*/ @@ -90,13 +90,13 @@ enum cagraSearchAlgo { * @brief Enum to denote Hash Mode used while searching CAGRA index * */ -enum cagraHashMode { HASH, SMALL, AUTO_HASH }; +enum cuvsCagraHashMode { HASH, SMALL, AUTO_HASH }; /** * @brief Supplemental parameters to search CAGRA index * */ -struct cagraSearchParams { +struct cuvsCagraSearchParams { /** Maximum number of queries to search at the same time (batch size). Auto select when 0.*/ size_t max_queries; @@ -114,7 +114,7 @@ struct cagraSearchParams { // Reasonable default values are automatically chosen. /** Which search implementation to use. */ - enum cagraSearchAlgo algo; + enum cuvsCagraSearchAlgo algo; /** Number of threads used to calculate a single distance. 4, 8, 16, or 32. */ size_t team_size; @@ -128,7 +128,7 @@ struct cagraSearchParams { /** Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0. */ size_t thread_block_size; /** Hashmap type. Auto selection when AUTO. */ - enum cagraHashMode hashmap_mode; + enum cuvsCagraHashMode hashmap_mode; /** Lower limit of hashmap bit length. More than 8. */ size_t hashmap_min_bitlen; /** Upper limit of hashmap fill rate. More than 0.1, less than 0.9.*/ @@ -140,7 +140,7 @@ struct cagraSearchParams { uint64_t rand_xor_mask; }; -typedef struct cagraSearchParams* cuvsCagraSearchParams_t; +typedef struct cuvsCagraSearchParams* cuvsCagraSearchParams_t; /** * @brief Allocate CAGRA search params, and populate with default values @@ -166,24 +166,24 @@ typedef struct { uintptr_t addr; DLDataType dtype; -} cagraIndex; +} cuvsCagraIndex; -typedef cagraIndex* cagraIndex_t; +typedef cuvsCagraIndex* cuvsCagraIndex_t; /** * @brief Allocate CAGRA index * - * @param[in] index cagraIndex_t to allocate + * @param[in] index cuvsCagraIndex_t to allocate * @return cagraError_t */ -cuvsError_t cagraIndexCreate(cagraIndex_t* index); +cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t* index); /** * @brief De-allocate CAGRA index * - * @param[in] index cagraIndex_t to de-allocate + * @param[in] index cuvsCagraIndex_t to de-allocate */ -cuvsError_t cagraIndexDestroy(cagraIndex_t index); +cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index); /** * @brief Build a CAGRA index with a `DLManagedTensor` which has underlying @@ -209,28 +209,28 @@ cuvsError_t cagraIndexDestroy(cagraIndex_t index); * cuvsError_t params_create_status = cuvsCagraIndexParamsCreate(¶ms); * * // Create CAGRA index - * cagraIndex_t index; - * cuvsError_t index_create_status = cagraIndexCreate(&index); + * cuvsCagraIndex_t index; + * cuvsError_t index_create_status = cuvsCagraIndexCreate(&index); * * // Build the CAGRA Index - * cuvsError_t build_status = cagraBuild(res, params, &dataset, index); + * cuvsError_t build_status = cuvsCagraBuild(res, params, &dataset, index); * * // de-allocate `params`, `index` and `res` * cuvsError_t params_destroy_status = cuvsCagraIndexParamsDestroy(params); - * cuvsError_t index_destroy_status = cagraIndexDestroy(index); + * cuvsError_t index_destroy_status = cuvsCagraIndexDestroy(index); * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); * @endcode * * @param[in] res cuvsResources_t opaque C handle * @param[in] params cuvsCagraIndexParams_t used to build CAGRA index * @param[in] dataset DLManagedTensor* training dataset - * @param[out] index cagraIndex_t Newly built CAGRA index + * @param[out] index cuvsCagraIndex_t Newly built CAGRA index * @return cuvsError_t */ -cuvsError_t cagraBuild(cuvsResources_t res, - cuvsCagraIndexParams_t params, - DLManagedTensor* dataset, - cagraIndex_t 
index); +cuvsError_t cuvsCagraBuild(cuvsResources_t res, + cuvsCagraIndexParams_t params, + DLManagedTensor* dataset, + cuvsCagraIndex_t index); /** * @brief Search a CAGRA index with a `DLManagedTensor` which has underlying @@ -259,8 +259,8 @@ cuvsError_t cagraBuild(cuvsResources_t res, * cuvsCagraSearchParams_t params; * cuvsError_t params_create_status = cuvsCagraSearchParamsCreate(¶ms); * - * // Search the `index` built using `cagraBuild` - * cuvsError_t search_status = cagraSearch(res, params, index, queries, neighbors, distances); + * // Search the `index` built using `cuvsCagraBuild` + * cuvsError_t search_status = cuvsCagraSearch(res, params, index, queries, neighbors, distances); * * // de-allocate `params` and `res` * cuvsError_t params_destroy_status = cuvsCagraSearchParamsDestroy(params); @@ -269,17 +269,17 @@ cuvsError_t cagraBuild(cuvsResources_t res, * * @param[in] res cuvsResources_t opaque C handle * @param[in] params cuvsCagraSearchParams_t used to search CAGRA index - * @param[in] index cagraIndex which has been returned by `cagraBuild` + * @param[in] index cuvsCagraIndex which has been returned by `cuvsCagraBuild` * @param[in] queries DLManagedTensor* queries dataset to search * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries * @param[out] distances DLManagedTensor* output `k` distances for queries */ -cuvsError_t cagraSearch(cuvsResources_t res, - cuvsCagraSearchParams_t params, - cagraIndex_t index, - DLManagedTensor* queries, - DLManagedTensor* neighbors, - DLManagedTensor* distances); +cuvsError_t cuvsCagraSearch(cuvsResources_t res, + cuvsCagraSearchParams_t params, + cuvsCagraIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances); #ifdef __cplusplus } diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index 638c9a23d..2a9de37f1 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ -30,7 +30,7 @@ namespace { template -void* _build(cuvsResources_t res, cagraIndexParams params, DLManagedTensor* dataset_tensor) +void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor* dataset_tensor) { auto dataset = dataset_tensor->dl_tensor; @@ -59,8 +59,8 @@ void* _build(cuvsResources_t res, cagraIndexParams params, DLManagedTensor* data template void _search(cuvsResources_t res, - cagraSearchParams params, - cagraIndex index, + cuvsCagraSearchParams params, + cuvsCagraIndex index, DLManagedTensor* queries_tensor, DLManagedTensor* neighbors_tensor, DLManagedTensor* distances_tensor) @@ -95,17 +95,17 @@ void _search(cuvsResources_t res, } // namespace -extern "C" cuvsError_t cagraIndexCreate(cagraIndex_t* index) +extern "C" cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t* index) { try { - *index = new cagraIndex{}; + *index = new cuvsCagraIndex{}; return CUVS_SUCCESS; } catch (...) 
{ return CUVS_ERROR; } } -extern "C" cuvsError_t cagraIndexDestroy(cagraIndex_t index_c_ptr) +extern "C" cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index_c_ptr) { try { auto index = *index_c_ptr; @@ -130,10 +130,10 @@ extern "C" cuvsError_t cagraIndexDestroy(cagraIndex_t index_c_ptr) } } -extern "C" cuvsError_t cagraBuild(cuvsResources_t res, - cuvsCagraIndexParams_t params, - DLManagedTensor* dataset_tensor, - cagraIndex_t index) +extern "C" cuvsError_t cuvsCagraBuild(cuvsResources_t res, + cuvsCagraIndexParams_t params, + DLManagedTensor* dataset_tensor, + cuvsCagraIndex_t index) { try { auto dataset = dataset_tensor->dl_tensor; @@ -158,12 +158,12 @@ extern "C" cuvsError_t cagraBuild(cuvsResources_t res, } } -extern "C" cuvsError_t cagraSearch(cuvsResources_t res, - cuvsCagraSearchParams_t params, - cagraIndex_t index_c_ptr, - DLManagedTensor* queries_tensor, - DLManagedTensor* neighbors_tensor, - DLManagedTensor* distances_tensor) +extern "C" cuvsError_t cuvsCagraSearch(cuvsResources_t res, + cuvsCagraSearchParams_t params, + cuvsCagraIndex_t index_c_ptr, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor) { try { auto queries = queries_tensor->dl_tensor; @@ -205,10 +205,10 @@ extern "C" cuvsError_t cagraSearch(cuvsResources_t res, extern "C" cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params) { try { - *params = new cagraIndexParams{.intermediate_graph_degree = 128, - .graph_degree = 64, - .build_algo = IVF_PQ, - .nn_descent_niter = 20}; + *params = new cuvsCagraIndexParams{.intermediate_graph_degree = 128, + .graph_degree = 64, + .build_algo = IVF_PQ, + .nn_descent_niter = 20}; return CUVS_SUCCESS; } catch (...) { return CUVS_ERROR; @@ -228,11 +228,11 @@ extern "C" cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t params extern "C" cuvsError_t cuvsCagraSearchParamsCreate(cuvsCagraSearchParams_t* params) { try { - *params = new cagraSearchParams{.itopk_size = 64, - .search_width = 1, - .hashmap_max_fill_rate = 0.5, - .num_random_samplings = 1, - .rand_xor_mask = 0x128394}; + *params = new cuvsCagraSearchParams{.itopk_size = 64, + .search_width = 1, + .hashmap_max_fill_rate = 0.5, + .num_random_samplings = 1, + .rand_xor_mask = 0x128394}; return CUVS_SUCCESS; } catch (...) 
{ return CUVS_ERROR; diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 609bc2d4e..f33c14179 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -57,7 +57,7 @@ function(ConfigureTest) ) set_target_properties( ${TEST_NAME} - PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$" + PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$" INSTALL_RPATH "\$ORIGIN/../../../lib" CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON diff --git a/cpp/test/neighbors/ann_cagra_c.cu b/cpp/test/neighbors/ann_cagra_c.cu index 4870ac3b8..a1c496eaa 100644 --- a/cpp/test/neighbors/ann_cagra_c.cu +++ b/cpp/test/neighbors/ann_cagra_c.cu @@ -56,13 +56,13 @@ TEST(CagraC, BuildSearch) dataset_tensor.dl_tensor.strides = nullptr; // create index - cagraIndex_t index; - cagraIndexCreate(&index); + cuvsCagraIndex_t index; + cuvsCagraIndexCreate(&index); // build index cuvsCagraIndexParams_t build_params; cuvsCagraIndexParamsCreate(&build_params); - cagraBuild(res, build_params, &dataset_tensor, index); + cuvsCagraBuild(res, build_params, &dataset_tensor, index); // create queries DLTensor float* queries_d; @@ -113,7 +113,7 @@ TEST(CagraC, BuildSearch) // search index cuvsCagraSearchParams_t search_params; cuvsCagraSearchParamsCreate(&search_params); - cagraSearch(res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor); + cuvsCagraSearch(res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor); // verify output ASSERT_TRUE(cuvs::devArrMatchHost(neighbors_exp, neighbors_d, 4, cuvs::Compare())); @@ -128,6 +128,6 @@ TEST(CagraC, BuildSearch) // de-allocate index and res cuvsCagraSearchParamsDestroy(search_params); cuvsCagraIndexParamsDestroy(build_params); - cagraIndexDestroy(index); + cuvsCagraIndexDestroy(index); cuvsResourcesDestroy(res); } diff --git a/cpp/test/neighbors/c_api.c b/cpp/test/neighbors/c_api.c index d4f5ad08e..fa1727c51 100644 --- a/cpp/test/neighbors/c_api.c +++ b/cpp/test/neighbors/c_api.c @@ -24,8 +24,8 @@ int main() // simple smoke test to make sure that we can compile the cagra_c.h API // using a c compiler. This isn't aiming to be a full test, just checking // that the exposed C-API is valid C code and doesn't contain C++ features - cagraIndex_t index; - cagraIndexCreate(&index); - cagraIndexDestroy(index); + cuvsCagraIndex_t index; + cuvsCagraIndexCreate(&index); + cuvsCagraIndexDestroy(index); return 0; } diff --git a/docs/source/index.rst b/docs/source/index.rst index a161efb42..bf9790610 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,9 +1,5 @@ -cuVS: GPU-accelerated Vector Search -=================================== - -.. image:: ../../img/raft-tech-stack-vss.png - :width: 800 - :alt: cuVS Tech Stack +cuVS: Vector Search and Clustering on the GPU +============================================= Useful Resources ################ @@ -19,7 +15,7 @@ Useful Resources What is cuVS? ############# -cuVS is a library for vector search on the GPU. +cuVS is a library for vector search and clustering on the GPU. .. toctree:: :maxdepth: 1 From fb08679d19066fe05bf0933d400c81ab61e1d089 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 7 Feb 2024 21:42:08 +0100 Subject: [PATCH 02/45] Fixing googletests (#32) Authors: - Corey J. 
Nolet (https://github.com/cjnolet) Approvers: - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cuvs/pull/32 --- README.md | 150 ++++++++++++++++-- ci/build_wheel_cuvs.sh | 2 +- ci/release/update-version.sh | 6 +- cpp/CMakeLists.txt | 4 +- .../cuvs/neighbors/{cagra_c.h => cagra.h} | 4 +- cpp/src/neighbors/cagra_c.cpp | 2 +- cpp/test/neighbors/ann_cagra_c.cu | 2 +- cpp/test/neighbors/c_api.c | 4 +- docs/source/contributing.md | 8 +- docs/source/cpp_api/core_interop.rst | 6 +- fetch_rapids.cmake | 8 +- 11 files changed, 164 insertions(+), 32 deletions(-) rename cpp/include/cuvs/neighbors/{cagra_c.h => cagra.h} (99%) diff --git a/README.md b/README.md index 428ab04a9..e213de39a 100755 --- a/README.md +++ b/README.md @@ -1,19 +1,14 @@ #
 cuVS: Vector Search and Clustering on the GPU
-### NOTE: cuVS is currently being - ## Contents -
1. [Useful Resources](#useful-resources) 2. [What is cuVS?](#what-is-cuvs) -3. [Getting Started](#getting-started) -4. [Installing cuVS](#installing) +3. [Installing cuVS](#installing) +4. [Getting Started](#getting-started) 5. [Contributing](#contributing) 6. [References](#references) -
- ## Useful Resources - [cuVS Reference Documentation](https://docs.rapids.ai/api/cuvs/stable/): API Documentation. @@ -26,15 +21,152 @@ ## What is cuVS? -cuVS contains many algorithms for running approximate nearest neighbors and clustering on the GPU. +cuVS contains state-of-the-art implementations of several algorithms for running approximate nearest neighbors and clustering on the GPU. It can be used directly or through the various databases and other libraries that have integrated it. The primary goal of cuVS is to simplify the use of GPUs for vector similarity search and clustering. + +**Please note** that cuVS is a new library mostly derived from the approximate nearest neighbors and clustering algorithms in the [RAPIDS RAFT](https://github.com/rapidsai) library of data mining primitives. RAPIDS RAFT currently contains the most fully-featured versions of the approximate nearest neighbors and clustering algorithms in cuVS. We are in the process of migrating the algorithms from RAFT to cuVS, but if you are unsure of which to use, please consider the following: +1. RAFT contains C++ and Python APIs for all of the approximate nearest neighbors and clustering algorithms. +2. cuVS contains a growing support for different languages, including C, C++, Python, and Rust. We will be adding more language support to cuVS in the future but will not be improving the language support for RAFT. +3. Once all of RAFT's approximate nearest neighbors and clustering algorithms are moved to cuVS, the RAFT APIs will be deprecated and eventually removed altogether. Once removed, RAFT will become a lightweight header-only library. In the meantime, there's no harm in using RAFT if support for additional languages is not needed. + +## Installing cuVS + +cuVS comes with pre-built packages that can be installed through [conda](https://conda.io/projects/conda/en/latest/user-guide/getting-started.html#managing-python). Different packages are available for the different languages supported by cuVS: + +| Python | C++ | C | Rust | +|--------|-----|---|------| +| `pycuvs`| `libcuvs` | `libcuvs_c` | `cuvs-rs` | + +### Stable release + +It is recommended to use [mamba](https://mamba.readthedocs.io/en/latest/installation/mamba-installation.html) to install the desired packages. The following command will install the Python package. You can substitute `pycuvs` for any of the packages in the table above: +```bash +mamba install -c conda-forge -c nvidia -c rapidsai pycuvs +``` + +### Nightlies +If installing a version that has not yet been released, the `rapidsai` channel can be replaced with `rapidsai-nightly`: +```bash +mamba install -c conda-forge -c nvidia -c rapidsai-nightly pycuvs=24.02* +``` + +Please see the [Build and Install Guide](docs/source/build.md) for more information on installing cuVS and building from source. ## Getting Started +The following code snippets train an approximate nearest neighbors index for the CAGRA algorithm. +### Python API + +```python +from cuvs.neighbors import cagra + +dataset = load_data() +index_params = cagra.IndexParams() + +index = cagra.build_index(build_params, dataset) +``` + +### C++ API + +```c++ +#include + +using namespace cuvs::neighbors; + +raft::device_matrix_view dataset = load_dataset(); +raft::device_resources res; + +cagra::index_params index_params; + +auto index = cagra::build(res, index_params, dataset); +``` + +For more example of the C++ APIs, refer to [cpp/examples](https://github.com/rapidsai/cuvs/tree/HEAD/cpp/examples) directory in the codebase. 
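
A follow-on search might look like the sketch below. This is an illustrative outline rather than the library's verbatim API: `n_queries`, `topk`, `dim`, and the buffer names are placeholders, and the exact include paths and `cagra::search` overload should be verified against the installed version.

```c++
// Assumed headers; verify the exact paths for your install.
#include <cuvs/neighbors/cagra.hpp>
#include <raft/core/device_mdarray.hpp>
#include <raft/core/device_resources.hpp>

using namespace cuvs::neighbors;

raft::device_resources res;

// Placeholder sizes for illustration only; `dim` must match the indexed dataset.
int64_t n_queries = 10;
int64_t topk      = 5;
int64_t dim       = 128;

// Device buffers for the query vectors and the search results.
auto queries   = raft::make_device_matrix<float, int64_t>(res, n_queries, dim);
auto neighbors = raft::make_device_matrix<uint32_t, int64_t>(res, n_queries, topk);
auto distances = raft::make_device_matrix<float, int64_t>(res, n_queries, topk);

// ... copy the vectors to look up into `queries` ...

cagra::search_params search_params;

// `index` is the index built in the snippet above.
cagra::search(res, search_params, index,
              raft::make_const_mdspan(queries.view()),
              neighbors.view(),
              distances.view());
```

Because the arguments are RAFT mdspan views, memory that already lives on the device can be wrapped with `raft::make_device_matrix_view` instead of being copied.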
+ +### C API + +```c +#include + +cuvsResources_t res; +cuvsCagraIndexParams_t index_params; +cuvsCagraIndex_t index; + +DLManagedTensor *dataset; +load_dataset(dataset); + +cuvsResourcesCreate(&res); +cuvsCagraIndexParamsCreate(&index_params); +cuvsCagraIndexCreate(&index); + +cuvsCagraBuild(res, index_params, dataset, index); + +cuvsCagraIndexDestroy(index); +cuvsCagraIndexParamsDestroy(index_params); +cuvsResourcesDestroy(res); +``` -## Installing cuVS ## Contributing +If you are interested in contributing to the cuVS library, please read our [Contributing guidelines](docs/source/contributing.md). Refer to the [Developer Guide](docs/source/developer_guide.md) for details on the developer guidelines, workflows, and principals. + ## References +When citing cuVS generally, please consider referencing this Github repository. +```bibtex +@misc{rapidsai, + title={Rapidsai/cuVS: Vector Search and Clustering on the GPU.}, + url={https://github.com/rapidsai/cuvs}, + journal={GitHub}, + publisher={Nvidia RAPIDS}, + author={Rapidsai}, + year={2024} +} +``` + +If citing CAGRA, please consider the following bibtex: +```bibtex +@misc{ootomo2023cagra, + title={CAGRA: Highly Parallel Graph Construction and Approximate Nearest Neighbor Search for GPUs}, + author={Hiroyuki Ootomo and Akira Naruse and Corey Nolet and Ray Wang and Tamas Feher and Yong Wang}, + year={2023}, + eprint={2308.15136}, + archivePrefix={arXiv}, + primaryClass={cs.DS} +} +``` + +If citing the k-selection routines, please consider the following bibtex: +```bibtex +@proceedings{10.1145/3581784, + title = {SC '23: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis}, + year = {2023}, + isbn = {9798400701092}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + abstract = {Started in 1988, the SC Conference has become the annual nexus for researchers and practitioners from academia, industry and government to share information and foster collaborations to advance the state of the art in High Performance Computing (HPC), Networking, Storage, and Analysis.}, + location = {, Denver, CO, USA, } +} +``` + +If citing the nearest neighbors descent API, please consider the following bibtex: +```bibtex +@inproceedings{10.1145/3459637.3482344, + author = {Wang, Hui and Zhao, Wan-Lei and Zeng, Xiangxiang and Yang, Jianye}, + title = {Fast K-NN Graph Construction by GPU Based NN-Descent}, + year = {2021}, + isbn = {9781450384469}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + url = {https://doi.org/10.1145/3459637.3482344}, + doi = {10.1145/3459637.3482344}, + abstract = {NN-Descent is a classic k-NN graph construction approach. It is still widely employed in machine learning, computer vision, and information retrieval tasks due to its efficiency and genericness. However, the current design only works well on CPU. In this paper, NN-Descent has been redesigned to adapt to the GPU architecture. A new graph update strategy called selective update is proposed. It reduces the data exchange between GPU cores and GPU global memory significantly, which is the processing bottleneck under GPU computation architecture. This redesign leads to full exploitation of the parallelism of the GPU hardware. In the meantime, the genericness, as well as the simplicity of NN-Descent, are well-preserved. Moreover, a procedure that allows to k-NN graph to be merged efficiently on GPU is proposed. 
It makes the construction of high-quality k-NN graphs for out-of-GPU-memory datasets tractable. Our approach is 100-250\texttimes{} faster than the single-thread NN-Descent and is 2.5-5\texttimes{} faster than the existing GPU-based approaches as we tested on million as well as billion scale datasets.}, + booktitle = {Proceedings of the 30th ACM International Conference on Information \& Knowledge Management}, + pages = {1929–1938}, + numpages = {10}, + keywords = {high-dimensional, nn-descent, gpu, k-nearest neighbor graph}, + location = {Virtual Event, Queensland, Australia}, + series = {CIKM '21} +} +``` \ No newline at end of file diff --git a/ci/build_wheel_cuvs.sh b/ci/build_wheel_cuvs.sh index b4765be38..de0e6f160 100755 --- a/ci/build_wheel_cuvs.sh +++ b/ci/build_wheel_cuvs.sh @@ -4,6 +4,6 @@ set -euo pipefail # Set up skbuild options. Enable sccache in skbuild config options -export SKBUILD_CONFIGURE_OPTIONS="-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF" +export SKBUILD_CONFIGURE_OPTIONS="-DCUVS_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_CUVS_CPP=OFF" #ci/build_wheel.sh cuvs python/cuvs diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index f463aeb65..d730cdc4b 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. ######################## # RAFT Version Updater # ######################## @@ -38,7 +38,7 @@ function sed_runner() { sed_runner "s/set(RAPIDS_VERSION .*)/set(RAPIDS_VERSION \"${NEXT_SHORT_TAG}\")/g" cpp/CMakeLists.txt sed_runner "s/set(RAPIDS_VERSION .*)/set(RAPIDS_VERSION \"${NEXT_SHORT_TAG}\")/g" cpp/template/cmake/thirdparty/fetch_rapids.cmake -sed_runner "s/set(RAFT_VERSION .*)/set(RAFT_VERSION \"${NEXT_FULL_TAG}\")/g" cpp/CMakeLists.txt +sed_runner "s/set(CUVS_VERSION .*)/set(CUVS_VERSION \"${NEXT_FULL_TAG}\")/g" cpp/CMakeLists.txt sed_runner 's/'"cuvs_version .*)"'/'"cuvs_version ${NEXT_FULL_TAG})"'/g' python/cuvs/CMakeLists.txt sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake @@ -85,7 +85,7 @@ sed_runner "s/RAPIDS_VERSION_NUMBER=\".*/RAPIDS_VERSION_NUMBER=\"${NEXT_SHORT_TA sed_runner "/^PROJECT_NUMBER/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" cpp/doxygen/Doxyfile -sed_runner "/^set(RAFT_VERSION/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" docs/source/build.md +sed_runner "/^set(CUVS_VERSION/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" docs/source/build.md sed_runner "s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" docs/source/build.md sed_runner "/rapidsai\/raft/ s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" docs/source/developer_guide.md diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3c9cd6d1b..ea72eac63 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -28,12 +28,12 @@ set(lang_list "CXX") if(NOT BUILD_CPU_ONLY) include(rapids-cuda) - rapids_cuda_init_architectures(cuVS) + rapids_cuda_init_architectures(CUVS) list(APPEND lang_list "CUDA") endif() project( - cuVS + CUVS VERSION ${CUVS_VERSION} LANGUAGES ${lang_list} ) diff --git a/cpp/include/cuvs/neighbors/cagra_c.h b/cpp/include/cuvs/neighbors/cagra.h similarity index 99% rename from cpp/include/cuvs/neighbors/cagra_c.h rename to cpp/include/cuvs/neighbors/cagra.h index c50d7475b..64a26b924 100644 --- a/cpp/include/cuvs/neighbors/cagra_c.h +++ b/cpp/include/cuvs/neighbors/cagra.h @@ -195,7 +195,7 @@ cuvsError_t 
cuvsCagraIndexDestroy(cuvsCagraIndex_t index); * * @code {.c} * #include - * #include + * #include * * // Create cuvsResources_t * cuvsResources_t res; @@ -244,7 +244,7 @@ cuvsError_t cuvsCagraBuild(cuvsResources_t res, * * @code {.c} * #include - * #include + * #include * * // Create cuvsResources_t * cuvsResources_t res; diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index 2a9de37f1..b0154acf8 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ -24,8 +24,8 @@ #include #include +#include #include -#include namespace { diff --git a/cpp/test/neighbors/ann_cagra_c.cu b/cpp/test/neighbors/ann_cagra_c.cu index a1c496eaa..6e3a3cbd1 100644 --- a/cpp/test/neighbors/ann_cagra_c.cu +++ b/cpp/test/neighbors/ann_cagra_c.cu @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/test/neighbors/c_api.c b/cpp/test/neighbors/c_api.c index fa1727c51..0c476e95b 100644 --- a/cpp/test/neighbors/c_api.c +++ b/cpp/test/neighbors/c_api.c @@ -15,13 +15,13 @@ */ #include -#include +#include #include #include int main() { - // simple smoke test to make sure that we can compile the cagra_c.h API + // simple smoke test to make sure that we can compile the cagra.h API // using a c compiler. This isn't aiming to be a full test, just checking // that the exposed C-API is valid C code and doesn't contain C++ features cuvsCagraIndex_t index; diff --git a/docs/source/contributing.md b/docs/source/contributing.md index 090fd834b..c426ce534 100755 --- a/docs/source/contributing.md +++ b/docs/source/contributing.md @@ -1,9 +1,9 @@ # Contributing -If you are interested in contributing to CUVS, your contributions will fall +If you are interested in contributing to cuVS, your contributions will fall into three categories: 1. You want to report a bug, feature request, or documentation issue - - File an [issue](https://github.com/rapidsai/CUVS/issues/new/choose) + - File an [issue](https://github.com/rapidsai/cuvs/issues/new/choose) describing what you encountered or what you want to see changed. - The RAPIDS team will evaluate the issues and triage them, scheduling them for a release. If you believe the issue needs priority attention @@ -26,10 +26,10 @@ into three categories: 1. Read the project's [README.md](https://github.com/rapidsai/cuvs) to learn how to setup the development environment 2. Find an issue to work on. The best way is to look for the [good first issue](https://github.com/rapidsai/CUVS/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) - or [help wanted](https://github.com/rapidsai/CUVS/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels + or [help wanted](https://github.com/rapidsai/cuvs/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels 3. Comment on the issue saying you are going to work on it 4. Code! Make sure to update unit tests! -5. When done, [create your pull request](https://github.com/rapidsai/CUVS/compare) +5. When done, [create your pull request](https://github.com/rapidsai/cuvs/compare) 6. Verify that CI passes all [status checks](https://help.github.com/articles/about-status-checks/). Fix if needed 7. Wait for other developers to review your code and update code as needed 8. 
Once reviewed and approved, a RAPIDS developer will merge your pull request diff --git a/docs/source/cpp_api/core_interop.rst b/docs/source/cpp_api/core_interop.rst index 034030db5..b2ef05f27 100644 --- a/docs/source/cpp_api/core_interop.rst +++ b/docs/source/cpp_api/core_interop.rst @@ -6,11 +6,11 @@ Interop :class: highlight -``#include `` +``#include `` -namespace *raft::core* +namespace *cuvs::core* .. doxygengroup:: interop - :project: RAFT + :project: cuvs :members: :content-only: diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index ca871c575..e63165e1c 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -11,10 +11,10 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. # ============================================================================= -if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) +if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.02/RAPIDS.cmake - ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake + ${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake ) endif() -include(${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) +include(${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake) From a641797be3d1607d98923b6ef36b0a44c3fd49d4 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 15 Feb 2024 23:34:56 -0600 Subject: [PATCH 03/45] FEA First commit on top of 24.04 --- python/cuvs/CMakeLists.txt | 2 +- python/cuvs/cuvs/common/CMakeLists.txt | 24 ++ python/cuvs/cuvs/common/__init__.pxd | 0 python/cuvs/cuvs/common/__init__.py | 26 ++ python/cuvs/cuvs/common/cydlpack.pxd | 68 ++++ python/cuvs/cuvs/common/cydlpack.pyx | 75 ++++ python/cuvs/cuvs/common/temp_raft.py | 23 ++ python/cuvs/cuvs/neighbors/CMakeLists.txt | 24 ++ python/cuvs/cuvs/neighbors/__init__.pxd | 0 python/cuvs/cuvs/neighbors/__init__.py | 26 ++ python/cuvs/cuvs/neighbors/c/cagra_c.pxd | 93 +++++ python/cuvs/cuvs/neighbors/cagra.pyx | 471 ++++++++++++++++++++++ python/cuvs/cuvs/test/__init__py | 0 python/cuvs/cuvs/test/ann_utils.py | 35 ++ python/cuvs/cuvs/test/test_cagra.py | 292 ++++++++++++++ python/cuvs/cuvs/test/test_doctests.py | 129 ++++++ 16 files changed, 1287 insertions(+), 1 deletion(-) create mode 100644 python/cuvs/cuvs/common/CMakeLists.txt create mode 100644 python/cuvs/cuvs/common/__init__.pxd create mode 100644 python/cuvs/cuvs/common/__init__.py create mode 100644 python/cuvs/cuvs/common/cydlpack.pxd create mode 100644 python/cuvs/cuvs/common/cydlpack.pyx create mode 100644 python/cuvs/cuvs/common/temp_raft.py create mode 100644 python/cuvs/cuvs/neighbors/CMakeLists.txt create mode 100644 python/cuvs/cuvs/neighbors/__init__.pxd create mode 100644 python/cuvs/cuvs/neighbors/__init__.py create mode 100644 python/cuvs/cuvs/neighbors/c/cagra_c.pxd create mode 100644 python/cuvs/cuvs/neighbors/cagra.pyx create mode 100644 python/cuvs/cuvs/test/__init__py create mode 100644 python/cuvs/cuvs/test/ann_utils.py create mode 100644 python/cuvs/cuvs/test/test_cagra.py create mode 100644 python/cuvs/cuvs/test/test_doctests.py diff --git a/python/cuvs/CMakeLists.txt 
b/python/cuvs/CMakeLists.txt index ca9da8a9d..64c058255 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) include(../../fetch_rapids.cmake) -set(cuvs_version 24.02.00) +set(cuvs_version 24.04.00) # We always need CUDA for cuvs because the cuvs dependency brings in a header-only cuco dependency # that enables CUDA unconditionally. diff --git a/python/cuvs/cuvs/common/CMakeLists.txt b/python/cuvs/cuvs/common/CMakeLists.txt new file mode 100644 index 000000000..6fa8e430f --- /dev/null +++ b/python/cuvs/cuvs/common/CMakeLists.txt @@ -0,0 +1,24 @@ +# ============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources dlpack.pyx) +set(linked_libraries cuvs::cuvs) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX common_ +) diff --git a/python/cuvs/cuvs/common/__init__.pxd b/python/cuvs/cuvs/common/__init__.pxd new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/common/__init__.py b/python/cuvs/cuvs/common/__init__.py new file mode 100644 index 000000000..1453e30bc --- /dev/null +++ b/python/cuvs/cuvs/common/__init__.py @@ -0,0 +1,26 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .cagra import Index, IndexParams, SearchParams, build, load, save, search + +__all__ = [ + "Index", + "IndexParams", + "SearchParams", + "build", + "load", + "save", + "search", +] diff --git a/python/cuvs/cuvs/common/cydlpack.pxd b/python/cuvs/cuvs/common/cydlpack.pxd new file mode 100644 index 000000000..72a03909c --- /dev/null +++ b/python/cuvs/cuvs/common/cydlpack.pxd @@ -0,0 +1,68 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: language_level=3 + +from libc.stdint cimport int32_t, int64_t, uint8_t, uint16_t, uint64_t + +cdef extern from 'dlpack.h' nogil: + ctypedef enum DLDeviceType: + kDLCPU + kDLCUDA + kDLCUDAHost + kDLOpenCL + kDLVulkan + kDLMetal + kDLVPI + kDLROCM + kDLROCMHost + kDLExtDev + kDLCUDAManaged + kDLOneAPI + kDLWebGPU + kDLHexagon + + ctypedef struct DLDevice: + DLDeviceType device_type + int32_t device_id + + ctypedef enum DLDataTypeCode: + kDLInt + kDLUInt + kDLFloat + kDLBfloat + kDLComplex + kDLBool + + ctypedef struct DLDataType: + uint8_t code + uint8_t bits + uint16_t lanes + + ctypedef struct DLTensor: + void* data + DLDevice device + int32_t ndim + DLDataType dtype + int64_t* shape + int64_t* strides + uint64_t byte_offset + + ctypedef struct DLManagedTensor: + DLTensor dl_tensor + void* manager_ctx + void (*deleter)(DLManagedTensor*) # noqa: E211 + + diff --git a/python/cuvs/cuvs/common/cydlpack.pyx b/python/cuvs/cuvs/common/cydlpack.pyx new file mode 100644 index 000000000..ea9e01f38 --- /dev/null +++ b/python/cuvs/cuvs/common/cydlpack.pyx @@ -0,0 +1,75 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: language_level=3 + +import numpy as np + + +cdef void deleter(DLManagedTensor* tensor): + if tensor.manager_ctx is NULL: + return + stdlib.free(tensor.dl_tensor.shape) + tensor.manager_ctx = NULL + stdlib.free(tensor) + + +cdef DLManagedTensor dlpack_c(ary): + #todo(dgd): add checking options/parameters + cdef DLDeviceType dev_type + cdef DLDevice dev + cdef DLDataType dtype + cdef DLTensor tensor + cdef DLManagedTensor dlm + + if hasattr(ary, "__cuda_array_interface__"): + dev_type = DLDeviceType.kDLCUDA + else: + dev_type = DLDeviceType.kDLCPU + + dev.device_type = dev_type + dev.device_id = 0 + + # todo (dgd): change to nice dict + if ary.dtype == np.float32: + dtype.code = DLDataTypeCode.kDLFloat + dtype.bits = 32 + elif ary.dtype == np.float64: + dtype.code = DLDataTypeCode.kDLFloat + dtype.bits = 64 + elif ary.dtype == np.int32: + dtype.code = DLDataTypeCode.kDLInt + dtype.bits = 32 + elif ary.dtype == np.int64: + dtype.code = DLDataTypeCode.kDLFloat + dtype.bits = 64 + elif ary.dtype == np.bool: + dtype.code = DLDataTypeCode.kDLFloat + + if hasattr(ary, "__cuda_array_interface__"): + tensor_ptr = ary.__cuda_array_interface__["data"][0] + else: + tensor_ptr = ary.__array_interface__["data"][0] + + + tensor.data = tensor_ptr + tensor.device = dev + tensor.dtype = dtype + + dlm.dl_tensor = tensor + dlm.manager_ct = NULL + dlm.deleter = deleter + + return dlm diff --git a/python/cuvs/cuvs/common/temp_raft.py b/python/cuvs/cuvs/common/temp_raft.py new file mode 100644 index 000000000..62e6fb070 --- /dev/null +++ b/python/cuvs/cuvs/common/temp_raft.py @@ -0,0 +1,23 @@ + + +def auto_sync_resources(f): + """ + This is identical to auto_sync_handle except for the proposed name change. 
+ """ + + @functools.wraps(f) + def wrapper(*args, resources=None, **kwargs): + sync_handle = resources is None + resources = resources if resources is not None else DeviceResources() + + ret_value = f(*args, resources=resources, **kwargs) + + if sync_handle: + resources.sync() + + return ret_value + + wrapper.__doc__ = wrapper.__doc__.format( + handle_docstring=_HANDLE_PARAM_DOCSTRING + ) + return wrapper diff --git a/python/cuvs/cuvs/neighbors/CMakeLists.txt b/python/cuvs/cuvs/neighbors/CMakeLists.txt new file mode 100644 index 000000000..b68f40f86 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/CMakeLists.txt @@ -0,0 +1,24 @@ +# ============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources cagra.pyx) +set(linked_libraries cuvs::cuvs) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_cagra_ +) diff --git a/python/cuvs/cuvs/neighbors/__init__.pxd b/python/cuvs/cuvs/neighbors/__init__.pxd new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/neighbors/__init__.py b/python/cuvs/cuvs/neighbors/__init__.py new file mode 100644 index 000000000..1453e30bc --- /dev/null +++ b/python/cuvs/cuvs/neighbors/__init__.py @@ -0,0 +1,26 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .cagra import Index, IndexParams, SearchParams, build, load, save, search + +__all__ = [ + "Index", + "IndexParams", + "SearchParams", + "build", + "load", + "save", + "search", +] diff --git a/python/cuvs/cuvs/neighbors/c/cagra_c.pxd b/python/cuvs/cuvs/neighbors/c/cagra_c.pxd new file mode 100644 index 000000000..7bf15222d --- /dev/null +++ b/python/cuvs/cuvs/neighbors/c/cagra_c.pxd @@ -0,0 +1,93 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: language_level=3 + +from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uint64_t + + +cdef extern from "cuvs/core/c_api.h" + ctypedef uintptr_t cuvsResources_t + + ctypedef enum cuvsError_t: + CUVS_ERROR, + CUVS_SUCCESS + + cuvsError_t cuvsResourcesCreate(cuvsResources_t* res) + cuvsError_t cuvsResourcesDestroy(cuvsResources_t res) + cuvsError_t cuvsStreamSet(cuvsResources_t res, cudaStream_t stream) + + +cdef extern from "cuvs/neighborscagra_c.h" nogil: + + ctypedef enum cagraGraphBuildAlgo: + IVF_PQ + NN_DESCENT + + + ctypedef struct cagraIndexParams: + size_t intermediate_graph_degree + size_t graph_degree + cagraGraphBuildAlgo build_algo + size_t nn_descent_niter + + + ctypedef enum search_algo: + SINGLE_CTA, + MULTI_CTA, + MULTI_KERNEL, + AUTO + + ctypedef enum cagraHashMode: + HASH, + SMALL, + AUTO_HASH + + ctypedef struct cagraSearchParams: + size_t max_queries + size_t itopk_size + size_t max_iterations + cagraSearchAlgo algo + size_t team_size + size_t search_width + size_t min_iterations + size_t thread_block_size + cagraHashMode hashmap_mode + size_t hashmap_min_bitlen + float hashmap_max_fill_rate + uint32_t num_random_samplings + uint64_t rand_xor_mask + + ctypedef struct cagraIndex: + uintptr_t addr + DLDataType dtype + + ctypedef cagraIndex* cagraIndex_t + + cuvsError_t cagraIndexCreate(cagraIndex_t* index) + + cuvsError_t cagraIndexDestroy(cagraIndex_t index) + + cuvsError_t cagraBuild(cuvsResources_t res, + struct cagraIndexParams params, + DLManagedTensor* dataset, + cagraIndex_t index); + + cuvsError_t cagraSearch(cuvsResources_t res, + cagraSearchParams params, + cagraIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances) diff --git a/python/cuvs/cuvs/neighbors/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra.pyx new file mode 100644 index 000000000..e07abd388 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/cagra.pyx @@ -0,0 +1,471 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
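The wrapper implemented in this file is meant to be used roughly as follows. This is a minimal sketch only: it assumes the cuvs.neighbors.cagra extension module builds and its re-exports resolve (as they do after the follow-up fixes later in this series), that cupy is available, and the shapes and parameter values are purely illustrative.

>>> import cupy as cp
>>> from cuvs.neighbors import cagra
>>> dataset = cp.random.random_sample((1000, 10), dtype=cp.float32)
>>> index = cagra.build_index(cagra.IndexParams(graph_degree=32), dataset)
>>> distances, neighbors = cagra.search(cagra.SearchParams(itopk_size=64),
...                                     index, dataset, 10)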
+# +# cython: language_level=3 + +import numpy as np +cimport cuvs.common.cydlpack + +from cuvs.common.temp_raft import auto_sync_resources +from cuvs.common.cydlpack import dplack_c + +from cython.operator cimport dereference as deref + +from pylibraft.common import ( + DeviceResources, + auto_convert_output, + cai_wrapper, + device_ndarray, +) +from pylibraft.common.cai_wrapper import wrap_array +from pylibraft.common.interruptible import cuda_interruptible + +cimport cuvs.neighbors.cagra.c.cagra_c as cagra_c + +from pylibraft.neighbors.common import _check_input_array + + +cdef class IndexParams: + """ + Parameters to build index for CAGRA nearest neighbor search + + Parameters + ---------- + metric : string denoting the metric type, default="sqeuclidean" + Valid values for metric: ["sqeuclidean"], where + - sqeuclidean is the euclidean distance without the square root + operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2 + intermediate_graph_degree : int, default = 128 + + graph_degree : int, default = 64 + + build_algo: string denoting the graph building algorithm to use, \ + default = "ivf_pq" + Valid values for algo: ["ivf_pq", "nn_descent"], where + - ivf_pq will use the IVF-PQ algorithm for building the knn graph + - nn_descent (experimental) will use the NN-Descent algorithm for + building the knn graph. It is expected to be generally + faster than ivf_pq. + """ + cdef cagra_c.index_params params + + def __init__(self, *, + metric="sqeuclidean", + intermediate_graph_degree=128, + graph_degree=64, + build_algo="ivf_pq", + nn_descent_niter=20): + # todo (dgd): enable once other metrics are present + # and exposed in cuVS C API + # self.params.metric = _get_metric(metric) + # self.params.metric_arg = 0 + self.params.intermediate_graph_degree = intermediate_graph_degree + self.params.graph_degree = graph_degree + if build_algo == "ivf_pq": + self.params.build_algo = cagra_c.cagraGraphBuildAlgo.IVF_PQ + elif build_algo == "nn_descent": + self.params.build_algo = cagra_c.cagraGraphBuildAlgo.NN_DESCENT + self.params.nn_descent_niter = nn_descent_niter + + # @property + # def metric(self): + # return self.params.metric + + @property + def intermediate_graph_degree(self): + return self.params.intermediate_graph_degree + + @property + def graph_degree(self): + return self.params.graph_degree + + @property + def build_algo(self): + return self.params.build_algo + + @property + def nn_descent_niter(self): + return self.params.nn_descent_niter + + +cdef class Index: + cdef cagraIndex_t index + + def __cinit__(self): + cdef cuvsError_t index_create_status + index_create_status = cuvsCagraIndexCreate(&self.index) + self.trained = False + + if index_create_status == cagra_c.cuvsError_t.CUVS_ERROR: + raise Exception("FAIL") + + def __dealloc__(self): + if self.index is not NULL: + cdef cuvsError_t index_destroy_status + index_destroy_status = cagraIndexDestroy(&self.index) + if index_destroy_status == cagra_c.cuvsError_t.CUVS_ERROR: + raise Exception("FAIL") + del self.index + + def __repr__(self): + # todo(dgd): update repr as we expose data through C API + attr_str = [] + return "Index(type=CAGRA, metric=L2" + (", ".join(attr_str)) + ")" + + +@auto_sync_resources +def build_index(IndexParams index_params, dataset, resources=None): + """ + Build the CAGRA index from the dataset for efficient search. + + The build performs two different steps- first an intermediate knn-graph is + constructed, then it's optimized it to create the final graph. 
The + index_params object controls the node degree of these graphs. + + It is required that both the dataset and the optimized graph fit the + GPU memory. + + The following distance metrics are supported: + - L2 + + Parameters + ---------- + index_params : IndexParams object + dataset : CUDA array interface compliant matrix shape (n_samples, dim) + Supported dtype [float, int8, uint8] + {handle_docstring} + + Returns + ------- + index: cuvs.cagra.Index + + Examples + -------- + + >>> import cupy as cp + >>> from pylibraft.neighbors import cagra + >>> n_samples = 50000 + >>> n_features = 50 + >>> n_queries = 1000 + >>> k = 10 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> handle = DeviceResources() + >>> build_params = cagra.IndexParams(metric="sqeuclidean") + >>> index = cagra.build_index(build_params, dataset) + >>> distances, neighbors = cagra.search(cagra.SearchParams(), + ... index, dataset, + ... k, handle=handle) + >>> distances = cp.asarray(distances) + >>> neighbors = cp.asarray(neighbors) + """ + + # todo(dgd): we can make the check of dtype a parameter of wrap_array + # in RAFT to make this a single call + dataset_ai = wrap_array(dataset) + _check_input_array(dataset_ai, [np.dtype('float32'), np.dtype('byte'), + np.dtype('ubyte')]) + + if resources is None: + resources = DeviceResources() + cdef cuvsResources_t* resources_ = \ + handle.getHandle() + + cdef Index idx = Index() + + with cuda_interruptible(): + cdef cuvsError_t build_status + + build_status = cagra_c.cagraBuild( + deref(resources_), + index_params.params, + &dplack_c(dataset_ai), + deref(idx.index) + ) + + if index_destroy_status == cagra_c.cuvsError_t.CUVS_ERROR: + raise RuntimeError("Index failed to build.") + else: + idx.trained = True + + return idx + + +cdef class SearchParams: + """ + CAGRA search parameters + + Parameters + ---------- + max_queries: int, default = 0 + Maximum number of queries to search at the same time (batch size). + Auto select when 0. + itopk_size: int, default = 64 + Number of intermediate search results retained during the search. + This is the main knob to adjust trade off between accuracy and + search speed. Higher values improve the search accuracy. + max_iterations: int, default = 0 + Upper limit of search iterations. Auto select when 0. + algo: string denoting the search algorithm to use, default = "auto" + Valid values for algo: ["auto", "single_cta", "multi_cta"], where + - auto will automatically select the best value based on query size + - single_cta is better when query contains larger number of + vectors (e.g >10) + - multi_cta is better when query contains only a few vectors + team_size: int, default = 0 + Number of threads used to calculate a single distance. 4, 8, 16, + or 32. + search_width: int, default = 1 + Number of graph nodes to select as the starting point for the + search in each iteration. + min_iterations: int, default = 0 + Lower limit of search iterations. + thread_block_size: int, default = 0 + Thread block size. 0, 64, 128, 256, 512, 1024. + Auto selection when 0. + hashmap_mode: string denoting the type of hash map to use. + It's usually better to allow the algorithm to select this value, + default = "auto". + Valid values for hashmap_mode: ["auto", "small", "hash"], where + - auto will automatically select the best value based on algo + - small will use the small shared memory hash table with resetting. + - hash will use a single hash table in global memory. 
+ hashmap_min_bitlen: int, default = 0 + Upper limit of hashmap fill rate. More than 0.1, less than 0.9. + hashmap_max_fill_rate: float, default = 0.5 + Upper limit of hashmap fill rate. More than 0.1, less than 0.9. + num_random_samplings: int, default = 1 + Number of iterations of initial random seed node selection. 1 or + more. + rand_xor_mask: int, default = 0x128394 + Bit mask used for initial random seed node selection. + """ + cdef cagra_c.search_params params + + def __init__(self, *, + max_queries=0, + itopk_size=64, + max_iterations=0, + algo="auto", + team_size=0, + search_width=1, + min_iterations=0, + thread_block_size=0, + hashmap_mode="auto", + hashmap_min_bitlen=0, + hashmap_max_fill_rate=0.5, + num_random_samplings=1, + rand_xor_mask=0x128394): + self.params.max_queries = max_queries + self.params.itopk_size = itopk_size + self.params.max_iterations = max_iterations + if algo == "single_cta": + self.params.algo = cagra_c.cagraSearchAlgo.SINGLE_CTA + elif algo == "multi_cta": + self.params.algo = cagra_c.cagraSearchAlgo.MULTI_CTA + elif algo == "multi_kernel": + self.params.algo = cagra_c.cagraSearchAlgo.MULTI_KERNEL + elif algo == "auto": + self.params.algo = cagra_c.cagraSearchAlgo.AUTO + else: + raise ValueError("`algo` value not supported.") + + self.params.team_size = team_size + self.params.search_width = search_width + self.params.min_iterations = min_iterations + self.params.thread_block_size = thread_block_size + if hashmap_mode == "hash": + self.params.hashmap_mode = cagra_c.cagraHashMode.HASH + elif hashmap_mode == "small": + self.params.hashmap_mode = cagra_c.cagraHashMode.SMALL + elif hashmap_mode == "auto": + self.params.hashmap_mode = cagra_c.cagraHashMode.AUTO + else: + raise ValueError("`hashmap_mode` value not supported.") + + self.params.hashmap_min_bitlen = hashmap_min_bitlen + self.params.hashmap_max_fill_rate = hashmap_max_fill_rate + self.params.num_random_samplings = num_random_samplings + self.params.rand_xor_mask = rand_xor_mask + + def __repr__(self): + attr_str = [attr + "=" + str(getattr(self, attr)) + for attr in [ + "max_queries", "itopk_size", "max_iterations", "algo", + "team_size", "search_width", "min_iterations", + "thread_block_size", "hashmap_mode", + "hashmap_min_bitlen", "hashmap_max_fill_rate", + "num_random_samplings", "rand_xor_mask"]] + return "SearchParams(type=CAGRA, " + (", ".join(attr_str)) + ")" + + @property + def max_queries(self): + return self.params.max_queries + + @property + def itopk_size(self): + return self.params.itopk_size + + @property + def max_iterations(self): + return self.params.max_iterations + + @property + def algo(self): + return self.params.algo + + @property + def team_size(self): + return self.params.team_size + + @property + def search_width(self): + return self.params.search_width + + @property + def min_iterations(self): + return self.params.min_iterations + + @property + def thread_block_size(self): + return self.params.thread_block_size + + @property + def hashmap_mode(self): + return self.params.hashmap_mode + + @property + def hashmap_min_bitlen(self): + return self.params.hashmap_min_bitlen + + @property + def hashmap_max_fill_rate(self): + return self.params.hashmap_max_fill_rate + + @property + def num_random_samplings(self): + return self.params.num_random_samplings + + @property + def rand_xor_mask(self): + return self.params.rand_xor_mask + + +@auto_sync_resources +@auto_convert_output +def search(SearchParams search_params, + Index index, + queries, + k, + neighbors=None, + 
distances=None, + resources=None): + """ + Find the k nearest neighbors for each query. + + Parameters + ---------- + search_params : SearchParams + index : Index + Trained CAGRA index. + queries : CUDA array interface compliant matrix shape (n_samples, dim) + Supported dtype [float, int8, uint8] + k : int + The number of neighbors. + neighbors : Optional CUDA array interface compliant matrix shape + (n_queries, k), dtype int64_t. If supplied, neighbor + indices will be written here in-place. (default None) + distances : Optional CUDA array interface compliant matrix shape + (n_queries, k) If supplied, the distances to the + neighbors will be written here in-place. (default None) + {handle_docstring} + + Examples + -------- + >>> import cupy as cp + >>> from pylibraft.common import DeviceResources + >>> from pylibraft.neighbors import cagra + >>> n_samples = 50000 + >>> n_features = 50 + >>> n_queries = 1000 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> # Build index + >>> handle = DeviceResources() + >>> index = cagra.build(cagra.IndexParams(), dataset, handle=handle) + >>> # Search using the built index + >>> queries = cp.random.random_sample((n_queries, n_features), + ... dtype=cp.float32) + >>> k = 10 + >>> search_params = cagra.SearchParams( + ... max_queries=100, + ... itopk_size=64 + ... ) + >>> # Using a pooling allocator reduces overhead of temporary array + >>> # creation during search. This is useful if multiple searches + >>> # are performad with same query size. + >>> distances, neighbors = cagra.search(search_params, index, queries, + ... k, handle=handle) + >>> # pylibraft functions are often asynchronous so the + >>> # handle needs to be explicitly synchronized + >>> handle.sync() + >>> neighbors = cp.asarray(neighbors) + >>> distances = cp.asarray(distances) + """ + + if not index.trained: + raise ValueError("Index need to be built before calling search.") + + if resources is None: + resources = DeviceResources() + cdef device_resources* resources_ = \ + resources.getHandle() + + # todo(dgd): we can make the check of dtype a parameter of wrap_array + # in RAFT to make this a single call + queries_cai = cai_wrapper(queries) + _check_input_array(queries_cai, [np.dtype('float32'), np.dtype('byte'), + np.dtype('ubyte')], + exp_cols=index.dim) + + cdef uint32_t n_queries = queries_cai.shape[0] + + if neighbors is None: + neighbors = device_ndarray.empty((n_queries, k), dtype='uint32') + + neighbors_cai = cai_wrapper(neighbors) + _check_input_array(neighbors_cai, [np.dtype('uint32')], + exp_rows=n_queries, exp_cols=k) + + if distances is None: + distances = device_ndarray.empty((n_queries, k), dtype='float32') + + distances_cai = cai_wrapper(distances) + _check_input_array(distances_cai, [np.dtype('float32')], + exp_rows=n_queries, exp_cols=k) + + cdef cagra_c.search_params params = search_params.params + + with cuda_interruptible(): + cagra_c.cagraSearch( + deref(resources_), + params, + deref(idx_float.index), + &dplack_c(queries_cai), + &dplack_c(neighbors_cai), + &dplack_c(distances_cai) + ) + + return (distances, neighbors) diff --git a/python/cuvs/cuvs/test/__init__py b/python/cuvs/cuvs/test/__init__py new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/test/ann_utils.py b/python/cuvs/cuvs/test/ann_utils.py new file mode 100644 index 000000000..60db7f327 --- /dev/null +++ b/python/cuvs/cuvs/test/ann_utils.py @@ -0,0 +1,35 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+
+
+def generate_data(shape, dtype):
+    if dtype == np.byte:
+        x = np.random.randint(-127, 128, size=shape, dtype=np.byte)
+    elif dtype == np.ubyte:
+        x = np.random.randint(0, 255, size=shape, dtype=np.ubyte)
+    else:
+        x = np.random.random_sample(shape).astype(dtype)
+
+    return x
+
+
+def calc_recall(ann_idx, true_nn_idx):
+    assert ann_idx.shape == true_nn_idx.shape
+    n = 0
+    for i in range(ann_idx.shape[0]):
+        n += np.intersect1d(ann_idx[i, :], true_nn_idx[i, :]).size
+    recall = n / ann_idx.size
+    return recall
diff --git a/python/cuvs/cuvs/test/test_cagra.py b/python/cuvs/cuvs/test/test_cagra.py
new file mode 100644
index 000000000..fcf4a92da
--- /dev/null
+++ b/python/cuvs/cuvs/test/test_cagra.py
@@ -0,0 +1,292 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
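The tests below lean on the two helpers from ann_utils.py above. A small sketch of how they combine with a scikit-learn brute-force baseline; the values are illustrative, and the import path assumes the cuvs test package is installed alongside scikit-learn.

>>> import numpy as np
>>> from sklearn.neighbors import NearestNeighbors
>>> from cuvs.test.ann_utils import calc_recall, generate_data
>>> dataset = generate_data((1000, 8), np.float32)
>>> queries = generate_data((10, 8), np.float32)
>>> nn = NearestNeighbors(n_neighbors=5, algorithm="brute").fit(dataset)
>>> true_idx = nn.kneighbors(queries, return_distance=False)
>>> calc_recall(true_idx, true_idx)  # ground truth compared with itself
1.0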
+# + +import numpy as np +import pytest +from sklearn.neighbors import NearestNeighbors +from sklearn.preprocessing import normalize + +from pylibraft.common import device_ndarray +from cuvs.neighbors import cagra +from cuvs.test.ann_utils import calc_recall, generate_data + + +def run_cagra_build_search_test( + n_rows=10000, + n_cols=10, + n_queries=100, + k=10, + dtype=np.float32, + metric="euclidean", + intermediate_graph_degree=128, + graph_degree=64, + build_algo="ivf_pq", + array_type="device", + compare=True, + inplace=True, + add_data_on_build=True, + search_params={}, +): + dataset = generate_data((n_rows, n_cols), dtype) + if metric == "inner_product": + dataset = normalize(dataset, norm="l2", axis=1) + dataset_device = device_ndarray(dataset) + + build_params = cagra.IndexParams( + metric=metric, + intermediate_graph_degree=intermediate_graph_degree, + graph_degree=graph_degree, + build_algo=build_algo, + ) + + if array_type == "device": + index = cagra.build(build_params, dataset_device) + else: + index = cagra.build(build_params, dataset) + + assert index.trained + + if not add_data_on_build: + dataset_1 = dataset[: n_rows // 2, :] + dataset_2 = dataset[n_rows // 2 :, :] + indices_1 = np.arange(n_rows // 2, dtype=np.uint32) + indices_2 = np.arange(n_rows // 2, n_rows, dtype=np.uint32) + if array_type == "device": + dataset_1_device = device_ndarray(dataset_1) + dataset_2_device = device_ndarray(dataset_2) + indices_1_device = device_ndarray(indices_1) + indices_2_device = device_ndarray(indices_2) + index = cagra.extend(index, dataset_1_device, indices_1_device) + index = cagra.extend(index, dataset_2_device, indices_2_device) + else: + index = cagra.extend(index, dataset_1, indices_1) + index = cagra.extend(index, dataset_2, indices_2) + + queries = generate_data((n_queries, n_cols), dtype) + out_idx = np.zeros((n_queries, k), dtype=np.uint32) + out_dist = np.zeros((n_queries, k), dtype=np.float32) + + queries_device = device_ndarray(queries) + out_idx_device = device_ndarray(out_idx) if inplace else None + out_dist_device = device_ndarray(out_dist) if inplace else None + + search_params = cagra.SearchParams(**search_params) + + ret_output = cagra.search( + search_params, + index, + queries_device, + k, + neighbors=out_idx_device, + distances=out_dist_device, + ) + + if not inplace: + out_dist_device, out_idx_device = ret_output + + if not compare: + return + + out_idx = out_idx_device.copy_to_host() + out_dist = out_dist_device.copy_to_host() + + # Calculate reference values with sklearn + skl_metric = { + "sqeuclidean": "sqeuclidean", + "inner_product": "cosine", + "euclidean": "euclidean", + }[metric] + nn_skl = NearestNeighbors( + n_neighbors=k, algorithm="brute", metric=skl_metric + ) + nn_skl.fit(dataset) + skl_idx = nn_skl.kneighbors(queries, return_distance=False) + + recall = calc_recall(out_idx, skl_idx) + assert recall > 0.7 + + +@pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) +@pytest.mark.parametrize("array_type", ["device", "host"]) +@pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"]) +def test_cagra_dataset_dtype_host_device( + dtype, array_type, inplace, build_algo +): + # Note that inner_product tests use normalized input which we cannot + # represent in int8, therefore we test only sqeuclidean metric here. 
+ run_cagra_build_search_test( + dtype=dtype, + inplace=inplace, + array_type=array_type, + build_algo=build_algo, + ) + + +@pytest.mark.parametrize( + "params", + [ + { + "intermediate_graph_degree": 64, + "graph_degree": 32, + "add_data_on_build": True, + "k": 1, + "metric": "euclidean", + "build_algo": "ivf_pq", + }, + { + "intermediate_graph_degree": 32, + "graph_degree": 16, + "add_data_on_build": False, + "k": 5, + "metric": "sqeuclidean", + "build_algo": "ivf_pq", + }, + { + "intermediate_graph_degree": 128, + "graph_degree": 32, + "add_data_on_build": True, + "k": 10, + "metric": "inner_product", + "build_algo": "nn_descent", + }, + ], +) +def test_cagra_index_params(params): + # Note that inner_product tests use normalized input which we cannot + # represent in int8, therefore we test only sqeuclidean metric here. + run_cagra_build_search_test( + k=params["k"], + metric=params["metric"], + graph_degree=params["graph_degree"], + intermediate_graph_degree=params["intermediate_graph_degree"], + compare=False, + build_algo=params["build_algo"], + ) + + +@pytest.mark.parametrize( + "params", + [ + { + "max_queries": 100, + "itopk_size": 32, + "max_iterations": 100, + "algo": "single_cta", + "team_size": 0, + "search_width": 1, + "min_iterations": 1, + "thread_block_size": 64, + "hashmap_mode": "hash", + "hashmap_min_bitlen": 0.2, + "hashmap_max_fill_rate": 0.5, + "num_random_samplings": 1, + }, + { + "max_queries": 10, + "itopk_size": 128, + "max_iterations": 0, + "algo": "multi_cta", + "team_size": 8, + "search_width": 2, + "min_iterations": 10, + "thread_block_size": 0, + "hashmap_mode": "auto", + "hashmap_min_bitlen": 0.9, + "hashmap_max_fill_rate": 0.5, + "num_random_samplings": 10, + }, + { + "max_queries": 0, + "itopk_size": 64, + "max_iterations": 0, + "algo": "multi_kernel", + "team_size": 16, + "search_width": 1, + "min_iterations": 0, + "thread_block_size": 0, + "hashmap_mode": "auto", + "hashmap_min_bitlen": 0, + "hashmap_max_fill_rate": 0.5, + "num_random_samplings": 1, + }, + { + "max_queries": 0, + "itopk_size": 64, + "max_iterations": 0, + "algo": "auto", + "team_size": 32, + "search_width": 4, + "min_iterations": 0, + "thread_block_size": 0, + "hashmap_mode": "auto", + "hashmap_min_bitlen": 0, + "hashmap_max_fill_rate": 0.5, + "num_random_samplings": 1, + }, + ], +) +def test_cagra_search_params(params): + # Note that inner_product tests use normalized input which we cannot + # represent in int8, therefore we test only sqeuclidean metric here. 
+ run_cagra_build_search_test(search_params=params) + + +@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.ubyte]) +@pytest.mark.parametrize("include_dataset", [True, False]) +def test_save_load(dtype, include_dataset): + n_rows = 10000 + n_cols = 50 + n_queries = 1000 + + dataset = generate_data((n_rows, n_cols), dtype) + dataset_device = device_ndarray(dataset) + + build_params = cagra.IndexParams() + index = cagra.build(build_params, dataset_device) + + assert index.trained + filename = "my_index.bin" + cagra.save(filename, index, include_dataset=include_dataset) + loaded_index = cagra.load(filename) + + # if we didn't save the dataset with the index, we need to update the + # index with an already loaded copy + if not include_dataset: + loaded_index.update_dataset(dataset) + + queries = generate_data((n_queries, n_cols), dtype) + + queries_device = device_ndarray(queries) + search_params = cagra.SearchParams() + k = 10 + + distance_dev, neighbors_dev = cagra.search( + search_params, index, queries_device, k + ) + + neighbors = neighbors_dev.copy_to_host() + dist = distance_dev.copy_to_host() + del index + + distance_dev, neighbors_dev = cagra.search( + search_params, loaded_index, queries_device, k + ) + + neighbors2 = neighbors_dev.copy_to_host() + dist2 = distance_dev.copy_to_host() + + assert np.all(neighbors == neighbors2) + assert np.allclose(dist, dist2, rtol=1e-6) diff --git a/python/cuvs/cuvs/test/test_doctests.py b/python/cuvs/cuvs/test/test_doctests.py new file mode 100644 index 000000000..c75f56523 --- /dev/null +++ b/python/cuvs/cuvs/test/test_doctests.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import contextlib +import doctest +import inspect +import io + +import pytest + +import pylibraft.cluster +import pylibraft.distance +import pylibraft.matrix +import pylibraft.neighbors +import pylibraft.random + +# Code adapted from https://github.com/rapidsai/cudf/blob/branch-23.02/python/cudf/cudf/tests/test_doctests.py # noqa + + +def _name_in_all(parent, name): + return name in getattr(parent, "__all__", []) + + +def _is_public_name(parent, name): + return not name.startswith("_") + + +def _find_doctests_in_obj(obj, finder=None, criteria=None): + """Find all doctests in an object. + + Parameters + ---------- + obj : module or class + The object to search for docstring examples. + finder : doctest.DocTestFinder, optional + The DocTestFinder object to use. If not provided, a DocTestFinder is + constructed. + criteria : callable, optional + Callable indicating whether to recurse over members of the provided + object. If not provided, names not defined in the object's ``__all__`` + property are ignored. + + Yields + ------ + doctest.DocTest + The next doctest found in the object. 
+ """ + if finder is None: + finder = doctest.DocTestFinder() + if criteria is None: + criteria = _name_in_all + for docstring in finder.find(obj): + if docstring.examples: + yield docstring + for name, member in inspect.getmembers(obj): + # Only recurse over members matching the criteria + if not criteria(obj, name): + continue + # Recurse over the public API of modules (objects defined in the + # module's __all__) + if inspect.ismodule(member): + yield from _find_doctests_in_obj( + member, finder, criteria=_name_in_all + ) + # Recurse over the public API of classes (attributes not prefixed with + # an underscore) + if inspect.isclass(member): + yield from _find_doctests_in_obj( + member, finder, criteria=_is_public_name + ) + + # doctest finder seems to dislike cython functions, since + # `inspect.isfunction` doesn't return true for them. hack around this + if callable(member) and not inspect.isfunction(member): + for docstring in finder.find(member): + if docstring.examples: + yield docstring + + +# since the root pylibraft module doesn't import submodules (or define an +# __all__) we are explicitly adding all the submodules we want to run +# doctests for here +DOC_STRINGS = list(_find_doctests_in_obj(pylibraft.cluster)) +DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.common)) +DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.distance)) +DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.matrix.select_k)) +DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors)) +DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.brute_force)) +DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.cagra)) +DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.ivf_flat)) +DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.ivf_pq)) +DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.refine)) +DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.random)) + + +@pytest.mark.parametrize( + "docstring", + DOC_STRINGS, + ids=lambda docstring: docstring.name, +) +def test_docstring(docstring): + # We ignore differences in whitespace in the doctest output, and enable + # the use of an ellipsis "..." to match any string in the doctest + # output. An ellipsis is useful for, e.g., memory addresses or + # imprecise floating point values. + optionflags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE + runner = doctest.DocTestRunner(optionflags=optionflags) + + # Capture stdout and include failing outputs in the traceback. 
+ doctest_stdout = io.StringIO() + with contextlib.redirect_stdout(doctest_stdout): + runner.run(docstring) + results = runner.summarize() + assert not results.failed, ( + f"{results.failed} of {results.attempted} doctests failed for " + f"{docstring.name}:\n{doctest_stdout.getvalue()}" + ) From a843e6bbc032973e42ef0cde130719a552d3ac9e Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 16 Feb 2024 11:51:55 -0600 Subject: [PATCH 04/45] FIX small fixes --- python/cuvs/cuvs/common/CMakeLists.txt | 2 +- python/cuvs/cuvs/common/temp_raft.py | 18 ++++ python/cuvs/cuvs/neighbors/__init__.py | 2 - python/cuvs/cuvs/test/test_cagra.py | 119 +------------------------ 4 files changed, 21 insertions(+), 120 deletions(-) diff --git a/python/cuvs/cuvs/common/CMakeLists.txt b/python/cuvs/cuvs/common/CMakeLists.txt index 6fa8e430f..b6af82882 100644 --- a/python/cuvs/cuvs/common/CMakeLists.txt +++ b/python/cuvs/cuvs/common/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at diff --git a/python/cuvs/cuvs/common/temp_raft.py b/python/cuvs/cuvs/common/temp_raft.py index 62e6fb070..ac1dbdc1a 100644 --- a/python/cuvs/cuvs/common/temp_raft.py +++ b/python/cuvs/cuvs/common/temp_raft.py @@ -1,5 +1,23 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: language_level=3 +# This file has code that will be upstreamed to RAFT + def auto_sync_resources(f): """ This is identical to auto_sync_handle except for the proposed name change. 
diff --git a/python/cuvs/cuvs/neighbors/__init__.py b/python/cuvs/cuvs/neighbors/__init__.py index 1453e30bc..95c9415fa 100644 --- a/python/cuvs/cuvs/neighbors/__init__.py +++ b/python/cuvs/cuvs/neighbors/__init__.py @@ -20,7 +20,5 @@ "IndexParams", "SearchParams", "build", - "load", - "save", "search", ] diff --git a/python/cuvs/cuvs/test/test_cagra.py b/python/cuvs/cuvs/test/test_cagra.py index fcf4a92da..38745f55b 100644 --- a/python/cuvs/cuvs/test/test_cagra.py +++ b/python/cuvs/cuvs/test/test_cagra.py @@ -52,9 +52,9 @@ def run_cagra_build_search_test( ) if array_type == "device": - index = cagra.build(build_params, dataset_device) + index = cagra.build_index(build_params, dataset_device) else: - index = cagra.build(build_params, dataset) + index = cagra.build_index(build_params, dataset) assert index.trained @@ -175,118 +175,3 @@ def test_cagra_index_params(params): compare=False, build_algo=params["build_algo"], ) - - -@pytest.mark.parametrize( - "params", - [ - { - "max_queries": 100, - "itopk_size": 32, - "max_iterations": 100, - "algo": "single_cta", - "team_size": 0, - "search_width": 1, - "min_iterations": 1, - "thread_block_size": 64, - "hashmap_mode": "hash", - "hashmap_min_bitlen": 0.2, - "hashmap_max_fill_rate": 0.5, - "num_random_samplings": 1, - }, - { - "max_queries": 10, - "itopk_size": 128, - "max_iterations": 0, - "algo": "multi_cta", - "team_size": 8, - "search_width": 2, - "min_iterations": 10, - "thread_block_size": 0, - "hashmap_mode": "auto", - "hashmap_min_bitlen": 0.9, - "hashmap_max_fill_rate": 0.5, - "num_random_samplings": 10, - }, - { - "max_queries": 0, - "itopk_size": 64, - "max_iterations": 0, - "algo": "multi_kernel", - "team_size": 16, - "search_width": 1, - "min_iterations": 0, - "thread_block_size": 0, - "hashmap_mode": "auto", - "hashmap_min_bitlen": 0, - "hashmap_max_fill_rate": 0.5, - "num_random_samplings": 1, - }, - { - "max_queries": 0, - "itopk_size": 64, - "max_iterations": 0, - "algo": "auto", - "team_size": 32, - "search_width": 4, - "min_iterations": 0, - "thread_block_size": 0, - "hashmap_mode": "auto", - "hashmap_min_bitlen": 0, - "hashmap_max_fill_rate": 0.5, - "num_random_samplings": 1, - }, - ], -) -def test_cagra_search_params(params): - # Note that inner_product tests use normalized input which we cannot - # represent in int8, therefore we test only sqeuclidean metric here. 
- run_cagra_build_search_test(search_params=params) - - -@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.ubyte]) -@pytest.mark.parametrize("include_dataset", [True, False]) -def test_save_load(dtype, include_dataset): - n_rows = 10000 - n_cols = 50 - n_queries = 1000 - - dataset = generate_data((n_rows, n_cols), dtype) - dataset_device = device_ndarray(dataset) - - build_params = cagra.IndexParams() - index = cagra.build(build_params, dataset_device) - - assert index.trained - filename = "my_index.bin" - cagra.save(filename, index, include_dataset=include_dataset) - loaded_index = cagra.load(filename) - - # if we didn't save the dataset with the index, we need to update the - # index with an already loaded copy - if not include_dataset: - loaded_index.update_dataset(dataset) - - queries = generate_data((n_queries, n_cols), dtype) - - queries_device = device_ndarray(queries) - search_params = cagra.SearchParams() - k = 10 - - distance_dev, neighbors_dev = cagra.search( - search_params, index, queries_device, k - ) - - neighbors = neighbors_dev.copy_to_host() - dist = distance_dev.copy_to_host() - del index - - distance_dev, neighbors_dev = cagra.search( - search_params, loaded_index, queries_device, k - ) - - neighbors2 = neighbors_dev.copy_to_host() - dist2 = distance_dev.copy_to_host() - - assert np.all(neighbors == neighbors2) - assert np.allclose(dist, dist2, rtol=1e-6) From e2cca0b36862c68839c05adb6cfc00a2a385d1a0 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Wed, 21 Feb 2024 00:17:45 -0600 Subject: [PATCH 05/45] Multiple Cython and CMake improvements, fixes and code cleanup Includes Cython and CMake fixes from testing. --- build.sh | 3 + python/cuvs/CMakeLists.txt | 9 ++- python/cuvs/cuvs/common/CMakeLists.txt | 2 +- python/cuvs/cuvs/common/c_api.pxd | 32 +++++++++++ python/cuvs/cuvs/common/cydlpack.pxd | 1 + python/cuvs/cuvs/common/cydlpack.pyx | 6 +- python/cuvs/cuvs/neighbors/CMakeLists.txt | 13 +---- python/cuvs/cuvs/neighbors/__init__.py | 9 +-- .../cuvs/cuvs/neighbors/cagra/CMakeLists.txt | 24 ++++++++ python/cuvs/cuvs/neighbors/cagra/__init__.pxd | 0 python/cuvs/cuvs/neighbors/cagra/__init__.py | 24 ++++++++ .../cuvs/cuvs/neighbors/cagra/c/__init__.pxd | 0 .../cuvs/cuvs/neighbors/cagra/c/__init__.py | 0 .../cuvs/neighbors/{ => cagra}/c/cagra_c.pxd | 20 ++----- python/cuvs/cuvs/neighbors/cagra/cagra.pxd | 1 + .../cuvs/cuvs/neighbors/{ => cagra}/cagra.pyx | 57 ++++++++++--------- 16 files changed, 138 insertions(+), 63 deletions(-) create mode 100644 python/cuvs/cuvs/common/c_api.pxd create mode 100644 python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt create mode 100644 python/cuvs/cuvs/neighbors/cagra/__init__.pxd create mode 100644 python/cuvs/cuvs/neighbors/cagra/__init__.py create mode 100644 python/cuvs/cuvs/neighbors/cagra/c/__init__.pxd create mode 100644 python/cuvs/cuvs/neighbors/cagra/c/__init__.py rename python/cuvs/cuvs/neighbors/{ => cagra}/c/cagra_c.pxd (82%) create mode 100644 python/cuvs/cuvs/neighbors/cagra/cagra.pxd rename python/cuvs/cuvs/neighbors/{ => cagra}/cagra.pyx (91%) diff --git a/build.sh b/build.sh index 6dd250c51..41b25caf7 100755 --- a/build.sh +++ b/build.sh @@ -305,6 +305,9 @@ if [[ ${CMAKE_TARGET} == "" ]]; then fi # Append `-DFIND_CUVS_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option. 
+ + + SKBUILD_EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS}" if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUVS_CPP"* ]]; then SKBUILD_EXTRA_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS} -DFIND_CUVS_CPP=ON" diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index 64c058255..0938cf4a6 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -37,14 +37,16 @@ option(FIND_CUVS_CPP "Search for existing CUVS C++ installations before defaulti OFF ) +message("- FIND_CUVS_CPP: ${FIND_CUVS_CPP}") + # If the user requested it we attempt to find CUVS. if(FIND_CUVS_CPP) - find_package(cuvs ${cuvs_version}) + find_package(cuvs_c ${cuvs_version}) else() set(cuvs_FOUND OFF) endif() -include(rapids-cython) +include(rapids-cython-core) if(NOT cuvs_FOUND) set(BUILD_TESTS OFF) @@ -60,6 +62,9 @@ endif() rapids_cython_init() +add_subdirectory(cuvs/common) +add_subdirectory(cuvs/neighbors) + if(DEFINED cython_lib_dir) rapids_cython_add_rpath_entries(TARGET cuvs PATHS "${cython_lib_dir}") endif() diff --git a/python/cuvs/cuvs/common/CMakeLists.txt b/python/cuvs/cuvs/common/CMakeLists.txt index b6af82882..c5f623f4b 100644 --- a/python/cuvs/cuvs/common/CMakeLists.txt +++ b/python/cuvs/cuvs/common/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= # Set the list of Cython files to build -set(cython_sources dlpack.pyx) +set(cython_sources cydlpack.pyx) set(linked_libraries cuvs::cuvs) # Build all of the Cython targets diff --git a/python/cuvs/cuvs/common/c_api.pxd b/python/cuvs/cuvs/common/c_api.pxd new file mode 100644 index 000000000..a8b91773f --- /dev/null +++ b/python/cuvs/cuvs/common/c_api.pxd @@ -0,0 +1,32 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
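This file declares the C status and resource API that the Cython wrappers check after every call. A plain-Python sketch of that error-handling pattern follows; the helper name check_cuvs is hypothetical and not part of the C API, and the enum values are illustrative stand-ins for cuvsError_t.

>>> CUVS_ERROR, CUVS_SUCCESS = 0, 1  # illustrative stand-ins only
>>> def check_cuvs(status):
...     # mirror the wrappers: compare against CUVS_ERROR and raise on failure
...     if status == CUVS_ERROR:
...         raise RuntimeError("cuVS call failed")
>>> check_cuvs(CUVS_SUCCESS)  # a successful call raises nothing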
+# +# cython: language_level=3 + + +from libc.stdint cimport uintptr_t +from cuda.ccudart cimport cudaStream_t + + +cdef extern from "cuvs/core/c_api.h": + ctypedef uintptr_t cuvsResources_t + + ctypedef enum cuvsError_t: + CUVS_ERROR, + CUVS_SUCCESS + + cuvsError_t cuvsResourcesCreate(cuvsResources_t* res) + cuvsError_t cuvsResourcesDestroy(cuvsResources_t res) + cuvsError_t cuvsStreamSet(cuvsResources_t res, cudaStream_t stream) diff --git a/python/cuvs/cuvs/common/cydlpack.pxd b/python/cuvs/cuvs/common/cydlpack.pxd index 72a03909c..1da1cf4f3 100644 --- a/python/cuvs/cuvs/common/cydlpack.pxd +++ b/python/cuvs/cuvs/common/cydlpack.pxd @@ -66,3 +66,4 @@ cdef extern from 'dlpack.h' nogil: void (*deleter)(DLManagedTensor*) # noqa: E211 +cdef DLManagedTensor dlpack_c(ary) diff --git a/python/cuvs/cuvs/common/cydlpack.pyx b/python/cuvs/cuvs/common/cydlpack.pyx index ea9e01f38..9e2f01e9c 100644 --- a/python/cuvs/cuvs/common/cydlpack.pyx +++ b/python/cuvs/cuvs/common/cydlpack.pyx @@ -17,8 +17,10 @@ import numpy as np +from libc cimport stdlib -cdef void deleter(DLManagedTensor* tensor): + +cdef void deleter(DLManagedTensor* tensor) noexcept: if tensor.manager_ctx is NULL: return stdlib.free(tensor.dl_tensor.shape) @@ -69,7 +71,7 @@ cdef DLManagedTensor dlpack_c(ary): tensor.dtype = dtype dlm.dl_tensor = tensor - dlm.manager_ct = NULL + dlm.manager_ctx = NULL dlm.deleter = deleter return dlm diff --git a/python/cuvs/cuvs/neighbors/CMakeLists.txt b/python/cuvs/cuvs/neighbors/CMakeLists.txt index b68f40f86..04c494c46 100644 --- a/python/cuvs/cuvs/neighbors/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -12,13 +12,4 @@ # the License. # ============================================================================= -# Set the list of Cython files to build -set(cython_sources cagra.pyx) -set(linked_libraries cuvs::cuvs) - -# Build all of the Cython targets -rapids_cython_create_modules( - CXX - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_cagra_ -) +add_subdirectory(cagra) diff --git a/python/cuvs/cuvs/neighbors/__init__.py b/python/cuvs/cuvs/neighbors/__init__.py index 95c9415fa..c3af93d75 100644 --- a/python/cuvs/cuvs/neighbors/__init__.py +++ b/python/cuvs/cuvs/neighbors/__init__.py @@ -13,12 +13,9 @@ # limitations under the License. -from .cagra import Index, IndexParams, SearchParams, build, load, save, search +from cuvs.neighbors import cagra __all__ = [ - "Index", - "IndexParams", - "SearchParams", - "build", - "search", + "common", + "cagra" ] diff --git a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt new file mode 100644 index 000000000..b68f40f86 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt @@ -0,0 +1,24 @@ +# ============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources cagra.pyx) +set(linked_libraries cuvs::cuvs) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_cagra_ +) diff --git a/python/cuvs/cuvs/neighbors/cagra/__init__.pxd b/python/cuvs/cuvs/neighbors/cagra/__init__.pxd new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/neighbors/cagra/__init__.py b/python/cuvs/cuvs/neighbors/cagra/__init__.py new file mode 100644 index 000000000..95c9415fa --- /dev/null +++ b/python/cuvs/cuvs/neighbors/cagra/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
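The re-export below is what makes the Cython module's public names importable from the package path. A minimal sketch of the resulting usage, assuming the extension builds and the re-exported names all resolve (as they do after the follow-up fixes in this series):

>>> from cuvs.neighbors import cagra
>>> params = cagra.IndexParams(graph_degree=32)
>>> params.graph_degree
32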
+ + +from .cagra import Index, IndexParams, SearchParams, build, load, save, search + +__all__ = [ + "Index", + "IndexParams", + "SearchParams", + "build", + "search", +] diff --git a/python/cuvs/cuvs/neighbors/cagra/c/__init__.pxd b/python/cuvs/cuvs/neighbors/cagra/c/__init__.pxd new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/neighbors/cagra/c/__init__.py b/python/cuvs/cuvs/neighbors/cagra/c/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/neighbors/c/cagra_c.pxd b/python/cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd similarity index 82% rename from python/cuvs/cuvs/neighbors/c/cagra_c.pxd rename to python/cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd index 7bf15222d..8538580ec 100644 --- a/python/cuvs/cuvs/neighbors/c/cagra_c.pxd +++ b/python/cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd @@ -15,22 +15,14 @@ # # cython: language_level=3 -from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uint64_t +from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uint64_t, uintptr_t +from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor -cdef extern from "cuvs/core/c_api.h" - ctypedef uintptr_t cuvsResources_t +from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t - ctypedef enum cuvsError_t: - CUVS_ERROR, - CUVS_SUCCESS - cuvsError_t cuvsResourcesCreate(cuvsResources_t* res) - cuvsError_t cuvsResourcesDestroy(cuvsResources_t res) - cuvsError_t cuvsStreamSet(cuvsResources_t res, cudaStream_t stream) - - -cdef extern from "cuvs/neighborscagra_c.h" nogil: +cdef extern from "cuvs/neighbors/cagra_c.h" nogil: ctypedef enum cagraGraphBuildAlgo: IVF_PQ @@ -44,7 +36,7 @@ cdef extern from "cuvs/neighborscagra_c.h" nogil: size_t nn_descent_niter - ctypedef enum search_algo: + ctypedef enum cagraSearchAlgo: SINGLE_CTA, MULTI_CTA, MULTI_KERNEL, @@ -81,7 +73,7 @@ cdef extern from "cuvs/neighborscagra_c.h" nogil: cuvsError_t cagraIndexDestroy(cagraIndex_t index) cuvsError_t cagraBuild(cuvsResources_t res, - struct cagraIndexParams params, + cagraIndexParams params, DLManagedTensor* dataset, cagraIndex_t index); diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd new file mode 100644 index 000000000..83e4a3acf --- /dev/null +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd @@ -0,0 +1 @@ +cimport cuvs.neighbors.cagra.c.cagra_c as cagra_c diff --git a/python/cuvs/cuvs/neighbors/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx similarity index 91% rename from python/cuvs/cuvs/neighbors/cagra.pyx rename to python/cuvs/cuvs/neighbors/cagra/cagra.pyx index e07abd388..f9e523202 100644 --- a/python/cuvs/cuvs/neighbors/cagra.pyx +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -19,7 +19,7 @@ import numpy as np cimport cuvs.common.cydlpack from cuvs.common.temp_raft import auto_sync_resources -from cuvs.common.cydlpack import dplack_c +from cuvs.common cimport cydlpack from cython.operator cimport dereference as deref @@ -32,9 +32,11 @@ from pylibraft.common import ( from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -cimport cuvs.neighbors.cagra.c.cagra_c as cagra_c - from pylibraft.neighbors.common import _check_input_array +from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t +from pylibraft.common.handle cimport device_resources + +from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uint64_t, uintptr_t cdef class IndexParams: @@ -59,7 +61,7 @@ cdef class IndexParams: building the knn graph. 
It is expected to be generally faster than ivf_pq. """ - cdef cagra_c.index_params params + cdef cagra_c.cagraIndexParams params def __init__(self, *, metric="sqeuclidean", @@ -101,23 +103,22 @@ cdef class IndexParams: cdef class Index: - cdef cagraIndex_t index + cdef cagra_c.cagraIndex_t index def __cinit__(self): cdef cuvsError_t index_create_status - index_create_status = cuvsCagraIndexCreate(&self.index) + index_create_status = cagra_c.cagraIndexCreate(&self.index) self.trained = False - if index_create_status == cagra_c.cuvsError_t.CUVS_ERROR: + if index_create_status == cuvsError_t.CUVS_ERROR: raise Exception("FAIL") def __dealloc__(self): + cdef cuvsError_t index_destroy_status if self.index is not NULL: - cdef cuvsError_t index_destroy_status - index_destroy_status = cagraIndexDestroy(&self.index) - if index_destroy_status == cagra_c.cuvsError_t.CUVS_ERROR: + index_destroy_status = cagra_c.cagraIndexDestroy(self.index) + if index_destroy_status == cuvsError_t.CUVS_ERROR: raise Exception("FAIL") - del self.index def __repr__(self): # todo(dgd): update repr as we expose data through C API @@ -181,21 +182,21 @@ def build_index(IndexParams index_params, dataset, resources=None): if resources is None: resources = DeviceResources() cdef cuvsResources_t* resources_ = \ - handle.getHandle() + resources.getHandle() cdef Index idx = Index() + cdef cuvsError_t build_status + cdef cydlpack.DLManagedTensor dataset_dlpack = cydlpack.dlpack_c(dataset_ai) with cuda_interruptible(): - cdef cuvsError_t build_status - build_status = cagra_c.cagraBuild( deref(resources_), index_params.params, - &dplack_c(dataset_ai), - deref(idx.index) + &dataset_dlpack, + idx.index ) - if index_destroy_status == cagra_c.cuvsError_t.CUVS_ERROR: + if build_status == cagra_c.cuvsError_t.CUVS_ERROR: raise RuntimeError("Index failed to build.") else: idx.trained = True @@ -252,7 +253,7 @@ cdef class SearchParams: rand_xor_mask: int, default = 0x128394 Bit mask used for initial random seed node selection. """ - cdef cagra_c.search_params params + cdef cagra_c.cagraSearchParams params def __init__(self, *, max_queries=0, @@ -291,7 +292,7 @@ cdef class SearchParams: elif hashmap_mode == "small": self.params.hashmap_mode = cagra_c.cagraHashMode.SMALL elif hashmap_mode == "auto": - self.params.hashmap_mode = cagra_c.cagraHashMode.AUTO + self.params.hashmap_mode = cagra_c.cagraHashMode.AUTO_HASH else: raise ValueError("`hashmap_mode` value not supported.") @@ -372,7 +373,7 @@ def search(SearchParams search_params, neighbors=None, distances=None, resources=None): - """ + """ Find the k nearest neighbors for each query. 
Parameters @@ -424,7 +425,6 @@ def search(SearchParams search_params, >>> neighbors = cp.asarray(neighbors) >>> distances = cp.asarray(distances) """ - if not index.trained: raise ValueError("Index need to be built before calling search.") @@ -456,16 +456,19 @@ def search(SearchParams search_params, _check_input_array(distances_cai, [np.dtype('float32')], exp_rows=n_queries, exp_cols=k) - cdef cagra_c.search_params params = search_params.params + cdef cagra_c.cagraSearchParams params = search_params.params + cdef cydlpack.DLManagedTensor queries_dlpack = cydlpack.dlpack_c(queries_cai) + cdef cydlpack.DLManagedTensor neighbors_dlpack = cydlpack.dlpack_c(neighbors_cai) + cdef cydlpack.DLManagedTensor distances_dlpack = cydlpack.dlpack_c(distances_cai) with cuda_interruptible(): cagra_c.cagraSearch( - deref(resources_), + resources_, params, - deref(idx_float.index), - &dplack_c(queries_cai), - &dplack_c(neighbors_cai), - &dplack_c(distances_cai) + index.index, + &queries_dlpack, + &neighbors_dlpack, + &distances_dlpack ) return (distances, neighbors) From 667cef59dca62de5cbaaca634670939c60892bbd Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 22 Feb 2024 22:04:28 -0600 Subject: [PATCH 06/45] Multiple small fixes and improvements --- python/cuvs/cuvs/common/CMakeLists.txt | 2 +- python/cuvs/cuvs/common/__init__.py | 11 +- python/cuvs/cuvs/common/temp_raft.py | 17 +- .../cuvs/cuvs/neighbors/cagra/CMakeLists.txt | 2 +- python/cuvs/cuvs/neighbors/cagra/__init__.py | 5 +- .../cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd | 4 +- python/cuvs/cuvs/neighbors/cagra/cagra.pyx | 222 +++++++++--------- 7 files changed, 136 insertions(+), 127 deletions(-) diff --git a/python/cuvs/cuvs/common/CMakeLists.txt b/python/cuvs/cuvs/common/CMakeLists.txt index c5f623f4b..2cf1ff19c 100644 --- a/python/cuvs/cuvs/common/CMakeLists.txt +++ b/python/cuvs/cuvs/common/CMakeLists.txt @@ -14,7 +14,7 @@ # Set the list of Cython files to build set(cython_sources cydlpack.pyx) -set(linked_libraries cuvs::cuvs) +set(linked_libraries cuvs::cuvs cuvs_c) # Build all of the Cython targets rapids_cython_create_modules( diff --git a/python/cuvs/cuvs/common/__init__.py b/python/cuvs/cuvs/common/__init__.py index 1453e30bc..ee66407bb 100644 --- a/python/cuvs/cuvs/common/__init__.py +++ b/python/cuvs/cuvs/common/__init__.py @@ -13,14 +13,9 @@ # limitations under the License. -from .cagra import Index, IndexParams, SearchParams, build, load, save, search +from .temp_raft import auto_sync_resources + __all__ = [ - "Index", - "IndexParams", - "SearchParams", - "build", - "load", - "save", - "search", + "auto_sync_resources" ] diff --git a/python/cuvs/cuvs/common/temp_raft.py b/python/cuvs/cuvs/common/temp_raft.py index ac1dbdc1a..e67a63929 100644 --- a/python/cuvs/cuvs/common/temp_raft.py +++ b/python/cuvs/cuvs/common/temp_raft.py @@ -18,6 +18,21 @@ # This file has code that will be upstreamed to RAFT +import functools + +from pylibraft.common import DeviceResources + + +_resources_param_string = """ + handle : Optional RAFT resource handle for reusing CUDA resources. + If a handle isn't supplied, CUDA resources will be + allocated inside this function and synchronized before the + function exits. If a handle is supplied, you will need to + explicitly synchronize yourself by calling `handle.sync()` + before accessing the output. +""".strip() + + def auto_sync_resources(f): """ This is identical to auto_sync_handle except for the proposed name change. 
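A minimal sketch of how the auto_sync_resources decorator touched up in this hunk is meant to be applied; the decorated function name my_func is hypothetical, and DeviceResources comes from pylibraft as imported just below. It assumes a machine with a GPU and pylibraft installed.

>>> from cuvs.common.temp_raft import auto_sync_resources
>>> @auto_sync_resources
... def my_func(x, resources=None):
...     """Scale a value on the GPU.
...
...     {resources_docstring}
...     """
...     return 2 * x
>>> my_func(21)  # resources are created, synchronized and discarded internally
42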
@@ -36,6 +51,6 @@ def wrapper(*args, resources=None, **kwargs): return ret_value wrapper.__doc__ = wrapper.__doc__.format( - handle_docstring=_HANDLE_PARAM_DOCSTRING + resources_docstring=_resources_param_string ) return wrapper diff --git a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt index b68f40f86..882b88646 100644 --- a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt @@ -14,7 +14,7 @@ # Set the list of Cython files to build set(cython_sources cagra.pyx) -set(linked_libraries cuvs::cuvs) +set(linked_libraries cuvs::cuvs cuvs_c) # Build all of the Cython targets rapids_cython_create_modules( diff --git a/python/cuvs/cuvs/neighbors/cagra/__init__.py b/python/cuvs/cuvs/neighbors/cagra/__init__.py index 95c9415fa..c3690da87 100644 --- a/python/cuvs/cuvs/neighbors/cagra/__init__.py +++ b/python/cuvs/cuvs/neighbors/cagra/__init__.py @@ -13,12 +13,11 @@ # limitations under the License. -from .cagra import Index, IndexParams, SearchParams, build, load, save, search +from .cagra import Index, IndexParams, SearchParams, build_index __all__ = [ "Index", "IndexParams", "SearchParams", - "build", - "search", + "build_index", ] diff --git a/python/cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd b/python/cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd index 8538580ec..c8ba198c8 100644 --- a/python/cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd +++ b/python/cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd @@ -73,12 +73,12 @@ cdef extern from "cuvs/neighbors/cagra_c.h" nogil: cuvsError_t cagraIndexDestroy(cagraIndex_t index) cuvsError_t cagraBuild(cuvsResources_t res, - cagraIndexParams params, + cagraIndexParams* params, DLManagedTensor* dataset, cagraIndex_t index); cuvsError_t cagraSearch(cuvsResources_t res, - cagraSearchParams params, + cagraSearchParams* params, cagraIndex_t index, DLManagedTensor* queries, DLManagedTensor* neighbors, diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx index f9e523202..8723e2541 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -146,7 +146,7 @@ def build_index(IndexParams index_params, dataset, resources=None): index_params : IndexParams object dataset : CUDA array interface compliant matrix shape (n_samples, dim) Supported dtype [float, int8, uint8] - {handle_docstring} + {resources_docstring} Returns ------- @@ -187,11 +187,12 @@ def build_index(IndexParams index_params, dataset, resources=None): cdef Index idx = Index() cdef cuvsError_t build_status cdef cydlpack.DLManagedTensor dataset_dlpack = cydlpack.dlpack_c(dataset_ai) + cdef cagra_c.cagraIndexParams* params = &index_params.params with cuda_interruptible(): build_status = cagra_c.cagraBuild( deref(resources_), - index_params.params, + params, &dataset_dlpack, idx.index ) @@ -363,112 +364,111 @@ cdef class SearchParams: def rand_xor_mask(self): return self.params.rand_xor_mask - -@auto_sync_resources -@auto_convert_output -def search(SearchParams search_params, - Index index, - queries, - k, - neighbors=None, - distances=None, - resources=None): - """ - Find the k nearest neighbors for each query. - - Parameters - ---------- - search_params : SearchParams - index : Index - Trained CAGRA index. - queries : CUDA array interface compliant matrix shape (n_samples, dim) - Supported dtype [float, int8, uint8] - k : int - The number of neighbors. 
- neighbors : Optional CUDA array interface compliant matrix shape - (n_queries, k), dtype int64_t. If supplied, neighbor - indices will be written here in-place. (default None) - distances : Optional CUDA array interface compliant matrix shape - (n_queries, k) If supplied, the distances to the - neighbors will be written here in-place. (default None) - {handle_docstring} - - Examples - -------- - >>> import cupy as cp - >>> from pylibraft.common import DeviceResources - >>> from pylibraft.neighbors import cagra - >>> n_samples = 50000 - >>> n_features = 50 - >>> n_queries = 1000 - >>> dataset = cp.random.random_sample((n_samples, n_features), - ... dtype=cp.float32) - >>> # Build index - >>> handle = DeviceResources() - >>> index = cagra.build(cagra.IndexParams(), dataset, handle=handle) - >>> # Search using the built index - >>> queries = cp.random.random_sample((n_queries, n_features), - ... dtype=cp.float32) - >>> k = 10 - >>> search_params = cagra.SearchParams( - ... max_queries=100, - ... itopk_size=64 - ... ) - >>> # Using a pooling allocator reduces overhead of temporary array - >>> # creation during search. This is useful if multiple searches - >>> # are performad with same query size. - >>> distances, neighbors = cagra.search(search_params, index, queries, - ... k, handle=handle) - >>> # pylibraft functions are often asynchronous so the - >>> # handle needs to be explicitly synchronized - >>> handle.sync() - >>> neighbors = cp.asarray(neighbors) - >>> distances = cp.asarray(distances) - """ - if not index.trained: - raise ValueError("Index need to be built before calling search.") - - if resources is None: - resources = DeviceResources() - cdef device_resources* resources_ = \ - resources.getHandle() - - # todo(dgd): we can make the check of dtype a parameter of wrap_array - # in RAFT to make this a single call - queries_cai = cai_wrapper(queries) - _check_input_array(queries_cai, [np.dtype('float32'), np.dtype('byte'), - np.dtype('ubyte')], - exp_cols=index.dim) - - cdef uint32_t n_queries = queries_cai.shape[0] - - if neighbors is None: - neighbors = device_ndarray.empty((n_queries, k), dtype='uint32') - - neighbors_cai = cai_wrapper(neighbors) - _check_input_array(neighbors_cai, [np.dtype('uint32')], - exp_rows=n_queries, exp_cols=k) - - if distances is None: - distances = device_ndarray.empty((n_queries, k), dtype='float32') - - distances_cai = cai_wrapper(distances) - _check_input_array(distances_cai, [np.dtype('float32')], - exp_rows=n_queries, exp_cols=k) - - cdef cagra_c.cagraSearchParams params = search_params.params - cdef cydlpack.DLManagedTensor queries_dlpack = cydlpack.dlpack_c(queries_cai) - cdef cydlpack.DLManagedTensor neighbors_dlpack = cydlpack.dlpack_c(neighbors_cai) - cdef cydlpack.DLManagedTensor distances_dlpack = cydlpack.dlpack_c(distances_cai) - - with cuda_interruptible(): - cagra_c.cagraSearch( - resources_, - params, - index.index, - &queries_dlpack, - &neighbors_dlpack, - &distances_dlpack - ) - - return (distances, neighbors) +# @auto_sync_resources +# @auto_convert_output +# def search(SearchParams search_params, +# Index index, +# queries, +# k, +# neighbors=None, +# distances=None, +# resources=None): +# """ +# Find the k nearest neighbors for each query. + +# Parameters +# ---------- +# search_params : SearchParams +# index : Index +# Trained CAGRA index. +# queries : CUDA array interface compliant matrix shape (n_samples, dim) +# Supported dtype [float, int8, uint8] +# k : int +# The number of neighbors. 
+# neighbors : Optional CUDA array interface compliant matrix shape +# (n_queries, k), dtype int64_t. If supplied, neighbor +# indices will be written here in-place. (default None) +# distances : Optional CUDA array interface compliant matrix shape +# (n_queries, k) If supplied, the distances to the +# neighbors will be written here in-place. (default None) +# {resources_docstring} + +# Examples +# -------- +# >>> import cupy as cp +# >>> from pylibraft.common import DeviceResources +# >>> from pylibraft.neighbors import cagra +# >>> n_samples = 50000 +# >>> n_features = 50 +# >>> n_queries = 1000 +# >>> dataset = cp.random.random_sample((n_samples, n_features), +# ... dtype=cp.float32) +# >>> # Build index +# >>> handle = DeviceResources() +# >>> index = cagra.build(cagra.IndexParams(), dataset, handle=handle) +# >>> # Search using the built index +# >>> queries = cp.random.random_sample((n_queries, n_features), +# ... dtype=cp.float32) +# >>> k = 10 +# >>> search_params = cagra.SearchParams( +# ... max_queries=100, +# ... itopk_size=64 +# ... ) +# >>> # Using a pooling allocator reduces overhead of temporary array +# >>> # creation during search. This is useful if multiple searches +# >>> # are performad with same query size. +# >>> distances, neighbors = cagra.search(search_params, index, queries, +# ... k, handle=handle) +# >>> # pylibraft functions are often asynchronous so the +# >>> # handle needs to be explicitly synchronized +# >>> handle.sync() +# >>> neighbors = cp.asarray(neighbors) +# >>> distances = cp.asarray(distances) +# """ +# if not index.trained: +# raise ValueError("Index need to be built before calling search.") + +# if resources is None: +# resources = DeviceResources() +# cdef device_resources* resources_ = \ +# resources.getHandle() + +# # todo(dgd): we can make the check of dtype a parameter of wrap_array +# # in RAFT to make this a single call +# queries_cai = cai_wrapper(queries) +# _check_input_array(queries_cai, [np.dtype('float32'), np.dtype('byte'), +# np.dtype('ubyte')], +# exp_cols=index.dim) + +# cdef uint32_t n_queries = queries_cai.shape[0] + +# if neighbors is None: +# neighbors = device_ndarray.empty((n_queries, k), dtype='uint32') + +# neighbors_cai = cai_wrapper(neighbors) +# _check_input_array(neighbors_cai, [np.dtype('uint32')], +# exp_rows=n_queries, exp_cols=k) + +# if distances is None: +# distances = device_ndarray.empty((n_queries, k), dtype='float32') + +# distances_cai = cai_wrapper(distances) +# _check_input_array(distances_cai, [np.dtype('float32')], +# exp_rows=n_queries, exp_cols=k) + +# cdef cagra_c.cagraSearchParams* params = &search_params.params +# cdef cydlpack.DLManagedTensor queries_dlpack = cydlpack.dlpack_c(queries_cai) +# cdef cydlpack.DLManagedTensor neighbors_dlpack = cydlpack.dlpack_c(neighbors_cai) +# cdef cydlpack.DLManagedTensor distances_dlpack = cydlpack.dlpack_c(distances_cai) + +# with cuda_interruptible(): +# cagra_c.cagraSearch( +# resources_, +# params, +# index.index, +# &queries_dlpack, +# &neighbors_dlpack, +# &distances_dlpack +# ) + +# return (distances, neighbors) From a8de38f98e66d6b75fd938aaea36d67bfe8128b6 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 23 Feb 2024 13:01:45 -0600 Subject: [PATCH 07/45] FIX commented code --- python/cuvs/cuvs/neighbors/cagra/cagra.pyx | 216 ++++++++++----------- 1 file changed, 108 insertions(+), 108 deletions(-) diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx index 8723e2541..0b668dbba 100644 --- 
a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -364,111 +364,111 @@ cdef class SearchParams: def rand_xor_mask(self): return self.params.rand_xor_mask -# @auto_sync_resources -# @auto_convert_output -# def search(SearchParams search_params, -# Index index, -# queries, -# k, -# neighbors=None, -# distances=None, -# resources=None): -# """ -# Find the k nearest neighbors for each query. - -# Parameters -# ---------- -# search_params : SearchParams -# index : Index -# Trained CAGRA index. -# queries : CUDA array interface compliant matrix shape (n_samples, dim) -# Supported dtype [float, int8, uint8] -# k : int -# The number of neighbors. -# neighbors : Optional CUDA array interface compliant matrix shape -# (n_queries, k), dtype int64_t. If supplied, neighbor -# indices will be written here in-place. (default None) -# distances : Optional CUDA array interface compliant matrix shape -# (n_queries, k) If supplied, the distances to the -# neighbors will be written here in-place. (default None) -# {resources_docstring} - -# Examples -# -------- -# >>> import cupy as cp -# >>> from pylibraft.common import DeviceResources -# >>> from pylibraft.neighbors import cagra -# >>> n_samples = 50000 -# >>> n_features = 50 -# >>> n_queries = 1000 -# >>> dataset = cp.random.random_sample((n_samples, n_features), -# ... dtype=cp.float32) -# >>> # Build index -# >>> handle = DeviceResources() -# >>> index = cagra.build(cagra.IndexParams(), dataset, handle=handle) -# >>> # Search using the built index -# >>> queries = cp.random.random_sample((n_queries, n_features), -# ... dtype=cp.float32) -# >>> k = 10 -# >>> search_params = cagra.SearchParams( -# ... max_queries=100, -# ... itopk_size=64 -# ... ) -# >>> # Using a pooling allocator reduces overhead of temporary array -# >>> # creation during search. This is useful if multiple searches -# >>> # are performad with same query size. -# >>> distances, neighbors = cagra.search(search_params, index, queries, -# ... 
k, handle=handle) -# >>> # pylibraft functions are often asynchronous so the -# >>> # handle needs to be explicitly synchronized -# >>> handle.sync() -# >>> neighbors = cp.asarray(neighbors) -# >>> distances = cp.asarray(distances) -# """ -# if not index.trained: -# raise ValueError("Index need to be built before calling search.") - -# if resources is None: -# resources = DeviceResources() -# cdef device_resources* resources_ = \ -# resources.getHandle() - -# # todo(dgd): we can make the check of dtype a parameter of wrap_array -# # in RAFT to make this a single call -# queries_cai = cai_wrapper(queries) -# _check_input_array(queries_cai, [np.dtype('float32'), np.dtype('byte'), -# np.dtype('ubyte')], -# exp_cols=index.dim) - -# cdef uint32_t n_queries = queries_cai.shape[0] - -# if neighbors is None: -# neighbors = device_ndarray.empty((n_queries, k), dtype='uint32') - -# neighbors_cai = cai_wrapper(neighbors) -# _check_input_array(neighbors_cai, [np.dtype('uint32')], -# exp_rows=n_queries, exp_cols=k) - -# if distances is None: -# distances = device_ndarray.empty((n_queries, k), dtype='float32') - -# distances_cai = cai_wrapper(distances) -# _check_input_array(distances_cai, [np.dtype('float32')], -# exp_rows=n_queries, exp_cols=k) - -# cdef cagra_c.cagraSearchParams* params = &search_params.params -# cdef cydlpack.DLManagedTensor queries_dlpack = cydlpack.dlpack_c(queries_cai) -# cdef cydlpack.DLManagedTensor neighbors_dlpack = cydlpack.dlpack_c(neighbors_cai) -# cdef cydlpack.DLManagedTensor distances_dlpack = cydlpack.dlpack_c(distances_cai) - -# with cuda_interruptible(): -# cagra_c.cagraSearch( -# resources_, -# params, -# index.index, -# &queries_dlpack, -# &neighbors_dlpack, -# &distances_dlpack -# ) - -# return (distances, neighbors) +@auto_sync_resources +@auto_convert_output +def search(SearchParams search_params, + Index index, + queries, + k, + neighbors=None, + distances=None, + resources=None): + """ + Find the k nearest neighbors for each query. + + Parameters + ---------- + search_params : SearchParams + index : Index + Trained CAGRA index. + queries : CUDA array interface compliant matrix shape (n_samples, dim) + Supported dtype [float, int8, uint8] + k : int + The number of neighbors. + neighbors : Optional CUDA array interface compliant matrix shape + (n_queries, k), dtype int64_t. If supplied, neighbor + indices will be written here in-place. (default None) + distances : Optional CUDA array interface compliant matrix shape + (n_queries, k) If supplied, the distances to the + neighbors will be written here in-place. (default None) + {resources_docstring} + + Examples + -------- + >>> import cupy as cp + >>> from pylibraft.common import DeviceResources + >>> from pylibraft.neighbors import cagra + >>> n_samples = 50000 + >>> n_features = 50 + >>> n_queries = 1000 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> # Build index + >>> handle = DeviceResources() + >>> index = cagra.build(cagra.IndexParams(), dataset, handle=handle) + >>> # Search using the built index + >>> queries = cp.random.random_sample((n_queries, n_features), + ... dtype=cp.float32) + >>> k = 10 + >>> search_params = cagra.SearchParams( + ... max_queries=100, + ... itopk_size=64 + ... ) + >>> # Using a pooling allocator reduces overhead of temporary array + >>> # creation during search. This is useful if multiple searches + >>> # are performad with same query size. + >>> distances, neighbors = cagra.search(search_params, index, queries, + ... 
k, handle=handle) + >>> # pylibraft functions are often asynchronous so the + >>> # handle needs to be explicitly synchronized + >>> handle.sync() + >>> neighbors = cp.asarray(neighbors) + >>> distances = cp.asarray(distances) + """ + if not index.trained: + raise ValueError("Index need to be built before calling search.") + + if resources is None: + resources = DeviceResources() + cdef device_resources* resources_ = \ + resources.getHandle() + + # todo(dgd): we can make the check of dtype a parameter of wrap_array + # in RAFT to make this a single call + queries_cai = cai_wrapper(queries) + _check_input_array(queries_cai, [np.dtype('float32'), np.dtype('byte'), + np.dtype('ubyte')], + exp_cols=index.dim) + + cdef uint32_t n_queries = queries_cai.shape[0] + + if neighbors is None: + neighbors = device_ndarray.empty((n_queries, k), dtype='uint32') + + neighbors_cai = cai_wrapper(neighbors) + _check_input_array(neighbors_cai, [np.dtype('uint32')], + exp_rows=n_queries, exp_cols=k) + + if distances is None: + distances = device_ndarray.empty((n_queries, k), dtype='float32') + + distances_cai = cai_wrapper(distances) + _check_input_array(distances_cai, [np.dtype('float32')], + exp_rows=n_queries, exp_cols=k) + + cdef cagra_c.cagraSearchParams* params = &search_params.params + cdef cydlpack.DLManagedTensor queries_dlpack = cydlpack.dlpack_c(queries_cai) + cdef cydlpack.DLManagedTensor neighbors_dlpack = cydlpack.dlpack_c(neighbors_cai) + cdef cydlpack.DLManagedTensor distances_dlpack = cydlpack.dlpack_c(distances_cai) + + with cuda_interruptible(): + cagra_c.cagraSearch( + resources_, + params, + index.index, + &queries_dlpack, + &neighbors_dlpack, + &distances_dlpack + ) + + return (distances, neighbors) From 7be7ec5f22803049c3e53fbe136c0a37e55cb17f Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 26 Feb 2024 20:16:18 -0600 Subject: [PATCH 08/45] ENH fixes and enhancements from live review --- ci/test_python.sh | 22 ++--- conda/recipes/cuvs/build.sh | 2 +- conda/recipes/cuvs/meta.yaml | 3 +- python/cuvs/cuvs/common/cydlpack.pyx | 1 + .../cuvs/cuvs/neighbors/cagra/c/__init__.pxd | 0 .../cuvs/cuvs/neighbors/cagra/c/__init__.py | 0 .../cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd | 85 ------------------ python/cuvs/cuvs/neighbors/cagra/cagra.pxd | 87 ++++++++++++++++++- python/cuvs/cuvs/neighbors/cagra/cagra.pyx | 43 +++++---- 9 files changed, 124 insertions(+), 119 deletions(-) delete mode 100644 python/cuvs/cuvs/neighbors/cagra/c/__init__.pxd delete mode 100644 python/cuvs/cuvs/neighbors/cagra/c/__init__.py delete mode 100644 python/cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd diff --git a/ci/test_python.sh b/ci/test_python.sh index 9f0c9d6ee..448da668d 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -40,17 +40,17 @@ EXITCODE=0 trap "EXITCODE=1" ERR set +e -#rapids-logger "pytest cuvs" -#pushd python/cuvs/cuvs -#pytest \ -# --cache-clear \ -# --junitxml="${RAPIDS_TESTS_DIR}/junit-cuvs.xml" \ -# --cov-config=../.coveragerc \ -# --cov=cuvs \ -# --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cuvs-coverage.xml" \ -# --cov-report=term \ -# test -#popd +rapids-logger "pytest cuvs" +pushd python/cuvs/cuvs +pytest \ + --cache-clear \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-cuvs.xml" \ + --cov-config=../.coveragerc \ + --cov=cuvs \ + --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cuvs-coverage.xml" \ + --cov-report=term \ + test +popd rapids-logger "Test script exiting with value: $EXITCODE" exit ${EXITCODE} diff --git a/conda/recipes/cuvs/build.sh b/conda/recipes/cuvs/build.sh index 
81f762068..767d06672 100644 --- a/conda/recipes/cuvs/build.sh +++ b/conda/recipes/cuvs/build.sh @@ -2,4 +2,4 @@ #!/usr/bin/env bash # This assumes the script is executed from the root of the repo directory -./build.sh python --no-nvtx +./build.sh python --no-nvtx -v diff --git a/conda/recipes/cuvs/meta.yaml b/conda/recipes/cuvs/meta.yaml index f22bd01d5..7dd8150d3 100644 --- a/conda/recipes/cuvs/meta.yaml +++ b/conda/recipes/cuvs/meta.yaml @@ -48,10 +48,9 @@ requirements: - cython >=3.0.0 - pylibraft {{ version }} - libcuvs {{ version }} - - numpy >=1.21 - python x.x - rmm ={{ minor_version }} - - scikit-build >=0.13.1 + - scikit-build-core >=0.7.0 - setuptools run: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} diff --git a/python/cuvs/cuvs/common/cydlpack.pyx b/python/cuvs/cuvs/common/cydlpack.pyx index 9e2f01e9c..a0976980f 100644 --- a/python/cuvs/cuvs/common/cydlpack.pyx +++ b/python/cuvs/cuvs/common/cydlpack.pyx @@ -69,6 +69,7 @@ cdef DLManagedTensor dlpack_c(ary): tensor.data = tensor_ptr tensor.device = dev tensor.dtype = dtype + tensor.strides = NULL dlm.dl_tensor = tensor dlm.manager_ctx = NULL diff --git a/python/cuvs/cuvs/neighbors/cagra/c/__init__.pxd b/python/cuvs/cuvs/neighbors/cagra/c/__init__.pxd deleted file mode 100644 index e69de29bb..000000000 diff --git a/python/cuvs/cuvs/neighbors/cagra/c/__init__.py b/python/cuvs/cuvs/neighbors/cagra/c/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/python/cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd b/python/cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd deleted file mode 100644 index c8ba198c8..000000000 --- a/python/cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd +++ /dev/null @@ -1,85 +0,0 @@ -# -# Copyright (c) 2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# cython: language_level=3 - -from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uint64_t, uintptr_t - -from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor - -from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t - - -cdef extern from "cuvs/neighbors/cagra_c.h" nogil: - - ctypedef enum cagraGraphBuildAlgo: - IVF_PQ - NN_DESCENT - - - ctypedef struct cagraIndexParams: - size_t intermediate_graph_degree - size_t graph_degree - cagraGraphBuildAlgo build_algo - size_t nn_descent_niter - - - ctypedef enum cagraSearchAlgo: - SINGLE_CTA, - MULTI_CTA, - MULTI_KERNEL, - AUTO - - ctypedef enum cagraHashMode: - HASH, - SMALL, - AUTO_HASH - - ctypedef struct cagraSearchParams: - size_t max_queries - size_t itopk_size - size_t max_iterations - cagraSearchAlgo algo - size_t team_size - size_t search_width - size_t min_iterations - size_t thread_block_size - cagraHashMode hashmap_mode - size_t hashmap_min_bitlen - float hashmap_max_fill_rate - uint32_t num_random_samplings - uint64_t rand_xor_mask - - ctypedef struct cagraIndex: - uintptr_t addr - DLDataType dtype - - ctypedef cagraIndex* cagraIndex_t - - cuvsError_t cagraIndexCreate(cagraIndex_t* index) - - cuvsError_t cagraIndexDestroy(cagraIndex_t index) - - cuvsError_t cagraBuild(cuvsResources_t res, - cagraIndexParams* params, - DLManagedTensor* dataset, - cagraIndex_t index); - - cuvsError_t cagraSearch(cuvsResources_t res, - cagraSearchParams* params, - cagraIndex_t index, - DLManagedTensor* queries, - DLManagedTensor* neighbors, - DLManagedTensor* distances) diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd index 83e4a3acf..1861cd6c5 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd @@ -1 +1,86 @@ -cimport cuvs.neighbors.cagra.c.cagra_c as cagra_c +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# cython: language_level=3 + +from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uint64_t, uintptr_t + +from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor + +from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t + + +cdef extern from "cuvs/neighbors/cagra_c.h" nogil: + + ctypedef enum cagraGraphBuildAlgo: + IVF_PQ + NN_DESCENT + + + ctypedef struct cagraIndexParams: + size_t intermediate_graph_degree + size_t graph_degree + cagraGraphBuildAlgo build_algo + size_t nn_descent_niter + + + ctypedef enum cagraSearchAlgo: + SINGLE_CTA, + MULTI_CTA, + MULTI_KERNEL, + AUTO + + ctypedef enum cagraHashMode: + HASH, + SMALL, + AUTO_HASH + + ctypedef struct cagraSearchParams: + size_t max_queries + size_t itopk_size + size_t max_iterations + cagraSearchAlgo algo + size_t team_size + size_t search_width + size_t min_iterations + size_t thread_block_size + cagraHashMode hashmap_mode + size_t hashmap_min_bitlen + float hashmap_max_fill_rate + uint32_t num_random_samplings + uint64_t rand_xor_mask + + ctypedef struct cagraIndex: + uintptr_t addr + DLDataType dtype + + ctypedef cagraIndex* cagraIndex_t + + cuvsError_t cagraIndexCreate(cagraIndex_t* index) + + cuvsError_t cagraIndexDestroy(cagraIndex_t index) + + cuvsError_t cagraBuild(cuvsResources_t res, + cagraIndexParams* params, + DLManagedTensor* dataset, + cagraIndex_t index); + + cuvsError_t cagraSearch(cuvsResources_t res, + cagraSearchParams* params, + cagraIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances) + diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx index 0b668dbba..aef614787 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -61,7 +61,7 @@ cdef class IndexParams: building the knn graph. It is expected to be generally faster than ivf_pq. 
""" - cdef cagra_c.cagraIndexParams params + cdef cagraIndexParams params def __init__(self, *, metric="sqeuclidean", @@ -76,9 +76,9 @@ cdef class IndexParams: self.params.intermediate_graph_degree = intermediate_graph_degree self.params.graph_degree = graph_degree if build_algo == "ivf_pq": - self.params.build_algo = cagra_c.cagraGraphBuildAlgo.IVF_PQ + self.params.build_algo = cagraGraphBuildAlgo.IVF_PQ elif build_algo == "nn_descent": - self.params.build_algo = cagra_c.cagraGraphBuildAlgo.NN_DESCENT + self.params.build_algo = cagraGraphBuildAlgo.NN_DESCENT self.params.nn_descent_niter = nn_descent_niter # @property @@ -103,11 +103,11 @@ cdef class IndexParams: cdef class Index: - cdef cagra_c.cagraIndex_t index + cdef cagraIndex_t index def __cinit__(self): cdef cuvsError_t index_create_status - index_create_status = cagra_c.cagraIndexCreate(&self.index) + index_create_status = cagraIndexCreate(&self.index) self.trained = False if index_create_status == cuvsError_t.CUVS_ERROR: @@ -116,7 +116,7 @@ cdef class Index: def __dealloc__(self): cdef cuvsError_t index_destroy_status if self.index is not NULL: - index_destroy_status = cagra_c.cagraIndexDestroy(self.index) + index_destroy_status = cagraIndexDestroy(self.index) if index_destroy_status == cuvsError_t.CUVS_ERROR: raise Exception("FAIL") @@ -187,17 +187,17 @@ def build_index(IndexParams index_params, dataset, resources=None): cdef Index idx = Index() cdef cuvsError_t build_status cdef cydlpack.DLManagedTensor dataset_dlpack = cydlpack.dlpack_c(dataset_ai) - cdef cagra_c.cagraIndexParams* params = &index_params.params + cdef cagraIndexParams* params = &index_params.params with cuda_interruptible(): - build_status = cagra_c.cagraBuild( + build_status = cagraBuild( deref(resources_), params, &dataset_dlpack, idx.index ) - if build_status == cagra_c.cuvsError_t.CUVS_ERROR: + if build_status == cuvsError_t.CUVS_ERROR: raise RuntimeError("Index failed to build.") else: idx.trained = True @@ -254,7 +254,7 @@ cdef class SearchParams: rand_xor_mask: int, default = 0x128394 Bit mask used for initial random seed node selection. 
""" - cdef cagra_c.cagraSearchParams params + cdef cagraSearchParams params def __init__(self, *, max_queries=0, @@ -274,13 +274,13 @@ cdef class SearchParams: self.params.itopk_size = itopk_size self.params.max_iterations = max_iterations if algo == "single_cta": - self.params.algo = cagra_c.cagraSearchAlgo.SINGLE_CTA + self.params.algo = cagraSearchAlgo.SINGLE_CTA elif algo == "multi_cta": - self.params.algo = cagra_c.cagraSearchAlgo.MULTI_CTA + self.params.algo = cagraSearchAlgo.MULTI_CTA elif algo == "multi_kernel": - self.params.algo = cagra_c.cagraSearchAlgo.MULTI_KERNEL + self.params.algo = cagraSearchAlgo.MULTI_KERNEL elif algo == "auto": - self.params.algo = cagra_c.cagraSearchAlgo.AUTO + self.params.algo = cagraSearchAlgo.AUTO else: raise ValueError("`algo` value not supported.") @@ -289,11 +289,11 @@ cdef class SearchParams: self.params.min_iterations = min_iterations self.params.thread_block_size = thread_block_size if hashmap_mode == "hash": - self.params.hashmap_mode = cagra_c.cagraHashMode.HASH + self.params.hashmap_mode = cagraHashMode.HASH elif hashmap_mode == "small": - self.params.hashmap_mode = cagra_c.cagraHashMode.SMALL + self.params.hashmap_mode = cagraHashMode.SMALL elif hashmap_mode == "auto": - self.params.hashmap_mode = cagra_c.cagraHashMode.AUTO_HASH + self.params.hashmap_mode = cagraHashMode.AUTO_HASH else: raise ValueError("`hashmap_mode` value not supported.") @@ -456,13 +456,13 @@ def search(SearchParams search_params, _check_input_array(distances_cai, [np.dtype('float32')], exp_rows=n_queries, exp_cols=k) - cdef cagra_c.cagraSearchParams* params = &search_params.params + cdef cagraSearchParams* params = &search_params.params cdef cydlpack.DLManagedTensor queries_dlpack = cydlpack.dlpack_c(queries_cai) cdef cydlpack.DLManagedTensor neighbors_dlpack = cydlpack.dlpack_c(neighbors_cai) cdef cydlpack.DLManagedTensor distances_dlpack = cydlpack.dlpack_c(distances_cai) with cuda_interruptible(): - cagra_c.cagraSearch( + cagraSearch( resources_, params, index.index, @@ -471,4 +471,9 @@ def search(SearchParams search_params, &distances_dlpack ) + if build_status == cuvsError_t.CUVS_ERROR: + raise RuntimeError("Index failed to build.") + else: + idx.trained = True + return (distances, neighbors) From 3fbd19d41ac6a50423d2ee7c89adc082120e4311 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 26 Feb 2024 22:22:55 -0600 Subject: [PATCH 09/45] FIX style fixes --- conda/recipes/cuvs/meta.yaml | 2 +- python/cuvs/cuvs/common/__init__.py | 5 +--- python/cuvs/cuvs/common/c_api.pxd | 2 +- python/cuvs/cuvs/common/cydlpack.pxd | 1 + python/cuvs/cuvs/common/cydlpack.pyx | 3 +- python/cuvs/cuvs/common/temp_raft.py | 1 - python/cuvs/cuvs/neighbors/CMakeLists.txt | 2 +- python/cuvs/cuvs/neighbors/__init__.py | 5 +--- .../cuvs/cuvs/neighbors/cagra/CMakeLists.txt | 2 +- python/cuvs/cuvs/neighbors/cagra/cagra.pxd | 17 ++++++----- python/cuvs/cuvs/neighbors/cagra/cagra.pyx | 30 ++++++++++++++----- python/cuvs/cuvs/test/test_cagra.py | 2 +- python/cuvs/cuvs/test/test_doctests.py | 2 +- 13 files changed, 42 insertions(+), 32 deletions(-) diff --git a/conda/recipes/cuvs/meta.yaml b/conda/recipes/cuvs/meta.yaml index 7dd8150d3..19a3d5a2c 100644 --- a/conda/recipes/cuvs/meta.yaml +++ b/conda/recipes/cuvs/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Usage: # conda build . 
-c conda-forge -c numba -c rapidsai -c pytorch diff --git a/python/cuvs/cuvs/common/__init__.py b/python/cuvs/cuvs/common/__init__.py index ee66407bb..eb5666659 100644 --- a/python/cuvs/cuvs/common/__init__.py +++ b/python/cuvs/cuvs/common/__init__.py @@ -15,7 +15,4 @@ from .temp_raft import auto_sync_resources - -__all__ = [ - "auto_sync_resources" -] +__all__ = ["auto_sync_resources"] diff --git a/python/cuvs/cuvs/common/c_api.pxd b/python/cuvs/cuvs/common/c_api.pxd index a8b91773f..6addbf16e 100644 --- a/python/cuvs/cuvs/common/c_api.pxd +++ b/python/cuvs/cuvs/common/c_api.pxd @@ -16,8 +16,8 @@ # cython: language_level=3 -from libc.stdint cimport uintptr_t from cuda.ccudart cimport cudaStream_t +from libc.stdint cimport uintptr_t cdef extern from "cuvs/core/c_api.h": diff --git a/python/cuvs/cuvs/common/cydlpack.pxd b/python/cuvs/cuvs/common/cydlpack.pxd index 1da1cf4f3..66c9f3f03 100644 --- a/python/cuvs/cuvs/common/cydlpack.pxd +++ b/python/cuvs/cuvs/common/cydlpack.pxd @@ -17,6 +17,7 @@ from libc.stdint cimport int32_t, int64_t, uint8_t, uint16_t, uint64_t + cdef extern from 'dlpack.h' nogil: ctypedef enum DLDeviceType: kDLCPU diff --git a/python/cuvs/cuvs/common/cydlpack.pyx b/python/cuvs/cuvs/common/cydlpack.pyx index a0976980f..76ec95756 100644 --- a/python/cuvs/cuvs/common/cydlpack.pyx +++ b/python/cuvs/cuvs/common/cydlpack.pyx @@ -29,7 +29,7 @@ cdef void deleter(DLManagedTensor* tensor) noexcept: cdef DLManagedTensor dlpack_c(ary): - #todo(dgd): add checking options/parameters + # todo(dgd): add checking options/parameters cdef DLDeviceType dev_type cdef DLDevice dev cdef DLDataType dtype @@ -65,7 +65,6 @@ cdef DLManagedTensor dlpack_c(ary): else: tensor_ptr = ary.__array_interface__["data"][0] - tensor.data = tensor_ptr tensor.device = dev tensor.dtype = dtype diff --git a/python/cuvs/cuvs/common/temp_raft.py b/python/cuvs/cuvs/common/temp_raft.py index e67a63929..25d6ed0b4 100644 --- a/python/cuvs/cuvs/common/temp_raft.py +++ b/python/cuvs/cuvs/common/temp_raft.py @@ -22,7 +22,6 @@ from pylibraft.common import DeviceResources - _resources_param_string = """ handle : Optional RAFT resource handle for reusing CUDA resources. If a handle isn't supplied, CUDA resources will be diff --git a/python/cuvs/cuvs/neighbors/CMakeLists.txt b/python/cuvs/cuvs/neighbors/CMakeLists.txt index 04c494c46..eaf418c60 100644 --- a/python/cuvs/cuvs/neighbors/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at diff --git a/python/cuvs/cuvs/neighbors/__init__.py b/python/cuvs/cuvs/neighbors/__init__.py index c3af93d75..1f8f956d9 100644 --- a/python/cuvs/cuvs/neighbors/__init__.py +++ b/python/cuvs/cuvs/neighbors/__init__.py @@ -15,7 +15,4 @@ from cuvs.neighbors import cagra -__all__ = [ - "common", - "cagra" -] +__all__ = ["common", "cagra"] diff --git a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt index 882b88646..377cfe779 100644 --- a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. 
+# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd index 1861cd6c5..269f046bf 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd @@ -15,11 +15,17 @@ # # cython: language_level=3 -from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uint64_t, uintptr_t - -from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor +from libc.stdint cimport ( + int8_t, + int64_t, + uint8_t, + uint32_t, + uint64_t, + uintptr_t, +) from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t +from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor cdef extern from "cuvs/neighbors/cagra_c.h" nogil: @@ -28,14 +34,12 @@ cdef extern from "cuvs/neighbors/cagra_c.h" nogil: IVF_PQ NN_DESCENT - ctypedef struct cagraIndexParams: size_t intermediate_graph_degree size_t graph_degree cagraGraphBuildAlgo build_algo size_t nn_descent_niter - ctypedef enum cagraSearchAlgo: SINGLE_CTA, MULTI_CTA, @@ -75,7 +79,7 @@ cdef extern from "cuvs/neighbors/cagra_c.h" nogil: cuvsError_t cagraBuild(cuvsResources_t res, cagraIndexParams* params, DLManagedTensor* dataset, - cagraIndex_t index); + cagraIndex_t index) cuvsError_t cagraSearch(cuvsResources_t res, cagraSearchParams* params, @@ -83,4 +87,3 @@ cdef extern from "cuvs/neighbors/cagra_c.h" nogil: DLManagedTensor* queries, DLManagedTensor* neighbors, DLManagedTensor* distances) - diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx index aef614787..4cc7e4c1b 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -16,13 +16,15 @@ # cython: language_level=3 import numpy as np + cimport cuvs.common.cydlpack from cuvs.common.temp_raft import auto_sync_resources -from cuvs.common cimport cydlpack from cython.operator cimport dereference as deref +from cuvs.common cimport cydlpack + from pylibraft.common import ( DeviceResources, auto_convert_output, @@ -31,12 +33,19 @@ from pylibraft.common import ( ) from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible - from pylibraft.neighbors.common import _check_input_array -from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t + +from libc.stdint cimport ( + int8_t, + int64_t, + uint8_t, + uint32_t, + uint64_t, + uintptr_t, +) from pylibraft.common.handle cimport device_resources -from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uint64_t, uintptr_t +from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t cdef class IndexParams: @@ -186,7 +195,8 @@ def build_index(IndexParams index_params, dataset, resources=None): cdef Index idx = Index() cdef cuvsError_t build_status - cdef cydlpack.DLManagedTensor dataset_dlpack = cydlpack.dlpack_c(dataset_ai) + cdef cydlpack.DLManagedTensor dataset_dlpack = \ + cydlpack.dlpack_c(dataset_ai) cdef cagraIndexParams* params = &index_params.params with cuda_interruptible(): @@ -364,6 +374,7 @@ cdef class SearchParams: def rand_xor_mask(self): return self.params.rand_xor_mask + @auto_sync_resources @auto_convert_output def search(SearchParams search_params, @@ -457,9 +468,12 @@ def search(SearchParams search_params, exp_rows=n_queries, exp_cols=k) cdef cagraSearchParams* params = &search_params.params 
- cdef cydlpack.DLManagedTensor queries_dlpack = cydlpack.dlpack_c(queries_cai) - cdef cydlpack.DLManagedTensor neighbors_dlpack = cydlpack.dlpack_c(neighbors_cai) - cdef cydlpack.DLManagedTensor distances_dlpack = cydlpack.dlpack_c(distances_cai) + cdef cydlpack.DLManagedTensor queries_dlpack = \ + cydlpack.dlpack_c(queries_cai) + cdef cydlpack.DLManagedTensor neighbors_dlpack = \ + cydlpack.dlpack_c(neighbors_cai) + cdef cydlpack.DLManagedTensor distances_dlpack = \ + cydlpack.dlpack_c(distances_cai) with cuda_interruptible(): cagraSearch( diff --git a/python/cuvs/cuvs/test/test_cagra.py b/python/cuvs/cuvs/test/test_cagra.py index 38745f55b..78fbc5828 100644 --- a/python/cuvs/cuvs/test/test_cagra.py +++ b/python/cuvs/cuvs/test/test_cagra.py @@ -15,10 +15,10 @@ import numpy as np import pytest +from pylibraft.common import device_ndarray from sklearn.neighbors import NearestNeighbors from sklearn.preprocessing import normalize -from pylibraft.common import device_ndarray from cuvs.neighbors import cagra from cuvs.test.ann_utils import calc_recall, generate_data diff --git a/python/cuvs/cuvs/test/test_doctests.py b/python/cuvs/cuvs/test/test_doctests.py index c75f56523..331b0f7f3 100644 --- a/python/cuvs/cuvs/test/test_doctests.py +++ b/python/cuvs/cuvs/test/test_doctests.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 244350e066778bae83f38d31f83ff487ce95061a Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 26 Feb 2024 22:23:59 -0600 Subject: [PATCH 10/45] FIX typo in parameter --- python/cuvs/cuvs/neighbors/cagra/cagra.pyx | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx index 4cc7e4c1b..f1c0c9af5 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -468,16 +468,13 @@ def search(SearchParams search_params, exp_rows=n_queries, exp_cols=k) cdef cagraSearchParams* params = &search_params.params - cdef cydlpack.DLManagedTensor queries_dlpack = \ - cydlpack.dlpack_c(queries_cai) - cdef cydlpack.DLManagedTensor neighbors_dlpack = \ - cydlpack.dlpack_c(neighbors_cai) - cdef cydlpack.DLManagedTensor distances_dlpack = \ - cydlpack.dlpack_c(distances_cai) + cdef cydlpack.DLManagedTensor queries_dlpack = cydlpack.dlpack_c(queries_cai) + cdef cydlpack.DLManagedTensor neighbors_dlpack = cydlpack.dlpack_c(neighbors_cai) + cdef cydlpack.DLManagedTensor distances_dlpack = cydlpack.dlpack_c(distances_cai) with cuda_interruptible(): cagraSearch( - resources_, + deref(resources_), params, index.index, &queries_dlpack, From 3136d551ccec9df230e4896e40eaeab8e2277b66 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 29 Feb 2024 20:12:48 -0600 Subject: [PATCH 11/45] Multiple Cython, build and CI improvements and fixes --- .../cuda11.8-conda/devcontainer.json | 2 +- .devcontainer/cuda11.8-pip/devcontainer.json | 2 +- .../cuda12.0-conda/devcontainer.json | 2 +- .devcontainer/cuda12.0-pip/devcontainer.json | 2 +- .github/CODEOWNERS | 18 ++- .github/workflows/build.yaml | 12 +- .github/workflows/pr.yaml | 22 +-- .github/workflows/test.yaml | 8 +- README.md | 2 +- VERSION | 2 +- build.sh | 2 +- ci/build_docs.sh | 2 +- cpp/CMakeLists.txt | 4 +- cpp/doxygen/Doxyfile | 2 +- .../cmake/thirdparty/fetch_rapids.cmake | 2 +- 
cpp/src/neighbors/cagra_c.cpp | 6 +- docs/source/build.md | 4 +- docs/source/conf.py | 4 +- docs/source/developer_guide.md | 8 +- fetch_rapids.cmake | 2 +- python/cuvs/CMakeLists.txt | 15 +- python/cuvs/README.md | 0 python/cuvs/cuvs/__init__.py | 2 +- python/cuvs/cuvs/common/cydlpack.pxd | 4 +- python/cuvs/cuvs/common/cydlpack.pyx | 43 ++++-- python/cuvs/cuvs/common/temp_raft.py | 4 +- python/cuvs/cuvs/neighbors/cagra/__init__.py | 9 +- python/cuvs/cuvs/neighbors/cagra/cagra.pxd | 52 ++++--- python/cuvs/cuvs/neighbors/cagra/cagra.pyx | 134 ++++++++++-------- python/cuvs/cuvs/test/test_cagra.py | 4 +- python/cuvs/cuvs/test/test_doctests.py | 19 +-- python/cuvs/pyproject.toml | 17 ++- python/cuvs/setup.cfg | 3 +- python/cuvs/setup.py | 37 ----- 34 files changed, 235 insertions(+), 216 deletions(-) create mode 100644 python/cuvs/README.md delete mode 100644 python/cuvs/setup.py diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 76ce8599a..cefbea72b 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 3a126b36e..05518805a 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda11.8-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, diff --git a/.devcontainer/cuda12.0-conda/devcontainer.json b/.devcontainer/cuda12.0-conda/devcontainer.json index 426aaef98..3f89836e2 100644 --- a/.devcontainer/cuda12.0-conda/devcontainer.json +++ b/.devcontainer/cuda12.0-conda/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "12.0", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.02-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.04-cpp-mambaforge-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, diff --git a/.devcontainer/cuda12.0-pip/devcontainer.json b/.devcontainer/cuda12.0-pip/devcontainer.json index 1ef2fdcb6..33f67cd3f 100644 --- a/.devcontainer/cuda12.0-pip/devcontainer.json +++ b/.devcontainer/cuda12.0-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "12.0", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda12.0-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda12.0-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 407c5448e..01dbcfc83 100755 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -4,12 +4,18 @@ cpp/ @rapidsai/cuvs-cpp-codeowners #python code owners python/ @rapidsai/cuvs-python-codeowners -#cmake code owners -**/CMakeLists.txt @rapidsai/cuvs-cmake-codeowners -**/cmake/ @rapidsai/cuvs-cmake-codeowners -python/setup.py @rapidsai/cuvs-cmake-codeowners -build.sh @rapidsai/cuvs-cmake-codeowners -**/build.sh @rapidsai/cuvs-cmake-codeowners +#rust code owners +rust/ @rapidsai/cuvs-rust-codeowners + +#docs code owners +docs/ 
@rapidsai/cuvs-docs-codeowners + +#build code owners +**/CMakeLists.txt @rapidsai/cuvs-build-codeowners +**/cmake/ @rapidsai/cuvs-build-codeowners +python/setup.py @rapidsai/cuvs-build-codeowners +build.sh @rapidsai/cuvs-build-codeowners +**/build.sh @rapidsai/cuvs-build-codeowners #build/ops code owners .github/ @rapidsai/ops-codeowners diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index f079b5e78..34cf1f5b0 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cuvs: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-cuvs: needs: wheel-build-cuvs secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b190a2a62..5799f5108 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -23,29 +23,29 @@ jobs: - wheel-tests-cuvs - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.04 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02 + uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04 with: build_type: pull-request enable_check_symbols: true @@ -53,19 +53,19 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -75,20 +75,20 @@ jobs: wheel-build-cuvs: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: pull-request script: ci/build_wheel_cuvs.sh wheel-tests-cuvs: needs: wheel-build-cuvs secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: pull-request script: ci/test_wheel_cuvs.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.04 with: build_command: | sccache -z; diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index acea5755f..0e66113f2 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: (void (thrust::|cub::)|_ZN\d+raft_cutlass) conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-cuvs: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch 
}} diff --git a/README.md b/README.md index e6da1432e..dfba9eb4a 100755 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ mamba install -c conda-forge -c nvidia -c rapidsai pycuvs ### Nightlies If installing a version that has not yet been released, the `rapidsai` channel can be replaced with `rapidsai-nightly`: ```bash -mamba install -c conda-forge -c nvidia -c rapidsai-nightly pycuvs=24.02* +mamba install -c conda-forge -c nvidia -c rapidsai-nightly pycuvs=24.04* ``` Please see the [Build and Install Guide](docs/source/build.md) for more information on installing cuVS and building from source. diff --git a/VERSION b/VERSION index 3c6c5e2b7..4a2fe8aa5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.02.00 +24.04.00 diff --git a/build.sh b/build.sh index db72bcf46..c6e09c5bf 100755 --- a/build.sh +++ b/build.sh @@ -60,7 +60,7 @@ BUILD_DIRS="${LIBCUVS_BUILD_DIR} ${PYTHON_BUILD_DIR} ${CUVS_DASK_BUILD_DIR}" CMAKE_LOG_LEVEL="" VERBOSE_FLAG="" BUILD_ALL_GPU_ARCH=0 -BUILD_TESTS=OFF +BUILD_TESTS=ON BUILD_TYPE=Release COMPILE_LIBRARY=OFF INSTALL_TARGET=install diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 0706b1fca..983e97385 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -24,7 +24,7 @@ rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ libcuvs -export RAPIDS_VERSION_NUMBER="24.02" +export RAPIDS_VERSION_NUMBER="24.04" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build CPP docs" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ea72eac63..c291c14e3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -10,8 +10,8 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing permissions and limitations under # the License. -set(RAPIDS_VERSION "24.02") -set(CUVS_VERSION "24.02.00") +set(RAPIDS_VERSION "24.04") +set(CUVS_VERSION "24.04.00") cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) include(../fetch_rapids.cmake) diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 0a2c7f8f6..94304afe0 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "cuVS C++ API" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = "24.02" +PROJECT_NUMBER = "24.04" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/cpp/examples/cmake/thirdparty/fetch_rapids.cmake b/cpp/examples/cmake/thirdparty/fetch_rapids.cmake index 4da917e26..c22f586ca 100644 --- a/cpp/examples/cmake/thirdparty/fetch_rapids.cmake +++ b/cpp/examples/cmake/thirdparty/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. 
# Use this variable to update RAPIDS and RAFT versions -set(RAPIDS_VERSION "24.02") +set(RAPIDS_VERSION "24.04") if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index b0154acf8..70e268fb2 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ -53,7 +53,6 @@ void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor* auto mds = cuvs::core::from_dlpack(dataset_tensor); cuvs::neighbors::cagra::build_host(*res_ptr, build_params, mds, *index); } - return index; } @@ -153,6 +152,9 @@ extern "C" cuvsError_t cuvsCagraBuild(cuvsResources_t res, dataset.dtype.bits); } return CUVS_SUCCESS; + } catch (const std::exception& ex) { + std::cerr << "Error occurred: " << ex.what() << std::endl; + return CUVS_ERROR; } catch (...) { return CUVS_ERROR; } @@ -197,6 +199,8 @@ extern "C" cuvsError_t cuvsCagraSearch(cuvsResources_t res, queries.dtype.bits); } return CUVS_SUCCESS; + } catch (const std::exception& ex) { + std::cerr << "Error occurred: " << ex.what() << std::endl; } catch (...) { return CUVS_ERROR; } diff --git a/docs/source/build.md b/docs/source/build.md index ae7734d0e..31de69b46 100644 --- a/docs/source/build.md +++ b/docs/source/build.md @@ -56,7 +56,7 @@ You can also install the conda packages individually using the `mamba` command a mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.0 ``` -If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.02/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. +If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.04/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. ## Installing Python through Pip @@ -315,4 +315,4 @@ The `raft::raft` CMake target is made available when including RAFT into your CM |-------------|---------------------|----------------------------------------------------------|----------------------------------------| | n/a | `raft::raft` | Full RAFT header library | CUDA toolkit, RMM, NVTX, CCCL, CUTLASS | | compiled | `raft::compiled` | Pre-compiled template instantiations and runtime library | raft::raft | -| distributed | `raft::distributed` | Dependencies for `raft::comms` APIs | raft::raft, UCX, NCCL \ No newline at end of file +| distributed | `raft::distributed` | Dependencies for `raft::comms` APIs | raft::raft, UCX, NCCL diff --git a/docs/source/conf.py b/docs/source/conf.py index c09ab953f..1a5c9dfe8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -67,9 +67,9 @@ # built documents. # # The short X.Y version. -version = '24.02' +version = '24.04' # The full version, including alpha/beta/rc tags. -release = '24.02.00' +release = '24.04.00' # The language for content autogenerated by Sphinx. 
Refer to documentation # for a list of supported languages. diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md index c5bcd03f6..d29130add 100644 --- a/docs/source/developer_guide.md +++ b/docs/source/developer_guide.md @@ -187,7 +187,7 @@ RAFT relies on `clang-format` to enforce code style across all C++ and CUDA sour 1. Do not split empty functions/records/namespaces. 2. Two-space indentation everywhere, including the line continuations. 3. Disable reflowing of comments. - The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.02/cpp/.clang-format). + The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/.clang-format). [`doxygen`](https://doxygen.nl/) is used as documentation generator and also as a documentation linter. In order to run doxygen as a linter on C++/CUDA code, run @@ -205,7 +205,7 @@ you can run `codespell -i 3 -w .` from the repository root directory. This will bring up an interactive prompt to select which spelling fixes to apply. ### #include style -[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.02/cpp/scripts/include_checker.py) is used to enforce the include style as follows: +[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/scripts/include_checker.py) is used to enforce the include style as follows: 1. `#include "..."` should be used for referencing local files only. It is acceptable to be used for referencing files in a sub-folder/parent-folder of the same algorithm, but should never be used to include files in other algorithms or between algorithms and the primitives or other dependencies. 2. `#include <...>` should be used for referencing everything else @@ -215,7 +215,7 @@ python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/test ... list ``` ### Copyright header -[copyright.py](https://github.com/rapidsai/raft/blob/branch-24.02/ci/checks/copyright.py) checks the Copyright header for all git-modified files +[copyright.py](https://github.com/rapidsai/raft/blob/branch-24.04/ci/checks/copyright.py) checks the Copyright header for all git-modified files Manually, you can run the following to bulk-fix the header if only the years need to be updated: ```bash @@ -229,7 +229,7 @@ Call CUDA APIs via the provided helper macros `RAFT_CUDA_TRY`, `RAFT_CUBLAS_TRY` ## Logging ### Introduction -Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.02/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. +Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. ### Usage ```cpp diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index e63165e1c..330270d66 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. 
# ============================================================================= if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake) - file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.02/RAPIDS.cmake + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.04/RAPIDS.cmake ${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake ) endif() diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index 0938cf4a6..9a43eba0c 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -39,15 +39,22 @@ option(FIND_CUVS_CPP "Search for existing CUVS C++ installations before defaulti message("- FIND_CUVS_CPP: ${FIND_CUVS_CPP}") +include(../../fetch_rapids.cmake) +include(rapids-cmake) +include(rapids-cpm) +include(rapids-export) +include(rapids-find) + +rapids_cpm_init() + # If the user requested it we attempt to find CUVS. if(FIND_CUVS_CPP) - find_package(cuvs_c ${cuvs_version}) + find_package(cuvs ${cuvs_version}) + include(../../cpp/cmake/thirdparty/get_dlpack.cmake) else() set(cuvs_FOUND OFF) endif() -include(rapids-cython-core) - if(NOT cuvs_FOUND) set(BUILD_TESTS OFF) set(CUDA_STATIC_RUNTIME ON) @@ -60,6 +67,8 @@ if(NOT cuvs_FOUND) install(TARGETS cuvs DESTINATION ${cython_lib_dir}) endif() +include(rapids-cython-core) + rapids_cython_init() add_subdirectory(cuvs/common) diff --git a/python/cuvs/README.md b/python/cuvs/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/__init__.py b/python/cuvs/cuvs/__init__.py index 94b3a200b..9f0481cb7 100644 --- a/python/cuvs/cuvs/__init__.py +++ b/python/cuvs/cuvs/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/python/cuvs/cuvs/common/cydlpack.pxd b/python/cuvs/cuvs/common/cydlpack.pxd index 66c9f3f03..73334e500 100644 --- a/python/cuvs/cuvs/common/cydlpack.pxd +++ b/python/cuvs/cuvs/common/cydlpack.pxd @@ -18,7 +18,7 @@ from libc.stdint cimport int32_t, int64_t, uint8_t, uint16_t, uint64_t -cdef extern from 'dlpack.h' nogil: +cdef extern from "dlpack/dlpack.h" nogil: ctypedef enum DLDeviceType: kDLCPU kDLCUDA @@ -67,4 +67,4 @@ cdef extern from 'dlpack.h' nogil: void (*deleter)(DLManagedTensor*) # noqa: E211 -cdef DLManagedTensor dlpack_c(ary) +cdef DLManagedTensor* dlpack_c(ary) diff --git a/python/cuvs/cuvs/common/cydlpack.pyx b/python/cuvs/cuvs/common/cydlpack.pyx index 76ec95756..526f6c78e 100644 --- a/python/cuvs/cuvs/common/cydlpack.pyx +++ b/python/cuvs/cuvs/common/cydlpack.pyx @@ -18,6 +18,7 @@ import numpy as np from libc cimport stdlib +from libc.stdint cimport uintptr_t cdef void deleter(DLManagedTensor* tensor) noexcept: @@ -28,15 +29,16 @@ cdef void deleter(DLManagedTensor* tensor) noexcept: stdlib.free(tensor) -cdef DLManagedTensor dlpack_c(ary): +cdef DLManagedTensor* dlpack_c(ary): # todo(dgd): add checking options/parameters cdef DLDeviceType dev_type cdef DLDevice dev cdef DLDataType dtype cdef DLTensor tensor - cdef DLManagedTensor dlm + cdef DLManagedTensor* dlm = \ + stdlib.malloc(sizeof(DLManagedTensor)) - if hasattr(ary, "__cuda_array_interface__"): + if ary.from_cai: dev_type = DLDeviceType.kDLCUDA else: dev_type = DLDeviceType.kDLCPU @@ -51,24 +53,47 @@ cdef DLManagedTensor dlpack_c(ary): elif ary.dtype == np.float64: dtype.code = DLDataTypeCode.kDLFloat dtype.bits = 64 + elif ary.dtype == np.int8: + dtype.code = DLDataTypeCode.kDLInt + dtype.bits = 8 elif ary.dtype == np.int32: dtype.code = DLDataTypeCode.kDLInt dtype.bits = 32 elif ary.dtype == np.int64: - dtype.code = DLDataTypeCode.kDLFloat + dtype.code = DLDataTypeCode.kDLInt + dtype.bits = 64 + elif ary.dtype == np.uint8: + dtype.code = DLDataTypeCode.kDLUInt + dtype.bits = 8 + elif ary.dtype == np.uint32: + dtype.code = DLDataTypeCode.kDLUInt + dtype.bits = 32 + elif ary.dtype == np.uint64: + dtype.code = DLDataTypeCode.kDLUInt dtype.bits = 64 - elif ary.dtype == np.bool: + elif ary.dtype == np.bool_: dtype.code = DLDataTypeCode.kDLFloat + dtype.bits = 8 - if hasattr(ary, "__cuda_array_interface__"): - tensor_ptr = ary.__cuda_array_interface__["data"][0] - else: - tensor_ptr = ary.__array_interface__["data"][0] + dtype.lanes = 1 + + cdef size_t ndim = len(ary.shape) + + cdef int64_t* shape = stdlib.malloc(ndim * sizeof(int64_t)) + + for i in range(ndim): + shape[i] = ary.shape[i] + + cdef uintptr_t tensor_ptr + tensor_ptr = ary.ai_["data"][0] tensor.data = tensor_ptr tensor.device = dev tensor.dtype = dtype tensor.strides = NULL + tensor.ndim = ndim + tensor.shape = shape + tensor.byte_offset = 0 dlm.dl_tensor = tensor dlm.manager_ctx = NULL diff --git a/python/cuvs/cuvs/common/temp_raft.py b/python/cuvs/cuvs/common/temp_raft.py index 25d6ed0b4..67944eeab 100644 --- a/python/cuvs/cuvs/common/temp_raft.py +++ b/python/cuvs/cuvs/common/temp_raft.py @@ -39,12 +39,12 @@ def auto_sync_resources(f): @functools.wraps(f) def wrapper(*args, resources=None, **kwargs): - sync_handle = resources is None + sync_resources = resources is None resources = resources if resources is not None else DeviceResources() ret_value = f(*args, resources=resources, **kwargs) - if sync_handle: + if sync_resources: resources.sync() return ret_value diff --git a/python/cuvs/cuvs/neighbors/cagra/__init__.py 
b/python/cuvs/cuvs/neighbors/cagra/__init__.py index c3690da87..657c7d366 100644 --- a/python/cuvs/cuvs/neighbors/cagra/__init__.py +++ b/python/cuvs/cuvs/neighbors/cagra/__init__.py @@ -13,11 +13,6 @@ # limitations under the License. -from .cagra import Index, IndexParams, SearchParams, build_index +from .cagra import Index, IndexParams, SearchParams, build_index, search -__all__ = [ - "Index", - "IndexParams", - "SearchParams", - "build_index", -] +__all__ = ["Index", "IndexParams", "SearchParams", "build_index", "search"] diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd index 269f046bf..4293bdc07 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd @@ -28,62 +28,68 @@ from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor -cdef extern from "cuvs/neighbors/cagra_c.h" nogil: +cdef extern from "cuvs/neighbors/cagra.h" nogil: - ctypedef enum cagraGraphBuildAlgo: + ctypedef enum cuvsCagraGraphBuildAlgo: IVF_PQ NN_DESCENT - ctypedef struct cagraIndexParams: + ctypedef struct cuvsCagraIndexParams: size_t intermediate_graph_degree size_t graph_degree - cagraGraphBuildAlgo build_algo + cuvsCagraGraphBuildAlgo build_algo size_t nn_descent_niter - ctypedef enum cagraSearchAlgo: + ctypedef cuvsCagraIndexParams* cuvsCagraIndexParams_t + + ctypedef enum cuvsCagraSearchAlgo: SINGLE_CTA, MULTI_CTA, MULTI_KERNEL, AUTO - ctypedef enum cagraHashMode: + ctypedef enum cuvsCagraHashMode: HASH, SMALL, AUTO_HASH - ctypedef struct cagraSearchParams: + ctypedef struct cuvsCagraSearchParams: size_t max_queries size_t itopk_size size_t max_iterations - cagraSearchAlgo algo + cuvsCagraSearchAlgo algo size_t team_size size_t search_width size_t min_iterations size_t thread_block_size - cagraHashMode hashmap_mode + cuvsCagraHashMode hashmap_mode size_t hashmap_min_bitlen float hashmap_max_fill_rate uint32_t num_random_samplings uint64_t rand_xor_mask - ctypedef struct cagraIndex: + ctypedef struct cuvsCagraIndex: uintptr_t addr DLDataType dtype - ctypedef cagraIndex* cagraIndex_t + ctypedef cuvsCagraIndex* cuvsCagraIndex_t + + cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params) + + cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t index) - cuvsError_t cagraIndexCreate(cagraIndex_t* index) + cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t* index) - cuvsError_t cagraIndexDestroy(cagraIndex_t index) + cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index) - cuvsError_t cagraBuild(cuvsResources_t res, - cagraIndexParams* params, - DLManagedTensor* dataset, - cagraIndex_t index) + cuvsError_t cuvsCagraBuild(cuvsResources_t res, + cuvsCagraIndexParams* params, + DLManagedTensor* dataset, + cuvsCagraIndex_t index) except + - cuvsError_t cagraSearch(cuvsResources_t res, - cagraSearchParams* params, - cagraIndex_t index, - DLManagedTensor* queries, - DLManagedTensor* neighbors, - DLManagedTensor* distances) + cuvsError_t cuvsCagraSearch(cuvsResources_t res, + cuvsCagraSearchParams* params, + cuvsCagraIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances) except + diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx index f1c0c9af5..bf17fe6a5 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -22,11 +22,13 @@ cimport cuvs.common.cydlpack from cuvs.common.temp_raft import 
auto_sync_resources from cython.operator cimport dereference as deref +from libcpp cimport bool, cast from cuvs.common cimport cydlpack from pylibraft.common import ( DeviceResources, + Stream, auto_convert_output, cai_wrapper, device_ndarray, @@ -43,9 +45,12 @@ from libc.stdint cimport ( uint64_t, uintptr_t, ) -from pylibraft.common.handle cimport device_resources -from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t +from cuvs.common.c_api cimport ( + cuvsError_t, + cuvsResources_t, + cuvsResourcesCreate, +) cdef class IndexParams: @@ -70,7 +75,7 @@ cdef class IndexParams: building the knn graph. It is expected to be generally faster than ivf_pq. """ - cdef cagraIndexParams params + cdef cuvsCagraIndexParams* params def __init__(self, *, metric="sqeuclidean", @@ -78,6 +83,9 @@ cdef class IndexParams: graph_degree=64, build_algo="ivf_pq", nn_descent_niter=20): + + cuvsCagraIndexParamsCreate(&self.params) + # todo (dgd): enable once other metrics are present # and exposed in cuVS C API # self.params.metric = _get_metric(metric) @@ -85,9 +93,9 @@ cdef class IndexParams: self.params.intermediate_graph_degree = intermediate_graph_degree self.params.graph_degree = graph_degree if build_algo == "ivf_pq": - self.params.build_algo = cagraGraphBuildAlgo.IVF_PQ + self.params.build_algo = cuvsCagraGraphBuildAlgo.IVF_PQ elif build_algo == "nn_descent": - self.params.build_algo = cagraGraphBuildAlgo.NN_DESCENT + self.params.build_algo = cuvsCagraGraphBuildAlgo.NN_DESCENT self.params.nn_descent_niter = nn_descent_niter # @property @@ -112,22 +120,27 @@ cdef class IndexParams: cdef class Index: - cdef cagraIndex_t index + cdef cuvsCagraIndex_t index + cdef bool trained def __cinit__(self): cdef cuvsError_t index_create_status - index_create_status = cagraIndexCreate(&self.index) + index_create_status = cuvsCagraIndexCreate(&self.index) self.trained = False if index_create_status == cuvsError_t.CUVS_ERROR: - raise Exception("FAIL") + raise RuntimeError("Failed to create index.") def __dealloc__(self): cdef cuvsError_t index_destroy_status if self.index is not NULL: - index_destroy_status = cagraIndexDestroy(self.index) + index_destroy_status = cuvsCagraIndexDestroy(self.index) if index_destroy_status == cuvsError_t.CUVS_ERROR: - raise Exception("FAIL") + raise Exception("Failed to deallocate index.") + + @property + def trained(self): + return self.trained def __repr__(self): # todo(dgd): update repr as we expose data through C API @@ -165,19 +178,18 @@ def build_index(IndexParams index_params, dataset, resources=None): -------- >>> import cupy as cp - >>> from pylibraft.neighbors import cagra + >>> from cuvs.neighbors import cagra >>> n_samples = 50000 >>> n_features = 50 >>> n_queries = 1000 >>> k = 10 >>> dataset = cp.random.random_sample((n_samples, n_features), ... dtype=cp.float32) - >>> handle = DeviceResources() >>> build_params = cagra.IndexParams(metric="sqeuclidean") >>> index = cagra.build_index(build_params, dataset) >>> distances, neighbors = cagra.search(cagra.SearchParams(), ... index, dataset, - ... k, handle=handle) + ... 
k) >>> distances = cp.asarray(distances) >>> neighbors = cp.asarray(neighbors) """ @@ -188,22 +200,24 @@ def build_index(IndexParams index_params, dataset, resources=None): _check_input_array(dataset_ai, [np.dtype('float32'), np.dtype('byte'), np.dtype('ubyte')]) - if resources is None: - resources = DeviceResources() - cdef cuvsResources_t* resources_ = \ - resources.getHandle() + cdef cuvsResources_t res_ + cdef cuvsError_t cstat + + cstat = cuvsResourcesCreate(&res_) + if cstat == cuvsError_t.CUVS_ERROR: + raise RuntimeError("Error creating Device Reources.") cdef Index idx = Index() cdef cuvsError_t build_status - cdef cydlpack.DLManagedTensor dataset_dlpack = \ + cdef cydlpack.DLManagedTensor* dataset_dlpack = \ cydlpack.dlpack_c(dataset_ai) - cdef cagraIndexParams* params = &index_params.params + cdef cuvsCagraIndexParams* params = index_params.params with cuda_interruptible(): - build_status = cagraBuild( - deref(resources_), + build_status = cuvsCagraBuild( + res_, params, - &dataset_dlpack, + dataset_dlpack, idx.index ) @@ -264,7 +278,7 @@ cdef class SearchParams: rand_xor_mask: int, default = 0x128394 Bit mask used for initial random seed node selection. """ - cdef cagraSearchParams params + cdef cuvsCagraSearchParams params def __init__(self, *, max_queries=0, @@ -284,13 +298,13 @@ cdef class SearchParams: self.params.itopk_size = itopk_size self.params.max_iterations = max_iterations if algo == "single_cta": - self.params.algo = cagraSearchAlgo.SINGLE_CTA + self.params.algo = cuvsCagraSearchAlgo.SINGLE_CTA elif algo == "multi_cta": - self.params.algo = cagraSearchAlgo.MULTI_CTA + self.params.algo = cuvsCagraSearchAlgo.MULTI_CTA elif algo == "multi_kernel": - self.params.algo = cagraSearchAlgo.MULTI_KERNEL + self.params.algo = cuvsCagraSearchAlgo.MULTI_KERNEL elif algo == "auto": - self.params.algo = cagraSearchAlgo.AUTO + self.params.algo = cuvsCagraSearchAlgo.AUTO else: raise ValueError("`algo` value not supported.") @@ -299,11 +313,11 @@ cdef class SearchParams: self.params.min_iterations = min_iterations self.params.thread_block_size = thread_block_size if hashmap_mode == "hash": - self.params.hashmap_mode = cagraHashMode.HASH + self.params.hashmap_mode = cuvsCagraHashMode.HASH elif hashmap_mode == "small": - self.params.hashmap_mode = cagraHashMode.SMALL + self.params.hashmap_mode = cuvsCagraHashMode.SMALL elif hashmap_mode == "auto": - self.params.hashmap_mode = cagraHashMode.AUTO_HASH + self.params.hashmap_mode = cuvsCagraHashMode.AUTO_HASH else: raise ValueError("`hashmap_mode` value not supported.") @@ -407,16 +421,14 @@ def search(SearchParams search_params, Examples -------- >>> import cupy as cp - >>> from pylibraft.common import DeviceResources - >>> from pylibraft.neighbors import cagra + >>> from cuvs.neighbors import cagra >>> n_samples = 50000 >>> n_features = 50 >>> n_queries = 1000 >>> dataset = cp.random.random_sample((n_samples, n_features), ... dtype=cp.float32) >>> # Build index - >>> handle = DeviceResources() - >>> index = cagra.build(cagra.IndexParams(), dataset, handle=handle) + >>> index = cagra.build_index(cagra.IndexParams(), dataset) >>> # Search using the built index >>> queries = cp.random.random_sample((n_queries, n_features), ... dtype=cp.float32) @@ -429,62 +441,62 @@ def search(SearchParams search_params, >>> # creation during search. This is useful if multiple searches >>> # are performad with same query size. >>> distances, neighbors = cagra.search(search_params, index, queries, - ... 
k, handle=handle) - >>> # pylibraft functions are often asynchronous so the - >>> # handle needs to be explicitly synchronized - >>> handle.sync() + ... k) >>> neighbors = cp.asarray(neighbors) >>> distances = cp.asarray(distances) """ if not index.trained: - raise ValueError("Index need to be built before calling search.") + raise ValueError("Index needs to be built before calling search.") - if resources is None: - resources = DeviceResources() - cdef device_resources* resources_ = \ - resources.getHandle() + cdef cuvsResources_t res_ + cdef cuvsError_t cstat + + cstat = cuvsResourcesCreate(&res_) + if cstat == cuvsError_t.CUVS_ERROR: + raise RuntimeError("Error creating Device Reources.") # todo(dgd): we can make the check of dtype a parameter of wrap_array # in RAFT to make this a single call - queries_cai = cai_wrapper(queries) + queries_cai = wrap_array(queries) _check_input_array(queries_cai, [np.dtype('float32'), np.dtype('byte'), - np.dtype('ubyte')], - exp_cols=index.dim) + np.dtype('ubyte')]) cdef uint32_t n_queries = queries_cai.shape[0] if neighbors is None: neighbors = device_ndarray.empty((n_queries, k), dtype='uint32') - neighbors_cai = cai_wrapper(neighbors) + neighbors_cai = wrap_array(neighbors) _check_input_array(neighbors_cai, [np.dtype('uint32')], exp_rows=n_queries, exp_cols=k) if distances is None: distances = device_ndarray.empty((n_queries, k), dtype='float32') - distances_cai = cai_wrapper(distances) + distances_cai = wrap_array(distances) _check_input_array(distances_cai, [np.dtype('float32')], exp_rows=n_queries, exp_cols=k) - cdef cagraSearchParams* params = &search_params.params - cdef cydlpack.DLManagedTensor queries_dlpack = cydlpack.dlpack_c(queries_cai) - cdef cydlpack.DLManagedTensor neighbors_dlpack = cydlpack.dlpack_c(neighbors_cai) - cdef cydlpack.DLManagedTensor distances_dlpack = cydlpack.dlpack_c(distances_cai) + cdef cuvsCagraSearchParams* params = &search_params.params + cdef cuvsError_t search_status + cdef cydlpack.DLManagedTensor* queries_dlpack = \ + cydlpack.dlpack_c(queries_cai) + cdef cydlpack.DLManagedTensor* neighbors_dlpack = \ + cydlpack.dlpack_c(neighbors_cai) + cdef cydlpack.DLManagedTensor* distances_dlpack = \ + cydlpack.dlpack_c(distances_cai) with cuda_interruptible(): - cagraSearch( - deref(resources_), + search_status = cuvsCagraSearch( + res_, params, index.index, - &queries_dlpack, - &neighbors_dlpack, - &distances_dlpack + queries_dlpack, + neighbors_dlpack, + distances_dlpack ) - if build_status == cuvsError_t.CUVS_ERROR: - raise RuntimeError("Index failed to build.") - else: - idx.trained = True + if search_status == cuvsError_t.CUVS_ERROR: + raise RuntimeError("Search failed.") return (distances, neighbors) diff --git a/python/cuvs/cuvs/test/test_cagra.py b/python/cuvs/cuvs/test/test_cagra.py index 78fbc5828..6074eee3a 100644 --- a/python/cuvs/cuvs/test/test_cagra.py +++ b/python/cuvs/cuvs/test/test_cagra.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -56,8 +56,6 @@ def run_cagra_build_search_test( else: index = cagra.build_index(build_params, dataset) - assert index.trained - if not add_data_on_build: dataset_1 = dataset[: n_rows // 2, :] dataset_2 = dataset[n_rows // 2 :, :] diff --git a/python/cuvs/cuvs/test/test_doctests.py b/python/cuvs/cuvs/test/test_doctests.py index 331b0f7f3..6d56ffaa2 100644 --- a/python/cuvs/cuvs/test/test_doctests.py +++ b/python/cuvs/cuvs/test/test_doctests.py @@ -20,11 +20,7 @@ import pytest -import pylibraft.cluster -import pylibraft.distance -import pylibraft.matrix -import pylibraft.neighbors -import pylibraft.random +import cuvs.neighbors # Code adapted from https://github.com/rapidsai/cudf/blob/branch-23.02/python/cudf/cudf/tests/test_doctests.py # noqa @@ -92,17 +88,8 @@ def _find_doctests_in_obj(obj, finder=None, criteria=None): # since the root pylibraft module doesn't import submodules (or define an # __all__) we are explicitly adding all the submodules we want to run # doctests for here -DOC_STRINGS = list(_find_doctests_in_obj(pylibraft.cluster)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.common)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.distance)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.matrix.select_k)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.brute_force)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.cagra)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.ivf_flat)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.ivf_pq)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.refine)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.random)) +DOC_STRINGS = list(_find_doctests_in_obj(cuvs.neighbors)) +DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.neighbors.cagra)) @pytest.mark.parametrize( diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index cba8d4adf..1f9bc29d4 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ requires = [ "setuptools", "wheel", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. -build-backend = "setuptools.build_meta" +build-backend = "scikit_build_core.build" [project] name = "cuvs" @@ -109,3 +109,16 @@ skip = [ "dist", "__init__.py", ] + +[tool.scikit-build] +build-dir = "build/{wheel_tag}" +cmake.build-type = "Release" +cmake.minimum-version = "3.26.4" +ninja.make-fallback = true +sdist.reproducible = true +wheel.packages = ["cuvs"] + +[tool.scikit-build.metadata.version] +provider = "scikit_build_core.metadata.regex" +input = "cuvs/VERSION" +regex = "(?P.*)" diff --git a/python/cuvs/setup.cfg b/python/cuvs/setup.cfg index 3574b4416..57b4954bc 100644 --- a/python/cuvs/setup.cfg +++ b/python/cuvs/setup.cfg @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. 
[isort] line_length=79 @@ -12,6 +12,7 @@ known_dask= distributed dask_cuda known_rapids= + cuvs nvtext cudf cuml diff --git a/python/cuvs/setup.py b/python/cuvs/setup.py deleted file mode 100644 index 4e825dab2..000000000 --- a/python/cuvs/setup.py +++ /dev/null @@ -1,37 +0,0 @@ -# -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from setuptools import find_packages -from skbuild import setup - - -def exclude_libcxx_symlink(cmake_manifest): - return list( - filter( - lambda name: not ("include/rapids/libcxx/include" in name), - cmake_manifest, - ) - ) - - -packages = find_packages(include=["cuvs*"]) -setup( - # Don't want libcxx getting pulled into wheel builds. - cmake_process_manifest_hook=exclude_libcxx_symlink, - packages=packages, - package_data={key: ["VERSION", "*.pxd"] for key in packages}, - zip_safe=False, -) From 2b3500c134f19e1eadd6fc27e44b7b029165b1ad Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 29 Feb 2024 20:23:54 -0600 Subject: [PATCH 12/45] ENH Re-enable wheels CI --- ci/build_wheel_cuvs.sh | 2 +- ci/test_wheel_cuvs.sh | 22 +++++++++--------- ci/wheel_smoke_test_cuvs.py | 45 ++++++++++++++----------------------- 3 files changed, 29 insertions(+), 40 deletions(-) diff --git a/ci/build_wheel_cuvs.sh b/ci/build_wheel_cuvs.sh index de0e6f160..d209f93c0 100755 --- a/ci/build_wheel_cuvs.sh +++ b/ci/build_wheel_cuvs.sh @@ -6,4 +6,4 @@ set -euo pipefail # Set up skbuild options. 
Enable sccache in skbuild config options export SKBUILD_CONFIGURE_OPTIONS="-DCUVS_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_CUVS_CPP=OFF" -#ci/build_wheel.sh cuvs python/cuvs +ci/build_wheel.sh cuvs python/cuvs diff --git a/ci/test_wheel_cuvs.sh b/ci/test_wheel_cuvs.sh index 52cfa7ae1..364e00a7c 100755 --- a/ci/test_wheel_cuvs.sh +++ b/ci/test_wheel_cuvs.sh @@ -4,15 +4,15 @@ set -euo pipefail mkdir -p ./dist -#RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -#RAPIDS_PY_WHEEL_NAME="cuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="cuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist -## echo to expand wildcard before adding `[extra]` requires for pip -#python -m pip install $(echo ./dist/cuvs*.whl)[test] -# -## Run smoke tests for aarch64 pull requests -#if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then -# python ./ci/wheel_smoke_test_cuvs.py -#else -# python -m pytest ./python/cuvs/cuvs/test -#fi +# echo to expand wildcard before adding `[extra]` requires for pip +python -m pip install $(echo ./dist/cuvs*.whl)[test] + +# Run smoke tests for aarch64 pull requests +if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then + python ./ci/wheel_smoke_test_cuvs.py +else + python -m pytest ./python/cuvs/cuvs/test +fi diff --git a/ci/wheel_smoke_test_cuvs.py b/ci/wheel_smoke_test_cuvs.py index 65b5fb8b2..858cc6010 100644 --- a/ci/wheel_smoke_test_cuvs.py +++ b/ci/wheel_smoke_test_cuvs.py @@ -13,41 +13,30 @@ # limitations under the License. # +import cupy as cp import numpy as np -from scipy.spatial.distance import cdist -from pylibrat.common import Handle, Stream, device_ndarray -from cuvs.distance import pairwise_distance +from cuvs.neighbors import cagra +from pylibraft.common import Stream, DeviceResources if __name__ == "__main__": - metric = "euclidean" - n_rows = 1337 - n_cols = 1337 + n_samples = 1000 + n_features = 50 + n_queries = 1000 + k = 10 - input1 = np.random.random_sample((n_rows, n_cols)) - input1 = np.asarray(input1, order="C").astype(np.float64) + dataset = cp.random.random_sample((n_samples, + n_features)).astype(cp.float32) - output = np.zeros((n_rows, n_rows), dtype=np.float64) + build_params = cagra.IndexParams(metric="sqeuclidean", + build_algo="nn_descent") - expected = cdist(input1, input1, metric) + index = cagra.build_index(build_params, dataset) - expected[expected <= 1e-5] = 0.0 + distances, neighbors = cagra.search(cagra.SearchParams(), + index, dataset, + k) - input1_device = device_ndarray(input1) - output_device = None - - s2 = Stream() - handle = Handle(stream=s2) - ret_output = pairwise_distance( - input1_device, input1_device, output_device, metric, handle=handle - ) - handle.sync() - - output_device = ret_output - - actual = output_device.copy_to_host() - - actual[actual <= 1e-5] = 0.0 - - assert np.allclose(expected, actual, rtol=1e-4) + distances = cp.asarray(distances) + neighbors = cp.asarray(neighbors) From e11cfb4553a0d5b3f0ce12a950d1c51f4d2c16f7 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 29 Feb 2024 20:28:30 -0600 Subject: [PATCH 13/45] FIX style fixes --- ci/wheel_smoke_test_cuvs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/wheel_smoke_test_cuvs.py b/ci/wheel_smoke_test_cuvs.py index 858cc6010..26ab5f6b5 100644 --- a/ci/wheel_smoke_test_cuvs.py +++ b/ci/wheel_smoke_test_cuvs.py @@ -1,4 
+1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From bda4f82be15702b23907d9c894fac1013ff337ef Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 29 Feb 2024 21:15:59 -0600 Subject: [PATCH 14/45] FIX scikit-build-core and other small wheel dependencies that I forgot to checkin before --- conda/environments/all_cuda-118_arch-aarch64.yaml | 3 ++- conda/environments/all_cuda-118_arch-x86_64.yaml | 3 ++- conda/environments/all_cuda-120_arch-aarch64.yaml | 3 ++- conda/environments/all_cuda-120_arch-x86_64.yaml | 3 ++- dependencies.yaml | 12 +++++++++--- python/cuvs/pyproject.toml | 4 +++- 6 files changed, 20 insertions(+), 8 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index dbf92ec47..93774a13f 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -41,11 +41,12 @@ dependencies: - nvcc_linux-aarch64=11.8 - pre-commit - pydata-sphinx-theme +- pylibraft==24.2.* - pytest - pytest-cov - recommonmark - rmm==24.2.* -- scikit-build>=0.13.1 +- scikit-build-core[pyproject]>=0.7.0 - scikit-learn - scipy - sphinx-copybutton diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 2fe184f96..10fcad853 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -41,11 +41,12 @@ dependencies: - nvcc_linux-64=11.8 - pre-commit - pydata-sphinx-theme +- pylibraft==24.2.* - pytest - pytest-cov - recommonmark - rmm==24.2.* -- scikit-build>=0.13.1 +- scikit-build-core[pyproject]>=0.7.0 - scikit-learn - scipy - sphinx-copybutton diff --git a/conda/environments/all_cuda-120_arch-aarch64.yaml b/conda/environments/all_cuda-120_arch-aarch64.yaml index 1b7f3908a..862363782 100644 --- a/conda/environments/all_cuda-120_arch-aarch64.yaml +++ b/conda/environments/all_cuda-120_arch-aarch64.yaml @@ -37,11 +37,12 @@ dependencies: - numpydoc - pre-commit - pydata-sphinx-theme +- pylibraft==24.2.* - pytest - pytest-cov - recommonmark - rmm==24.2.* -- scikit-build>=0.13.1 +- scikit-build-core[pyproject]>=0.7.0 - scikit-learn - scipy - sphinx-copybutton diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 335227994..6463d04f3 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -37,11 +37,12 @@ dependencies: - numpydoc - pre-commit - pydata-sphinx-theme +- pylibraft==24.2.* - pytest - pytest-cov - recommonmark - rmm==24.2.* -- scikit-build>=0.13.1 +- scikit-build-core[pyproject]>=0.7.0 - scikit-learn - scipy - sphinx-copybutton diff --git a/dependencies.yaml b/dependencies.yaml index 6f9f10535..e462b203e 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -84,7 +84,7 @@ dependencies: - &cmake_ver cmake>=3.26.4 - cython>=3.0.0 - ninja - - scikit-build>=0.13.1 + - "scikit-build-core[pyproject]>=0.7.0" - output_types: [conda] packages: - c-compiler @@ -129,6 +129,7 @@ dependencies: - output_types: [conda] packages: - &rmm_conda rmm==24.2.* + - &pylibraft_conda pylibraft==24.2.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -149,15 +150,17 @@ dependencies: - matrix: {cuda: 
"12.2"} packages: &build_cuvs_packages_cu12 - &rmm_cu12 rmm-cu12==24.2.* + - &pylibraft_cu12 pylibraft-cu12==24.2.* - {matrix: {cuda: "12.1"}, packages: *build_cuvs_packages_cu12} - {matrix: {cuda: "12.0"}, packages: *build_cuvs_packages_cu12} - matrix: {cuda: "11.8"} packages: &build_cuvs_packages_cu11 - &rmm_cu11 rmm-cu11==24.2.* + - &pylibraft_cu11 pylibraft-cu11==24.2.* - {matrix: {cuda: "11.5"}, packages: *build_cuvs_packages_cu11} - {matrix: {cuda: "11.4"}, packages: *build_cuvs_packages_cu11} - {matrix: {cuda: "11.2"}, packages: *build_cuvs_packages_cu11} - - {matrix: null, packages: [*rmm_conda] } + - {matrix: null, packages: [*rmm_conda, *pylibraft_conda] } checks: common: - output_types: [conda, requirements] @@ -334,6 +337,7 @@ dependencies: - output_types: [conda] packages: - *rmm_conda + - *pylibraft_conda - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -354,15 +358,17 @@ dependencies: - matrix: {cuda: "12.2"} packages: &run_cuvs_packages_cu12 - *rmm_cu12 + - *pylibraft_cu12 - {matrix: {cuda: "12.1"}, packages: *run_cuvs_packages_cu12} - {matrix: {cuda: "12.0"}, packages: *run_cuvs_packages_cu12} - matrix: {cuda: "11.8"} packages: &run_cuvs_packages_cu11 - *rmm_cu11 + - *pylibraft_cu12 - {matrix: {cuda: "11.5"}, packages: *run_cuvs_packages_cu11} - {matrix: {cuda: "11.4"}, packages: *run_cuvs_packages_cu11} - {matrix: {cuda: "11.2"}, packages: *run_cuvs_packages_cu11} - - {matrix: null, packages: [*rmm_conda]} + - {matrix: null, packages: [*rmm_conda, *pylibraft_conda]} test_python_common: common: - output_types: [conda, requirements, pyproject] diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index 1f9bc29d4..90b7f40e7 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -19,8 +19,9 @@ requires = [ "cuda-python>=11.7.1,<12.0a0", "cython>=3.0.0", "ninja", + "pylibraft==24.2.*", "rmm==24.2.*", - "scikit-build>=0.13.1", + "scikit-build-core[pyproject]>=0.7.0", "setuptools", "wheel", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. @@ -39,6 +40,7 @@ requires-python = ">=3.9" dependencies = [ "cuda-python>=11.7.1,<12.0a0", "numpy>=1.21", + "pylibraft==24.2.*", "rmm==24.2.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
classifiers = [ From 112d084344e74e7d797e71ecdbab525a970b6da2 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 29 Feb 2024 21:21:21 -0600 Subject: [PATCH 15/45] Update test_python.sh for wrongly commented line Somehow this change does not appear commented in my local clone --- ci/test_python.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index e92072eae..17fbed424 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -20,7 +20,7 @@ set -u rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) -#PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) +PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} From f6cb1a86bbcd75decb1b12b831edfda292f90a6f Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 29 Feb 2024 21:33:48 -0600 Subject: [PATCH 16/45] FIX update versions of rmm and pylibraft --- conda/environments/all_cuda-118_arch-aarch64.yaml | 4 ++-- conda/environments/all_cuda-118_arch-x86_64.yaml | 4 ++-- conda/environments/all_cuda-120_arch-aarch64.yaml | 4 ++-- conda/environments/all_cuda-120_arch-x86_64.yaml | 4 ++-- dependencies.yaml | 12 ++++++------ python/cuvs/pyproject.toml | 8 ++++---- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 93774a13f..715085797 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -41,11 +41,11 @@ dependencies: - nvcc_linux-aarch64=11.8 - pre-commit - pydata-sphinx-theme -- pylibraft==24.2.* +- pylibraft==24.4.* - pytest - pytest-cov - recommonmark -- rmm==24.2.* +- rmm==24.4.* - scikit-build-core[pyproject]>=0.7.0 - scikit-learn - scipy diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 10fcad853..453a83a66 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -41,11 +41,11 @@ dependencies: - nvcc_linux-64=11.8 - pre-commit - pydata-sphinx-theme -- pylibraft==24.2.* +- pylibraft==24.4.* - pytest - pytest-cov - recommonmark -- rmm==24.2.* +- rmm==24.4.* - scikit-build-core[pyproject]>=0.7.0 - scikit-learn - scipy diff --git a/conda/environments/all_cuda-120_arch-aarch64.yaml b/conda/environments/all_cuda-120_arch-aarch64.yaml index 862363782..03c29f103 100644 --- a/conda/environments/all_cuda-120_arch-aarch64.yaml +++ b/conda/environments/all_cuda-120_arch-aarch64.yaml @@ -37,11 +37,11 @@ dependencies: - numpydoc - pre-commit - pydata-sphinx-theme -- pylibraft==24.2.* +- pylibraft==24.4.* - pytest - pytest-cov - recommonmark -- rmm==24.2.* +- rmm==24.4.* - scikit-build-core[pyproject]>=0.7.0 - scikit-learn - scipy diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 6463d04f3..6e5abd02e 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -37,11 +37,11 @@ dependencies: - numpydoc - pre-commit - pydata-sphinx-theme -- pylibraft==24.2.* +- pylibraft==24.4.* - pytest - pytest-cov - recommonmark -- rmm==24.2.* +- rmm==24.4.* - scikit-build-core[pyproject]>=0.7.0 - scikit-learn - scipy diff --git a/dependencies.yaml b/dependencies.yaml index 
e462b203e..4151288e3 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -128,8 +128,8 @@ dependencies: common: - output_types: [conda] packages: - - &rmm_conda rmm==24.2.* - - &pylibraft_conda pylibraft==24.2.* + - &rmm_conda rmm==24.4.* + - &pylibraft_conda pylibraft==24.4.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -149,14 +149,14 @@ dependencies: matrices: - matrix: {cuda: "12.2"} packages: &build_cuvs_packages_cu12 - - &rmm_cu12 rmm-cu12==24.2.* - - &pylibraft_cu12 pylibraft-cu12==24.2.* + - &rmm_cu12 rmm-cu12==24.4.* + - &pylibraft_cu12 pylibraft-cu12==24.4.* - {matrix: {cuda: "12.1"}, packages: *build_cuvs_packages_cu12} - {matrix: {cuda: "12.0"}, packages: *build_cuvs_packages_cu12} - matrix: {cuda: "11.8"} packages: &build_cuvs_packages_cu11 - - &rmm_cu11 rmm-cu11==24.2.* - - &pylibraft_cu11 pylibraft-cu11==24.2.* + - &rmm_cu11 rmm-cu11==24.4.* + - &pylibraft_cu11 pylibraft-cu11==24.4.* - {matrix: {cuda: "11.5"}, packages: *build_cuvs_packages_cu11} - {matrix: {cuda: "11.4"}, packages: *build_cuvs_packages_cu11} - {matrix: {cuda: "11.2"}, packages: *build_cuvs_packages_cu11} diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index 90b7f40e7..1f92a5fbe 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -19,8 +19,8 @@ requires = [ "cuda-python>=11.7.1,<12.0a0", "cython>=3.0.0", "ninja", - "pylibraft==24.2.*", - "rmm==24.2.*", + "pylibraft==24.4.*", + "rmm==24.4.*", "scikit-build-core[pyproject]>=0.7.0", "setuptools", "wheel", @@ -40,8 +40,8 @@ requires-python = ">=3.9" dependencies = [ "cuda-python>=11.7.1,<12.0a0", "numpy>=1.21", - "pylibraft==24.2.*", - "rmm==24.2.*", + "pylibraft==24.4.*", + "rmm==24.4.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", From 415d70cf2d6d6368864f92e7fff5c88113f0443f Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 29 Feb 2024 22:03:05 -0600 Subject: [PATCH 17/45] FIX add pip nvidia nightly index: --- dependencies.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 4151288e3..7e8ed166d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -133,8 +133,9 @@ dependencies: - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for rmm-cu{11,12}. + # This index is needed for cudf and rmm. 
- --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [conda, requirements, pyproject] matrices: @@ -364,7 +365,7 @@ dependencies: - matrix: {cuda: "11.8"} packages: &run_cuvs_packages_cu11 - *rmm_cu11 - - *pylibraft_cu12 + - *pylibraft_cu11 - {matrix: {cuda: "11.5"}, packages: *run_cuvs_packages_cu11} - {matrix: {cuda: "11.4"}, packages: *run_cuvs_packages_cu11} - {matrix: {cuda: "11.2"}, packages: *run_cuvs_packages_cu11} From 42e82842d44ea4548f18ffaf9679c3180e0e4f77 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 29 Feb 2024 23:08:03 -0600 Subject: [PATCH 18/45] DBG more index fixes for wheel builds --- ci/build_python.sh | 2 +- conda/environments/all_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-120_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-120_arch-x86_64.yaml | 2 +- dependencies.yaml | 5 +++-- python/cuvs/pyproject.toml | 2 +- 7 files changed, 9 insertions(+), 8 deletions(-) diff --git a/ci/build_python.sh b/ci/build_python.sh index 75002b9f9..dd259bdad 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -25,7 +25,7 @@ git_commit=$(git rev-parse HEAD) export RAPIDS_PACKAGE_VERSION=${version} echo "${version}" > VERSION -sed -i "/^__git_commit__/ s/= .*/= \"${git_commit}\"/g" "${package_dir}/${package_name}/_version.py" +sed -i "/^__git_commit__/ s/= .*/= \"${git_commit}\"/g" "${package_dir}/${package_name}/${package_name}/_version.py" # TODO: Remove `--no-test` flags once importing on a CPU # node works correctly diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 715085797..1a88fa606 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -36,7 +36,7 @@ dependencies: - libcusparse=11.7.5.86 - nccl>=2.9.9 - ninja -- numpy>=1.21 +- numpy>=1.23 - numpydoc - nvcc_linux-aarch64=11.8 - pre-commit diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 453a83a66..89598db71 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -36,7 +36,7 @@ dependencies: - libcusparse=11.7.5.86 - nccl>=2.9.9 - ninja -- numpy>=1.21 +- numpy>=1.23 - numpydoc - nvcc_linux-64=11.8 - pre-commit diff --git a/conda/environments/all_cuda-120_arch-aarch64.yaml b/conda/environments/all_cuda-120_arch-aarch64.yaml index 03c29f103..312238c5d 100644 --- a/conda/environments/all_cuda-120_arch-aarch64.yaml +++ b/conda/environments/all_cuda-120_arch-aarch64.yaml @@ -33,7 +33,7 @@ dependencies: - libcusparse-dev - nccl>=2.9.9 - ninja -- numpy>=1.21 +- numpy>=1.23 - numpydoc - pre-commit - pydata-sphinx-theme diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 6e5abd02e..3bac8ff4b 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -33,7 +33,7 @@ dependencies: - libcusparse-dev - nccl>=2.9.9 - ninja -- numpy>=1.21 +- numpy>=1.23 - numpydoc - pre-commit - pydata-sphinx-theme diff --git a/dependencies.yaml b/dependencies.yaml index 7e8ed166d..7eb751012 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -133,7 +133,7 @@ dependencies: - output_types: requirements packages: # pip recognizes the index as a global option for the 
requirements.txt file - # This index is needed for cudf and rmm. + # This index is needed for pylibraft and rmm. - --extra-index-url=https://pypi.nvidia.com - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: @@ -334,7 +334,7 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - &numpy numpy>=1.21 + - &numpy numpy>=1.23 - output_types: [conda] packages: - *rmm_conda @@ -344,6 +344,7 @@ dependencies: # pip recognizes the index as a global option for the requirements.txt file # This index is needed for cudf and rmm. - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [conda, requirements, pyproject] matrices: diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index 1f92a5fbe..5e2aace5d 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -39,7 +39,7 @@ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ "cuda-python>=11.7.1,<12.0a0", - "numpy>=1.21", + "numpy>=1.23", "pylibraft==24.4.*", "rmm==24.4.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. From 62089878b7c79bf54fb851a7d7b8c06531d637ab Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 29 Feb 2024 23:30:21 -0600 Subject: [PATCH 19/45] DBG large updates to dependencies.yaml and cuda 12.2 updates --- ci/release/update-version.sh | 2 + .../all_cuda-118_arch-aarch64.yaml | 8 +- .../all_cuda-118_arch-x86_64.yaml | 8 +- .../all_cuda-122_arch-aarch64.yaml | 49 ++++++++ .../all_cuda-122_arch-x86_64.yaml | 49 ++++++++ dependencies.yaml | 111 +++++++++++------- python/cuvs/pyproject.toml | 2 +- 7 files changed, 174 insertions(+), 55 deletions(-) create mode 100644 conda/environments/all_cuda-122_arch-aarch64.yaml create mode 100644 conda/environments/all_cuda-122_arch-x86_64.yaml diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index d730cdc4b..b122e73bd 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -57,6 +57,8 @@ DEPENDENCIES=( cuvs cuvs-cu11 cuvs-cu12 + pylibraft-cu11 + pylibraft-cu12 rmm rmm-cu11 rmm-cu12 diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 1a88fa606..41670448b 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -12,12 +12,10 @@ dependencies: - clang-tools=16.0.6 - clang==16.0.6 - cmake>=3.26.4 -- cuda-nvtx=11.8 -- cuda-profiler-api=11.8.86 - cuda-python>=11.7.1,<12.0a0 - cuda-version=11.8 - cudatoolkit -- cupy>=12.0.0 +- cupy>=12.2.0 - cxx-compiler - cython>=3.0.0 - doxygen>=1.8.20 @@ -28,13 +26,14 @@ dependencies: - ipython - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 +- libcufft-dev=10.9.0.58 +- libcufft=10.9.0.58 - libcurand-dev=10.3.0.86 - libcurand=10.3.0.86 - libcusolver-dev=11.4.1.48 - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- nccl>=2.9.9 - ninja - numpy>=1.23 - numpydoc @@ -46,7 +45,6 @@ dependencies: - pytest-cov - recommonmark - rmm==24.4.* -- scikit-build-core[pyproject]>=0.7.0 - scikit-learn - scipy - sphinx-copybutton diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 89598db71..3f90d93b5 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ 
b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -12,12 +12,10 @@ dependencies: - clang-tools=16.0.6 - clang==16.0.6 - cmake>=3.26.4 -- cuda-nvtx=11.8 -- cuda-profiler-api=11.8.86 - cuda-python>=11.7.1,<12.0a0 - cuda-version=11.8 - cudatoolkit -- cupy>=12.0.0 +- cupy>=12.2.0 - cxx-compiler - cython>=3.0.0 - doxygen>=1.8.20 @@ -28,13 +26,14 @@ dependencies: - ipython - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 +- libcufft-dev=10.9.0.58 +- libcufft=10.9.0.58 - libcurand-dev=10.3.0.86 - libcurand=10.3.0.86 - libcusolver-dev=11.4.1.48 - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- nccl>=2.9.9 - ninja - numpy>=1.23 - numpydoc @@ -46,7 +45,6 @@ dependencies: - pytest-cov - recommonmark - rmm==24.4.* -- scikit-build-core[pyproject]>=0.7.0 - scikit-learn - scipy - sphinx-copybutton diff --git a/conda/environments/all_cuda-122_arch-aarch64.yaml b/conda/environments/all_cuda-122_arch-aarch64.yaml new file mode 100644 index 000000000..af754dd35 --- /dev/null +++ b/conda/environments/all_cuda-122_arch-aarch64.yaml @@ -0,0 +1,49 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- conda-forge +- nvidia +dependencies: +- breathe +- c-compiler +- clang-tools=16.0.6 +- clang==16.0.6 +- cmake>=3.26.4 +- cuda-cudart-dev +- cuda-nvcc +- cuda-profiler-api +- cuda-python>=12.0,<13.0a0 +- cuda-version=12.2 +- cupy>=12.2.0 +- cxx-compiler +- cython>=3.0.0 +- doxygen>=1.8.20 +- gcc_linux-aarch64=11.* +- gmock>=1.13.0 +- graphviz +- gtest>=1.13.0 +- ipython +- libcublas-dev +- libcufft-dev +- libcurand-dev +- libcusolver-dev +- libcusparse-dev +- ninja +- numpy>=1.23 +- numpydoc +- pre-commit +- pydata-sphinx-theme +- pylibraft==24.4.* +- pytest +- pytest-cov +- recommonmark +- rmm==24.4.* +- scikit-learn +- scipy +- sphinx-copybutton +- sphinx-markdown-tables +- sysroot_linux-aarch64==2.17 +name: all_cuda-122_arch-aarch64 diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml new file mode 100644 index 000000000..55ef10e38 --- /dev/null +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -0,0 +1,49 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- conda-forge +- nvidia +dependencies: +- breathe +- c-compiler +- clang-tools=16.0.6 +- clang==16.0.6 +- cmake>=3.26.4 +- cuda-cudart-dev +- cuda-nvcc +- cuda-profiler-api +- cuda-python>=12.0,<13.0a0 +- cuda-version=12.2 +- cupy>=12.2.0 +- cxx-compiler +- cython>=3.0.0 +- doxygen>=1.8.20 +- gcc_linux-64=11.* +- gmock>=1.13.0 +- graphviz +- gtest>=1.13.0 +- ipython +- libcublas-dev +- libcufft-dev +- libcurand-dev +- libcusolver-dev +- libcusparse-dev +- ninja +- numpy>=1.23 +- numpydoc +- pre-commit +- pydata-sphinx-theme +- pylibraft==24.4.* +- pytest +- pytest-cov +- recommonmark +- rmm==24.4.* +- scikit-learn +- scipy +- sphinx-copybutton +- sphinx-markdown-tables +- sysroot_linux-64==2.17 +name: all_cuda-122_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index 7eb751012..29ab521dc 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -3,12 +3,13 @@ files: all: output: conda matrix: - cuda: ["11.8", "12.0"] + cuda: ["11.8", "12.2"] arch: [x86_64, aarch64] includes: - build - build_cuvs - - cudatoolkit + - cuda_version + - cuda - develop - checks - build_wheels @@ -21,12 +22,14 @@ files: test_cpp: output: none includes: - - cudatoolkit + - cuda + - cuda_version - test_libcuvs test_python: output: none includes: - - cudatoolkit + - cuda + - cuda_version - py_version - test_python_common - test_cuvs @@ -41,7 +44,8 @@ files: includes: - test_cuvs - cupy - - cudatoolkit + - cuda + - cuda_version - docs - py_version py_build_cuvs: @@ -84,12 +88,10 @@ dependencies: - &cmake_ver cmake>=3.26.4 - cython>=3.0.0 - ninja - - "scikit-build-core[pyproject]>=0.7.0" - output_types: [conda] packages: - c-compiler - cxx-compiler - - nccl>=2.9.9 specific: - output_types: conda matrices: @@ -105,8 +107,8 @@ dependencies: - sysroot_linux-aarch64==2.17 - output_types: conda matrices: - - matrix: {cuda: "12.0"} - packages: [cuda-version=12.0, cuda-nvcc] + - matrix: {cuda: "12.2"} + packages: [cuda-version=12.2, cuda-nvcc] - matrix: {cuda: "11.8", arch: x86_64} packages: [nvcc_linux-64=11.8] - matrix: {cuda: "11.8", arch: aarch64} @@ -136,11 +138,14 @@ dependencies: # This index is needed for pylibraft and rmm. 
- --extra-index-url=https://pypi.nvidia.com - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + - output_types: [pyproject, requirements] + packages: + - scikit-build-core[pyproject]>=0.7.0 specific: - output_types: [conda, requirements, pyproject] matrices: - matrix: - cuda: "12.0" + cuda: "12.*" packages: - &cuda_python12 cuda-python>=12.0,<13.0a0 - matrix: # All CUDA 11 versions @@ -148,20 +153,15 @@ dependencies: - &cuda_python11 cuda-python>=11.7.1,<12.0a0 - output_types: [requirements, pyproject] matrices: - - matrix: {cuda: "12.2"} - packages: &build_cuvs_packages_cu12 - - &rmm_cu12 rmm-cu12==24.4.* + - matrix: {cuda: "12.*"} + packages: - &pylibraft_cu12 pylibraft-cu12==24.4.* - - {matrix: {cuda: "12.1"}, packages: *build_cuvs_packages_cu12} - - {matrix: {cuda: "12.0"}, packages: *build_cuvs_packages_cu12} - - matrix: {cuda: "11.8"} - packages: &build_cuvs_packages_cu11 - - &rmm_cu11 rmm-cu11==24.4.* + - &rmm_cu12 rmm-cu12==24.4.* + - matrix: {cuda: "11.*"} + packages: - &pylibraft_cu11 pylibraft-cu11==24.4.* - - {matrix: {cuda: "11.5"}, packages: *build_cuvs_packages_cu11} - - {matrix: {cuda: "11.4"}, packages: *build_cuvs_packages_cu11} - - {matrix: {cuda: "11.2"}, packages: *build_cuvs_packages_cu11} - - {matrix: null, packages: [*rmm_conda, *pylibraft_conda] } + - &rmm_cu11 rmm-cu11==24.4.* + - {matrix: null, packages: [*pylibraft_conda, *rmm_conda] } checks: common: - output_types: [conda, requirements] @@ -173,30 +173,56 @@ dependencies: packages: - clang==16.0.6 - clang-tools=16.0.6 - cudatoolkit: + cuda_version: specific: - output_types: conda matrices: + - matrix: + cuda: "11.2" + packages: + - cuda-version=11.2 + - matrix: + cuda: "11.4" + packages: + - cuda-version=11.4 + - matrix: + cuda: "11.5" + packages: + - cuda-version=11.5 + - matrix: + cuda: "11.8" + packages: + - cuda-version=11.8 - matrix: cuda: "12.0" packages: - cuda-version=12.0 - - cuda-nvtx-dev + - matrix: + cuda: "12.2" + packages: + - cuda-version=12.2 + cuda: + specific: + - output_types: conda + matrices: + - matrix: + cuda: "12.*" + packages: - cuda-cudart-dev - cuda-profiler-api - libcublas-dev + - libcufft-dev - libcurand-dev - libcusolver-dev - libcusparse-dev - matrix: cuda: "11.8" packages: - - cuda-version=11.8 - cudatoolkit - - cuda-nvtx=11.8 - - cuda-profiler-api=11.8.86 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 + - libcufft-dev=10.9.0.58 + - libcufft=10.9.0.58 - libcurand-dev=10.3.0.86 - libcurand=10.3.0.86 - libcusolver-dev=11.4.1.48 @@ -206,12 +232,11 @@ dependencies: - matrix: cuda: "11.5" packages: - - cuda-version=11.5 - cudatoolkit - - cuda-nvtx=11.5 - - cuda-profiler-api>=11.4.240,<=11.8.86 # use any `11.x` version since pkg is missing several CUDA/arch packages - libcublas-dev>=11.7.3.1,<=11.7.4.6 - libcublas>=11.7.3.1,<=11.7.4.6 + - libcufft-dev>=10.6.0.54,<=10.6.0.107 + - libcufft>=10.6.0.54,<=10.6.0.107 - libcurand-dev>=10.2.6.48,<=10.2.7.107 - libcurand>=10.2.6.48,<=10.2.7.107 - libcusolver-dev>=11.2.1.48,<=11.3.2.107 @@ -221,12 +246,11 @@ dependencies: - matrix: cuda: "11.4" packages: - - cuda-version=11.4 - cudatoolkit - - &cudanvtx114 cuda-nvtx=11.4 - - cuda-profiler-api>=11.4.240,<=11.8.86 # use any `11.x` version since pkg is missing several CUDA/arch packages - &libcublas_dev114 libcublas-dev>=11.5.2.43,<=11.6.5.2 - &libcublas114 libcublas>=11.5.2.43,<=11.6.5.2 + - &libcufft_dev114 libcufft-dev>=10.5.0.43,<=10.5.2.100 + - &libcufft114 libcufft>=10.5.0.43,<=10.5.2.100 - &libcurand_dev114 libcurand-dev>=10.2.5.43,<=10.2.5.120 - &libcurand114 
libcurand>=10.2.5.43,<=10.2.5.120 - &libcusolver_dev114 libcusolver-dev>=11.2.0.43,<=11.2.0.120 @@ -236,14 +260,13 @@ dependencies: - matrix: cuda: "11.2" packages: - - cuda-version=11.2 - cudatoolkit - - *cudanvtx114 - - cuda-profiler-api>=11.4.240,<=11.8.86 # use any `11.x` version since pkg is missing several CUDA/arch packages # The NVIDIA channel doesn't publish pkgs older than 11.4 for these libs, # so 11.2 uses 11.4 packages (the oldest available). - *libcublas_dev114 - *libcublas114 + - *libcufft_dev114 + - *libcufft114 - *libcurand_dev114 - *libcurand114 - *libcusolver_dev114 @@ -255,27 +278,27 @@ dependencies: common: - output_types: conda packages: - - cupy>=12.0.0 + - cupy>=12.2.0 specific: - output_types: [requirements, pyproject] matrices: # All CUDA 12 + x86_64 versions - matrix: {cuda: "12.2", arch: x86_64} packages: &cupy_packages_cu12_x86_64 - - &cupy_cu12_x86_64 cupy-cuda12x>=12.0.0 + - &cupy_cu12_x86_64 cupy-cuda12x>=12.2.0 - {matrix: {cuda: "12.1", arch: x86_64}, packages: *cupy_packages_cu12_x86_64} - - {matrix: {cuda: "12.0", arch: x86_64}, packages: *cupy_packages_cu12_x86_64} + - {matrix: {cuda: "12.2", arch: x86_64}, packages: *cupy_packages_cu12_x86_64} # All CUDA 12 + aarch64 versions - matrix: {cuda: "12.2", arch: aarch64} packages: &cupy_packages_cu12_aarch64 - &cupy_cu12_aarch64 cupy-cuda12x -f https://pip.cupy.dev/aarch64 # TODO: Verify that this works. - {matrix: {cuda: "12.1", arch: aarch64}, packages: *cupy_packages_cu12_aarch64} - - {matrix: {cuda: "12.0", arch: aarch64}, packages: *cupy_packages_cu12_aarch64} + - {matrix: {cuda: "12.2", arch: aarch64}, packages: *cupy_packages_cu12_aarch64} # All CUDA 11 + x86_64 versions - matrix: {cuda: "11.8", arch: x86_64} packages: &cupy_packages_cu11_x86_64 - - cupy-cuda11x>=12.0.0 + - cupy-cuda11x>=12.2.0 - {matrix: {cuda: "11.5", arch: x86_64}, packages: *cupy_packages_cu11_x86_64} - {matrix: {cuda: "11.4", arch: x86_64}, packages: *cupy_packages_cu11_x86_64} - {matrix: {cuda: "11.2", arch: x86_64}, packages: *cupy_packages_cu11_x86_64} @@ -287,7 +310,7 @@ dependencies: - {matrix: {cuda: "11.5", arch: aarch64}, packages: *cupy_packages_cu11_aarch64} - {matrix: {cuda: "11.4", arch: aarch64}, packages: *cupy_packages_cu11_aarch64} - {matrix: {cuda: "11.2", arch: aarch64}, packages: *cupy_packages_cu11_aarch64} - - {matrix: null, packages: [cupy-cuda11x>=12.0.0]} + - {matrix: null, packages: [cupy-cuda11x>=12.2.0]} test_libcuvs: common: @@ -349,7 +372,7 @@ dependencies: - output_types: [conda, requirements, pyproject] matrices: - matrix: - cuda: "12.0" + cuda: "12.2" packages: - *cuda_python12 - matrix: # All CUDA 11 versions @@ -362,7 +385,7 @@ dependencies: - *rmm_cu12 - *pylibraft_cu12 - {matrix: {cuda: "12.1"}, packages: *run_cuvs_packages_cu12} - - {matrix: {cuda: "12.0"}, packages: *run_cuvs_packages_cu12} + - {matrix: {cuda: "12.2"}, packages: *run_cuvs_packages_cu12} - matrix: {cuda: "11.8"} packages: &run_cuvs_packages_cu11 - *rmm_cu11 diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index 5e2aace5d..ed78ee18a 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -52,7 +52,7 @@ classifiers = [ [project.optional-dependencies] test = [ - "cupy-cuda11x>=12.0.0", + "cupy-cuda11x>=12.2.0", "pytest", "pytest-cov", "scikit-learn", From efdaf580646080b2f1a6486c2699a7a00daac323 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 29 Feb 2024 23:45:49 -0600 Subject: [PATCH 20/45] DBG more changes to dep file to make wheels happy --- .../all_cuda-118_arch-x86_64.yaml 
| 30 +- .../all_cuda-120_arch-aarch64.yaml | 51 --- .../all_cuda-120_arch-x86_64.yaml | 51 --- .../all_cuda-122_arch-x86_64.yaml | 30 +- .../clang_tidy_cuda-118_arch-x86_64.yaml | 36 ++ .../cpp_all_cuda-118_arch-x86_64.yaml | 33 ++ ...yaml => cpp_all_cuda-122_arch-x86_64.yaml} | 32 +- dependencies.yaml | 334 +++++++++--------- python/cuvs/pyproject.toml | 15 +- 9 files changed, 281 insertions(+), 331 deletions(-) delete mode 100644 conda/environments/all_cuda-120_arch-aarch64.yaml delete mode 100644 conda/environments/all_cuda-120_arch-x86_64.yaml create mode 100644 conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml create mode 100644 conda/environments/cpp_all_cuda-118_arch-x86_64.yaml rename conda/environments/{all_cuda-122_arch-aarch64.yaml => cpp_all_cuda-122_arch-x86_64.yaml} (51%) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 3f90d93b5..87bb5c48f 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -3,26 +3,25 @@ channels: - rapidsai - rapidsai-nightly -- dask/label/dev - conda-forge - nvidia dependencies: -- breathe - c-compiler -- clang-tools=16.0.6 -- clang==16.0.6 - cmake>=3.26.4 - cuda-python>=11.7.1,<12.0a0 - cuda-version=11.8 - cudatoolkit -- cupy>=12.2.0 +- cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- doxygen>=1.8.20 +- dlpack +- doxygen=1.9.1 - gcc_linux-64=11.* - gmock>=1.13.0 - graphviz - gtest>=1.13.0 +- hypothesis>=6.0,<7 +- ipykernel - ipython - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 @@ -34,20 +33,27 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 +- libraft-headers==24.4.* +- libraft==24.4.* +- librmm==24.4.* +- nbsphinx - ninja -- numpy>=1.23 - numpydoc - nvcc_linux-64=11.8 -- pre-commit -- pydata-sphinx-theme +- pydata-sphinx-theme!=0.14.2 - pylibraft==24.4.* -- pytest +- pytest-benchmark +- pytest-cases - pytest-cov +- pytest-xdist +- pytest==7.* +- python>=3.9,<3.12 - recommonmark - rmm==24.4.* -- scikit-learn -- scipy +- scikit-build-core>=0.7.0 +- scikit-learn==1.2 - sphinx-copybutton - sphinx-markdown-tables +- sphinx<6 - sysroot_linux-64==2.17 name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-120_arch-aarch64.yaml b/conda/environments/all_cuda-120_arch-aarch64.yaml deleted file mode 100644 index 312238c5d..000000000 --- a/conda/environments/all_cuda-120_arch-aarch64.yaml +++ /dev/null @@ -1,51 +0,0 @@ -# This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
-channels: -- rapidsai -- rapidsai-nightly -- dask/label/dev -- conda-forge -- nvidia -dependencies: -- breathe -- c-compiler -- clang-tools=16.0.6 -- clang==16.0.6 -- cmake>=3.26.4 -- cuda-cudart-dev -- cuda-nvcc -- cuda-nvtx-dev -- cuda-profiler-api -- cuda-python>=12.0,<13.0a0 -- cuda-version=12.0 -- cupy>=12.0.0 -- cxx-compiler -- cython>=3.0.0 -- doxygen>=1.8.20 -- gcc_linux-aarch64=11.* -- gmock>=1.13.0 -- graphviz -- gtest>=1.13.0 -- ipython -- libcublas-dev -- libcurand-dev -- libcusolver-dev -- libcusparse-dev -- nccl>=2.9.9 -- ninja -- numpy>=1.23 -- numpydoc -- pre-commit -- pydata-sphinx-theme -- pylibraft==24.4.* -- pytest -- pytest-cov -- recommonmark -- rmm==24.4.* -- scikit-build-core[pyproject]>=0.7.0 -- scikit-learn -- scipy -- sphinx-copybutton -- sphinx-markdown-tables -- sysroot_linux-aarch64==2.17 -name: all_cuda-120_arch-aarch64 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml deleted file mode 100644 index 3bac8ff4b..000000000 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ /dev/null @@ -1,51 +0,0 @@ -# This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. -channels: -- rapidsai -- rapidsai-nightly -- dask/label/dev -- conda-forge -- nvidia -dependencies: -- breathe -- c-compiler -- clang-tools=16.0.6 -- clang==16.0.6 -- cmake>=3.26.4 -- cuda-cudart-dev -- cuda-nvcc -- cuda-nvtx-dev -- cuda-profiler-api -- cuda-python>=12.0,<13.0a0 -- cuda-version=12.0 -- cupy>=12.0.0 -- cxx-compiler -- cython>=3.0.0 -- doxygen>=1.8.20 -- gcc_linux-64=11.* -- gmock>=1.13.0 -- graphviz -- gtest>=1.13.0 -- ipython -- libcublas-dev -- libcurand-dev -- libcusolver-dev -- libcusparse-dev -- nccl>=2.9.9 -- ninja -- numpy>=1.23 -- numpydoc -- pre-commit -- pydata-sphinx-theme -- pylibraft==24.4.* -- pytest -- pytest-cov -- recommonmark -- rmm==24.4.* -- scikit-build-core[pyproject]>=0.7.0 -- scikit-learn -- scipy -- sphinx-copybutton -- sphinx-markdown-tables -- sysroot_linux-64==2.17 -name: all_cuda-120_arch-x86_64 diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml index 55ef10e38..d883ec234 100644 --- a/conda/environments/all_cuda-122_arch-x86_64.yaml +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -3,47 +3,53 @@ channels: - rapidsai - rapidsai-nightly -- dask/label/dev - conda-forge - nvidia dependencies: -- breathe - c-compiler -- clang-tools=16.0.6 -- clang==16.0.6 - cmake>=3.26.4 - cuda-cudart-dev - cuda-nvcc - cuda-profiler-api - cuda-python>=12.0,<13.0a0 - cuda-version=12.2 -- cupy>=12.2.0 +- cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- doxygen>=1.8.20 +- dlpack +- doxygen=1.9.1 - gcc_linux-64=11.* - gmock>=1.13.0 - graphviz - gtest>=1.13.0 +- hypothesis>=6.0,<7 +- ipykernel - ipython - libcublas-dev - libcufft-dev - libcurand-dev - libcusolver-dev - libcusparse-dev +- libraft-headers==24.4.* +- libraft==24.4.* +- librmm==24.4.* +- nbsphinx - ninja -- numpy>=1.23 - numpydoc -- pre-commit -- pydata-sphinx-theme +- pydata-sphinx-theme!=0.14.2 - pylibraft==24.4.* -- pytest +- pytest-benchmark +- pytest-cases - pytest-cov +- pytest-xdist +- pytest==7.* +- python>=3.9,<3.12 - recommonmark - rmm==24.4.* -- scikit-learn -- scipy +- scikit-build-core>=0.7.0 +- scikit-learn==1.2 - sphinx-copybutton - sphinx-markdown-tables +- sphinx<6 - sysroot_linux-64==2.17 name: all_cuda-122_arch-x86_64 diff --git 
a/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml b/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml new file mode 100644 index 000000000..0039af7d5 --- /dev/null +++ b/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml @@ -0,0 +1,36 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +channels: +- rapidsai +- rapidsai-nightly +- conda-forge +- nvidia +dependencies: +- c-compiler +- clang-tools==15.0.7 +- clang==15.0.7 +- cmake>=3.26.4 +- cuda-version=11.8 +- cudatoolkit +- cxx-compiler +- gcc_linux-64=11.* +- gmock>=1.13.0 +- gtest>=1.13.0 +- libcublas-dev=11.11.3.6 +- libcublas=11.11.3.6 +- libcufft-dev=10.9.0.58 +- libcufft=10.9.0.58 +- libcurand-dev=10.3.0.86 +- libcurand=10.3.0.86 +- libcusolver-dev=11.4.1.48 +- libcusolver=11.4.1.48 +- libcusparse-dev=11.7.5.86 +- libcusparse=11.7.5.86 +- libraft-headers==24.4.* +- libraft==24.4.* +- librmm==24.4.* +- ninja +- nvcc_linux-64=11.8 +- sysroot_linux-64==2.17 +- tomli +name: clang_tidy_cuda-118_arch-x86_64 diff --git a/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml b/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml new file mode 100644 index 000000000..a94e1b542 --- /dev/null +++ b/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml @@ -0,0 +1,33 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +channels: +- rapidsai +- rapidsai-nightly +- conda-forge +- nvidia +dependencies: +- c-compiler +- cmake>=3.26.4 +- cuda-version=11.8 +- cudatoolkit +- cxx-compiler +- gcc_linux-64=11.* +- gmock>=1.13.0 +- gtest>=1.13.0 +- libcublas-dev=11.11.3.6 +- libcublas=11.11.3.6 +- libcufft-dev=10.9.0.58 +- libcufft=10.9.0.58 +- libcurand-dev=10.3.0.86 +- libcurand=10.3.0.86 +- libcusolver-dev=11.4.1.48 +- libcusolver=11.4.1.48 +- libcusparse-dev=11.7.5.86 +- libcusparse=11.7.5.86 +- libraft-headers==24.4.* +- libraft==24.4.* +- librmm==24.4.* +- ninja +- nvcc_linux-64=11.8 +- sysroot_linux-64==2.17 +name: cpp_all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-122_arch-aarch64.yaml b/conda/environments/cpp_all_cuda-122_arch-x86_64.yaml similarity index 51% rename from conda/environments/all_cuda-122_arch-aarch64.yaml rename to conda/environments/cpp_all_cuda-122_arch-x86_64.yaml index af754dd35..ff417e96a 100644 --- a/conda/environments/all_cuda-122_arch-aarch64.yaml +++ b/conda/environments/cpp_all_cuda-122_arch-x86_64.yaml @@ -3,47 +3,27 @@ channels: - rapidsai - rapidsai-nightly -- dask/label/dev - conda-forge - nvidia dependencies: -- breathe - c-compiler -- clang-tools=16.0.6 -- clang==16.0.6 - cmake>=3.26.4 - cuda-cudart-dev - cuda-nvcc - cuda-profiler-api -- cuda-python>=12.0,<13.0a0 - cuda-version=12.2 -- cupy>=12.2.0 - cxx-compiler -- cython>=3.0.0 -- doxygen>=1.8.20 -- gcc_linux-aarch64=11.* +- gcc_linux-64=11.* - gmock>=1.13.0 -- graphviz - gtest>=1.13.0 -- ipython - libcublas-dev - libcufft-dev - libcurand-dev - libcusolver-dev - libcusparse-dev +- libraft-headers==24.4.* +- libraft==24.4.* +- librmm==24.4.* - ninja -- numpy>=1.23 -- numpydoc -- pre-commit -- pydata-sphinx-theme -- pylibraft==24.4.* -- pytest -- pytest-cov -- recommonmark -- rmm==24.4.* -- scikit-learn -- scipy -- sphinx-copybutton -- sphinx-markdown-tables -- sysroot_linux-aarch64==2.17 -name: all_cuda-122_arch-aarch64 +- sysroot_linux-64==2.17 +name: cpp_all_cuda-122_arch-x86_64 diff --git a/dependencies.yaml 
b/dependencies.yaml index 29ab521dc..0a3918b40 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -4,94 +4,125 @@ files: output: conda matrix: cuda: ["11.8", "12.2"] - arch: [x86_64, aarch64] + arch: [x86_64] includes: - - build - - build_cuvs + - common_build + - cuda - cuda_version + - docs + - py_build + - py_run + - py_version + - test_python + cpp_all: + output: conda + matrix: + cuda: ["11.8", "12.2"] + arch: [x86_64] + includes: + - common_build - cuda - - develop + - cuda_version + checks: + output: none + includes: - checks - - build_wheels - - test_libcuvs + - py_version + clang_tidy: + output: conda + matrix: + cuda: ["11.8"] + arch: [x86_64] + includes: + - clang_tidy + - common_build + - cuda + - cuda_version + docs: + output: none + includes: + - cuda_version - docs - - run_cuvs - - test_python_common - - test_cuvs - - cupy + - py_version test_cpp: output: none includes: - - cuda - cuda_version - test_libcuvs + - test_cpp test_python: output: none includes: - - cuda - cuda_version - py_version - - test_python_common - test_cuvs - - cupy - checks: + - test_python + test_notebooks: output: none includes: - - checks - - py_version - docs: - output: none - includes: - - test_cuvs - - cupy - - cuda - cuda_version - - docs + - py_run - py_version - py_build_cuvs: + - test_cuvs + - test_notebooks + py_build: output: pyproject pyproject_dir: python/cuvs extras: table: build-system includes: - - build - - build_cuvs - - build_wheels - py_run_cuvs: + - common_build + - py_build + py_run: output: pyproject pyproject_dir: python/cuvs extras: table: project includes: - - run_cuvs - py_test_cuvs: + - py_run + py_test: output: pyproject pyproject_dir: python/cuvs extras: table: project.optional-dependencies key: test includes: - - test_python_common - - test_cuvs - - cupy + - test_python channels: - rapidsai - rapidsai-nightly - - dask/label/dev - conda-forge - nvidia dependencies: - build: + checks: + common: + - output_types: [conda, requirements] + packages: + - pre-commit + clang_tidy: + common: + - output_types: [conda, requirements] + packages: + # clang 15 required by libcudacxx. 
+ - clang==15.0.7 + - clang-tools==15.0.7 + - ninja + - tomli + common_build: common: - output_types: [conda, requirements, pyproject] packages: - &cmake_ver cmake>=3.26.4 - - cython>=3.0.0 - ninja - - output_types: [conda] + - output_types: conda packages: - c-compiler - cxx-compiler + - gmock>=1.13.0 + - gtest>=1.13.0 + - libraft==24.4.* + - libraft-headers==24.4.* + - librmm==24.4.* specific: - output_types: conda matrices: @@ -107,31 +138,31 @@ dependencies: - sysroot_linux-aarch64==2.17 - output_types: conda matrices: - - matrix: {cuda: "12.2"} - packages: [cuda-version=12.2, cuda-nvcc] - - matrix: {cuda: "11.8", arch: x86_64} - packages: [nvcc_linux-64=11.8] - - matrix: {cuda: "11.8", arch: aarch64} - packages: [nvcc_linux-aarch64=11.8] - - matrix: {cuda: "11.5", arch: x86_64} - packages: [nvcc_linux-64=11.5] - - matrix: {cuda: "11.5", arch: aarch64} - packages: [nvcc_linux-aarch64=11.5] - - matrix: {cuda: "11.4", arch: x86_64} - packages: [nvcc_linux-64=11.4] - - matrix: {cuda: "11.4", arch: aarch64} - packages: [nvcc_linux-aarch64=11.4] - - matrix: {cuda: "11.2", arch: x86_64} - packages: [nvcc_linux-64=11.2] - - matrix: {cuda: "11.2", arch: aarch64} - packages: [nvcc_linux-aarch64=11.2] - - build_cuvs: + - matrix: + arch: x86_64 + cuda: "11.8" + packages: + - nvcc_linux-64=11.8 + - matrix: + arch: aarch64 + cuda: "11.8" + packages: + - nvcc_linux-aarch64=11.8 + - matrix: + cuda: "12.*" + packages: + - cuda-nvcc + py_build: common: - - output_types: [conda] + - output_types: [conda, requirements, pyproject] + packages: + - cython>=3.0.0 + - output_types: conda packages: - - &rmm_conda rmm==24.4.* - &pylibraft_conda pylibraft==24.4.* + - &rmm_conda rmm==24.4.* + - scikit-build-core>=0.7.0 + - dlpack - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -147,32 +178,51 @@ dependencies: - matrix: cuda: "12.*" packages: - - &cuda_python12 cuda-python>=12.0,<13.0a0 + - cuda-python>=12.0,<13.0a0 - matrix: # All CUDA 11 versions packages: - - &cuda_python11 cuda-python>=11.7.1,<12.0a0 + - cuda-python>=11.7.1,<12.0a0 - output_types: [requirements, pyproject] matrices: - matrix: {cuda: "12.*"} packages: - - &pylibraft_cu12 pylibraft-cu12==24.4.* - - &rmm_cu12 rmm-cu12==24.4.* + - pylibraft-cu12==24.4.* + - rmm-cu12==24.4.* - matrix: {cuda: "11.*"} packages: - &pylibraft_cu11 pylibraft-cu11==24.4.* - &rmm_cu11 rmm-cu11==24.4.* - {matrix: null, packages: [*pylibraft_conda, *rmm_conda] } - checks: - common: - - output_types: [conda, requirements] - packages: - - pre-commit - develop: + + py_run: common: - output_types: conda packages: - - clang==16.0.6 - - clang-tools=16.0.6 + - &cupy_conda cupy>=12.0.0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for pylibraft and rmm. 
+ - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: {cuda: "12.*"} + packages: + - cupy-cuda12x>=12.0.0 + - pylibraft-cu12==24.4.* + - rmm-cu12==24.4.* + - matrix: {cuda: "11.*"} + packages: &py_run_packages_cu11 + - &cupy_pyproject_cu11 cupy-cuda11x>=12.0.0 + - *pylibraft_cu11 + - *rmm_cu11 + - matrix: null + packages: + - *cupy_pyproject_cu11 + - *pylibraft_conda + - *rmm_conda cuda_version: specific: - output_types: conda @@ -273,71 +323,25 @@ dependencies: - *libcusolver114 - *libcusparse_dev114 - *libcusparse114 - - cupy: - common: - - output_types: conda - packages: - - cupy>=12.2.0 - specific: - - output_types: [requirements, pyproject] - matrices: - # All CUDA 12 + x86_64 versions - - matrix: {cuda: "12.2", arch: x86_64} - packages: &cupy_packages_cu12_x86_64 - - &cupy_cu12_x86_64 cupy-cuda12x>=12.2.0 - - {matrix: {cuda: "12.1", arch: x86_64}, packages: *cupy_packages_cu12_x86_64} - - {matrix: {cuda: "12.2", arch: x86_64}, packages: *cupy_packages_cu12_x86_64} - # All CUDA 12 + aarch64 versions - - matrix: {cuda: "12.2", arch: aarch64} - packages: &cupy_packages_cu12_aarch64 - - &cupy_cu12_aarch64 cupy-cuda12x -f https://pip.cupy.dev/aarch64 # TODO: Verify that this works. - - {matrix: {cuda: "12.1", arch: aarch64}, packages: *cupy_packages_cu12_aarch64} - - {matrix: {cuda: "12.2", arch: aarch64}, packages: *cupy_packages_cu12_aarch64} - - # All CUDA 11 + x86_64 versions - - matrix: {cuda: "11.8", arch: x86_64} - packages: &cupy_packages_cu11_x86_64 - - cupy-cuda11x>=12.2.0 - - {matrix: {cuda: "11.5", arch: x86_64}, packages: *cupy_packages_cu11_x86_64} - - {matrix: {cuda: "11.4", arch: x86_64}, packages: *cupy_packages_cu11_x86_64} - - {matrix: {cuda: "11.2", arch: x86_64}, packages: *cupy_packages_cu11_x86_64} - - # All CUDA 11 + aarch64 versions - - matrix: {cuda: "11.8", arch: aarch64} - packages: &cupy_packages_cu11_aarch64 - - cupy-cuda11x -f https://pip.cupy.dev/aarch64 # TODO: Verify that this works. 
- - {matrix: {cuda: "11.5", arch: aarch64}, packages: *cupy_packages_cu11_aarch64} - - {matrix: {cuda: "11.4", arch: aarch64}, packages: *cupy_packages_cu11_aarch64} - - {matrix: {cuda: "11.2", arch: aarch64}, packages: *cupy_packages_cu11_aarch64} - - {matrix: null, packages: [cupy-cuda11x>=12.2.0]} - - test_libcuvs: - common: - - output_types: [conda] - packages: - - *cmake_ver - - gtest>=1.13.0 - - gmock>=1.13.0 docs: common: - - output_types: [conda] + - output_types: [conda, requirements] packages: - - breathe - - doxygen>=1.8.20 - graphviz - ipython + - ipykernel + - nbsphinx - numpydoc - - pydata-sphinx-theme + # https://github.com/pydata/pydata-sphinx-theme/issues/1539 + - pydata-sphinx-theme!=0.14.2 - recommonmark + - &scikit_learn scikit-learn==1.2 + - sphinx<6 - sphinx-copybutton - sphinx-markdown-tables - build_wheels: - common: - - output_types: [requirements, pyproject] + - output_types: conda packages: - - wheel - - setuptools + - doxygen=1.9.1 py_version: specific: - output_types: conda @@ -350,59 +354,47 @@ dependencies: py: "3.10" packages: - python=3.10 + - matrix: + py: "3.11" + packages: + - python=3.11 - matrix: packages: - - python>=3.9,<3.11 - run_cuvs: + - python>=3.9,<3.12 + test_libcuvs: common: - - output_types: [conda, pyproject] + - output_types: conda packages: - - &numpy numpy>=1.23 - - output_types: [conda] + - libcuvs==24.4.* + - libcuvs-tests==24.4.* + test_cuvs: + common: + - output_types: conda packages: - - *rmm_conda - - *pylibraft_conda - - output_types: requirements + - libcuvs==24.4.* + - cuvs==24.4.* + test_cpp: + common: + - output_types: conda packages: - # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for cudf and rmm. - - --extra-index-url=https://pypi.nvidia.com - - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple - specific: - - output_types: [conda, requirements, pyproject] - matrices: - - matrix: - cuda: "12.2" - packages: - - *cuda_python12 - - matrix: # All CUDA 11 versions - packages: - - *cuda_python11 - - output_types: [requirements, pyproject] - matrices: - - matrix: {cuda: "12.2"} - packages: &run_cuvs_packages_cu12 - - *rmm_cu12 - - *pylibraft_cu12 - - {matrix: {cuda: "12.1"}, packages: *run_cuvs_packages_cu12} - - {matrix: {cuda: "12.2"}, packages: *run_cuvs_packages_cu12} - - matrix: {cuda: "11.8"} - packages: &run_cuvs_packages_cu11 - - *rmm_cu11 - - *pylibraft_cu11 - - {matrix: {cuda: "11.5"}, packages: *run_cuvs_packages_cu11} - - {matrix: {cuda: "11.4"}, packages: *run_cuvs_packages_cu11} - - {matrix: {cuda: "11.2"}, packages: *run_cuvs_packages_cu11} - - {matrix: null, packages: [*rmm_conda, *pylibraft_conda]} - test_python_common: + - *cmake_ver + test_python: common: - output_types: [conda, requirements, pyproject] packages: - - pytest + - hypothesis>=6.0,<7 + - numpydoc + - pytest==7.* + - pytest-benchmark + - pytest-cases - pytest-cov - test_cuvs: + - pytest-xdist + test_notebooks: common: - - output_types: [conda, requirements, pyproject] + - output_types: [conda, requirements] packages: - - scikit-learn - - scipy + - jupyter + - matplotlib + - numpy + - pandas + - seaborn diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index ed78ee18a..3aa87316a 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -22,8 +22,6 @@ requires = [ "pylibraft==24.4.*", "rmm==24.4.*", "scikit-build-core[pyproject]>=0.7.0", - "setuptools", - "wheel", ] # This list was generated by `rapids-dependency-file-generator`. 
To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. build-backend = "scikit_build_core.build" @@ -38,8 +36,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ - "cuda-python>=11.7.1,<12.0a0", - "numpy>=1.23", + "cupy-cuda11x>=12.0.0", "pylibraft==24.4.*", "rmm==24.4.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. @@ -52,11 +49,13 @@ classifiers = [ [project.optional-dependencies] test = [ - "cupy-cuda11x>=12.2.0", - "pytest", + "hypothesis>=6.0,<7", + "numpydoc", + "pytest-benchmark", + "pytest-cases", "pytest-cov", - "scikit-learn", - "scipy", + "pytest-xdist", + "pytest==7.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] From bb0873824d722be1bde70f39ae55e791f592b58f Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 29 Feb 2024 23:48:46 -0600 Subject: [PATCH 21/45] FIX rdfg fixes --- .../all_cuda-118_arch-aarch64.yaml | 53 ------------------- .../all_cuda-122_arch-x86_64.yaml | 2 +- dependencies.yaml | 19 ++++++- 3 files changed, 18 insertions(+), 56 deletions(-) delete mode 100644 conda/environments/all_cuda-118_arch-aarch64.yaml diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml deleted file mode 100644 index 41670448b..000000000 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
-channels: -- rapidsai -- rapidsai-nightly -- dask/label/dev -- conda-forge -- nvidia -dependencies: -- breathe -- c-compiler -- clang-tools=16.0.6 -- clang==16.0.6 -- cmake>=3.26.4 -- cuda-python>=11.7.1,<12.0a0 -- cuda-version=11.8 -- cudatoolkit -- cupy>=12.2.0 -- cxx-compiler -- cython>=3.0.0 -- doxygen>=1.8.20 -- gcc_linux-aarch64=11.* -- gmock>=1.13.0 -- graphviz -- gtest>=1.13.0 -- ipython -- libcublas-dev=11.11.3.6 -- libcublas=11.11.3.6 -- libcufft-dev=10.9.0.58 -- libcufft=10.9.0.58 -- libcurand-dev=10.3.0.86 -- libcurand=10.3.0.86 -- libcusolver-dev=11.4.1.48 -- libcusolver=11.4.1.48 -- libcusparse-dev=11.7.5.86 -- libcusparse=11.7.5.86 -- ninja -- numpy>=1.23 -- numpydoc -- nvcc_linux-aarch64=11.8 -- pre-commit -- pydata-sphinx-theme -- pylibraft==24.4.* -- pytest -- pytest-cov -- recommonmark -- rmm==24.4.* -- scikit-learn -- scipy -- sphinx-copybutton -- sphinx-markdown-tables -- sysroot_linux-aarch64==2.17 -name: all_cuda-118_arch-aarch64 diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml index d883ec234..836cc4fad 100644 --- a/conda/environments/all_cuda-122_arch-x86_64.yaml +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -11,7 +11,7 @@ dependencies: - cuda-cudart-dev - cuda-nvcc - cuda-profiler-api -- cuda-python>=12.0,<13.0a0 +- cuda-python>=11.7.1,<12.0a0 - cuda-version=12.2 - cupy>=12.0.0 - cxx-compiler diff --git a/dependencies.yaml b/dependencies.yaml index 0a3918b40..42a3a5e2c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -149,7 +149,12 @@ dependencies: packages: - nvcc_linux-aarch64=11.8 - matrix: - cuda: "12.*" + cuda: "12.0" + packages: + - cuda-nvcc + - matrix: + arch: x86_64 + cuda: "12.2" packages: - cuda-nvcc py_build: @@ -256,7 +261,17 @@ dependencies: - output_types: conda matrices: - matrix: - cuda: "12.*" + cuda: "12.2" + packages: + - cuda-cudart-dev + - cuda-profiler-api + - libcublas-dev + - libcufft-dev + - libcurand-dev + - libcusolver-dev + - libcusparse-dev + - matrix: + cuda: "12.0" packages: - cuda-cudart-dev - cuda-profiler-api From dd8e0b0d41f749d6e9b18b12db5f4e6f8c45e5a8 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 29 Feb 2024 23:59:47 -0600 Subject: [PATCH 22/45] FIX rdfg fixes --- conda/environments/all_cuda-122_arch-x86_64.yaml | 2 +- dependencies.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml index 836cc4fad..d883ec234 100644 --- a/conda/environments/all_cuda-122_arch-x86_64.yaml +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -11,7 +11,7 @@ dependencies: - cuda-cudart-dev - cuda-nvcc - cuda-profiler-api -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=12.0,<13.0a0 - cuda-version=12.2 - cupy>=12.0.0 - cxx-compiler diff --git a/dependencies.yaml b/dependencies.yaml index 42a3a5e2c..99a32316d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -189,7 +189,7 @@ dependencies: - cuda-python>=11.7.1,<12.0a0 - output_types: [requirements, pyproject] matrices: - - matrix: {cuda: "12.*"} + - matrix: {cuda: "12.2"} packages: - pylibraft-cu12==24.4.* - rmm-cu12==24.4.* From bcccc6f0d1cd200d1c643abfaf54a7babdfcafd0 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Mar 2024 00:04:00 -0600 Subject: [PATCH 23/45] FIX rdfg fixes --- conda/environments/all_cuda-122_arch-x86_64.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml 
b/conda/environments/all_cuda-122_arch-x86_64.yaml index d883ec234..836cc4fad 100644 --- a/conda/environments/all_cuda-122_arch-x86_64.yaml +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -11,7 +11,7 @@ dependencies: - cuda-cudart-dev - cuda-nvcc - cuda-profiler-api -- cuda-python>=12.0,<13.0a0 +- cuda-python>=11.7.1,<12.0a0 - cuda-version=12.2 - cupy>=12.0.0 - cxx-compiler From 3c4ef522816d24b2f5ebee54df7447e8ea912016 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Mar 2024 12:31:21 -0600 Subject: [PATCH 24/45] FIX wheel CI fixes --- ci/build_wheel.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 3cc7f06c8..680467f87 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -38,9 +38,14 @@ fi sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} -if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then - sed -i "s/cuda-python[<=>\.,0-9a]*/cuda-python>=12.0,<13.0a0/g" ${pyproject_file} - sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file} +# if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then +# sed -i "s/cuda-python[<=>\.,0-9a]*/cuda-python>=12.0,<13.0a0/g" ${pyproject_file} +# sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file} +# fi + +if [[ ${package_name} == "raft-dask" ]]; then + sed -r -i "s/pylibraft==(.*)\"/pylibraft${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file} + sed -r -i "s/rmm==(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file} fi cd "${package_dir}" From 95a671523b99447b728e2b8b0c6f4287db9f033a Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Mar 2024 12:39:31 -0600 Subject: [PATCH 25/45] FIX remove conditional in wheel CI --- ci/build_wheel.sh | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 680467f87..54caf68d7 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -38,15 +38,8 @@ fi sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} -# if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then -# sed -i "s/cuda-python[<=>\.,0-9a]*/cuda-python>=12.0,<13.0a0/g" ${pyproject_file} -# sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file} -# fi - -if [[ ${package_name} == "raft-dask" ]]; then - sed -r -i "s/pylibraft==(.*)\"/pylibraft${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file} - sed -r -i "s/rmm==(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file} -fi +sed -r -i "s/pylibraft==(.*)\"/pylibraft${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file} +sed -r -i "s/rmm==(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file} cd "${package_dir}" From 27774df159584baebc579451266336b6ef5ad830 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Mar 2024 13:01:23 -0600 Subject: [PATCH 26/45] FIX undo prior changes to dependencies and update it now that wheel patches are in CI script --- .../all_cuda-118_arch-aarch64.yaml | 53 +++ .../all_cuda-118_arch-x86_64.yaml | 28 +- .../all_cuda-122_arch-aarch64.yaml | 49 +++ .../all_cuda-122_arch-x86_64.yaml | 28 +- dependencies.yaml | 345 ++++++++---------- python/cuvs/pyproject.toml | 11 +- 6 files changed, 283 insertions(+), 231 deletions(-) create mode 100644 conda/environments/all_cuda-118_arch-aarch64.yaml create mode 100644 conda/environments/all_cuda-122_arch-aarch64.yaml diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml 
new file mode 100644 index 000000000..82d37bef0 --- /dev/null +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -0,0 +1,53 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- conda-forge +- nvidia +dependencies: +- breathe +- c-compiler +- clang-tools=16.0.6 +- clang==16.0.6 +- cmake>=3.26.4 +- cuda-nvtx=11.8 +- cuda-profiler-api=11.8.86 +- cuda-python>=11.7.1,<12.0a0 +- cuda-version=11.8 +- cudatoolkit +- cupy>=12.0.0 +- cxx-compiler +- cython>=3.0.0 +- doxygen>=1.8.20 +- gcc_linux-aarch64=11.* +- gmock>=1.13.0 +- graphviz +- gtest>=1.13.0 +- ipython +- libcublas-dev=11.11.3.6 +- libcublas=11.11.3.6 +- libcurand-dev=10.3.0.86 +- libcurand=10.3.0.86 +- libcusolver-dev=11.4.1.48 +- libcusolver=11.4.1.48 +- libcusparse-dev=11.7.5.86 +- libcusparse=11.7.5.86 +- nccl>=2.9.9 +- ninja +- numpy>=1.23 +- numpydoc +- nvcc_linux-aarch64=11.8 +- pre-commit +- pydata-sphinx-theme +- pylibraft==24.4.* +- pytest-cov +- pytest==7.* +- recommonmark +- rmm==24.4.* +- scikit-build-core>=0.7.0 +- sphinx-copybutton +- sphinx-markdown-tables +- sysroot_linux-aarch64==2.17 +name: all_cuda-118_arch-aarch64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 87bb5c48f..f1ed30bf9 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -3,57 +3,51 @@ channels: - rapidsai - rapidsai-nightly +- dask/label/dev - conda-forge - nvidia dependencies: +- breathe - c-compiler +- clang-tools=16.0.6 +- clang==16.0.6 - cmake>=3.26.4 +- cuda-nvtx=11.8 +- cuda-profiler-api=11.8.86 - cuda-python>=11.7.1,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dlpack -- doxygen=1.9.1 +- doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 - graphviz - gtest>=1.13.0 -- hypothesis>=6.0,<7 -- ipykernel - ipython - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 -- libcufft-dev=10.9.0.58 -- libcufft=10.9.0.58 - libcurand-dev=10.3.0.86 - libcurand=10.3.0.86 - libcusolver-dev=11.4.1.48 - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- libraft-headers==24.4.* -- libraft==24.4.* -- librmm==24.4.* -- nbsphinx +- nccl>=2.9.9 - ninja +- numpy>=1.23 - numpydoc - nvcc_linux-64=11.8 -- pydata-sphinx-theme!=0.14.2 +- pre-commit +- pydata-sphinx-theme - pylibraft==24.4.* -- pytest-benchmark -- pytest-cases - pytest-cov -- pytest-xdist - pytest==7.* -- python>=3.9,<3.12 - recommonmark - rmm==24.4.* - scikit-build-core>=0.7.0 -- scikit-learn==1.2 - sphinx-copybutton - sphinx-markdown-tables -- sphinx<6 - sysroot_linux-64==2.17 name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-122_arch-aarch64.yaml b/conda/environments/all_cuda-122_arch-aarch64.yaml new file mode 100644 index 000000000..759ad2f88 --- /dev/null +++ b/conda/environments/all_cuda-122_arch-aarch64.yaml @@ -0,0 +1,49 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- conda-forge +- nvidia +dependencies: +- breathe +- c-compiler +- clang-tools=16.0.6 +- clang==16.0.6 +- cmake>=3.26.4 +- cuda-cudart-dev +- cuda-nvcc +- cuda-nvtx-dev +- cuda-profiler-api +- cuda-python>=12.0,<13.0a0 +- cuda-version=12.2 +- cupy>=12.0.0 +- cxx-compiler +- cython>=3.0.0 +- doxygen>=1.8.20 +- gcc_linux-aarch64=11.* +- gmock>=1.13.0 +- graphviz +- gtest>=1.13.0 +- ipython +- libcublas-dev +- libcurand-dev +- libcusolver-dev +- libcusparse-dev +- nccl>=2.9.9 +- ninja +- numpy>=1.23 +- numpydoc +- pre-commit +- pydata-sphinx-theme +- pylibraft==24.4.* +- pytest-cov +- pytest==7.* +- recommonmark +- rmm==24.4.* +- scikit-build-core>=0.7.0 +- sphinx-copybutton +- sphinx-markdown-tables +- sysroot_linux-aarch64==2.17 +name: all_cuda-122_arch-aarch64 diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml index 836cc4fad..e28e8671e 100644 --- a/conda/environments/all_cuda-122_arch-x86_64.yaml +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -3,53 +3,47 @@ channels: - rapidsai - rapidsai-nightly +- dask/label/dev - conda-forge - nvidia dependencies: +- breathe - c-compiler +- clang-tools=16.0.6 +- clang==16.0.6 - cmake>=3.26.4 - cuda-cudart-dev - cuda-nvcc +- cuda-nvtx-dev - cuda-profiler-api -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=12.0,<13.0a0 - cuda-version=12.2 - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dlpack -- doxygen=1.9.1 +- doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 - graphviz - gtest>=1.13.0 -- hypothesis>=6.0,<7 -- ipykernel - ipython - libcublas-dev -- libcufft-dev - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==24.4.* -- libraft==24.4.* -- librmm==24.4.* -- nbsphinx +- nccl>=2.9.9 - ninja +- numpy>=1.23 - numpydoc -- pydata-sphinx-theme!=0.14.2 +- pre-commit +- pydata-sphinx-theme - pylibraft==24.4.* -- pytest-benchmark -- pytest-cases - pytest-cov -- pytest-xdist - pytest==7.* -- python>=3.9,<3.12 - recommonmark - rmm==24.4.* - scikit-build-core>=0.7.0 -- scikit-learn==1.2 - sphinx-copybutton - sphinx-markdown-tables -- sphinx<6 - sysroot_linux-64==2.17 name: all_cuda-122_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index 99a32316d..26f6428f1 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -4,125 +4,95 @@ files: output: conda matrix: cuda: ["11.8", "12.2"] - arch: [x86_64] + arch: [x86_64, aarch64] includes: - - common_build + - build + - build_py_cuvs - cuda - cuda_version - - docs - - py_build - - py_run - - py_version - - test_python - cpp_all: - output: conda - matrix: - cuda: ["11.8", "12.2"] - arch: [x86_64] - includes: - - common_build - - cuda - - cuda_version - checks: - output: none - includes: + - develop - checks - - py_version - clang_tidy: - output: conda - matrix: - cuda: ["11.8"] - arch: [x86_64] - includes: - - clang_tidy - - common_build - - cuda - - cuda_version - docs: - output: none - includes: - - cuda_version + - build_wheels + - test_libcuvs - docs - - py_version + - run_py_cuvs + - test_python_common + - test_py_cuvs + - cupy test_cpp: output: none includes: - cuda_version - test_libcuvs - - test_cpp test_python: output: none includes: - cuda_version - py_version - - test_cuvs - - test_python - test_notebooks: + - test_python_common + - test_py_cuvs + - cupy + checks: + output: none + includes: + - checks + - py_version + docs: output: none includes: - cuda_version - - py_run + - cupy + - docs - py_version - - test_cuvs - - test_notebooks - 
py_build: + - test_py_cuvs + py_build_py_cuvs: output: pyproject pyproject_dir: python/cuvs extras: table: build-system includes: - - common_build - - py_build - py_run: + - build + - build_py_cuvs + py_run_py_cuvs: output: pyproject pyproject_dir: python/cuvs extras: table: project includes: - - py_run - py_test: + - run_py_cuvs + py_test_py_cuvs: output: pyproject pyproject_dir: python/cuvs extras: table: project.optional-dependencies key: test includes: - - test_python + - test_python_common + - test_py_cuvs + - cupy channels: - rapidsai - rapidsai-nightly + - dask/label/dev - conda-forge - nvidia dependencies: - checks: - common: - - output_types: [conda, requirements] - packages: - - pre-commit - clang_tidy: - common: - - output_types: [conda, requirements] - packages: - # clang 15 required by libcudacxx. - - clang==15.0.7 - - clang-tools==15.0.7 - - ninja - - tomli - common_build: + build: common: - output_types: [conda, requirements, pyproject] packages: - &cmake_ver cmake>=3.26.4 + - cython>=3.0.0 - ninja - - output_types: conda + - output_types: [conda] packages: - c-compiler - cxx-compiler - - gmock>=1.13.0 - - gtest>=1.13.0 - - libraft==24.4.* - - libraft-headers==24.4.* - - librmm==24.4.* + - nccl>=2.9.9 + - scikit-build-core>=0.7.0 + - output_types: [requirements, pyproject] + packages: + - scikit-build-core[pyproject]>=0.7.0 specific: - output_types: conda matrices: @@ -138,96 +108,69 @@ dependencies: - sysroot_linux-aarch64==2.17 - output_types: conda matrices: - - matrix: - arch: x86_64 - cuda: "11.8" - packages: - - nvcc_linux-64=11.8 - - matrix: - arch: aarch64 - cuda: "11.8" - packages: - - nvcc_linux-aarch64=11.8 - - matrix: - cuda: "12.0" - packages: - - cuda-nvcc - - matrix: - arch: x86_64 - cuda: "12.2" - packages: - - cuda-nvcc - py_build: + - matrix: {cuda: "12.*"} + packages: [cuda-nvcc] + - matrix: {cuda: "11.8", arch: x86_64} + packages: [nvcc_linux-64=11.8] + - matrix: {cuda: "11.8", arch: aarch64} + packages: [nvcc_linux-aarch64=11.8] + - matrix: {cuda: "11.5", arch: x86_64} + packages: [nvcc_linux-64=11.5] + - matrix: {cuda: "11.5", arch: aarch64} + packages: [nvcc_linux-aarch64=11.5] + - matrix: {cuda: "11.4", arch: x86_64} + packages: [nvcc_linux-64=11.4] + - matrix: {cuda: "11.4", arch: aarch64} + packages: [nvcc_linux-aarch64=11.4] + - matrix: {cuda: "11.2", arch: x86_64} + packages: [nvcc_linux-64=11.2] + - matrix: {cuda: "11.2", arch: aarch64} + packages: [nvcc_linux-aarch64=11.2] + + build_py_cuvs: common: - - output_types: [conda, requirements, pyproject] - packages: - - cython>=3.0.0 - - output_types: conda + - output_types: [conda] packages: - - &pylibraft_conda pylibraft==24.4.* - &rmm_conda rmm==24.4.* - - scikit-build-core>=0.7.0 - - dlpack + - &pylibraft_conda pylibraft==24.4.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for pylibraft and rmm. + # This index is needed for rmm-cu{11,12}. 
- --extra-index-url=https://pypi.nvidia.com - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple - - output_types: [pyproject, requirements] - packages: - - scikit-build-core[pyproject]>=0.7.0 specific: - output_types: [conda, requirements, pyproject] matrices: - matrix: cuda: "12.*" packages: - - cuda-python>=12.0,<13.0a0 + - &cuda_python12 cuda-python>=12.0,<13.0a0 - matrix: # All CUDA 11 versions packages: - - cuda-python>=11.7.1,<12.0a0 + - &cuda_python11 cuda-python>=11.7.1,<12.0a0 - output_types: [requirements, pyproject] matrices: - - matrix: {cuda: "12.2"} + - matrix: {cuda: "12.*"} packages: - - pylibraft-cu12==24.4.* - - rmm-cu12==24.4.* + - &rmm_cu12 rmm-cu12==24.4.* + - &pylibraft_cu12 pylibraft-cu12==24.4.* - matrix: {cuda: "11.*"} packages: - - &pylibraft_cu11 pylibraft-cu11==24.4.* - &rmm_cu11 rmm-cu11==24.4.* - - {matrix: null, packages: [*pylibraft_conda, *rmm_conda] } - - py_run: + - &pylibraft_cu11 pylibraft-cu11==24.4.* + - {matrix: null, packages: [*rmm_conda, *pylibraft_conda] } + checks: common: - - output_types: conda + - output_types: [conda, requirements] packages: - - &cupy_conda cupy>=12.0.0 - - output_types: requirements + - pre-commit + develop: + common: + - output_types: conda packages: - # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for pylibraft and rmm. - - --extra-index-url=https://pypi.nvidia.com - - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple - specific: - - output_types: [requirements, pyproject] - matrices: - - matrix: {cuda: "12.*"} - packages: - - cupy-cuda12x>=12.0.0 - - pylibraft-cu12==24.4.* - - rmm-cu12==24.4.* - - matrix: {cuda: "11.*"} - packages: &py_run_packages_cu11 - - &cupy_pyproject_cu11 cupy-cuda11x>=12.0.0 - - *pylibraft_cu11 - - *rmm_cu11 - - matrix: null - packages: - - *cupy_pyproject_cu11 - - *pylibraft_conda - - *rmm_conda + - clang==16.0.6 + - clang-tools=16.0.6 cuda_version: specific: - output_types: conda @@ -261,22 +204,12 @@ dependencies: - output_types: conda matrices: - matrix: - cuda: "12.2" - packages: - - cuda-cudart-dev - - cuda-profiler-api - - libcublas-dev - - libcufft-dev - - libcurand-dev - - libcusolver-dev - - libcusparse-dev - - matrix: - cuda: "12.0" + cuda: "12.*" packages: + - cuda-nvtx-dev - cuda-cudart-dev - cuda-profiler-api - libcublas-dev - - libcufft-dev - libcurand-dev - libcusolver-dev - libcusparse-dev @@ -284,10 +217,10 @@ dependencies: cuda: "11.8" packages: - cudatoolkit + - cuda-nvtx=11.8 + - cuda-profiler-api=11.8.86 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - - libcufft-dev=10.9.0.58 - - libcufft=10.9.0.58 - libcurand-dev=10.3.0.86 - libcurand=10.3.0.86 - libcusolver-dev=11.4.1.48 @@ -298,10 +231,10 @@ dependencies: cuda: "11.5" packages: - cudatoolkit + - cuda-nvtx=11.5 + - cuda-profiler-api>=11.4.240,<=11.8.86 # use any `11.x` version since pkg is missing several CUDA/arch packages - libcublas-dev>=11.7.3.1,<=11.7.4.6 - libcublas>=11.7.3.1,<=11.7.4.6 - - libcufft-dev>=10.6.0.54,<=10.6.0.107 - - libcufft>=10.6.0.54,<=10.6.0.107 - libcurand-dev>=10.2.6.48,<=10.2.7.107 - libcurand>=10.2.6.48,<=10.2.7.107 - libcusolver-dev>=11.2.1.48,<=11.3.2.107 @@ -312,10 +245,10 @@ dependencies: cuda: "11.4" packages: - cudatoolkit + - &cudanvtx114 cuda-nvtx=11.4 + - cuda-profiler-api>=11.4.240,<=11.8.86 # use any `11.x` version since pkg is missing several CUDA/arch packages - &libcublas_dev114 libcublas-dev>=11.5.2.43,<=11.6.5.2 - &libcublas114 libcublas>=11.5.2.43,<=11.6.5.2 - - 
&libcufft_dev114 libcufft-dev>=10.5.0.43,<=10.5.2.100 - - &libcufft114 libcufft>=10.5.0.43,<=10.5.2.100 - &libcurand_dev114 libcurand-dev>=10.2.5.43,<=10.2.5.120 - &libcurand114 libcurand>=10.2.5.43,<=10.2.5.120 - &libcusolver_dev114 libcusolver-dev>=11.2.0.43,<=11.2.0.120 @@ -326,37 +259,61 @@ dependencies: cuda: "11.2" packages: - cudatoolkit + - *cudanvtx114 + - cuda-profiler-api>=11.4.240,<=11.8.86 # use any `11.x` version since pkg is missing several CUDA/arch packages # The NVIDIA channel doesn't publish pkgs older than 11.4 for these libs, # so 11.2 uses 11.4 packages (the oldest available). - *libcublas_dev114 - *libcublas114 - - *libcufft_dev114 - - *libcufft114 - *libcurand_dev114 - *libcurand114 - *libcusolver_dev114 - *libcusolver114 - *libcusparse_dev114 - *libcusparse114 + + cupy: + common: + - output_types: conda + packages: + - cupy>=12.0.0 + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: {cuda: "12.*"} + packages: + - cupy-cuda12x>=12.0.0 + - matrix: {cuda: "11.*"} + packages: + - cupy-cuda11x>=12.0.0 + - {matrix: null, packages: [cupy-cuda11x>=12.0.0]} + + test_libcuvs: + common: + - output_types: [conda] + packages: + - *cmake_ver + - gtest>=1.13.0 + - gmock>=1.13.0 docs: common: - - output_types: [conda, requirements] + - output_types: [conda] packages: + - breathe + - doxygen>=1.8.20 - graphviz - ipython - - ipykernel - - nbsphinx - numpydoc - # https://github.com/pydata/pydata-sphinx-theme/issues/1539 - - pydata-sphinx-theme!=0.14.2 + - pydata-sphinx-theme - recommonmark - - &scikit_learn scikit-learn==1.2 - - sphinx<6 - sphinx-copybutton - sphinx-markdown-tables - - output_types: conda + build_wheels: + common: + - output_types: [requirements, pyproject] packages: - - doxygen=1.9.1 + - wheel + - setuptools py_version: specific: - output_types: conda @@ -376,40 +333,48 @@ dependencies: - matrix: packages: - python>=3.9,<3.12 - test_libcuvs: + run_py_cuvs: common: - - output_types: conda + - output_types: [conda, pyproject] packages: - - libcuvs==24.4.* - - libcuvs-tests==24.4.* - test_cuvs: - common: - - output_types: conda + - &numpy numpy>=1.23 + - output_types: [conda] packages: - - libcuvs==24.4.* - - cuvs==24.4.* - test_cpp: - common: - - output_types: conda + - *rmm_conda + - *pylibraft_conda + - output_types: requirements packages: - - *cmake_ver - test_python: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for cudf and rmm. 
+ - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [conda, requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + packages: + - *cuda_python12 + - matrix: # All CUDA 11 versions + packages: + - *cuda_python11 + - output_types: [requirements, pyproject] + matrices: + - matrix: {cuda: "12.*"} + packages: + - *pylibraft_cu12 + - matrix: {cuda: "11.*"} + packages: + - *pylibraft_cu11 + - {matrix: null, packages: [*rmm_conda]} + test_python_common: common: - output_types: [conda, requirements, pyproject] packages: - - hypothesis>=6.0,<7 - - numpydoc - pytest==7.* - - pytest-benchmark - - pytest-cases - pytest-cov - - pytest-xdist - test_notebooks: + test_py_cuvs: common: - - output_types: [conda, requirements] + - output_types: [conda, requirements, pyproject] packages: - - jupyter - - matplotlib - - numpy - - pandas - - seaborn + - *pylibraft_conda diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index 3aa87316a..df201e95c 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -36,8 +36,8 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ - "cupy-cuda11x>=12.0.0", - "pylibraft==24.4.*", + "cuda-python>=11.7.1,<12.0a0", + "numpy>=1.23", "rmm==24.4.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -49,12 +49,9 @@ classifiers = [ [project.optional-dependencies] test = [ - "hypothesis>=6.0,<7", - "numpydoc", - "pytest-benchmark", - "pytest-cases", + "cupy-cuda11x>=12.0.0", + "pylibraft==24.4.*", "pytest-cov", - "pytest-xdist", "pytest==7.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. From fca0d2e645594048a9e3d813385dac487ef6fbfd Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Mar 2024 13:04:21 -0600 Subject: [PATCH 27/45] FIX style fixes --- .../all_cuda-122_arch-aarch64.yaml | 49 ------------------- .../all_cuda-122_arch-x86_64.yaml | 49 ------------------- .../clang_tidy_cuda-118_arch-x86_64.yaml | 36 -------------- .../cpp_all_cuda-118_arch-x86_64.yaml | 33 ------------- .../cpp_all_cuda-122_arch-x86_64.yaml | 29 ----------- 5 files changed, 196 deletions(-) delete mode 100644 conda/environments/all_cuda-122_arch-aarch64.yaml delete mode 100644 conda/environments/all_cuda-122_arch-x86_64.yaml delete mode 100644 conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml delete mode 100644 conda/environments/cpp_all_cuda-118_arch-x86_64.yaml delete mode 100644 conda/environments/cpp_all_cuda-122_arch-x86_64.yaml diff --git a/conda/environments/all_cuda-122_arch-aarch64.yaml b/conda/environments/all_cuda-122_arch-aarch64.yaml deleted file mode 100644 index 759ad2f88..000000000 --- a/conda/environments/all_cuda-122_arch-aarch64.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
-channels: -- rapidsai -- rapidsai-nightly -- dask/label/dev -- conda-forge -- nvidia -dependencies: -- breathe -- c-compiler -- clang-tools=16.0.6 -- clang==16.0.6 -- cmake>=3.26.4 -- cuda-cudart-dev -- cuda-nvcc -- cuda-nvtx-dev -- cuda-profiler-api -- cuda-python>=12.0,<13.0a0 -- cuda-version=12.2 -- cupy>=12.0.0 -- cxx-compiler -- cython>=3.0.0 -- doxygen>=1.8.20 -- gcc_linux-aarch64=11.* -- gmock>=1.13.0 -- graphviz -- gtest>=1.13.0 -- ipython -- libcublas-dev -- libcurand-dev -- libcusolver-dev -- libcusparse-dev -- nccl>=2.9.9 -- ninja -- numpy>=1.23 -- numpydoc -- pre-commit -- pydata-sphinx-theme -- pylibraft==24.4.* -- pytest-cov -- pytest==7.* -- recommonmark -- rmm==24.4.* -- scikit-build-core>=0.7.0 -- sphinx-copybutton -- sphinx-markdown-tables -- sysroot_linux-aarch64==2.17 -name: all_cuda-122_arch-aarch64 diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml deleted file mode 100644 index e28e8671e..000000000 --- a/conda/environments/all_cuda-122_arch-x86_64.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. -channels: -- rapidsai -- rapidsai-nightly -- dask/label/dev -- conda-forge -- nvidia -dependencies: -- breathe -- c-compiler -- clang-tools=16.0.6 -- clang==16.0.6 -- cmake>=3.26.4 -- cuda-cudart-dev -- cuda-nvcc -- cuda-nvtx-dev -- cuda-profiler-api -- cuda-python>=12.0,<13.0a0 -- cuda-version=12.2 -- cupy>=12.0.0 -- cxx-compiler -- cython>=3.0.0 -- doxygen>=1.8.20 -- gcc_linux-64=11.* -- gmock>=1.13.0 -- graphviz -- gtest>=1.13.0 -- ipython -- libcublas-dev -- libcurand-dev -- libcusolver-dev -- libcusparse-dev -- nccl>=2.9.9 -- ninja -- numpy>=1.23 -- numpydoc -- pre-commit -- pydata-sphinx-theme -- pylibraft==24.4.* -- pytest-cov -- pytest==7.* -- recommonmark -- rmm==24.4.* -- scikit-build-core>=0.7.0 -- sphinx-copybutton -- sphinx-markdown-tables -- sysroot_linux-64==2.17 -name: all_cuda-122_arch-x86_64 diff --git a/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml b/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml deleted file mode 100644 index 0039af7d5..000000000 --- a/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. -channels: -- rapidsai -- rapidsai-nightly -- conda-forge -- nvidia -dependencies: -- c-compiler -- clang-tools==15.0.7 -- clang==15.0.7 -- cmake>=3.26.4 -- cuda-version=11.8 -- cudatoolkit -- cxx-compiler -- gcc_linux-64=11.* -- gmock>=1.13.0 -- gtest>=1.13.0 -- libcublas-dev=11.11.3.6 -- libcublas=11.11.3.6 -- libcufft-dev=10.9.0.58 -- libcufft=10.9.0.58 -- libcurand-dev=10.3.0.86 -- libcurand=10.3.0.86 -- libcusolver-dev=11.4.1.48 -- libcusolver=11.4.1.48 -- libcusparse-dev=11.7.5.86 -- libcusparse=11.7.5.86 -- libraft-headers==24.4.* -- libraft==24.4.* -- librmm==24.4.* -- ninja -- nvcc_linux-64=11.8 -- sysroot_linux-64==2.17 -- tomli -name: clang_tidy_cuda-118_arch-x86_64 diff --git a/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml b/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml deleted file mode 100644 index a94e1b542..000000000 --- a/conda/environments/cpp_all_cuda-118_arch-x86_64.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# This file is generated by `rapids-dependency-file-generator`. 
-# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. -channels: -- rapidsai -- rapidsai-nightly -- conda-forge -- nvidia -dependencies: -- c-compiler -- cmake>=3.26.4 -- cuda-version=11.8 -- cudatoolkit -- cxx-compiler -- gcc_linux-64=11.* -- gmock>=1.13.0 -- gtest>=1.13.0 -- libcublas-dev=11.11.3.6 -- libcublas=11.11.3.6 -- libcufft-dev=10.9.0.58 -- libcufft=10.9.0.58 -- libcurand-dev=10.3.0.86 -- libcurand=10.3.0.86 -- libcusolver-dev=11.4.1.48 -- libcusolver=11.4.1.48 -- libcusparse-dev=11.7.5.86 -- libcusparse=11.7.5.86 -- libraft-headers==24.4.* -- libraft==24.4.* -- librmm==24.4.* -- ninja -- nvcc_linux-64=11.8 -- sysroot_linux-64==2.17 -name: cpp_all_cuda-118_arch-x86_64 diff --git a/conda/environments/cpp_all_cuda-122_arch-x86_64.yaml b/conda/environments/cpp_all_cuda-122_arch-x86_64.yaml deleted file mode 100644 index ff417e96a..000000000 --- a/conda/environments/cpp_all_cuda-122_arch-x86_64.yaml +++ /dev/null @@ -1,29 +0,0 @@ -# This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. -channels: -- rapidsai -- rapidsai-nightly -- conda-forge -- nvidia -dependencies: -- c-compiler -- cmake>=3.26.4 -- cuda-cudart-dev -- cuda-nvcc -- cuda-profiler-api -- cuda-version=12.2 -- cxx-compiler -- gcc_linux-64=11.* -- gmock>=1.13.0 -- gtest>=1.13.0 -- libcublas-dev -- libcufft-dev -- libcurand-dev -- libcusolver-dev -- libcusparse-dev -- libraft-headers==24.4.* -- libraft==24.4.* -- librmm==24.4.* -- ninja -- sysroot_linux-64==2.17 -name: cpp_all_cuda-122_arch-x86_64 From afc9b942a24f82b54352efe83de6a449a77cf21c Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Mar 2024 13:11:28 -0600 Subject: [PATCH 28/45] FIX simplify matrix of build in deps --- .../all_cuda-122_arch-aarch64.yaml | 49 +++++++++++++++++++ .../all_cuda-122_arch-x86_64.yaml | 49 +++++++++++++++++++ dependencies.yaml | 32 ++++++------ 3 files changed, 112 insertions(+), 18 deletions(-) create mode 100644 conda/environments/all_cuda-122_arch-aarch64.yaml create mode 100644 conda/environments/all_cuda-122_arch-x86_64.yaml diff --git a/conda/environments/all_cuda-122_arch-aarch64.yaml b/conda/environments/all_cuda-122_arch-aarch64.yaml new file mode 100644 index 000000000..759ad2f88 --- /dev/null +++ b/conda/environments/all_cuda-122_arch-aarch64.yaml @@ -0,0 +1,49 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- conda-forge +- nvidia +dependencies: +- breathe +- c-compiler +- clang-tools=16.0.6 +- clang==16.0.6 +- cmake>=3.26.4 +- cuda-cudart-dev +- cuda-nvcc +- cuda-nvtx-dev +- cuda-profiler-api +- cuda-python>=12.0,<13.0a0 +- cuda-version=12.2 +- cupy>=12.0.0 +- cxx-compiler +- cython>=3.0.0 +- doxygen>=1.8.20 +- gcc_linux-aarch64=11.* +- gmock>=1.13.0 +- graphviz +- gtest>=1.13.0 +- ipython +- libcublas-dev +- libcurand-dev +- libcusolver-dev +- libcusparse-dev +- nccl>=2.9.9 +- ninja +- numpy>=1.23 +- numpydoc +- pre-commit +- pydata-sphinx-theme +- pylibraft==24.4.* +- pytest-cov +- pytest==7.* +- recommonmark +- rmm==24.4.* +- scikit-build-core>=0.7.0 +- sphinx-copybutton +- sphinx-markdown-tables +- sysroot_linux-aarch64==2.17 +name: all_cuda-122_arch-aarch64 diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml new file mode 100644 index 000000000..e28e8671e --- /dev/null +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -0,0 +1,49 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- conda-forge +- nvidia +dependencies: +- breathe +- c-compiler +- clang-tools=16.0.6 +- clang==16.0.6 +- cmake>=3.26.4 +- cuda-cudart-dev +- cuda-nvcc +- cuda-nvtx-dev +- cuda-profiler-api +- cuda-python>=12.0,<13.0a0 +- cuda-version=12.2 +- cupy>=12.0.0 +- cxx-compiler +- cython>=3.0.0 +- doxygen>=1.8.20 +- gcc_linux-64=11.* +- gmock>=1.13.0 +- graphviz +- gtest>=1.13.0 +- ipython +- libcublas-dev +- libcurand-dev +- libcusolver-dev +- libcusparse-dev +- nccl>=2.9.9 +- ninja +- numpy>=1.23 +- numpydoc +- pre-commit +- pydata-sphinx-theme +- pylibraft==24.4.* +- pytest-cov +- pytest==7.* +- recommonmark +- rmm==24.4.* +- scikit-build-core>=0.7.0 +- sphinx-copybutton +- sphinx-markdown-tables +- sysroot_linux-64==2.17 +name: all_cuda-122_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index 26f6428f1..252c66602 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -108,24 +108,20 @@ dependencies: - sysroot_linux-aarch64==2.17 - output_types: conda matrices: - - matrix: {cuda: "12.*"} - packages: [cuda-nvcc] - - matrix: {cuda: "11.8", arch: x86_64} - packages: [nvcc_linux-64=11.8] - - matrix: {cuda: "11.8", arch: aarch64} - packages: [nvcc_linux-aarch64=11.8] - - matrix: {cuda: "11.5", arch: x86_64} - packages: [nvcc_linux-64=11.5] - - matrix: {cuda: "11.5", arch: aarch64} - packages: [nvcc_linux-aarch64=11.5] - - matrix: {cuda: "11.4", arch: x86_64} - packages: [nvcc_linux-64=11.4] - - matrix: {cuda: "11.4", arch: aarch64} - packages: [nvcc_linux-aarch64=11.4] - - matrix: {cuda: "11.2", arch: x86_64} - packages: [nvcc_linux-64=11.2] - - matrix: {cuda: "11.2", arch: aarch64} - packages: [nvcc_linux-aarch64=11.2] + - matrix: + arch: x86_64 + cuda: "11.8" + packages: + - nvcc_linux-64=11.8 + - matrix: + arch: aarch64 + cuda: "11.8" + packages: + - nvcc_linux-aarch64=11.8 + - matrix: + cuda: "12.*" + packages: + - cuda-nvcc build_py_cuvs: common: From 109e09db14dba4a9dd200512964236adaaf4d69f Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Mar 2024 13:14:58 -0600 Subject: [PATCH 29/45] DBG deps debugging --- .../all_cuda-122_arch-aarch64.yaml | 49 ------------------- .../all_cuda-122_arch-x86_64.yaml | 49 ------------------- dependencies.yaml | 32 ++++++------ 3 files changed, 18 
insertions(+), 112 deletions(-) delete mode 100644 conda/environments/all_cuda-122_arch-aarch64.yaml delete mode 100644 conda/environments/all_cuda-122_arch-x86_64.yaml diff --git a/conda/environments/all_cuda-122_arch-aarch64.yaml b/conda/environments/all_cuda-122_arch-aarch64.yaml deleted file mode 100644 index 759ad2f88..000000000 --- a/conda/environments/all_cuda-122_arch-aarch64.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. -channels: -- rapidsai -- rapidsai-nightly -- dask/label/dev -- conda-forge -- nvidia -dependencies: -- breathe -- c-compiler -- clang-tools=16.0.6 -- clang==16.0.6 -- cmake>=3.26.4 -- cuda-cudart-dev -- cuda-nvcc -- cuda-nvtx-dev -- cuda-profiler-api -- cuda-python>=12.0,<13.0a0 -- cuda-version=12.2 -- cupy>=12.0.0 -- cxx-compiler -- cython>=3.0.0 -- doxygen>=1.8.20 -- gcc_linux-aarch64=11.* -- gmock>=1.13.0 -- graphviz -- gtest>=1.13.0 -- ipython -- libcublas-dev -- libcurand-dev -- libcusolver-dev -- libcusparse-dev -- nccl>=2.9.9 -- ninja -- numpy>=1.23 -- numpydoc -- pre-commit -- pydata-sphinx-theme -- pylibraft==24.4.* -- pytest-cov -- pytest==7.* -- recommonmark -- rmm==24.4.* -- scikit-build-core>=0.7.0 -- sphinx-copybutton -- sphinx-markdown-tables -- sysroot_linux-aarch64==2.17 -name: all_cuda-122_arch-aarch64 diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml deleted file mode 100644 index e28e8671e..000000000 --- a/conda/environments/all_cuda-122_arch-x86_64.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
-channels: -- rapidsai -- rapidsai-nightly -- dask/label/dev -- conda-forge -- nvidia -dependencies: -- breathe -- c-compiler -- clang-tools=16.0.6 -- clang==16.0.6 -- cmake>=3.26.4 -- cuda-cudart-dev -- cuda-nvcc -- cuda-nvtx-dev -- cuda-profiler-api -- cuda-python>=12.0,<13.0a0 -- cuda-version=12.2 -- cupy>=12.0.0 -- cxx-compiler -- cython>=3.0.0 -- doxygen>=1.8.20 -- gcc_linux-64=11.* -- gmock>=1.13.0 -- graphviz -- gtest>=1.13.0 -- ipython -- libcublas-dev -- libcurand-dev -- libcusolver-dev -- libcusparse-dev -- nccl>=2.9.9 -- ninja -- numpy>=1.23 -- numpydoc -- pre-commit -- pydata-sphinx-theme -- pylibraft==24.4.* -- pytest-cov -- pytest==7.* -- recommonmark -- rmm==24.4.* -- scikit-build-core>=0.7.0 -- sphinx-copybutton -- sphinx-markdown-tables -- sysroot_linux-64==2.17 -name: all_cuda-122_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index 252c66602..26f6428f1 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -108,20 +108,24 @@ dependencies: - sysroot_linux-aarch64==2.17 - output_types: conda matrices: - - matrix: - arch: x86_64 - cuda: "11.8" - packages: - - nvcc_linux-64=11.8 - - matrix: - arch: aarch64 - cuda: "11.8" - packages: - - nvcc_linux-aarch64=11.8 - - matrix: - cuda: "12.*" - packages: - - cuda-nvcc + - matrix: {cuda: "12.*"} + packages: [cuda-nvcc] + - matrix: {cuda: "11.8", arch: x86_64} + packages: [nvcc_linux-64=11.8] + - matrix: {cuda: "11.8", arch: aarch64} + packages: [nvcc_linux-aarch64=11.8] + - matrix: {cuda: "11.5", arch: x86_64} + packages: [nvcc_linux-64=11.5] + - matrix: {cuda: "11.5", arch: aarch64} + packages: [nvcc_linux-aarch64=11.5] + - matrix: {cuda: "11.4", arch: x86_64} + packages: [nvcc_linux-64=11.4] + - matrix: {cuda: "11.4", arch: aarch64} + packages: [nvcc_linux-aarch64=11.4] + - matrix: {cuda: "11.2", arch: x86_64} + packages: [nvcc_linux-64=11.2] + - matrix: {cuda: "11.2", arch: aarch64} + packages: [nvcc_linux-aarch64=11.2] build_py_cuvs: common: From d1c06d666553c1b201692c4d97bf924f768c42ea Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Mar 2024 18:02:34 -0600 Subject: [PATCH 30/45] FIX pre-commit hook was pointing to old version of rfdg --- .pre-commit-config.yaml | 2 +- .../all_cuda-122_arch-aarch64.yaml | 49 +++++++++++++++++++ .../all_cuda-122_arch-x86_64.yaml | 49 +++++++++++++++++++ 3 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 conda/environments/all_cuda-122_arch-aarch64.yaml create mode 100644 conda/environments/all_cuda-122_arch-x86_64.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9e3b1a38b..a82fb74a5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -99,7 +99,7 @@ repos: args: ["--toml", "pyproject.toml"] exclude: (?x)^(^CHANGELOG.md$) - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.5.1 + rev: v1.8.0 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/conda/environments/all_cuda-122_arch-aarch64.yaml b/conda/environments/all_cuda-122_arch-aarch64.yaml new file mode 100644 index 000000000..759ad2f88 --- /dev/null +++ b/conda/environments/all_cuda-122_arch-aarch64.yaml @@ -0,0 +1,49 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- conda-forge +- nvidia +dependencies: +- breathe +- c-compiler +- clang-tools=16.0.6 +- clang==16.0.6 +- cmake>=3.26.4 +- cuda-cudart-dev +- cuda-nvcc +- cuda-nvtx-dev +- cuda-profiler-api +- cuda-python>=12.0,<13.0a0 +- cuda-version=12.2 +- cupy>=12.0.0 +- cxx-compiler +- cython>=3.0.0 +- doxygen>=1.8.20 +- gcc_linux-aarch64=11.* +- gmock>=1.13.0 +- graphviz +- gtest>=1.13.0 +- ipython +- libcublas-dev +- libcurand-dev +- libcusolver-dev +- libcusparse-dev +- nccl>=2.9.9 +- ninja +- numpy>=1.23 +- numpydoc +- pre-commit +- pydata-sphinx-theme +- pylibraft==24.4.* +- pytest-cov +- pytest==7.* +- recommonmark +- rmm==24.4.* +- scikit-build-core>=0.7.0 +- sphinx-copybutton +- sphinx-markdown-tables +- sysroot_linux-aarch64==2.17 +name: all_cuda-122_arch-aarch64 diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml new file mode 100644 index 000000000..e28e8671e --- /dev/null +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -0,0 +1,49 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- conda-forge +- nvidia +dependencies: +- breathe +- c-compiler +- clang-tools=16.0.6 +- clang==16.0.6 +- cmake>=3.26.4 +- cuda-cudart-dev +- cuda-nvcc +- cuda-nvtx-dev +- cuda-profiler-api +- cuda-python>=12.0,<13.0a0 +- cuda-version=12.2 +- cupy>=12.0.0 +- cxx-compiler +- cython>=3.0.0 +- doxygen>=1.8.20 +- gcc_linux-64=11.* +- gmock>=1.13.0 +- graphviz +- gtest>=1.13.0 +- ipython +- libcublas-dev +- libcurand-dev +- libcusolver-dev +- libcusparse-dev +- nccl>=2.9.9 +- ninja +- numpy>=1.23 +- numpydoc +- pre-commit +- pydata-sphinx-theme +- pylibraft==24.4.* +- pytest-cov +- pytest==7.* +- recommonmark +- rmm==24.4.* +- scikit-build-core>=0.7.0 +- sphinx-copybutton +- sphinx-markdown-tables +- sysroot_linux-64==2.17 +name: all_cuda-122_arch-x86_64 From 41be87054651bc260153013c17ff05534b60b468 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Mar 2024 19:39:45 -0600 Subject: [PATCH 31/45] FIX CI fixes --- ci/build_wheel.sh | 9 ++++++--- conda/recipes/cuvs/meta.yaml | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 54caf68d7..8b89aa345 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023, NVIDIA CORPORATION. set -euo pipefail @@ -37,9 +37,12 @@ if ! 
rapids-is-release-build; then fi sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} +sed -r -i "s/pylibraft(.*)\"/pylibraft${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} -sed -r -i "s/pylibraft==(.*)\"/pylibraft${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file} -sed -r -i "s/rmm==(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file} + +if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then + sed -i "s/cuda-python[<=>\.,0-9a]*/cuda-python>=12.0,<13.0a0/g" ${pyproject_file} +fi cd "${package_dir}" diff --git a/conda/recipes/cuvs/meta.yaml b/conda/recipes/cuvs/meta.yaml index 19a3d5a2c..d7487bf98 100644 --- a/conda/recipes/cuvs/meta.yaml +++ b/conda/recipes/cuvs/meta.yaml @@ -46,7 +46,7 @@ requirements: {% endif %} - cuda-version ={{ cuda_version }} - cython >=3.0.0 - - pylibraft {{ version }} + - pylibraft {{ minor_version }} - libcuvs {{ version }} - python x.x - rmm ={{ minor_version }} @@ -57,7 +57,7 @@ requirements: {% if cuda_major == "11" %} - cudatoolkit {% endif %} - - pylibraft {{ version }} + - pylibraft {{ minor_version }} - libcuvs {{ version }} - python x.x - rmm ={{ minor_version }} From d2f0ffb7ae3a1f75fd5484dfdd77c5531c092bd3 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sat, 2 Mar 2024 11:58:46 -0600 Subject: [PATCH 32/45] FIX CI fixes --- ci/build_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 8b89aa345..8632d8bc3 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. set -euo pipefail From 835078f36cf8c277bd251eb1f10266af654e76c3 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sat, 2 Mar 2024 16:28:45 -0600 Subject: [PATCH 33/45] FIX passing c api flag from python to build it in wheel builds --- conda/recipes/cuvs/meta.yaml | 1 + python/cuvs/CMakeLists.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/conda/recipes/cuvs/meta.yaml b/conda/recipes/cuvs/meta.yaml index d7487bf98..435786b69 100644 --- a/conda/recipes/cuvs/meta.yaml +++ b/conda/recipes/cuvs/meta.yaml @@ -46,6 +46,7 @@ requirements: {% endif %} - cuda-version ={{ cuda_version }} - cython >=3.0.0 + - dlpack >= 0.8 - pylibraft {{ minor_version }} - libcuvs {{ version }} - python x.x diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index 9a43eba0c..c7b5e140b 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -57,6 +57,7 @@ endif() if(NOT cuvs_FOUND) set(BUILD_TESTS OFF) + set(BUILD_C_LIBRARY ON) set(CUDA_STATIC_RUNTIME ON) add_subdirectory(../../cpp cuvs-cpp EXCLUDE_FROM_ALL) From 940370d5f2b205cddec30e1e3b4520785ae4f82a Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sat, 2 Mar 2024 17:20:39 -0600 Subject: [PATCH 34/45] FIX Install libcuvs_c.so inside the wheel --- python/cuvs/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index c7b5e140b..d23d43d0b 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -62,10 +62,8 @@ if(NOT cuvs_FOUND) add_subdirectory(../../cpp cuvs-cpp EXCLUDE_FROM_ALL) - # When building the C++ libraries from source we must copy libcuvs.so alongside the Cython - # libraries TODO: when we have a single 'compiled' cuvs library, we shouldn't need this set(cython_lib_dir cuvs_py) - install(TARGETS cuvs DESTINATION ${cython_lib_dir}) + 
install(TARGETS cuvs cuvs_c DESTINATION ${cython_lib_dir}) endif() include(rapids-cython-core) From 95002e1300f6387001efdbf10606064680b7c38b Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sat, 2 Mar 2024 19:17:58 -0600 Subject: [PATCH 35/45] FIX RAFT C++ static build for wheels --- conda/recipes/cuvs/meta.yaml | 2 +- cpp/CMakeLists.txt | 8 +++++++ cpp/cmake/thirdparty/get_raft.cmake | 36 +++++++++++++++++++++-------- python/cuvs/CMakeLists.txt | 6 +++++ 4 files changed, 42 insertions(+), 10 deletions(-) diff --git a/conda/recipes/cuvs/meta.yaml b/conda/recipes/cuvs/meta.yaml index 435786b69..0902dd15a 100644 --- a/conda/recipes/cuvs/meta.yaml +++ b/conda/recipes/cuvs/meta.yaml @@ -46,7 +46,7 @@ requirements: {% endif %} - cuda-version ={{ cuda_version }} - cython >=3.0.0 - - dlpack >= 0.8 + - dlpack >=0.8 - pylibraft {{ minor_version }} - libcuvs {{ version }} - python x.x diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c291c14e3..c076472c1 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -62,12 +62,19 @@ option(CUDA_ENABLE_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF ) option(CUDA_STATIC_RUNTIME "Statically link the CUDA toolkit runtime and libraries" OFF) +option(CUVS_USE_RAFT_STATIC "Build and statically link the RAFT libraries" OFF) option(CUDA_LOG_COMPILE_TIME "Write a log of compilation times to nvcc_compile_log.csv" OFF) option(DETECT_CONDA_ENV "Enable detection of conda environment for dependencies" ON) option(DISABLE_DEPRECATION_WARNINGS "Disable deprecaction warnings " ON) option(DISABLE_OPENMP "Disable OpenMP" OFF) option(CUVS_NVTX "Enable nvtx markers" OFF) +# The options below allow incorporating libcuvs into another build process +# without installing all its components. This is useful if total file size is +# at a premium and we do not expect other consumers to use any APIs of the +# dependency except those that are directly linked to by the dependent library. +option(CUVS_EXCLUDE_RAFT_FROM_ALL "Exclude RAFT targets from cuVS's 'all' target" OFF) + if((BUILD_TESTS OR BUILD_C_LIBRARY) AND NOT BUILD_CPU_ONLY) endif() @@ -102,6 +109,7 @@ message(VERBOSE "cuVS: Enable nvtx markers: ${CUVS_NVTX}") message(VERBOSE "cuVS: Statically link the CUDA toolkit runtime and libraries: ${CUDA_STATIC_RUNTIME}" ) +message(VERBOSE "cuVS: Build and statically link RAFT libraries: ${CUVS_USE_RAFT_STATIC}") # Set RMM logging level set(RMM_LOGGING_LEVEL diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index d57d27312..bc974f6b2 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -21,9 +21,24 @@ function(find_and_configure_raft) cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) + if(PKG_CLONE_ON_PIN AND NOT PKG_PINNED_TAG STREQUAL "branch-${CUML_BRANCH_VERSION_raft}") + message(STATUS "cuVS: RAFT pinned tag found: ${PKG_PINNED_TAG}. 
Cloning raft locally.") + set(CPM_DOWNLOAD_raft ON) + elseif(PKG_USE_RAFT_STATIC AND (NOT CPM_raft_SOURCE)) + message(STATUS "cuVS: Cloning raft locally to build static libraries.") + set(CPM_DOWNLOAD_raft ON) + endif() + set(RAFT_COMPONENTS "") + if(PKG_COMPILE_LIBRARY) + if(NOT PKG_USE_RAFT_STATIC) string(APPEND RAFT_COMPONENTS " compiled") + set(RAFT_COMPILED_LIB raft::compiled PARENT_SCOPE) + else() + string(APPEND RAFT_COMPONENTS " compiled_static") + set(RAFT_COMPILED_LIB raft::compiled_static PARENT_SCOPE) + endif() endif() if(PKG_ENABLE_MNMG_DEPENDENCIES) @@ -39,15 +54,16 @@ function(find_and_configure_raft) INSTALL_EXPORT_SET cuvs-exports COMPONENTS ${RAFT_COMPONENTS} CPM_ARGS - GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git - GIT_TAG ${PKG_PINNED_TAG} - SOURCE_SUBDIR cpp - OPTIONS - "BUILD_TESTS OFF" - "BUILD_PRIMS_BENCH OFF" - "BUILD_ANN_BENCH OFF" - "RAFT_NVTX ${PKG_ENABLE_NVTX}" - "RAFT_COMPILE_LIBRARY ${PKG_COMPILE_LIBRARY}" + GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git + GIT_TAG ${PKG_PINNED_TAG} + SOURCE_SUBDIR cpp + EXCLUDE_FROM_ALL ${PKG_EXCLUDE_FROM_ALL} + OPTIONS + "BUILD_TESTS OFF" + "BUILD_PRIMS_BENCH OFF" + "BUILD_ANN_BENCH OFF" + "RAFT_NVTX ${PKG_ENABLE_NVTX}" + "RAFT_COMPILE_LIBRARY ${PKG_COMPILE_LIBRARY}" ) endfunction() @@ -58,6 +74,8 @@ find_and_configure_raft(VERSION ${RAFT_VERSION}.00 FORK ${RAFT_FORK} PINNED_TAG ${RAFT_PINNED_TAG} COMPILE_LIBRARY ON + USE_RAFT_STATIC ${CUVS_USE_RAFT_STATIC} + EXCLUDE_FROM_ALL ${CUVS_EXCLUDE_RAFT_FROM_ALL} ENABLE_MNMG_DEPENDENCIES OFF ENABLE_NVTX OFF ) diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index d23d43d0b..f83bd56d2 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -58,7 +58,13 @@ endif() if(NOT cuvs_FOUND) set(BUILD_TESTS OFF) set(BUILD_C_LIBRARY ON) + + # Statically link dependencies if building wheels set(CUDA_STATIC_RUNTIME ON) + set(CUVS_USE_RAFT_STATIC ON) + + # Don't install the static libs into wheels + set(CUML_EXCLUDE_RAFT_FROM_ALL ON) add_subdirectory(../../cpp cuvs-cpp EXCLUDE_FROM_ALL) From 56d5455dae11aa5c40135b45f71d23ba1afa833f Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sat, 2 Mar 2024 19:42:08 -0600 Subject: [PATCH 36/45] FIX cuvs variable name --- ci/build_wheel.sh | 3 ++- python/cuvs/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 8632d8bc3..713d3ff33 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -47,7 +47,8 @@ fi cd "${package_dir}" # Hardcode the output dir -python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check +SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON" \ + python -m pip wheel . 
-w dist -vvv --no-deps --disable-pip-version-check mkdir -p final_dist python -m auditwheel repair -w final_dist dist/* diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index f83bd56d2..1a06c8b5f 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -64,7 +64,7 @@ if(NOT cuvs_FOUND) set(CUVS_USE_RAFT_STATIC ON) # Don't install the static libs into wheels - set(CUML_EXCLUDE_RAFT_FROM_ALL ON) + set(CUVS_EXCLUDE_RAFT_FROM_ALL ON) add_subdirectory(../../cpp cuvs-cpp EXCLUDE_FROM_ALL) From f504af3e9139da00b9ad5dcc2bdb2a3bb8b062f3 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sun, 3 Mar 2024 14:01:17 -0600 Subject: [PATCH 37/45] FIX small fix for conda CI to upload artifacts and wheel cmake fixes --- ci/build_python.sh | 2 ++ ci/build_wheel.sh | 3 +-- ci/build_wheel_cuvs.sh | 2 +- cpp/CMakeLists.txt | 17 ++++++++++------- cpp/cmake/thirdparty/get_raft.cmake | 3 --- python/cuvs/CMakeLists.txt | 3 --- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/ci/build_python.sh b/ci/build_python.sh index dd259bdad..c50a413c6 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -33,3 +33,5 @@ rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ conda/recipes/cuvs + +rapids-upload-conda-to-s3 python diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 713d3ff33..8632d8bc3 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -47,8 +47,7 @@ fi cd "${package_dir}" # Hardcode the output dir -SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DDISABLE_DEPRECATION_WARNINGS=ON" \ - python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check +python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check mkdir -p final_dist python -m auditwheel repair -w final_dist dist/* diff --git a/ci/build_wheel_cuvs.sh b/ci/build_wheel_cuvs.sh index d209f93c0..238483b6e 100755 --- a/ci/build_wheel_cuvs.sh +++ b/ci/build_wheel_cuvs.sh @@ -4,6 +4,6 @@ set -euo pipefail # Set up skbuild options. Enable sccache in skbuild config options -export SKBUILD_CONFIGURE_OPTIONS="-DCUVS_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_CUVS_CPP=OFF" +export SKBUILD_CONFIGURE_OPTIONS="-DDETECT_CONDA_ENV=OFF -DFIND_CUVS_CPP=OFF" ci/build_wheel.sh cuvs python/cuvs diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c076472c1..ceab53fb3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -69,12 +69,6 @@ option(DISABLE_DEPRECATION_WARNINGS "Disable deprecaction warnings " ON) option(DISABLE_OPENMP "Disable OpenMP" OFF) option(CUVS_NVTX "Enable nvtx markers" OFF) -# The options below allow incorporating libcuvs into another build process -# without installing all its components. This is useful if total file size is -# at a premium and we do not expect other consumers to use any APIs of the -# dependency except those that are directly linked to by the dependent library. -option(CUVS_EXCLUDE_RAFT_FROM_ALL "Exclude RAFT targets from cuVS's 'all' target" OFF) - if((BUILD_TESTS OR BUILD_C_LIBRARY) AND NOT BUILD_CPU_ONLY) endif() @@ -222,7 +216,16 @@ target_include_directories( if(NOT BUILD_CPU_ONLY) # Keep cuVS as lightweight as possible. Only CUDA libs and rmm should be used in global target. 
- target_link_libraries(cuvs PUBLIC raft::raft raft::compiled nvidia::cutlass::cutlass) + target_link_libraries(cuvs + PUBLIC + rmm::rmm + $<$>:raft::raft> + $<$>:raft::compiled> + PRIVATE + $<$:raft::raft> + $<$:raft::compiled> + nvidia::cutlass::cutlass + ) endif() # Endian detection diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index bc974f6b2..ace0165f7 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -57,7 +57,6 @@ function(find_and_configure_raft) GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git GIT_TAG ${PKG_PINNED_TAG} SOURCE_SUBDIR cpp - EXCLUDE_FROM_ALL ${PKG_EXCLUDE_FROM_ALL} OPTIONS "BUILD_TESTS OFF" "BUILD_PRIMS_BENCH OFF" @@ -74,8 +73,6 @@ find_and_configure_raft(VERSION ${RAFT_VERSION}.00 FORK ${RAFT_FORK} PINNED_TAG ${RAFT_PINNED_TAG} COMPILE_LIBRARY ON - USE_RAFT_STATIC ${CUVS_USE_RAFT_STATIC} - EXCLUDE_FROM_ALL ${CUVS_EXCLUDE_RAFT_FROM_ALL} ENABLE_MNMG_DEPENDENCIES OFF ENABLE_NVTX OFF ) diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index 1a06c8b5f..29ac255c5 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -63,9 +63,6 @@ if(NOT cuvs_FOUND) set(CUDA_STATIC_RUNTIME ON) set(CUVS_USE_RAFT_STATIC ON) - # Don't install the static libs into wheels - set(CUVS_EXCLUDE_RAFT_FROM_ALL ON) - add_subdirectory(../../cpp cuvs-cpp EXCLUDE_FROM_ALL) set(cython_lib_dir cuvs_py) From 3f08f1e70a96967d1bd426c263d0f4b9135c679a Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sun, 3 Mar 2024 14:20:27 -0600 Subject: [PATCH 38/45] FIX libcuvs_c linked libraries for wheel static build --- cpp/CMakeLists.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ceab53fb3..71bdec702 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -317,7 +317,13 @@ if(BUILD_C_LIBRARY) INTERFACE "$" ) - target_link_libraries(cuvs_c PUBLIC cuvs::cuvs) + target_link_libraries(cuvs_c + PUBLIC + cuvs::cuvs + PRIVATE + $<$:raft::raft> + $<$:raft::compiled> + ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries target_link_options(cuvs_c PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") From e2d5b8e8401e2209d5ec8d645f03d892c6f7b799 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sun, 3 Mar 2024 14:47:15 -0600 Subject: [PATCH 39/45] FIX use static raft target for wheels --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 71bdec702..3ec11e411 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -223,7 +223,7 @@ if(NOT BUILD_CPU_ONLY) $<$>:raft::compiled> PRIVATE $<$:raft::raft> - $<$:raft::compiled> + $<$:raft::compiled_static> nvidia::cutlass::cutlass ) endif() From 198f462d2e51d896902186f7e77935ed6ec6f9d6 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sun, 3 Mar 2024 15:09:00 -0600 Subject: [PATCH 40/45] FIX use static raft target for wheels in c api --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3ec11e411..d3fde890a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -322,7 +322,7 @@ if(BUILD_C_LIBRARY) cuvs::cuvs PRIVATE $<$:raft::raft> - $<$:raft::compiled> + $<$:raft::compiled_static> ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries From 53a19b3caa2921a8a91b1c791bc0c04d075cd952 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: 
Sun, 3 Mar 2024 15:09:34 -0600 Subject: [PATCH 41/45] FIX remove static raft target for wheels in c api --- cpp/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d3fde890a..b18e4ff0b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -322,7 +322,6 @@ if(BUILD_C_LIBRARY) cuvs::cuvs PRIVATE $<$:raft::raft> - $<$:raft::compiled_static> ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries From 09f11c68017267f83d9e18d06466f9047c91c03b Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sun, 3 Mar 2024 16:03:00 -0600 Subject: [PATCH 42/45] FIX cython install path of libcuvs and libcuvs_c --- python/cuvs/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index 29ac255c5..feb9b096c 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -65,7 +65,7 @@ if(NOT cuvs_FOUND) add_subdirectory(../../cpp cuvs-cpp EXCLUDE_FROM_ALL) - set(cython_lib_dir cuvs_py) + set(cython_lib_dir cuvs) install(TARGETS cuvs cuvs_c DESTINATION ${cython_lib_dir}) endif() From 2581814773d183dad7e30f238f1e19c222b69caa Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sun, 3 Mar 2024 16:13:25 -0600 Subject: [PATCH 43/45] FIX cython install path of libcuvs and libcuvs_c --- conda/environments/all_cuda-118_arch-aarch64.yaml | 1 + conda/environments/all_cuda-118_arch-x86_64.yaml | 1 + conda/environments/all_cuda-122_arch-aarch64.yaml | 1 + conda/environments/all_cuda-122_arch-x86_64.yaml | 1 + dependencies.yaml | 1 + python/cuvs/pyproject.toml | 1 + 6 files changed, 6 insertions(+) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 82d37bef0..e4f922378 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -47,6 +47,7 @@ dependencies: - recommonmark - rmm==24.4.* - scikit-build-core>=0.7.0 +- scikit-learn - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index f1ed30bf9..a26314b22 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -47,6 +47,7 @@ dependencies: - recommonmark - rmm==24.4.* - scikit-build-core>=0.7.0 +- scikit-learn - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-64==2.17 diff --git a/conda/environments/all_cuda-122_arch-aarch64.yaml b/conda/environments/all_cuda-122_arch-aarch64.yaml index 759ad2f88..91d55917a 100644 --- a/conda/environments/all_cuda-122_arch-aarch64.yaml +++ b/conda/environments/all_cuda-122_arch-aarch64.yaml @@ -43,6 +43,7 @@ dependencies: - recommonmark - rmm==24.4.* - scikit-build-core>=0.7.0 +- scikit-learn - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml index e28e8671e..f27d131ff 100644 --- a/conda/environments/all_cuda-122_arch-x86_64.yaml +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -43,6 +43,7 @@ dependencies: - recommonmark - rmm==24.4.* - scikit-build-core>=0.7.0 +- scikit-learn - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-64==2.17 diff --git a/dependencies.yaml b/dependencies.yaml index 26f6428f1..f17b84dff 100644 --- a/dependencies.yaml +++ 
b/dependencies.yaml @@ -378,3 +378,4 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - *pylibraft_conda + - scikit-learn diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index df201e95c..664cf2969 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -53,6 +53,7 @@ test = [ "pylibraft==24.4.*", "pytest-cov", "pytest==7.*", + "scikit-learn", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] From 9824dff5750c9f9a8a9c25142612a39c70dcbc50 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sun, 3 Mar 2024 16:43:27 -0600 Subject: [PATCH 44/45] FIX rpath of libcuvs targets --- cpp/CMakeLists.txt | 24 +++++++++++++++--------- python/cuvs/CMakeLists.txt | 3 +-- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b18e4ff0b..73dec92a2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -280,11 +280,14 @@ endif() set_target_properties( cuvs - PROPERTIES CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + POSITION_INDEPENDENT_CODE ON ) target_compile_options( @@ -303,10 +306,13 @@ if(BUILD_C_LIBRARY) set_target_properties( cuvs_c - PROPERTIES CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - EXPORT_NAME c_api + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + EXPORT_NAME c_api ) target_compile_options(cuvs_c PRIVATE "$<$:${CUVS_CXX_FLAGS}>") diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index feb9b096c..5a5efe8f9 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -42,6 +42,7 @@ message("- FIND_CUVS_CPP: ${FIND_CUVS_CPP}") include(../../fetch_rapids.cmake) include(rapids-cmake) include(rapids-cpm) +include(rapids-cython-core) include(rapids-export) include(rapids-find) @@ -69,8 +70,6 @@ if(NOT cuvs_FOUND) install(TARGETS cuvs cuvs_c DESTINATION ${cython_lib_dir}) endif() -include(rapids-cython-core) - rapids_cython_init() add_subdirectory(cuvs/common) From ff29ec75222a90884d20185a26a74a53daba9a7b Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sun, 3 Mar 2024 17:56:46 -0600 Subject: [PATCH 45/45] FIX install correct cupy in cuda 12 wheel tests --- ci/build_wheel.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 8632d8bc3..4f9f96d19 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -42,6 +42,7 @@ sed -r -i "s/pylibraft(.*)\"/pylibraft${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then sed -i "s/cuda-python[<=>\.,0-9a]*/cuda-python>=12.0,<13.0a0/g" ${pyproject_file} + sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file} fi cd "${package_dir}"
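
Note on the wheel-build changes above: the CI fixes in PATCH 31 and PATCH 45 rewrite python/cuvs/pyproject.toml in place before `pip wheel` runs, suffixing RAPIDS dependencies with the CUDA major version (e.g. pylibraft-cu12) and, for CUDA 12 builds, repointing cuda-python and cupy. Below is a minimal, self-contained sketch of that rewriting step, assuming GNU sed. The demo_pyproject.toml filename, the sample dependency list, and the hard-coded PACKAGE_CUDA_SUFFIX / alpha_spec values are hypothetical stand-ins for what ci/build_wheel.sh derives at runtime; the sed expressions themselves mirror the ones added in the patches above.

#!/bin/bash
# Sketch (hypothetical file and values) of the dependency rewriting done by
# ci/build_wheel.sh for CUDA-suffixed wheels.
set -euo pipefail

PACKAGE_CUDA_SUFFIX="-cu12"   # assumption: derived from RAPIDS_CUDA_VERSION in the real script
alpha_spec=",>=0.0.0a0"       # assumption: only set for nightly (non-release) builds
pyproject_file="demo_pyproject.toml"

# Hypothetical pyproject contents for illustration only.
cat > "${pyproject_file}" <<'EOF'
[project]
dependencies = [
    "cuda-python>=11.7.1,<12.0a0",
    "cupy-cuda11x>=12.0.0",
    "pylibraft==24.4.*",
    "rmm==24.4.*",
]
EOF

# Suffix the RAPIDS packages and append the alpha spec for nightlies (PATCH 31).
sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" "${pyproject_file}"
sed -r -i "s/pylibraft(.*)\"/pylibraft${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" "${pyproject_file}"

# For CUDA 12 wheels, repoint cuda-python (PATCH 31) and cupy (PATCH 45).
if [[ ${PACKAGE_CUDA_SUFFIX} == "-cu12" ]]; then
  sed -i "s/cuda-python[<=>\.,0-9a]*/cuda-python>=12.0,<13.0a0/g" "${pyproject_file}"
  sed -i "s/cupy-cuda11x/cupy-cuda12x/g" "${pyproject_file}"
fi

cat "${pyproject_file}"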