diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 76ce8599a..cefbea72b 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 3a126b36e..05518805a 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda11.8-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, diff --git a/.devcontainer/cuda12.0-conda/devcontainer.json b/.devcontainer/cuda12.0-conda/devcontainer.json index 426aaef98..3f89836e2 100644 --- a/.devcontainer/cuda12.0-conda/devcontainer.json +++ b/.devcontainer/cuda12.0-conda/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "12.0", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.02-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.04-cpp-mambaforge-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, diff --git a/.devcontainer/cuda12.0-pip/devcontainer.json b/.devcontainer/cuda12.0-pip/devcontainer.json index 1ef2fdcb6..33f67cd3f 100644 --- a/.devcontainer/cuda12.0-pip/devcontainer.json +++ b/.devcontainer/cuda12.0-pip/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "12.0", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.02-cpp-llvm16-cuda12.0-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda12.0-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index f079b5e78..34cf1f5b0 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 with: arch: "amd64" branch: ${{ inputs.branch 
}} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cuvs: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-cuvs: needs: wheel-build-cuvs secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b190a2a62..5799f5108 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -23,29 +23,29 @@ jobs: - wheel-tests-cuvs - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.04 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04 with: build_type: pull-request enable_check_symbols: true @@ -53,19 +53,19 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -75,20 +75,20 @@ jobs: wheel-build-cuvs: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: pull-request script: ci/build_wheel_cuvs.sh wheel-tests-cuvs: needs: wheel-build-cuvs secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: pull-request script: 
ci/test_wheel_cuvs.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.04 with: build_command: | sccache -z; diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index acea5755f..0e66113f2 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: (void (thrust::|cub::)|_ZN\d+raft_cutlass) conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-cuvs: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9e3b1a38b..a82fb74a5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -99,7 +99,7 @@ repos: args: ["--toml", "pyproject.toml"] exclude: (?x)^(^CHANGELOG.md$) - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.5.1 + rev: v1.8.0 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/README.md b/README.md index 428ab04a9..dfba9eb4a 100755 --- a/README.md +++ b/README.md @@ -1,19 +1,14 @@ #
 cuVS: Vector Search and Clustering on the GPU
-### NOTE: cuVS is currently being - ## Contents -
1. [Useful Resources](#useful-resources) 2. [What is cuVS?](#what-is-cuvs) -3. [Getting Started](#getting-started) -4. [Installing cuVS](#installing) +3. [Installing cuVS](#installing) +4. [Getting Started](#getting-started) 5. [Contributing](#contributing) 6. [References](#references) -
- ## Useful Resources - [cuVS Reference Documentation](https://docs.rapids.ai/api/cuvs/stable/): API Documentation. @@ -26,15 +26,152 @@ ## What is cuVS? -cuVS contains many algorithms for running approximate nearest neighbors and clustering on the GPU. +cuVS contains state-of-the-art implementations of several algorithms for running approximate nearest neighbors and clustering on the GPU. It can be used directly or through the various databases and other libraries that have integrated it. The primary goal of cuVS is to simplify the use of GPUs for vector similarity search and clustering. + +**Please note** that cuVS is a new library mostly derived from the approximate nearest neighbors and clustering algorithms in the [RAPIDS RAFT](https://github.com/rapidsai) library of data mining primitives. RAPIDS RAFT currently contains the most fully-featured versions of the approximate nearest neighbors and clustering algorithms in cuVS. We are in the process of migrating the algorithms from RAFT to cuVS, but if you are unsure of which to use, please consider the following: +1. RAFT contains C++ and Python APIs for all of the approximate nearest neighbors and clustering algorithms. +2. cuVS contains growing support for different languages, including C, C++, Python, and Rust. We will be adding more language support to cuVS in the future but will not be improving the language support for RAFT. +3. Once all of RAFT's approximate nearest neighbors and clustering algorithms are moved to cuVS, the RAFT APIs will be deprecated and eventually removed altogether. Once removed, RAFT will become a lightweight header-only library. In the meantime, there's no harm in using RAFT if support for additional languages is not needed. + +## Installing cuVS + +cuVS comes with pre-built packages that can be installed through [conda](https://conda.io/projects/conda/en/latest/user-guide/getting-started.html#managing-python). Different packages are available for the different languages supported by cuVS: + +| Python | C++ | C | Rust | +|--------|-----|---|------| +| `pycuvs` | `libcuvs` | `libcuvs_c` | `cuvs-rs` | + +### Stable release + +It is recommended to use [mamba](https://mamba.readthedocs.io/en/latest/installation/mamba-installation.html) to install the desired packages. The following command will install the Python package. You can substitute `pycuvs` for any of the packages in the table above: +```bash +mamba install -c conda-forge -c nvidia -c rapidsai pycuvs +``` + +### Nightlies +If installing a version that has not yet been released, the `rapidsai` channel can be replaced with `rapidsai-nightly`: +```bash +mamba install -c conda-forge -c nvidia -c rapidsai-nightly pycuvs=24.04* +``` + +Please see the [Build and Install Guide](docs/source/build.md) for more information on installing cuVS and building from source. ## Getting Started +The following code snippets train an approximate nearest neighbors index for the CAGRA algorithm. +### Python API + +```python +from cuvs.neighbors import cagra + +dataset = load_data() +index_params = cagra.IndexParams() + +index = cagra.build_index(index_params, dataset) +``` + +### C++ API + +```c++ +#include <cuvs/neighbors/cagra.hpp> + +using namespace cuvs::neighbors; + +raft::device_matrix_view<const float, int64_t, raft::row_major> dataset = load_dataset(); +raft::device_resources res; + +cagra::index_params index_params; + +auto index = cagra::build(res, index_params, dataset); +``` + +For more examples of the C++ APIs, refer to the [cpp/examples](https://github.com/rapidsai/cuvs/tree/HEAD/cpp/examples) directory in the codebase. 
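As a hedged aside (not part of this diff): a minimal sketch of how the index built in the C++ example above might then be searched, assuming the `cagra::search_params` struct, the `cagra::search` overload, and the `raft` mdspan template parameters follow the same CAGRA conventions used elsewhere in this changeset; the names here are illustrative only.

```c++
#include <cuvs/neighbors/cagra.hpp>

#include <raft/core/device_mdarray.hpp>
#include <raft/core/device_mdspan.hpp>
#include <raft/core/device_resources.hpp>

#include <cstdint>

using namespace cuvs::neighbors;

// Search a previously built CAGRA index for the top-k neighbors of each query.
// `res`, `index`, and `queries` are assumed to already exist, as in the build
// example above; the exact types and signatures are assumptions, not taken
// verbatim from this diff.
void search_topk(raft::device_resources const& res,
                 cagra::index<float, uint32_t> const& index,
                 raft::device_matrix_view<const float, int64_t, raft::row_major> queries)
{
  int64_t topk = 10;

  // One row of neighbor ids and one row of distances per query.
  auto neighbors = raft::make_device_matrix<uint32_t, int64_t>(res, queries.extent(0), topk);
  auto distances = raft::make_device_matrix<float, int64_t>(res, queries.extent(0), topk);

  cagra::search_params search_params;  // defaults are a reasonable starting point

  cagra::search(res, search_params, index, queries, neighbors.view(), distances.view());
}
```

The C equivalent of this build-then-search flow is shown in the `cuvsCagraBuild`/`cuvsCagraSearch` documentation comments added later in this diff.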
 + +### C API + +```c +#include <cuvs/neighbors/cagra.h> + +cuvsResources_t res; +cuvsCagraIndexParams_t index_params; +cuvsCagraIndex_t index; + +DLManagedTensor *dataset; +load_dataset(dataset); + +cuvsResourcesCreate(&res); +cuvsCagraIndexParamsCreate(&index_params); +cuvsCagraIndexCreate(&index); + +cuvsCagraBuild(res, index_params, dataset, index); + +cuvsCagraIndexDestroy(index); +cuvsCagraIndexParamsDestroy(index_params); +cuvsResourcesDestroy(res); +``` -## Installing cuVS ## Contributing +If you are interested in contributing to the cuVS library, please read our [Contributing guidelines](docs/source/contributing.md). Refer to the [Developer Guide](docs/source/developer_guide.md) for details on the developer guidelines, workflows, and principles. + ## References +When citing cuVS generally, please consider referencing this GitHub repository. +```bibtex +@misc{rapidsai, + title={Rapidsai/cuVS: Vector Search and Clustering on the GPU.}, + url={https://github.com/rapidsai/cuvs}, + journal={GitHub}, + publisher={Nvidia RAPIDS}, + author={Rapidsai}, + year={2024} +} +``` + +If citing CAGRA, please consider the following bibtex: +```bibtex +@misc{ootomo2023cagra, + title={CAGRA: Highly Parallel Graph Construction and Approximate Nearest Neighbor Search for GPUs}, + author={Hiroyuki Ootomo and Akira Naruse and Corey Nolet and Ray Wang and Tamas Feher and Yong Wang}, + year={2023}, + eprint={2308.15136}, + archivePrefix={arXiv}, + primaryClass={cs.DS} +} +``` + +If citing the k-selection routines, please consider the following bibtex: +```bibtex +@proceedings{10.1145/3581784, + title = {SC '23: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis}, + year = {2023}, + isbn = {9798400701092}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + abstract = {Started in 1988, the SC Conference has become the annual nexus for researchers and practitioners from academia, industry and government to share information and foster collaborations to advance the state of the art in High Performance Computing (HPC), Networking, Storage, and Analysis.}, + location = {, Denver, CO, USA, } +} +``` + +If citing the nearest neighbors descent API, please consider the following bibtex: +```bibtex +@inproceedings{10.1145/3459637.3482344, + author = {Wang, Hui and Zhao, Wan-Lei and Zeng, Xiangxiang and Yang, Jianye}, + title = {Fast K-NN Graph Construction by GPU Based NN-Descent}, + year = {2021}, + isbn = {9781450384469}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + url = {https://doi.org/10.1145/3459637.3482344}, + doi = {10.1145/3459637.3482344}, + abstract = {NN-Descent is a classic k-NN graph construction approach. It is still widely employed in machine learning, computer vision, and information retrieval tasks due to its efficiency and genericness. However, the current design only works well on CPU. In this paper, NN-Descent has been redesigned to adapt to the GPU architecture. A new graph update strategy called selective update is proposed. It reduces the data exchange between GPU cores and GPU global memory significantly, which is the processing bottleneck under GPU computation architecture. This redesign leads to full exploitation of the parallelism of the GPU hardware. In the meantime, the genericness, as well as the simplicity of NN-Descent, are well-preserved. Moreover, a procedure that allows to k-NN graph to be merged efficiently on GPU is proposed. 
It makes the construction of high-quality k-NN graphs for out-of-GPU-memory datasets tractable. Our approach is 100-250\texttimes{} faster than the single-thread NN-Descent and is 2.5-5\texttimes{} faster than the existing GPU-based approaches as we tested on million as well as billion scale datasets.}, + booktitle = {Proceedings of the 30th ACM International Conference on Information \& Knowledge Management}, + pages = {1929–1938}, + numpages = {10}, + keywords = {high-dimensional, nn-descent, gpu, k-nearest neighbor graph}, + location = {Virtual Event, Queensland, Australia}, + series = {CIKM '21} +} +``` diff --git a/VERSION b/VERSION index 3c6c5e2b7..4a2fe8aa5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.02.00 +24.04.00 diff --git a/build.sh b/build.sh index 6dd250c51..c6e09c5bf 100755 --- a/build.sh +++ b/build.sh @@ -18,7 +18,7 @@ ARGS=$* # scripts, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libcuvs python docs tests template clean --uninstall -v -g -n --compile-static-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h" +VALIDARGS="clean libcuvs python docs tests examples clean --uninstall -v -g -n --compile-static-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h" HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--build-metrics=] where is: clean - remove all existing build artifacts and configuration (start over) @@ -27,7 +27,7 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool= is: -v - verbose build mode @@ -60,7 +60,7 @@ BUILD_DIRS="${LIBCUVS_BUILD_DIR} ${PYTHON_BUILD_DIR} ${CUVS_DASK_BUILD_DIR}" CMAKE_LOG_LEVEL="" VERBOSE_FLAG="" BUILD_ALL_GPU_ARCH=0 -BUILD_TESTS=OFF +BUILD_TESTS=ON BUILD_TYPE=Release COMPILE_LIBRARY=OFF INSTALL_TARGET=install @@ -305,6 +305,9 @@ if [[ ${CMAKE_TARGET} == "" ]]; then fi # Append `-DFIND_CUVS_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option. 
+ + + SKBUILD_EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS}" if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUVS_CPP"* ]]; then SKBUILD_EXTRA_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS} -DFIND_CUVS_CPP=ON" @@ -433,10 +436,10 @@ if hasArg docs; then fi ################################################################################ -# Initiate build for example CUVS application template (if needed) +# Initiate build for c++ examples (if needed) -if hasArg template; then - pushd ${REPODIR}/cpp/template +if hasArg examples; then + pushd ${REPODIR}/cpp/examples ./build.sh popd fi diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 1193285da..983e97385 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -18,17 +18,13 @@ rapids-print-env rapids-logger "Downloading artifacts from previous jobs" CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) -PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) +#PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ - --channel "${PYTHON_CHANNEL}" \ - libcuvs \ - libcuvs-headers \ - cuvs \ - raft-dask + libcuvs -export RAPIDS_VERSION_NUMBER="24.02" +export RAPIDS_VERSION_NUMBER="24.04" export RAPIDS_DOCS_DIR="$(mktemp -d)" rapids-logger "Build CPP docs" diff --git a/ci/build_python.sh b/ci/build_python.sh index 525d21c51..c50a413c6 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -15,18 +15,17 @@ rapids-print-env rapids-logger "Begin py build" +package_name="cuvs" +package_dir="python" + CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) version=$(rapids-generate-version) git_commit=$(git rev-parse HEAD) -export RAPIDS_PACKAGE_VERSION=${version} +export RAPIDS_PACKAGE_VERSION=${version} echo "${version}" > VERSION -package_dir="python" -for package_name in cuvs raft-dask; do - underscore_package_name=$(echo "${package_name}" | tr "-" "_") - sed -i "/^__git_commit__/ s/= .*/= \"${git_commit}\"/g" "${package_dir}/${package_name}/${underscore_package_name}/_version.py" -done +sed -i "/^__git_commit__/ s/= .*/= \"${git_commit}\"/g" "${package_dir}/${package_name}/${package_name}/_version.py" # TODO: Remove `--no-test` flags once importing on a CPU # node works correctly @@ -34,3 +33,5 @@ rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ conda/recipes/cuvs + +rapids-upload-conda-to-s3 python diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 3cc7f06c8..4f9f96d19 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -37,6 +37,8 @@ if ! rapids-is-release-build; then fi sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} +sed -r -i "s/pylibraft(.*)\"/pylibraft${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file} + if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then sed -i "s/cuda-python[<=>\.,0-9a]*/cuda-python>=12.0,<13.0a0/g" ${pyproject_file} diff --git a/ci/build_wheel_cuvs.sh b/ci/build_wheel_cuvs.sh index b4765be38..238483b6e 100755 --- a/ci/build_wheel_cuvs.sh +++ b/ci/build_wheel_cuvs.sh @@ -4,6 +4,6 @@ set -euo pipefail # Set up skbuild options. 
Enable sccache in skbuild config options -export SKBUILD_CONFIGURE_OPTIONS="-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF" +export SKBUILD_CONFIGURE_OPTIONS="-DDETECT_CONDA_ENV=OFF -DFIND_CUVS_CPP=OFF" -#ci/build_wheel.sh cuvs python/cuvs +ci/build_wheel.sh cuvs python/cuvs diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index f463aeb65..b122e73bd 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. ######################## # RAFT Version Updater # ######################## @@ -38,7 +38,7 @@ function sed_runner() { sed_runner "s/set(RAPIDS_VERSION .*)/set(RAPIDS_VERSION \"${NEXT_SHORT_TAG}\")/g" cpp/CMakeLists.txt sed_runner "s/set(RAPIDS_VERSION .*)/set(RAPIDS_VERSION \"${NEXT_SHORT_TAG}\")/g" cpp/template/cmake/thirdparty/fetch_rapids.cmake -sed_runner "s/set(RAFT_VERSION .*)/set(RAFT_VERSION \"${NEXT_FULL_TAG}\")/g" cpp/CMakeLists.txt +sed_runner "s/set(CUVS_VERSION .*)/set(CUVS_VERSION \"${NEXT_FULL_TAG}\")/g" cpp/CMakeLists.txt sed_runner 's/'"cuvs_version .*)"'/'"cuvs_version ${NEXT_FULL_TAG})"'/g' python/cuvs/CMakeLists.txt sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake @@ -57,6 +57,8 @@ DEPENDENCIES=( cuvs cuvs-cu11 cuvs-cu12 + pylibraft-cu11 + pylibraft-cu12 rmm rmm-cu11 rmm-cu12 @@ -85,7 +87,7 @@ sed_runner "s/RAPIDS_VERSION_NUMBER=\".*/RAPIDS_VERSION_NUMBER=\"${NEXT_SHORT_TA sed_runner "/^PROJECT_NUMBER/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" cpp/doxygen/Doxyfile -sed_runner "/^set(RAFT_VERSION/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" docs/source/build.md +sed_runner "/^set(CUVS_VERSION/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" docs/source/build.md sed_runner "s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" docs/source/build.md sed_runner "/rapidsai\/raft/ s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" docs/source/developer_guide.md diff --git a/ci/test_python.sh b/ci/test_python.sh index 9f0c9d6ee..17fbed424 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -31,7 +31,7 @@ rapids-print-env rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ - libcuvs #cuvs + libcuvs cuvs rapids-logger "Check GPU usage" nvidia-smi @@ -40,17 +40,16 @@ EXITCODE=0 trap "EXITCODE=1" ERR set +e -#rapids-logger "pytest cuvs" -#pushd python/cuvs/cuvs -#pytest \ -# --cache-clear \ -# --junitxml="${RAPIDS_TESTS_DIR}/junit-cuvs.xml" \ -# --cov-config=../.coveragerc \ -# --cov=cuvs \ -# --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cuvs-coverage.xml" \ -# --cov-report=term \ -# test -#popd +rapids-logger "pytest cuvs" +pushd python/cuvs/cuvs +pytest \ + --cache-clear \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-cuvs.xml" \ + --cov-config=../.coveragerc \ + --cov=cuvs \ + --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cuvs-coverage.xml" \ + --cov-report=term \ + test rapids-logger "Test script exiting with value: $EXITCODE" exit ${EXITCODE} diff --git a/ci/test_wheel_cuvs.sh b/ci/test_wheel_cuvs.sh index de7501915..364e00a7c 100755 --- a/ci/test_wheel_cuvs.sh +++ b/ci/test_wheel_cuvs.sh @@ -7,12 +7,12 @@ mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" RAPIDS_PY_WHEEL_NAME="cuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist -## echo to expand wildcard before adding `[extra]` requires for pip -#python -m pip install $(echo ./dist/cuvs*.whl)[test] -# -## Run smoke 
tests for aarch64 pull requests -#if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then -# python ./ci/wheel_smoke_test_cuvs.py -#else -# python -m pytest ./python/cuvs/cuvs/test -#fi +# echo to expand wildcard before adding `[extra]` requires for pip +python -m pip install $(echo ./dist/cuvs*.whl)[test] + +# Run smoke tests for aarch64 pull requests +if [[ "$(arch)" == "aarch64" && "${RAPIDS_BUILD_TYPE}" == "pull-request" ]]; then + python ./ci/wheel_smoke_test_cuvs.py +else + python -m pytest ./python/cuvs/cuvs/test +fi diff --git a/ci/wheel_smoke_test_cuvs.py b/ci/wheel_smoke_test_cuvs.py index 65b5fb8b2..26ab5f6b5 100644 --- a/ci/wheel_smoke_test_cuvs.py +++ b/ci/wheel_smoke_test_cuvs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,41 +13,30 @@ # limitations under the License. # +import cupy as cp import numpy as np -from scipy.spatial.distance import cdist -from pylibrat.common import Handle, Stream, device_ndarray -from cuvs.distance import pairwise_distance +from cuvs.neighbors import cagra +from pylibraft.common import Stream, DeviceResources if __name__ == "__main__": - metric = "euclidean" - n_rows = 1337 - n_cols = 1337 + n_samples = 1000 + n_features = 50 + n_queries = 1000 + k = 10 - input1 = np.random.random_sample((n_rows, n_cols)) - input1 = np.asarray(input1, order="C").astype(np.float64) + dataset = cp.random.random_sample((n_samples, + n_features)).astype(cp.float32) - output = np.zeros((n_rows, n_rows), dtype=np.float64) + build_params = cagra.IndexParams(metric="sqeuclidean", + build_algo="nn_descent") - expected = cdist(input1, input1, metric) + index = cagra.build_index(build_params, dataset) - expected[expected <= 1e-5] = 0.0 + distances, neighbors = cagra.search(cagra.SearchParams(), + index, dataset, + k) - input1_device = device_ndarray(input1) - output_device = None - - s2 = Stream() - handle = Handle(stream=s2) - ret_output = pairwise_distance( - input1_device, input1_device, output_device, metric, handle=handle - ) - handle.sync() - - output_device = ret_output - - actual = output_device.copy_to_host() - - actual[actual <= 1e-5] = 0.0 - - assert np.allclose(expected, actual, rtol=1e-4) + distances = cp.asarray(distances) + neighbors = cp.asarray(neighbors) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index dbf92ec47..e4f922378 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -36,18 +36,18 @@ dependencies: - libcusparse=11.7.5.86 - nccl>=2.9.9 - ninja -- numpy>=1.21 +- numpy>=1.23 - numpydoc - nvcc_linux-aarch64=11.8 - pre-commit - pydata-sphinx-theme -- pytest +- pylibraft==24.4.* - pytest-cov +- pytest==7.* - recommonmark -- rmm==24.2.* -- scikit-build>=0.13.1 +- rmm==24.4.* +- scikit-build-core>=0.7.0 - scikit-learn -- scipy - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 2fe184f96..a26314b22 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -36,18 +36,18 @@ dependencies: - libcusparse=11.7.5.86 - nccl>=2.9.9 - ninja -- numpy>=1.21 +- numpy>=1.23 - numpydoc - nvcc_linux-64=11.8 - 
pre-commit - pydata-sphinx-theme -- pytest +- pylibraft==24.4.* - pytest-cov +- pytest==7.* - recommonmark -- rmm==24.2.* -- scikit-build>=0.13.1 +- rmm==24.4.* +- scikit-build-core>=0.7.0 - scikit-learn -- scipy - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-64==2.17 diff --git a/conda/environments/all_cuda-120_arch-aarch64.yaml b/conda/environments/all_cuda-122_arch-aarch64.yaml similarity index 85% rename from conda/environments/all_cuda-120_arch-aarch64.yaml rename to conda/environments/all_cuda-122_arch-aarch64.yaml index 1b7f3908a..91d55917a 100644 --- a/conda/environments/all_cuda-120_arch-aarch64.yaml +++ b/conda/environments/all_cuda-122_arch-aarch64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvtx-dev - cuda-profiler-api - cuda-python>=12.0,<13.0a0 -- cuda-version=12.0 +- cuda-version=12.2 - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 @@ -33,18 +33,18 @@ dependencies: - libcusparse-dev - nccl>=2.9.9 - ninja -- numpy>=1.21 +- numpy>=1.23 - numpydoc - pre-commit - pydata-sphinx-theme -- pytest +- pylibraft==24.4.* - pytest-cov +- pytest==7.* - recommonmark -- rmm==24.2.* -- scikit-build>=0.13.1 +- rmm==24.4.* +- scikit-build-core>=0.7.0 - scikit-learn -- scipy - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 -name: all_cuda-120_arch-aarch64 +name: all_cuda-122_arch-aarch64 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml similarity index 85% rename from conda/environments/all_cuda-120_arch-x86_64.yaml rename to conda/environments/all_cuda-122_arch-x86_64.yaml index 335227994..f27d131ff 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvtx-dev - cuda-profiler-api - cuda-python>=12.0,<13.0a0 -- cuda-version=12.0 +- cuda-version=12.2 - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 @@ -33,18 +33,18 @@ dependencies: - libcusparse-dev - nccl>=2.9.9 - ninja -- numpy>=1.21 +- numpy>=1.23 - numpydoc - pre-commit - pydata-sphinx-theme -- pytest +- pylibraft==24.4.* - pytest-cov +- pytest==7.* - recommonmark -- rmm==24.2.* -- scikit-build>=0.13.1 +- rmm==24.4.* +- scikit-build-core>=0.7.0 - scikit-learn -- scipy - sphinx-copybutton - sphinx-markdown-tables - sysroot_linux-64==2.17 -name: all_cuda-120_arch-x86_64 +name: all_cuda-122_arch-x86_64 diff --git a/conda/recipes/cuvs/build.sh b/conda/recipes/cuvs/build.sh index 81f762068..767d06672 100644 --- a/conda/recipes/cuvs/build.sh +++ b/conda/recipes/cuvs/build.sh @@ -2,4 +2,4 @@ #!/usr/bin/env bash # This assumes the script is executed from the root of the repo directory -./build.sh python --no-nvtx +./build.sh python --no-nvtx -v diff --git a/conda/recipes/cuvs/meta.yaml b/conda/recipes/cuvs/meta.yaml index f22bd01d5..0902dd15a 100644 --- a/conda/recipes/cuvs/meta.yaml +++ b/conda/recipes/cuvs/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Usage: # conda build . 
-c conda-forge -c numba -c rapidsai -c pytorch @@ -46,19 +46,19 @@ requirements: {% endif %} - cuda-version ={{ cuda_version }} - cython >=3.0.0 - - pylibraft {{ version }} + - dlpack >=0.8 + - pylibraft {{ minor_version }} - libcuvs {{ version }} - - numpy >=1.21 - python x.x - rmm ={{ minor_version }} - - scikit-build >=0.13.1 + - scikit-build-core >=0.7.0 - setuptools run: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major == "11" %} - cudatoolkit {% endif %} - - pylibraft {{ version }} + - pylibraft {{ minor_version }} - libcuvs {{ version }} - python x.x - rmm ={{ minor_version }} diff --git a/conda/recipes/libcuvs/build_libcuvs_template.sh b/conda/recipes/libcuvs/build_libcuvs_examples.sh similarity index 61% rename from conda/recipes/libcuvs/build_libcuvs_template.sh rename to conda/recipes/libcuvs/build_libcuvs_examples.sh index bd7719af7..6286a530e 100644 --- a/conda/recipes/libcuvs/build_libcuvs_template.sh +++ b/conda/recipes/libcuvs/build_libcuvs_examples.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Just building template so we verify it uses libraft.so and fail if it doesn't build -./build.sh template +./build.sh examples diff --git a/conda/recipes/libcuvs/meta.yaml b/conda/recipes/libcuvs/meta.yaml index 4e437f3e6..4b6ff87e9 100644 --- a/conda/recipes/libcuvs/meta.yaml +++ b/conda/recipes/libcuvs/meta.yaml @@ -195,9 +195,9 @@ outputs: home: https://rapids.ai/ license: Apache-2.0 summary: libcuvs tests - - name: libcuvs-template + - name: libcuvs-examples version: {{ version }} - script: build_libcuvs_template.sh + script: build_libcuvs_examples.sh build: script_env: *script_env number: {{ GIT_DESCRIBE_NUMBER }} @@ -241,4 +241,4 @@ outputs: about: home: https://rapids.ai/ license: Apache-2.0 - summary: libcuvs template + summary: libcuvs examples diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3c9cd6d1b..73dec92a2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -10,8 +10,8 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing permissions and limitations under # the License. 
-set(RAPIDS_VERSION "24.02") -set(CUVS_VERSION "24.02.00") +set(RAPIDS_VERSION "24.04") +set(CUVS_VERSION "24.04.00") cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) include(../fetch_rapids.cmake) @@ -28,12 +28,12 @@ set(lang_list "CXX") if(NOT BUILD_CPU_ONLY) include(rapids-cuda) - rapids_cuda_init_architectures(cuVS) + rapids_cuda_init_architectures(CUVS) list(APPEND lang_list "CUDA") endif() project( - cuVS + CUVS VERSION ${CUVS_VERSION} LANGUAGES ${lang_list} ) @@ -62,6 +62,7 @@ option(CUDA_ENABLE_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF ) option(CUDA_STATIC_RUNTIME "Statically link the CUDA toolkit runtime and libraries" OFF) +option(CUVS_USE_RAFT_STATIC "Build and statically link the RAFT libraries" OFF) option(CUDA_LOG_COMPILE_TIME "Write a log of compilation times to nvcc_compile_log.csv" OFF) option(DETECT_CONDA_ENV "Enable detection of conda environment for dependencies" ON) option(DISABLE_DEPRECATION_WARNINGS "Disable deprecaction warnings " ON) @@ -102,6 +103,7 @@ message(VERBOSE "cuVS: Enable nvtx markers: ${CUVS_NVTX}") message(VERBOSE "cuVS: Statically link the CUDA toolkit runtime and libraries: ${CUDA_STATIC_RUNTIME}" ) +message(VERBOSE "cuVS: Build and statically link RAFT libraries: ${CUVS_USE_RAFT_STATIC}") # Set RMM logging level set(RMM_LOGGING_LEVEL @@ -214,7 +216,16 @@ target_include_directories( if(NOT BUILD_CPU_ONLY) # Keep cuVS as lightweight as possible. Only CUDA libs and rmm should be used in global target. - target_link_libraries(cuvs PUBLIC raft::raft raft::compiled nvidia::cutlass::cutlass) + target_link_libraries(cuvs + PUBLIC + rmm::rmm + $<$>:raft::raft> + $<$>:raft::compiled> + PRIVATE + $<$:raft::raft> + $<$:raft::compiled_static> + nvidia::cutlass::cutlass + ) endif() # Endian detection @@ -269,11 +280,14 @@ endif() set_target_properties( cuvs - PROPERTIES CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + POSITION_INDEPENDENT_CODE ON ) target_compile_options( @@ -292,10 +306,13 @@ if(BUILD_C_LIBRARY) set_target_properties( cuvs_c - PROPERTIES CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - EXPORT_NAME c_api + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + EXPORT_NAME c_api ) target_compile_options(cuvs_c PRIVATE "$<$:${CUVS_CXX_FLAGS}>") @@ -306,7 +323,12 @@ if(BUILD_C_LIBRARY) INTERFACE "$" ) - target_link_libraries(cuvs_c PUBLIC cuvs::cuvs) + target_link_libraries(cuvs_c + PUBLIC + cuvs::cuvs + PRIVATE + $<$:raft::raft> + ) # ensure CUDA symbols aren't relocated to the middle of the debug build binaries target_link_options(cuvs_c PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index d57d27312..ace0165f7 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -21,9 +21,24 @@ function(find_and_configure_raft) cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) + if(PKG_CLONE_ON_PIN AND NOT PKG_PINNED_TAG STREQUAL "branch-${CUML_BRANCH_VERSION_raft}") + message(STATUS "cuVS: RAFT pinned tag found: ${PKG_PINNED_TAG}. 
Cloning raft locally.") + set(CPM_DOWNLOAD_raft ON) + elseif(PKG_USE_RAFT_STATIC AND (NOT CPM_raft_SOURCE)) + message(STATUS "cuVS: Cloning raft locally to build static libraries.") + set(CPM_DOWNLOAD_raft ON) + endif() + set(RAFT_COMPONENTS "") + if(PKG_COMPILE_LIBRARY) + if(NOT PKG_USE_RAFT_STATIC) string(APPEND RAFT_COMPONENTS " compiled") + set(RAFT_COMPILED_LIB raft::compiled PARENT_SCOPE) + else() + string(APPEND RAFT_COMPONENTS " compiled_static") + set(RAFT_COMPILED_LIB raft::compiled_static PARENT_SCOPE) + endif() endif() if(PKG_ENABLE_MNMG_DEPENDENCIES) @@ -39,15 +54,15 @@ function(find_and_configure_raft) INSTALL_EXPORT_SET cuvs-exports COMPONENTS ${RAFT_COMPONENTS} CPM_ARGS - GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git - GIT_TAG ${PKG_PINNED_TAG} - SOURCE_SUBDIR cpp - OPTIONS - "BUILD_TESTS OFF" - "BUILD_PRIMS_BENCH OFF" - "BUILD_ANN_BENCH OFF" - "RAFT_NVTX ${PKG_ENABLE_NVTX}" - "RAFT_COMPILE_LIBRARY ${PKG_COMPILE_LIBRARY}" + GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git + GIT_TAG ${PKG_PINNED_TAG} + SOURCE_SUBDIR cpp + OPTIONS + "BUILD_TESTS OFF" + "BUILD_PRIMS_BENCH OFF" + "BUILD_ANN_BENCH OFF" + "RAFT_NVTX ${PKG_ENABLE_NVTX}" + "RAFT_COMPILE_LIBRARY ${PKG_COMPILE_LIBRARY}" ) endfunction() diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 0a2c7f8f6..94304afe0 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "cuVS C++ API" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = "24.02" +PROJECT_NUMBER = "24.04" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/cpp/template/CMakeLists.txt b/cpp/examples/CMakeLists.txt similarity index 100% rename from cpp/template/CMakeLists.txt rename to cpp/examples/CMakeLists.txt diff --git a/cpp/template/README.md b/cpp/examples/README.md similarity index 82% rename from cpp/template/README.md rename to cpp/examples/README.md index 5393c0229..125c6dba2 100644 --- a/cpp/template/README.md +++ b/cpp/examples/README.md @@ -1,14 +1,14 @@ -# Example CUVS Project Template +# cuVS C++ Examples This template project provides a drop-in sample to either start building a new application with, or using CUVS in an existing CMake project. -First, please refer to our [installation docs](https://docs.rapids.ai/api/cuvs/stable/build.html#cuda-gpu-requirements) for the minimum requirements to use CUVS. +First, please refer to our [installation docs](https://docs.rapids.ai/api/cuvs/stable/build.html#cuda-gpu-requirements) for the minimum requirements to use cuVS. Once the minimum requirements are satisfied, this example template application can be built with the provided `build.sh` script. This is a bash script that calls the appropriate CMake commands, so you can look into it to see the typical CMake based build workflow. -This directory (`CUVS_SOURCE/cpp/template`) can be copied directly in order to build a new application with CUVS. +This directory (`CUVS_SOURCE/cpp/examples`) can be copied directly in order to build a new application with CUVS. -CUVS can be integrated into an existing CMake project by copying the contents in the `configure rapids-cmake` and `configure cuvs` sections of the provided `CMakeLists.txt` into your project, along with `cmake/thirdparty/get_cuvs.cmake`. 
+cuVS can be integrated into an existing CMake project by copying the contents in the `configure rapids-cmake` and `configure cuvs` sections of the provided `CMakeLists.txt` into your project, along with `cmake/thirdparty/get_cuvs.cmake`. Make sure to link against the appropriate Cmake targets. Use `cuvs::cuvs` to utilize the shared library. diff --git a/cpp/template/build.sh b/cpp/examples/build.sh similarity index 93% rename from cpp/template/build.sh rename to cpp/examples/build.sh index 25ccb3461..7a948d9a8 100755 --- a/cpp/template/build.sh +++ b/cpp/examples/build.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # cuvs empty project template build script diff --git a/cpp/template/cmake/thirdparty/fetch_rapids.cmake b/cpp/examples/cmake/thirdparty/fetch_rapids.cmake similarity index 92% rename from cpp/template/cmake/thirdparty/fetch_rapids.cmake rename to cpp/examples/cmake/thirdparty/fetch_rapids.cmake index 15b6c43a6..c22f586ca 100644 --- a/cpp/template/cmake/thirdparty/fetch_rapids.cmake +++ b/cpp/examples/cmake/thirdparty/fetch_rapids.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -12,7 +12,7 @@ # the License. # Use this variable to update RAPIDS and RAFT versions -set(RAPIDS_VERSION "24.02") +set(RAPIDS_VERSION "24.04") if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake diff --git a/cpp/template/cmake/thirdparty/get_cuvs.cmake b/cpp/examples/cmake/thirdparty/get_cuvs.cmake similarity index 100% rename from cpp/template/cmake/thirdparty/get_cuvs.cmake rename to cpp/examples/cmake/thirdparty/get_cuvs.cmake diff --git a/cpp/template/src/cagra_example.cu b/cpp/examples/src/cagra_example.cu similarity index 100% rename from cpp/template/src/cagra_example.cu rename to cpp/examples/src/cagra_example.cu diff --git a/cpp/template/src/common.cuh b/cpp/examples/src/common.cuh similarity index 98% rename from cpp/template/src/common.cuh rename to cpp/examples/src/common.cuh index 0b72d3bf3..757123cea 100644 --- a/cpp/template/src/common.cuh +++ b/cpp/examples/src/common.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/cuvs/neighbors/cagra_c.h b/cpp/include/cuvs/neighbors/cagra.h similarity index 79% rename from cpp/include/cuvs/neighbors/cagra_c.h rename to cpp/include/cuvs/neighbors/cagra.h index 59861b502..64a26b924 100644 --- a/cpp/include/cuvs/neighbors/cagra_c.h +++ b/cpp/include/cuvs/neighbors/cagra.h @@ -33,7 +33,7 @@ extern "C" { * @brief Enum to denote which ANN algorithm is used to build CAGRA graph * */ -enum cagraGraphBuildAlgo { +enum cuvsCagraGraphBuildAlgo { /* Use IVF-PQ to build all-neighbors knn graph */ IVF_PQ, /* Experimental, use NN-Descent to build all-neighbors knn graph */ @@ -44,18 +44,18 @@ enum cagraGraphBuildAlgo { * @brief Supplemental parameters to build CAGRA Index * */ -struct cagraIndexParams { +struct cuvsCagraIndexParams { /** Degree of input graph for pruning. */ size_t intermediate_graph_degree; /** Degree of output graph. */ size_t graph_degree; /** ANN algorithm to build knn graph. */ - enum cagraGraphBuildAlgo build_algo; + enum cuvsCagraGraphBuildAlgo build_algo; /** Number of Iterations to run if building with NN_DESCENT */ size_t nn_descent_niter; }; -typedef struct cagraIndexParams* cuvsCagraIndexParams_t; +typedef struct cuvsCagraIndexParams* cuvsCagraIndexParams_t; /** * @brief Allocate CAGRA Index params, and populate with default values @@ -77,7 +77,7 @@ cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t index); * @brief Enum to denote algorithm used to search CAGRA Index * */ -enum cagraSearchAlgo { +enum cuvsCagraSearchAlgo { /** For large batch sizes. */ SINGLE_CTA, /** For small batch sizes. */ @@ -90,13 +90,13 @@ enum cagraSearchAlgo { * @brief Enum to denote Hash Mode used while searching CAGRA index * */ -enum cagraHashMode { HASH, SMALL, AUTO_HASH }; +enum cuvsCagraHashMode { HASH, SMALL, AUTO_HASH }; /** * @brief Supplemental parameters to search CAGRA index * */ -struct cagraSearchParams { +struct cuvsCagraSearchParams { /** Maximum number of queries to search at the same time (batch size). Auto select when 0.*/ size_t max_queries; @@ -114,7 +114,7 @@ struct cagraSearchParams { // Reasonable default values are automatically chosen. /** Which search implementation to use. */ - enum cagraSearchAlgo algo; + enum cuvsCagraSearchAlgo algo; /** Number of threads used to calculate a single distance. 4, 8, 16, or 32. */ size_t team_size; @@ -128,7 +128,7 @@ struct cagraSearchParams { /** Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0. */ size_t thread_block_size; /** Hashmap type. Auto selection when AUTO. */ - enum cagraHashMode hashmap_mode; + enum cuvsCagraHashMode hashmap_mode; /** Lower limit of hashmap bit length. More than 8. */ size_t hashmap_min_bitlen; /** Upper limit of hashmap fill rate. 
More than 0.1, less than 0.9.*/ @@ -140,7 +140,7 @@ struct cagraSearchParams { uint64_t rand_xor_mask; }; -typedef struct cagraSearchParams* cuvsCagraSearchParams_t; +typedef struct cuvsCagraSearchParams* cuvsCagraSearchParams_t; /** * @brief Allocate CAGRA search params, and populate with default values @@ -166,24 +166,24 @@ typedef struct { uintptr_t addr; DLDataType dtype; -} cagraIndex; +} cuvsCagraIndex; -typedef cagraIndex* cagraIndex_t; +typedef cuvsCagraIndex* cuvsCagraIndex_t; /** * @brief Allocate CAGRA index * - * @param[in] index cagraIndex_t to allocate + * @param[in] index cuvsCagraIndex_t to allocate * @return cagraError_t */ -cuvsError_t cagraIndexCreate(cagraIndex_t* index); +cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t* index); /** * @brief De-allocate CAGRA index * - * @param[in] index cagraIndex_t to de-allocate + * @param[in] index cuvsCagraIndex_t to de-allocate */ -cuvsError_t cagraIndexDestroy(cagraIndex_t index); +cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index); /** * @brief Build a CAGRA index with a `DLManagedTensor` which has underlying @@ -195,7 +195,7 @@ cuvsError_t cagraIndexDestroy(cagraIndex_t index); * * @code {.c} * #include - * #include + * #include * * // Create cuvsResources_t * cuvsResources_t res; @@ -209,28 +209,28 @@ cuvsError_t cagraIndexDestroy(cagraIndex_t index); * cuvsError_t params_create_status = cuvsCagraIndexParamsCreate(¶ms); * * // Create CAGRA index - * cagraIndex_t index; - * cuvsError_t index_create_status = cagraIndexCreate(&index); + * cuvsCagraIndex_t index; + * cuvsError_t index_create_status = cuvsCagraIndexCreate(&index); * * // Build the CAGRA Index - * cuvsError_t build_status = cagraBuild(res, params, &dataset, index); + * cuvsError_t build_status = cuvsCagraBuild(res, params, &dataset, index); * * // de-allocate `params`, `index` and `res` * cuvsError_t params_destroy_status = cuvsCagraIndexParamsDestroy(params); - * cuvsError_t index_destroy_status = cagraIndexDestroy(index); + * cuvsError_t index_destroy_status = cuvsCagraIndexDestroy(index); * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); * @endcode * * @param[in] res cuvsResources_t opaque C handle * @param[in] params cuvsCagraIndexParams_t used to build CAGRA index * @param[in] dataset DLManagedTensor* training dataset - * @param[out] index cagraIndex_t Newly built CAGRA index + * @param[out] index cuvsCagraIndex_t Newly built CAGRA index * @return cuvsError_t */ -cuvsError_t cagraBuild(cuvsResources_t res, - cuvsCagraIndexParams_t params, - DLManagedTensor* dataset, - cagraIndex_t index); +cuvsError_t cuvsCagraBuild(cuvsResources_t res, + cuvsCagraIndexParams_t params, + DLManagedTensor* dataset, + cuvsCagraIndex_t index); /** * @brief Search a CAGRA index with a `DLManagedTensor` which has underlying @@ -244,7 +244,7 @@ cuvsError_t cagraBuild(cuvsResources_t res, * * @code {.c} * #include - * #include + * #include * * // Create cuvsResources_t * cuvsResources_t res; @@ -259,8 +259,8 @@ cuvsError_t cagraBuild(cuvsResources_t res, * cuvsCagraSearchParams_t params; * cuvsError_t params_create_status = cuvsCagraSearchParamsCreate(¶ms); * - * // Search the `index` built using `cagraBuild` - * cuvsError_t search_status = cagraSearch(res, params, index, queries, neighbors, distances); + * // Search the `index` built using `cuvsCagraBuild` + * cuvsError_t search_status = cuvsCagraSearch(res, params, index, queries, neighbors, distances); * * // de-allocate `params` and `res` * cuvsError_t params_destroy_status = 
cuvsCagraSearchParamsDestroy(params); @@ -269,17 +269,17 @@ cuvsError_t cagraBuild(cuvsResources_t res, * * @param[in] res cuvsResources_t opaque C handle * @param[in] params cuvsCagraSearchParams_t used to search CAGRA index - * @param[in] index cagraIndex which has been returned by `cagraBuild` + * @param[in] index cuvsCagraIndex which has been returned by `cuvsCagraBuild` * @param[in] queries DLManagedTensor* queries dataset to search * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries * @param[out] distances DLManagedTensor* output `k` distances for queries */ -cuvsError_t cagraSearch(cuvsResources_t res, - cuvsCagraSearchParams_t params, - cagraIndex_t index, - DLManagedTensor* queries, - DLManagedTensor* neighbors, - DLManagedTensor* distances); +cuvsError_t cuvsCagraSearch(cuvsResources_t res, + cuvsCagraSearchParams_t params, + cuvsCagraIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances); #ifdef __cplusplus } diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index 638c9a23d..70e268fb2 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ -24,13 +24,13 @@ #include #include +#include #include -#include namespace { template -void* _build(cuvsResources_t res, cagraIndexParams params, DLManagedTensor* dataset_tensor) +void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor* dataset_tensor) { auto dataset = dataset_tensor->dl_tensor; @@ -53,14 +53,13 @@ void* _build(cuvsResources_t res, cagraIndexParams params, DLManagedTensor* data auto mds = cuvs::core::from_dlpack(dataset_tensor); cuvs::neighbors::cagra::build_host(*res_ptr, build_params, mds, *index); } - return index; } template void _search(cuvsResources_t res, - cagraSearchParams params, - cagraIndex index, + cuvsCagraSearchParams params, + cuvsCagraIndex index, DLManagedTensor* queries_tensor, DLManagedTensor* neighbors_tensor, DLManagedTensor* distances_tensor) @@ -95,17 +94,17 @@ void _search(cuvsResources_t res, } // namespace -extern "C" cuvsError_t cagraIndexCreate(cagraIndex_t* index) +extern "C" cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t* index) { try { - *index = new cagraIndex{}; + *index = new cuvsCagraIndex{}; return CUVS_SUCCESS; } catch (...) { return CUVS_ERROR; } } -extern "C" cuvsError_t cagraIndexDestroy(cagraIndex_t index_c_ptr) +extern "C" cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index_c_ptr) { try { auto index = *index_c_ptr; @@ -130,10 +129,10 @@ extern "C" cuvsError_t cagraIndexDestroy(cagraIndex_t index_c_ptr) } } -extern "C" cuvsError_t cagraBuild(cuvsResources_t res, - cuvsCagraIndexParams_t params, - DLManagedTensor* dataset_tensor, - cagraIndex_t index) +extern "C" cuvsError_t cuvsCagraBuild(cuvsResources_t res, + cuvsCagraIndexParams_t params, + DLManagedTensor* dataset_tensor, + cuvsCagraIndex_t index) { try { auto dataset = dataset_tensor->dl_tensor; @@ -153,17 +152,20 @@ extern "C" cuvsError_t cagraBuild(cuvsResources_t res, dataset.dtype.bits); } return CUVS_SUCCESS; + } catch (const std::exception& ex) { + std::cerr << "Error occurred: " << ex.what() << std::endl; + return CUVS_ERROR; } catch (...) 
{ return CUVS_ERROR; } } -extern "C" cuvsError_t cagraSearch(cuvsResources_t res, - cuvsCagraSearchParams_t params, - cagraIndex_t index_c_ptr, - DLManagedTensor* queries_tensor, - DLManagedTensor* neighbors_tensor, - DLManagedTensor* distances_tensor) +extern "C" cuvsError_t cuvsCagraSearch(cuvsResources_t res, + cuvsCagraSearchParams_t params, + cuvsCagraIndex_t index_c_ptr, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor) { try { auto queries = queries_tensor->dl_tensor; @@ -197,6 +199,8 @@ extern "C" cuvsError_t cagraSearch(cuvsResources_t res, queries.dtype.bits); } return CUVS_SUCCESS; + } catch (const std::exception& ex) { + std::cerr << "Error occurred: " << ex.what() << std::endl; } catch (...) { return CUVS_ERROR; } @@ -205,10 +209,10 @@ extern "C" cuvsError_t cagraSearch(cuvsResources_t res, extern "C" cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params) { try { - *params = new cagraIndexParams{.intermediate_graph_degree = 128, - .graph_degree = 64, - .build_algo = IVF_PQ, - .nn_descent_niter = 20}; + *params = new cuvsCagraIndexParams{.intermediate_graph_degree = 128, + .graph_degree = 64, + .build_algo = IVF_PQ, + .nn_descent_niter = 20}; return CUVS_SUCCESS; } catch (...) { return CUVS_ERROR; @@ -228,11 +232,11 @@ extern "C" cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t params extern "C" cuvsError_t cuvsCagraSearchParamsCreate(cuvsCagraSearchParams_t* params) { try { - *params = new cagraSearchParams{.itopk_size = 64, - .search_width = 1, - .hashmap_max_fill_rate = 0.5, - .num_random_samplings = 1, - .rand_xor_mask = 0x128394}; + *params = new cuvsCagraSearchParams{.itopk_size = 64, + .search_width = 1, + .hashmap_max_fill_rate = 0.5, + .num_random_samplings = 1, + .rand_xor_mask = 0x128394}; return CUVS_SUCCESS; } catch (...) 
{ return CUVS_ERROR; diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 609bc2d4e..f33c14179 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -57,7 +57,7 @@ function(ConfigureTest) ) set_target_properties( ${TEST_NAME} - PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$" + PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$" INSTALL_RPATH "\$ORIGIN/../../../lib" CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON diff --git a/cpp/test/neighbors/ann_cagra_c.cu b/cpp/test/neighbors/ann_cagra_c.cu index 4870ac3b8..6e3a3cbd1 100644 --- a/cpp/test/neighbors/ann_cagra_c.cu +++ b/cpp/test/neighbors/ann_cagra_c.cu @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include @@ -56,13 +56,13 @@ TEST(CagraC, BuildSearch) dataset_tensor.dl_tensor.strides = nullptr; // create index - cagraIndex_t index; - cagraIndexCreate(&index); + cuvsCagraIndex_t index; + cuvsCagraIndexCreate(&index); // build index cuvsCagraIndexParams_t build_params; cuvsCagraIndexParamsCreate(&build_params); - cagraBuild(res, build_params, &dataset_tensor, index); + cuvsCagraBuild(res, build_params, &dataset_tensor, index); // create queries DLTensor float* queries_d; @@ -113,7 +113,7 @@ TEST(CagraC, BuildSearch) // search index cuvsCagraSearchParams_t search_params; cuvsCagraSearchParamsCreate(&search_params); - cagraSearch(res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor); + cuvsCagraSearch(res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor); // verify output ASSERT_TRUE(cuvs::devArrMatchHost(neighbors_exp, neighbors_d, 4, cuvs::Compare())); @@ -128,6 +128,6 @@ TEST(CagraC, BuildSearch) // de-allocate index and res cuvsCagraSearchParamsDestroy(search_params); cuvsCagraIndexParamsDestroy(build_params); - cagraIndexDestroy(index); + cuvsCagraIndexDestroy(index); cuvsResourcesDestroy(res); } diff --git a/cpp/test/neighbors/c_api.c b/cpp/test/neighbors/c_api.c index d4f5ad08e..0c476e95b 100644 --- a/cpp/test/neighbors/c_api.c +++ b/cpp/test/neighbors/c_api.c @@ -15,17 +15,17 @@ */ #include -#include +#include #include #include int main() { - // simple smoke test to make sure that we can compile the cagra_c.h API + // simple smoke test to make sure that we can compile the cagra.h API // using a c compiler. 
This isn't aiming to be a full test, just checking // that the exposed C-API is valid C code and doesn't contain C++ features - cagraIndex_t index; - cagraIndexCreate(&index); - cagraIndexDestroy(index); + cuvsCagraIndex_t index; + cuvsCagraIndexCreate(&index); + cuvsCagraIndexDestroy(index); return 0; } diff --git a/dependencies.yaml b/dependencies.yaml index 6f9f10535..f17b84dff 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -3,33 +3,34 @@ files: all: output: conda matrix: - cuda: ["11.8", "12.0"] + cuda: ["11.8", "12.2"] arch: [x86_64, aarch64] includes: - build - - build_cuvs - - cudatoolkit + - build_py_cuvs + - cuda + - cuda_version - develop - checks - build_wheels - test_libcuvs - docs - - run_cuvs + - run_py_cuvs - test_python_common - - test_cuvs + - test_py_cuvs - cupy test_cpp: output: none includes: - - cudatoolkit + - cuda_version - test_libcuvs test_python: output: none includes: - - cudatoolkit + - cuda_version - py_version - test_python_common - - test_cuvs + - test_py_cuvs - cupy checks: output: none @@ -39,28 +40,27 @@ files: docs: output: none includes: - - test_cuvs + - cuda_version - cupy - - cudatoolkit - docs - py_version - py_build_cuvs: + - test_py_cuvs + py_build_py_cuvs: output: pyproject pyproject_dir: python/cuvs extras: table: build-system includes: - build - - build_cuvs - - build_wheels - py_run_cuvs: + - build_py_cuvs + py_run_py_cuvs: output: pyproject pyproject_dir: python/cuvs extras: table: project includes: - - run_cuvs - py_test_cuvs: + - run_py_cuvs + py_test_py_cuvs: output: pyproject pyproject_dir: python/cuvs extras: @@ -68,7 +68,7 @@ files: key: test includes: - test_python_common - - test_cuvs + - test_py_cuvs - cupy channels: - rapidsai @@ -84,12 +84,15 @@ dependencies: - &cmake_ver cmake>=3.26.4 - cython>=3.0.0 - ninja - - scikit-build>=0.13.1 - output_types: [conda] packages: - c-compiler - cxx-compiler - nccl>=2.9.9 + - scikit-build-core>=0.7.0 + - output_types: [requirements, pyproject] + packages: + - scikit-build-core[pyproject]>=0.7.0 specific: - output_types: conda matrices: @@ -105,8 +108,8 @@ dependencies: - sysroot_linux-aarch64==2.17 - output_types: conda matrices: - - matrix: {cuda: "12.0"} - packages: [cuda-version=12.0, cuda-nvcc] + - matrix: {cuda: "12.*"} + packages: [cuda-nvcc] - matrix: {cuda: "11.8", arch: x86_64} packages: [nvcc_linux-64=11.8] - matrix: {cuda: "11.8", arch: aarch64} @@ -124,21 +127,23 @@ dependencies: - matrix: {cuda: "11.2", arch: aarch64} packages: [nvcc_linux-aarch64=11.2] - build_cuvs: + build_py_cuvs: common: - output_types: [conda] packages: - - &rmm_conda rmm==24.2.* + - &rmm_conda rmm==24.4.* + - &pylibraft_conda pylibraft==24.4.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file # This index is needed for rmm-cu{11,12}. 
- --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [conda, requirements, pyproject] matrices: - matrix: - cuda: "12.0" + cuda: "12.*" packages: - &cuda_python12 cuda-python>=12.0,<13.0a0 - matrix: # All CUDA 11 versions @@ -146,18 +151,15 @@ dependencies: - &cuda_python11 cuda-python>=11.7.1,<12.0a0 - output_types: [requirements, pyproject] matrices: - - matrix: {cuda: "12.2"} - packages: &build_cuvs_packages_cu12 - - &rmm_cu12 rmm-cu12==24.2.* - - {matrix: {cuda: "12.1"}, packages: *build_cuvs_packages_cu12} - - {matrix: {cuda: "12.0"}, packages: *build_cuvs_packages_cu12} - - matrix: {cuda: "11.8"} - packages: &build_cuvs_packages_cu11 - - &rmm_cu11 rmm-cu11==24.2.* - - {matrix: {cuda: "11.5"}, packages: *build_cuvs_packages_cu11} - - {matrix: {cuda: "11.4"}, packages: *build_cuvs_packages_cu11} - - {matrix: {cuda: "11.2"}, packages: *build_cuvs_packages_cu11} - - {matrix: null, packages: [*rmm_conda] } + - matrix: {cuda: "12.*"} + packages: + - &rmm_cu12 rmm-cu12==24.4.* + - &pylibraft_cu12 pylibraft-cu12==24.4.* + - matrix: {cuda: "11.*"} + packages: + - &rmm_cu11 rmm-cu11==24.4.* + - &pylibraft_cu11 pylibraft-cu11==24.4.* + - {matrix: null, packages: [*rmm_conda, *pylibraft_conda] } checks: common: - output_types: [conda, requirements] @@ -169,14 +171,41 @@ dependencies: packages: - clang==16.0.6 - clang-tools=16.0.6 - cudatoolkit: + cuda_version: specific: - output_types: conda matrices: + - matrix: + cuda: "11.2" + packages: + - cuda-version=11.2 + - matrix: + cuda: "11.4" + packages: + - cuda-version=11.4 + - matrix: + cuda: "11.5" + packages: + - cuda-version=11.5 + - matrix: + cuda: "11.8" + packages: + - cuda-version=11.8 - matrix: cuda: "12.0" packages: - cuda-version=12.0 + - matrix: + cuda: "12.2" + packages: + - cuda-version=12.2 + cuda: + specific: + - output_types: conda + matrices: + - matrix: + cuda: "12.*" + packages: - cuda-nvtx-dev - cuda-cudart-dev - cuda-profiler-api @@ -187,7 +216,6 @@ dependencies: - matrix: cuda: "11.8" packages: - - cuda-version=11.8 - cudatoolkit - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 @@ -202,7 +230,6 @@ dependencies: - matrix: cuda: "11.5" packages: - - cuda-version=11.5 - cudatoolkit - cuda-nvtx=11.5 - cuda-profiler-api>=11.4.240,<=11.8.86 # use any `11.x` version since pkg is missing several CUDA/arch packages @@ -217,7 +244,6 @@ dependencies: - matrix: cuda: "11.4" packages: - - cuda-version=11.4 - cudatoolkit - &cudanvtx114 cuda-nvtx=11.4 - cuda-profiler-api>=11.4.240,<=11.8.86 # use any `11.x` version since pkg is missing several CUDA/arch packages @@ -232,7 +258,6 @@ dependencies: - matrix: cuda: "11.2" packages: - - cuda-version=11.2 - cudatoolkit - *cudanvtx114 - cuda-profiler-api>=11.4.240,<=11.8.86 # use any `11.x` version since pkg is missing several CUDA/arch packages @@ -255,34 +280,12 @@ dependencies: specific: - output_types: [requirements, pyproject] matrices: - # All CUDA 12 + x86_64 versions - - matrix: {cuda: "12.2", arch: x86_64} - packages: &cupy_packages_cu12_x86_64 - - &cupy_cu12_x86_64 cupy-cuda12x>=12.0.0 - - {matrix: {cuda: "12.1", arch: x86_64}, packages: *cupy_packages_cu12_x86_64} - - {matrix: {cuda: "12.0", arch: x86_64}, packages: *cupy_packages_cu12_x86_64} - # All CUDA 12 + aarch64 versions - - matrix: {cuda: "12.2", arch: aarch64} - packages: &cupy_packages_cu12_aarch64 - - &cupy_cu12_aarch64 cupy-cuda12x -f https://pip.cupy.dev/aarch64 # TODO: Verify that this works. 
- - {matrix: {cuda: "12.1", arch: aarch64}, packages: *cupy_packages_cu12_aarch64} - - {matrix: {cuda: "12.0", arch: aarch64}, packages: *cupy_packages_cu12_aarch64} - - # All CUDA 11 + x86_64 versions - - matrix: {cuda: "11.8", arch: x86_64} - packages: &cupy_packages_cu11_x86_64 + - matrix: {cuda: "12.*"} + packages: + - cupy-cuda12x>=12.0.0 + - matrix: {cuda: "11.*"} + packages: - cupy-cuda11x>=12.0.0 - - {matrix: {cuda: "11.5", arch: x86_64}, packages: *cupy_packages_cu11_x86_64} - - {matrix: {cuda: "11.4", arch: x86_64}, packages: *cupy_packages_cu11_x86_64} - - {matrix: {cuda: "11.2", arch: x86_64}, packages: *cupy_packages_cu11_x86_64} - - # All CUDA 11 + aarch64 versions - - matrix: {cuda: "11.8", arch: aarch64} - packages: &cupy_packages_cu11_aarch64 - - cupy-cuda11x -f https://pip.cupy.dev/aarch64 # TODO: Verify that this works. - - {matrix: {cuda: "11.5", arch: aarch64}, packages: *cupy_packages_cu11_aarch64} - - {matrix: {cuda: "11.4", arch: aarch64}, packages: *cupy_packages_cu11_aarch64} - - {matrix: {cuda: "11.2", arch: aarch64}, packages: *cupy_packages_cu11_aarch64} - {matrix: null, packages: [cupy-cuda11x>=12.0.0]} test_libcuvs: @@ -324,26 +327,32 @@ dependencies: packages: - python=3.10 - matrix: + py: "3.11" packages: - - python>=3.9,<3.11 - run_cuvs: + - python=3.11 + - matrix: + packages: + - python>=3.9,<3.12 + run_py_cuvs: common: - output_types: [conda, pyproject] packages: - - &numpy numpy>=1.21 + - &numpy numpy>=1.23 - output_types: [conda] packages: - *rmm_conda + - *pylibraft_conda - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file # This index is needed for cudf and rmm. - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [conda, requirements, pyproject] matrices: - matrix: - cuda: "12.0" + cuda: "12.*" packages: - *cuda_python12 - matrix: # All CUDA 11 versions @@ -351,27 +360,22 @@ dependencies: - *cuda_python11 - output_types: [requirements, pyproject] matrices: - - matrix: {cuda: "12.2"} - packages: &run_cuvs_packages_cu12 - - *rmm_cu12 - - {matrix: {cuda: "12.1"}, packages: *run_cuvs_packages_cu12} - - {matrix: {cuda: "12.0"}, packages: *run_cuvs_packages_cu12} - - matrix: {cuda: "11.8"} - packages: &run_cuvs_packages_cu11 - - *rmm_cu11 - - {matrix: {cuda: "11.5"}, packages: *run_cuvs_packages_cu11} - - {matrix: {cuda: "11.4"}, packages: *run_cuvs_packages_cu11} - - {matrix: {cuda: "11.2"}, packages: *run_cuvs_packages_cu11} + - matrix: {cuda: "12.*"} + packages: + - *pylibraft_cu12 + - matrix: {cuda: "11.*"} + packages: + - *pylibraft_cu11 - {matrix: null, packages: [*rmm_conda]} test_python_common: common: - output_types: [conda, requirements, pyproject] packages: - - pytest + - pytest==7.* - pytest-cov - test_cuvs: + test_py_cuvs: common: - output_types: [conda, requirements, pyproject] packages: + - *pylibraft_conda - scikit-learn - - scipy diff --git a/docs/source/build.md b/docs/source/build.md index ae7734d0e..31de69b46 100644 --- a/docs/source/build.md +++ b/docs/source/build.md @@ -56,7 +56,7 @@ You can also install the conda packages individually using the `mamba` command a mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.0 ``` -If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. 
You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.02/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. +If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.04/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. ## Installing Python through Pip @@ -315,4 +315,4 @@ The `raft::raft` CMake target is made available when including RAFT into your CM |-------------|---------------------|----------------------------------------------------------|----------------------------------------| | n/a | `raft::raft` | Full RAFT header library | CUDA toolkit, RMM, NVTX, CCCL, CUTLASS | | compiled | `raft::compiled` | Pre-compiled template instantiations and runtime library | raft::raft | -| distributed | `raft::distributed` | Dependencies for `raft::comms` APIs | raft::raft, UCX, NCCL \ No newline at end of file +| distributed | `raft::distributed` | Dependencies for `raft::comms` APIs | raft::raft, UCX, NCCL diff --git a/docs/source/conf.py b/docs/source/conf.py index c09ab953f..1a5c9dfe8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -67,9 +67,9 @@ # built documents. # # The short X.Y version. -version = '24.02' +version = '24.04' # The full version, including alpha/beta/rc tags. -release = '24.02.00' +release = '24.04.00' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/source/contributing.md b/docs/source/contributing.md index 090fd834b..c426ce534 100755 --- a/docs/source/contributing.md +++ b/docs/source/contributing.md @@ -1,9 +1,9 @@ # Contributing -If you are interested in contributing to CUVS, your contributions will fall +If you are interested in contributing to cuVS, your contributions will fall into three categories: 1. You want to report a bug, feature request, or documentation issue - - File an [issue](https://github.com/rapidsai/CUVS/issues/new/choose) + - File an [issue](https://github.com/rapidsai/cuvs/issues/new/choose) describing what you encountered or what you want to see changed. - The RAPIDS team will evaluate the issues and triage them, scheduling them for a release. If you believe the issue needs priority attention @@ -26,10 +26,10 @@ into three categories: 1. Read the project's [README.md](https://github.com/rapidsai/cuvs) to learn how to setup the development environment 2. Find an issue to work on. The best way is to look for the [good first issue](https://github.com/rapidsai/CUVS/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) - or [help wanted](https://github.com/rapidsai/CUVS/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels + or [help wanted](https://github.com/rapidsai/cuvs/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels 3. Comment on the issue saying you are going to work on it 4. Code! Make sure to update unit tests! -5. When done, [create your pull request](https://github.com/rapidsai/CUVS/compare) +5. When done, [create your pull request](https://github.com/rapidsai/cuvs/compare) 6. 
Verify that CI passes all [status checks](https://help.github.com/articles/about-status-checks/). Fix if needed 7. Wait for other developers to review your code and update code as needed 8. Once reviewed and approved, a RAPIDS developer will merge your pull request diff --git a/docs/source/cpp_api/core_interop.rst b/docs/source/cpp_api/core_interop.rst index 034030db5..b2ef05f27 100644 --- a/docs/source/cpp_api/core_interop.rst +++ b/docs/source/cpp_api/core_interop.rst @@ -6,11 +6,11 @@ Interop :class: highlight -``#include `` +``#include `` -namespace *raft::core* +namespace *cuvs::core* .. doxygengroup:: interop - :project: RAFT + :project: cuvs :members: :content-only: diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md index c5bcd03f6..d29130add 100644 --- a/docs/source/developer_guide.md +++ b/docs/source/developer_guide.md @@ -187,7 +187,7 @@ RAFT relies on `clang-format` to enforce code style across all C++ and CUDA sour 1. Do not split empty functions/records/namespaces. 2. Two-space indentation everywhere, including the line continuations. 3. Disable reflowing of comments. - The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.02/cpp/.clang-format). + The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/.clang-format). [`doxygen`](https://doxygen.nl/) is used as documentation generator and also as a documentation linter. In order to run doxygen as a linter on C++/CUDA code, run @@ -205,7 +205,7 @@ you can run `codespell -i 3 -w .` from the repository root directory. This will bring up an interactive prompt to select which spelling fixes to apply. ### #include style -[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.02/cpp/scripts/include_checker.py) is used to enforce the include style as follows: +[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/scripts/include_checker.py) is used to enforce the include style as follows: 1. `#include "..."` should be used for referencing local files only. It is acceptable to be used for referencing files in a sub-folder/parent-folder of the same algorithm, but should never be used to include files in other algorithms or between algorithms and the primitives or other dependencies. 2. `#include <...>` should be used for referencing everything else @@ -215,7 +215,7 @@ python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/test ... list ``` ### Copyright header -[copyright.py](https://github.com/rapidsai/raft/blob/branch-24.02/ci/checks/copyright.py) checks the Copyright header for all git-modified files +[copyright.py](https://github.com/rapidsai/raft/blob/branch-24.04/ci/checks/copyright.py) checks the Copyright header for all git-modified files Manually, you can run the following to bulk-fix the header if only the years need to be updated: ```bash @@ -229,7 +229,7 @@ Call CUDA APIs via the provided helper macros `RAFT_CUDA_TRY`, `RAFT_CUBLAS_TRY` ## Logging ### Introduction -Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.02/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. +Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/include/raft/core/logger.hpp). 
It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. ### Usage ```cpp diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake index ca871c575..330270d66 100644 --- a/fetch_rapids.cmake +++ b/fetch_rapids.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -11,10 +11,10 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. # ============================================================================= -if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) - file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.02/RAPIDS.cmake - ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake +if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake) + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.04/RAPIDS.cmake + ${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake ) endif() -include(${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) +include(${CMAKE_CURRENT_BINARY_DIR}/CUVS_RAPIDS.cmake) diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt index ca9da8a9d..5a5efe8f9 100644 --- a/python/cuvs/CMakeLists.txt +++ b/python/cuvs/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) include(../../fetch_rapids.cmake) -set(cuvs_version 24.02.00) +set(cuvs_version 24.04.00) # We always need CUDA for cuvs because the cuvs dependency brings in a header-only cuco dependency # that enables CUDA unconditionally. @@ -37,29 +37,44 @@ option(FIND_CUVS_CPP "Search for existing CUVS C++ installations before defaulti OFF ) +message("- FIND_CUVS_CPP: ${FIND_CUVS_CPP}") + +include(../../fetch_rapids.cmake) +include(rapids-cmake) +include(rapids-cpm) +include(rapids-cython-core) +include(rapids-export) +include(rapids-find) + +rapids_cpm_init() + # If the user requested it we attempt to find CUVS. if(FIND_CUVS_CPP) find_package(cuvs ${cuvs_version}) + include(../../cpp/cmake/thirdparty/get_dlpack.cmake) else() set(cuvs_FOUND OFF) endif() -include(rapids-cython) - if(NOT cuvs_FOUND) set(BUILD_TESTS OFF) + set(BUILD_C_LIBRARY ON) + + # Statically link dependencies if building wheels set(CUDA_STATIC_RUNTIME ON) + set(CUVS_USE_RAFT_STATIC ON) add_subdirectory(../../cpp cuvs-cpp EXCLUDE_FROM_ALL) - # When building the C++ libraries from source we must copy libcuvs.so alongside the Cython - # libraries TODO: when we have a single 'compiled' cuvs library, we shouldn't need this - set(cython_lib_dir cuvs_py) - install(TARGETS cuvs DESTINATION ${cython_lib_dir}) + set(cython_lib_dir cuvs) + install(TARGETS cuvs cuvs_c DESTINATION ${cython_lib_dir}) endif() rapids_cython_init() +add_subdirectory(cuvs/common) +add_subdirectory(cuvs/neighbors) + if(DEFINED cython_lib_dir) rapids_cython_add_rpath_entries(TARGET cuvs PATHS "${cython_lib_dir}") endif() diff --git a/python/cuvs/README.md b/python/cuvs/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/__init__.py b/python/cuvs/cuvs/__init__.py index 94b3a200b..9f0481cb7 100644 --- a/python/cuvs/cuvs/__init__.py +++ b/python/cuvs/cuvs/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
+# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/cuvs/cuvs/common/CMakeLists.txt b/python/cuvs/cuvs/common/CMakeLists.txt new file mode 100644 index 000000000..2cf1ff19c --- /dev/null +++ b/python/cuvs/cuvs/common/CMakeLists.txt @@ -0,0 +1,24 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources cydlpack.pyx) +set(linked_libraries cuvs::cuvs cuvs_c) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX common_ +) diff --git a/python/cuvs/cuvs/common/__init__.pxd b/python/cuvs/cuvs/common/__init__.pxd new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/common/__init__.py b/python/cuvs/cuvs/common/__init__.py new file mode 100644 index 000000000..eb5666659 --- /dev/null +++ b/python/cuvs/cuvs/common/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .temp_raft import auto_sync_resources + +__all__ = ["auto_sync_resources"] diff --git a/python/cuvs/cuvs/common/c_api.pxd b/python/cuvs/cuvs/common/c_api.pxd new file mode 100644 index 000000000..6addbf16e --- /dev/null +++ b/python/cuvs/cuvs/common/c_api.pxd @@ -0,0 +1,32 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# cython: language_level=3 + + +from cuda.ccudart cimport cudaStream_t +from libc.stdint cimport uintptr_t + + +cdef extern from "cuvs/core/c_api.h": + ctypedef uintptr_t cuvsResources_t + + ctypedef enum cuvsError_t: + CUVS_ERROR, + CUVS_SUCCESS + + cuvsError_t cuvsResourcesCreate(cuvsResources_t* res) + cuvsError_t cuvsResourcesDestroy(cuvsResources_t res) + cuvsError_t cuvsStreamSet(cuvsResources_t res, cudaStream_t stream) diff --git a/python/cuvs/cuvs/common/cydlpack.pxd b/python/cuvs/cuvs/common/cydlpack.pxd new file mode 100644 index 000000000..73334e500 --- /dev/null +++ b/python/cuvs/cuvs/common/cydlpack.pxd @@ -0,0 +1,70 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: language_level=3 + +from libc.stdint cimport int32_t, int64_t, uint8_t, uint16_t, uint64_t + + +cdef extern from "dlpack/dlpack.h" nogil: + ctypedef enum DLDeviceType: + kDLCPU + kDLCUDA + kDLCUDAHost + kDLOpenCL + kDLVulkan + kDLMetal + kDLVPI + kDLROCM + kDLROCMHost + kDLExtDev + kDLCUDAManaged + kDLOneAPI + kDLWebGPU + kDLHexagon + + ctypedef struct DLDevice: + DLDeviceType device_type + int32_t device_id + + ctypedef enum DLDataTypeCode: + kDLInt + kDLUInt + kDLFloat + kDLBfloat + kDLComplex + kDLBool + + ctypedef struct DLDataType: + uint8_t code + uint8_t bits + uint16_t lanes + + ctypedef struct DLTensor: + void* data + DLDevice device + int32_t ndim + DLDataType dtype + int64_t* shape + int64_t* strides + uint64_t byte_offset + + ctypedef struct DLManagedTensor: + DLTensor dl_tensor + void* manager_ctx + void (*deleter)(DLManagedTensor*) # noqa: E211 + + +cdef DLManagedTensor* dlpack_c(ary) diff --git a/python/cuvs/cuvs/common/cydlpack.pyx b/python/cuvs/cuvs/common/cydlpack.pyx new file mode 100644 index 000000000..526f6c78e --- /dev/null +++ b/python/cuvs/cuvs/common/cydlpack.pyx @@ -0,0 +1,102 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# cython: language_level=3 + +import numpy as np + +from libc cimport stdlib +from libc.stdint cimport uintptr_t + + +cdef void deleter(DLManagedTensor* tensor) noexcept: + if tensor.manager_ctx is NULL: + return + stdlib.free(tensor.dl_tensor.shape) + tensor.manager_ctx = NULL + stdlib.free(tensor) + + +cdef DLManagedTensor* dlpack_c(ary): + # todo(dgd): add checking options/parameters + cdef DLDeviceType dev_type + cdef DLDevice dev + cdef DLDataType dtype + cdef DLTensor tensor + cdef DLManagedTensor* dlm = \ + stdlib.malloc(sizeof(DLManagedTensor)) + + if ary.from_cai: + dev_type = DLDeviceType.kDLCUDA + else: + dev_type = DLDeviceType.kDLCPU + + dev.device_type = dev_type + dev.device_id = 0 + + # todo (dgd): change to nice dict + if ary.dtype == np.float32: + dtype.code = DLDataTypeCode.kDLFloat + dtype.bits = 32 + elif ary.dtype == np.float64: + dtype.code = DLDataTypeCode.kDLFloat + dtype.bits = 64 + elif ary.dtype == np.int8: + dtype.code = DLDataTypeCode.kDLInt + dtype.bits = 8 + elif ary.dtype == np.int32: + dtype.code = DLDataTypeCode.kDLInt + dtype.bits = 32 + elif ary.dtype == np.int64: + dtype.code = DLDataTypeCode.kDLInt + dtype.bits = 64 + elif ary.dtype == np.uint8: + dtype.code = DLDataTypeCode.kDLUInt + dtype.bits = 8 + elif ary.dtype == np.uint32: + dtype.code = DLDataTypeCode.kDLUInt + dtype.bits = 32 + elif ary.dtype == np.uint64: + dtype.code = DLDataTypeCode.kDLUInt + dtype.bits = 64 + elif ary.dtype == np.bool_: + dtype.code = DLDataTypeCode.kDLFloat + dtype.bits = 8 + + dtype.lanes = 1 + + cdef size_t ndim = len(ary.shape) + + cdef int64_t* shape = stdlib.malloc(ndim * sizeof(int64_t)) + + for i in range(ndim): + shape[i] = ary.shape[i] + + cdef uintptr_t tensor_ptr + tensor_ptr = ary.ai_["data"][0] + + tensor.data = tensor_ptr + tensor.device = dev + tensor.dtype = dtype + tensor.strides = NULL + tensor.ndim = ndim + tensor.shape = shape + tensor.byte_offset = 0 + + dlm.dl_tensor = tensor + dlm.manager_ctx = NULL + dlm.deleter = deleter + + return dlm diff --git a/python/cuvs/cuvs/common/temp_raft.py b/python/cuvs/cuvs/common/temp_raft.py new file mode 100644 index 000000000..67944eeab --- /dev/null +++ b/python/cuvs/cuvs/common/temp_raft.py @@ -0,0 +1,55 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: language_level=3 + + +# This file has code that will be upstreamed to RAFT + +import functools + +from pylibraft.common import DeviceResources + +_resources_param_string = """ + handle : Optional RAFT resource handle for reusing CUDA resources. + If a handle isn't supplied, CUDA resources will be + allocated inside this function and synchronized before the + function exits. If a handle is supplied, you will need to + explicitly synchronize yourself by calling `handle.sync()` + before accessing the output. +""".strip() + + +def auto_sync_resources(f): + """ + This is identical to auto_sync_handle except for the proposed name change. 
+ """ + + @functools.wraps(f) + def wrapper(*args, resources=None, **kwargs): + sync_resources = resources is None + resources = resources if resources is not None else DeviceResources() + + ret_value = f(*args, resources=resources, **kwargs) + + if sync_resources: + resources.sync() + + return ret_value + + wrapper.__doc__ = wrapper.__doc__.format( + resources_docstring=_resources_param_string + ) + return wrapper diff --git a/python/cuvs/cuvs/neighbors/CMakeLists.txt b/python/cuvs/cuvs/neighbors/CMakeLists.txt new file mode 100644 index 000000000..eaf418c60 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/CMakeLists.txt @@ -0,0 +1,15 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +add_subdirectory(cagra) diff --git a/python/cuvs/cuvs/neighbors/__init__.pxd b/python/cuvs/cuvs/neighbors/__init__.pxd new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/neighbors/__init__.py b/python/cuvs/cuvs/neighbors/__init__.py new file mode 100644 index 000000000..1f8f956d9 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from cuvs.neighbors import cagra + +__all__ = ["common", "cagra"] diff --git a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt new file mode 100644 index 000000000..377cfe779 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt @@ -0,0 +1,24 @@ +# ============================================================================= +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources cagra.pyx) +set(linked_libraries cuvs::cuvs cuvs_c) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_cagra_ +) diff --git a/python/cuvs/cuvs/neighbors/cagra/__init__.pxd b/python/cuvs/cuvs/neighbors/cagra/__init__.pxd new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/neighbors/cagra/__init__.py b/python/cuvs/cuvs/neighbors/cagra/__init__.py new file mode 100644 index 000000000..657c7d366 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/cagra/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .cagra import Index, IndexParams, SearchParams, build_index, search + +__all__ = ["Index", "IndexParams", "SearchParams", "build_index", "search"] diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd new file mode 100644 index 000000000..4293bdc07 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd @@ -0,0 +1,95 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# cython: language_level=3 + +from libc.stdint cimport ( + int8_t, + int64_t, + uint8_t, + uint32_t, + uint64_t, + uintptr_t, +) + +from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t +from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor + + +cdef extern from "cuvs/neighbors/cagra.h" nogil: + + ctypedef enum cuvsCagraGraphBuildAlgo: + IVF_PQ + NN_DESCENT + + ctypedef struct cuvsCagraIndexParams: + size_t intermediate_graph_degree + size_t graph_degree + cuvsCagraGraphBuildAlgo build_algo + size_t nn_descent_niter + + ctypedef cuvsCagraIndexParams* cuvsCagraIndexParams_t + + ctypedef enum cuvsCagraSearchAlgo: + SINGLE_CTA, + MULTI_CTA, + MULTI_KERNEL, + AUTO + + ctypedef enum cuvsCagraHashMode: + HASH, + SMALL, + AUTO_HASH + + ctypedef struct cuvsCagraSearchParams: + size_t max_queries + size_t itopk_size + size_t max_iterations + cuvsCagraSearchAlgo algo + size_t team_size + size_t search_width + size_t min_iterations + size_t thread_block_size + cuvsCagraHashMode hashmap_mode + size_t hashmap_min_bitlen + float hashmap_max_fill_rate + uint32_t num_random_samplings + uint64_t rand_xor_mask + + ctypedef struct cuvsCagraIndex: + uintptr_t addr + DLDataType dtype + + ctypedef cuvsCagraIndex* cuvsCagraIndex_t + + cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params) + + cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t index) + + cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t* index) + + cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index) + + cuvsError_t cuvsCagraBuild(cuvsResources_t res, + cuvsCagraIndexParams* params, + DLManagedTensor* dataset, + cuvsCagraIndex_t index) except + + + cuvsError_t cuvsCagraSearch(cuvsResources_t res, + cuvsCagraSearchParams* params, + cuvsCagraIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances) except + diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx new file mode 100644 index 000000000..bf17fe6a5 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -0,0 +1,502 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# cython: language_level=3 + +import numpy as np + +cimport cuvs.common.cydlpack + +from cuvs.common.temp_raft import auto_sync_resources + +from cython.operator cimport dereference as deref +from libcpp cimport bool, cast + +from cuvs.common cimport cydlpack + +from pylibraft.common import ( + DeviceResources, + Stream, + auto_convert_output, + cai_wrapper, + device_ndarray, +) +from pylibraft.common.cai_wrapper import wrap_array +from pylibraft.common.interruptible import cuda_interruptible +from pylibraft.neighbors.common import _check_input_array + +from libc.stdint cimport ( + int8_t, + int64_t, + uint8_t, + uint32_t, + uint64_t, + uintptr_t, +) + +from cuvs.common.c_api cimport ( + cuvsError_t, + cuvsResources_t, + cuvsResourcesCreate, +) + + +cdef class IndexParams: + """ + Parameters to build index for CAGRA nearest neighbor search + + Parameters + ---------- + metric : string denoting the metric type, default="sqeuclidean" + Valid values for metric: ["sqeuclidean"], where + - sqeuclidean is the euclidean distance without the square root + operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2 + intermediate_graph_degree : int, default = 128 + + graph_degree : int, default = 64 + + build_algo: string denoting the graph building algorithm to use, \ + default = "ivf_pq" + Valid values for algo: ["ivf_pq", "nn_descent"], where + - ivf_pq will use the IVF-PQ algorithm for building the knn graph + - nn_descent (experimental) will use the NN-Descent algorithm for + building the knn graph. It is expected to be generally + faster than ivf_pq. + """ + cdef cuvsCagraIndexParams* params + + def __init__(self, *, + metric="sqeuclidean", + intermediate_graph_degree=128, + graph_degree=64, + build_algo="ivf_pq", + nn_descent_niter=20): + + cuvsCagraIndexParamsCreate(&self.params) + + # todo (dgd): enable once other metrics are present + # and exposed in cuVS C API + # self.params.metric = _get_metric(metric) + # self.params.metric_arg = 0 + self.params.intermediate_graph_degree = intermediate_graph_degree + self.params.graph_degree = graph_degree + if build_algo == "ivf_pq": + self.params.build_algo = cuvsCagraGraphBuildAlgo.IVF_PQ + elif build_algo == "nn_descent": + self.params.build_algo = cuvsCagraGraphBuildAlgo.NN_DESCENT + self.params.nn_descent_niter = nn_descent_niter + + # @property + # def metric(self): + # return self.params.metric + + @property + def intermediate_graph_degree(self): + return self.params.intermediate_graph_degree + + @property + def graph_degree(self): + return self.params.graph_degree + + @property + def build_algo(self): + return self.params.build_algo + + @property + def nn_descent_niter(self): + return self.params.nn_descent_niter + + +cdef class Index: + cdef cuvsCagraIndex_t index + cdef bool trained + + def __cinit__(self): + cdef cuvsError_t index_create_status + index_create_status = cuvsCagraIndexCreate(&self.index) + self.trained = False + + if index_create_status == cuvsError_t.CUVS_ERROR: + raise RuntimeError("Failed to create index.") + + def __dealloc__(self): + cdef cuvsError_t index_destroy_status + if self.index is not NULL: + index_destroy_status = cuvsCagraIndexDestroy(self.index) + if index_destroy_status == cuvsError_t.CUVS_ERROR: + raise Exception("Failed to deallocate index.") + + @property + def trained(self): + return self.trained + + def __repr__(self): + # todo(dgd): update repr as we expose data through C API + attr_str = [] + return "Index(type=CAGRA, metric=L2" + (", ".join(attr_str)) + ")" + + +@auto_sync_resources +def 
build_index(IndexParams index_params, dataset, resources=None):
+    """
+    Build the CAGRA index from the dataset for efficient search.
+
+    The build performs two different steps: first an intermediate knn-graph is
+    constructed, then it is optimized to create the final graph. The
+    index_params object controls the node degree of these graphs.
+
+    It is required that both the dataset and the optimized graph fit into
+    GPU memory.
+
+    The following distance metrics are supported:
+        - L2
+
+    Parameters
+    ----------
+    index_params : IndexParams object
+    dataset : CUDA array interface compliant matrix shape (n_samples, dim)
+        Supported dtype [float, int8, uint8]
+    {resources_docstring}
+
+    Returns
+    -------
+    index: cuvs.cagra.Index
+
+    Examples
+    --------
+
+    >>> import cupy as cp
+    >>> from cuvs.neighbors import cagra
+    >>> n_samples = 50000
+    >>> n_features = 50
+    >>> n_queries = 1000
+    >>> k = 10
+    >>> dataset = cp.random.random_sample((n_samples, n_features),
+    ...                                   dtype=cp.float32)
+    >>> build_params = cagra.IndexParams(metric="sqeuclidean")
+    >>> index = cagra.build_index(build_params, dataset)
+    >>> distances, neighbors = cagra.search(cagra.SearchParams(),
+    ...                                     index, dataset,
+    ...                                     k)
+    >>> distances = cp.asarray(distances)
+    >>> neighbors = cp.asarray(neighbors)
+    """
+
+    # todo(dgd): we can make the check of dtype a parameter of wrap_array
+    # in RAFT to make this a single call
+    dataset_ai = wrap_array(dataset)
+    _check_input_array(dataset_ai, [np.dtype('float32'), np.dtype('byte'),
+                                    np.dtype('ubyte')])
+
+    cdef cuvsResources_t res_
+    cdef cuvsError_t cstat
+
+    cstat = cuvsResourcesCreate(&res_)
+    if cstat == cuvsError_t.CUVS_ERROR:
+        raise RuntimeError("Error creating Device Resources.")
+
+    cdef Index idx = Index()
+    cdef cuvsError_t build_status
+    cdef cydlpack.DLManagedTensor* dataset_dlpack = \
+        cydlpack.dlpack_c(dataset_ai)
+    cdef cuvsCagraIndexParams* params = index_params.params
+
+    with cuda_interruptible():
+        build_status = cuvsCagraBuild(
+            res_,
+            params,
+            dataset_dlpack,
+            idx.index
+        )
+
+    if build_status == cuvsError_t.CUVS_ERROR:
+        raise RuntimeError("Index failed to build.")
+    else:
+        idx.trained = True
+
+    return idx
+
+
+cdef class SearchParams:
+    """
+    CAGRA search parameters
+
+    Parameters
+    ----------
+    max_queries: int, default = 0
+        Maximum number of queries to search at the same time (batch size).
+        Auto select when 0.
+    itopk_size: int, default = 64
+        Number of intermediate search results retained during the search.
+        This is the main knob to adjust the trade-off between accuracy and
+        search speed. Higher values improve the search accuracy.
+    max_iterations: int, default = 0
+        Upper limit of search iterations. Auto select when 0.
+    algo: string denoting the search algorithm to use, default = "auto"
+        Valid values for algo: ["auto", "single_cta", "multi_cta"], where
+            - auto will automatically select the best value based on query size
+            - single_cta is better when query contains larger number of
+              vectors (e.g. >10)
+            - multi_cta is better when query contains only a few vectors
+    team_size: int, default = 0
+        Number of threads used to calculate a single distance. 4, 8, 16,
+        or 32.
+    search_width: int, default = 1
+        Number of graph nodes to select as the starting point for the
+        search in each iteration.
+    min_iterations: int, default = 0
+        Lower limit of search iterations.
+    thread_block_size: int, default = 0
+        Thread block size. 0, 64, 128, 256, 512, 1024.
+        Auto selection when 0.
+    hashmap_mode: string denoting the type of hash map to use.
+ It's usually better to allow the algorithm to select this value, + default = "auto". + Valid values for hashmap_mode: ["auto", "small", "hash"], where + - auto will automatically select the best value based on algo + - small will use the small shared memory hash table with resetting. + - hash will use a single hash table in global memory. + hashmap_min_bitlen: int, default = 0 + Upper limit of hashmap fill rate. More than 0.1, less than 0.9. + hashmap_max_fill_rate: float, default = 0.5 + Upper limit of hashmap fill rate. More than 0.1, less than 0.9. + num_random_samplings: int, default = 1 + Number of iterations of initial random seed node selection. 1 or + more. + rand_xor_mask: int, default = 0x128394 + Bit mask used for initial random seed node selection. + """ + cdef cuvsCagraSearchParams params + + def __init__(self, *, + max_queries=0, + itopk_size=64, + max_iterations=0, + algo="auto", + team_size=0, + search_width=1, + min_iterations=0, + thread_block_size=0, + hashmap_mode="auto", + hashmap_min_bitlen=0, + hashmap_max_fill_rate=0.5, + num_random_samplings=1, + rand_xor_mask=0x128394): + self.params.max_queries = max_queries + self.params.itopk_size = itopk_size + self.params.max_iterations = max_iterations + if algo == "single_cta": + self.params.algo = cuvsCagraSearchAlgo.SINGLE_CTA + elif algo == "multi_cta": + self.params.algo = cuvsCagraSearchAlgo.MULTI_CTA + elif algo == "multi_kernel": + self.params.algo = cuvsCagraSearchAlgo.MULTI_KERNEL + elif algo == "auto": + self.params.algo = cuvsCagraSearchAlgo.AUTO + else: + raise ValueError("`algo` value not supported.") + + self.params.team_size = team_size + self.params.search_width = search_width + self.params.min_iterations = min_iterations + self.params.thread_block_size = thread_block_size + if hashmap_mode == "hash": + self.params.hashmap_mode = cuvsCagraHashMode.HASH + elif hashmap_mode == "small": + self.params.hashmap_mode = cuvsCagraHashMode.SMALL + elif hashmap_mode == "auto": + self.params.hashmap_mode = cuvsCagraHashMode.AUTO_HASH + else: + raise ValueError("`hashmap_mode` value not supported.") + + self.params.hashmap_min_bitlen = hashmap_min_bitlen + self.params.hashmap_max_fill_rate = hashmap_max_fill_rate + self.params.num_random_samplings = num_random_samplings + self.params.rand_xor_mask = rand_xor_mask + + def __repr__(self): + attr_str = [attr + "=" + str(getattr(self, attr)) + for attr in [ + "max_queries", "itopk_size", "max_iterations", "algo", + "team_size", "search_width", "min_iterations", + "thread_block_size", "hashmap_mode", + "hashmap_min_bitlen", "hashmap_max_fill_rate", + "num_random_samplings", "rand_xor_mask"]] + return "SearchParams(type=CAGRA, " + (", ".join(attr_str)) + ")" + + @property + def max_queries(self): + return self.params.max_queries + + @property + def itopk_size(self): + return self.params.itopk_size + + @property + def max_iterations(self): + return self.params.max_iterations + + @property + def algo(self): + return self.params.algo + + @property + def team_size(self): + return self.params.team_size + + @property + def search_width(self): + return self.params.search_width + + @property + def min_iterations(self): + return self.params.min_iterations + + @property + def thread_block_size(self): + return self.params.thread_block_size + + @property + def hashmap_mode(self): + return self.params.hashmap_mode + + @property + def hashmap_min_bitlen(self): + return self.params.hashmap_min_bitlen + + @property + def hashmap_max_fill_rate(self): + return 
self.params.hashmap_max_fill_rate
+
+    @property
+    def num_random_samplings(self):
+        return self.params.num_random_samplings
+
+    @property
+    def rand_xor_mask(self):
+        return self.params.rand_xor_mask
+
+
+@auto_sync_resources
+@auto_convert_output
+def search(SearchParams search_params,
+           Index index,
+           queries,
+           k,
+           neighbors=None,
+           distances=None,
+           resources=None):
+    """
+    Find the k nearest neighbors for each query.
+
+    Parameters
+    ----------
+    search_params : SearchParams
+    index : Index
+        Trained CAGRA index.
+    queries : CUDA array interface compliant matrix shape (n_samples, dim)
+        Supported dtype [float, int8, uint8]
+    k : int
+        The number of neighbors.
+    neighbors : Optional CUDA array interface compliant matrix shape
+                (n_queries, k), dtype uint32. If supplied, neighbor
+                indices will be written here in-place. (default None)
+    distances : Optional CUDA array interface compliant matrix shape
+                (n_queries, k). If supplied, the distances to the
+                neighbors will be written here in-place. (default None)
+    {resources_docstring}
+
+    Examples
+    --------
+    >>> import cupy as cp
+    >>> from cuvs.neighbors import cagra
+    >>> n_samples = 50000
+    >>> n_features = 50
+    >>> n_queries = 1000
+    >>> dataset = cp.random.random_sample((n_samples, n_features),
+    ...                                   dtype=cp.float32)
+    >>> # Build index
+    >>> index = cagra.build_index(cagra.IndexParams(), dataset)
+    >>> # Search using the built index
+    >>> queries = cp.random.random_sample((n_queries, n_features),
+    ...                                   dtype=cp.float32)
+    >>> k = 10
+    >>> search_params = cagra.SearchParams(
+    ...     max_queries=100,
+    ...     itopk_size=64
+    ... )
+    >>> # Using a pooling allocator reduces overhead of temporary array
+    >>> # creation during search. This is useful if multiple searches
+    >>> # are performed with the same query size.
+    >>> distances, neighbors = cagra.search(search_params, index, queries,
+    ...                                     k)
+    >>> neighbors = cp.asarray(neighbors)
+    >>> distances = cp.asarray(distances)
+    """
+    if not index.trained:
+        raise ValueError("Index needs to be built before calling search.")
+
+    cdef cuvsResources_t res_
+    cdef cuvsError_t cstat
+
+    cstat = cuvsResourcesCreate(&res_)
+    if cstat == cuvsError_t.CUVS_ERROR:
+        raise RuntimeError("Error creating Device Resources.")
+
+    # todo(dgd): we can make the check of dtype a parameter of wrap_array
+    # in RAFT to make this a single call
+    queries_cai = wrap_array(queries)
+    _check_input_array(queries_cai, [np.dtype('float32'), np.dtype('byte'),
+                                     np.dtype('ubyte')])
+
+    cdef uint32_t n_queries = queries_cai.shape[0]
+
+    if neighbors is None:
+        neighbors = device_ndarray.empty((n_queries, k), dtype='uint32')
+
+    neighbors_cai = wrap_array(neighbors)
+    _check_input_array(neighbors_cai, [np.dtype('uint32')],
+                       exp_rows=n_queries, exp_cols=k)
+
+    if distances is None:
+        distances = device_ndarray.empty((n_queries, k), dtype='float32')
+
+    distances_cai = wrap_array(distances)
+    _check_input_array(distances_cai, [np.dtype('float32')],
+                       exp_rows=n_queries, exp_cols=k)
+
+    cdef cuvsCagraSearchParams* params = &search_params.params
+    cdef cuvsError_t search_status
+    cdef cydlpack.DLManagedTensor* queries_dlpack = \
+        cydlpack.dlpack_c(queries_cai)
+    cdef cydlpack.DLManagedTensor* neighbors_dlpack = \
+        cydlpack.dlpack_c(neighbors_cai)
+    cdef cydlpack.DLManagedTensor* distances_dlpack = \
+        cydlpack.dlpack_c(distances_cai)
+
+    with cuda_interruptible():
+        search_status = cuvsCagraSearch(
+            res_,
+            params,
+            index.index,
+            queries_dlpack,
+            neighbors_dlpack,
+            distances_dlpack
+        )
+
+    if search_status == cuvsError_t.CUVS_ERROR:
+        raise RuntimeError("Search failed.")
+
+    return (distances, neighbors)
diff --git a/python/cuvs/cuvs/test/__init__py b/python/cuvs/cuvs/test/__init__py
new file mode 100644
index 000000000..e69de29bb
diff --git a/python/cuvs/cuvs/test/ann_utils.py b/python/cuvs/cuvs/test/ann_utils.py
new file mode 100644
index 000000000..60db7f327
--- /dev/null
+++ b/python/cuvs/cuvs/test/ann_utils.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+
+
+def generate_data(shape, dtype):
+    if dtype == np.byte:
+        x = np.random.randint(-127, 128, size=shape, dtype=np.byte)
+    elif dtype == np.ubyte:
+        x = np.random.randint(0, 255, size=shape, dtype=np.ubyte)
+    else:
+        x = np.random.random_sample(shape).astype(dtype)
+
+    return x
+
+
+def calc_recall(ann_idx, true_nn_idx):
+    assert ann_idx.shape == true_nn_idx.shape
+    n = 0
+    for i in range(ann_idx.shape[0]):
+        n += np.intersect1d(ann_idx[i, :], true_nn_idx[i, :]).size
+    recall = n / ann_idx.size
+    return recall
diff --git a/python/cuvs/cuvs/test/test_cagra.py b/python/cuvs/cuvs/test/test_cagra.py
new file mode 100644
index 000000000..6074eee3a
--- /dev/null
+++ b/python/cuvs/cuvs/test/test_cagra.py
@@ -0,0 +1,175 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+import pytest
+from pylibraft.common import device_ndarray
+from sklearn.neighbors import NearestNeighbors
+from sklearn.preprocessing import normalize
+
+from cuvs.neighbors import cagra
+from cuvs.test.ann_utils import calc_recall, generate_data
+
+
+def run_cagra_build_search_test(
+    n_rows=10000,
+    n_cols=10,
+    n_queries=100,
+    k=10,
+    dtype=np.float32,
+    metric="euclidean",
+    intermediate_graph_degree=128,
+    graph_degree=64,
+    build_algo="ivf_pq",
+    array_type="device",
+    compare=True,
+    inplace=True,
+    add_data_on_build=True,
+    search_params={},
+):
+    dataset = generate_data((n_rows, n_cols), dtype)
+    if metric == "inner_product":
+        dataset = normalize(dataset, norm="l2", axis=1)
+    dataset_device = device_ndarray(dataset)
+
+    build_params = cagra.IndexParams(
+        metric=metric,
+        intermediate_graph_degree=intermediate_graph_degree,
+        graph_degree=graph_degree,
+        build_algo=build_algo,
+    )
+
+    if array_type == "device":
+        index = cagra.build_index(build_params, dataset_device)
+    else:
+        index = cagra.build_index(build_params, dataset)
+
+    if not add_data_on_build:
+        dataset_1 = dataset[: n_rows // 2, :]
+        dataset_2 = dataset[n_rows // 2 :, :]
+        indices_1 = np.arange(n_rows // 2, dtype=np.uint32)
+        indices_2 = np.arange(n_rows // 2, n_rows, dtype=np.uint32)
+        if array_type == "device":
+            dataset_1_device = device_ndarray(dataset_1)
+            dataset_2_device = device_ndarray(dataset_2)
+            indices_1_device = device_ndarray(indices_1)
+            indices_2_device = device_ndarray(indices_2)
+            index = cagra.extend(index, dataset_1_device, indices_1_device)
+            index = cagra.extend(index, dataset_2_device, indices_2_device)
+        else:
+            index = cagra.extend(index, dataset_1, indices_1)
+            index = cagra.extend(index, dataset_2, indices_2)
+
+    queries = generate_data((n_queries, n_cols), dtype)
+    out_idx = np.zeros((n_queries, k), dtype=np.uint32)
+    out_dist = np.zeros((n_queries, k), dtype=np.float32)
+
+    queries_device = device_ndarray(queries)
+    out_idx_device = device_ndarray(out_idx) if inplace else None
+    out_dist_device = device_ndarray(out_dist) if inplace else None
+
+    search_params = cagra.SearchParams(**search_params)
+
+    ret_output = cagra.search(
+        search_params,
+        index,
+        queries_device,
+        k,
+        neighbors=out_idx_device,
+        distances=out_dist_device,
+    )
+
+    if not inplace:
+        out_dist_device, out_idx_device = ret_output
+
+    if not compare:
+        return
+
+    out_idx = out_idx_device.copy_to_host()
+    out_dist = out_dist_device.copy_to_host()
+
+    # Calculate reference values with sklearn
+    skl_metric = {
+        "sqeuclidean": "sqeuclidean",
+        "inner_product": "cosine",
+        "euclidean": "euclidean",
+    }[metric]
+    nn_skl = NearestNeighbors(
+        n_neighbors=k, algorithm="brute", metric=skl_metric
+    )
+    nn_skl.fit(dataset)
+    skl_idx = nn_skl.kneighbors(queries, return_distance=False)
+
+    recall = calc_recall(out_idx, skl_idx)
+    assert recall > 0.7
+
+
+@pytest.mark.parametrize("inplace", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8])
+@pytest.mark.parametrize("array_type", ["device", "host"])
+@pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"])
+def test_cagra_dataset_dtype_host_device(
+    dtype, array_type, inplace, build_algo
+):
+    # Note that inner_product tests use normalized input which we cannot
+    # represent in int8, therefore we test only sqeuclidean metric here.
+    run_cagra_build_search_test(
+        dtype=dtype,
+        inplace=inplace,
+        array_type=array_type,
+        build_algo=build_algo,
+    )
+
+
+@pytest.mark.parametrize(
+    "params",
+    [
+        {
+            "intermediate_graph_degree": 64,
+            "graph_degree": 32,
+            "add_data_on_build": True,
+            "k": 1,
+            "metric": "euclidean",
+            "build_algo": "ivf_pq",
+        },
+        {
+            "intermediate_graph_degree": 32,
+            "graph_degree": 16,
+            "add_data_on_build": False,
+            "k": 5,
+            "metric": "sqeuclidean",
+            "build_algo": "ivf_pq",
+        },
+        {
+            "intermediate_graph_degree": 128,
+            "graph_degree": 32,
+            "add_data_on_build": True,
+            "k": 10,
+            "metric": "inner_product",
+            "build_algo": "nn_descent",
+        },
+    ],
+)
+def test_cagra_index_params(params):
+    # Note that inner_product tests use normalized input which we cannot
+    # represent in int8, therefore we test only sqeuclidean metric here.
+    run_cagra_build_search_test(
+        k=params["k"],
+        metric=params["metric"],
+        graph_degree=params["graph_degree"],
+        intermediate_graph_degree=params["intermediate_graph_degree"],
+        compare=False,
+        build_algo=params["build_algo"],
+    )
diff --git a/python/cuvs/cuvs/test/test_doctests.py b/python/cuvs/cuvs/test/test_doctests.py
new file mode 100644
index 000000000..6d56ffaa2
--- /dev/null
+++ b/python/cuvs/cuvs/test/test_doctests.py
@@ -0,0 +1,116 @@
+#
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import contextlib
+import doctest
+import inspect
+import io
+
+import pytest
+
+import cuvs.neighbors
+
+# Code adapted from https://github.com/rapidsai/cudf/blob/branch-23.02/python/cudf/cudf/tests/test_doctests.py # noqa
+
+
+def _name_in_all(parent, name):
+    return name in getattr(parent, "__all__", [])
+
+
+def _is_public_name(parent, name):
+    return not name.startswith("_")
+
+
+def _find_doctests_in_obj(obj, finder=None, criteria=None):
+    """Find all doctests in an object.
+
+    Parameters
+    ----------
+    obj : module or class
+        The object to search for docstring examples.
+    finder : doctest.DocTestFinder, optional
+        The DocTestFinder object to use. If not provided, a DocTestFinder is
+        constructed.
+    criteria : callable, optional
+        Callable indicating whether to recurse over members of the provided
+        object. If not provided, names not defined in the object's ``__all__``
+        property are ignored.
+
+    Yields
+    ------
+    doctest.DocTest
+        The next doctest found in the object.
+ """ + if finder is None: + finder = doctest.DocTestFinder() + if criteria is None: + criteria = _name_in_all + for docstring in finder.find(obj): + if docstring.examples: + yield docstring + for name, member in inspect.getmembers(obj): + # Only recurse over members matching the criteria + if not criteria(obj, name): + continue + # Recurse over the public API of modules (objects defined in the + # module's __all__) + if inspect.ismodule(member): + yield from _find_doctests_in_obj( + member, finder, criteria=_name_in_all + ) + # Recurse over the public API of classes (attributes not prefixed with + # an underscore) + if inspect.isclass(member): + yield from _find_doctests_in_obj( + member, finder, criteria=_is_public_name + ) + + # doctest finder seems to dislike cython functions, since + # `inspect.isfunction` doesn't return true for them. hack around this + if callable(member) and not inspect.isfunction(member): + for docstring in finder.find(member): + if docstring.examples: + yield docstring + + +# since the root pylibraft module doesn't import submodules (or define an +# __all__) we are explicitly adding all the submodules we want to run +# doctests for here +DOC_STRINGS = list(_find_doctests_in_obj(cuvs.neighbors)) +DOC_STRINGS.extend(_find_doctests_in_obj(cuvs.neighbors.cagra)) + + +@pytest.mark.parametrize( + "docstring", + DOC_STRINGS, + ids=lambda docstring: docstring.name, +) +def test_docstring(docstring): + # We ignore differences in whitespace in the doctest output, and enable + # the use of an ellipsis "..." to match any string in the doctest + # output. An ellipsis is useful for, e.g., memory addresses or + # imprecise floating point values. + optionflags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE + runner = doctest.DocTestRunner(optionflags=optionflags) + + # Capture stdout and include failing outputs in the traceback. + doctest_stdout = io.StringIO() + with contextlib.redirect_stdout(doctest_stdout): + runner.run(docstring) + results = runner.summarize() + assert not results.failed, ( + f"{results.failed} of {results.attempted} doctests failed for " + f"{docstring.name}:\n{doctest_stdout.getvalue()}" + ) diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index cba8d4adf..664cf2969 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,12 +19,11 @@ requires = [ "cuda-python>=11.7.1,<12.0a0", "cython>=3.0.0", "ninja", - "rmm==24.2.*", - "scikit-build>=0.13.1", - "setuptools", - "wheel", + "pylibraft==24.4.*", + "rmm==24.4.*", + "scikit-build-core[pyproject]>=0.7.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. -build-backend = "setuptools.build_meta" +build-backend = "scikit_build_core.build" [project] name = "cuvs" @@ -38,8 +37,8 @@ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ "cuda-python>=11.7.1,<12.0a0", - "numpy>=1.21", - "rmm==24.2.*", + "numpy>=1.23", + "rmm==24.4.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
 classifiers = [
     "Intended Audience :: Developers",
@@ -51,10 +50,10 @@ classifiers = [
 [project.optional-dependencies]
 test = [
     "cupy-cuda11x>=12.0.0",
-    "pytest",
+    "pylibraft==24.4.*",
     "pytest-cov",
+    "pytest==7.*",
     "scikit-learn",
-    "scipy",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.

 [project.urls]
@@ -109,3 +108,16 @@ skip = [
     "dist",
     "__init__.py",
 ]
+
+[tool.scikit-build]
+build-dir = "build/{wheel_tag}"
+cmake.build-type = "Release"
+cmake.minimum-version = "3.26.4"
+ninja.make-fallback = true
+sdist.reproducible = true
+wheel.packages = ["cuvs"]
+
+[tool.scikit-build.metadata.version]
+provider = "scikit_build_core.metadata.regex"
+input = "cuvs/VERSION"
+regex = "(?P<value>.*)"
diff --git a/python/cuvs/setup.cfg b/python/cuvs/setup.cfg
index 3574b4416..57b4954bc 100644
--- a/python/cuvs/setup.cfg
+++ b/python/cuvs/setup.cfg
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2024, NVIDIA CORPORATION.

 [isort]
 line_length=79
@@ -12,6 +12,7 @@ known_dask=
     distributed
     dask_cuda
 known_rapids=
+    cuvs
     nvtext
     cudf
     cuml
diff --git a/python/cuvs/setup.py b/python/cuvs/setup.py
deleted file mode 100644
index 4e825dab2..000000000
--- a/python/cuvs/setup.py
+++ /dev/null
@@ -1,37 +0,0 @@
-#
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from setuptools import find_packages
-from skbuild import setup
-
-
-def exclude_libcxx_symlink(cmake_manifest):
-    return list(
-        filter(
-            lambda name: not ("include/rapids/libcxx/include" in name),
-            cmake_manifest,
-        )
-    )
-
-
-packages = find_packages(include=["cuvs*"])
-setup(
-    # Don't want libcxx getting pulled into wheel builds.
-    cmake_process_manifest_hook=exclude_libcxx_symlink,
-    packages=packages,
-    package_data={key: ["VERSION", "*.pxd"] for key in packages},
-    zip_safe=False,
-)
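For reference, a minimal end-to-end sketch of the Python API this patch adds (build a CAGRA index, search it, and check recall with the new test utilities). Shapes, parameter values, and the recall threshold below are illustrative only, mirroring the defaults in run_cagra_build_search_test; it assumes a cuvs build from this branch plus pylibraft and scikit-learn are installed.

# Sketch only: exercises cagra.IndexParams / build_index / SearchParams /
# search and the helpers added in cuvs/test/ann_utils.py.
import numpy as np
from pylibraft.common import device_ndarray
from sklearn.neighbors import NearestNeighbors

from cuvs.neighbors import cagra
from cuvs.test.ann_utils import calc_recall, generate_data

n_rows, n_cols, n_queries, k = 10000, 10, 100, 10
dataset = generate_data((n_rows, n_cols), np.float32)
queries = generate_data((n_queries, n_cols), np.float32)

# Build a CAGRA graph from a device array (host arrays are also accepted,
# as exercised by test_cagra_dataset_dtype_host_device).
index_params = cagra.IndexParams(
    metric="sqeuclidean",
    intermediate_graph_degree=128,
    graph_degree=64,
    build_algo="ivf_pq",
)
index = cagra.build_index(index_params, device_ndarray(dataset))

# cagra.search returns (distances, neighbors); preallocated device outputs
# can instead be passed via the neighbors=/distances= keyword arguments.
distances, neighbors = cagra.search(
    cagra.SearchParams(), index, device_ndarray(queries), k
)

# Compare against exact brute-force neighbors, as the new tests do.
nn = NearestNeighbors(n_neighbors=k, algorithm="brute", metric="sqeuclidean")
nn.fit(dataset)
exact_idx = nn.kneighbors(queries, return_distance=False)
assert calc_recall(neighbors.copy_to_host(), exact_idx) > 0.7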