Skip to content

Commit

Permalink
Add Rust bindings for CAGRA (#34)
Browse files Browse the repository at this point in the history
  • Loading branch information
benfred authored Mar 4, 2024
1 parent 6040aa0 commit 8e6979f
Show file tree
Hide file tree
Showing 21 changed files with 1,240 additions and 1 deletion.
13 changes: 13 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ jobs:
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
rust-build:
needs: cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
arch: "amd64"
date: ${{ inputs.date }}
container_image: "rapidsai/ci-conda:latest"
node_type: "gpu-v100-latest-1"
run_script: "ci/build_rust.sh"
sha: ${{ inputs.sha }}
python-build:
needs: [cpp-build]
secrets: inherit
Expand Down
11 changes: 11 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ jobs:
- conda-python-build
- conda-python-tests
- docs-build
- rust-build
- wheel-build-cuvs
- wheel-tests-cuvs
- devcontainer
Expand Down Expand Up @@ -72,6 +73,16 @@ jobs:
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
rust-build:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.02
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_rust.sh"
wheel-build-cuvs:
needs: checks
secrets: inherit
Expand Down
61 changes: 61 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,67 @@ cuvsCagraIndexParamsDestroy(index_params);
cuvsResourcesDestroy(res);
```

### Rust API

```rust
use cuvs::cagra::{Index, IndexParams, SearchParams};
use cuvs::{ManagedTensor, Resources, Result};

use ndarray::s;
use ndarray_rand::rand_distr::Uniform;
use ndarray_rand::RandomExt;

/// Example showing how to index and search data with CAGRA.
///
/// Builds a CAGRA index over a randomly generated dataset, searches it using
/// the first few rows of that same dataset as queries, and prints the
/// resulting neighbors and distances.
fn cagra_example() -> Result<()> {
let res = Resources::new()?;

// Create a new random dataset to index: n_datapoints rows of n_features
// f32 values, each sampled uniformly from [0, 1)
let n_datapoints = 65536;
let n_features = 512;
let dataset =
ndarray::Array::<f32, _>::random((n_datapoints, n_features), Uniform::new(0., 1.0));

// Build the CAGRA index using the default index parameters
let build_params = IndexParams::new()?;
let index = Index::build(&res, &build_params, &dataset)?;
println!(
"Indexed {}x{} datapoints into cagra index",
n_datapoints, n_features
);

// Use the first 4 points from the dataset as queries: each query is
// expected to come back as its own nearest neighbor
let n_queries = 4;
let queries = dataset.slice(s![0..n_queries, ..]);

// Number of neighbors to retrieve per query
let k = 10;

// The CAGRA search API requires queries and outputs to be in device memory:
// copy the query data over, and allocate new device memory for the
// distances / neighbors outputs (one row per query, k columns)
let queries = ManagedTensor::from(&queries).to_device(&res)?;
let mut neighbors_host = ndarray::Array::<u32, _>::zeros((n_queries, k));
let neighbors = ManagedTensor::from(&neighbors_host).to_device(&res)?;

let mut distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
let distances = ManagedTensor::from(&distances_host).to_device(&res)?;

let search_params = SearchParams::new()?;

index.search(&res, &search_params, &queries, &neighbors, &distances)?;

// Copy the search results back into the host arrays allocated above
distances.to_host(&res, &mut distances_host)?;
neighbors.to_host(&res, &mut neighbors_host)?;

// The nearest neighbor of each query should be the query itself, since the
// queries were taken from the indexed dataset
println!("Neighbors {:?}", neighbors_host);
println!("Distances {:?}", distances_host);
Ok(())
}
```


## Contributing

Expand Down
40 changes: 40 additions & 0 deletions ci/build_rust.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.

# CI entry point for the rust bindings: creates a conda environment with the
# rust toolchain, installs the libcuvs/libraft artifacts produced by the
# earlier C++ build job, and then runs `cargo test` in the rust workspace.

set -euo pipefail

rapids-logger "Create test conda environment"
. /opt/conda/etc/profile.d/conda.sh

rapids-dependency-file-generator \
  --output conda \
  --file_key rust \
  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

rapids-mamba-retry env create --force -f env.yaml -n rust

# seeing failures on activating the environment here on unbound locals
# apply workaround from https://github.com/conda/conda/issues/8186#issuecomment-532874667
set +eu
conda activate rust
set -eu

rapids-print-env

# we need to set up LIBCLANG_PATH to allow rust bindgen to work,
# grab it from the conda env.
# `-print -quit` stops at the first match without piping into `head`, so the
# lookup cannot trip `pipefail` via SIGPIPE. The assignment is kept separate
# from `export` so that failures are not masked, and a missing libclang is
# reported here rather than as an obscure bindgen error later on.
libclang_so=$(find /opt/conda -name libclang.so -print -quit 2>/dev/null || true)
if [[ -z "${libclang_so}" ]]; then
  echo "error: could not find libclang.so under /opt/conda (required by rust bindgen)" >&2
  exit 1
fi
LIBCLANG_PATH=$(dirname "${libclang_so}")
export LIBCLANG_PATH
echo "LIBCLANG_PATH=$LIBCLANG_PATH"

rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)

# installing libcuvs/libraft will speed up the rust build substantially
rapids-mamba-retry install \
  --channel "${CPP_CHANNEL}" \
  libcuvs \
  libraft

# build and test the rust bindings
cd rust
cargo test
16 changes: 15 additions & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,12 @@ files:
- cupy
- docs
- py_version
- test_py_cuvs
rust:
output: none
includes:
- build
- cuda
- rust
py_build_py_cuvs:
output: pyproject
pyproject_dir: python/cuvs
Expand Down Expand Up @@ -308,6 +313,15 @@ dependencies:
- recommonmark
- sphinx-copybutton
- sphinx-markdown-tables
rust:
common:
- output_types: [conda]
packages:
- make
- rust
# clang/libclang only needed for bindgen support
- clang
- libclang
build_wheels:
common:
- output_types: [requirements, pyproject]
Expand Down
16 changes: 16 additions & 0 deletions rust/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[workspace]
members = [
"cuvs",
"cuvs-sys",
]
resolver = "2"

[workspace.package]
version = "0.1.0"
edition = "2021"
repository = "https://github.com/rapidsai/cuvs"
homepage = "https://github.com/rapidsai/cuvs"
description = "RAPIDS vector search library"
authors = ["NVIDIA Corporation"]
license = "Apache-2.0"

16 changes: 16 additions & 0 deletions rust/cuvs-sys/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[package]
name = "cuvs-sys"
description = "Low-level rust bindings to libcuvs"
# `links` lets the build script pass metadata (exposed as DEP_CUVS_* environment
# variables) to the build scripts of crates that depend on this one
links = "cuvs"
version.workspace = true
edition.workspace = true
repository.workspace = true
homepage.workspace = true
authors.workspace = true
license.workspace = true

[dependencies]

[build-dependencies]
# Use default (caret) requirements rather than open-ended `>=` bounds, so that a
# future semver-incompatible release of these crates can't be selected and
# break the build script
cmake = "0.1"
bindgen = "0.69"
112 changes: 112 additions & 0 deletions rust/cuvs-sys/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

use std::env;
use std::io::BufRead;
use std::path::PathBuf;

/*
    TODO:
    * it would be nice to use already built versions of libcuvs_c / libcuvs
      if they already exist, but this might not be possible here using cmake-rs
      (https://github.com/rust-lang/cmake-rs/issues/111)
    * figure out how this works with rust packaging: does the c++ code
      need to be in a subdirectory? If so, would a symlink work here?
      Should we be using static linking?
*/
/// Looks up a `<key>:STRING=<value>` entry in the lines of a CMakeCache.txt
/// and returns its value.
///
/// Panics if no line starts with `<key>:STRING=`, since the build cannot
/// proceed without the flags recorded by the cuvs cmake build.
fn cmake_cache_value<'a>(cache_lines: &'a [String], key: &str) -> &'a str {
    let prefix = format!("{}:STRING=", key);
    cache_lines
        .iter()
        .find_map(|line| line.strip_prefix(prefix.as_str()))
        .unwrap_or_else(|| panic!("failed to find {} in CMakeCache.txt", key))
}

/// Build script for the `cuvs-sys` crate.
///
/// Builds the cuvs C API library with cmake, emits the cargo directives
/// needed to link against it, and runs bindgen over `cuvs_c_wrapper.h` to
/// generate `cuvs_bindings.rs` in `OUT_DIR`.
fn main() {
    // build the cuvs c-api library with cmake, and link it into this crate
    let cuvs_build = cmake::Config::new("../../cpp")
        .configure_arg("-DBUILD_TESTS:BOOL=OFF")
        .configure_arg("-DBUILD_C_LIBRARY:BOOL=ON")
        .build();

    println!(
        "cargo:rustc-link-search=native={}",
        cuvs_build.join("lib").display()
    );
    println!("cargo:rustc-link-lib=dylib=cuvs_c");
    println!("cargo:rustc-link-lib=dylib=cudart");

    // we need some extra flags both to link against cuvs, and also to run bindgen
    // specifically we need to:
    //  * -I flags to set the include path to pick up cuda_runtime.h during bindgen
    //  * -rpath-link settings to link to libraft/libcuvs.so etc during the link
    // Rather than redefine the logic to set all these things, lets pick up the values from
    // the cuvs cmake build in its CMakeCache.txt and set from there
    let out_path = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR is not set"));

    let cmake_cache_path = out_path.join("build").join("CMakeCache.txt");
    let cmake_cache: Vec<String> = std::io::BufReader::new(
        std::fs::File::open(&cmake_cache_path).expect("Failed to open cuvs CMakeCache.txt"),
    )
    .lines()
    .map(|x| x.expect("Couldn't parse line from CMakeCache.txt"))
    .collect();

    let cmake_cxx_flags = cmake_cache_value(&cmake_cache, "CMAKE_CXX_FLAGS");
    let cmake_linker_flags = cmake_cache_value(&cmake_cache, "CMAKE_EXE_LINKER_FLAGS");

    // need to propagate the rpath-link settings to dependent crates =(
    // (since this crate declares `links = "cuvs"`, this will get added as
    // DEP_CUVS_CMAKE_LINKER_FLAGS in the build scripts of dependent crates)
    println!("cargo:cmake_linker_flags={}", cmake_linker_flags);

    // add the required rpath-link flags to the cargo build
    // (split_whitespace tolerates repeated spaces between flags)
    cmake_linker_flags
        .split_whitespace()
        .filter(|flag| flag.starts_with("-Wl,-rpath-link"))
        .for_each(|flag| println!("cargo:rustc-link-arg={}", flag));

    // run bindgen to automatically create rust bindings for the cuvs c-api
    bindgen::Builder::default()
        .header("cuvs_c_wrapper.h")
        .clang_arg("-I../../cpp/include")
        // needed to find cuda_runtime.h
        // (split_whitespace rather than split(' '): an empty or multiply-spaced
        // CMAKE_CXX_FLAGS must not turn into empty-string arguments for clang)
        .clang_args(cmake_cxx_flags.split_whitespace())
        // include dlpack from the cmake build dependencies
        .clang_arg(format!(
            "-I{}/build/_deps/dlpack-src/include/",
            out_path.display()
        ))
        // add `must_use' declarations to functions returning cuvsError_t
        // (so that if you don't check the error code a compile warning is
        // generated)
        .must_use_type("cuvsError_t")
        // Only generate bindings for cuvs/cagra types and functions
        .allowlist_type("(cuvs|cagra|DL).*")
        .allowlist_function("(cuvs|cagra).*")
        .rustified_enum("(cuvs|cagra|DL).*")
        // also need some basic cuda mem functions
        // (TODO: should we be adding in RMM support instead here?)
        .allowlist_function("(cudaMalloc|cudaFree|cudaMemcpy)")
        .rustified_enum("cudaError")
        .generate()
        .expect("Unable to generate cagra_c bindings")
        .write_to_file(out_path.join("cuvs_bindings.rs"))
        .expect("Failed to write generated rust bindings");
}
20 changes: 20 additions & 0 deletions rust/cuvs-sys/cuvs_c_wrapper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// wrapper file containing all the C APIs that rust bindings should be
// automatically generated for
#include <cuvs/core/c_api.h>
#include <cuvs/neighbors/cagra.h>
Loading

0 comments on commit 8e6979f

Please sign in to comment.