Merge branch 'main' into contributor-guide-pt2

NVIDIA · Oct 3, 2023 · 1646e44 · 1646e44
2 parents e03218b + af59bb6
commit 1646e44
Show file tree

Hide file tree

Showing 213 changed files with 4,527 additions and 6,926 deletions.
diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml
@@ -2,3 +2,6 @@
 # https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/
 
 enabled: true
+additional_trustees:
+  - ahendriksen
+  - gonzalobg
diff --git a/.github/workflows/build-and-test-linux.yml b/.github/workflows/build-and-test-linux.yml
@@ -0,0 +1,39 @@
+name: build and test
+
+defaults:
+  run:
+    shell: bash -exo pipefail {0}
+
+on:
+  workflow_call:
+    inputs:
+      cpu: {type: string, required: true}
+      test_name: {type: string, required: false}
+      build_script: {type: string, required: false}
+      test_script: {type: string, required: false}
+      container_image: {type: string, required: false}
+      run_tests: {type: boolean, required: false, default: true}
+
+jobs:
+  build:
+    name: Build ${{inputs.test_name}}
+    uses: ./.github/workflows/run-as-coder.yml
+    with:
+      name: Build ${{inputs.test_name}}
+      runner: linux-${{inputs.cpu}}-cpu16
+      image:  ${{ inputs.container_image }}
+      command: |
+        ${{ inputs.build_script }}
+
+  test:
+    needs: build
+    if:  ${{ !cancelled() && ( needs.build.result == 'success' || needs.build.result == 'skipped' ) && inputs.run_tests}}
+    name: Test ${{inputs.test_name}}
+    uses: ./.github/workflows/run-as-coder.yml
+    with:
+      name: Test ${{inputs.test_name}}
+      runner: linux-${{inputs.cpu}}-gpu-v100-latest-1
+      image: ${{inputs.container_image}}
+      command: |
+        nvidia-smi
+        ${{ inputs.test_script }}
diff --git a/.github/workflows/build-and-test-windows.yml b/.github/workflows/build-and-test-windows.yml
@@ -0,0 +1,49 @@
+name: Build Windows
+
+on:
+  workflow_call:
+    inputs:
+      test_name: {type: string, required: false}
+      build_script: {type: string, required: false}
+      container_image: {type: string, required: false}
+
+jobs:
+  prepare:
+    name: Build ${{inputs.test_name}}
+    runs-on: windows-2022
+    permissions:
+      id-token: write
+      contents: read
+    env:
+      SCCACHE_BUCKET: rapids-sccache-devs
+      SCCACHE_REGION: us-east-2
+      SCCACHE_IDLE_TIMEOUT: 0
+      SCCACHE_S3_USE_SSL: true
+      SCCACHE_S3_NO_CREDENTIALS: false
+    steps:
+      - name: Get AWS credentials for sccache bucket
+        uses: aws-actions/configure-aws-credentials@v2
+        with:
+          role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA
+          aws-region: us-east-2
+          role-duration-seconds: 43200 # 12 hours
+      - name: Fetch ${{ inputs.container_image }}
+        shell: powershell
+        run: docker pull ${{ inputs.container_image }}
+      - name: Run the tests
+        shell: powershell
+        run: >-
+            docker run ${{ inputs.container_image }} powershell -c "[System.Environment]::SetEnvironmentVariable('AWS_ACCESS_KEY_ID','${{env.AWS_ACCESS_KEY_ID}}')
+                                                                    [System.Environment]::SetEnvironmentVariable('AWS_SECRET_ACCESS_KEY','${{env.AWS_SECRET_ACCESS_KEY}}')
+                                                                    [System.Environment]::SetEnvironmentVariable('AWS_SESSION_TOKEN','${{env.AWS_SESSION_TOKEN }}')
+                                                                    [System.Environment]::SetEnvironmentVariable('SCCACHE_BUCKET','${{env.SCCACHE_BUCKET}}')
+                                                                    [System.Environment]::SetEnvironmentVariable('SCCACHE_REGION','${{env.SCCACHE_REGION}}')
+                                                                    [System.Environment]::SetEnvironmentVariable('SCCACHE_IDLE_TIMEOUT','${{env.SCCACHE_IDLE_TIMEOUT}}')
+                                                                    [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_USE_SSL','${{env.SCCACHE_S3_USE_SSL}}')
+                                                                    [System.Environment]::SetEnvironmentVariable('SCCACHE_S3_NO_CREDENTIALS','${{env.SCCACHE_S3_NO_CREDENTIALS}}')
+                                                                    git clone https://github.com/NVIDIA/cccl.git;
+                                                                    cd cccl;
+                                                                    git fetch --all;
+                                                                    git checkout ${{github.ref_name}};
+                                                                    ${{inputs.build_script}};"
+
diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
diff --git a/.github/workflows/build-examples.yml b/.github/workflows/build-examples.yml
@@ -0,0 +1,27 @@
+name: Build examples
+on:
+  workflow_call:
+    inputs:
+      per_cuda_compiler_matrix: {type: string, required: true}
+      devcontainer_version: {type: string, required: true}
+      is_windows: {type: boolean, required: true}
+
+jobs:
+  # Using a matrix to dispatch to the run-as-coder reusable workflow for each build configuration
+  # lets the examples for different configurations be built and run as separate jobs. For example,
+  # the examples for CUDA 12.1 + gcc 9.3 can run at the same time as those for CUDA 11.0 + clang 11.
+  build_examples:
+    name: Build examples
+    if: ${{ !inputs.is_windows }}
+    uses: ./.github/workflows/run-as-coder.yml
+    strategy:
+      fail-fast: false
+      matrix:
+        include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }}
+    with:
+      name: CCCL Examples CUDA${{matrix.cuda}} ${{matrix.compiler.name}}${{matrix.compiler.version}}
+      runner: linux-${{matrix.cpu}}-gpu-v100-latest-1
+      image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
+      command: |
+        cmake -S . --preset=examples -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA} -DCMAKE_CUDA_COMPILER=nvcc
+        ctest --preset=examples
diff --git a/.github/workflows/dispatch-build-and-test.yml b/.github/workflows/dispatch-build-and-test.yml
@@ -3,34 +3,40 @@ name: Dispatch build and test
 on:
   workflow_call:
     inputs:
+      project_name: {type: string, required: true}
       per_cuda_compiler_matrix: {type: string, required: true}
-      build_script: {type: string, required: false}
-      test_script: {type: string, required: false}
       devcontainer_version: {type: string, required: true}
+      is_windows: {type: boolean, required: true}
 
 jobs:
   # Using a matrix to dispatch to the build-and-test reusable workflow for each build configuration
   # ensures that the build/test steps can overlap across different configurations. For example,
   # the build step for CUDA 12.1 + gcc 9.3 can run at the same time as the test step for CUDA 11.0 + clang 11.
-  build_and_test:
-    name: ${{matrix.cpu}}
-    uses: ./.github/workflows/build-and-test.yml
+  build_and_test_linux:
+    name: build and test linux
+    if: ${{ !inputs.is_windows }}
+    uses: ./.github/workflows/build-and-test-linux.yml
     strategy:
       fail-fast: false
       matrix:
         include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }}
     with:
-      cuda_version: ${{ matrix.cuda }}
-      compiler: ${{ matrix.compiler.name }}
-      compiler_exe: ${{ matrix.compiler.exe }}
-      compiler_version: ${{ matrix.compiler.version }}
-      std: ${{ matrix.std }}
-      gpu_build_archs: ${{ matrix.gpu_build_archs }}
       cpu: ${{ matrix.cpu }}
-      os: ${{ matrix.os }}
-      build_script: ${{ inputs.build_script }}
-      build_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
-      test_script: ${{ inputs.test_script }}
-      run_tests: ${{ contains(matrix.jobs, 'test') && !contains(github.event.head_commit.message, 'skip-tests') }}
-      test_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
+      test_name: ${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}}
+      build_script: "./ci/build_${{ inputs.project_name }}.sh ${{matrix.compiler.exe}} ${{matrix.std}} ${{matrix.gpu_build_archs}}"
+      test_script:  "./ci/test_${{ inputs.project_name }}.sh  ${{matrix.compiler.exe}} ${{matrix.std}} ${{matrix.gpu_build_archs}}"
+      container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
+      run_tests: ${{ contains(matrix.jobs, 'test') && !contains(github.event.head_commit.message, 'skip-tests') && matrix.os != 'windows-2022' }}
 
+  build_and_test_windows:
+    name: build and test windows
+    if: ${{ inputs.is_windows }}
+    uses: ./.github/workflows/build-and-test-windows.yml
+    strategy:
+      fail-fast: false
+      matrix:
+        include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }}
+    with:
+      test_name: ${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}}
+      build_script: "./ci/windows/build_${{ inputs.project_name }}.ps1 ${{matrix.std}} ${{matrix.gpu_build_archs}}"
+      container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cuda${{matrix.cuda}}-${{matrix.compiler.name}}${{matrix.compiler.version}}-${{matrix.os}}
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
@@ -33,23 +33,23 @@ concurrency:
 
 jobs:
   compute-matrix:
-    name: Compute matrix 
+    name: Compute matrix
     runs-on: ubuntu-latest
     outputs:
       DEVCONTAINER_VERSION: ${{steps.set-outputs.outputs.DEVCONTAINER_VERSION}}
       NVCC_FULL_MATRIX: ${{steps.set-outputs.outputs.NVCC_FULL_MATRIX}}
       CUDA_VERSIONS: ${{steps.set-outputs.outputs.CUDA_VERSIONS}}
       HOST_COMPILERS: ${{steps.set-outputs.outputs.HOST_COMPILERS}}
       PER_CUDA_COMPILER_MATRIX: ${{steps.set-outputs.outputs.PER_CUDA_COMPILER_MATRIX}}
-      NVRTC_MATRIX: ${{steps.set-outputs.outputs.matrix}}
+      NVRTC_MATRIX: ${{steps.set-outputs.outputs.NVRTC_MATRIX}}
     steps:
       - name: Checkout repo
         uses: actions/checkout@v3
       - name: Compute matrix outputs
         id: set-outputs
         run: |
           .github/actions/compute-matrix/compute-matrix.sh ci/matrix.yaml pull_request
-      
+
   nvrtc:
     name: NVRTC CUDA${{matrix.cuda}} C++${{matrix.std}}
     needs: compute-matrix
@@ -76,10 +76,10 @@ jobs:
         cuda_version: ${{ fromJSON(needs.compute-matrix.outputs.CUDA_VERSIONS) }}
         compiler: ${{ fromJSON(needs.compute-matrix.outputs.HOST_COMPILERS) }}
     with:
+      project_name: "thrust"
       per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ format('{0}-{1}', matrix.cuda_version, matrix.compiler) ]) }}
-      build_script: "./ci/build_thrust.sh"
-      test_script: "./ci/test_thrust.sh"
       devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }}
+      is_windows: ${{ contains(matrix.compiler, 'cl') }}
 
   cub:
     name: CUB CUDA${{ matrix.cuda_version }} ${{ matrix.compiler }}
@@ -91,10 +91,10 @@ jobs:
         cuda_version: ${{ fromJSON(needs.compute-matrix.outputs.CUDA_VERSIONS) }}
         compiler: ${{ fromJSON(needs.compute-matrix.outputs.HOST_COMPILERS) }}
     with:
+      project_name: "cub"
       per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ format('{0}-{1}', matrix.cuda_version, matrix.compiler) ]) }}
-      build_script: "./ci/build_cub.sh"
-      test_script: "./ci/test_cub.sh"
       devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }}
+      is_windows: ${{ contains(matrix.compiler, 'cl') }}
 
   libcudacxx:
     name: libcudacxx CUDA${{ matrix.cuda_version }} ${{ matrix.compiler }}
@@ -106,27 +106,24 @@ jobs:
         cuda_version: ${{ fromJSON(needs.compute-matrix.outputs.CUDA_VERSIONS) }}
         compiler: ${{ fromJSON(needs.compute-matrix.outputs.HOST_COMPILERS) }}
     with:
+      project_name: "libcudacxx"
       per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ format('{0}-{1}', matrix.cuda_version, matrix.compiler) ]) }}
-      build_script: "./ci/build_libcudacxx.sh"
-      test_script: "./ci/test_libcudacxx.sh"
       devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }}
+      is_windows: ${{ contains(matrix.compiler, 'cl') }}
 
   examples:
     name: CCCL Examples
     needs: compute-matrix
-    if: ${{ !contains(github.event.head_commit.message, 'skip-tests') }}
+    uses: ./.github/workflows/build-examples.yml
     strategy:
       fail-fast: false
       matrix:
-        include: ${{ fromJSON(needs.compute-matrix.outputs.NVCC_FULL_MATRIX) }}
-    uses: ./.github/workflows/run-as-coder.yml
+        cuda_version: ${{ fromJSON(needs.compute-matrix.outputs.CUDA_VERSIONS) }}
+        compiler: ${{ fromJSON(needs.compute-matrix.outputs.HOST_COMPILERS) }}
     with:
-      name: CCCL Examples CUDA${{matrix.cuda}} ${{matrix.compiler.name}}${{matrix.compiler.version}}
-      runner: linux-${{matrix.cpu}}-gpu-v100-latest-1
-      image: rapidsai/devcontainers:${{needs.compute-matrix.outputs.DEVCONTAINER_VERSION}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
-      command: |
-        cmake -S . --preset=examples -DCCCL_EXAMPLE_CPM_TAG=${GITHUB_SHA} -DCMAKE_CUDA_COMPILER=nvcc
-        ctest --preset=examples
+      per_cuda_compiler_matrix: ${{ toJSON(fromJSON(needs.compute-matrix.outputs.PER_CUDA_COMPILER_MATRIX)[ format('{0}-{1}', matrix.cuda_version, matrix.compiler) ]) }}
+      devcontainer_version: ${{ needs.compute-matrix.outputs.DEVCONTAINER_VERSION }}
+      is_windows: ${{ contains(matrix.compiler, 'cl') }}
 
   # This job is the final job that runs after all other jobs and is used for branch protection status checks.
   # See: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks

diff --git a/.gitignore b/.gitignore
@@ -2,4 +2,5 @@
 *build*/
 .cache
 .aws
-.config
+.config
+_deps/catch2-src/
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -2,6 +2,11 @@
 # 3.21 is the minimum for the developer build.
 cmake_minimum_required(VERSION 3.15)
 
+# sccache cannot handle the -Fd option generating pdb files
+if ("MSVC" STREQUAL "${CMAKE_CXX_COMPILER_ID}")
+  cmake_policy(SET CMP0141 NEW)
+endif()
+
 # Determine whether CCCL is the top-level project or included into
 # another project via add_subdirectory()
 if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_LIST_DIR}")

diff --git a/README.md b/README.md
@@ -1,3 +1,5 @@
+[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/NVIDIA/cccl?quickstart=1&devcontainer_path=.devcontainer%2Fdevcontainer.json)
+
 # CUDA C++ Core Libraries (CCCL)
 
 Welcome to the CUDA C++ Core Libraries (CCCL) where our mission is to make CUDA C++ more delightful.
@@ -363,3 +365,39 @@ The deprecation period will depend on the impact of the change, but will usually
 ## CI Pipeline Overview
 
 For a detailed overview of the CI pipeline, see [ci-overview.md](ci-overview.md).
+
+## Related Projects
+
+Projects that are related to CCCL's mission to make CUDA C++ more delightful:
+- [cuCollections](https://github.com/NVIDIA/cuCollections) - GPU accelerated data structures like hash tables
+- [NVBench](https://github.com/NVIDIA/nvbench) - Benchmarking library tailored for CUDA applications
+- [stdexec](https://github.com/nvidia/stdexec) - Reference implementation for the Senders asynchronous programming model
+
+## Projects Using CCCL
+
+Does your project use CCCL? [Open a PR to add your project to this list!](https://github.com/NVIDIA/cccl/edit/main/README.md)
+
+- [AmgX](https://github.com/NVIDIA/AMGX) - Multi-grid linear solver library
+- [ColossalAI](https://github.com/hpcaitech/ColossalAI) - Tools for writing distributed deep learning models
+- [cuDF](https://github.com/rapidsai/cudf) - Algorithms and file readers for ETL data analytics
+- [cuGraph](https://github.com/rapidsai/cugraph) - Algorithms for graph analytics
+- [cuML](https://github.com/rapidsai/cuml) - Machine learning algorithms and primitives
+- [CuPy](https://cupy.dev) - NumPy & SciPy for GPU
+- [cuSOLVER](https://developer.nvidia.com/cusolver) - Dense and sparse linear solvers
+- [cuSpatial](https://github.com/rapidsai/cuspatial) - Algorithms for geospatial operations
+- [GooFit](https://github.com/GooFit/GooFit) - Library for maximum-likelihood fits
+- [HeavyDB](https://github.com/heavyai/heavydb) - SQL database engine
+- [HOOMD](https://github.com/glotzerlab/hoomd-blue) - Monte Carlo and molecular dynamics simulations
+- [HugeCTR](https://github.com/NVIDIA-Merlin/HugeCTR) - GPU-accelerated recommender framework
+- [Hydra](https://github.com/MultithreadCorner/Hydra) - High-energy physics data analysis
+- [Hypre](https://github.com/hypre-space/hypre) - Multigrid linear solvers
+- [LightSeq](https://github.com/bytedance/lightseq) - Training and inference for sequence processing and generation
+- [PyTorch](https://github.com/pytorch/pytorch) - Tensor and neural network computations
+- [Qiskit](https://github.com/Qiskit/qiskit-aer) - High performance simulator for quantum circuits
+- [QUDA](https://github.com/lattice/quda) - Lattice quantum chromodynamics (QCD) computations
+- [RAFT](https://github.com/rapidsai/raft) - Algorithms and primitives for machine learning
+- [TensorFlow](https://github.com/tensorflow/tensorflow) - End-to-end platform for machine learning
+- [TensorRT](https://github.com/NVIDIA/TensorRT) - Deep learning inference
+- [tsne-cuda](https://github.com/CannyLab/tsne-cuda) - Stochastic Neighborhood Embedding library
+- [Visualization Toolkit (VTK)](https://gitlab.kitware.com/vtk/vtk) - Rendering and visualization library
+- [XGBoost](https://github.com/dmlc/xgboost) - Gradient boosting machine learning algorithms
diff --git a/ci/build_common.sh b/ci/build_common.sh
@@ -101,7 +101,7 @@ function configure(){
 function build(){
     local BUILD_NAME=$1
     source "./sccache_stats.sh" start
-    cmake --build $BUILD_DIR --parallel $PARALLEL_LEVEL
+    cmake --build $BUILD_DIR --parallel $PARALLEL_LEVEL -v
     echo "${BUILD_NAME} build complete"
     source "./sccache_stats.sh" end
 }