From 7428731e8d5c23b3752c808db15980069bfa90d9 Mon Sep 17 00:00:00 2001
From: Peter Heywood <peethwd@gmail.com>
Date: Fri, 1 Dec 2023 15:06:31 +0000
Subject: [PATCH 1/5] Remove official support for CUDA 11.0 and 11.1

This is to support using newer CCCL (cuda 11.0 not supported) and simplify the pyflamegpu distribution matrix (11.1).

11.0 currently builds and passes tests on linux, but does not build on windows.
11.1 currently builds and passes tests on both.

Workarounds and warnings specific to these versions are not being removed just in case, and CMake will only warn, not error, if they are used (as they currently work, in case 11.2+ is not available somewhere).

Also fixes some typos as and when encountered
---
 .github/ISSUE_TEMPLATE/bug_report.yml     |  2 +-
 .github/workflows/CMake.yml               |  2 +-
 .github/workflows/Draft-Release.yml       |  4 ++--
 .github/workflows/Ubuntu.yml              |  6 +++---
 .github/workflows/Windows-Tests.yml       |  2 +-
 .github/workflows/Windows.yml             |  6 +++---
 CMakeLists.txt                            | 17 +++++++++++------
 README.md                                 |  7 ++-----
 cmake/CUDAArchitectures.cmake             |  2 +-
 cmake/common.cmake                        | 14 +++++++++-----
 src/flamegpu/detail/compute_capability.cu |  4 ++--
 11 files changed, 36 insertions(+), 30 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index a97636edc..30cf78080 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -48,7 +48,7 @@ body:
     attributes:
       label: CUDA Versions
       description: 
-      placeholder: e.g. CUDA 11.0, CUDA 12.2
+      placeholder: e.g. CUDA 11.2, CUDA 12.2
     validations:
       required: false
   - type: input
diff --git a/.github/workflows/CMake.yml b/.github/workflows/CMake.yml
index 9975a424e..75e27cc0b 100644
--- a/.github/workflows/CMake.yml
+++ b/.github/workflows/CMake.yml
@@ -26,7 +26,7 @@ jobs:
       # Multiplicative build matrix
       matrix:
         cudacxx:
-          - cuda: "11.0"
+          - cuda: "11.2"
             cuda_arch: "35"
             hostcxx: gcc-8
             os: ubuntu-20.04
diff --git a/.github/workflows/Draft-Release.yml b/.github/workflows/Draft-Release.yml
index 984d609c5..4570d15db 100644
--- a/.github/workflows/Draft-Release.yml
+++ b/.github/workflows/Draft-Release.yml
@@ -49,7 +49,7 @@ jobs:
             cuda_arch: "35-real;90-real;90-virtual"
             hostcxx: gcc-9
             os: ubuntu-20.04
-          - cuda: "11.0"
+          - cuda: "11.2"
             cuda_arch: "35-real;80-real;80-virtual"
             hostcxx: gcc-8
             os: ubuntu-20.04
@@ -202,7 +202,7 @@ jobs:
             cuda_arch: "35-real;90-real;90-virtual"
             hostcxx: "Visual Studio 16 2019"
             os: windows-2019
-          - cuda: "11.0.3"
+          - cuda: "11.2.2"
             cuda_arch: "35-real;80-real;80-virtual"
             hostcxx: "Visual Studio 16 2019"
             os: windows-2019
diff --git a/.github/workflows/Ubuntu.yml b/.github/workflows/Ubuntu.yml
index 646e33a6c..df9a569e7 100644
--- a/.github/workflows/Ubuntu.yml
+++ b/.github/workflows/Ubuntu.yml
@@ -37,7 +37,7 @@ jobs:
             cuda_arch: "35"
             hostcxx: gcc-11
             os: ubuntu-22.04
-          - cuda: "11.0"
+          - cuda: "11.2"
             cuda_arch: "35"
             hostcxx: gcc-8
             os: ubuntu-20.04
@@ -56,7 +56,7 @@ jobs:
         exclude:
           # Exclude VIS=ON for oldest cuda.
           - cudacxx:
-              cuda: "11.0"
+              cuda: "11.2"
             VISUALISATION: "ON"
           # Exclude beltsoff builds for old cuda's
           - cudacxx:
@@ -64,7 +64,7 @@ jobs:
             config:
               name: "Beltsoff"
           - cudacxx:
-              cuda: "11.0"
+              cuda: "11.2"
             config:
               name: "Beltsoff"
           # Exclude beltsoff vis builds to keep the matrix lighter.
diff --git a/.github/workflows/Windows-Tests.yml b/.github/workflows/Windows-Tests.yml
index 4762c62e7..284976693 100644
--- a/.github/workflows/Windows-Tests.yml
+++ b/.github/workflows/Windows-Tests.yml
@@ -31,7 +31,7 @@ jobs:
             cuda_arch: "35"
             hostcxx: "Visual Studio 17 2022"
             os: windows-2022
-          - cuda: "11.0.3"
+          - cuda: "11.2.2"
             cuda_arch: "35"
             hostcxx: "Visual Studio 16 2019"
             os: windows-2019
diff --git a/.github/workflows/Windows.yml b/.github/workflows/Windows.yml
index 4511c3538..8b5b7e270 100644
--- a/.github/workflows/Windows.yml
+++ b/.github/workflows/Windows.yml
@@ -37,7 +37,7 @@ jobs:
             cuda_arch: "35"
             hostcxx: "Visual Studio 17 2022"
             os: windows-2022
-          - cuda: "11.0.3"
+          - cuda: "11.2.2"
             cuda_arch: "35"
             hostcxx: "Visual Studio 16 2019"
             os: windows-2019
@@ -56,7 +56,7 @@ jobs:
         exclude:
           # Exclude VIS=ON for oldest cuda.
           - cudacxx:
-              cuda: "11.0.3"
+              cuda: "11.2.2"
             VISUALISATION: "ON"
           # Exclude beltsoff builds for old cuda's
           - cudacxx:
@@ -64,7 +64,7 @@ jobs:
             config:
               name: "Beltsoff"
           - cudacxx:
-              cuda: "11.0.3"
+              cuda: "11.2.2"
             config:
               name: "Beltsoff"
           # Exclude beltsoff vis builds to keep the matrix lighter.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f634809aa..d004369bf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -49,10 +49,15 @@ if(CMAKE_CUDA_COMPILER)
     flamegpu_set_cuda_architectures()
 endif()
 
-# Set the minimum supported version of CUDA for FLAME GPU, currently 11.0
-set(MINIMUM_SUPPORTED_CUDA_VERSION 11.0)
-# Set the minimum, usable, but deprecated CUDA version. Currently there are no deprecated versions
+# Set the minimum supported version of CUDA for FLAME GPU, currently 11.2
+set(MINIMUM_SUPPORTED_CUDA_VERSION 11.2)
+# Set the minimum, potentially usable, but unsupported CUDA version.
+# Currently 11.0 on linux and 11.1 on windows (due to CCCL support).
+# CUDA 11.1 is not supported to simplify python releases
 set(MINIMUM_CUDA_VERSION 11.0)
+if(WIN32)
+    set(MINIMUM_CUDA_VERSION 11.1)
+endif()
 
 # If the CUDA compiler is too old, trigger a docs only build.
 if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS ${MINIMUM_CUDA_VERSION})
@@ -60,9 +65,9 @@ if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS ${MINIMUM_CUDA_VERSION})
     message(STATUS "Documentation-only build: CUDA ${MINIMUM_SUPPORTED_CUDA_VERSION} or greater is required for compilation.")
 endif()
 
-# If the CUDA compiler is atleast the minimum deprecated version, but less than the minimum actually supported version, issue a dev warning.
+# If the CUDA compiler is at least the minimum (unsupported) version, but less than the minimum actually supported version, issue a warning.
 if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL ${MINIMUM_CUDA_VERSION} AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS ${MINIMUM_SUPPORTED_CUDA_VERSION})
-    message(DEPRECATION "Support for CUDA verisons <= ${MINIMUM_SUPPORTED_CUDA_VERSION} is deprecated and will be removed in a future release.")
+    message(WARNING "CUDA versions >= ${MINIMUM_CUDA_VERSION} && < ${MINIMUM_SUPPORTED_CUDA_VERSION} are unsupported but may work on some platforms.")
 endif()
 
 # If CUDA is not available, or the minimum version is too low only build the docs.
@@ -78,7 +83,7 @@ endif()
 # include for dependent modules
 include(CMakeDependentOption)
 
-# Option to enable building all examples, defaults to ON if FLAMEPGU is the top level cmake, else OFF
+# Option to enable building all examples, defaults to ON if FLAMEGPU is the top level cmake, else OFF
 cmake_dependent_option(FLAMEGPU_BUILD_ALL_EXAMPLES "Enable building all FLAMEGPU examples" ON "FLAMEGPU_PROJECT_IS_TOP_LEVEL" OFF)
 
 # Options to enable building individual examples, if FLAMEGPU_BUILD_ALL_EXAMPLES is off. 
diff --git a/README.md b/README.md
index 10bd8cec3..7310d4d6e 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@ Building FLAME GPU has the following requirements. There are also optional depen
 
 + [CMake](https://cmake.org/download/) `>= 3.18`
   + `>= 3.20` if building python bindings using a multi-config generator (Visual Studio, Eclipse or Ninja Multi-Config)
-+ [CUDA](https://developer.nvidia.com/cuda-downloads) `>= 11.0` and a [Compute Capability](https://developer.nvidia.com/cuda-gpus) `>= 3.5` NVIDIA GPU.
++ [CUDA](https://developer.nvidia.com/cuda-downloads) `>= 11.2` and a [Compute Capability](https://developer.nvidia.com/cuda-gpus) `>= 3.5` NVIDIA GPU.
 + C++17 capable C++ compiler (host), compatible with the installed CUDA version
   + [Microsoft Visual Studio 2019 or 2022](https://visualstudio.microsoft.com/) (Windows)
     + *Note:* Visual Studio must be installed before the CUDA toolkit is installed. See the [CUDA installation guide for Windows](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html) for more information.
@@ -247,7 +247,7 @@ Several environmental variables are used or required by FLAME GPU 2.
 
 | Environment Variable                 | Description |
 |--------------------------------------|-------------|
-| `CUDA_PATH`                          | Required when using RunTime Compilation (RTC), pointing to the root of the CUDA Toolkit where NVRTC resides. <br /> i.e. `/usr/local/cuda-11.0/` or `C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.0`. <br /> Alternatively `CUDA_HOME` may be used if `CUDA_PATH` was not set. |
+| `CUDA_PATH`                          | Required when using RunTime Compilation (RTC), pointing to the root of the CUDA Toolkit where NVRTC resides. <br /> i.e. `/usr/local/cuda-11.2/` or `C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2`. <br /> Alternatively `CUDA_HOME` may be used if `CUDA_PATH` was not set. |
 | `FLAMEGPU_INC_DIR`                   | When RTC compilation is required, if the location of the `include` directory cannot be found it must be specified using the `FLAMEGPU_INC_DIR` environment variable. |
 | `FLAMEGPU_TMP_DIR`                   | FLAME GPU may cache some files to a temporary directory on the system, using the temporary directory returned by [`std::filesystem::temp_directory_path`](https://en.cppreference.com/w/cpp/filesystem/temp_directory_path). The location can optionally be overridden using the `FLAMEGPU_TMP_DIR` environment variable. |
 | `FLAMEGPU_RTC_INCLUDE_DIRS`          | A list of include directories that should be provided to the RTC compiler, these should be separated using `;` (Windows) or `:` (Linux). If this variable is not found, the working directory will be used as a default. |
@@ -367,7 +367,4 @@ For a full list of known issues pleases see the [Issue Tracker](https://github.c
 
 + Warnings and a loss of performance due to hash collisions in device code ([#356](https://github.com/FLAMEGPU/FLAMEGPU2/issues/356))
 + Multiple known areas where performance can be improved (e.g. [#449](https://github.com/FLAMEGPU/FLAMEGPU2/issues/449), [#402](https://github.com/FLAMEGPU/FLAMEGPU2/issues/402))
-+ Windows/MSVC builds using CUDA 11.0 may encounter errors when performing incremental builds if the static library has been recompiled. If this presents itself, re-save any `.cu` file in your executable producing project and re-trigger the build.
-+ Debug builds under linux with CUDA 11.0 may encounter cuda errors during `validateIDCollisions`. Consider using an alternate CUDA version if this is required ([#569](https://github.com/FLAMEGPU/FLAMEGPU2/issues/569)).
-+ CUDA 11.0 with GCC 9 may encounter a segmentation fault during compilation of the test suite. Consider using GCC 8 with CUDA 11.0.
 + CUDA 12.2+ suffers from poor RTC compilation times, to be fixed in a future release. ([#1118](https://github.com/FLAMEGPU/FLAMEGPU2/issues/1118)).
diff --git a/cmake/CUDAArchitectures.cmake b/cmake/CUDAArchitectures.cmake
index 27963a332..84eea2418 100644
--- a/cmake/CUDAArchitectures.cmake
+++ b/cmake/CUDAArchitectures.cmake
@@ -215,7 +215,7 @@ function(flamegpu_set_cuda_architectures)
                 endif()
                 message(AUTHOR_WARNING
                     "  ${CMAKE_CURRENT_FUNCTION} failed to parse NVCC --help output for default architecture generation\n"
-                    "  Using ${default_archs} based on CUDA 11.0 to 11.8."
+                    "  Using ${default_archs} based on CUDA 11.2 to 11.8."
                 )
             endif()
             # We actually want real for each arch, then virtual for the final, but only for library-provided values, to only embed one arch worth of ptx.
diff --git a/cmake/common.cmake b/cmake/common.cmake
index f1ed88a17..408ac0a0d 100644
--- a/cmake/common.cmake
+++ b/cmake/common.cmake
@@ -134,22 +134,26 @@ if(FLAMEGPU_ENABLE_NVTX)
     endif()
 endif(FLAMEGPU_ENABLE_NVTX)
 
-# Set the minimum supported cuda version, if not already set. Currently duplicated due to docs only build logic.
-# CUDA 11.0 is current minimum cuda version, and the minimum supported
+# Set the minimum unsupported and minimum supported cuda version, if not already set.
+# Currently duplicated due to docs only build logic.
+# CUDA 11.0 (linux) / 11.1 (windows) is the current minimum (unsupported but usable) cuda version
 if(NOT DEFINED MINIMUM_CUDA_VERSION)
     set(MINIMUM_CUDA_VERSION 11.0)
+    if(WIN32)
+        set(MINIMUM_CUDA_VERSION 11.1)
+    endif()
     # Require a minimum cuda version
     if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS ${MINIMUM_CUDA_VERSION})
         message(FATAL_ERROR "CUDA version must be at least ${MINIMUM_CUDA_VERSION}")
     endif()
 endif()
-# CUDA 11.0 is the current minimum supported version.
+# CUDA 11.2 is the current minimum supported version.
 if(NOT DEFINED MINIMUM_SUPPORTED_CUDA_VERSION)
-    set(MINIMUM_SUPPORTED_CUDA_VERSION 11.0)
+    set(MINIMUM_SUPPORTED_CUDA_VERSION 11.2)
     # Warn on deprecated cuda version.
     # If the CUDA compiler is atleast the minimum deprecated version, but less than the minimum actually supported version, issue a dev warning.
     if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL ${MINIMUM_CUDA_VERSION} AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS ${MINIMUM_SUPPORTED_CUDA_VERSION})
-        message(DEPRECATION "Support for CUDA verisons <= ${MINIMUM_SUPPORTED_CUDA_VERSION} is deprecated and will be removed in a future release.")
+        message(WARNING "CUDA versions >= ${MINIMUM_CUDA_VERSION} && < ${MINIMUM_SUPPORTED_CUDA_VERSION} are unsupported but may work on some platforms.")
     endif()
 endif()
 
diff --git a/src/flamegpu/detail/compute_capability.cu b/src/flamegpu/detail/compute_capability.cu
index cfd8f3e12..2f38c69c1 100644
--- a/src/flamegpu/detail/compute_capability.cu
+++ b/src/flamegpu/detail/compute_capability.cu
@@ -71,8 +71,8 @@ std::vector<int> compute_capability::getNVRTCSupportedComputeCapabilties() {
     }
     // If any of the above functions failed, we have no idea what arch's are supported, so assume none are?
     return {};
-// Older CUDA's do not support this, but this is simple to hard-code for CUDA 11.0/11.1  (and our deprected CUDA 10.x).
-// CUDA 11.1 suports 35 to 86
+// Older CUDA's do not support this, but this is simple to hard-code for CUDA 11.0/11.1 (and our CUDA 10.x).
+// CUDA 11.1 supports 35 to 86
 #elif (__CUDACC_VER_MAJOR__ == 11) && __CUDACC_VER_MINOR__ == 1
     return {35, 37, 50, 52, 53, 60, 61, 62, 70, 72, 75, 80, 86};
 // CUDA 11.0 supports 35 to 80

From 6389e2a5a6ccac294b30554f40eb998cd36f05b7 Mon Sep 17 00:00:00 2001
From: Peter Heywood <peethwd@gmail.com>
Date: Tue, 28 Nov 2023 19:26:18 +0000
Subject: [PATCH 2/5] Switch to CCCL 2.2.0+ from Thrust/Cub 1.x

Closes #1021
---
 cmake/common.cmake              |   2 +-
 cmake/dependencies/CCCL.cmake   |  71 +++++++++++++++++
 cmake/dependencies/Thrust.cmake | 130 --------------------------------
 src/CMakeLists.txt              |   5 +-
 4 files changed, 74 insertions(+), 134 deletions(-)
 create mode 100644 cmake/dependencies/CCCL.cmake
 delete mode 100644 cmake/dependencies/Thrust.cmake

diff --git a/cmake/common.cmake b/cmake/common.cmake
index 408ac0a0d..10cf3ed2a 100644
--- a/cmake/common.cmake
+++ b/cmake/common.cmake
@@ -28,7 +28,7 @@ endif()
 
 # Ensure that other dependencies are downloaded and available. 
 # As flamegpu is a static library, linking only only occurs at consumption not generation, so dependent targets must also know of PRIVATE shared library dependencies such as tinyxml2 and rapidjson, as well any intentionally public dependencies (for include dirs)
-include(${CMAKE_CURRENT_LIST_DIR}/dependencies/Thrust.cmake)
+include(${CMAKE_CURRENT_LIST_DIR}/dependencies/CCCL.cmake)
 include(${CMAKE_CURRENT_LIST_DIR}/dependencies/Jitify.cmake)
 include(${CMAKE_CURRENT_LIST_DIR}/dependencies/Tinyxml2.cmake)
 include(${CMAKE_CURRENT_LIST_DIR}/dependencies/rapidjson.cmake)
diff --git a/cmake/dependencies/CCCL.cmake b/cmake/dependencies/CCCL.cmake
new file mode 100644
index 000000000..848c38e97
--- /dev/null
+++ b/cmake/dependencies/CCCL.cmake
@@ -0,0 +1,71 @@
+#####################################
+# CCCL (Thrust, CUB and libcudacxx) #
+#####################################
+
+set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/modules/ ${CMAKE_MODULE_PATH})
+
+include(FetchContent)
+cmake_policy(SET CMP0079 NEW)
+
+# Set the minimum supported CCCL version, and the version to fetch
+# using find_package(version) means it's up to CCCL's cmake to determine if newer versions are compatible, but this will likely need changing for CUDA 13, when CCCL is planned to have a major version bump (and drop CUDA 11 support).
+set(MIN_REQUIRED_CCCL_VERSION 2.2.0)
+set(CCCL_DOWNLOAD_TAG v2.2.0)
+
+# Use the FindCUDATooklit package (CMake > 3.17) to get the CUDA version and CUDA include directories for cub/thrust location hints
+find_package(CUDAToolkit REQUIRED)
+
+# Quietly find CCCL, to check if the version included with CUDA (if CCCL) is sufficiently new.
+# Using CCCL avoids complex cub/thrust version workarounds previously required.
+# However we cannot find thrust due to a missing guard in CCCL's cmake config file, and cannot find cub without finding libcudacxx, so just find libcudacxx quietly.
+# If/when we change the minimum CCCL to 2.3.0 we should be able to remove the `components libcudacxx`. 
+find_package(CCCL ${MIN_REQUIRED_CCCL_VERSION} QUIET COMPONENTS libcudacxx CONFIG HINTS ${CUDAToolkit_INCLUDE_DIRS} ${CUDAToolkit_LIBRARY_DIR}/cmake)
+
+# If CCCL was found, find it again but loudly (with all components)
+if(CCCL_FOUND)
+    # Find the packages again but less quietly (and include all components)
+    find_package(CCCL ${MIN_REQUIRED_CCCL_VERSION} REQUIRED CONFIG COMPONENTS HINTS ${CUDAToolkit_INCLUDE_DIRS} ${CUDAToolkit_LIBRARY_DIR}/cmake)
+# If CCCL does need downloading, fetch it and find it (no need to add_subdirectory)
+else()
+    # Declare information about where and what we want from CCCL.
+    FetchContent_Declare(
+        cccl
+        GIT_REPOSITORY https://github.com/NVIDIA/CCCL.git
+        GIT_TAG        ${CCCL_DOWNLOAD_TAG}
+        GIT_SHALLOW    1
+        GIT_PROGRESS   ON
+        # UPDATE_DISCONNECTED   ON
+    )
+    # Fetch and populate the content if required.
+    FetchContent_GetProperties(cccl)
+    if(NOT cccl_POPULATED)
+        message(STATUS "Fetching CCCL ${CCCL_DOWNLOAD_TAG}")
+        FetchContent_Populate(cccl)
+        # Use find_package for CCCL, only looking for the fetched version.
+        # This creates a non-system target due to nvcc magic to avoid the cuda toolkit version being used instead, so warnings are not suppressible without push/pop macros.
+        find_package(CCCL REQUIRED CONFIG
+            PATHS "${cccl_SOURCE_DIR}"
+            NO_CMAKE_PATH
+            NO_CMAKE_ENVIRONMENT_PATH
+            NO_SYSTEM_ENVIRONMENT_PATH
+            NO_CMAKE_PACKAGE_REGISTRY
+            NO_CMAKE_SYSTEM_PATH)
+    endif()
+    # Mark some CACHE vars as advanced for a cleaner CMake GUI
+    mark_as_advanced(FETCHCONTENT_QUIET)
+    mark_as_advanced(FETCHCONTENT_BASE_DIR)
+    mark_as_advanced(FETCHCONTENT_FULLY_DISCONNECTED)
+    mark_as_advanced(FETCHCONTENT_UPDATES_DISCONNECTED)
+    mark_as_advanced(FETCHCONTENT_SOURCE_DIR_CCCL)
+    mark_as_advanced(FETCHCONTENT_UPDATES_DISCONNECTED_CCCL)
+endif()
+
+# Unset temporary variables
+unset(MIN_REQUIRED_CCCL_VERSION)
+unset(CCCL_DOWNLOAD_TAG)
+
+# Mark some CACHE vars as advanced for a cleaner CMake GUI
+mark_as_advanced(CCCL_DIR)
+mark_as_advanced(CUB_DIR)
+mark_as_advanced(Thrust_DIR)
+mark_as_advanced(libcudacxx_DIR)
diff --git a/cmake/dependencies/Thrust.cmake b/cmake/dependencies/Thrust.cmake
deleted file mode 100644
index 3f8bc8690..000000000
--- a/cmake/dependencies/Thrust.cmake
+++ /dev/null
@@ -1,130 +0,0 @@
-####################
-# Thrust (and CUB) #
-####################
-
-set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/modules/ ${CMAKE_MODULE_PATH})
-
-include(FetchContent)
-cmake_policy(SET CMP0079 NEW)
-
-# Set the minimum supported cub/thrust version, and the version to fetch
-# Thrust minimum version to 1.16 to avoid windows.h related issues and pull in bug fixes, but fetch the most recent 1.x release otherwise (at the time of writing).
-set(MIN_REQUIRED_THRUST_VERSION 1.16.0)
-set(MIN_REQUIRED_CUB_VERSION ${MIN_REQUIRED_THRUST_VERSION})
-set(THRUST_DOWNLOAD_VERSION 1.17.2)
-
-# Use the FindCUDATooklit package (CMake > 3.17) to get the CUDA version and CUDA include directories for cub/thrust location hints
-find_package(CUDAToolkit REQUIRED)
-
-# Quietly find Thrust and CUB, to check if an appropriate version can be found without downloading.
-# thrust-config.cmake and cub-config.cmake live in different locations with CUDA (on ubuntu) depending on the CUDA version.
-# CUDA 11.3 and 11.4 they can be found in the CUDA Toolkit include directories.
-# CUDA 11.5+ they can be found in lib/cmake or lib64/cmake
-# CUDA 11.6 - 11.8 ships with CUB 1.15.0 which has a bug when windows.h is included prior to CUB, so don't try to find the regular Thrust/CUB in this case. 
-# Ideally we would detect 1.15.0 and then download the correct version of CUB/Thrust, but getting CMake on windows to behave was proving problematic
-if(NOT (MSVC AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.6.0 AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.9.0))
-    find_package(Thrust QUIET CONFIG HINTS ${CUDAToolkit_INCLUDE_DIRS} ${CUDAToolkit_LIBRARY_DIR}/cmake)
-    find_package(CUB QUIET CONFIG HINTS ${CUDAToolkit_INCLUDE_DIRS} ${CUDAToolkit_LIBRARY_DIR}/cmake)
-endif()
-
-# By default, assume we have to fetch thrust/cub
-set(FETCH_THRUST_CUB 1)
-# If a useful version was found, find it again less quietly 
-if(Thrust_FOUND AND Thrust_VERSION VERSION_GREATER_EQUAL MIN_REQUIRED_THRUST_VERSION AND CUB_FOUND AND CUB_VERSION VERSION_GREATER_EQUAL MIN_REQUIRED_CUB_VERSION)
-    set(FETCH_THRUST_CUB 0)
-    # Find the packages again but less quietly.
-    find_package(Thrust CONFIG REQUIRED HINTS ${CUDAToolkit_INCLUDE_DIRS} ${CUDAToolkit_LIBRARY_DIR}/cmake)
-    find_package(CUB CONFIG REQUIRED HINTS ${CUDAToolkit_INCLUDE_DIRS} ${CUDAToolkit_LIBRARY_DIR}/cmake)
-# Otherwise unfind Thrust/CUB.
-else()
-    # Unset a number of thrust / cub cache variables so that re-finding behaves as intended.
-    unset(THRUST_DIR)
-    unset(THRUST_DIR CACHE)
-    unset(THRUST_DEVICE_SYSTEM_OPTIONS)
-    unset(THRUST_DEVICE_SYSTEM_OPTIONS CACHE)
-    unset(THRUST_HOST_SYSTEM_OPTIONS)
-    unset(THRUST_HOST_SYSTEM_OPTIONS CACHE)
-    unset(THRUST_VERSION)
-    unset(THRUST_VERSION CACHE)
-    unset(THRUST_VERSION_COUNT)
-    unset(THRUST_VERSION_COUNT CACHE)
-    unset(THRUST_VERSION_MAJOR)
-    unset(THRUST_VERSION_MAJOR CACHE)
-    unset(THRUST_VERSION_MINOR)
-    unset(THRUST_VERSION_MINOR CACHE)
-    unset(THRUST_VERSION_PATCH)
-    unset(THRUST_VERSION_PATCH CACHE)
-    unset(THRUST_VERSION_TWEAK)
-    unset(THRUST_VERSION_TWEAK CACHE)
-    unset(_THRUST_CMAKE_DIR)
-    unset(_THRUST_CMAKE_DIR CACHE)
-    unset(_THRUST_INCLUDE_DIR)
-    unset(_THRUST_INCLUDE_DIR CACHE) # This is the most important one for Thrust 2.0, which just THRUST_DIR was insufficient for.
-    unset(_THRUST_QUIET)
-    unset(_THRUST_QUIET CACHE)
-    unset(_THRUST_QUIET_FLAG)
-    unset(_THRUST_QUIET_FLAG CACHE)
-    unset(CUB_DIR)
-    unset(CUB_DIR CACHE)
-    unset(_CUB_INCLUDE_DIR)
-    unset(_CUB_INCLUDE_DIR CACHE)
-endif()
-
-# If thrust/cub do need downloading, fetch them, and find them.
-# As they are header only, they can just be found rather than add_subdirectoried.
-if(FETCH_THRUST_CUB)
-    # Declare information about where and what we want from thrust.
-    FetchContent_Declare(
-        thrust
-        GIT_REPOSITORY https://github.com/NVIDIA/thrust.git
-        GIT_TAG        ${THRUST_DOWNLOAD_VERSION}
-        GIT_SHALLOW    1
-        GIT_PROGRESS   ON
-        # UPDATE_DISCONNECTED   ON
-    )
-
-    # Fetch and populate the content if required.
-    FetchContent_GetProperties(thrust)
-    if(NOT thrust_POPULATED)
-        message(STATUS "Fetching Thrust ${THRUST_DOWNLOAD_VERSION}")
-        FetchContent_Populate(thrust)
-        # Use find_package for thrust, only looking for the fetched version.
-        # This creates a non-system target due to nvcc magic to avoid the cuda toolkit version being used instead, so warnings are not suppressable.
-        find_package(Thrust REQUIRED CONFIG
-            PATHS ${thrust_SOURCE_DIR}
-            NO_CMAKE_PATH
-            NO_CMAKE_ENVIRONMENT_PATH
-            NO_SYSTEM_ENVIRONMENT_PATH
-            NO_CMAKE_PACKAGE_REGISTRY
-            NO_CMAKE_SYSTEM_PATH)
-        # Use find_package for cub, only looking for the fetched version.
-        # This creates a non-system target due to nvcc magic to avoid the cuda toolkit version being used instead, so warnings are not suppressable.
-        # Look in the symlinked and non-symlinked locations, preferring non symlinked due to windows (and the symlink being removed from 2.0)
-        find_package(CUB REQUIRED CONFIG
-            PATHS
-                ${thrust_SOURCE_DIR}/dependencies/cub/cub/cmake/
-                ${thrust_SOURCE_DIR}/cub/cmake
-            NO_CMAKE_PATH
-            NO_CMAKE_ENVIRONMENT_PATH
-            NO_SYSTEM_ENVIRONMENT_PATH
-            NO_CMAKE_PACKAGE_REGISTRY
-            NO_CMAKE_SYSTEM_PATH)
-    endif()
-    # Mark some CACHE vars as advnaced for a cleaner CMake GUI
-    mark_as_advanced(FETCHCONTENT_QUIET)
-    mark_as_advanced(FETCHCONTENT_BASE_DIR)
-    mark_as_advanced(FETCHCONTENT_FULLY_DISCONNECTED)
-    mark_as_advanced(FETCHCONTENT_UPDATES_DISCONNECTED) 
-    mark_as_advanced(FETCHCONTENT_SOURCE_DIR_THRUST)
-    mark_as_advanced(FETCHCONTENT_UPDATES_DISCONNECTED_THRUST)
-endif()
-
-# Unset temporary variables
-unset(FETCH_THRUST_CUB)
-unset(MIN_REQUIRED_THRUST_VERSION)
-unset(MIN_REQUIRED_CUB_VERSION)
-unset(THRUST_DOWNLOAD_VERSION)
-
-# Mark some CACHE vars as advnaced for a cleaner CMake GUI
-mark_as_advanced(CUB_DIR)
-mark_as_advanced(Thrust_DIR)
\ No newline at end of file
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d7a791166..e3dec5f0f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -642,9 +642,8 @@ set_property(TARGET ${PROJECT_NAME}  PROPERTY CUDA_SEPARABLE_COMPILATION ON)
 
 # Link against dependency targets / directories.
 
-# Cub and thrust targets are not imported targets, so they do not use -isystem, so warnings must be suppressed as pragmas as requied. This is due to nvcc magic preventing isystem from being reliable with them. 
-target_link_libraries(${PROJECT_NAME} PUBLIC CUB::CUB)
-target_link_libraries(${PROJECT_NAME} PUBLIC Thrust::Thrust)
+# The CCCL targets for CUB and Thrust (and libcudacxx) are not imported targets, so they do not use -isystem, so warnings must be suppressed using pragmas if required. This is due to nvcc's implicit include directory search ordering.
+target_link_libraries(${PROJECT_NAME} PUBLIC CCCL::CCCL)
 
 # tinyxml2 static library
 target_link_libraries(${PROJECT_NAME} PRIVATE Tinyxml2::tinyxml2)

From b3323d44381beb71c1cedee237c3eb959dcb2629 Mon Sep 17 00:00:00 2001
From: Peter Heywood <peethwd@gmail.com>
Date: Thu, 14 Dec 2023 12:48:43 +0000
Subject: [PATCH 3/5] DO NOT MERGE: Testing cccl branch/v2.3.0 with msvc fix
 backported - but not cmake

---
 cmake/dependencies/CCCL.cmake | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cmake/dependencies/CCCL.cmake b/cmake/dependencies/CCCL.cmake
index 848c38e97..806788bbb 100644
--- a/cmake/dependencies/CCCL.cmake
+++ b/cmake/dependencies/CCCL.cmake
@@ -9,8 +9,8 @@ cmake_policy(SET CMP0079 NEW)
 
 # Set the minimum supported CCCL version, and the version to fetch
 # using find_package(version) means it's up to CCCL's cmake to determine if newer versions are compatible, but this will likely need changing for CUDA 13, when CCCL is planned to have a major version bump (and drop CUDA 11 support).
-set(MIN_REQUIRED_CCCL_VERSION 2.2.0)
-set(CCCL_DOWNLOAD_TAG v2.2.0)
+set(MIN_REQUIRED_CCCL_VERSION 2.3.0)
+set(CCCL_DOWNLOAD_TAG branch/2.3.x) # @todo - this should be changed to v2.3.0 when possible 
 
 # Use the FindCUDATooklit package (CMake > 3.17) to get the CUDA version and CUDA include directories for cub/thrust location hints
 find_package(CUDAToolkit REQUIRED)
@@ -18,7 +18,7 @@ find_package(CUDAToolkit REQUIRED)
 # Quietly find CCCL, to check if the version included with CUDA (if CCCL) is sufficiently new.
 # Using CCCL avoids complex cub/thrust version workarounds previously required.
 # However we cannot find thrust due to a missing guard in CCCL's cmake config file, and cannot find cub without finding libcudacxx, so just find libcudacxx quietly.
-# If/when we change the minimum CCCL to 2.3.0 we should be able to remove the `components libcudacxx`. 
+# The fix for this was merged in upstream, but unclear if for the 2.3.x or 2.4.x release we should be able to remove the `components libcudacxx`. 
 find_package(CCCL ${MIN_REQUIRED_CCCL_VERSION} QUIET COMPONENTS libcudacxx CONFIG HINTS ${CUDAToolkit_INCLUDE_DIRS} ${CUDAToolkit_LIBRARY_DIR}/cmake)
 
 # If CCCL was found, find it again but loudly (with all components)
@@ -32,7 +32,7 @@ else()
         cccl
         GIT_REPOSITORY https://github.com/NVIDIA/CCCL.git
         GIT_TAG        ${CCCL_DOWNLOAD_TAG}
-        GIT_SHALLOW    1
+        GIT_SHALLOW    0 # @todo - set this back to 1.
         GIT_PROGRESS   ON
         # UPDATE_DISCONNECTED   ON
     )

From 4ea6196e1144c0b2a750ea0073c25fa7992029e5 Mon Sep 17 00:00:00 2001
From: Peter Heywood <peethwd@gmail.com>
Date: Thu, 29 Feb 2024 11:56:00 +0000
Subject: [PATCH 4/5] Switch to CCCL v2.3.0 tagged commit - this does not
 include the fixes we need. Waiting for 2.3.2 or 2.4.0

---
 cmake/dependencies/CCCL.cmake | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cmake/dependencies/CCCL.cmake b/cmake/dependencies/CCCL.cmake
index 806788bbb..fe2f11aa7 100644
--- a/cmake/dependencies/CCCL.cmake
+++ b/cmake/dependencies/CCCL.cmake
@@ -10,7 +10,7 @@ cmake_policy(SET CMP0079 NEW)
 # Set the minimum supported CCCL version, and the version to fetch
 # using find_package(version) means it's up to CCCL's cmake to determine if newer versions are compatible, but this will likely need changing for CUDA 13, when CCCL is planned to have a major version bump (and drop CUDA 11 support).
 set(MIN_REQUIRED_CCCL_VERSION 2.3.0)
-set(CCCL_DOWNLOAD_TAG branch/2.3.x) # @todo - this should be changed to v2.3.0 when possible 
+set(CCCL_DOWNLOAD_TAG v2.3.0)
 
 # Use the FindCUDATooklit package (CMake > 3.17) to get the CUDA version and CUDA include directories for cub/thrust location hints
 find_package(CUDAToolkit REQUIRED)
@@ -18,7 +18,8 @@ find_package(CUDAToolkit REQUIRED)
 # Quietly find CCCL, to check if the version included with CUDA (if CCCL) is sufficiently new.
 # Using CCCL avoids complex cub/thrust version workarounds previously required.
 # However we cannot find thrust due to a missing guard in CCCL's cmake config file, and cannot find cub without finding libcudacxx, so just find libcudacxx quietly.
-# The fix for this was merged in upstream, but unclear if for the 2.3.x or 2.4.x release we should be able to remove the `components libcudacxx`. 
+# The fix for this was merged into branch/2.3.x, but was not included in the v2.3.0 tagged commit.
+# @todo - wait for and test 2.3.2/2.4.0.
 find_package(CCCL ${MIN_REQUIRED_CCCL_VERSION} QUIET COMPONENTS libcudacxx CONFIG HINTS ${CUDAToolkit_INCLUDE_DIRS} ${CUDAToolkit_LIBRARY_DIR}/cmake)
 
 # If CCCL was found, find it again but loudly (with all components)

From dc75bb691b7d8f0d01a6001569303597cc1c5e9c Mon Sep 17 00:00:00 2001
From: Peter Heywood <peethwd@gmail.com>
Date: Wed, 13 Mar 2024 16:54:00 +0000
Subject: [PATCH 5/5] Require CCCL 2.3.2 for CMake and MSVC fixes

---
 cmake/dependencies/CCCL.cmake | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/cmake/dependencies/CCCL.cmake b/cmake/dependencies/CCCL.cmake
index fe2f11aa7..41fc7727b 100644
--- a/cmake/dependencies/CCCL.cmake
+++ b/cmake/dependencies/CCCL.cmake
@@ -9,8 +9,8 @@ cmake_policy(SET CMP0079 NEW)
 
 # Set the minimum supported CCCL version, and the version to fetch
 # using find_package(version) means it's up to CCCL's cmake to determine if newer versions are compatible, but this will likely need changing for CUDA 13, when CCCL is planned to have a major version bump (and drop CUDA 11 support).
-set(MIN_REQUIRED_CCCL_VERSION 2.3.0)
-set(CCCL_DOWNLOAD_TAG v2.3.0)
+set(MIN_REQUIRED_CCCL_VERSION 2.3.2)
+set(CCCL_DOWNLOAD_TAG v2.3.2)
 
 # Use the FindCUDATooklit package (CMake > 3.17) to get the CUDA version and CUDA include directories for cub/thrust location hints
 find_package(CUDAToolkit REQUIRED)
@@ -18,8 +18,7 @@ find_package(CUDAToolkit REQUIRED)
 # Quietly find CCCL, to check if the version included with CUDA (if CCCL) is sufficiently new.
 # Using CCCL avoids complex cub/thrust version workarounds previously required.
 # However we cannot find thrust due to a missing guard in CCCL's cmake config file, and cannot find cub without finding libcudacxx, so just find libcudacxx quietly.
-# The fix for this was merged into branch/2.3.x, but was not included in the v2.3.0 tagged commit.
-# @todo - wait for and test 2.3.2/2.4.0.
+# The fix for this was first included in the 2.3.2 release
 find_package(CCCL ${MIN_REQUIRED_CCCL_VERSION} QUIET COMPONENTS libcudacxx CONFIG HINTS ${CUDAToolkit_INCLUDE_DIRS} ${CUDAToolkit_LIBRARY_DIR}/cmake)
 
 # If CCCL was found, find it again but loudly (with all components)