Drop CTK 11.x from CI (#3275)

* Add cuda12.0-gcc7 devcontainer * Move MSVC2017 jobs to CTK 12.6. This is the only combination where rapidsai has devcontainers * Add /Zc:__cplusplus for the libcudacxx tests * Only add escape hatch for affected CTKs * Workaround missing cudaLaunchKernelEx on MSVC: cudaLaunchKernelEx requires C++11, but unfortunately <cuda_runtime.h> checks this using the __cplusplus macro, which is reported wrongly for MSVC. CTK 12.3 fixed this by additionally detecting _MSC_VER. As a workaround, we provide our own copy of cudaLaunchKernelEx when it is not available from the CTK. * Workaround nvcc+MSVC issue * Regenerate devcontainers Fixes: #3249 Co-authored-by: Michael Schellenberger Costa <miscco@nvidia.com>
NVIDIA · Jan 9, 2025 · 97f4c34 · 97f4c34
1 parent acfdf80
commit 97f4c34
Show file tree

Hide file tree

Showing 10 changed files with 45 additions and 191 deletions.
diff --git a/.devcontainer/cuda11.1-gcc9/devcontainer.json b/.devcontainer/cuda11.1-gcc9/devcontainer.json
diff --git a/.devcontainer/cuda11.1-llvm9/devcontainer.json b/.devcontainer/cuda11.1-llvm9/devcontainer.json
diff --git a/.devcontainer/cuda11.8-gcc11/devcontainer.json b/.devcontainer/cuda11.8-gcc11/devcontainer.json
diff --git a/...container/cuda11.1-gcc7/devcontainer.json → ...container/cuda12.0-gcc7/devcontainer.json b/...container/cuda11.1-gcc7/devcontainer.json → ...container/cuda12.0-gcc7/devcontainer.json
@@ -1,6 +1,6 @@
 {
   "shutdownAction": "stopContainer",
-  "image": "rapidsai/devcontainers:25.02-cpp-gcc7-cuda11.1",
+  "image": "rapidsai/devcontainers:25.02-cpp-gcc7-cuda12.0",
   "hostRequirements": {
     "gpu": "optional"
   },
@@ -15,11 +15,11 @@
     "SCCACHE_BUCKET": "rapids-sccache-devs",
     "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
     "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
-    "DEVCONTAINER_NAME": "cuda11.1-gcc7",
-    "CCCL_CUDA_VERSION": "11.1",
+    "DEVCONTAINER_NAME": "cuda12.0-gcc7",
+    "CCCL_CUDA_VERSION": "12.0",
     "CCCL_HOST_COMPILER": "gcc",
     "CCCL_HOST_COMPILER_VERSION": "7",
-    "CCCL_BUILD_INFIX": "cuda11.1-gcc7",
+    "CCCL_BUILD_INFIX": "cuda12.0-gcc7",
     "CCCL_CUDA_EXTENDED": "false"
   },
   "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
@@ -50,5 +50,5 @@
       }
     }
   },
-  "name": "cuda11.1-gcc7"
+  "name": "cuda12.0-gcc7"
 }
diff --git a/...container/cuda11.1-gcc8/devcontainer.json → ...container/cuda12.0-gcc8/devcontainer.json b/...container/cuda11.1-gcc8/devcontainer.json → ...container/cuda12.0-gcc8/devcontainer.json
@@ -1,6 +1,6 @@
 {
   "shutdownAction": "stopContainer",
-  "image": "rapidsai/devcontainers:25.02-cpp-gcc8-cuda11.1",
+  "image": "rapidsai/devcontainers:25.02-cpp-gcc8-cuda12.0",
   "hostRequirements": {
     "gpu": "optional"
   },
@@ -15,11 +15,11 @@
     "SCCACHE_BUCKET": "rapids-sccache-devs",
     "AWS_ROLE_ARN": "arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs",
     "HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
-    "DEVCONTAINER_NAME": "cuda11.1-gcc8",
-    "CCCL_CUDA_VERSION": "11.1",
+    "DEVCONTAINER_NAME": "cuda12.0-gcc8",
+    "CCCL_CUDA_VERSION": "12.0",
     "CCCL_HOST_COMPILER": "gcc",
     "CCCL_HOST_COMPILER_VERSION": "8",
-    "CCCL_BUILD_INFIX": "cuda11.1-gcc8",
+    "CCCL_BUILD_INFIX": "cuda12.0-gcc8",
     "CCCL_CUDA_EXTENDED": "false"
   },
   "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
@@ -50,5 +50,5 @@
       }
     }
   },
-  "name": "cuda11.1-gcc8"
+  "name": "cuda12.0-gcc8"
 }
diff --git a/README.md b/README.md
@@ -219,18 +219,16 @@ CCCL users are encouraged to capitalize on the latest enhancements and ["live at
 For a seamless experience, you can upgrade CCCL independently of the entire CUDA Toolkit.
 This is possible because CCCL maintains backward compatibility with the latest patch release of every minor CTK release from both the current and previous major version series.
 In some exceptional cases, the minimum supported minor version of the CUDA Toolkit release may need to be newer than the oldest release within its major version series.
-For instance, CCCL requires a minimum supported version of 11.1 from the 11.x series due to an unavoidable compiler issue present in CTK 11.0.
 
 When a new major CTK is released, we drop support for the oldest supported major version.
 
 | CCCL Version | Supports CUDA Toolkit Version                  |
 |--------------|------------------------------------------------|
 | 2.x          | 11.1 - 11.8, 12.x (only latest patch releases) |
-| 3.x (Future) | 12.x, 13.x  (only latest patch releases)       |
+| 3.x          | 12.x, 13.x  (only latest patch releases)       |
 
 [Well-behaved code](#compatibility-guidelines) using the latest CCCL should compile and run successfully with any supported CTK version.
 Exceptions may occur for new features that depend on new CTK features, so those features would not work on older versions of the CTK.
-For example, C++20 support was not added to `nvcc` until CUDA 12.0, so CCCL features that depend on C++20 would not work with CTK 11.x.
 
 Users can integrate a newer version of CCCL into an older CTK, but not the other way around.
 This means an older version of CCCL is not compatible with a newer CTK.
@@ -287,7 +285,7 @@ Note that some features may only support certain architectures/Compute Capabilit
 CCCL's testing strategy strikes a balance between testing as many configurations as possible and maintaining reasonable CI times.
 
 For CUDA Toolkit versions, testing is done against both the oldest and the newest supported versions.
-For instance, if the latest version of the CUDA Toolkit is 12.3, tests are conducted against 11.1 and 12.3.
+For instance, if the latest version of the CUDA Toolkit is 12.6, tests are conducted against 12.0 and 12.6.
 For each CUDA version, builds are completed against all supported host compilers with all supported C++ dialects.
 
 The testing strategy and matrix are constantly evolving.

diff --git a/ci/matrix.yaml b/ci/matrix.yaml
@@ -10,8 +10,8 @@ workflows:
   override:
 
   pull_request:
-    # Old CTK
-    - {jobs: ['build'], std: 'minmax', ctk: '11.1', cxx: ['gcc7', 'gcc9', 'clang9']}
+    # Old CTK/compiler
+    - {jobs: ['build'], std: 'minmax', ctk: '12.0', cxx: ['gcc7', 'gcc9', 'clang9', 'msvc2019']}
     # Current CTK build-only
     - {jobs: ['build'], std: [11, 14], cxx: ['gcc7', 'clang9']}
     - {jobs: ['build'], std: 'max', cxx: ['gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12']}
@@ -41,7 +41,6 @@ workflows:
     # verify-codegen:
     - {jobs: ['verify_codegen'], project: 'libcudacxx'}
     # cudax has different CTK reqs:
-    - {jobs: ['build'], project: 'cudax', ctk: ['12.0'], std: 17,       cxx: ['gcc9', 'clang9']}
     - {jobs: ['build'], project: 'cudax', ctk: ['12.0'], std: 20,       cxx: ['msvc14.36']}
     - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20,       cxx: ['gcc10', 'gcc11', 'gcc12']}
     - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20,       cxx: ['clang10', 'clang11', 'clang12', 'clang13']}
@@ -55,7 +54,6 @@ workflows:
     # Python and c/parallel jobs:
     - {jobs: ['test'], project: ['cccl_c_parallel', 'python'], ctk: '12.6'}
     # cccl-infra:
-    - {jobs: ['infra'], project: 'cccl', ctk: '11.1', cxx: ['gcc7',  'clang9']}
     - {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14']}
     - {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc',   'clang']}
 
@@ -64,9 +62,9 @@ workflows:
     - {jobs: ['limited'], project: 'cub', std: 17}
     - {jobs: ['test_gpu'],  project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit'}
     - {jobs: ['test_gpu'],  project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit'}
-    # Old CTK
-    - {jobs: ['build'], std: 'all', ctk: '11.1', cxx: ['gcc7', 'gcc8', 'gcc9', 'clang9']}
-    - {jobs: ['build'], std: 'all', ctk: '11.8', cxx: ['gcc11'], sm: '60;70;80;90'}
+    # Old CTK/compiler
+    - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc7', 'gcc8', 'gcc9', 'clang9', 'msvc2019']}
+    - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc11'], sm: '60;70;80;90'}
     # Current CTK build-only
     - {jobs: ['build'], std: 'all', cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12']}
     - {jobs: ['build'], std: 'all', cxx: ['clang9', 'clang10', 'clang11', 'clang12', 'clang13', 'clang14', 'clang15', 'clang16', 'clang17']}
@@ -116,9 +114,6 @@ workflows:
   exclude:
     # GPU runners are not available on Windows.
     - {jobs: ['test', 'test_gpu', 'test_nolid', 'test_lid0', 'test_lid1', 'test_lid2'], cxx: ['msvc2019', 'msvc14.36', 'msvc2022']}
-    # Ubuntu 18.04 is EOL and we only use it to get access to CTK 11.1 containers for CUDA testing.
-    # Disable non-CUDA tests on this platform.
-    - {jobs: ['test_cpu'], ctk: '11.1'}
 
 
 #############################################################################################
@@ -131,8 +126,6 @@ devcontainer_version: '25.02'
 all_stds: [11, 14, 17, 20]
 
 ctk_versions:
-  11.1: { stds: [11, 14, 17,   ] }
-  11.8: { stds: [11, 14, 17,   ] }
   12.0: { stds: [11, 14, 17, 20] }
   12.5: { stds: [11, 14, 17, 20] }
   12.6: { stds: [11, 14, 17, 20], aka: 'curr' }

diff --git a/libcudacxx/test/libcudacxx/CMakeLists.txt b/libcudacxx/test/libcudacxx/CMakeLists.txt
@@ -49,9 +49,14 @@ if (NOT MSVC AND NOT ${CMAKE_CUDA_COMPILER_ID} STREQUAL "Clang")
   set(LIBCUDACXX_WARNING_LEVEL "--compiler-options=-Wall --compiler-options=-Wextra")
 endif()
 
-# sccache cannot handle the -Fd option generationg pdb files
 if (MSVC)
+  # sccache cannot handle the -Fd option generating pdb files
   set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT Embedded)
+
+  # We want to use cudaLaunchKernelEx which is guarded by __cplusplus
+  if ("${CMAKE_CUDA_COMPILER_VERSION}" LESS "12.3.0")
+    string(APPEND LIBCUDACXX_TEST_COMPILER_FLAGS " -Xcompiler=/Zc:__cplusplus")
+  endif()
 endif()
 
 if (CCCL_SUPPRESS_MSVC2017_DEPRECATION_WARNING)

diff --git a/thrust/testing/functional.cu b/thrust/testing/functional.cu
@@ -211,8 +211,8 @@ THRUST_DISABLE_BROKEN_GCC_VECTORIZER void TestIdentityFunctional()
 
   // value categories when casting to different type
   static_assert(::cuda::std::is_same<decltype(thrust::identity<int>{}(3.14)), int&&>::value, "");
-  // unfortunately, old versions of MSVC pick the `const int&` overload instead of `int&&`
-#if !_CCCL_COMPILER(MSVC, <, 19, 29)
+  // unfortunately, old versions of MSVC or nvcc in MSVC mode pick the `const int&` overload instead of `int&&`
+#if !_CCCL_COMPILER(MSVC, <, 19, 29) && !(_CCCL_COMPILER(MSVC) && _CCCL_CUDA_COMPILER(NVCC, <, 12, 1))
   static_assert(::cuda::std::is_same<decltype(thrust::identity<int>{}(d)), int&&>::value, "");
   static_assert(::cuda::std::is_same<decltype(thrust::identity<int>{}(as_const(d))), int&&>::value, "");
 #endif

diff --git a/thrust/thrust/system/cuda/detail/core/triple_chevron_launch.h b/thrust/thrust/system/cuda/detail/core/triple_chevron_launch.h
@@ -67,6 +67,22 @@ struct _CCCL_VISIBILITY_HIDDEN triple_chevron
       , stream(stream_)
   {}
 
+  // cudaLaunchKernelEx requires C++11, but unfortunately <cuda_runtime.h> checks this using the __cplusplus macro,
+  // which is reported wrongly for MSVC. CTK 12.3 fixed this by additionally detecting _MSC_VER. As a workaround, we
+  // provide our own copy of cudaLaunchKernelEx when it is not available from the CTK.
+#if _CCCL_COMPILER(MSVC) && _CCCL_CUDACC_BELOW(12, 3)
+  // Copied from <cuda_runtime.h>
+  template <typename... ExpTypes, typename... ActTypes>
+  static cudaError_t _CCCL_HOST
+  cudaLaunchKernelEx_MSVC_workaround(const cudaLaunchConfig_t* config, void (*kernel)(ExpTypes...), ActTypes&&... args)
+  {
+    return [&](ExpTypes... coercedArgs) {
+      void* pArgs[] = {&coercedArgs...};
+      return ::cudaLaunchKernelExC(config, (const void*) kernel, pArgs);
+    }(std::forward<ActTypes>(args)...);
+  }
+#endif
+
   template <class K, class... Args>
   cudaError_t _CCCL_HOST doit_host(K k, Args const&... args) const
   {
@@ -84,7 +100,11 @@ struct _CCCL_VISIBILITY_HIDDEN triple_chevron
       config.stream           = stream;
       config.attrs            = attribute;
       config.numAttrs         = 1;
+#  if _CCCL_COMPILER(MSVC) && _CCCL_CUDACC_BELOW(12, 3)
+      cudaLaunchKernelEx_MSVC_workaround(&config, k, args...);
+#  else
       cudaLaunchKernelEx(&config, k, args...);
+#  endif
     }
     else
 #endif // _CCCL_HAS_PDL