From 1d940d4252b26cccf081dfa3cae8c57413ed769f Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 19 Aug 2021 11:14:49 +0200 Subject: [PATCH 1/2] Extend the cms::cuda::ScopedSetDevice interface Add a default constructor that stores the current device, without changing it. Add a set() method to change the current device, without affecting the stored value for the original device. --- .../CUDAUtilities/interface/ScopedSetDevice.h | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/HeterogeneousCore/CUDAUtilities/interface/ScopedSetDevice.h b/HeterogeneousCore/CUDAUtilities/interface/ScopedSetDevice.h index 9b296dd390ea3..44480bf0eb5c6 100644 --- a/HeterogeneousCore/CUDAUtilities/interface/ScopedSetDevice.h +++ b/HeterogeneousCore/CUDAUtilities/interface/ScopedSetDevice.h @@ -9,20 +9,35 @@ namespace cms { namespace cuda { class ScopedSetDevice { public: - explicit ScopedSetDevice(int newDevice) { - cudaCheck(cudaGetDevice(&prevDevice_)); - cudaCheck(cudaSetDevice(newDevice)); + // Store the original device, without setting a new one + ScopedSetDevice() { + // Store the original device + cudaCheck(cudaGetDevice(&originalDevice_)); } + // Store the original device, and set a new current device + explicit ScopedSetDevice(int device) : ScopedSetDevice() { + // Change the current device + set(device); + } + + // Restore the original device ~ScopedSetDevice() { // Intentionally don't check the return value to avoid // exceptions to be thrown. If this call fails, the process is // doomed anyway. 
- cudaSetDevice(prevDevice_); + cudaSetDevice(originalDevice_); + } + + // Set a new current device, without changing the original device + // that will be restored when this object is destroyed + void set(int device) { + // Change the current device + cudaCheck(cudaSetDevice(device)); } private: - int prevDevice_; + int originalDevice_; }; } // namespace cuda } // namespace cms From 097fe4ab00feb29e17d53db33993ad2533c65d8f Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 19 Aug 2021 11:17:47 +0200 Subject: [PATCH 2/2] Fix uploading the EventSetup conditions to multiple CUDA devices Associate to the correct CUDA device the events used to track if the conditions have been transferred to each device. --- HeterogeneousCore/CUDACore/interface/ESProduct.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/HeterogeneousCore/CUDACore/interface/ESProduct.h b/HeterogeneousCore/CUDACore/interface/ESProduct.h index 676d3e9d1c0d9..8740095292380 100644 --- a/HeterogeneousCore/CUDACore/interface/ESProduct.h +++ b/HeterogeneousCore/CUDACore/interface/ESProduct.h @@ -9,6 +9,7 @@ #include "FWCore/Utilities/interface/thread_safety_macros.h" #include "HeterogeneousCore/CUDAServices/interface/numberOfDevices.h" #include "HeterogeneousCore/CUDAUtilities/interface/EventCache.h" +#include "HeterogeneousCore/CUDAUtilities/interface/ScopedSetDevice.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h" #include "HeterogeneousCore/CUDAUtilities/interface/eventWorkHasCompleted.h" @@ -19,10 +20,13 @@ namespace cms { class ESProduct { public: ESProduct() : gpuDataPerDevice_(numberOfDevices()) { + cms::cuda::ScopedSetDevice scopedDevice; for (size_t i = 0; i < gpuDataPerDevice_.size(); ++i) { + scopedDevice.set(i); gpuDataPerDevice_[i].m_event = getEventCache().get(); } } + ~ESProduct() = default; // transferAsync should be a function of (T&, cudaStream_t) @@ -30,12 +34,10 @@ 
namespace cms { // to the CUDA stream template const T& dataForCurrentDeviceAsync(cudaStream_t cudaStream, F transferAsync) const { - auto device = currentDevice(); - + int device = currentDevice(); auto& data = gpuDataPerDevice_[device]; - // If GPU data has already been filled, we can return it - // immediately + // If the GPU data has already been filled, we can return it immediately if (not data.m_filled.load()) { // It wasn't, so need to fill it std::scoped_lock lk{data.m_mutex}; @@ -103,4 +105,4 @@ namespace cms { } // namespace cuda } // namespace cms -#endif +#endif // HeterogeneousCore_CUDACore_ESProduct_h