Skip to content

Commit

Permalink
Replace use of API wrapper stream and event with plain CUDA, part 1 (#…
Browse files Browse the repository at this point in the history
…389)

Replace cuda::stream_t<> with cudaStream_t in client code
Replace cuda::event_t with cudaEvent_t in the client code
Clean up BuildFiles
  • Loading branch information
makortel authored and fwyzard committed Oct 26, 2019
1 parent 55ce902 commit b2ce037
Show file tree
Hide file tree
Showing 106 changed files with 705 additions and 652 deletions.
4 changes: 2 additions & 2 deletions CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"

#include <cuda/api_wrappers.h>
#include <cuda_runtime.h>

class BeamSpotCUDA {
public:
Expand All @@ -21,7 +21,7 @@ class BeamSpotCUDA {
};

BeamSpotCUDA() = default;
BeamSpotCUDA(Data const* data_h, cuda::stream_t<>& stream);
BeamSpotCUDA(Data const* data_h, cudaStream_t stream);

Data const* data() const { return data_d_.get(); }

Expand Down
4 changes: 2 additions & 2 deletions CUDADataFormats/BeamSpot/src/BeamSpotCUDA.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"

BeamSpotCUDA::BeamSpotCUDA(Data const* data_h, cuda::stream_t<>& stream) {
BeamSpotCUDA::BeamSpotCUDA(Data const* data_h, cudaStream_t stream) {
data_d_ = cudautils::make_device_unique<Data>(stream);
cuda::memory::async::copy(data_d_.get(), data_h, sizeof(Data), stream.id());
cuda::memory::async::copy(data_d_.get(), data_h, sizeof(Data), stream);
}
16 changes: 11 additions & 5 deletions CUDADataFormats/Common/interface/CUDAProductBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,17 @@ class CUDAProductBase {

int device() const { return device_; }

const cuda::stream_t<>& stream() const { return *stream_; }
cuda::stream_t<>& stream() { return *stream_; }

const cuda::event_t* event() const { return event_.get(); }
cuda::event_t* event() { return event_.get(); }
// cudaStream_t is a pointer to a thread-safe object, for which a
// mutable access is needed even if the CUDAScopedContext itself
// would be const. Therefore it is ok to return a non-const
// pointer from a const method here.
cudaStream_t stream() const { return stream_->id(); }

// cudaEvent_t is a pointer to a thread-safe object, for which a
// mutable access is needed even if the CUDAScopedContext itself
// would be const. Therefore it is ok to return a non-const
// pointer from a const method here.
cudaEvent_t event() const { return event_ ? event_->id() : nullptr; }

protected:
explicit CUDAProductBase(int device, std::shared_ptr<cuda::stream_t<>> stream)
Expand Down
42 changes: 21 additions & 21 deletions CUDADataFormats/Common/interface/HeterogeneousSoA.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ class HeterogeneousSoA {
auto *operator-> () { return get(); }

// in reality valid only for GPU version...
cudautils::host::unique_ptr<T> toHostAsync(cuda::stream_t<> &stream) const {
cudautils::host::unique_ptr<T> toHostAsync(cudaStream_t stream) const {
assert(dm_ptr);
auto ret = cudautils::make_host_unique<T>(stream);
cudaCheck(cudaMemcpyAsync(ret.get(), dm_ptr.get(), sizeof(T), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(ret.get(), dm_ptr.get(), sizeof(T), cudaMemcpyDefault, stream));
return ret;
}

Expand All @@ -56,27 +56,27 @@ namespace cudaCompat {
using unique_ptr = cudautils::device::unique_ptr<T>;

template <typename T>
static auto make_unique(cuda::stream_t<> &stream) {
static auto make_unique(cudaStream_t stream) {
return cudautils::make_device_unique<T>(stream);
}

template <typename T>
static auto make_unique(size_t size, cuda::stream_t<> &stream) {
static auto make_unique(size_t size, cudaStream_t stream) {
return cudautils::make_device_unique<T>(size, stream);
}

template <typename T>
static auto make_host_unique(cuda::stream_t<> &stream) {
static auto make_host_unique(cudaStream_t stream) {
return cudautils::make_host_unique<T>(stream);
}

template <typename T>
static auto make_device_unique(cuda::stream_t<> &stream) {
static auto make_device_unique(cudaStream_t stream) {
return cudautils::make_device_unique<T>(stream);
}

template <typename T>
static auto make_device_unique(size_t size, cuda::stream_t<> &stream) {
static auto make_device_unique(size_t size, cudaStream_t stream) {
return cudautils::make_device_unique<T>(size, stream);
}
};
Expand All @@ -86,22 +86,22 @@ namespace cudaCompat {
using unique_ptr = cudautils::host::unique_ptr<T>;

template <typename T>
static auto make_unique(cuda::stream_t<> &stream) {
static auto make_unique(cudaStream_t stream) {
return cudautils::make_host_unique<T>(stream);
}

template <typename T>
static auto make_host_unique(cuda::stream_t<> &stream) {
static auto make_host_unique(cudaStream_t stream) {
return cudautils::make_host_unique<T>(stream);
}

template <typename T>
static auto make_device_unique(cuda::stream_t<> &stream) {
static auto make_device_unique(cudaStream_t stream) {
return cudautils::make_device_unique<T>(stream);
}

template <typename T>
static auto make_device_unique(size_t size, cuda::stream_t<> &stream) {
static auto make_device_unique(size_t size, cudaStream_t stream) {
return cudautils::make_device_unique<T>(size, stream);
}
};
Expand All @@ -111,27 +111,27 @@ namespace cudaCompat {
using unique_ptr = std::unique_ptr<T>;

template <typename T>
static auto make_unique(cuda::stream_t<> &) {
static auto make_unique(cudaStream_t) {
return std::make_unique<T>();
}

template <typename T>
static auto make_unique(size_t size, cuda::stream_t<> &) {
static auto make_unique(size_t size, cudaStream_t) {
return std::make_unique<T>(size);
}

template <typename T>
static auto make_host_unique(cuda::stream_t<> &) {
static auto make_host_unique(cudaStream_t) {
return std::make_unique<T>();
}

template <typename T>
static auto make_device_unique(cuda::stream_t<> &) {
static auto make_device_unique(cudaStream_t) {
return std::make_unique<T>();
}

template <typename T>
static auto make_device_unique(size_t size, cuda::stream_t<> &) {
static auto make_device_unique(size_t size, cudaStream_t) {
return std::make_unique<T>(size);
}
};
Expand All @@ -151,28 +151,28 @@ class HeterogeneousSoAImpl {
HeterogeneousSoAImpl &operator=(HeterogeneousSoAImpl &&) = default;

explicit HeterogeneousSoAImpl(unique_ptr<T> &&p) : m_ptr(std::move(p)) {}
explicit HeterogeneousSoAImpl(cuda::stream_t<> &stream);
explicit HeterogeneousSoAImpl(cudaStream_t stream);

T const *get() const { return m_ptr.get(); }

T *get() { return m_ptr.get(); }

cudautils::host::unique_ptr<T> toHostAsync(cuda::stream_t<> &stream) const;
cudautils::host::unique_ptr<T> toHostAsync(cudaStream_t stream) const;

private:
unique_ptr<T> m_ptr; //!
};

template <typename T, typename Traits>
HeterogeneousSoAImpl<T, Traits>::HeterogeneousSoAImpl(cuda::stream_t<> &stream) {
HeterogeneousSoAImpl<T, Traits>::HeterogeneousSoAImpl(cudaStream_t stream) {
m_ptr = Traits::template make_unique<T>(stream);
}

// in reality valid only for GPU version...
template <typename T, typename Traits>
cudautils::host::unique_ptr<T> HeterogeneousSoAImpl<T, Traits>::toHostAsync(cuda::stream_t<> &stream) const {
cudautils::host::unique_ptr<T> HeterogeneousSoAImpl<T, Traits>::toHostAsync(cudaStream_t stream) const {
auto ret = cudautils::make_host_unique<T>(stream);
cudaCheck(cudaMemcpyAsync(ret.get(), get(), sizeof(T), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(ret.get(), get(), sizeof(T), cudaMemcpyDefault, stream));
return ret;
}

Expand Down
3 changes: 2 additions & 1 deletion CUDADataFormats/Common/src/CUDAProductBase.cc
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#include "CUDADataFormats/Common/interface/CUDAProductBase.h"
#include "HeterogeneousCore/CUDAUtilities/interface/eventIsOccurred.h"

bool CUDAProductBase::isAvailable() const {
// In absence of event, the product was available already at the end
// of produce() of the producer.
if (not event_) {
return true;
}
return event_->has_occurred();
return cudautils::eventIsOccurred(event_->id());
}
2 changes: 1 addition & 1 deletion CUDADataFormats/Common/test/test_CUDAProduct.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ TEST_CASE("Use of CUDAProduct template", "[CUDACore]") {
SECTION("Construct from CUDAScopedContext") {
REQUIRE(data.isValid());
REQUIRE(data.device() == defaultDevice);
REQUIRE(data.stream().id() == ctx.stream().id());
REQUIRE(data.stream() == ctx.stream());
REQUIRE(data.event() != nullptr);
}

Expand Down
2 changes: 1 addition & 1 deletion CUDADataFormats/SiPixelCluster/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<use name="DataFormats/Common"/>
<use name="CUDADataFormats/Common"/>
<use name="HeterogeneousCore/CUDAUtilities"/>
<use name="cuda-api-wrappers"/>
<use name="cuda"/>
<use name="rootcore"/>

<export>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"

#include <cuda/api_wrappers.h>

#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"

#include <cuda_runtime.h>

class SiPixelClustersCUDA {
public:
SiPixelClustersCUDA() = default;
explicit SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<> &stream);
explicit SiPixelClustersCUDA(size_t maxClusters, cudaStream_t stream);
~SiPixelClustersCUDA() = default;

SiPixelClustersCUDA(const SiPixelClustersCUDA &) = delete;
Expand Down
2 changes: 1 addition & 1 deletion CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"

SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<>& stream) {
SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cudaStream_t stream) {
moduleStart_d = cudautils::make_device_unique<uint32_t[]>(maxClusters + 1, stream);
clusInModule_d = cudautils::make_device_unique<uint32_t[]>(maxClusters, stream);
moduleId_d = cudautils::make_device_unique<uint32_t[]>(maxClusters, stream);
Expand Down
2 changes: 1 addition & 1 deletion CUDADataFormats/SiPixelDigi/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<use name="DataFormats/Common"/>
<use name="DataFormats/SiPixelRawData"/>
<use name="HeterogeneousCore/CUDAUtilities"/>
<use name="cuda-api-wrappers"/>
<use name="cuda"/>
<use name="rootcore"/>

<export>
Expand Down
8 changes: 4 additions & 4 deletions CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h"

#include <cuda/api_wrappers.h>
#include <cuda_runtime.h>

class SiPixelDigiErrorsCUDA {
public:
SiPixelDigiErrorsCUDA() = default;
explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cuda::stream_t<>& stream);
explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream);
~SiPixelDigiErrorsCUDA() = default;

SiPixelDigiErrorsCUDA(const SiPixelDigiErrorsCUDA&) = delete;
Expand All @@ -27,9 +27,9 @@ class SiPixelDigiErrorsCUDA {

using HostDataError =
std::pair<GPU::SimpleVector<PixelErrorCompact>, cudautils::host::unique_ptr<PixelErrorCompact[]>>;
HostDataError dataErrorToHostAsync(cuda::stream_t<>& stream) const;
HostDataError dataErrorToHostAsync(cudaStream_t stream) const;

void copyErrorToHostAsync(cuda::stream_t<>& stream);
void copyErrorToHostAsync(cudaStream_t stream);

private:
cudautils::device::unique_ptr<PixelErrorCompact[]> data_d;
Expand Down
14 changes: 7 additions & 7 deletions CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"

#include <cuda/api_wrappers.h>
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"

#include <cuda_runtime.h>

class SiPixelDigisCUDA {
public:
SiPixelDigisCUDA() = default;
explicit SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<> &stream);
explicit SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream);
~SiPixelDigisCUDA() = default;

SiPixelDigisCUDA(const SiPixelDigisCUDA &) = delete;
Expand Down Expand Up @@ -50,10 +50,10 @@ class SiPixelDigisCUDA {
uint32_t const *c_pdigi() const { return pdigi_d.get(); }
uint32_t const *c_rawIdArr() const { return rawIdArr_d.get(); }

cudautils::host::unique_ptr<uint16_t[]> adcToHostAsync(cuda::stream_t<> &stream) const;
cudautils::host::unique_ptr<int32_t[]> clusToHostAsync(cuda::stream_t<> &stream) const;
cudautils::host::unique_ptr<uint32_t[]> pdigiToHostAsync(cuda::stream_t<> &stream) const;
cudautils::host::unique_ptr<uint32_t[]> rawIdArrToHostAsync(cuda::stream_t<> &stream) const;
cudautils::host::unique_ptr<uint16_t[]> adcToHostAsync(cudaStream_t stream) const;
cudautils::host::unique_ptr<int32_t[]> clusToHostAsync(cudaStream_t stream) const;
cudautils::host::unique_ptr<uint32_t[]> pdigiToHostAsync(cudaStream_t stream) const;
cudautils::host::unique_ptr<uint32_t[]> rawIdArrToHostAsync(cudaStream_t stream) const;

class DeviceConstView {
public:
Expand Down
6 changes: 3 additions & 3 deletions CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

#include <cassert>

SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cuda::stream_t<>& stream)
SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream)
: formatterErrors_h(std::move(errors)) {
error_d = cudautils::make_device_unique<GPU::SimpleVector<PixelErrorCompact>>(stream);
data_d = cudautils::make_device_unique<PixelErrorCompact[]>(maxFedWords, stream);
Expand All @@ -22,11 +22,11 @@ SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterE
cudautils::copyAsync(error_d, error_h, stream);
}

void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cuda::stream_t<>& stream) {
void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) {
cudautils::copyAsync(error_h, error_d, stream);
}

SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cuda::stream_t<>& stream) const {
SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const {
// On one hand size() could be sufficient. On the other hand, if
// someone copies the SimpleVector<>, (s)he might expect the data
// buffer to actually have space for capacity() elements.
Expand Down
10 changes: 5 additions & 5 deletions CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"

SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<>& stream) {
SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) {
xx_d = cudautils::make_device_unique<uint16_t[]>(maxFedWords, stream);
yy_d = cudautils::make_device_unique<uint16_t[]>(maxFedWords, stream);
adc_d = cudautils::make_device_unique<uint16_t[]>(maxFedWords, stream);
Expand All @@ -25,25 +25,25 @@ SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<>& stream)
cudautils::copyAsync(view_d, view, stream);
}

cudautils::host::unique_ptr<uint16_t[]> SiPixelDigisCUDA::adcToHostAsync(cuda::stream_t<>& stream) const {
cudautils::host::unique_ptr<uint16_t[]> SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const {
auto ret = cudautils::make_host_unique<uint16_t[]>(nDigis(), stream);
cudautils::copyAsync(ret, adc_d, nDigis(), stream);
return ret;
}

cudautils::host::unique_ptr<int32_t[]> SiPixelDigisCUDA::clusToHostAsync(cuda::stream_t<>& stream) const {
cudautils::host::unique_ptr<int32_t[]> SiPixelDigisCUDA::clusToHostAsync(cudaStream_t stream) const {
auto ret = cudautils::make_host_unique<int32_t[]>(nDigis(), stream);
cudautils::copyAsync(ret, clus_d, nDigis(), stream);
return ret;
}

cudautils::host::unique_ptr<uint32_t[]> SiPixelDigisCUDA::pdigiToHostAsync(cuda::stream_t<>& stream) const {
cudautils::host::unique_ptr<uint32_t[]> SiPixelDigisCUDA::pdigiToHostAsync(cudaStream_t stream) const {
auto ret = cudautils::make_host_unique<uint32_t[]>(nDigis(), stream);
cudautils::copyAsync(ret, pdigi_d, nDigis(), stream);
return ret;
}

cudautils::host::unique_ptr<uint32_t[]> SiPixelDigisCUDA::rawIdArrToHostAsync(cuda::stream_t<>& stream) const {
cudautils::host::unique_ptr<uint32_t[]> SiPixelDigisCUDA::rawIdArrToHostAsync(cudaStream_t stream) const {
auto ret = cudautils::make_host_unique<uint32_t[]>(nDigis(), stream);
cudautils::copyAsync(ret, rawIdArr_d, nDigis(), stream);
return ret;
Expand Down
2 changes: 1 addition & 1 deletion CUDADataFormats/Track/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<use name="cuda-api-wrappers"/>
<use name="cuda"/>
<use name="rootcore"/>
<use name="DataFormats/Common"/>
<use name="HeterogeneousCore/CUDAUtilities"/>
Expand Down
2 changes: 1 addition & 1 deletion CUDADataFormats/TrackingRecHit/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<use name="cuda-api-wrappers"/>
<use name="cuda"/>
<use name="rootcore"/>
<use name="CUDADataFormats/Common"/>
<use name="DataFormats/Common"/>
Expand Down
Loading

0 comments on commit b2ce037

Please sign in to comment.