Skip to content

Commit

Permalink
Synchronise with CMSSW_10_6_0_pre2
Browse files Browse the repository at this point in the history
  • Loading branch information
fwyzard committed Mar 26, 2019
2 parents 1313262 + ea82bb5 commit 586c0c0
Show file tree
Hide file tree
Showing 338 changed files with 28,972 additions and 384 deletions.
7 changes: 7 additions & 0 deletions CUDADataFormats/Common/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<use name="cuda-api-wrappers"/>
<use name="FWCore/ServiceRegistry"/>
<use name="HeterogeneousCore/CUDAServices"/>

<export>
<lib name="1"/>
</export>
51 changes: 51 additions & 0 deletions CUDADataFormats/Common/interface/CUDAProduct.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#ifndef CUDADataFormats_Common_CUDAProduct_h
#define CUDADataFormats_Common_CUDAProduct_h

#include <memory>

#include <cuda/api_wrappers.h>

#include "CUDADataFormats/Common/interface/CUDAProductBase.h"

namespace edm {
template <typename T> class Wrapper;
}

/**
* The purpose of this class is to wrap CUDA data to edm::Event in a
* way which forces correct use of various utilities.
*
* The non-default construction has to be done with CUDAScopedContext
* (in order to properly register the CUDA event).
*
* The default constructor is needed only for the ROOT dictionary generation.
*
* The CUDA event is in practice needed only for stream-stream
* synchronization, but someone with long-enough lifetime has to own
* it. Here is a somewhat natural place. If overhead is too much, we
* can e.g. make CUDAService own them (creating them on demand) and
* use them only where synchronization between streams is needed.
*/
template <typename T>
class CUDAProduct: public CUDAProductBase {
public:
CUDAProduct() = default; // Needed only for ROOT dictionary generation

CUDAProduct(const CUDAProduct&) = delete;
CUDAProduct& operator=(const CUDAProduct&) = delete;
CUDAProduct(CUDAProduct&&) = default;
CUDAProduct& operator=(CUDAProduct&&) = default;

private:
friend class CUDAScopedContext;
friend class edm::Wrapper<CUDAProduct<T>>;

explicit CUDAProduct(int device, std::shared_ptr<cuda::stream_t<>> stream, std::shared_ptr<cuda::event_t> event, T data):
CUDAProductBase(device, std::move(stream), std::move(event)),
data_(std::move(data))
{}

T data_; //!
};

#endif
41 changes: 41 additions & 0 deletions CUDADataFormats/Common/interface/CUDAProductBase.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#ifndef CUDADataFormats_Common_CUDAProductBase_h
#define CUDADataFormats_Common_CUDAProductBase_h

#include <memory>

#include <cuda/api_wrappers.h>

/**
* Base class for all instantiations of CUDA<T> to hold the
* non-T-dependent members.
*/
class CUDAProductBase {
public:
CUDAProductBase() = default; // Needed only for ROOT dictionary generation

bool isValid() const { return stream_.get() != nullptr; }
bool isAvailable() const;

int device() const { return device_; }

const cuda::stream_t<>& stream() const { return *stream_; }
cuda::stream_t<>& stream() { return *stream_; }
const std::shared_ptr<cuda::stream_t<>>& streamPtr() const { return stream_; }

const cuda::event_t *event() const { return event_.get(); }
cuda::event_t *event() { return event_.get(); }

protected:
explicit CUDAProductBase(int device, std::shared_ptr<cuda::stream_t<>> stream, std::shared_ptr<cuda::event_t> event);

private:
// The cuda::stream_t is really shared among edm::Event products, so
// using shared_ptr also here
std::shared_ptr<cuda::stream_t<>> stream_; //!
// shared_ptr because of caching in CUDAService
std::shared_ptr<cuda::event_t> event_; //!

int device_ = -1; //!
};

#endif
19 changes: 19 additions & 0 deletions CUDADataFormats/Common/src/CUDAProductBase.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include "CUDADataFormats/Common/interface/CUDAProductBase.h"

#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"

CUDAProductBase::CUDAProductBase(int device, std::shared_ptr<cuda::stream_t<>> stream, std::shared_ptr<cuda::event_t> event):
stream_(std::move(stream)),
event_(std::move(event)),
device_(device)
{}

bool CUDAProductBase::isAvailable() const {
// In absence of event, the product was available already at the end
// of produce() of the producer.
if(not event_) {
return true;
}
return event_->has_occurred();
}
5 changes: 5 additions & 0 deletions CUDADataFormats/Common/test/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<bin file="test*.cc" name="testCUDADataFormatsCommon">
<use name="HeterogeneousCore/CUDACore"/>
<use name="catch2"/>
<use name="cuda"/>
</bin>
69 changes: 69 additions & 0 deletions CUDADataFormats/Common/test/test_CUDAProduct.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#include "catch.hpp"

#include "CUDADataFormats/Common/interface/CUDAProduct.h"
#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"

#include <cuda_runtime_api.h>

namespace cudatest {
class TestCUDAScopedContext {
public:
static
CUDAScopedContext make(int dev, bool createEvent) {
auto device = cuda::device::get(dev);
std::unique_ptr<cuda::event_t> event;
if(createEvent) {
event = std::make_unique<cuda::event_t>(device.create_event());
}
return CUDAScopedContext(dev,
std::make_unique<cuda::stream_t<>>(device.create_stream(cuda::stream::implicitly_synchronizes_with_default_stream)),
std::move(event));
}
};
}

TEST_CASE("Use of CUDAProduct template", "[CUDACore]") {
SECTION("Default constructed") {
auto foo = CUDAProduct<int>();
REQUIRE(!foo.isValid());

auto bar = std::move(foo);
}

exitSansCUDADevices();

constexpr int defaultDevice = 0;
{
auto ctx = cudatest::TestCUDAScopedContext::make(defaultDevice, true);
std::unique_ptr<CUDAProduct<int>> dataPtr = ctx.wrap(10);
auto& data = *dataPtr;

SECTION("Construct from CUDAScopedContext") {
REQUIRE(data.isValid());
REQUIRE(data.device() == defaultDevice);
REQUIRE(data.stream().id() == ctx.stream().id());
REQUIRE(data.event() != nullptr);
}

SECTION("Move constructor") {
auto data2 = CUDAProduct<int>(std::move(data));
REQUIRE(data2.isValid());
REQUIRE(!data.isValid());
}

SECTION("Move assignment") {
CUDAProduct<int> data2;
data2 = std::move(data);
REQUIRE(data2.isValid());
REQUIRE(!data.isValid());
}
}

// Destroy and clean up all resources so that the next test can
// assume to start from a clean state.
cudaCheck(cudaSetDevice(defaultDevice));
cudaCheck(cudaDeviceSynchronize());
cudaDeviceReset();
}
2 changes: 2 additions & 0 deletions CUDADataFormats/Common/test/test_main.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#define CATCH_CONFIG_MAIN
#include "catch.hpp"
9 changes: 9 additions & 0 deletions CUDADataFormats/SiPixelCluster/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<use name="FWCore/ServiceRegistry"/>
<use name="HeterogeneousCore/CUDAServices"/>
<use name="cuda-api-wrappers"/>
<use name="rootcore"/>

<export>
<lib name="1"/>
</export>

76 changes: 76 additions & 0 deletions CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#ifndef CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h
#define CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"

#include <cuda/api_wrappers.h>

class SiPixelClustersCUDA {
public:
SiPixelClustersCUDA() = default;
explicit SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<>& stream);
~SiPixelClustersCUDA() = default;

SiPixelClustersCUDA(const SiPixelClustersCUDA&) = delete;
SiPixelClustersCUDA& operator=(const SiPixelClustersCUDA&) = delete;
SiPixelClustersCUDA(SiPixelClustersCUDA&&) = default;
SiPixelClustersCUDA& operator=(SiPixelClustersCUDA&&) = default;

void setNClusters(uint32_t nClusters) {
nClusters_h = nClusters;
}

uint32_t nClusters() const { return nClusters_h; }

uint32_t *moduleStart() { return moduleStart_d.get(); }
uint32_t *clusInModule() { return clusInModule_d.get(); }
uint32_t *moduleId() { return moduleId_d.get(); }
uint32_t *clusModuleStart() { return clusModuleStart_d.get(); }

uint32_t const *moduleStart() const { return moduleStart_d.get(); }
uint32_t const *clusInModule() const { return clusInModule_d.get(); }
uint32_t const *moduleId() const { return moduleId_d.get(); }
uint32_t const *clusModuleStart() const { return clusModuleStart_d.get(); }

uint32_t const *c_moduleStart() const { return moduleStart_d.get(); }
uint32_t const *c_clusInModule() const { return clusInModule_d.get(); }
uint32_t const *c_moduleId() const { return moduleId_d.get(); }
uint32_t const *c_clusModuleStart() const { return clusModuleStart_d.get(); }

class DeviceConstView {
public:
DeviceConstView() = default;

#ifdef __CUDACC__
__device__ __forceinline__ uint32_t moduleStart(int i) const { return __ldg(moduleStart_+i); }
__device__ __forceinline__ uint32_t clusInModule(int i) const { return __ldg(clusInModule_+i); }
__device__ __forceinline__ uint32_t moduleId(int i) const { return __ldg(moduleId_+i); }
__device__ __forceinline__ uint32_t clusModuleStart(int i) const { return __ldg(clusModuleStart_+i); }
#endif

friend SiPixelClustersCUDA;

private:
uint32_t const *moduleStart_;
uint32_t const *clusInModule_;
uint32_t const *moduleId_;
uint32_t const *clusModuleStart_;
};

DeviceConstView *view() const { return view_d.get(); }

private:
cudautils::device::unique_ptr<uint32_t[]> moduleStart_d; // index of the first pixel of each module
cudautils::device::unique_ptr<uint32_t[]> clusInModule_d; // number of clusters found in each module
cudautils::device::unique_ptr<uint32_t[]> moduleId_d; // module id of each module

// originally from rechits
cudautils::device::unique_ptr<uint32_t[]> clusModuleStart_d;

cudautils::device::unique_ptr<DeviceConstView> view_d; // "me" pointer

uint32_t nClusters_h;
};

#endif
23 changes: 23 additions & 0 deletions CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"

#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"

SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<>& stream) {
edm::Service<CUDAService> cs;

moduleStart_d = cs->make_device_unique<uint32_t[]>(maxClusters+1, stream);
clusInModule_d = cs->make_device_unique<uint32_t[]>(maxClusters, stream);
moduleId_d = cs->make_device_unique<uint32_t[]>(maxClusters, stream);
clusModuleStart_d = cs->make_device_unique<uint32_t[]>(maxClusters+1, stream);

auto view = cs->make_host_unique<DeviceConstView>(stream);
view->moduleStart_ = moduleStart_d.get();
view->clusInModule_ = clusInModule_d.get();
view->moduleId_ = moduleId_d.get();
view->clusModuleStart_ = clusModuleStart_d.get();

view_d = cs->make_device_unique<DeviceConstView>(stream);
cudautils::copyAsync(view_d, view, stream);
}
8 changes: 8 additions & 0 deletions CUDADataFormats/SiPixelCluster/src/classes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#ifndef CUDADataFormats_SiPixelCluster_classes_h
#define CUDADataFormats_SiPixelCluster_classes_h

#include "CUDADataFormats/Common/interface/CUDAProduct.h"
#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"
#include "DataFormats/Common/interface/Wrapper.h"

#endif
4 changes: 4 additions & 0 deletions CUDADataFormats/SiPixelCluster/src/classes_def.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<lcgdict>
<class name="CUDAProduct<SiPixelClustersCUDA>" persistent="false"/>
<class name="edm::Wrapper<CUDAProduct<SiPixelClustersCUDA>>" persistent="false"/>
</lcgdict>
9 changes: 9 additions & 0 deletions CUDADataFormats/SiPixelDigi/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<use name="DataFormats/SiPixelRawData"/>
<use name="FWCore/ServiceRegistry"/>
<use name="HeterogeneousCore/CUDAServices"/>
<use name="cuda-api-wrappers"/>
<use name="rootcore"/>

<export>
<lib name="1"/>
</export>
40 changes: 40 additions & 0 deletions CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h
#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h

#include "DataFormats/SiPixelDigi/interface/PixelErrors.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h"

#include <cuda/api_wrappers.h>

class SiPixelDigiErrorsCUDA {
public:
SiPixelDigiErrorsCUDA() = default;
explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cuda::stream_t<>& stream);
~SiPixelDigiErrorsCUDA() = default;

SiPixelDigiErrorsCUDA(const SiPixelDigiErrorsCUDA&) = delete;
SiPixelDigiErrorsCUDA& operator=(const SiPixelDigiErrorsCUDA&) = delete;
SiPixelDigiErrorsCUDA(SiPixelDigiErrorsCUDA&&) = default;
SiPixelDigiErrorsCUDA& operator=(SiPixelDigiErrorsCUDA&&) = default;

const PixelFormatterErrors& formatterErrors() const { return formatterErrors_h; }

GPU::SimpleVector<PixelErrorCompact> *error() { return error_d.get(); }
GPU::SimpleVector<PixelErrorCompact> const *error() const { return error_d.get(); }
GPU::SimpleVector<PixelErrorCompact> const *c_error() const { return error_d.get(); }

using HostDataError = std::pair<GPU::SimpleVector<PixelErrorCompact>, cudautils::host::unique_ptr<PixelErrorCompact[]>>;
HostDataError dataErrorToHostAsync(cuda::stream_t<>& stream) const;

void copyErrorToHostAsync(cuda::stream_t<>& stream);

private:
cudautils::device::unique_ptr<PixelErrorCompact[]> data_d;
cudautils::device::unique_ptr<GPU::SimpleVector<PixelErrorCompact>> error_d;
cudautils::host::unique_ptr<GPU::SimpleVector<PixelErrorCompact>> error_h;
PixelFormatterErrors formatterErrors_h;
};

#endif
Loading

0 comments on commit 586c0c0

Please sign in to comment.