Skip to content

Commit

Permalink
Synchronise with CMSSW_11_0_0_pre13
Browse files Browse the repository at this point in the history
  • Loading branch information
fwyzard committed Nov 30, 2019
2 parents 91be707 + a2ead18 commit 62a224f
Show file tree
Hide file tree
Showing 453 changed files with 41,735 additions and 687 deletions.
8 changes: 8 additions & 0 deletions CUDADataFormats/BeamSpot/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<use name="rootcore"/>
<use name="CUDADataFormats/Common"/>
<use name="DataFormats/Common"/>
<use name="HeterogeneousCore/CUDAUtilities"/>

<export>
<lib name="1"/>
</export>
32 changes: 32 additions & 0 deletions CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#ifndef CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h
#define CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"

#include <cuda_runtime.h>

// Device-side beamspot: owns a single `Data` record in CUDA device
// memory, filled by copying a host-side record (see the non-default
// constructor, defined in src/BeamSpotCUDA.cc).
class BeamSpotCUDA {
public:
  // alignas(128) doesn't really make sense as there is only one
  // beamspot per event?
  //
  // Plain-float aggregate describing the beamspot; layout is shared
  // between host and device, so keep it trivially copyable.
  struct Data {
    float x, y, z;  // position
    // TODO: add covariance matrix

    float sigmaZ;
    float beamWidthX, beamWidthY;
    float dxdz, dydz;  // beam slopes
    float emittanceX, emittanceY;
    float betaStar;
  };

  // Needed only for ROOT dictionary generation / default-constructed products.
  BeamSpotCUDA() = default;
  // Allocates device memory and copies *data_h to it asynchronously on
  // `stream`; the host buffer must stay valid until the copy completes.
  BeamSpotCUDA(Data const* data_h, cudaStream_t stream);

  // Pointer to the record in device memory (nullptr if default-constructed).
  Data const* data() const { return data_d_.get(); }

private:
  cudautils::device::unique_ptr<Data> data_d_;  // owning device allocation
};

#endif
9 changes: 9 additions & 0 deletions CUDADataFormats/BeamSpot/src/BeamSpotCUDA.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"

#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"

BeamSpotCUDA::BeamSpotCUDA(Data const* data_h, cudaStream_t stream) {
data_d_ = cudautils::make_device_unique<Data>(stream);
cudaCheck(cudaMemcpyAsync(data_d_.get(), data_h, sizeof(Data), cudaMemcpyHostToDevice, stream));
}
8 changes: 8 additions & 0 deletions CUDADataFormats/BeamSpot/src/classes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#ifndef CUDADataFormats_BeamSpot_classes_h
#define CUDADataFormats_BeamSpot_classes_h

#include "CUDADataFormats/Common/interface/CUDAProduct.h"
#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"
#include "DataFormats/Common/interface/Wrapper.h"

#endif
4 changes: 4 additions & 0 deletions CUDADataFormats/BeamSpot/src/classes_def.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<lcgdict>
<class name="CUDAProduct<BeamSpotCUDA>" persistent="false"/>
<class name="edm::Wrapper<CUDAProduct<BeamSpotCUDA>>" persistent="false"/>
</lcgdict>
5 changes: 5 additions & 0 deletions CUDADataFormats/Common/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<use name="HeterogeneousCore/CUDAUtilities"/>

<export>
<lib name="1"/>
</export>
12 changes: 12 additions & 0 deletions CUDADataFormats/Common/interface/ArrayShadow.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#ifndef CUDADataFormatsCommonArrayShadow_H
#define CUDADataFormatsCommonArrayShadow_H
#include <array>

// "Shadow" of a std::array-like type A: exposes the same element type
// and element count, but stores the elements in a plain C array.
// Presumably used where the aggregate must be a raw array (e.g. for raw
// memory transfers to the device) — TODO confirm against callers.
template <typename A>
struct ArrayShadow {
  // Element type mirrored from A.
  using T = typename A::value_type;
  // The elements themselves; count matches A exactly.
  T data[std::tuple_size<A>::value];
  // Element count, usable in constant expressions.
  static constexpr auto size() { return std::tuple_size<A>::value; }
};

#endif
55 changes: 55 additions & 0 deletions CUDADataFormats/Common/interface/CUDAProduct.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#ifndef CUDADataFormats_Common_CUDAProduct_h
#define CUDADataFormats_Common_CUDAProduct_h

#include <memory>

#include "CUDADataFormats/Common/interface/CUDAProductBase.h"

namespace edm {
template <typename T>
class Wrapper;
}
namespace impl {
class CUDAScopedContextGetterBase;
}

/**
* The purpose of this class is to wrap CUDA data to edm::Event in a
* way which forces correct use of various utilities.
*
* The non-default construction has to be done with CUDAScopedContext
* (in order to properly register the CUDA event).
*
* The default constructor is needed only for the ROOT dictionary generation.
*
* The CUDA event is in practice needed only for stream-stream
* synchronization, but someone with long-enough lifetime has to own
* it. Here is a somewhat natural place. If overhead is too much, we
* can use them only where synchronization between streams is needed.
*/
template <typename T>
class CUDAProduct : public CUDAProductBase {
public:
  CUDAProduct() = default;  // Needed only for ROOT dictionary generation

  // Copying is forbidden: the wrapped CUDA data has single-owner
  // semantics within the event product.
  CUDAProduct(const CUDAProduct&) = delete;
  CUDAProduct& operator=(const CUDAProduct&) = delete;
  CUDAProduct(CUDAProduct&&) = default;
  CUDAProduct& operator=(CUDAProduct&&) = default;

private:
  // Only the scoped-context helpers (which register the CUDA event
  // correctly) and the ROOT wrapper may construct a non-empty product.
  friend class impl::CUDAScopedContextGetterBase;
  friend class CUDAScopedContextProduce;
  friend class edm::Wrapper<CUDAProduct<T>>;

  // Wraps an already-constructed payload, taking ownership by move.
  explicit CUDAProduct(int device, cudautils::SharedStreamPtr stream, T data)
      : CUDAProductBase(device, std::move(stream)), data_(std::move(data)) {}

  // Constructs the payload in place from the forwarded arguments.
  template <typename... Args>
  explicit CUDAProduct(int device, cudautils::SharedStreamPtr stream, Args&&... args)
      : CUDAProductBase(device, std::move(stream)), data_(std::forward<Args>(args)...) {}

  T data_;  //!  (transient: not persisted by ROOT)
};

#endif
90 changes: 90 additions & 0 deletions CUDADataFormats/Common/interface/CUDAProductBase.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#ifndef CUDADataFormats_Common_CUDAProductBase_h
#define CUDADataFormats_Common_CUDAProductBase_h

#include <atomic>
#include <memory>

#include "HeterogeneousCore/CUDAUtilities/interface/SharedStreamPtr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/SharedEventPtr.h"

namespace impl {
class CUDAScopedContextBase;
}

/**
* Base class for all instantiations of CUDA<T> to hold the
* non-T-dependent members.
*/
class CUDAProductBase {
public:
  CUDAProductBase() = default;  // Needed only for ROOT dictionary generation
  ~CUDAProductBase();

  // Copying is forbidden; products are moved between owners.
  CUDAProductBase(const CUDAProductBase&) = delete;
  CUDAProductBase& operator=(const CUDAProductBase&) = delete;
  // Moves are noexcept: every member operation below (shared_ptr moves,
  // atomic load/store, int copy) is non-throwing, and declaring this lets
  // containers and std::move_if_noexcept-based code move instead of copy.
  CUDAProductBase(CUDAProductBase&& other) noexcept
      : stream_{std::move(other.stream_)},
        event_{std::move(other.event_)},
        mayReuseStream_{other.mayReuseStream_.load()},
        device_{other.device_} {}
  CUDAProductBase& operator=(CUDAProductBase&& other) noexcept {
    stream_ = std::move(other.stream_);
    event_ = std::move(other.event_);
    mayReuseStream_ = other.mayReuseStream_.load();
    device_ = other.device_;
    return *this;
  }

  // A product is valid once it has been associated with a CUDA stream.
  bool isValid() const { return stream_.get() != nullptr; }
  bool isAvailable() const;

  int device() const { return device_; }

  // cudaStream_t is a pointer to a thread-safe object, for which a
  // mutable access is needed even if the CUDAScopedContext itself
  // would be const. Therefore it is ok to return a non-const
  // pointer from a const method here.
  cudaStream_t stream() const { return stream_.get(); }

  // cudaEvent_t is a pointer to a thread-safe object, for which a
  // mutable access is needed even if the CUDAScopedContext itself
  // would be const. Therefore it is ok to return a non-const
  // pointer from a const method here.
  cudaEvent_t event() const { return event_ ? event_.get() : nullptr; }

protected:
  explicit CUDAProductBase(int device, cudautils::SharedStreamPtr stream)
      : stream_{std::move(stream)}, device_{device} {}

private:
  friend class impl::CUDAScopedContextBase;
  friend class CUDAScopedContextProduce;

  // The following functions are intended to be used only from CUDAScopedContext
  void setEvent(cudautils::SharedEventPtr event) { event_ = std::move(event); }
  const cudautils::SharedStreamPtr& streamPtr() const { return stream_; }

  // Atomically hands the stream to exactly one caller: only the thread
  // that flips the flag from true to false may reuse the stream.
  bool mayReuseStream() const {
    bool expected = true;
    bool changed = mayReuseStream_.compare_exchange_strong(expected, false);
    // If the current thread is the one flipping the flag, it may
    // reuse the stream.
    return changed;
  }

  // The cudaStream_t is really shared among edm::Event products, so
  // using shared_ptr also here
  cudautils::SharedStreamPtr stream_;  //!
  // shared_ptr because of caching in CUDAEventCache
  cudautils::SharedEventPtr event_;  //!

  // This flag tells whether the CUDA stream may be reused by a
  // consumer or not. The goal is to have a "chain" of modules to
  // queue their work to the same stream.
  mutable std::atomic<bool> mayReuseStream_ = true;  //!

  // The CUDA device associated with this product
  int device_ = -1;  //!
};

#endif
Loading

0 comments on commit 62a224f

Please sign in to comment.