Skip to content

Commit

Permalink
Merge pull request #39428 from makortel/alpakaFramework
Browse files Browse the repository at this point in the history
Evolution of the Alpaka "gpu framework"
  • Loading branch information
cmsbuild authored Nov 22, 2022
2 parents 97b79e2 + edb7cda commit 08fcc8e
Show file tree
Hide file tree
Showing 61 changed files with 2,329 additions and 108 deletions.
76 changes: 76 additions & 0 deletions DataFormats/Common/interface/DeviceProduct.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#ifndef DataFormats_Common_interface_DeviceProduct_h
#define DataFormats_Common_interface_DeviceProduct_h

#include <cassert>
#include <memory>

namespace edm {
/**
 * Non-template base of edm::DeviceProduct<T>. It keeps the metadata
 * object type-erased (as std::shared_ptr<void const>) together with
 * the std::type_info of the type it was constructed from, so that the
 * metadata can be retrieved again through metadata<M>().
 *
 * The destructor is public and non-virtual: objects of derived types
 * must not be deleted through a pointer to DeviceProductBase.
 */
class DeviceProductBase {
public:
  // A default-constructed object holds no metadata; metadata<M>()
  // must not be called on it.
  DeviceProductBase() = default;
  ~DeviceProductBase() = default;

  // TODO: in principle this function is an implementation detail
  /**
   * Returns the stored metadata object as M.
   *
   * M must be exactly the type the object was constructed with, and
   * the object must actually hold a metadata object. Both conditions
   * are checked with assert() only, i.e. not when NDEBUG is defined.
   */
  template <typename M>
  M const& metadata() const {
    // Catch the "empty" state (default-constructed, or constructed
    // from a null shared_ptr) before anything is dereferenced.
    assert(metadataType_ != nullptr);
    assert(metadata_ != nullptr);
    // TODO: I believe the assertion could be removed safely once the
    // data dependence and scheduling systems guarantee that an
    // EDModule in a given execution space can access only the
    // EDProducts in a memory space compatible with that execution
    // space.
    //
    // On the other hand, with Alpaka (likely with others) the
    // getSynchronized() does additional checks so the added cost is
    // probably not that much?
    assert(typeid(M) == *metadataType_);
    return *static_cast<M const*>(metadata_.get());
  }

protected:
  /**
   * Stores the metadata object and records typeid(M), which
   * metadata<M>() later checks against.
   */
  template <typename M>
  explicit DeviceProductBase(std::shared_ptr<M> metadata)
      : metadata_(std::move(metadata)), metadataType_(&typeid(M)) {}

private:
  std::shared_ptr<void const> metadata_;
  // Initialised to nullptr so that the empty state is well defined
  // instead of leaving an indeterminate pointer behind.
  std::type_info const* metadataType_ = nullptr;
};

/**
 * Pairs an Event data product residing in device memory with the
 * device-specific metadata kept by DeviceProductBase. Developers are
 * not expected to use this class directly; the exception is ROOT
 * dictionary declarations in classes_def.xml, where it is named in the
 * same way as edm::Wrapper.
 */
template <typename T>
class DeviceProduct : public DeviceProductBase {
public:
  DeviceProduct() = default;

  /**
   * Hands the metadata object to the base class and constructs the
   * wrapped product in place from the remaining arguments.
   */
  template <typename M, typename... Args>
  explicit DeviceProduct(std::shared_ptr<M> metadata, Args&&... args)
      : DeviceProductBase(std::move(metadata)), data_(std::forward<Args>(args)...) {}

  // Move-only type: moving is allowed, copying is not.
  DeviceProduct(DeviceProduct&&) = default;
  DeviceProduct& operator=(DeviceProduct&&) = default;
  DeviceProduct(const DeviceProduct&) = delete;
  DeviceProduct& operator=(const DeviceProduct&) = delete;

  /**
   * Gives access to the wrapped product once the metadata object has
   * synchronized the access. The caller has to name the metadata
   * type M, because the synchronization details depend on it. Every
   * argument is forwarded to M::synchronize().
   */
  template <typename M, typename... Args>
  T const& getSynchronized(Args&&... args) const {
    metadata<M>().synchronize(std::forward<Args>(args)...);
    return data_;
  }

private:
  // NOTE(review): the trailing "//!" looks like ROOT's marker for a
  // transient (non-persistent) member; keep it on the same line.
  T data_;  //!
};
} // namespace edm
#endif
17 changes: 17 additions & 0 deletions DataFormats/Portable/interface/alpaka/PortableCollection.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "DataFormats/Portable/interface/PortableHostCollection.h"
#include "DataFormats/Portable/interface/PortableDeviceCollection.h"
#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
#include "HeterogeneousCore/AlpakaInterface/interface/TransferToHost.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE {

Expand Down Expand Up @@ -39,4 +40,20 @@ namespace traits {

} // namespace traits

namespace cms::alpakatools {
// TODO: Is this the right place for the specialization? Or should it be in PortableDeviceProduct?
/**
 * TransferToHost specialization for the PortableCollection<T> of the
 * current backend namespace. The host-side type is
 * ::PortableHostCollection<T>.
 */
template <typename T>
struct TransferToHost<ALPAKA_ACCELERATOR_NAMESPACE::PortableCollection<T>> {
  using HostDataType = ::PortableHostCollection<T>;

  /**
   * Creates a host collection sized after deviceData->metadata().size()
   * and enqueues a copy of the device buffer into it on the given
   * queue. The host collection is returned right after the copy has
   * been enqueued.
   * NOTE(review): whether the copy has completed on return presumably
   * depends on the queue type; confirm who synchronizes.
   */
  template <typename TQueue>
  static HostDataType transferAsync(TQueue& queue,
                                    ALPAKA_ACCELERATOR_NAMESPACE::PortableCollection<T> const& deviceData) {
    auto const elements = deviceData->metadata().size();
    HostDataType hostCopy(elements, queue);
    alpaka::memcpy(queue, hostCopy.buffer(), deviceData.buffer());
    return hostCopy;
  }
};
} // namespace cms::alpakatools

#endif // DataFormats_Portable_interface_alpaka_PortableDeviceCollection_h
1 change: 1 addition & 0 deletions DataFormats/PortableTestObjects/src/alpaka/classes_cuda.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "DataFormats/Common/interface/DeviceProduct.h"
#include "DataFormats/Common/interface/Wrapper.h"
#include "DataFormats/Portable/interface/Product.h"
#include "DataFormats/PortableTestObjects/interface/TestSoA.h"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
<lcgdict>
<class name="alpaka_cuda_async::portabletest::TestDeviceCollection" persistent="false"/>
<class name="edm::Wrapper<alpaka_cuda_async::portabletest::TestDeviceCollection>" persistent="false"/>

<class name="cms::alpakatools::Product<alpaka_cuda_async::Queue, alpaka_cuda_async::portabletest::TestDeviceCollection>" persistent="false"/>
<class name="edm::Wrapper<cms::alpakatools::Product<alpaka_cuda_async::Queue, alpaka_cuda_async::portabletest::TestDeviceCollection>>" persistent="false"/>
<class name="edm::DeviceProduct<alpaka_cuda_async::portabletest::TestDeviceCollection>" persistent="false"/>
<class name="edm::Wrapper<edm::DeviceProduct<alpaka_cuda_async::portabletest::TestDeviceCollection>>" persistent="false"/>
</lcgdict>
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,4 @@
]]>
</read>
<class name="edm::Wrapper<portabletest::TestHostCollection>" splitLevel="0"/>

<class name="cms::alpakatools::Product<alpaka_serial_sync::Queue, portabletest::TestHostCollection>" persistent="false"/>
<class name="edm::Wrapper<cms::alpakatools::Product<alpaka_serial_sync::Queue, portabletest::TestHostCollection>>" persistent="false"/>
</lcgdict>
1 change: 1 addition & 0 deletions HeterogeneousCore/AlpakaCore/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<use name="FWCore/Framework"/>
<use name="HeterogeneousCore/AlpakaInterface"/>
<flags ALPAKA_BACKENDS="1"/>
<export>
<lib name="1"/>
</export>
24 changes: 24 additions & 0 deletions HeterogeneousCore/AlpakaCore/interface/alpaka/DeviceProductType.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_DeviceProductType_h
#define HeterogeneousCore_AlpakaCore_interface_alpaka_DeviceProductType_h

#include "DataFormats/Common/interface/DeviceProduct.h"
#include "HeterogeneousCore/AlpakaInterface/interface/config.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE::detail {
/**
 * "Trait" class that maps a product type TProduct to the type that is
 * actually put in the edm::Event. The mapping is selected at
 * preprocessing time by ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED.
 */
#if defined(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED)
// host synchronous backends: TProduct is used as is
template <typename TProduct>
struct DeviceProductType {
  using type = TProduct;
};
#else
// all device and asynchronous backends: TProduct gets wrapped
template <typename TProduct>
struct DeviceProductType {
  using type = edm::DeviceProduct<TProduct>;
};
#endif
} // namespace ALPAKA_ACCELERATOR_NAMESPACE::detail

#endif
43 changes: 43 additions & 0 deletions HeterogeneousCore/AlpakaCore/interface/alpaka/EDGetToken.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_EDGetToken_h
#define HeterogeneousCore_AlpakaCore_interface_alpaka_EDGetToken_h

#include <type_traits>
#include <utility>

#include "DataFormats/Common/interface/DeviceProduct.h"
#include "FWCore/Utilities/interface/EDGetToken.h"
#include "HeterogeneousCore/AlpakaCore/interface/alpaka/DeviceProductType.h"
#include "HeterogeneousCore/AlpakaInterface/interface/config.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE::device {
class Event;
/**
 * The device::EDGetToken is similar to edm::EDGetTokenT, but is
 * intended for Event data products in the device memory space
 * defined by the backend (i.e. ALPAKA_ACCELERATOR_NAMESPACE). It
 * can be used only to get data from a device::Event.
 *
 * A dedicated token class is motivated by two goals:
 * - to enforce more strongly the use of the type-deducing consumes().
 *   consumes() with an explicit type would fail in general anyway,
 *   but would succeed on one of the backends. With a dedicated token
 *   type the explicit-type consumes() always fails.
 * - to avoid using device::EDGetToken with edm::Event
 */
template <typename TProduct>
class EDGetToken {
  // The type under which the product is stored in the edm::Event
  using ProductType = typename detail::DeviceProductType<TProduct>::type;

public:
  constexpr EDGetToken() = default;

  /**
   * Converting constructor: accepts anything the wrapped
   * edm::EDGetTokenT<ProductType> can be constructed from
   * (presumably the adapter object returned by the type-deducing
   * consumes()). Deliberately left implicit, as before.
   *
   * The template is disabled when the argument is an EDGetToken
   * itself. Otherwise it would be a better match than the implicit
   * copy constructor for non-const lvalues, and copying such a token
   * would fail to compile.
   */
  template <typename TAdapter, typename = std::enable_if_t<!std::is_same_v<std::decay_t<TAdapter>, EDGetToken>>>
  constexpr EDGetToken(TAdapter&& iAdapter) : token_(std::forward<TAdapter>(iAdapter)) {}

private:
  // Only device::Event may reach the wrapped token
  friend class Event;

  auto const& underlyingToken() const { return token_; }

  edm::EDGetTokenT<ProductType> token_;
};
} // namespace ALPAKA_ACCELERATOR_NAMESPACE::device

#endif
99 changes: 99 additions & 0 deletions HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadata.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_EDMetadata_h
#define HeterogeneousCore_AlpakaCore_interface_alpaka_EDMetadata_h

#include <atomic>
#include <memory>

#include <alpaka/alpaka.hpp>

#include "FWCore/Concurrency/interface/WaitingTaskWithArenaHolder.h"
#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
#include "HeterogeneousCore/AlpakaInterface/interface/HostOnlyTask.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE {
/**
* The EDMetadata class provides the exact synchronization
* mechanisms for Event data products for backends with asynchronous
* Queue. These include
* - adding a notification for edm::WaitingTaskWithArenaHolder
* - recording an Event
* - synchronizing an Event data product and a consuming EDModule
*
* For synchronous backends the EDMetadata acts as an owner of the
* Queue object, as no further synchronization is needed.
*
* EDMetadata is used as the Metadata class for
* edm::DeviceProduct<T>, and is an implementation detail (not
* visible to user code).
*
* TODO: What to do with device-synchronous backends? The data
* product needs to be wrapped into the edm::DeviceProduct, but the
* EDMetadata class used there does not need anything except "dummy"
* implementation of synchronize(). The question is clearly
* solvable, so maybe leave it to the time we would actually need
* one?
*/

#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
// Host backends with a synchronous queue

/**
 * EDMetadata variant compiled when ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
 * is defined. It only shares ownership of the Queue object.
 */
class EDMetadata {
public:
  EDMetadata(std::shared_ptr<Queue> queue) : queue_{std::move(queue)} {}

  // Intentionally empty in this variant
  void recordEvent() {}

  // Alpaka operations do not accept a temporary as an argument,
  // hence the reference return type
  // TODO: Returning non-const reference here is BAD
  Queue& queue() const { return *queue_; }

  // The device the held Queue belongs to, as reported by alpaka::getDev()
  Device device() const { return alpaka::getDev(*queue_); }

private:
  std::shared_ptr<Queue> queue_;
};

// TODO: else if device backends with a synchronous queue

#else
// All backends with an asynchronous queue

/**
 * EDMetadata variant compiled when ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
 * is not defined. It shares ownership of a Queue and of an Event.
 */
class EDMetadata {
public:
  EDMetadata(std::shared_ptr<Queue> queue, std::shared_ptr<Event> event)
      : queue_{std::move(queue)}, event_{std::move(event)} {}
  // Defined out of line
  ~EDMetadata();

  // Alpaka operations do not accept a temporary as an argument,
  // hence the reference return type
  // TODO: Returning non-const reference here is BAD
  Queue& queue() const { return *queue_; }

  // The device the held Queue belongs to, as reported by alpaka::getDev()
  Device device() const { return alpaka::getDev(*queue_); }

  // Enqueues the held Event on the held Queue
  void recordEvent() { alpaka::enqueue(*queue_, *event_); }

  // Defined out of line.
  // NOTE(review): presumably arranges for 'holder' to be notified once
  // the work in the Queue has finished; confirm in the implementation.
  void enqueueCallback(edm::WaitingTaskWithArenaHolder holder);

  /**
   * Synchronizes the 'consumer' metadata with respect to 'this', the
   * metadata stored in the event product
   */
  void synchronize(EDMetadata& consumer, bool tryReuseQueue) const;

private:
  /**
   * Gives back a shared_ptr to the Queue when the Queue can be
   * reused, and a null shared_ptr otherwise
   */
  std::shared_ptr<Queue> tryReuseQueue_() const;

  std::shared_ptr<Queue> queue_;
  std::shared_ptr<Event> event_;
  // Tells whether a consumer may reuse the Queue. The aim is that a
  // "chain" of modules queues its work to the same queue.
  mutable std::atomic<bool> mayReuseQueue_{true};
};
#endif
} // namespace ALPAKA_ACCELERATOR_NAMESPACE

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_EDMetadataAcquireSentry_h
#define HeterogeneousCore_AlpakaCore_interface_alpaka_EDMetadataAcquireSentry_h

#include "FWCore/Concurrency/interface/WaitingTaskWithArenaHolder.h"
#include "FWCore/Utilities/interface/StreamID.h"
#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadata.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE {
namespace detail {
/**
 * Helper class meant to be used in acquire(). It holds a shared
 * EDMetadata object together with an edm::WaitingTaskWithArenaHolder.
 *
 * TODO: not really a sentry as it doesn't do anything special in its destructor. Better name?
 */
class EDMetadataAcquireSentry {
public:
  // TODO: WaitingTaskWithArenaHolder not really needed for host synchronous case
  // This overload is the one to call from acquire()
  EDMetadataAcquireSentry(edm::StreamID stream, edm::WaitingTaskWithArenaHolder holder);

  // This overload is the one to call from registerTransformAsync()
  EDMetadataAcquireSentry(Device const& device, edm::WaitingTaskWithArenaHolder holder);

  // Neither copyable nor movable
  EDMetadataAcquireSentry(EDMetadataAcquireSentry&&) = delete;
  EDMetadataAcquireSentry& operator=(EDMetadataAcquireSentry&&) = delete;
  EDMetadataAcquireSentry(EDMetadataAcquireSentry const&) = delete;
  EDMetadataAcquireSentry& operator=(EDMetadataAcquireSentry const&) = delete;

  // Returns a copy of the shared_ptr; the sentry keeps its own reference
  std::shared_ptr<EDMetadata> metadata() { return metadata_; }

#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
  // all synchronous backends: give up the held EDMetadata, leaving
  // the member moved-from
  std::shared_ptr<EDMetadata> finish() {
    std::shared_ptr<EDMetadata> released = std::move(metadata_);
    return released;
  }
#else
  // all asynchronous backends: defined out of line
  std::shared_ptr<EDMetadata> finish();
#endif

private:
  std::shared_ptr<EDMetadata> metadata_;

  edm::WaitingTaskWithArenaHolder waitingTaskHolder_;
};
} // namespace detail
} // namespace ALPAKA_ACCELERATOR_NAMESPACE

#endif
37 changes: 37 additions & 0 deletions HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadataSentry.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#ifndef HeterogeneousCore_AlpakaCore_interface_alpaka_EDMetadataSentry_h
#define HeterogeneousCore_AlpakaCore_interface_alpaka_EDMetadataSentry_h

#include "FWCore/Utilities/interface/StreamID.h"
#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadata.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE {
namespace detail {
/**
 * Helper class meant to be used in produce(). It holds a shared
 * EDMetadata object.
 *
 * TODO: not really a sentry as it doesn't do anything special in its destructor. Better name?
 */
class EDMetadataSentry {
public:
  // Overload for a normal module; defined out of line
  EDMetadataSentry(edm::StreamID stream);

  // Overload for the produce() of an ExternalWork module: takes over
  // the EDMetadata object that is passed in
  EDMetadataSentry(std::shared_ptr<EDMetadata> metadata) : metadata_{std::move(metadata)} {}

  // Neither copyable nor movable
  EDMetadataSentry(EDMetadataSentry&&) = delete;
  EDMetadataSentry& operator=(EDMetadataSentry&&) = delete;
  EDMetadataSentry(EDMetadataSentry const&) = delete;
  EDMetadataSentry& operator=(EDMetadataSentry const&) = delete;

  // Returns a copy of the shared_ptr; the sentry keeps its own reference
  std::shared_ptr<EDMetadata> metadata() { return metadata_; }

  // Defined out of line
  void finish();

private:
  std::shared_ptr<EDMetadata> metadata_;
};
} // namespace detail
} // namespace ALPAKA_ACCELERATOR_NAMESPACE

#endif
Loading

0 comments on commit 08fcc8e

Please sign in to comment.