diff --git a/CUDADataFormats/BeamSpot/BuildFile.xml b/CUDADataFormats/BeamSpot/BuildFile.xml
new file mode 100644
index 0000000000000..75f3d15738429
--- /dev/null
+++ b/CUDADataFormats/BeamSpot/BuildFile.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
diff --git a/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h b/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h
new file mode 100644
index 0000000000000..36b152b64dfc1
--- /dev/null
+++ b/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h
@@ -0,0 +1,32 @@
+#ifndef CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h
+#define CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h
+
+#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
+
+#include
+
+// Device-side beam spot event product: owns one `Data` struct in GPU memory,
+// filled by an async host-to-device copy in the constructor (see
+// src/BeamSpotCUDA.cc).
+class BeamSpotCUDA {
+public:
+ // alignas(128) doesn't really make sense as there is only one
+ // beamspot per event?
+ // Plain-old-data layout of the beam spot parameters, mirrored on the device.
+ struct Data {
+ float x, y, z; // position
+ // TODO: add covariance matrix
+
+ float sigmaZ;
+ float beamWidthX, beamWidthY;
+ float dxdz, dydz;
+ float emittanceX, emittanceY;
+ float betaStar;
+ };
+
+ BeamSpotCUDA() = default;
+ // Allocates device memory and copies *data_h asynchronously on `stream`;
+ // data_h must remain valid until the copy has completed on the stream.
+ BeamSpotCUDA(Data const* data_h, cudaStream_t stream);
+
+ // Device pointer to the copied Data; nullptr if default-constructed.
+ Data const* data() const { return data_d_.get(); }
+
+private:
+ // NOTE(review): template argument lost in this copy of the patch —
+ // presumably cudautils::device::unique_ptr<Data>; confirm against original.
+ cudautils::device::unique_ptr data_d_;
+};
+
+#endif
diff --git a/CUDADataFormats/BeamSpot/src/BeamSpotCUDA.cc b/CUDADataFormats/BeamSpot/src/BeamSpotCUDA.cc
new file mode 100644
index 0000000000000..a297ae11dc327
--- /dev/null
+++ b/CUDADataFormats/BeamSpot/src/BeamSpotCUDA.cc
@@ -0,0 +1,9 @@
+#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"
+
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
+
+// Allocate one Data on the device and enqueue an async copy of *data_h onto
+// `stream`. The caller must keep data_h alive until the copy completes,
+// since cudaMemcpyAsync does not block.
+// NOTE(review): the template argument to make_device_unique (presumably
+// <Data>) appears stripped in this copy of the patch.
+BeamSpotCUDA::BeamSpotCUDA(Data const* data_h, cudaStream_t stream) {
+ data_d_ = cudautils::make_device_unique(stream);
+ cudaCheck(cudaMemcpyAsync(data_d_.get(), data_h, sizeof(Data), cudaMemcpyHostToDevice, stream));
+}
diff --git a/CUDADataFormats/BeamSpot/src/classes.h b/CUDADataFormats/BeamSpot/src/classes.h
new file mode 100644
index 0000000000000..62f990c0ba3b3
--- /dev/null
+++ b/CUDADataFormats/BeamSpot/src/classes.h
@@ -0,0 +1,8 @@
+#ifndef CUDADataFormats_BeamSpot_classes_h
+#define CUDADataFormats_BeamSpot_classes_h
+
+#include "CUDADataFormats/Common/interface/CUDAProduct.h"
+#include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"
+#include "DataFormats/Common/interface/Wrapper.h"
+
+#endif
diff --git a/CUDADataFormats/BeamSpot/src/classes_def.xml b/CUDADataFormats/BeamSpot/src/classes_def.xml
new file mode 100644
index 0000000000000..29a0eafa04005
--- /dev/null
+++ b/CUDADataFormats/BeamSpot/src/classes_def.xml
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/CUDADataFormats/Common/BuildFile.xml b/CUDADataFormats/Common/BuildFile.xml
new file mode 100644
index 0000000000000..98033aab4d99d
--- /dev/null
+++ b/CUDADataFormats/Common/BuildFile.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/CUDADataFormats/Common/interface/ArrayShadow.h b/CUDADataFormats/Common/interface/ArrayShadow.h
new file mode 100644
index 0000000000000..2e1b85cbfd04d
--- /dev/null
+++ b/CUDADataFormats/Common/interface/ArrayShadow.h
@@ -0,0 +1,12 @@
+#ifndef CUDADataFormatsCommonArrayShadow_H
+#define CUDADataFormatsCommonArrayShadow_H
+#include
+
+// Trivially-copyable mirror of a std::array-like type A: same value_type and
+// compile-time size, but stored as a plain C array so it can be memcpy'd
+// to/from device memory.
+// NOTE(review): the template parameter list (presumably <typename A>) and the
+// std::tuple_size arguments (<A>) appear stripped in this copy of the patch.
+template
+struct ArrayShadow {
+ using T = typename A::value_type;
+ constexpr static auto size() { return std::tuple_size::value; }
+ T data[std::tuple_size::value];
+};
+
+#endif
diff --git a/CUDADataFormats/Common/interface/CUDAProduct.h b/CUDADataFormats/Common/interface/CUDAProduct.h
new file mode 100644
index 0000000000000..75c9c80e7f206
--- /dev/null
+++ b/CUDADataFormats/Common/interface/CUDAProduct.h
@@ -0,0 +1,55 @@
+#ifndef CUDADataFormats_Common_CUDAProduct_h
+#define CUDADataFormats_Common_CUDAProduct_h
+
+#include
+
+#include "CUDADataFormats/Common/interface/CUDAProductBase.h"
+
+namespace edm {
+ template
+ class Wrapper;
+}
+namespace impl {
+ class CUDAScopedContextGetterBase;
+}
+
+/**
+ * The purpose of this class is to wrap CUDA data to edm::Event in a
+ * way which forces correct use of various utilities.
+ *
+ * The non-default construction has to be done with CUDAScopedContext
+ * (in order to properly register the CUDA event).
+ *
+ * The default constructor is needed only for the ROOT dictionary generation.
+ *
+ * The CUDA event is in practice needed only for stream-stream
+ * synchronization, but someone with long-enough lifetime has to own
+ * it. Here is a somewhat natural place. If overhead is too much, we
+ * can use them only where synchronization between streams is needed.
+ */
+// NOTE(review): template parameter lists and template arguments (e.g. on the
+// class, on edm::Wrapper, and on the variadic constructor) appear stripped in
+// this copy of the patch.
+template
+class CUDAProduct : public CUDAProductBase {
+public:
+ CUDAProduct() = default; // Needed only for ROOT dictionary generation
+
+ CUDAProduct(const CUDAProduct&) = delete;
+ CUDAProduct& operator=(const CUDAProduct&) = delete;
+ CUDAProduct(CUDAProduct&&) = default;
+ CUDAProduct& operator=(CUDAProduct&&) = default;
+
+private:
+ // Non-default construction is restricted to the scoped-context helpers and
+ // the ROOT wrapper, so the CUDA event is always registered correctly.
+ friend class impl::CUDAScopedContextGetterBase;
+ friend class CUDAScopedContextProduce;
+ friend class edm::Wrapper>;
+
+ // Wrap an already-constructed payload (moved in).
+ explicit CUDAProduct(int device, cudautils::SharedStreamPtr stream, T data)
+ : CUDAProductBase(device, std::move(stream)), data_(std::move(data)) {}
+
+ // Construct the payload in place from forwarded arguments.
+ template
+ explicit CUDAProduct(int device, cudautils::SharedStreamPtr stream, Args&&... args)
+ : CUDAProductBase(device, std::move(stream)), data_(std::forward(args)...) {}
+
+ // Payload; the trailing "//!" presumably marks it transient for the ROOT
+ // dictionary — confirm against ROOT I/O conventions.
+ T data_; //!
+};
+
+#endif
diff --git a/CUDADataFormats/Common/interface/CUDAProductBase.h b/CUDADataFormats/Common/interface/CUDAProductBase.h
new file mode 100644
index 0000000000000..219b7e619de7f
--- /dev/null
+++ b/CUDADataFormats/Common/interface/CUDAProductBase.h
@@ -0,0 +1,90 @@
+#ifndef CUDADataFormats_Common_CUDAProductBase_h
+#define CUDADataFormats_Common_CUDAProductBase_h
+
+#include
+#include
+
+#include "HeterogeneousCore/CUDAUtilities/interface/SharedStreamPtr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/SharedEventPtr.h"
+
+namespace impl {
+ class CUDAScopedContextBase;
+}
+
+/**
+ * Base class for all instantiations of CUDA to hold the
+ * non-T-dependent members.
+ */
+class CUDAProductBase {
+public:
+ CUDAProductBase() = default; // Needed only for ROOT dictionary generation
+ // Blocks on the recorded CUDA event, if any (see src/CUDAProductBase.cc).
+ ~CUDAProductBase();
+
+ CUDAProductBase(const CUDAProductBase&) = delete;
+ CUDAProductBase& operator=(const CUDAProductBase&) = delete;
+ // Move operations load the atomic flag explicitly because std::atomic is
+ // neither copyable nor movable. Note: the moved-from object keeps its
+ // previous flag value (only stream_/event_ are nulled by the moves).
+ CUDAProductBase(CUDAProductBase&& other)
+ : stream_{std::move(other.stream_)},
+ event_{std::move(other.event_)},
+ mayReuseStream_{other.mayReuseStream_.load()},
+ device_{other.device_} {}
+ CUDAProductBase& operator=(CUDAProductBase&& other) {
+ stream_ = std::move(other.stream_);
+ event_ = std::move(other.event_);
+ mayReuseStream_ = other.mayReuseStream_.load();
+ device_ = other.device_;
+ return *this;
+ }
+
+ // A default-constructed (or moved-from) product owns no stream and is invalid.
+ bool isValid() const { return stream_.get() != nullptr; }
+ // True once the asynchronous work producing the data has completed.
+ bool isAvailable() const;
+
+ int device() const { return device_; }
+
+ // cudaStream_t is a pointer to a thread-safe object, for which a
+ // mutable access is needed even if the CUDAScopedContext itself
+ // would be const. Therefore it is ok to return a non-const
+ // pointer from a const method here.
+ cudaStream_t stream() const { return stream_.get(); }
+
+ // cudaEvent_t is a pointer to a thread-safe object, for which a
+ // mutable access is needed even if the CUDAScopedContext itself
+ // would be const. Therefore it is ok to return a non-const
+ // pointer from a const method here.
+ cudaEvent_t event() const { return event_ ? event_.get() : nullptr; }
+
+protected:
+ explicit CUDAProductBase(int device, cudautils::SharedStreamPtr stream)
+ : stream_{std::move(stream)}, device_{device} {}
+
+private:
+ friend class impl::CUDAScopedContextBase;
+ friend class CUDAScopedContextProduce;
+
+ // The following functions are intended to be used only from CUDAScopedContext
+ void setEvent(cudautils::SharedEventPtr event) { event_ = std::move(event); }
+ const cudautils::SharedStreamPtr& streamPtr() const { return stream_; }
+
+ // Atomically claims stream reuse: only the first caller (the one that flips
+ // the flag from true to false) is allowed to reuse the stream.
+ bool mayReuseStream() const {
+ bool expected = true;
+ bool changed = mayReuseStream_.compare_exchange_strong(expected, false);
+ // If the current thread is the one flipping the flag, it may
+ // reuse the stream.
+ return changed;
+ }
+
+ // The cudaStream_t is really shared among edm::Event products, so
+ // using shared_ptr also here
+ cudautils::SharedStreamPtr stream_; //!
+ // shared_ptr because of caching in CUDAEventCache
+ cudautils::SharedEventPtr event_; //!
+
+ // This flag tells whether the CUDA stream may be reused by a
+ // consumer or not. The goal is to have a "chain" of modules to
+ // queue their work to the same stream.
+ // NOTE(review): the template argument (presumably std::atomic<bool>)
+ // appears stripped in this copy of the patch.
+ mutable std::atomic mayReuseStream_ = true; //!
+
+ // The CUDA device associated with this product
+ int device_ = -1; //!
+};
+
+#endif
diff --git a/CUDADataFormats/Common/interface/HeterogeneousSoA.h b/CUDADataFormats/Common/interface/HeterogeneousSoA.h
new file mode 100644
index 0000000000000..907b7647a3452
--- /dev/null
+++ b/CUDADataFormats/Common/interface/HeterogeneousSoA.h
@@ -0,0 +1,187 @@
+#ifndef CUDADataFormatsCommonHeterogeneousSoA_H
+#define CUDADataFormatsCommonHeterogeneousSoA_H
+
+#include
+
+#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
+
+// A heterogeneous unique pointer: owns a single T that may live in device
+// memory, pinned host memory, or ordinary heap memory — whichever of the
+// three owning members is non-null.
+// NOTE(review): template parameter lists / arguments appear stripped in this
+// copy of the patch (unique_ptr<T>, make_host_unique<T>, etc.).
+template
+class HeterogeneousSoA {
+public:
+ using Product = T;
+
+ HeterogeneousSoA() = default; // make root happy
+ ~HeterogeneousSoA() = default;
+ HeterogeneousSoA(HeterogeneousSoA &&) = default;
+ HeterogeneousSoA &operator=(HeterogeneousSoA &&) = default;
+
+ // Exactly one of the three storage flavors is adopted at construction.
+ explicit HeterogeneousSoA(cudautils::device::unique_ptr &&p) : dm_ptr(std::move(p)) {}
+ explicit HeterogeneousSoA(cudautils::host::unique_ptr &&p) : hm_ptr(std::move(p)) {}
+ explicit HeterogeneousSoA(std::unique_ptr &&p) : std_ptr(std::move(p)) {}
+
+ // Returns whichever pointer is set, preferring device, then pinned host.
+ auto const *get() const { return dm_ptr ? dm_ptr.get() : (hm_ptr ? hm_ptr.get() : std_ptr.get()); }
+
+ auto const &operator*() const { return *get(); }
+
+ auto const *operator-> () const { return get(); }
+
+ auto *get() { return dm_ptr ? dm_ptr.get() : (hm_ptr ? hm_ptr.get() : std_ptr.get()); }
+
+ auto &operator*() { return *get(); }
+
+ auto *operator-> () { return get(); }
+
+ // in reality valid only for GPU version... (asserts device ownership, then
+ // copies the object to pinned host memory asynchronously on `stream`)
+ cudautils::host::unique_ptr toHostAsync(cudaStream_t stream) const {
+ assert(dm_ptr);
+ auto ret = cudautils::make_host_unique(stream);
+ cudaCheck(cudaMemcpyAsync(ret.get(), dm_ptr.get(), sizeof(T), cudaMemcpyDefault, stream));
+ return ret;
+ }
+
+private:
+ // a union won't do it, a variant will not be more efficient
+ cudautils::device::unique_ptr dm_ptr; //!
+ cudautils::host::unique_ptr hm_ptr; //!
+ std::unique_ptr std_ptr; //!
+};
+
+// Allocation-policy traits used by HeterogeneousSoAImpl below: each trait
+// maps the same factory interface onto a different memory space.
+// NOTE(review): template parameter lists / arguments appear stripped
+// throughout this namespace in this copy of the patch.
+namespace cudaCompat {
+
+ // Allocates the primary storage in CUDA device memory.
+ struct GPUTraits {
+ template
+ using unique_ptr = cudautils::device::unique_ptr;
+
+ template
+ static auto make_unique(cudaStream_t stream) {
+ return cudautils::make_device_unique(stream);
+ }
+
+ template
+ static auto make_unique(size_t size, cudaStream_t stream) {
+ return cudautils::make_device_unique(size, stream);
+ }
+
+ template
+ static auto make_host_unique(cudaStream_t stream) {
+ return cudautils::make_host_unique(stream);
+ }
+
+ template
+ static auto make_device_unique(cudaStream_t stream) {
+ return cudautils::make_device_unique(stream);
+ }
+
+ template
+ static auto make_device_unique(size_t size, cudaStream_t stream) {
+ return cudautils::make_device_unique(size, stream);
+ }
+ };
+
+ // Allocates the primary storage in pinned host memory.
+ struct HostTraits {
+ template
+ using unique_ptr = cudautils::host::unique_ptr;
+
+ template
+ static auto make_unique(cudaStream_t stream) {
+ return cudautils::make_host_unique(stream);
+ }
+
+ template
+ static auto make_host_unique(cudaStream_t stream) {
+ return cudautils::make_host_unique(stream);
+ }
+
+ template
+ static auto make_device_unique(cudaStream_t stream) {
+ return cudautils::make_device_unique(stream);
+ }
+
+ template
+ static auto make_device_unique(size_t size, cudaStream_t stream) {
+ return cudautils::make_device_unique(size, stream);
+ }
+ };
+
+ // CPU-only fallback: everything is plain heap memory; the stream argument
+ // is accepted but ignored so the interface stays uniform.
+ struct CPUTraits {
+ template
+ using unique_ptr = std::unique_ptr;
+
+ template
+ static auto make_unique(cudaStream_t) {
+ return std::make_unique();
+ }
+
+ template
+ static auto make_unique(size_t size, cudaStream_t) {
+ return std::make_unique(size);
+ }
+
+ template
+ static auto make_host_unique(cudaStream_t) {
+ return std::make_unique();
+ }
+
+ template
+ static auto make_device_unique(cudaStream_t) {
+ return std::make_unique();
+ }
+
+ template
+ static auto make_device_unique(size_t size, cudaStream_t) {
+ return std::make_unique(size);
+ }
+ };
+
+} // namespace cudaCompat
+
+// A heterogeneous unique pointer (of a different sort): the memory space is
+// chosen at compile time via the Traits parameter (see cudaCompat above)
+// rather than at run time as in HeterogeneousSoA.
+// NOTE(review): template parameter lists (presumably <typename T, typename
+// Traits>) appear stripped in this copy of the patch.
+template
+class HeterogeneousSoAImpl {
+public:
+ template
+ using unique_ptr = typename Traits::template unique_ptr;
+
+ HeterogeneousSoAImpl() = default; // make root happy
+ ~HeterogeneousSoAImpl() = default;
+ HeterogeneousSoAImpl(HeterogeneousSoAImpl &&) = default;
+ HeterogeneousSoAImpl &operator=(HeterogeneousSoAImpl &&) = default;
+
+ // Adopt an existing allocation, or allocate via Traits on `stream`.
+ explicit HeterogeneousSoAImpl(unique_ptr &&p) : m_ptr(std::move(p)) {}
+ explicit HeterogeneousSoAImpl(cudaStream_t stream);
+
+ T const *get() const { return m_ptr.get(); }
+
+ T *get() { return m_ptr.get(); }
+
+ // Async copy to pinned host memory (defined below; GPU flavor only).
+ cudautils::host::unique_ptr toHostAsync(cudaStream_t stream) const;
+
+private:
+ unique_ptr m_ptr; //!
+};
+
+// Allocate the owned T through the Traits factory, in whatever memory space
+// the Traits parameter selects.
+template
+HeterogeneousSoAImpl::HeterogeneousSoAImpl(cudaStream_t stream) {
+ m_ptr = Traits::template make_unique(stream);
+}
+
+// in reality valid only for GPU version... copies the owned object into
+// freshly allocated pinned host memory, asynchronously on `stream`; the
+// caller must synchronize before reading the result.
+template
+cudautils::host::unique_ptr HeterogeneousSoAImpl::toHostAsync(cudaStream_t stream) const {
+ auto ret = cudautils::make_host_unique(stream);
+ cudaCheck(cudaMemcpyAsync(ret.get(), get(), sizeof(T), cudaMemcpyDefault, stream));
+ return ret;
+}
+
+// Convenience aliases binding HeterogeneousSoAImpl to the three trait
+// flavors. NOTE(review): the trait template arguments (presumably
+// cudaCompat::GPUTraits / CPUTraits / HostTraits) appear stripped in this
+// copy of the patch.
+template
+using HeterogeneousSoAGPU = HeterogeneousSoAImpl;
+template
+using HeterogeneousSoACPU = HeterogeneousSoAImpl;
+template
+using HeterogeneousSoAHost = HeterogeneousSoAImpl;
+
+#endif
diff --git a/CUDADataFormats/Common/interface/HostProduct.h b/CUDADataFormats/Common/interface/HostProduct.h
new file mode 100644
index 0000000000000..17ad98ba403a4
--- /dev/null
+++ b/CUDADataFormats/Common/interface/HostProduct.h
@@ -0,0 +1,29 @@
+#ifndef CUDADataFormatsCommonHostProduct_H
+#define CUDADataFormatsCommonHostProduct_H
+
+#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
+
+// A host-only counterpart of HeterogeneousSoA: owns a single T in either
+// pinned host memory or ordinary heap memory, whichever member is set.
+// NOTE(review): template parameter lists / arguments appear stripped in this
+// copy of the patch.
+template
+class HostProduct {
+public:
+ HostProduct() = default; // make root happy
+ ~HostProduct() = default;
+ HostProduct(HostProduct&&) = default;
+ HostProduct& operator=(HostProduct&&) = default;
+
+ // Adopt either a pinned-host or a plain heap allocation.
+ explicit HostProduct(cudautils::host::unique_ptr&& p) : hm_ptr(std::move(p)) {}
+ explicit HostProduct(std::unique_ptr&& p) : std_ptr(std::move(p)) {}
+
+ // Returns whichever pointer is set, preferring the pinned-host one.
+ auto const* get() const { return hm_ptr ? hm_ptr.get() : std_ptr.get(); }
+
+ auto const& operator*() const { return *get(); }
+
+ auto const* operator-> () const { return get(); }
+
+private:
+ cudautils::host::unique_ptr hm_ptr; //!
+ std::unique_ptr std_ptr; //!
+};
+
+#endif
diff --git a/CUDADataFormats/Common/src/CUDAProductBase.cc b/CUDADataFormats/Common/src/CUDAProductBase.cc
new file mode 100644
index 0000000000000..72302d3165676
--- /dev/null
+++ b/CUDADataFormats/Common/src/CUDAProductBase.cc
@@ -0,0 +1,27 @@
+#include "CUDADataFormats/Common/interface/CUDAProductBase.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/eventIsOccurred.h"
+
+// Non-blocking query: true once the asynchronous production of the data has
+// completed (or if no CUDA event was recorded at all).
+bool CUDAProductBase::isAvailable() const {
+ // In absence of event, the product was available already at the end
+ // of produce() of the producer.
+ if (not event_) {
+ return true;
+ }
+ return cudautils::eventIsOccurred(event_.get());
+}
+
+// Blocking destructor: waits for the recorded event (if any) so the product
+// never outlives the asynchronous work that fills it.
+CUDAProductBase::~CUDAProductBase() {
+ // Make sure that the production of the product in the GPU is
+ // complete before destructing the product. This is to make sure
+ // that the EDM stream does not move to the next event before all
+ // asynchronous processing of the current is complete.
+ if (event_) {
+ // TODO: a callback notifying a WaitingTaskHolder (or similar)
+ // would avoid blocking the CPU, but would also require more work.
+ //
+ // Intentionally not checking the return value to avoid throwing
+ // exceptions. If this call would fail, we should get failures
+ // elsewhere as well.
+ cudaEventSynchronize(event_.get());
+ }
+}
diff --git a/CUDADataFormats/Common/test/BuildFile.xml b/CUDADataFormats/Common/test/BuildFile.xml
new file mode 100644
index 0000000000000..5e804fe80a736
--- /dev/null
+++ b/CUDADataFormats/Common/test/BuildFile.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/CUDADataFormats/Common/test/test_CUDAProduct.cc b/CUDADataFormats/Common/test/test_CUDAProduct.cc
new file mode 100644
index 0000000000000..e674ca2bf694f
--- /dev/null
+++ b/CUDADataFormats/Common/test/test_CUDAProduct.cc
@@ -0,0 +1,66 @@
+#include "catch.hpp"
+
+#include "CUDADataFormats/Common/interface/CUDAProduct.h"
+#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/CUDAStreamCache.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/CUDAEventCache.h"
+
+#include
+
+// Test-only helper: builds a CUDAScopedContextProduce directly (bypassing the
+// framework) with a cached stream and, optionally, a cached event.
+namespace cudatest {
+ class TestCUDAScopedContext {
+ public:
+ static CUDAScopedContextProduce make(int dev, bool createEvent) {
+ cudautils::SharedEventPtr event;
+ if (createEvent) {
+ event = cudautils::getCUDAEventCache().getCUDAEvent();
+ }
+ return CUDAScopedContextProduce(dev, cudautils::getCUDAStreamCache().getCUDAStream(), std::move(event));
+ }
+ };
+} // namespace cudatest
+
+// Catch2 test exercising CUDAProduct: validity of default-constructed
+// instances, construction through a scoped context, and move semantics.
+// NOTE(review): template arguments (e.g. CUDAProduct<int>) appear stripped
+// in this copy of the patch.
+TEST_CASE("Use of CUDAProduct template", "[CUDACore]") {
+ // Runs even without a GPU: a default-constructed product has no stream.
+ SECTION("Default constructed") {
+ auto foo = CUDAProduct();
+ REQUIRE(!foo.isValid());
+
+ auto bar = std::move(foo);
+ }
+
+ // The remaining sections need a device; skip gracefully otherwise.
+ exitSansCUDADevices();
+
+ constexpr int defaultDevice = 0;
+ cudaCheck(cudaSetDevice(defaultDevice));
+ {
+ auto ctx = cudatest::TestCUDAScopedContext::make(defaultDevice, true);
+ std::unique_ptr> dataPtr = ctx.wrap(10);
+ auto& data = *dataPtr;
+
+ SECTION("Construct from CUDAScopedContext") {
+ REQUIRE(data.isValid());
+ REQUIRE(data.device() == defaultDevice);
+ REQUIRE(data.stream() == ctx.stream());
+ REQUIRE(data.event() != nullptr);
+ }
+
+ // After a move the source must report invalid (its stream was taken).
+ SECTION("Move constructor") {
+ auto data2 = CUDAProduct(std::move(data));
+ REQUIRE(data2.isValid());
+ REQUIRE(!data.isValid());
+ }
+
+ SECTION("Move assignment") {
+ CUDAProduct data2;
+ data2 = std::move(data);
+ REQUIRE(data2.isValid());
+ REQUIRE(!data.isValid());
+ }
+ }
+
+ cudaCheck(cudaSetDevice(defaultDevice));
+ cudaCheck(cudaDeviceSynchronize());
+ // Note: CUDA resources are cleaned up by the destructors of the global cache objects
+}
diff --git a/CUDADataFormats/Common/test/test_main.cc b/CUDADataFormats/Common/test/test_main.cc
new file mode 100644
index 0000000000000..0c7c351f437f5
--- /dev/null
+++ b/CUDADataFormats/Common/test/test_main.cc
@@ -0,0 +1,2 @@
+#define CATCH_CONFIG_MAIN
+#include "catch.hpp"
diff --git a/CUDADataFormats/EcalRecHitSoA/BuildFile.xml b/CUDADataFormats/EcalRecHitSoA/BuildFile.xml
new file mode 100644
index 0000000000000..794d2bf7abead
--- /dev/null
+++ b/CUDADataFormats/EcalRecHitSoA/BuildFile.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit_soa.h b/CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit_soa.h
new file mode 100644
index 0000000000000..e11c13ebdf4c2
--- /dev/null
+++ b/CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit_soa.h
@@ -0,0 +1,73 @@
+#ifndef CUDADataFormats_EcalRecHitSoA_interface_EcalUncalibratedRecHit_soa_h
+#define CUDADataFormats_EcalRecHitSoA_interface_EcalUncalibratedRecHit_soa_h
+
+#include
+#include
+
+#include "DataFormats/EcalDigi/interface/EcalDataFrame.h"
+
+#include "CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/CUDAHostAllocator.h"
+
+// Structure-of-arrays container for ECAL uncalibrated rechits. The storage
+// flavor is selected by a tag: Tag::soa stores std::vector members (host
+// side), Tag::ptr stores raw pointers (device side).
+// NOTE(review): template parameter lists and the specialization arguments of
+// type_wrapper appear stripped in this copy of the patch.
+namespace ecal {
+
+ namespace Tag {
+
+ struct soa {};
+ struct ptr {};
+
+ } // namespace Tag
+
+ // Maps (T, tag) to the concrete member type; primary case: owning vector.
+ template
+ struct type_wrapper {
+ //#ifndef ECAL_MULTIFIT_DONOT_USE_PINNED_MEM
+ // using type = std::vector>;
+ //#else
+ using type = std::vector;
+ //#endif
+ };
+
+ // Pointer flavor: non-owning raw pointer into device (or external) memory.
+ template
+ struct type_wrapper {
+ using type = T*;
+ };
+
+ template
+ struct UncalibratedRecHit {
+ UncalibratedRecHit() = default;
+ UncalibratedRecHit(const UncalibratedRecHit&) = default;
+ UncalibratedRecHit& operator=(const UncalibratedRecHit&) = default;
+
+ UncalibratedRecHit(UncalibratedRecHit&&) = default;
+ UncalibratedRecHit& operator=(UncalibratedRecHit&&) = default;
+
+ // TODO: std::array causes root's dictionary problems
+ // amplitudesAll is flattened: MAXSAMPLES entries per hit (see resize()).
+ typename type_wrapper::type amplitudesAll;
+ // typename type_wrapper, L>::type amplitudesAll;
+ typename type_wrapper::type amplitude;
+ typename type_wrapper::type chi2;
+ typename type_wrapper::type pedestal;
+ typename type_wrapper::type jitter;
+ typename type_wrapper::type jitterError;
+ typename type_wrapper::type did;
+ typename type_wrapper::type flags;
+
+ // Only available for the vector (Tag::soa) flavor; `size` is the number
+ // of hits, so amplitudesAll gets size * MAXSAMPLES entries.
+ template
+ typename std::enable_if::value, void>::type resize(size_t size) {
+ amplitudesAll.resize(size * EcalDataFrame::MAXSAMPLES);
+ amplitude.resize(size);
+ pedestal.resize(size);
+ chi2.resize(size);
+ did.resize(size);
+ flags.resize(size);
+ jitter.resize(size);
+ jitterError.resize(size);
+ }
+ };
+
+ using SoAUncalibratedRecHitCollection = UncalibratedRecHit;
+
+} // namespace ecal
+
+#endif // RecoLocalCalo_EcalRecAlgos_interface_EcalUncalibratedRecHit_soa_h
diff --git a/CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h b/CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h
new file mode 100644
index 0000000000000..5667a9225f29d
--- /dev/null
+++ b/CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h
@@ -0,0 +1,13 @@
+#ifndef CUDADataFormats_EcalRecHitSoA_interface_RecoTypes
+#define CUDADataFormats_EcalRecHitSoA_interface_RecoTypes
+
+// Central definition of the scalar types used by the ECAL GPU reconstruction:
+// one for arithmetic on device, one for persistent storage. Kept as aliases
+// so precision can be changed in a single place.
+namespace ecal {
+ namespace reco {
+
+ using ComputationScalarType = float;
+ using StorageScalarType = float;
+
+ } // namespace reco
+} // namespace ecal
+
+#endif
diff --git a/CUDADataFormats/EcalRecHitSoA/src/classes.h b/CUDADataFormats/EcalRecHitSoA/src/classes.h
new file mode 100644
index 0000000000000..8ad6b8d684b9a
--- /dev/null
+++ b/CUDADataFormats/EcalRecHitSoA/src/classes.h
@@ -0,0 +1,2 @@
+#include "DataFormats/Common/interface/Wrapper.h"
+#include "CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit_soa.h"
diff --git a/CUDADataFormats/EcalRecHitSoA/src/classes_def.xml b/CUDADataFormats/EcalRecHitSoA/src/classes_def.xml
new file mode 100644
index 0000000000000..461460835a723
--- /dev/null
+++ b/CUDADataFormats/EcalRecHitSoA/src/classes_def.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/CUDADataFormats/SiPixelCluster/BuildFile.xml b/CUDADataFormats/SiPixelCluster/BuildFile.xml
new file mode 100644
index 0000000000000..5e401d215c4eb
--- /dev/null
+++ b/CUDADataFormats/SiPixelCluster/BuildFile.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h
new file mode 100644
index 0000000000000..d3650e164d44e
--- /dev/null
+++ b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h
@@ -0,0 +1,73 @@
+#ifndef CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h
+#define CUDADataFormats_SiPixelCluster_interface_SiPixelClustersCUDA_h
+
+#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
+
+#include
+
+// Device-side SoA of pixel cluster data. The arrays are owned on the device;
+// a DeviceConstView ("me" pointer) living in device memory gives kernels
+// read-only access through __ldg. The cluster count is cached on the host.
+// NOTE(review): template arguments of the unique_ptr members (presumably
+// <uint32_t[]> and <DeviceConstView>) appear stripped in this copy.
+class SiPixelClustersCUDA {
+public:
+ SiPixelClustersCUDA() = default;
+ // Allocates the device arrays (sized by maxClusters) and uploads the view;
+ // see src/SiPixelClustersCUDA.cc.
+ explicit SiPixelClustersCUDA(size_t maxClusters, cudaStream_t stream);
+ ~SiPixelClustersCUDA() = default;
+
+ SiPixelClustersCUDA(const SiPixelClustersCUDA &) = delete;
+ SiPixelClustersCUDA &operator=(const SiPixelClustersCUDA &) = delete;
+ SiPixelClustersCUDA(SiPixelClustersCUDA &&) = default;
+ SiPixelClustersCUDA &operator=(SiPixelClustersCUDA &&) = default;
+
+ void setNClusters(uint32_t nClusters) { nClusters_h = nClusters; }
+
+ uint32_t nClusters() const { return nClusters_h; }
+
+ uint32_t *moduleStart() { return moduleStart_d.get(); }
+ uint32_t *clusInModule() { return clusInModule_d.get(); }
+ uint32_t *moduleId() { return moduleId_d.get(); }
+ uint32_t *clusModuleStart() { return clusModuleStart_d.get(); }
+
+ uint32_t const *moduleStart() const { return moduleStart_d.get(); }
+ uint32_t const *clusInModule() const { return clusInModule_d.get(); }
+ uint32_t const *moduleId() const { return moduleId_d.get(); }
+ uint32_t const *clusModuleStart() const { return clusModuleStart_d.get(); }
+
+ // c_* aliases: explicitly-const accessors usable on non-const objects.
+ uint32_t const *c_moduleStart() const { return moduleStart_d.get(); }
+ uint32_t const *c_clusInModule() const { return clusInModule_d.get(); }
+ uint32_t const *c_moduleId() const { return moduleId_d.get(); }
+ uint32_t const *c_clusModuleStart() const { return clusModuleStart_d.get(); }
+
+ // Read-only device-side view over the four arrays, for use in kernels.
+ class DeviceConstView {
+ public:
+ // DeviceConstView() = default;
+
+ __device__ __forceinline__ uint32_t moduleStart(int i) const { return __ldg(moduleStart_ + i); }
+ __device__ __forceinline__ uint32_t clusInModule(int i) const { return __ldg(clusInModule_ + i); }
+ __device__ __forceinline__ uint32_t moduleId(int i) const { return __ldg(moduleId_ + i); }
+ __device__ __forceinline__ uint32_t clusModuleStart(int i) const { return __ldg(clusModuleStart_ + i); }
+
+ friend SiPixelClustersCUDA;
+
+ // private:
+ uint32_t const *moduleStart_;
+ uint32_t const *clusInModule_;
+ uint32_t const *moduleId_;
+ uint32_t const *clusModuleStart_;
+ };
+
+ // Device pointer to the uploaded view; nullptr if default-constructed.
+ DeviceConstView *view() const { return view_d.get(); }
+
+private:
+ cudautils::device::unique_ptr moduleStart_d; // index of the first pixel of each module
+ cudautils::device::unique_ptr clusInModule_d; // number of clusters found in each module
+ cudautils::device::unique_ptr moduleId_d; // module id of each module
+
+ // originally from rechits
+ cudautils::device::unique_ptr clusModuleStart_d; // index of the first cluster of each module
+
+ cudautils::device::unique_ptr view_d; // "me" pointer
+
+ // Zero-initialized so nClusters() is well-defined on a default-constructed
+ // object (previously indeterminate).
+ uint32_t nClusters_h = 0;
+};
+
+#endif
diff --git a/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h b/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h
new file mode 100644
index 0000000000000..1430606ab6678
--- /dev/null
+++ b/CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h
@@ -0,0 +1,32 @@
+#ifndef CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h
+#define CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h
+
+#include
+
+// Global upper bound on pixel hits per event, used to size GPU buffers.
+// GPU_SMALL_EVENTS selects a smaller footprint for low-occupancy workflows.
+namespace pixelGPUConstants {
+#ifdef GPU_SMALL_EVENTS
+ constexpr uint32_t maxNumberOfHits = 24 * 1024;
+#else
+ constexpr uint32_t maxNumberOfHits =
+ 48 * 1024; // data at pileup 50 has 18300 +/- 3500 hits; 40000 is around 6 sigma away
+#endif
+} // namespace pixelGPUConstants
+
+// Compile-time limits for the GPU pixel clustering kernels.
+namespace gpuClustering {
+#ifdef GPU_SMALL_EVENTS
+ constexpr uint32_t maxHitsInIter() { return 64; }
+#else
+ // optimized for real data PU 50
+ constexpr uint32_t maxHitsInIter() { return 160; }
+#endif
+ constexpr uint32_t maxHitsInModule() { return 1024; }
+
+ constexpr uint32_t MaxNumModules = 2000;
+ constexpr int32_t MaxNumClustersPerModules = maxHitsInModule();
+ constexpr uint32_t MaxHitsInModule = maxHitsInModule(); // as above
+ constexpr uint32_t MaxNumClusters = pixelGPUConstants::maxNumberOfHits;
+ // Sentinel module id marking invalid/removed hits (9999 > MaxNumModules).
+ constexpr uint16_t InvId = 9999; // must be > MaxNumModules
+
+} // namespace gpuClustering
+
+#endif // CUDADataFormats_SiPixelCluster_interface_gpuClusteringConstants_h
diff --git a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc
new file mode 100644
index 0000000000000..c814cd4a2e131
--- /dev/null
+++ b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc
@@ -0,0 +1,21 @@
+#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"
+
+#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
+
+// Allocate the four device arrays (the *Start arrays get one extra slot,
+// presumably for the end-offset sentinel — confirm against the kernels),
+// fill a host-side view with the device pointers, and upload it async.
+// NOTE(review): template arguments of make_device_unique/make_host_unique
+// appear stripped in this copy of the patch. The host `view` buffer is
+// released at scope exit while copyAsync is still queued — presumably safe
+// because cudautils host allocations are stream-ordered; verify.
+SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cudaStream_t stream) {
+ moduleStart_d = cudautils::make_device_unique(maxClusters + 1, stream);
+ clusInModule_d = cudautils::make_device_unique(maxClusters, stream);
+ moduleId_d = cudautils::make_device_unique(maxClusters, stream);
+ clusModuleStart_d = cudautils::make_device_unique(maxClusters + 1, stream);
+
+ auto view = cudautils::make_host_unique(stream);
+ view->moduleStart_ = moduleStart_d.get();
+ view->clusInModule_ = clusInModule_d.get();
+ view->moduleId_ = moduleId_d.get();
+ view->clusModuleStart_ = clusModuleStart_d.get();
+
+ view_d = cudautils::make_device_unique(stream);
+ cudautils::copyAsync(view_d, view, stream);
+}
diff --git a/CUDADataFormats/SiPixelCluster/src/classes.h b/CUDADataFormats/SiPixelCluster/src/classes.h
new file mode 100644
index 0000000000000..08d46244adc7d
--- /dev/null
+++ b/CUDADataFormats/SiPixelCluster/src/classes.h
@@ -0,0 +1,8 @@
+#ifndef CUDADataFormats_SiPixelCluster_classes_h
+#define CUDADataFormats_SiPixelCluster_classes_h
+
+#include "CUDADataFormats/Common/interface/CUDAProduct.h"
+#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"
+#include "DataFormats/Common/interface/Wrapper.h"
+
+#endif
diff --git a/CUDADataFormats/SiPixelCluster/src/classes_def.xml b/CUDADataFormats/SiPixelCluster/src/classes_def.xml
new file mode 100644
index 0000000000000..ba0706ac4b8aa
--- /dev/null
+++ b/CUDADataFormats/SiPixelCluster/src/classes_def.xml
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/CUDADataFormats/SiPixelDigi/BuildFile.xml b/CUDADataFormats/SiPixelDigi/BuildFile.xml
new file mode 100644
index 0000000000000..ee357e2d4e157
--- /dev/null
+++ b/CUDADataFormats/SiPixelDigi/BuildFile.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h
new file mode 100644
index 0000000000000..7c18d58a3fc12
--- /dev/null
+++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h
@@ -0,0 +1,41 @@
+#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h
+#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h
+
+#include "DataFormats/SiPixelDigi/interface/PixelErrors.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h"
+
+#include
+
+// Device-side container for pixel raw-data errors: a GPU::SimpleVector of
+// error words in device memory plus the host-side PixelFormatterErrors map.
+// NOTE(review): template arguments (SimpleVector<PixelErrorCompact> etc.)
+// appear stripped in this copy of the patch.
+class SiPixelDigiErrorsCUDA {
+public:
+ SiPixelDigiErrorsCUDA() = default;
+ explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream);
+ ~SiPixelDigiErrorsCUDA() = default;
+
+ SiPixelDigiErrorsCUDA(const SiPixelDigiErrorsCUDA&) = delete;
+ SiPixelDigiErrorsCUDA& operator=(const SiPixelDigiErrorsCUDA&) = delete;
+ SiPixelDigiErrorsCUDA(SiPixelDigiErrorsCUDA&&) = default;
+ SiPixelDigiErrorsCUDA& operator=(SiPixelDigiErrorsCUDA&&) = default;
+
+ // Host-side errors captured at construction time.
+ const PixelFormatterErrors& formatterErrors() const { return formatterErrors_h; }
+
+ GPU::SimpleVector* error() { return error_d.get(); }
+ GPU::SimpleVector const* error() const { return error_d.get(); }
+ GPU::SimpleVector const* c_error() const { return error_d.get(); }
+
+ // Pair of (error vector header, error data buffer) copied to pinned host
+ // memory; valid only after the copy has completed on `stream`.
+ using HostDataError =
+ std::pair, cudautils::host::unique_ptr>;
+ HostDataError dataErrorToHostAsync(cudaStream_t stream) const;
+
+ void copyErrorToHostAsync(cudaStream_t stream);
+
+private:
+ cudautils::device::unique_ptr data_d;
+ cudautils::device::unique_ptr> error_d;
+ cudautils::host::unique_ptr> error_h;
+ PixelFormatterErrors formatterErrors_h;
+};
+
+#endif
diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h
new file mode 100644
index 0000000000000..47efe634ad93d
--- /dev/null
+++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h
@@ -0,0 +1,98 @@
+#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h
+#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h
+
+#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
+
+#include <cuda_runtime.h>
+
+class SiPixelDigisCUDA {
+public:
+ SiPixelDigisCUDA() = default;
+ explicit SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream);
+ ~SiPixelDigisCUDA() = default;
+
+ SiPixelDigisCUDA(const SiPixelDigisCUDA &) = delete;
+ SiPixelDigisCUDA &operator=(const SiPixelDigisCUDA &) = delete;
+ SiPixelDigisCUDA(SiPixelDigisCUDA &&) = default;
+ SiPixelDigisCUDA &operator=(SiPixelDigisCUDA &&) = default;
+
+ void setNModulesDigis(uint32_t nModules, uint32_t nDigis) {
+ nModules_h = nModules;
+ nDigis_h = nDigis;
+ }
+
+ uint32_t nModules() const { return nModules_h; }
+ uint32_t nDigis() const { return nDigis_h; }
+
+ uint16_t *xx() { return xx_d.get(); }
+ uint16_t *yy() { return yy_d.get(); }
+ uint16_t *adc() { return adc_d.get(); }
+ uint16_t *moduleInd() { return moduleInd_d.get(); }
+ int32_t *clus() { return clus_d.get(); }
+ uint32_t *pdigi() { return pdigi_d.get(); }
+ uint32_t *rawIdArr() { return rawIdArr_d.get(); }
+
+ uint16_t const *xx() const { return xx_d.get(); }
+ uint16_t const *yy() const { return yy_d.get(); }
+ uint16_t const *adc() const { return adc_d.get(); }
+ uint16_t const *moduleInd() const { return moduleInd_d.get(); }
+ int32_t const *clus() const { return clus_d.get(); }
+ uint32_t const *pdigi() const { return pdigi_d.get(); }
+ uint32_t const *rawIdArr() const { return rawIdArr_d.get(); }
+
+ uint16_t const *c_xx() const { return xx_d.get(); }
+ uint16_t const *c_yy() const { return yy_d.get(); }
+ uint16_t const *c_adc() const { return adc_d.get(); }
+ uint16_t const *c_moduleInd() const { return moduleInd_d.get(); }
+ int32_t const *c_clus() const { return clus_d.get(); }
+ uint32_t const *c_pdigi() const { return pdigi_d.get(); }
+ uint32_t const *c_rawIdArr() const { return rawIdArr_d.get(); }
+
+  cudautils::host::unique_ptr<uint16_t[]> adcToHostAsync(cudaStream_t stream) const;
+  cudautils::host::unique_ptr<int32_t[]> clusToHostAsync(cudaStream_t stream) const;
+  cudautils::host::unique_ptr<uint32_t[]> pdigiToHostAsync(cudaStream_t stream) const;
+  cudautils::host::unique_ptr<uint32_t[]> rawIdArrToHostAsync(cudaStream_t stream) const;
+
+ class DeviceConstView {
+ public:
+ // DeviceConstView() = default;
+
+ __device__ __forceinline__ uint16_t xx(int i) const { return __ldg(xx_ + i); }
+ __device__ __forceinline__ uint16_t yy(int i) const { return __ldg(yy_ + i); }
+ __device__ __forceinline__ uint16_t adc(int i) const { return __ldg(adc_ + i); }
+ __device__ __forceinline__ uint16_t moduleInd(int i) const { return __ldg(moduleInd_ + i); }
+ __device__ __forceinline__ int32_t clus(int i) const { return __ldg(clus_ + i); }
+
+ friend class SiPixelDigisCUDA;
+
+ // private:
+ uint16_t const *xx_;
+ uint16_t const *yy_;
+ uint16_t const *adc_;
+ uint16_t const *moduleInd_;
+ int32_t const *clus_;
+ };
+
+ const DeviceConstView *view() const { return view_d.get(); }
+
+private:
+ // These are consumed by downstream device code
+  cudautils::device::unique_ptr<uint16_t[]> xx_d;         // local coordinates of each pixel
+  cudautils::device::unique_ptr<uint16_t[]> yy_d;         //
+  cudautils::device::unique_ptr<uint16_t[]> adc_d;        // ADC of each pixel
+  cudautils::device::unique_ptr<uint16_t[]> moduleInd_d;  // module id of each pixel
+  cudautils::device::unique_ptr<int32_t[]> clus_d;        // cluster id of each pixel
+  cudautils::device::unique_ptr<DeviceConstView> view_d;  // "me" pointer
+
+  // These are for CPU output; should we (eventually) place them to a
+  // separate product?
+  cudautils::device::unique_ptr<uint32_t[]> pdigi_d;
+  cudautils::device::unique_ptr<uint32_t[]> rawIdArr_d;
+
+ uint32_t nModules_h = 0;
+ uint32_t nDigis_h = 0;
+};
+
+#endif
diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc
new file mode 100644
index 0000000000000..7640348c15f08
--- /dev/null
+++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc
@@ -0,0 +1,42 @@
+#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h"
+
+#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/memsetAsync.h"
+
+#include <cassert>
+
+SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream)
+ : formatterErrors_h(std::move(errors)) {
+  error_d = cudautils::make_device_unique<GPU::SimpleVector<PixelErrorCompact>>(stream);
+  data_d = cudautils::make_device_unique<PixelErrorCompact[]>(maxFedWords, stream);
+
+  cudautils::memsetAsync(data_d, 0x00, maxFedWords, stream);
+
+  error_h = cudautils::make_host_unique<GPU::SimpleVector<PixelErrorCompact>>(stream);
+  GPU::make_SimpleVector(error_h.get(), maxFedWords, data_d.get());
+  assert(error_h->empty());
+  assert(error_h->capacity() == static_cast<int>(maxFedWords));
+
+ cudautils::copyAsync(error_d, error_h, stream);
+}
+
+void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) {
+ cudautils::copyAsync(error_h, error_d, stream);
+}
+
+SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const {
+ // On one hand size() could be sufficient. On the other hand, if
+ // someone copies the SimpleVector<>, (s)he might expect the data
+ // buffer to actually have space for capacity() elements.
+  auto data = cudautils::make_host_unique<PixelErrorCompact[]>(error_h->capacity(), stream);
+
+ // but transfer only the required amount
+ if (not error_h->empty()) {
+ cudautils::copyAsync(data, data_d, error_h->size(), stream);
+ }
+ auto err = *error_h;
+ err.set_data(data.get());
+ return HostDataError(std::move(err), std::move(data));
+}
diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc
new file mode 100644
index 0000000000000..a8aab7ab5a4b8
--- /dev/null
+++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc
@@ -0,0 +1,50 @@
+#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
+
+#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
+
+SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) {
+  xx_d = cudautils::make_device_unique<uint16_t[]>(maxFedWords, stream);
+  yy_d = cudautils::make_device_unique<uint16_t[]>(maxFedWords, stream);
+  adc_d = cudautils::make_device_unique<uint16_t[]>(maxFedWords, stream);
+  moduleInd_d = cudautils::make_device_unique<uint16_t[]>(maxFedWords, stream);
+  clus_d = cudautils::make_device_unique<int32_t[]>(maxFedWords, stream);
+
+  pdigi_d = cudautils::make_device_unique<uint32_t[]>(maxFedWords, stream);
+  rawIdArr_d = cudautils::make_device_unique<uint32_t[]>(maxFedWords, stream);
+
+  auto view = cudautils::make_host_unique<DeviceConstView>(stream);
+  view->xx_ = xx_d.get();
+  view->yy_ = yy_d.get();
+  view->adc_ = adc_d.get();
+  view->moduleInd_ = moduleInd_d.get();
+  view->clus_ = clus_d.get();
+
+  view_d = cudautils::make_device_unique<DeviceConstView>(stream);
+ cudautils::copyAsync(view_d, view, stream);
+}
+
+cudautils::host::unique_ptr<uint16_t[]> SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const {
+  auto ret = cudautils::make_host_unique<uint16_t[]>(nDigis(), stream);
+  cudautils::copyAsync(ret, adc_d, nDigis(), stream);
+  return ret;
+}
+
+cudautils::host::unique_ptr<int32_t[]> SiPixelDigisCUDA::clusToHostAsync(cudaStream_t stream) const {
+  auto ret = cudautils::make_host_unique<int32_t[]>(nDigis(), stream);
+  cudautils::copyAsync(ret, clus_d, nDigis(), stream);
+  return ret;
+}
+
+cudautils::host::unique_ptr<uint32_t[]> SiPixelDigisCUDA::pdigiToHostAsync(cudaStream_t stream) const {
+  auto ret = cudautils::make_host_unique<uint32_t[]>(nDigis(), stream);
+  cudautils::copyAsync(ret, pdigi_d, nDigis(), stream);
+  return ret;
+}
+
+cudautils::host::unique_ptr<uint32_t[]> SiPixelDigisCUDA::rawIdArrToHostAsync(cudaStream_t stream) const {
+  auto ret = cudautils::make_host_unique<uint32_t[]>(nDigis(), stream);
+  cudautils::copyAsync(ret, rawIdArr_d, nDigis(), stream);
+  return ret;
+}
diff --git a/CUDADataFormats/SiPixelDigi/src/classes.h b/CUDADataFormats/SiPixelDigi/src/classes.h
new file mode 100644
index 0000000000000..41b135640b883
--- /dev/null
+++ b/CUDADataFormats/SiPixelDigi/src/classes.h
@@ -0,0 +1,9 @@
+#ifndef CUDADataFormats_SiPixelDigi_classes_h
+#define CUDADataFormats_SiPixelDigi_classes_h
+
+#include "CUDADataFormats/Common/interface/CUDAProduct.h"
+#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
+#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h"
+#include "DataFormats/Common/interface/Wrapper.h"
+
+#endif
diff --git a/CUDADataFormats/SiPixelDigi/src/classes_def.xml b/CUDADataFormats/SiPixelDigi/src/classes_def.xml
new file mode 100644
index 0000000000000..9d6816ed3b14c
--- /dev/null
+++ b/CUDADataFormats/SiPixelDigi/src/classes_def.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/CUDADataFormats/Track/BuildFile.xml b/CUDADataFormats/Track/BuildFile.xml
new file mode 100644
index 0000000000000..e3f9a0910bbd8
--- /dev/null
+++ b/CUDADataFormats/Track/BuildFile.xml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
diff --git a/CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h b/CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h
new file mode 100644
index 0000000000000..bd4ec059f6e9c
--- /dev/null
+++ b/CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h
@@ -0,0 +1,74 @@
+#ifndef CUDADataFormatsTrackTrackHeterogeneous_H
+#define CUDADataFormatsTrackTrackHeterogeneous_H
+
+#include "CUDADataFormats/Track/interface/TrajectoryStateSoA.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
+
+#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h"
+
+namespace trackQuality {
+ enum Quality : uint8_t { bad = 0, dup, loose, strict, tight, highPurity };
+}
+
+template <int32_t S>
+class TrackSoAT {
+public:
+ static constexpr int32_t stride() { return S; }
+
+ using Quality = trackQuality::Quality;
+ using hindex_type = uint16_t;
+  using HitContainer = OneToManyAssoc<hindex_type, S, 5 * S>;
+
+ // Always check quality is at least loose!
+ // CUDA does not support enums in __lgc ...
+  eigenSoA::ScalarSoA<uint8_t, S> m_quality;
+ constexpr Quality quality(int32_t i) const { return (Quality)(m_quality(i)); }
+ constexpr Quality &quality(int32_t i) { return (Quality &)(m_quality(i)); }
+ constexpr Quality const *qualityData() const { return (Quality const *)(m_quality.data()); }
+ constexpr Quality *qualityData() { return (Quality *)(m_quality.data()); }
+
+ // this is chi2/ndof as not necessarely all hits are used in the fit
+  eigenSoA::ScalarSoA<float, S> chi2;
+
+ constexpr int nHits(int i) const { return detIndices.size(i); }
+
+ // State at the Beam spot
+ // phi,tip,1/pt,cotan(theta),zip
+  TrajectoryStateSoA<S> stateAtBS;
+  eigenSoA::ScalarSoA<float, S> eta;
+  eigenSoA::ScalarSoA<float, S> pt;
+ constexpr float charge(int32_t i) const { return std::copysign(1.f, stateAtBS.state(i)(2)); }
+ constexpr float phi(int32_t i) const { return stateAtBS.state(i)(0); }
+ constexpr float tip(int32_t i) const { return stateAtBS.state(i)(1); }
+ constexpr float zip(int32_t i) const { return stateAtBS.state(i)(4); }
+
+ // state at the detector of the outermost hit
+ // representation to be decided...
+ // not yet filled on GPU
+ // TrajectoryStateSoA stateAtOuterDet;
+
+ HitContainer hitIndices;
+ HitContainer detIndices;
+
+ // total number of tracks (including those not fitted)
+ uint32_t m_nTracks;
+};
+
+namespace pixelTrack {
+
+#ifdef GPU_SMALL_EVENTS
+ constexpr uint32_t maxNumber() { return 2 * 1024; }
+#else
+ constexpr uint32_t maxNumber() { return 32 * 1024; }
+#endif
+
+  using TrackSoA = TrackSoAT<maxNumber()>;
+  using TrajectoryState = TrajectoryStateSoA<maxNumber()>;
+ using HitContainer = TrackSoA::HitContainer;
+ using Quality = trackQuality::Quality;
+
+} // namespace pixelTrack
+
+using PixelTrackHeterogeneous = HeterogeneousSoA<pixelTrack::TrackSoA>;
+
+#endif  // CUDADataFormatsTrackTrackHeterogeneous_H
diff --git a/CUDADataFormats/Track/interface/TrajectoryStateSoA.h b/CUDADataFormats/Track/interface/TrajectoryStateSoA.h
new file mode 100644
index 0000000000000..7cd2e93fb914e
--- /dev/null
+++ b/CUDADataFormats/Track/interface/TrajectoryStateSoA.h
@@ -0,0 +1,59 @@
+#ifndef CUDADataFormatsTrackTrajectoryStateSOA_H
+#define CUDADataFormatsTrackTrajectoryStateSOA_H
+
+#include <Eigen/Dense>
+#include "HeterogeneousCore/CUDAUtilities/interface/eigenSoA.h"
+
+template <int32_t S>
+struct TrajectoryStateSoA {
+  using Vector5f = Eigen::Matrix<float, 5, 1>;
+  using Vector15f = Eigen::Matrix<float, 15, 1>;
+
+  using Vector5d = Eigen::Matrix<double, 5, 1>;
+  using Matrix5d = Eigen::Matrix<double, 5, 5>;
+
+ static constexpr int32_t stride() { return S; }
+
+  eigenSoA::MatrixSoA<Vector5f, S> state;
+  eigenSoA::MatrixSoA<Vector15f, S> covariance;
+
+  template <typename V3, typename M3, typename V2, typename M2>
+  __host__ __device__ inline void copyFromCircle(
+      V3 const& cp, M3 const& ccov, V2 const& lp, M2 const& lcov, float b, int32_t i) {
+    state(i) << cp.template cast<float>(), lp.template cast<float>();
+ state(i)(2) *= b;
+ auto cov = covariance(i);
+ cov(0) = ccov(0, 0);
+ cov(1) = ccov(0, 1);
+ cov(2) = b * float(ccov(0, 2));
+ cov(4) = cov(3) = 0;
+ cov(5) = ccov(1, 1);
+ cov(6) = b * float(ccov(1, 2));
+ cov(8) = cov(7) = 0;
+ cov(9) = b * b * float(ccov(2, 2));
+ cov(11) = cov(10) = 0;
+ cov(12) = lcov(0, 0);
+ cov(13) = lcov(0, 1);
+ cov(14) = lcov(1, 1);
+ }
+
+  template <typename V5, typename M5>
+  __host__ __device__ inline void copyFromDense(V5 const& v, M5 const& cov, int32_t i) {
+    state(i) = v.template cast<float>();
+ for (int j = 0, ind = 0; j < 5; ++j)
+ for (auto k = j; k < 5; ++k)
+ covariance(i)(ind++) = cov(j, k);
+ }
+
+  template <typename V5, typename M5>
+  __host__ __device__ inline void copyToDense(V5& v, M5& cov, int32_t i) const {
+    v = state(i).template cast<double>();
+ for (int j = 0, ind = 0; j < 5; ++j) {
+ cov(j, j) = covariance(i)(ind++);
+ for (auto k = j + 1; k < 5; ++k)
+ cov(k, j) = cov(j, k) = covariance(i)(ind++);
+ }
+ }
+};
+
+#endif // CUDADataFormatsTrackTrajectoryStateSOA_H
diff --git a/CUDADataFormats/Track/src/classes.h b/CUDADataFormats/Track/src/classes.h
new file mode 100644
index 0000000000000..699e45ede05d4
--- /dev/null
+++ b/CUDADataFormats/Track/src/classes.h
@@ -0,0 +1,10 @@
+#ifndef CUDADataFormats__src_classes_h
+#define CUDADataFormats__src_classes_h
+
+#include "CUDADataFormats/Common/interface/CUDAProduct.h"
+#include "CUDADataFormats/Common/interface/HostProduct.h"
+#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h"
+#include "CUDADataFormats/Common/interface/ArrayShadow.h"
+#include "DataFormats/Common/interface/Wrapper.h"
+
+#endif
diff --git a/CUDADataFormats/Track/src/classes_def.xml b/CUDADataFormats/Track/src/classes_def.xml
new file mode 100644
index 0000000000000..a4c2e766582dd
--- /dev/null
+++ b/CUDADataFormats/Track/src/classes_def.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
diff --git a/CUDADataFormats/Track/test/BuildFile.xml b/CUDADataFormats/Track/test/BuildFile.xml
new file mode 100644
index 0000000000000..598b345d4709d
--- /dev/null
+++ b/CUDADataFormats/Track/test/BuildFile.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cpp b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cpp
new file mode 100644
index 0000000000000..d6ff539a642b0
--- /dev/null
+++ b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cpp
@@ -0,0 +1 @@
+#include "TrajectoryStateSOA_t.h"
diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cu b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cu
new file mode 100644
index 0000000000000..d6ff539a642b0
--- /dev/null
+++ b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cu
@@ -0,0 +1 @@
+#include "TrajectoryStateSOA_t.h"
diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h
new file mode 100644
index 0000000000000..03c51c39acdfb
--- /dev/null
+++ b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h
@@ -0,0 +1,75 @@
+#include "CUDADataFormats/Track/interface/TrajectoryStateSoA.h"
+
+using Vector5d = Eigen::Matrix<double, 5, 1>;
+using Matrix5d = Eigen::Matrix<double, 5, 5>;
+
+__host__ __device__ Matrix5d loadCov(Vector5d const& e) {
+ Matrix5d cov;
+ for (int i = 0; i < 5; ++i)
+ cov(i, i) = e(i) * e(i);
+ for (int i = 0; i < 5; ++i) {
+ for (int j = 0; j < i; ++j) {
+ double v = 0.3 * std::sqrt(cov(i, i) * cov(j, j)); // this makes the matrix pos defined
+ cov(i, j) = (i + j) % 2 ? -0.4 * v : 0.1 * v;
+ cov(j, i) = cov(i, j);
+ }
+ }
+ return cov;
+}
+
+using TS = TrajectoryStateSoA<128>;
+
+__global__ void testTSSoA(TS* pts, int n) {
+ assert(n <= 128);
+
+ Vector5d par0;
+ par0 << 0.2, 0.1, 3.5, 0.8, 0.1;
+ Vector5d e0;
+ e0 << 0.01, 0.01, 0.035, -0.03, -0.01;
+ auto cov0 = loadCov(e0);
+
+ TS& ts = *pts;
+
+ int first = threadIdx.x + blockIdx.x * blockDim.x;
+
+ for (int i = first; i < n; i += blockDim.x * gridDim.x) {
+ ts.copyFromDense(par0, cov0, i);
+ Vector5d par1;
+ Matrix5d cov1;
+ ts.copyToDense(par1, cov1, i);
+ Vector5d delV = par1 - par0;
+ Matrix5d delM = cov1 - cov0;
+ for (int j = 0; j < 5; ++j) {
+ assert(std::abs(delV(j)) < 1.e-5);
+ for (auto k = j; k < 5; ++k) {
+ assert(cov0(k, j) == cov0(j, k));
+ assert(cov1(k, j) == cov1(j, k));
+ assert(std::abs(delM(k, j)) < 1.e-5);
+ }
+ }
+ }
+}
+
+#ifdef __CUDACC__
+#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+#endif
+
+int main() {
+#ifdef __CUDACC__
+ exitSansCUDADevices();
+#endif
+
+ TS ts;
+
+#ifdef __CUDACC__
+ TS* ts_d;
+ cudaCheck(cudaMalloc(&ts_d, sizeof(TS)));
+ testTSSoA<<<1, 64>>>(ts_d, 128);
+ cudaCheck(cudaGetLastError());
+ cudaCheck(cudaMemcpy(&ts, ts_d, sizeof(TS), cudaMemcpyDefault));
+ cudaCheck(cudaDeviceSynchronize());
+#else
+ testTSSoA(&ts, 128);
+#endif
+}
diff --git a/CUDADataFormats/TrackingRecHit/BuildFile.xml b/CUDADataFormats/TrackingRecHit/BuildFile.xml
new file mode 100644
index 0000000000000..8dc569d40b6c4
--- /dev/null
+++ b/CUDADataFormats/TrackingRecHit/BuildFile.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h
new file mode 100644
index 0000000000000..f6b715b3e743e
--- /dev/null
+++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h
@@ -0,0 +1 @@
+#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"
diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h
new file mode 100644
index 0000000000000..aa551f21b4aad
--- /dev/null
+++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h
@@ -0,0 +1,150 @@
+#ifndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
+#define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
+
+#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h"
+#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h"
+
+template <typename Traits>
+class TrackingRecHit2DHeterogeneous {
+public:
+  template <typename T>
+  using unique_ptr = typename Traits::template unique_ptr<T>;
+
+ using Hist = TrackingRecHit2DSOAView::Hist;
+
+ TrackingRecHit2DHeterogeneous() = default;
+
+ explicit TrackingRecHit2DHeterogeneous(uint32_t nHits,
+ pixelCPEforGPU::ParamsOnGPU const* cpeParams,
+ uint32_t const* hitsModuleStart,
+ cudaStream_t stream);
+
+ ~TrackingRecHit2DHeterogeneous() = default;
+
+ TrackingRecHit2DHeterogeneous(const TrackingRecHit2DHeterogeneous&) = delete;
+ TrackingRecHit2DHeterogeneous& operator=(const TrackingRecHit2DHeterogeneous&) = delete;
+ TrackingRecHit2DHeterogeneous(TrackingRecHit2DHeterogeneous&&) = default;
+ TrackingRecHit2DHeterogeneous& operator=(TrackingRecHit2DHeterogeneous&&) = default;
+
+ TrackingRecHit2DSOAView* view() { return m_view.get(); }
+ TrackingRecHit2DSOAView const* view() const { return m_view.get(); }
+
+ auto nHits() const { return m_nHits; }
+
+ auto hitsModuleStart() const { return m_hitsModuleStart; }
+ auto hitsLayerStart() { return m_hitsLayerStart; }
+ auto phiBinner() { return m_hist; }
+ auto iphi() { return m_iphi; }
+
+ // only the local coord and detector index
+  cudautils::host::unique_ptr<float[]> localCoordToHostAsync(cudaStream_t stream) const;
+  cudautils::host::unique_ptr<uint16_t[]> detIndexToHostAsync(cudaStream_t stream) const;
+  cudautils::host::unique_ptr<uint32_t[]> hitsModuleStartToHostAsync(cudaStream_t stream) const;
+
+private:
+ static constexpr uint32_t n16 = 4;
+ static constexpr uint32_t n32 = 9;
+ static_assert(sizeof(uint32_t) == sizeof(float)); // just stating the obvious
+
+  unique_ptr<uint16_t[]> m_store16;  //!
+  unique_ptr<float[]> m_store32;     //!
+
+  unique_ptr<TrackingRecHit2DSOAView::Hist> m_HistStore;                        //!
+  unique_ptr<TrackingRecHit2DSOAView::AverageGeometry> m_AverageGeometryStore;  //!
+
+  unique_ptr<TrackingRecHit2DSOAView> m_view;  //!
+
+ uint32_t m_nHits;
+
+ uint32_t const* m_hitsModuleStart; // needed for legacy, this is on GPU!
+
+ // needed as kernel params...
+ Hist* m_hist;
+ uint32_t* m_hitsLayerStart;
+ int16_t* m_iphi;
+};
+
+#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+
+template <typename Traits>
+TrackingRecHit2DHeterogeneous<Traits>::TrackingRecHit2DHeterogeneous(uint32_t nHits,
+ pixelCPEforGPU::ParamsOnGPU const* cpeParams,
+ uint32_t const* hitsModuleStart,
+ cudaStream_t stream)
+ : m_nHits(nHits), m_hitsModuleStart(hitsModuleStart) {
+  auto view = Traits::template make_host_unique<TrackingRecHit2DSOAView>(stream);
+
+ view->m_nHits = nHits;
+  m_view = Traits::template make_device_unique<TrackingRecHit2DSOAView>(stream);
+  m_AverageGeometryStore = Traits::template make_device_unique<TrackingRecHit2DSOAView::AverageGeometry>(stream);
+ view->m_averageGeometry = m_AverageGeometryStore.get();
+ view->m_cpeParams = cpeParams;
+ view->m_hitsModuleStart = hitsModuleStart;
+
+ // if empy do not bother
+ if (0 == nHits) {
+ if
+#ifndef __CUDACC__
+ constexpr
+#endif
+        (std::is_same<Traits, cudaCompat::GPUTraits>::value) {
+ cudautils::copyAsync(m_view, view, stream);
+ } else {
+ m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version
+ }
+ return;
+ }
+
+ // the single arrays are not 128 bit alligned...
+ // the hits are actually accessed in order only in building
+ // if ordering is relevant they may have to be stored phi-ordered by layer or so
+ // this will break 1to1 correspondence with cluster and module locality
+ // so unless proven VERY inefficient we keep it ordered as generated
+  m_store16 = Traits::template make_device_unique<uint16_t[]>(nHits * n16, stream);
+  m_store32 = Traits::template make_device_unique<float[]>(nHits * n32 + 11, stream);
+  m_HistStore = Traits::template make_device_unique<TrackingRecHit2DSOAView::Hist>(stream);
+
+ auto get16 = [&](int i) { return m_store16.get() + i * nHits; };
+ auto get32 = [&](int i) { return m_store32.get() + i * nHits; };
+
+ // copy all the pointers
+ m_hist = view->m_hist = m_HistStore.get();
+
+ view->m_xl = get32(0);
+ view->m_yl = get32(1);
+ view->m_xerr = get32(2);
+ view->m_yerr = get32(3);
+
+ view->m_xg = get32(4);
+ view->m_yg = get32(5);
+ view->m_zg = get32(6);
+ view->m_rg = get32(7);
+
+  m_iphi = view->m_iphi = reinterpret_cast<int16_t*>(get16(0));
+
+  view->m_charge = reinterpret_cast<int32_t*>(get32(8));
+  view->m_xsize = reinterpret_cast<int16_t*>(get16(2));
+  view->m_ysize = reinterpret_cast<int16_t*>(get16(3));
+  view->m_detInd = get16(1);
+
+  m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast<uint32_t*>(get32(n32));
+
+ // transfer view
+ if
+#ifndef __CUDACC__
+ constexpr
+#endif
+      (std::is_same<Traits, cudaCompat::GPUTraits>::value) {
+ cudautils::copyAsync(m_view, view, stream);
+ } else {
+ m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version
+ }
+}
+
+using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous<cudaCompat::GPUTraits>;
+using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous<cudaCompat::GPUTraits>;
+using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous<cudaCompat::CPUTraits>;
+using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous<cudaCompat::HostTraits>;
+
+#endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h
new file mode 100644
index 0000000000000..8e6d99e81238a
--- /dev/null
+++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h
@@ -0,0 +1,100 @@
+#ifndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAView_h
+#define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DSOAView_h
+
+#include <cuda_runtime.h>
+
+#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
+#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h"
+
+namespace pixelCPEforGPU {
+ struct ParamsOnGPU;
+}
+
+class TrackingRecHit2DSOAView {
+public:
+ static constexpr uint32_t maxHits() { return gpuClustering::MaxNumClusters; }
+ using hindex_type = uint16_t; // if above is <=2^16
+
+  using Hist = HistoContainer<int16_t, 128, gpuClustering::MaxNumClusters, 8 * sizeof(int16_t), uint16_t, 10>;
+
+ using AverageGeometry = phase1PixelTopology::AverageGeometry;
+
+  template <typename>
+ friend class TrackingRecHit2DHeterogeneous;
+
+ __device__ __forceinline__ uint32_t nHits() const { return m_nHits; }
+
+ __device__ __forceinline__ float& xLocal(int i) { return m_xl[i]; }
+ __device__ __forceinline__ float xLocal(int i) const { return __ldg(m_xl + i); }
+ __device__ __forceinline__ float& yLocal(int i) { return m_yl[i]; }
+ __device__ __forceinline__ float yLocal(int i) const { return __ldg(m_yl + i); }
+
+ __device__ __forceinline__ float& xerrLocal(int i) { return m_xerr[i]; }
+ __device__ __forceinline__ float xerrLocal(int i) const { return __ldg(m_xerr + i); }
+ __device__ __forceinline__ float& yerrLocal(int i) { return m_yerr[i]; }
+ __device__ __forceinline__ float yerrLocal(int i) const { return __ldg(m_yerr + i); }
+
+ __device__ __forceinline__ float& xGlobal(int i) { return m_xg[i]; }
+ __device__ __forceinline__ float xGlobal(int i) const { return __ldg(m_xg + i); }
+ __device__ __forceinline__ float& yGlobal(int i) { return m_yg[i]; }
+ __device__ __forceinline__ float yGlobal(int i) const { return __ldg(m_yg + i); }
+ __device__ __forceinline__ float& zGlobal(int i) { return m_zg[i]; }
+ __device__ __forceinline__ float zGlobal(int i) const { return __ldg(m_zg + i); }
+ __device__ __forceinline__ float& rGlobal(int i) { return m_rg[i]; }
+ __device__ __forceinline__ float rGlobal(int i) const { return __ldg(m_rg + i); }
+
+ __device__ __forceinline__ int16_t& iphi(int i) { return m_iphi[i]; }
+ __device__ __forceinline__ int16_t iphi(int i) const { return __ldg(m_iphi + i); }
+
+ __device__ __forceinline__ int32_t& charge(int i) { return m_charge[i]; }
+ __device__ __forceinline__ int32_t charge(int i) const { return __ldg(m_charge + i); }
+ __device__ __forceinline__ int16_t& clusterSizeX(int i) { return m_xsize[i]; }
+ __device__ __forceinline__ int16_t clusterSizeX(int i) const { return __ldg(m_xsize + i); }
+ __device__ __forceinline__ int16_t& clusterSizeY(int i) { return m_ysize[i]; }
+ __device__ __forceinline__ int16_t clusterSizeY(int i) const { return __ldg(m_ysize + i); }
+ __device__ __forceinline__ uint16_t& detectorIndex(int i) { return m_detInd[i]; }
+ __device__ __forceinline__ uint16_t detectorIndex(int i) const { return __ldg(m_detInd + i); }
+
+ __device__ __forceinline__ pixelCPEforGPU::ParamsOnGPU const& cpeParams() const { return *m_cpeParams; }
+
+ __device__ __forceinline__ uint32_t hitsModuleStart(int i) const { return __ldg(m_hitsModuleStart + i); }
+
+ __device__ __forceinline__ uint32_t* hitsLayerStart() { return m_hitsLayerStart; }
+ __device__ __forceinline__ uint32_t const* hitsLayerStart() const { return m_hitsLayerStart; }
+
+ __device__ __forceinline__ Hist& phiBinner() { return *m_hist; }
+ __device__ __forceinline__ Hist const& phiBinner() const { return *m_hist; }
+
+ __device__ __forceinline__ AverageGeometry& averageGeometry() { return *m_averageGeometry; }
+ __device__ __forceinline__ AverageGeometry const& averageGeometry() const { return *m_averageGeometry; }
+
+private:
+ // local coord
+ float *m_xl, *m_yl;
+ float *m_xerr, *m_yerr;
+
+ // global coord
+ float *m_xg, *m_yg, *m_zg, *m_rg;
+ int16_t* m_iphi;
+
+ // cluster properties
+ int32_t* m_charge;
+ int16_t* m_xsize;
+ int16_t* m_ysize;
+ uint16_t* m_detInd;
+
+ // supporting objects
+ AverageGeometry* m_averageGeometry; // owned (corrected for beam spot: not sure where to host it otherwise)
+ pixelCPEforGPU::ParamsOnGPU const* m_cpeParams; // forwarded from setup, NOT owned
+ uint32_t const* m_hitsModuleStart; // forwarded from clusters
+
+ uint32_t* m_hitsLayerStart;
+
+ Hist* m_hist;
+
+ uint32_t m_nHits;
+};
+
+#endif
diff --git a/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DCUDA.cc b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DCUDA.cc
new file mode 100644
index 0000000000000..e6f223bfec4e3
--- /dev/null
+++ b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DCUDA.cc
@@ -0,0 +1,19 @@
+#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
+
+template <>
+cudautils::host::unique_ptr<float[]> TrackingRecHit2DCUDA::localCoordToHostAsync(cudaStream_t stream) const {
+  auto ret = cudautils::make_host_unique<float[]>(4 * nHits(), stream);
+ cudautils::copyAsync(ret, m_store32, 4 * nHits(), stream);
+ return ret;
+}
+
+template <>
+cudautils::host::unique_ptr<uint32_t[]> TrackingRecHit2DCUDA::hitsModuleStartToHostAsync(cudaStream_t stream) const {
+  auto ret = cudautils::make_host_unique<uint32_t[]>(2001, stream);
+ cudaCheck(cudaMemcpyAsync(ret.get(), m_hitsModuleStart, 4 * 2001, cudaMemcpyDefault, stream));
+ return ret;
+}
diff --git a/CUDADataFormats/TrackingRecHit/src/classes.h b/CUDADataFormats/TrackingRecHit/src/classes.h
new file mode 100644
index 0000000000000..90cfd0945d76e
--- /dev/null
+++ b/CUDADataFormats/TrackingRecHit/src/classes.h
@@ -0,0 +1,9 @@
+#ifndef CUDADataFormats_SiPixelCluster_src_classes_h
+#define CUDADataFormats_SiPixelCluster_src_classes_h
+
+#include "CUDADataFormats/Common/interface/CUDAProduct.h"
+#include "CUDADataFormats/Common/interface/HostProduct.h"
+#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
+#include "DataFormats/Common/interface/Wrapper.h"
+
+#endif // CUDADataFormats_SiPixelCluster_src_classes_h
diff --git a/CUDADataFormats/TrackingRecHit/src/classes_def.xml b/CUDADataFormats/TrackingRecHit/src/classes_def.xml
new file mode 100644
index 0000000000000..4e8325ddce87e
--- /dev/null
+++ b/CUDADataFormats/TrackingRecHit/src/classes_def.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/CUDADataFormats/TrackingRecHit/test/BuildFile.xml b/CUDADataFormats/TrackingRecHit/test/BuildFile.xml
new file mode 100644
index 0000000000000..74f2818790d0f
--- /dev/null
+++ b/CUDADataFormats/TrackingRecHit/test/BuildFile.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp
new file mode 100644
index 0000000000000..42be4bc6991e1
--- /dev/null
+++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp
@@ -0,0 +1,29 @@
+#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+
+namespace testTrackingRecHit2D {
+
+ void runKernels(TrackingRecHit2DSOAView* hits);
+
+}
+
+int main() {
+ exitSansCUDADevices();
+
+ cudaStream_t stream;
+ cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
+
+ // inner scope to deallocate memory before destroying the stream
+ {
+ auto nHits = 200;
+ TrackingRecHit2DCUDA tkhit(nHits, nullptr, nullptr, stream);
+
+ testTrackingRecHit2D::runKernels(tkhit.view());
+ }
+
+ cudaCheck(cudaStreamDestroy(stream));
+
+ return 0;
+}
diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cu b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cu
new file mode 100644
index 0000000000000..6b55f8a8f98c5
--- /dev/null
+++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cu
@@ -0,0 +1,31 @@
+#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
+
+namespace testTrackingRecHit2D {
+
+ __global__ void fill(TrackingRecHit2DSOAView* phits) {
+ assert(phits);
+ auto& hits = *phits;
+ assert(hits.nHits() == 200);
+
+ int i = threadIdx.x;
+ if (i > 200)
+ return;
+ }
+
+ __global__ void verify(TrackingRecHit2DSOAView const* phits) {
+ assert(phits);
+ auto const& hits = *phits;
+ assert(hits.nHits() == 200);
+
+ int i = threadIdx.x;
+ if (i > 200)
+ return;
+ }
+
+ void runKernels(TrackingRecHit2DSOAView* hits) {
+ assert(hits);
+ fill<<<1, 1024>>>(hits);
+ verify<<<1, 1024>>>(hits);
+ }
+
+} // namespace testTrackingRecHit2D
diff --git a/CUDADataFormats/Vertex/BuildFile.xml b/CUDADataFormats/Vertex/BuildFile.xml
new file mode 100644
index 0000000000000..e3f9a0910bbd8
--- /dev/null
+++ b/CUDADataFormats/Vertex/BuildFile.xml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
diff --git a/CUDADataFormats/Vertex/interface/ZVertexHeterogeneous.h b/CUDADataFormats/Vertex/interface/ZVertexHeterogeneous.h
new file mode 100644
index 0000000000000..d12ed5f3d98de
--- /dev/null
+++ b/CUDADataFormats/Vertex/interface/ZVertexHeterogeneous.h
@@ -0,0 +1,14 @@
+#ifndef CUDADataFormatsVertexZVertexHeterogeneous_H
+#define CUDADataFormatsVertexZVertexHeterogeneous_H
+
+#include "CUDADataFormats/Vertex/interface/ZVertexSoA.h"
+#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h"
+#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h"
+
+using ZVertexHeterogeneous = HeterogeneousSoA;
+#ifndef __CUDACC__
+#include "CUDADataFormats/Common/interface/CUDAProduct.h"
+using ZVertexCUDAProduct = CUDAProduct;
+#endif
+
+#endif
diff --git a/CUDADataFormats/Vertex/interface/ZVertexSoA.h b/CUDADataFormats/Vertex/interface/ZVertexSoA.h
new file mode 100644
index 0000000000000..cd1f8aea4e340
--- /dev/null
+++ b/CUDADataFormats/Vertex/interface/ZVertexSoA.h
@@ -0,0 +1,26 @@
+#ifndef CUDADataFormatsVertexZVertexSoA_H
+#define CUDADataFormatsVertexZVertexSoA_H
+
+#include
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
+
+// SOA for vertices
+// These vertices are clusterized and fitted only along the beam line (z)
+// to obtain their global coordinate the beam spot position shall be added (possibly correcting for the beam angle as well)
+struct ZVertexSoA {
+ static constexpr uint32_t MAXTRACKS = 32 * 1024;
+ static constexpr uint32_t MAXVTX = 1024;
+
+ int16_t idv[MAXTRACKS]; // vertex index for each associated (original) track (-1 == not associated)
+ float zv[MAXVTX]; // output z-position of found vertices
+ float wv[MAXVTX]; // output weight (1/error^2) on the above
+ float chi2[MAXVTX]; // vertices chi2
+ float ptv2[MAXVTX]; // vertices pt^2
+ int32_t ndof[MAXVTX]; // vertices number of dof (reused as workspace for the number of nearest neighbours)
+ uint16_t sortInd[MAXVTX]; // sorted index (by pt2) ascending
+ uint32_t nvFinal; // the number of vertices
+
+ __host__ __device__ void init() { nvFinal = 0; }
+};
+
+#endif // CUDADataFormatsVertexZVertexSoA_H
diff --git a/CUDADataFormats/Vertex/src/classes.h b/CUDADataFormats/Vertex/src/classes.h
new file mode 100644
index 0000000000000..f1144d1e3014e
--- /dev/null
+++ b/CUDADataFormats/Vertex/src/classes.h
@@ -0,0 +1,8 @@
+#ifndef CUDADataFormats__src_classes_h
+#define CUDADataFormats__src_classes_h
+
+#include "CUDADataFormats/Vertex/interface/ZVertexHeterogeneous.h"
+#include "CUDADataFormats/Common/interface/CUDAProduct.h"
+#include "DataFormats/Common/interface/Wrapper.h"
+
+#endif
diff --git a/CUDADataFormats/Vertex/src/classes_def.xml b/CUDADataFormats/Vertex/src/classes_def.xml
new file mode 100644
index 0000000000000..c43814eb03def
--- /dev/null
+++ b/CUDADataFormats/Vertex/src/classes_def.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h
new file mode 100644
index 0000000000000..56301421f325c
--- /dev/null
+++ b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h
@@ -0,0 +1,17 @@
+#ifndef CalibTracker_Records_SiPixelGainCalibrationForHLTGPURcd_h
+#define CalibTracker_Records_SiPixelGainCalibrationForHLTGPURcd_h
+
+#include "FWCore/Framework/interface/EventSetupRecordImplementation.h"
+#include "FWCore/Framework/interface/DependentRecordImplementation.h"
+
+#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h"
+#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h"
+
+#include "boost/mpl/vector.hpp"
+
+class SiPixelGainCalibrationForHLTGPURcd
+ : public edm::eventsetup::DependentRecordImplementation<
+ SiPixelGainCalibrationForHLTGPURcd,
+ boost::mpl::vector > {};
+
+#endif
diff --git a/CalibTracker/Records/src/SiPixelGainCalibrationForHLTGPURcd.cc b/CalibTracker/Records/src/SiPixelGainCalibrationForHLTGPURcd.cc
new file mode 100644
index 0000000000000..e6020eca80b1f
--- /dev/null
+++ b/CalibTracker/Records/src/SiPixelGainCalibrationForHLTGPURcd.cc
@@ -0,0 +1,5 @@
+#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h"
+#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h"
+#include "FWCore/Utilities/interface/typelookup.h"
+
+EVENTSETUP_RECORD_REG(SiPixelGainCalibrationForHLTGPURcd);
diff --git a/CalibTracker/SiPixelESProducers/BuildFile.xml b/CalibTracker/SiPixelESProducers/BuildFile.xml
index e9d22b32f0afb..02a36e17ed732 100644
--- a/CalibTracker/SiPixelESProducers/BuildFile.xml
+++ b/CalibTracker/SiPixelESProducers/BuildFile.xml
@@ -7,7 +7,9 @@
+
+
diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h
new file mode 100644
index 0000000000000..8bfefee5c3387
--- /dev/null
+++ b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h
@@ -0,0 +1,32 @@
+#ifndef CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h
+#define CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h
+
+#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h"
+#include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h"
+
+class SiPixelGainCalibrationForHLT;
+class SiPixelGainForHLTonGPU;
+struct SiPixelGainForHLTonGPU_DecodingStructure;
+class TrackerGeometry;
+
+class SiPixelGainCalibrationForHLTGPU {
+public:
+ explicit SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT &gains, const TrackerGeometry &geom);
+ ~SiPixelGainCalibrationForHLTGPU();
+
+ const SiPixelGainForHLTonGPU *getGPUProductAsync(cudaStream_t cudaStream) const;
+ const SiPixelGainForHLTonGPU *getCPUProduct() const { return gainForHLTonHost_; }
+ const SiPixelGainCalibrationForHLT *getOriginalProduct() { return gains_; }
+
+private:
+ const SiPixelGainCalibrationForHLT *gains_ = nullptr;
+ SiPixelGainForHLTonGPU *gainForHLTonHost_ = nullptr;
+ struct GPUData {
+ ~GPUData();
+ SiPixelGainForHLTonGPU *gainForHLTonGPU = nullptr;
+ SiPixelGainForHLTonGPU_DecodingStructure *gainDataOnGPU = nullptr;
+ };
+ CUDAESProduct gpuData_;
+};
+
+#endif // CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h
diff --git a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml
index 44db9d9ba0582..57bf68a1b7518 100644
--- a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml
+++ b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml
@@ -6,6 +6,8 @@
+
+
diff --git a/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc b/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc
new file mode 100644
index 0000000000000..bf8a0b2c5a75f
--- /dev/null
+++ b/CalibTracker/SiPixelESProducers/plugins/SiPixelGainCalibrationForHLTGPUESProducer.cc
@@ -0,0 +1,47 @@
+#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h"
+#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h"
+#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h"
+#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h"
+#include "FWCore/Framework/interface/ESProducer.h"
+#include "FWCore/Framework/interface/EventSetup.h"
+#include "FWCore/Framework/interface/ESHandle.h"
+#include "FWCore/Framework/interface/ModuleFactory.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h"
+#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h"
+
+#include
+
+class SiPixelGainCalibrationForHLTGPUESProducer : public edm::ESProducer {
+public:
+ explicit SiPixelGainCalibrationForHLTGPUESProducer(const edm::ParameterSet& iConfig);
+ std::unique_ptr produce(const SiPixelGainCalibrationForHLTGPURcd& iRecord);
+
+ static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+
+private:
+ edm::ESGetToken gainsToken_;
+ edm::ESGetToken geometryToken_;
+};
+
+SiPixelGainCalibrationForHLTGPUESProducer::SiPixelGainCalibrationForHLTGPUESProducer(const edm::ParameterSet& iConfig) {
+ setWhatProduced(this).setConsumes(gainsToken_).setConsumes(geometryToken_);
+}
+
+void SiPixelGainCalibrationForHLTGPUESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+ edm::ParameterSetDescription desc;
+ descriptions.add("siPixelGainCalibrationForHLTGPU", desc);
+}
+
+std::unique_ptr SiPixelGainCalibrationForHLTGPUESProducer::produce(
+ const SiPixelGainCalibrationForHLTGPURcd& iRecord) {
+ auto gains = iRecord.getHandle(gainsToken_);
+ auto geom = iRecord.getHandle(geometryToken_);
+ return std::make_unique(*gains, *geom);
+}
+
+#include "FWCore/Framework/interface/MakerMacros.h"
+#include "FWCore/Utilities/interface/typelookup.h"
+#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h"
+
+DEFINE_FWK_EVENTSETUP_MODULE(SiPixelGainCalibrationForHLTGPUESProducer);
diff --git a/CalibTracker/SiPixelESProducers/src/ES_SiPixelGainCalibrationForHLTGPU.cc b/CalibTracker/SiPixelESProducers/src/ES_SiPixelGainCalibrationForHLTGPU.cc
new file mode 100644
index 0000000000000..80932fb468f71
--- /dev/null
+++ b/CalibTracker/SiPixelESProducers/src/ES_SiPixelGainCalibrationForHLTGPU.cc
@@ -0,0 +1,4 @@
+#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h"
+#include "FWCore/Utilities/interface/typelookup.h"
+
+TYPELOOKUP_DATA_REG(SiPixelGainCalibrationForHLTGPU);
diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc
new file mode 100644
index 0000000000000..e4f278c28ec69
--- /dev/null
+++ b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc
@@ -0,0 +1,104 @@
+#include
+
+#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h"
+#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h"
+#include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h"
+#include "Geometry/CommonDetUnit/interface/GeomDetType.h"
+#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+
+SiPixelGainCalibrationForHLTGPU::SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT& gains,
+ const TrackerGeometry& geom)
+ : gains_(&gains) {
+ // bizarre logic (looking for first strip-det) don't ask
+ auto const& dus = geom.detUnits();
+ unsigned m_detectors = dus.size();
+ for (unsigned int i = 1; i < 7; ++i) {
+ if (geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]) != dus.size() &&
+ dus[geom.offsetDU(GeomDetEnumerators::tkDetEnum[i])]->type().isTrackerStrip()) {
+ if (geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]) < m_detectors)
+ m_detectors = geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]);
+ }
+ }
+
+ /*
+ std::cout << "caching calibs for " << m_detectors << " pixel detectors of size " << gains.data().size() << std::endl;
+ std::cout << "sizes " << sizeof(char) << ' ' << sizeof(uint8_t) << ' ' << sizeof(SiPixelGainForHLTonGPU::DecodingStructure) << std::endl;
+ */
+
+ cudaCheck(cudaMallocHost((void**)&gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU)));
+ gainForHLTonHost_->v_pedestals =
+ (SiPixelGainForHLTonGPU_DecodingStructure*)this->gains_->data().data(); // so it can be used on CPU as well...
+
+ // do not read back from the (possibly write-combined) memory buffer
+ auto minPed = gains.getPedLow();
+ auto maxPed = gains.getPedHigh();
+ auto minGain = gains.getGainLow();
+ auto maxGain = gains.getGainHigh();
+ auto nBinsToUseForEncoding = 253;
+
+ // we will simplify later (not everything is needed....)
+ gainForHLTonHost_->minPed_ = minPed;
+ gainForHLTonHost_->maxPed_ = maxPed;
+ gainForHLTonHost_->minGain_ = minGain;
+ gainForHLTonHost_->maxGain_ = maxGain;
+
+ gainForHLTonHost_->numberOfRowsAveragedOver_ = 80;
+ gainForHLTonHost_->nBinsToUseForEncoding_ = nBinsToUseForEncoding;
+ gainForHLTonHost_->deadFlag_ = 255;
+ gainForHLTonHost_->noisyFlag_ = 254;
+
+ gainForHLTonHost_->pedPrecision = static_cast(maxPed - minPed) / nBinsToUseForEncoding;
+ gainForHLTonHost_->gainPrecision = static_cast(maxGain - minGain) / nBinsToUseForEncoding;
+
+ /*
+ std::cout << "precisions g " << gainForHLTonHost_->pedPrecision << ' ' << gainForHLTonHost_->gainPrecision << std::endl;
+ */
+
+ // fill the index map
+ auto const& ind = gains.getIndexes();
+ /*
+ std::cout << ind.size() << " " << m_detectors << std::endl;
+ */
+
+ for (auto i = 0U; i < m_detectors; ++i) {
+ auto p = std::lower_bound(
+ ind.begin(), ind.end(), dus[i]->geographicalId().rawId(), SiPixelGainCalibrationForHLT::StrictWeakOrdering());
+ assert(p != ind.end() && p->detid == dus[i]->geographicalId());
+ assert(p->iend <= gains.data().size());
+ assert(p->iend >= p->ibegin);
+ assert(0 == p->ibegin % 2);
+ assert(0 == p->iend % 2);
+ assert(p->ibegin != p->iend);
+ assert(p->ncols > 0);
+ gainForHLTonHost_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(p->ibegin, p->iend), p->ncols);
+ // if (ind[i].detid!=dus[i]->geographicalId()) std::cout << ind[i].detid<<"!="<geographicalId() << std::endl;
+ // gainForHLTonHost_->rangeAndCols[i] = std::make_pair(SiPixelGainForHLTonGPU::Range(ind[i].ibegin,ind[i].iend), ind[i].ncols);
+ }
+}
+
+SiPixelGainCalibrationForHLTGPU::~SiPixelGainCalibrationForHLTGPU() { cudaCheck(cudaFreeHost(gainForHLTonHost_)); }
+
+SiPixelGainCalibrationForHLTGPU::GPUData::~GPUData() {
+ cudaCheck(cudaFree(gainForHLTonGPU));
+ cudaCheck(cudaFree(gainDataOnGPU));
+}
+
+const SiPixelGainForHLTonGPU* SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cudaStream_t cudaStream) const {
+ const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) {
+ cudaCheck(cudaMalloc((void**)&data.gainForHLTonGPU, sizeof(SiPixelGainForHLTonGPU)));
+ cudaCheck(cudaMalloc((void**)&data.gainDataOnGPU, this->gains_->data().size()));
+ // gains.data().data() is used also for non-GPU code, we cannot allocate it on aligned and write-combined memory
+ cudaCheck(cudaMemcpyAsync(
+ data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream));
+
+ cudaCheck(cudaMemcpyAsync(
+ data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream));
+ cudaCheck(cudaMemcpyAsync(&(data.gainForHLTonGPU->v_pedestals),
+ &(data.gainDataOnGPU),
+ sizeof(SiPixelGainForHLTonGPU_DecodingStructure*),
+ cudaMemcpyDefault,
+ stream));
+ });
+ return data.gainForHLTonGPU;
+}
diff --git a/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h
new file mode 100644
index 0000000000000..8ce3924e54609
--- /dev/null
+++ b/CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h
@@ -0,0 +1,63 @@
+#ifndef CondFormats_SiPixelObjects_SiPixelGainForHLTonGPU_h
+#define CondFormats_SiPixelObjects_SiPixelGainForHLTonGPU_h
+
+#include
+#include
+#include
+
+#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
+
+struct SiPixelGainForHLTonGPU_DecodingStructure {
+ uint8_t gain;
+ uint8_t ped;
+};
+
+// copy of SiPixelGainCalibrationForHLT
+class SiPixelGainForHLTonGPU {
+public:
+ using DecodingStructure = SiPixelGainForHLTonGPU_DecodingStructure;
+
+ using Range = std::pair;
+
+ inline __host__ __device__ std::pair getPedAndGain(
+ uint32_t moduleInd, int col, int row, bool& isDeadColumn, bool& isNoisyColumn) const {
+ auto range = rangeAndCols[moduleInd].first;
+ auto nCols = rangeAndCols[moduleInd].second;
+
+ // determine what averaged data block we are in (there should be 1 or 2 of these depending on if plaquette is 1 by X or 2 by X)
+ unsigned int lengthOfColumnData = (range.second - range.first) / nCols;
+ unsigned int lengthOfAveragedDataInEachColumn = 2; // we always only have two values per column averaged block
+ unsigned int numberOfDataBlocksToSkip = row / numberOfRowsAveragedOver_;
+
+ auto offset = range.first + col * lengthOfColumnData + lengthOfAveragedDataInEachColumn * numberOfDataBlocksToSkip;
+
+ assert(offset < range.second);
+ assert(offset < 3088384);
+ assert(0 == offset % 2);
+
+ DecodingStructure const* __restrict__ lp = v_pedestals;
+ auto s = lp[offset / 2];
+
+ isDeadColumn = (s.ped & 0xFF) == deadFlag_;
+ isNoisyColumn = (s.ped & 0xFF) == noisyFlag_;
+
+ return std::make_pair(decodePed(s.ped & 0xFF), decodeGain(s.gain & 0xFF));
+ }
+
+ constexpr float decodeGain(unsigned int gain) const { return gain * gainPrecision + minGain_; }
+ constexpr float decodePed(unsigned int ped) const { return ped * pedPrecision + minPed_; }
+
+ DecodingStructure* v_pedestals;
+ std::pair rangeAndCols[2000];
+
+ float minPed_, maxPed_, minGain_, maxGain_;
+
+ float pedPrecision, gainPrecision;
+
+ unsigned int numberOfRowsAveragedOver_; // this is 80!!!!
+ unsigned int nBinsToUseForEncoding_;
+ unsigned int deadFlag_;
+ unsigned int noisyFlag_;
+};
+
+#endif // CondFormats_SiPixelObjects_SiPixelGainForHLTonGPU_h
diff --git a/Configuration/Applications/python/ConfigBuilder.py b/Configuration/Applications/python/ConfigBuilder.py
index 608651f121385..4ae4b2df7a1d7 100644
--- a/Configuration/Applications/python/ConfigBuilder.py
+++ b/Configuration/Applications/python/ConfigBuilder.py
@@ -921,6 +921,8 @@ def define_Configs(self):
self.loadAndRemember('SimGeneral.HepPDTESSource.'+self._options.particleTable+'_cfi')
self.loadAndRemember('FWCore/MessageService/MessageLogger_cfi')
+ # Eventually replace with some more generic file to load
+ self.loadAndRemember('HeterogeneousCore/CUDAServices/CUDAService_cfi')
self.ALCADefaultCFF="Configuration/StandardSequences/AlCaRecoStreams_cff"
self.GENDefaultCFF="Configuration/StandardSequences/Generator_cff"
diff --git a/Configuration/ProcessModifiers/python/gpu_cff.py b/Configuration/ProcessModifiers/python/gpu_cff.py
new file mode 100644
index 0000000000000..993f71804fbc1
--- /dev/null
+++ b/Configuration/ProcessModifiers/python/gpu_cff.py
@@ -0,0 +1,5 @@
+import FWCore.ParameterSet.Config as cms
+
+# This modifier is for replacing CPU modules with GPU counterparts
+
+gpu = cms.Modifier()
diff --git a/Configuration/ProcessModifiers/python/pixelNtupleFit_cff.py b/Configuration/ProcessModifiers/python/pixelNtupleFit_cff.py
new file mode 100644
index 0000000000000..db8a2ac229a02
--- /dev/null
+++ b/Configuration/ProcessModifiers/python/pixelNtupleFit_cff.py
@@ -0,0 +1,5 @@
+import FWCore.ParameterSet.Config as cms
+
+# This modifier is for replacing the default pixel track "fitting" with either Riemann or BrokenLine fit
+
+pixelNtupleFit = cms.Modifier()
diff --git a/Configuration/PyReleaseValidation/python/relval_2017.py b/Configuration/PyReleaseValidation/python/relval_2017.py
index 68bb0323078a1..7917a21310dd6 100644
--- a/Configuration/PyReleaseValidation/python/relval_2017.py
+++ b/Configuration/PyReleaseValidation/python/relval_2017.py
@@ -20,10 +20,18 @@
# 2018 (ele guns 10, 35, 1000; pho guns 10, 35; mu guns 1, 10, 100, 1000, QCD 3TeV, QCD Flat)
# 2018 (ZMM, TTbar, ZEE, MinBias, TTbar PU, ZEE PU, TTbar design)
# (TTbar trackingOnly, pixelTrackingOnly)
-# he collapse: TTbar, TTbar PU, TTbar design
-# ParkingBPH: TTbar
+# (HE collapse: TTbar, TTbar PU, TTbar design)
+# (ParkingBPH: TTbar)
+# (Patatrack pixel-only: ZMM - on CPU, on GPU, both, auto)
+# (Patatrack pixel-only: TTbar - on CPU, on GPU, both, auto)
+# (Patatrack ECAL-only: TTbar - on CPU, on GPU, both, auto)
+# (Patatrack HCAL-only: TTbar - on CPU, on GPU, both, auto)
# 2021 (ZMM, TTbar, ZEE, MinBias, TTbar PU, TTbar PU premix, ZEE PU, TTbar design)
# (TTbar trackingMkFit)
+# (Patatrack pixel-only: ZMM - on CPU, on GPU, both, auto)
+# (Patatrack pixel-only: TTbar - on CPU, on GPU, both, auto)
+# (Patatrack ECAL-only: TTbar - on CPU, on GPU, both, auto)
+# (Patatrack HCAL-only: TTbar - on CPU, on GPU, both, auto)
# 2023 (TTbar, TTbar PU, TTbar PU premix)
# 2024 (TTbar, TTbar PU, TTbar PU premix)
numWFIB = [10001.0,10002.0,10003.0,10004.0,10005.0,10006.0,10007.0,10008.0,10009.0,10059.0,10071.0,
@@ -34,8 +42,16 @@
10824.1,10824.5,
10824.6,11024.6,11224.6,
10824.8,
+ 10842.501,10842.502, # 10842.503,10842.504,
+ 10824.501,10824.502, # 10824.503,10824.504,
+ # 10824.511,10824.512,10824.513,10824.514,
+ # 10824.521,10824.522,10824.523,10824.524,
11650.0,11634.0,11646.0,11640.0,11834.0,11834.99,11846.0,12024.0,
11634.7,
+ 11650.501,11650.502, # 11650.503,11650.504,
+ 11634.501,11634.502, # 11634.503,11634.504,
+ # 11634.511,11634.512,11634.513,11634.514,
+ # 11634.521,11634.522,11634.523,11634.524,
12434.0,12634.0,12634.99,
12834.0,13034.0,13034.99]
for numWF in numWFIB:
diff --git a/Configuration/PyReleaseValidation/python/relval_standard.py b/Configuration/PyReleaseValidation/python/relval_standard.py
index 95b6e0d7a8a52..5afa5be092aa2 100644
--- a/Configuration/PyReleaseValidation/python/relval_standard.py
+++ b/Configuration/PyReleaseValidation/python/relval_standard.py
@@ -399,6 +399,14 @@
workflows[136.895] = ['',['RunDisplacedJet2018D','HLTDR2_2018','RECODR2_2018reHLT_skimDisplacedJet_Prompt','HARVEST2018_Prompt']]
workflows[136.896] = ['',['RunCharmonium2018D','HLTDR2_2018','RECODR2_2018reHLT_skimCharmonium_Prompt','HARVEST2018_Prompt']]
+### run 2018D pixel tracks ###
+workflows[136.8855] = ['',['RunHLTPhy2018D','HLTDR2_2018','RECODR2_2018reHLT_Prompt_pixelTrackingOnly','HARVEST2018_pixelTrackingOnly']]
+workflows[136.885501] = ['',['RunHLTPhy2018D','HLTDR2_2018','RECODR2_2018reHLT_Patatrack_PixelOnlyCPU','HARVEST2018_pixelTrackingOnly']]
+workflows[136.885502] = ['',['RunHLTPhy2018D','HLTDR2_2018','RECODR2_2018reHLT_Patatrack_PixelOnlyGPU','HARVEST2018_pixelTrackingOnly']]
+workflows[136.8885] = ['',['RunJetHT2018D','HLTDR2_2018','RECODR2_2018reHLT_Prompt_pixelTrackingOnly','HARVEST2018_pixelTrackingOnly']]
+workflows[136.888501] = ['',['RunJetHT2018D','HLTDR2_2018','RECODR2_2018reHLT_Patatrack_PixelOnlyCPU','HARVEST2018_pixelTrackingOnly']]
+workflows[136.888502] = ['',['RunJetHT2018D','HLTDR2_2018','RECODR2_2018reHLT_Patatrack_PixelOnlyGPU','HARVEST2018_pixelTrackingOnly']]
+
# multi-run harvesting
workflows[137.8] = ['',['RunEGamma2018C','HLTDR2_2018','RECODR2_2018reHLT_skimEGamma_Offline_L1TEgDQM',
'RunEGamma2018D','HLTDR2_2018','RECODR2_2018reHLT_skimEGamma_Prompt_L1TEgDQM','HARVEST2018_L1TEgDQM_MULTIRUN']]
diff --git a/Configuration/PyReleaseValidation/python/relval_steps.py b/Configuration/PyReleaseValidation/python/relval_steps.py
index 46dae0224829a..8af413eb4bdfa 100644
--- a/Configuration/PyReleaseValidation/python/relval_steps.py
+++ b/Configuration/PyReleaseValidation/python/relval_steps.py
@@ -2110,6 +2110,12 @@ def gen2018HiMix(fragment,howMuch):
'--era' :'Run2_2016'
}
+step3_pixelNtupleFit = {
+ '--procModifiers': 'pixelNtupleFit',
+}
+step3_gpu = {
+ '--procModifiers': 'gpu',
+}
step3_trackingLowPU = {
'--era': 'Run2_2016_trackingLowPU'
}
@@ -2243,6 +2249,9 @@ def gen2018HiMix(fragment,howMuch):
steps['RECODR2_2018reHLT_Prompt']=merge([{'--conditions':'auto:run2_data_promptlike'},steps['RECODR2_2018reHLT']])
steps['RECODR2_2018reHLT_ZBPrompt']=merge([{'--conditions':'auto:run2_data_promptlike','-s':'RAW2DIGI,L1Reco,RECO,EI,PAT,ALCA:SiStripCalZeroBias+SiStripCalMinBias+TkAlMinBias+EcalESAlign,DQM:@rerecoZeroBias+@ExtraHLT+@miniAODDQM'},steps['RECODR2_2018reHLT']])
+steps['RECODR2_2018reHLT_Prompt_pixelTrackingOnly']=merge([{'-s': 'RAW2DIGI:RawToDigi_pixelOnly,RECO:reconstruction_pixelTrackingOnly,DQM:@pixelTrackingOnlyDQM'},steps['RECODR2_2018reHLT_Prompt']])
+steps['RECODR2_2018reHLT_Patatrack_PixelOnlyCPU']=merge([step3_pixelNtupleFit, steps['RECODR2_2018reHLT_Prompt_pixelTrackingOnly']])
+steps['RECODR2_2018reHLT_Patatrack_PixelOnlyGPU']=merge([step3_gpu, steps['RECODR2_2018reHLT_Prompt_pixelTrackingOnly']])
steps['RECODR2_2018reHLT_Offline']=merge([{'--conditions':'auto:run2_data'},steps['RECODR2_2018reHLT']])
steps['RECODR2_2018reHLT_ZBOffline']=merge([{'--conditions':'auto:run2_data','-s':'RAW2DIGI,L1Reco,RECO,EI,PAT,ALCA:SiStripCalZeroBias+SiStripCalMinBias+TkAlMinBias+EcalESAlign,DQM:@rerecoZeroBias+@ExtraHLT+@miniAODDQM'},steps['RECODR2_2018reHLT']])
steps['RECODR2_2018reHLT_skimEGamma_Prompt_L1TEgDQM']=merge([{'--conditions':'auto:run2_data_promptlike'},steps['RECODR2_2018reHLT_skimEGamma_L1TEgDQM']])
@@ -2579,6 +2588,7 @@ def gen2018HiMix(fragment,howMuch):
steps['HARVEST2018_L1TEgDQM_Prompt'] = merge([ {'-s':'HARVESTING:@standardDQMFakeHLT+@miniAODDQM+@L1TEgamma'}, steps['HARVEST2018_Prompt'] ])
steps['HARVEST2018_L1TMuDQM'] = merge([ {'-s':'HARVESTING:@standardDQMFakeHLT+@miniAODDQM+@L1TMuon'}, steps['HARVEST2018'] ])
steps['HARVEST2018_L1TMuDQM_Prompt'] = merge([ {'-s':'HARVESTING:@standardDQMFakeHLT+@miniAODDQM+@L1TMuon'}, steps['HARVEST2018_Prompt'] ])
+steps['HARVEST2018_pixelTrackingOnly'] = merge([ {'-s':'HARVESTING:@pixelTrackingOnlyDQM'}, steps['HARVEST2018'] ])
steps['HARVEST2018_hBStar'] = merge([ {'--era' : 'Run2_2018_highBetaStar'}, steps['HARVEST2018'] ])
steps['HARVEST2018_HEfail'] = merge([ {'--conditions':'auto:run2_data_promptlike_HEfail'}, steps['HARVEST2018'] ])
steps['HARVEST2018_BadHcalMitig'] = merge([ {'--era' : 'Run2_2018,pf_badHcalMitigation','--conditions':'auto:run2_data_promptlike_HEfail'}, steps['HARVEST2018'] ])
diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py
index 1ba1874bad75c..8ecd4285ce005 100644
--- a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py
+++ b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py
@@ -272,6 +272,72 @@ def condition_(self, fragment, stepList, key, hasHarvest):
'--customise': 'RecoTracker/MkFit/customizeInitialStepToMkFit.customizeInitialStepToMkFit'
}
+# Patatrack workflows
+class UpgradeWorkflowPatatrack(UpgradeWorkflow):
+ def condition(self, fragment, stepList, key, hasHarvest):
+ is_2018_ttbar = ('2018' in key and fragment=="TTbar_13")
+ is_2021_ttbar = ('2021' in key and fragment=="TTbar_14TeV")
+ is_2018_zmumu = ('2018' in key and fragment=="ZMM_13")
+ is_2021_zmumu = ('2021' in key and fragment=="ZMM_14")
+ result = any((is_2018_ttbar, is_2021_ttbar, is_2018_zmumu, is_2021_zmumu)) and hasHarvest and self.condition_(fragment, stepList, key, hasHarvest)
+ if result:
+ # skip ALCA and Nano
+ skipList = [s for s in stepList if (("ALCA" in s) or ("Nano" in s))]
+ for skip in skipList:
+ stepList.remove(skip)
+ return result
+ def condition_(self, fragment, stepList, key, hasHarvest):
+ return True
+
+class UpgradeWorkflowPatatrack_PixelOnlyCPU(UpgradeWorkflowPatatrack):
+ def setup_(self, step, stepName, stepDict, k, properties):
+ if 'Reco' in step: stepDict[stepName][k] = merge([self.step3, stepDict[step][k]])
+ elif 'HARVEST' in step: stepDict[stepName][k] = merge([{'-s': 'HARVESTING:@trackingOnlyValidation+@pixelTrackingOnlyDQM'}, stepDict[step][k]])
+ def condition_(self, fragment, stepList, key, hasHarvest):
+ return '2018' in key or '2021' in key
+upgradeWFs['PatatrackPixelOnlyCPU'] = UpgradeWorkflowPatatrack_PixelOnlyCPU(
+ steps = [
+ 'RecoFull',
+ 'HARVESTFull',
+ 'RecoFullGlobal',
+ 'HARVESTFullGlobal',
+ ],
+ PU = [],
+ suffix = 'Patatrack_PixelOnlyCPU',
+ offset = 0.501,
+)
+upgradeWFs['PatatrackPixelOnlyCPU'].step3 = {
+ '-s': 'RAW2DIGI:RawToDigi_pixelOnly,RECO:reconstruction_pixelTrackingOnly,VALIDATION:@pixelTrackingOnlyValidation,DQM:@pixelTrackingOnlyDQM',
+ '--datatier': 'GEN-SIM-RECO,DQMIO',
+ '--eventcontent': 'RECOSIM,DQM',
+ '--procModifiers': 'pixelNtupleFit'
+}
+
+class UpgradeWorkflowPatatrack_PixelOnlyGPU(UpgradeWorkflowPatatrack):
+ def setup_(self, step, stepName, stepDict, k, properties):
+ if 'Reco' in step: stepDict[stepName][k] = merge([self.step3, stepDict[step][k]])
+ elif 'HARVEST' in step: stepDict[stepName][k] = merge([{'-s': 'HARVESTING:@trackingOnlyValidation+@pixelTrackingOnlyDQM'}, stepDict[step][k]])
+ def condition_(self, fragment, stepList, key, hasHarvest):
+ return '2018' in key or '2021' in key
+upgradeWFs['PatatrackPixelOnlyGPU'] = UpgradeWorkflowPatatrack_PixelOnlyGPU(
+ steps = [
+ 'RecoFull',
+ 'HARVESTFull',
+ 'RecoFullGlobal',
+ 'HARVESTFullGlobal',
+ ],
+ PU = [],
+ suffix = 'Patatrack_PixelOnlyGPU',
+ offset = 0.502,
+)
+upgradeWFs['PatatrackPixelOnlyGPU'].step3 = {
+ '-s': 'RAW2DIGI:RawToDigi_pixelOnly,RECO:reconstruction_pixelTrackingOnly,VALIDATION:@pixelTrackingOnlyValidation,DQM:@pixelTrackingOnlyDQM',
+ '--datatier': 'GEN-SIM-RECO,DQMIO',
+ '--eventcontent': 'RECOSIM,DQM',
+ '--procModifiers': 'gpu'
+}
+# end of Patatrack workflows
+
class UpgradeWorkflow_ProdLike(UpgradeWorkflow):
def setup_(self, step, stepName, stepDict, k, properties):
if 'Reco' in step:
diff --git a/Configuration/StandardSequences/python/RawToDigi_cff.py b/Configuration/StandardSequences/python/RawToDigi_cff.py
index 5eead9b7c72bb..d6ac829a1bfb8 100644
--- a/Configuration/StandardSequences/python/RawToDigi_cff.py
+++ b/Configuration/StandardSequences/python/RawToDigi_cff.py
@@ -3,7 +3,7 @@
# This object is used to selectively make changes for different running
# scenarios. In this case it makes changes for Run 2.
-from EventFilter.SiPixelRawToDigi.SiPixelRawToDigi_cfi import *
+from EventFilter.SiPixelRawToDigi.siPixelDigis_cff import *
from EventFilter.SiStripRawToDigi.SiStripDigis_cfi import *
@@ -46,7 +46,7 @@
from EventFilter.CTPPSRawToDigi.ctppsRawToDigi_cff import *
RawToDigiTask = cms.Task(L1TRawToDigiTask,
- siPixelDigis,
+ siPixelDigisTask,
siStripDigis,
ecalDigis,
ecalPreshowerDigis,
@@ -61,14 +61,14 @@
)
RawToDigi = cms.Sequence(RawToDigiTask)
-RawToDigiTask_noTk = RawToDigiTask.copyAndExclude([siPixelDigis, siStripDigis])
+RawToDigiTask_noTk = RawToDigiTask.copyAndExclude([siPixelDigisTask, siStripDigis])
RawToDigi_noTk = cms.Sequence(RawToDigiTask_noTk)
-RawToDigiTask_pixelOnly = cms.Task(siPixelDigis)
+RawToDigiTask_pixelOnly = cms.Task(siPixelDigisTask, scalersRawToDigi)
RawToDigi_pixelOnly = cms.Sequence(RawToDigiTask_pixelOnly)
scalersRawToDigi.scalersInputTag = 'rawDataCollector'
-siPixelDigis.InputLabel = 'rawDataCollector'
+siPixelDigis.cpu.InputLabel = 'rawDataCollector'
#false by default anyways ecalDigis.DoRegional = False
ecalDigis.InputLabel = 'rawDataCollector'
ecalPreshowerDigis.sourceTag = 'rawDataCollector'
diff --git a/Configuration/StandardSequences/python/Reconstruction_cff.py b/Configuration/StandardSequences/python/Reconstruction_cff.py
index 4b606b213d1cb..52bfc33d5a91e 100644
--- a/Configuration/StandardSequences/python/Reconstruction_cff.py
+++ b/Configuration/StandardSequences/python/Reconstruction_cff.py
@@ -16,7 +16,7 @@
siPixelClusterShapeCachePreSplitting = siPixelClusterShapeCache.clone(
src = 'siPixelClustersPreSplitting'
- )
+)
# Global reco
from RecoEcal.Configuration.RecoEcal_cff import *
@@ -197,9 +197,9 @@
reconstruction_trackingOnly = cms.Sequence(localreco*globalreco_tracking)
reconstruction_pixelTrackingOnly = cms.Sequence(
pixeltrackerlocalreco*
- offlineBeamSpot*
siPixelClusterShapeCachePreSplitting*
- recopixelvertexing
+ recopixelvertexing,
+ offlineBeamSpotTask
)
#need a fully expanded sequence copy
diff --git a/DQM/TrackingMonitorClient/python/pixelTrackingEffFromHitPattern_cff.py b/DQM/TrackingMonitorClient/python/pixelTrackingEffFromHitPattern_cff.py
index 15ceaf93ed20a..cff85e56d94f7 100644
--- a/DQM/TrackingMonitorClient/python/pixelTrackingEffFromHitPattern_cff.py
+++ b/DQM/TrackingMonitorClient/python/pixelTrackingEffFromHitPattern_cff.py
@@ -21,7 +21,10 @@ def _layers(suffix, quant, histoPostfix):
]
pixelTrackingEffFromHitPattern = DQMEDHarvester("DQMGenericClient",
- subDirs = cms.untracked.vstring("Tracking/PixelTrackParameters/HitEffFromHitPattern*"),
+ subDirs = cms.untracked.vstring("Tracking/PixelTrackParameters/pixelTracks/HitEffFromHitPattern*",
+ "Tracking/PixelTrackParameters/dzPV0p1/HitEffFromHitPattern*",
+ "Tracking/PixelTrackParameters/pt_0to1/HitEffFromHitPattern*",
+ "Tracking/PixelTrackParameters/pt_1/HitEffFromHitPattern*"),
efficiency = cms.vstring(
_layers("PU", "GoodNumVertices", "") +
_layers("BX", "BX", "VsBX") +
diff --git a/DQM/TrackingMonitorClient/python/pixelVertexResolutionClient_cfi.py b/DQM/TrackingMonitorClient/python/pixelVertexResolutionClient_cfi.py
new file mode 100644
index 0000000000000..2558e88d26012
--- /dev/null
+++ b/DQM/TrackingMonitorClient/python/pixelVertexResolutionClient_cfi.py
@@ -0,0 +1,7 @@
+import FWCore.ParameterSet.Config as cms
+
+from DQM.TrackingMonitorClient.primaryVertexResolutionClient_cfi import primaryVertexResolutionClient as _primaryVertexResolutionClient
+
+pixelVertexResolutionClient = _primaryVertexResolutionClient.clone(
+ subDirs = ["OfflinePixelPV/Resolution/*"]
+)
diff --git a/DQM/TrackingMonitorSource/python/pixelTracksMonitoring_cff.py b/DQM/TrackingMonitorSource/python/pixelTracksMonitoring_cff.py
index a075f671f05ce..d5deba78b46c8 100644
--- a/DQM/TrackingMonitorSource/python/pixelTracksMonitoring_cff.py
+++ b/DQM/TrackingMonitorSource/python/pixelTracksMonitoring_cff.py
@@ -1,23 +1,77 @@
import FWCore.ParameterSet.Config as cms
import DQM.TrackingMonitor.TrackerCollisionTrackingMonitor_cfi
-pixelTracksMonitoring = DQM.TrackingMonitor.TrackerCollisionTrackingMonitor_cfi.TrackerCollisionTrackMon.clone()
-pixelTracksMonitoring.FolderName = 'Tracking/PixelTrackParameters'
-pixelTracksMonitoring.TrackProducer = 'pixelTracks'
-pixelTracksMonitoring.allTrackProducer = 'pixelTracks'
-pixelTracksMonitoring.beamSpot = 'offlineBeamSpot'
-pixelTracksMonitoring.primaryVertex = 'pixelVertices'
-pixelTracksMonitoring.pvNDOF = 1
-pixelTracksMonitoring.doAllPlots = True
-pixelTracksMonitoring.doLumiAnalysis = True
-pixelTracksMonitoring.doProfilesVsLS = True
-pixelTracksMonitoring.doDCAPlots = True
-pixelTracksMonitoring.doProfilesVsLS = True
-pixelTracksMonitoring.doPlotsVsGoodPVtx = True
-pixelTracksMonitoring.doEffFromHitPatternVsPU = False
-pixelTracksMonitoring.doEffFromHitPatternVsBX = False
-pixelTracksMonitoring.doEffFromHitPatternVsLUMI = False
-pixelTracksMonitoring.doPlotsVsGoodPVtx = True
-pixelTracksMonitoring.doPlotsVsLUMI = True
-pixelTracksMonitoring.doPlotsVsBX = True
+pixelTracksMonitor = DQM.TrackingMonitor.TrackerCollisionTrackingMonitor_cfi.TrackerCollisionTrackMon.clone()
+pixelTracksMonitor.FolderName = 'Tracking/PixelTrackParameters/pixelTracks'
+pixelTracksMonitor.TrackProducer = 'pixelTracks'
+pixelTracksMonitor.allTrackProducer = 'pixelTracks'
+pixelTracksMonitor.beamSpot = 'offlineBeamSpot'
+pixelTracksMonitor.primaryVertex = 'pixelVertices'
+pixelTracksMonitor.pvNDOF = 1
+pixelTracksMonitor.doAllPlots = True
+pixelTracksMonitor.doLumiAnalysis = True
+pixelTracksMonitor.doProfilesVsLS = True
+pixelTracksMonitor.doDCAPlots = True
+pixelTracksMonitor.doProfilesVsLS = True
+pixelTracksMonitor.doPlotsVsGoodPVtx = True
+pixelTracksMonitor.doEffFromHitPatternVsPU = False
+pixelTracksMonitor.doEffFromHitPatternVsBX = False
+pixelTracksMonitor.doEffFromHitPatternVsLUMI = False
+pixelTracksMonitor.doPlotsVsGoodPVtx = True
+pixelTracksMonitor.doPlotsVsLUMI = True
+pixelTracksMonitor.doPlotsVsBX = True
+_trackSelector = cms.EDFilter('TrackSelector',
+ src = cms.InputTag('pixelTracks'),
+ cut = cms.string("")
+)
+
+pixelTracksPt0to1 = _trackSelector.clone(cut = "pt >= 0 & pt < 1 ")
+pixelTracksPt1 = _trackSelector.clone(cut = "pt >= 1 ")
+from DQM.TrackingMonitorSource.TrackCollections2monitor_cff import highPurityPV0p1 as _highPurityPV0p1
+pixelTracksPV0p1 = _highPurityPV0p1.clone(
+ src = "pixelTracks",
+ quality = "",
+ vertexTag = "goodPixelVertices"
+)
+
+pixelTracksMonitorPt0to1 = pixelTracksMonitor.clone(
+ TrackProducer = "pixelTracksPt0to1",
+ FolderName = "Tracking/PixelTrackParameters/pt_0to1"
+)
+pixelTracksMonitorPt1 = pixelTracksMonitor.clone(
+ TrackProducer = "pixelTracksPt1",
+ FolderName = "Tracking/PixelTrackParameters/pt_1"
+)
+pixelTracksMonitorPV0p1 = pixelTracksMonitor.clone(
+ TrackProducer = "pixelTracksPV0p1",
+ FolderName = "Tracking/PixelTrackParameters/dzPV0p1"
+)
+
+
+from CommonTools.ParticleFlow.goodOfflinePrimaryVertices_cfi import goodOfflinePrimaryVertices as _goodOfflinePrimaryVertices
+goodPixelVertices = _goodOfflinePrimaryVertices.clone(
+ src = "pixelVertices",
+)
+
+from DQM.TrackingMonitor.primaryVertexResolution_cfi import primaryVertexResolution as _primaryVertexResolution
+pixelVertexResolution = _primaryVertexResolution.clone(
+ vertexSrc = "goodPixelVertices",
+ rootFolder = "OfflinePixelPV/Resolution",
+)
+
+pixelTracksMonitoringTask = cms.Task(
+ goodPixelVertices,
+ pixelTracksPt0to1,
+ pixelTracksPt1,
+ pixelTracksPV0p1,
+)
+
+pixelTracksMonitoring = cms.Sequence(
+ pixelTracksMonitor +
+ pixelTracksMonitorPt0to1 +
+ pixelTracksMonitorPt1 +
+ pixelTracksMonitorPV0p1 +
+ pixelVertexResolution,
+ pixelTracksMonitoringTask
+)
diff --git a/DQMOffline/Configuration/python/DQMOffline_SecondStep_cff.py b/DQMOffline/Configuration/python/DQMOffline_SecondStep_cff.py
index fba049f84295b..a4d8e88aa9a40 100644
--- a/DQMOffline/Configuration/python/DQMOffline_SecondStep_cff.py
+++ b/DQMOffline/Configuration/python/DQMOffline_SecondStep_cff.py
@@ -117,6 +117,7 @@
from DQM.CTPPS.ctppsDQM_cff import *
from Validation.RecoTau.DQMSequences_cfi import *
from DQM.TrackingMonitorClient.pixelTrackingEffFromHitPattern_cff import *
+from DQM.TrackingMonitorClient.pixelVertexResolutionClient_cfi import *
DQMHarvestDCS = cms.Sequence ( dqmDcsInfoClient )
@@ -175,7 +176,8 @@
DQMHarvestTracking = cms.Sequence( TrackingOfflineDQMClient *
dqmFastTimerServiceClient )
-DQMHarvestPixelTracking = cms.Sequence( pixelTrackingEffFromHitPattern )
+DQMHarvestPixelTracking = cms.Sequence( pixelTrackingEffFromHitPattern *
+ pixelVertexResolutionClient )
DQMHarvestOuterTracker = cms.Sequence(
dqmDcsInfoClient *
diff --git a/DQMOffline/Configuration/python/DQMOffline_cff.py b/DQMOffline/Configuration/python/DQMOffline_cff.py
index d729c97e2c7c8..a54a84f0d04dd 100644
--- a/DQMOffline/Configuration/python/DQMOffline_cff.py
+++ b/DQMOffline/Configuration/python/DQMOffline_cff.py
@@ -138,10 +138,12 @@
#DQMOfflineCommon
from DQM.TrackingMonitorSource.pixelTracksMonitoring_cff import *
+from DQMOffline.RecoB.PixelVertexMonitor_cff import *
from DQM.SiOuterTracker.OuterTrackerSourceConfig_cff import *
from Validation.RecoTau.DQMSequences_cfi import *
-DQMOfflinePixelTracking = cms.Sequence( pixelTracksMonitoring )
+DQMOfflinePixelTracking = cms.Sequence( pixelTracksMonitoring *
+ pixelPVMonitor )
DQMOuterTracker = cms.Sequence( DQMOfflineDCS *
OuterTrackerSource *
diff --git a/DQMOffline/RecoB/python/PixelVertexMonitor_cff.py b/DQMOffline/RecoB/python/PixelVertexMonitor_cff.py
new file mode 100644
index 0000000000000..3c2e3d7d6700e
--- /dev/null
+++ b/DQMOffline/RecoB/python/PixelVertexMonitor_cff.py
@@ -0,0 +1,7 @@
+import FWCore.ParameterSet.Config as cms
+
+from DQMOffline.RecoB.PrimaryVertexMonitor_cff import pvMonitor as _pvMonitor
+pixelPVMonitor = _pvMonitor.clone(
+ TopFolderName = "OfflinePixelPV",
+ vertexLabel = "pixelVertices",
+)
diff --git a/DataFormats/CaloRecHit/test/BuildFile.xml b/DataFormats/CaloRecHit/test/BuildFile.xml
index 983e853f47698..6daf8cf086086 100644
--- a/DataFormats/CaloRecHit/test/BuildFile.xml
+++ b/DataFormats/CaloRecHit/test/BuildFile.xml
@@ -1,13 +1,14 @@
-
-
-