diff --git a/CUDADataFormats/SiPixelCluster/BuildFile.xml b/CUDADataFormats/SiPixelCluster/BuildFile.xml
index 5406d1355533f..1bf72a85ddc0a 100644
--- a/CUDADataFormats/SiPixelCluster/BuildFile.xml
+++ b/CUDADataFormats/SiPixelCluster/BuildFile.xml
@@ -2,6 +2,7 @@
+
diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h
index 4ecdf14d8d33c..7f461bef6d2f9 100644
--- a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h
+++ b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h
@@ -5,16 +5,34 @@
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
+#include "DataFormats/SoATemplate/interface/SoALayout.h"
+#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h"
+
#include
-class SiPixelClustersCUDA {
+GENERATE_SOA_LAYOUT(SiPixelClustersCUDALayout,
+ SOA_COLUMN(uint32_t, moduleStart),
+ SOA_COLUMN(uint32_t, clusInModule),
+ SOA_COLUMN(uint32_t, moduleId),
+ SOA_COLUMN(uint32_t, clusModuleStart))
+
+using SiPixelClustersCUDASoA = SiPixelClustersCUDALayout<>;
+using SiPixelClustersCUDASOAView = SiPixelClustersCUDALayout<>::View;
+using SiPixelClustersCUDASOAConstView = SiPixelClustersCUDALayout<>::ConstView;
+
+// TODO: The class is created via inheritance of the PortableDeviceCollection.
+// This is generally discouraged, and should be done via composition, i.e.,
+// by adding a public class attribute like:
+// cms::cuda::Portabledevicecollection> collection;
+// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306
+class SiPixelClustersCUDA : public cms::cuda::PortableDeviceCollection> {
public:
SiPixelClustersCUDA() = default;
- explicit SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream);
~SiPixelClustersCUDA() = default;
- SiPixelClustersCUDA(const SiPixelClustersCUDA &) = delete;
- SiPixelClustersCUDA &operator=(const SiPixelClustersCUDA &) = delete;
+ explicit SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream)
+ : PortableDeviceCollection>(maxModules + 1, stream) {}
+
SiPixelClustersCUDA(SiPixelClustersCUDA &&) = default;
SiPixelClustersCUDA &operator=(SiPixelClustersCUDA &&) = default;
@@ -26,41 +44,7 @@ class SiPixelClustersCUDA {
uint32_t nClusters() const { return nClusters_h; }
int32_t offsetBPIX2() const { return offsetBPIX2_h; }
- uint32_t *moduleStart() { return moduleStart_d.get(); }
- uint32_t *clusInModule() { return clusInModule_d.get(); }
- uint32_t *moduleId() { return moduleId_d.get(); }
- uint32_t *clusModuleStart() { return clusModuleStart_d.get(); }
-
- uint32_t const *moduleStart() const { return moduleStart_d.get(); }
- uint32_t const *clusInModule() const { return clusInModule_d.get(); }
- uint32_t const *moduleId() const { return moduleId_d.get(); }
- uint32_t const *clusModuleStart() const { return clusModuleStart_d.get(); }
-
- class SiPixelClustersCUDASOAView {
- public:
- __device__ __forceinline__ uint32_t moduleStart(int i) const { return __ldg(moduleStart_ + i); }
- __device__ __forceinline__ uint32_t clusInModule(int i) const { return __ldg(clusInModule_ + i); }
- __device__ __forceinline__ uint32_t moduleId(int i) const { return __ldg(moduleId_ + i); }
- __device__ __forceinline__ uint32_t clusModuleStart(int i) const { return __ldg(clusModuleStart_ + i); }
-
- uint32_t const *moduleStart_;
- uint32_t const *clusInModule_;
- uint32_t const *moduleId_;
- uint32_t const *clusModuleStart_;
- };
-
- SiPixelClustersCUDASOAView const *view() const { return view_d.get(); }
-
private:
- cms::cuda::device::unique_ptr moduleStart_d; // index of the first pixel of each module
- cms::cuda::device::unique_ptr clusInModule_d; // number of clusters found in each module
- cms::cuda::device::unique_ptr moduleId_d; // module id of each module
-
- // originally from rechits
- cms::cuda::device::unique_ptr clusModuleStart_d; // index of the first cluster of each module
-
- cms::cuda::device::unique_ptr view_d; // "me" pointer
-
uint32_t nClusters_h = 0;
int32_t offsetBPIX2_h = 0;
};
diff --git a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc
deleted file mode 100644
index c8a340d2162f9..0000000000000
--- a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc
+++ /dev/null
@@ -1,19 +0,0 @@
-#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
-
-SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxModules, cudaStream_t stream)
- : moduleStart_d(cms::cuda::make_device_unique(maxModules + 1, stream)),
- clusInModule_d(cms::cuda::make_device_unique(maxModules, stream)),
- moduleId_d(cms::cuda::make_device_unique(maxModules, stream)),
- clusModuleStart_d(cms::cuda::make_device_unique(maxModules + 1, stream)) {
- auto view = cms::cuda::make_host_unique(stream);
- view->moduleStart_ = moduleStart_d.get();
- view->clusInModule_ = clusInModule_d.get();
- view->moduleId_ = moduleId_d.get();
- view->clusModuleStart_ = clusModuleStart_d.get();
-
- view_d = cms::cuda::make_device_unique(stream);
- cms::cuda::copyAsync(view_d, view, stream);
-}
diff --git a/CUDADataFormats/SiPixelDigi/BuildFile.xml b/CUDADataFormats/SiPixelDigi/BuildFile.xml
index 0806768a9b657..784f42c4441a4 100644
--- a/CUDADataFormats/SiPixelDigi/BuildFile.xml
+++ b/CUDADataFormats/SiPixelDigi/BuildFile.xml
@@ -3,6 +3,7 @@
+
diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h
index cf6b51687982f..5888cd04a6128 100644
--- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h
+++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h
@@ -6,17 +6,32 @@
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
-#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDASOAView.h"
-
-class SiPixelDigisCUDA {
+#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h"
+#include "DataFormats/SoATemplate/interface/SoALayout.h"
+
+GENERATE_SOA_LAYOUT(SiPixelDigisSoALayout,
+ SOA_COLUMN(int32_t, clus),
+ SOA_COLUMN(uint32_t, pdigi),
+ SOA_COLUMN(uint32_t, rawIdArr),
+ SOA_COLUMN(uint16_t, adc),
+ SOA_COLUMN(uint16_t, xx),
+ SOA_COLUMN(uint16_t, yy),
+ SOA_COLUMN(uint16_t, moduleId))
+
+using SiPixelDigisCUDASOA = SiPixelDigisSoALayout<>;
+using SiPixelDigisCUDASOAView = SiPixelDigisCUDASOA::View;
+using SiPixelDigisCUDASOAConstView = SiPixelDigisCUDASOA::ConstView;
+
+// TODO: The class is created via inheritance of the PortableDeviceCollection.
+// This is generally discouraged, and should be done via composition.
+// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306
+class SiPixelDigisCUDA : public cms::cuda::PortableDeviceCollection> {
public:
- using StoreType = uint16_t;
SiPixelDigisCUDA() = default;
- explicit SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream);
+ explicit SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream)
+ : PortableDeviceCollection>(maxFedWords + 1, stream) {}
~SiPixelDigisCUDA() = default;
- SiPixelDigisCUDA(const SiPixelDigisCUDA &) = delete;
- SiPixelDigisCUDA &operator=(const SiPixelDigisCUDA &) = delete;
SiPixelDigisCUDA(SiPixelDigisCUDA &&) = default;
SiPixelDigisCUDA &operator=(SiPixelDigisCUDA &&) = default;
@@ -28,17 +43,7 @@ class SiPixelDigisCUDA {
uint32_t nModules() const { return nModules_h; }
uint32_t nDigis() const { return nDigis_h; }
- cms::cuda::host::unique_ptr copyAllToHostAsync(cudaStream_t stream) const;
-
- SiPixelDigisCUDASOAView view() { return m_view; }
- SiPixelDigisCUDASOAView const view() const { return m_view; }
-
private:
- // These are consumed by downstream device code
- cms::cuda::device::unique_ptr m_store;
-
- SiPixelDigisCUDASOAView m_view;
-
uint32_t nModules_h = 0;
uint32_t nDigis_h = 0;
};
diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDASOAView.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDASOAView.h
deleted file mode 100644
index 78406cd241473..0000000000000
--- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDASOAView.h
+++ /dev/null
@@ -1,112 +0,0 @@
-#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDASOAView_h
-#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDASOAView_h
-
-#include
-
-#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
-
-#include
-
-class SiPixelDigisCUDASOAView {
-public:
- friend class SiPixelDigisCUDA;
-
- template
- friend class SiPixelRecHitSoAFromLegacyT;
-
- enum class StorageLocation {
- kCLUS = 0,
- kPDIGI = 2,
- kRAWIDARR = 4,
- kADC = 6,
- kXX = 7,
- kYY = 8,
- kMODULEIND = 9,
- kMAX = 10
- };
- /*
- ============================================================================================================================
- | CLUS | PDIGI | RAWIDARR | ADC | XX | YY | MODULEIND |
- ============================================================================================================================
- | 0: N*32 | 2: N*32 | 4: N*32 | 6: N*16 | 7: N*16 | 8: N*16 | 9: N*16 |
- ============================================================================================================================
- */
- // These are for CPU output
- // we don't copy local x and y coordinates and module index
- enum class StorageLocationHost { kCLUS = 0, kPDIGI = 2, kRAWIDARR = 4, kADC = 6, kMAX = 7 };
- /*
- ========================================================================================
- | CLUS | PDIGI | RAWIDARR | ADC |
- ========================================================================================
- | 0: N*32 | 2: N*32 | 4: N*32 | 6: N*16 |
- ========================================================================================
- */
-
- SiPixelDigisCUDASOAView() = default;
-
- template
- SiPixelDigisCUDASOAView(StoreType& store, int maxFedWords, StorageLocation s) {
- xx_ = getColumnAddress(StorageLocation::kXX, store, maxFedWords);
- yy_ = getColumnAddress(StorageLocation::kYY, store, maxFedWords);
- adc_ = getColumnAddress(StorageLocation::kADC, store, maxFedWords);
- moduleInd_ = getColumnAddress(StorageLocation::kMODULEIND, store, maxFedWords);
- clus_ = getColumnAddress(StorageLocation::kCLUS, store, maxFedWords);
- pdigi_ = getColumnAddress(StorageLocation::kPDIGI, store, maxFedWords);
- rawIdArr_ = getColumnAddress(StorageLocation::kRAWIDARR, store, maxFedWords);
- }
-
- template
- SiPixelDigisCUDASOAView(StoreType& store, int maxFedWords, StorageLocationHost s) {
- adc_ = getColumnAddress(StorageLocationHost::kADC, store, maxFedWords);
- clus_ = getColumnAddress(StorageLocationHost::kCLUS, store, maxFedWords);
- pdigi_ = getColumnAddress(StorageLocationHost::kPDIGI, store, maxFedWords);
- rawIdArr_ = getColumnAddress(StorageLocationHost::kRAWIDARR, store, maxFedWords);
- }
-
- __device__ __forceinline__ uint16_t xx(int i) const { return __ldg(xx_ + i); }
- __device__ __forceinline__ uint16_t yy(int i) const { return __ldg(yy_ + i); }
- __device__ __forceinline__ uint16_t adc(int i) const { return __ldg(adc_ + i); }
- __device__ __forceinline__ uint16_t moduleInd(int i) const { return __ldg(moduleInd_ + i); }
- __device__ __forceinline__ int32_t clus(int i) const { return __ldg(clus_ + i); }
- __device__ __forceinline__ uint32_t pdigi(int i) const { return __ldg(pdigi_ + i); }
- __device__ __forceinline__ uint32_t rawIdArr(int i) const { return __ldg(rawIdArr_ + i); }
-
- const uint16_t* xx() const { return xx_; }
- const uint16_t* yy() const { return yy_; }
- const uint16_t* adc() const { return adc_; }
- const uint16_t* moduleInd() const { return moduleInd_; }
- const int32_t* clus() const { return clus_; }
- const uint32_t* pdigi() const { return pdigi_; }
- const uint32_t* rawIdArr() const { return rawIdArr_; }
-
- uint16_t* xx() { return xx_; }
- uint16_t* yy() { return yy_; }
- uint16_t* adc() { return adc_; }
- uint16_t* moduleInd() { return moduleInd_; }
- int32_t* clus() { return clus_; }
- uint32_t* pdigi() { return pdigi_; }
- uint32_t* rawIdArr() { return rawIdArr_; }
-
-private:
- uint16_t* xx_; // local coordinates of each pixel
- uint16_t* yy_;
- uint16_t* adc_; // ADC of each pixel
- uint16_t* moduleInd_; // module id of each pixel
- int32_t* clus_; // cluster id of each pixel
- uint32_t* pdigi_;
- uint32_t* rawIdArr_;
-
- template
- ReturnType* getColumnAddress(LocationType column, StoreType& store, int size) {
- return reinterpret_cast(store.get() + static_cast(column) * roundFor128ByteAlignment(size));
- }
-
- static int roundFor128ByteAlignment(int size) {
- constexpr int mul = 128 / sizeof(uint16_t);
- return ((size + mul - 1) / mul) * mul;
- };
-};
-
-#endif
diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc
deleted file mode 100644
index 9a7f8ae8bdad5..0000000000000
--- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc
+++ /dev/null
@@ -1,29 +0,0 @@
-#include
-
-#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
-
-SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream)
- : m_store(cms::cuda::make_device_unique(
- SiPixelDigisCUDASOAView::roundFor128ByteAlignment(maxFedWords) *
- static_cast(SiPixelDigisCUDASOAView::StorageLocation::kMAX),
- stream)),
- m_view(m_store, maxFedWords, SiPixelDigisCUDASOAView::StorageLocation::kMAX) {
- assert(maxFedWords != 0);
-}
-
-cms::cuda::host::unique_ptr SiPixelDigisCUDA::copyAllToHostAsync(
- cudaStream_t stream) const {
- auto ret = cms::cuda::make_host_unique(
- m_view.roundFor128ByteAlignment(nDigis()) * static_cast(SiPixelDigisCUDASOAView::StorageLocationHost::kMAX),
- stream);
- cudaCheck(cudaMemcpyAsync(ret.get(),
- m_view.clus(),
- m_view.roundFor128ByteAlignment(nDigis()) * sizeof(SiPixelDigisCUDA::StoreType) *
- static_cast(SiPixelDigisCUDASOAView::StorageLocationHost::kMAX),
- cudaMemcpyDeviceToHost,
- stream));
- return ret;
-}
diff --git a/CUDADataFormats/Track/BuildFile.xml b/CUDADataFormats/Track/BuildFile.xml
index e3f9a0910bbd8..cf07e3b540f24 100644
--- a/CUDADataFormats/Track/BuildFile.xml
+++ b/CUDADataFormats/Track/BuildFile.xml
@@ -2,6 +2,7 @@
+
diff --git a/CUDADataFormats/Track/README.md b/CUDADataFormats/Track/README.md
new file mode 100644
index 0000000000000..8f66d9e4c4467
--- /dev/null
+++ b/CUDADataFormats/Track/README.md
@@ -0,0 +1,50 @@
+# Track CUDA Data Formats
+
+`CUDADataFormat`s meant to be used on Host (CPU) or Device (CUDA GPU) for
+storing information about `Track`s created during the Pixel-local Reconstruction
+chain. It stores data in an SoA manner. It combines the data contained in the
+deprecated `TrackSoAHeterogeneousT` and `TrajectoryStateSoAT` classes.
+
+The host format is inheriting from `CUDADataFormats/Common/interface/PortableHostCollection.h`,
+while the device format is inheriting from `CUDADataFormats/Common/interface/PortableDeviceCollection.h`
+
+Both formats use the same SoA Layout (`TrackSoAHeterogeneousLayout`) which is generated
+via the `GENERATE_SOA_LAYOUT` macro in the `PixelTrackUtilities.h` file.
+
+## Notes
+
+-`hitIndices` and `detIndices`, instances of `HitContainer`, have been added into the
+layout as `SOA_SCALAR`s, meaning that they manage their own data independently from the SoA
+`Layout`. This could be improved in the future, if `HitContainer` (aka a `OneToManyAssoc` of fixed size)
+is replaced, but there don't seem to be any conflicts in including it in the `Layout` like this.
+- Host and Device classes should **not** be created via inheritance, as they're done here,
+but via composition. See [this discussion](https://github.com/cms-sw/cmssw/pull/40465#discussion_r1066039309).
+
+## TrackSoAHeterogeneousHost
+
+The version of the data format to be used for storing `Track` data on the CPU.
+Instances of this class are to be used for:
+
+- Having a place to copy data to host from device, via `cudaMemcpy`, or
+- Running host-side algorithms using data stored in an SoA manner.
+
+## TrackSoAHeterogeneousDevice
+
+The version of the data format to be used for storing `Track` data on the GPU.
+
+Instances of `TrackSoAHeterogeneousDevice` are to be created on host and be
+used on device only. To do so, the instance's `view()` method is to be called
+to pass a `View` to any kernel launched. Accessing data from the `view()` is not
+possible on the host side.
+
+## Utilities
+
+`PixelTrackUtilities.h` contains a collection of methods which were originally
+defined as class methods inside either `TrackSoAHeterogeneousT` and `TrajectoryStateSoAT`
+which have been adapted to operate on `View` instances, so that they are callable
+from within `__global__` kernels, on both CPU and CPU.
+
+## Use case
+
+See `test/TrackSoAHeterogeneous_test.cpp` for a simple example of instantiation,
+processing and copying from device to host.
diff --git a/CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h b/CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h
deleted file mode 100644
index f9e9b3a37c63f..0000000000000
--- a/CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef CUDADataFormats_Track_PixelTrackHeterogeneous_h
-#define CUDADataFormats_Track_PixelTrackHeterogeneous_h
-
-#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h"
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousT.h"
-#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
-
-template
-using PixelTrackHeterogeneousT = HeterogeneousSoA>;
-
-#endif // #ifndef CUDADataFormats_Track_PixelTrackHeterogeneous_h
diff --git a/CUDADataFormats/Track/interface/PixelTrackUtilities.h b/CUDADataFormats/Track/interface/PixelTrackUtilities.h
new file mode 100644
index 0000000000000..6d7ea258be8d2
--- /dev/null
+++ b/CUDADataFormats/Track/interface/PixelTrackUtilities.h
@@ -0,0 +1,243 @@
+#ifndef CUDADataFormats_Track_PixelTrackUtilities_h
+#define CUDADataFormats_Track_PixelTrackUtilities_h
+
+#include
+#include
+#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
+#include "DataFormats/SoATemplate/interface/SoALayout.h"
+
+namespace pixelTrack {
+
+ enum class Quality : uint8_t { bad = 0, edup, dup, loose, strict, tight, highPurity, notQuality };
+ constexpr uint32_t qualitySize{uint8_t(Quality::notQuality)};
+ const std::string qualityName[qualitySize]{"bad", "edup", "dup", "loose", "strict", "tight", "highPurity"};
+ inline Quality qualityByName(std::string const &name) {
+ auto qp = std::find(qualityName, qualityName + qualitySize, name) - qualityName;
+ return static_cast(qp);
+ }
+
+} // namespace pixelTrack
+
+template
+struct TrackSoA {
+ static constexpr int32_t S = TrackerTraits::maxNumberOfTuples;
+ static constexpr int32_t H = TrackerTraits::avgHitsPerTrack;
+ // Aliases in order to not confuse the GENERATE_SOA_LAYOUT
+ // macro with weird colons and angled brackets.
+ using Vector5f = Eigen::Matrix;
+ using Vector15f = Eigen::Matrix;
+ using Quality = pixelTrack::Quality;
+
+ using hindex_type = uint32_t;
+
+ using HitContainer = cms::cuda::OneToManyAssoc;
+
+ GENERATE_SOA_LAYOUT(TrackSoALayout,
+ SOA_COLUMN(Quality, quality),
+ SOA_COLUMN(float, chi2),
+ SOA_COLUMN(int8_t, nLayers),
+ SOA_COLUMN(float, eta),
+ SOA_COLUMN(float, pt),
+ SOA_EIGEN_COLUMN(Vector5f, state),
+ SOA_EIGEN_COLUMN(Vector15f, covariance),
+ SOA_SCALAR(int, nTracks),
+ SOA_SCALAR(HitContainer, hitIndices),
+ SOA_SCALAR(HitContainer, detIndices))
+};
+
+// Methods that operate on View and ConstView of the TrackSoA, and cannot be class methods.
+
+template
+struct TracksUtilities {
+ using TrackSoAView = typename TrackSoA::template TrackSoALayout<>::View;
+ using TrackSoAConstView = typename TrackSoA::template TrackSoALayout<>::ConstView;
+ using hindex_type = typename TrackSoA::hindex_type;
+
+ // State at the Beam spot
+ // phi,tip,1/pt,cotan(theta),zip
+ static __host__ __device__ inline float charge(const TrackSoAConstView &tracks, int32_t i) {
+ return std::copysign(1.f, tracks[i].state()(2));
+ }
+
+ static constexpr __host__ __device__ inline float phi(const TrackSoAConstView &tracks, int32_t i) {
+ return tracks[i].state()(0);
+ }
+
+ static constexpr __host__ __device__ inline float tip(const TrackSoAConstView &tracks, int32_t i) {
+ return tracks[i].state()(1);
+ }
+
+ static constexpr __host__ __device__ inline float zip(const TrackSoAConstView &tracks, int32_t i) {
+ return tracks[i].state()(4);
+ }
+
+ static constexpr __host__ __device__ inline bool isTriplet(const TrackSoAConstView &tracks, int i) {
+ return tracks[i].nLayers() == 3;
+ }
+
+ template
+ static constexpr __host__ __device__ inline void copyFromCircle(
+ TrackSoAView &tracks, V3 const &cp, M3 const &ccov, V2 const &lp, M2 const &lcov, float b, int32_t i) {
+ tracks[i].state() << cp.template cast(), lp.template cast();
+
+ tracks[i].state()(2) = tracks[i].state()(2) * b;
+ auto cov = tracks[i].covariance();
+ cov(0) = ccov(0, 0);
+ cov(1) = ccov(0, 1);
+ cov(2) = b * float(ccov(0, 2));
+ cov(4) = cov(3) = 0;
+ cov(5) = ccov(1, 1);
+ cov(6) = b * float(ccov(1, 2));
+ cov(8) = cov(7) = 0;
+ cov(9) = b * b * float(ccov(2, 2));
+ cov(11) = cov(10) = 0;
+ cov(12) = lcov(0, 0);
+ cov(13) = lcov(0, 1);
+ cov(14) = lcov(1, 1);
+ }
+
+ template
+ static constexpr __host__ __device__ inline void copyFromDense(TrackSoAView &tracks,
+ V5 const &v,
+ M5 const &cov,
+ int32_t i) {
+ tracks[i].state() = v.template cast();
+ for (int j = 0, ind = 0; j < 5; ++j)
+ for (auto k = j; k < 5; ++k)
+ tracks[i].covariance()(ind++) = cov(j, k);
+ }
+
+ template
+ static constexpr __host__ __device__ inline void copyToDense(const TrackSoAConstView &tracks,
+ V5 &v,
+ M5 &cov,
+ int32_t i) {
+ v = tracks[i].state().template cast();
+ for (int j = 0, ind = 0; j < 5; ++j) {
+ cov(j, j) = tracks[i].covariance()(ind++);
+ for (auto k = j + 1; k < 5; ++k)
+ cov(k, j) = cov(j, k) = tracks[i].covariance()(ind++);
+ }
+ }
+
+ static constexpr __host__ __device__ inline int computeNumberOfLayers(const TrackSoAConstView &tracks, int32_t i) {
+ auto pdet = tracks.detIndices().begin(i);
+ int nl = 1;
+ auto ol = pixelTopology::getLayer(*pdet);
+ for (; pdet < tracks.detIndices().end(i); ++pdet) {
+ auto il = pixelTopology::getLayer(*pdet);
+ if (il != ol)
+ ++nl;
+ ol = il;
+ }
+ return nl;
+ }
+
+ static constexpr __host__ __device__ inline int nHits(const TrackSoAConstView &tracks, int i) {
+ return tracks.detIndices().size(i);
+ }
+};
+
+namespace pixelTrack {
+
+ template
+ struct QualityCutsT {};
+
+ template
+ struct QualityCutsT> {
+ using TrackSoAView = typename TrackSoA::template TrackSoALayout<>::View;
+ using TrackSoAConstView = typename TrackSoA::template TrackSoALayout<>::ConstView;
+ using tracksHelper = TracksUtilities;
+ // chi2 cut = chi2Scale * (chi2Coeff[0] + pT/GeV * (chi2Coeff[1] + pT/GeV * (chi2Coeff[2] + pT/GeV * chi2Coeff[3])))
+ float chi2Coeff[4];
+ float chi2MaxPt; // GeV
+ float chi2Scale;
+
+ struct Region {
+ float maxTip; // cm
+ float minPt; // GeV
+ float maxZip; // cm
+ };
+
+ Region triplet;
+ Region quadruplet;
+
+ __device__ __forceinline__ bool isHP(const TrackSoAConstView &tracks, int nHits, int it) const {
+ // impose "region cuts" based on the fit results (phi, Tip, pt, cotan(theta)), Zip)
+ // default cuts:
+ // - for triplets: |Tip| < 0.3 cm, pT > 0.5 GeV, |Zip| < 12.0 cm
+ // - for quadruplets: |Tip| < 0.5 cm, pT > 0.3 GeV, |Zip| < 12.0 cm
+ // (see CAHitNtupletGeneratorGPU.cc)
+ auto const ®ion = (nHits > 3) ? quadruplet : triplet;
+ return (std::abs(tracksHelper::tip(tracks, it)) < region.maxTip) and (tracks.pt(it) > region.minPt) and
+ (std::abs(tracksHelper::zip(tracks, it)) < region.maxZip);
+ }
+
+ __device__ __forceinline__ bool strictCut(const TrackSoAConstView &tracks, int it) const {
+ auto roughLog = [](float x) {
+ // max diff [0.5,12] at 1.25 0.16143
+ // average diff 0.0662998
+ union IF {
+ uint32_t i;
+ float f;
+ };
+ IF z;
+ z.f = x;
+ uint32_t lsb = 1 < 21;
+ z.i += lsb;
+ z.i >>= 21;
+ auto f = z.i & 3;
+ int ex = int(z.i >> 2) - 127;
+
+ // log2(1+0.25*f)
+ // averaged over bins
+ const float frac[4] = {0.160497f, 0.452172f, 0.694562f, 0.901964f};
+ return float(ex) + frac[f];
+ };
+
+ float pt = std::min(tracks.pt(it), chi2MaxPt);
+ float chi2Cut = chi2Scale * (chi2Coeff[0] + roughLog(pt) * chi2Coeff[1]);
+ if (tracks.chi2(it) >= chi2Cut) {
+#ifdef NTUPLE_FIT_DEBUG
+ printf("Bad chi2 %d pt %f eta %f chi2 %f\n", it, tracks.pt(it), tracks.eta(it), tracks.chi2(it));
+#endif
+ return true;
+ }
+ return false;
+ }
+ };
+
+ template
+ struct QualityCutsT> {
+ using TrackSoAView = typename TrackSoA::template TrackSoALayout<>::View;
+ using TrackSoAConstView = typename TrackSoA::template TrackSoALayout<>::ConstView;
+ using tracksHelper = TracksUtilities;
+
+ float maxChi2;
+ float minPt;
+ float maxTip;
+ float maxZip;
+
+ __device__ __forceinline__ bool isHP(const TrackSoAConstView &tracks, int nHits, int it) const {
+ return (std::abs(tracksHelper::tip(tracks, it)) < maxTip) and (tracks.pt(it) > minPt) and
+ (std::abs(tracksHelper::zip(tracks, it)) < maxZip);
+ }
+ __device__ __forceinline__ bool strictCut(const TrackSoAConstView &tracks, int it) const {
+ return tracks.chi2(it) >= maxChi2;
+ }
+ };
+
+} // namespace pixelTrack
+
+template
+using TrackLayout = typename TrackSoA::template TrackSoALayout<>;
+template
+using TrackSoAView = typename TrackSoA::template TrackSoALayout<>::View;
+template
+using TrackSoAConstView = typename TrackSoA::template TrackSoALayout<>::ConstView;
+
+template struct TracksUtilities;
+template struct TracksUtilities;
+
+#endif
diff --git a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h b/CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h
new file mode 100644
index 0000000000000..1938991e071e1
--- /dev/null
+++ b/CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h
@@ -0,0 +1,36 @@
+#ifndef CUDADataFormats_Track_TrackHeterogeneousDevice_H
+#define CUDADataFormats_Track_TrackHeterogeneousDevice_H
+
+#include
+
+#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
+#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h"
+
+#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
+
+// TODO: The class is created via inheritance of the PortableDeviceCollection.
+// This is generally discouraged, and should be done via composition.
+// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306
+template
+class TrackSoAHeterogeneousDevice : public cms::cuda::PortableDeviceCollection> {
+public:
+ using cms::cuda::PortableDeviceCollection>::view;
+ using cms::cuda::PortableDeviceCollection>::const_view;
+ using cms::cuda::PortableDeviceCollection>::buffer;
+ using cms::cuda::PortableDeviceCollection>::bufferSize;
+
+ TrackSoAHeterogeneousDevice() = default; // cms::cuda::Product needs this
+
+ // Constructor which specifies the SoA size
+ explicit TrackSoAHeterogeneousDevice(cudaStream_t stream)
+ : cms::cuda::PortableDeviceCollection>(TrackerTraits::maxNumberOfTuples, stream) {}
+};
+
+namespace pixelTrack {
+
+ using TrackSoADevicePhase1 = TrackSoAHeterogeneousDevice;
+ using TrackSoADevicePhase2 = TrackSoAHeterogeneousDevice;
+
+} // namespace pixelTrack
+
+#endif // CUDADataFormats_Track_TrackHeterogeneousT_H
diff --git a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h b/CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h
new file mode 100644
index 0000000000000..af8af2a40a52e
--- /dev/null
+++ b/CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h
@@ -0,0 +1,35 @@
+#ifndef CUDADataFormats_Track_TrackHeterogeneousHost_H
+#define CUDADataFormats_Track_TrackHeterogeneousHost_H
+
+#include
+
+#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
+#include "CUDADataFormats/Common/interface/PortableHostCollection.h"
+
+// TODO: The class is created via inheritance of the PortableHostCollection.
+// This is generally discouraged, and should be done via composition.
+// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306
+template
+class TrackSoAHeterogeneousHost : public cms::cuda::PortableHostCollection> {
+public:
+ static constexpr int32_t S = TrackerTraits::maxNumberOfTuples; //TODO: this could be made configurable at runtime
+ explicit TrackSoAHeterogeneousHost() : cms::cuda::PortableHostCollection>(S) {}
+
+ using cms::cuda::PortableHostCollection>::view;
+ using cms::cuda::PortableHostCollection>::const_view;
+ using cms::cuda::PortableHostCollection>::buffer;
+ using cms::cuda::PortableHostCollection>::bufferSize;
+
+ // Constructor which specifies the SoA size
+ explicit TrackSoAHeterogeneousHost(cudaStream_t stream)
+ : cms::cuda::PortableHostCollection>(S, stream) {}
+};
+
+namespace pixelTrack {
+
+ using TrackSoAHostPhase1 = TrackSoAHeterogeneousHost;
+ using TrackSoAHostPhase2 = TrackSoAHeterogeneousHost;
+
+} // namespace pixelTrack
+
+#endif // CUDADataFormats_Track_TrackHeterogeneousT_H
diff --git a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousT.h b/CUDADataFormats/Track/interface/TrackSoAHeterogeneousT.h
deleted file mode 100644
index b5b1df0d5118a..0000000000000
--- a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousT.h
+++ /dev/null
@@ -1,195 +0,0 @@
-#ifndef CUDADataFormats_Track_TrackHeterogeneousT_H
-#define CUDADataFormats_Track_TrackHeterogeneousT_H
-
-#include
-#include
-
-#include "CUDADataFormats/Track/interface/TrajectoryStateSoAT.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
-#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
-#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h"
-#include "DataFormats/Common/interface/CMS_CLASS_VERSION.h"
-
-namespace pixelTrack {
-
- enum class Quality : uint8_t { bad = 0, edup, dup, loose, strict, tight, highPurity, notQuality };
- constexpr uint32_t qualitySize{uint8_t(Quality::notQuality)};
- const std::string qualityName[qualitySize]{"bad", "edup", "dup", "loose", "strict", "tight", "highPurity"};
- inline Quality qualityByName(std::string const &name) {
- auto qp = std::find(qualityName, qualityName + qualitySize, name) - qualityName;
- return static_cast(qp);
- }
-
-} // namespace pixelTrack
-
-template
-class TrackSoAHeterogeneousT {
-public:
- static constexpr int32_t S = TrackerTraits::maxNumberOfTuples;
- static constexpr int32_t H = TrackerTraits::maxHitsOnTrack; // Average hits rather than max?
- static constexpr int32_t stride() { return S; }
-
- using hindex_type = uint32_t; //TrackerTraits::hindex_type ?
-
- using Quality = pixelTrack::Quality;
- using HitContainer = cms::cuda::OneToManyAssoc;
-
- // Always check quality is at least loose!
- // CUDA does not support enums in __lgc ...
-protected:
- eigenSoA::ScalarSoA quality_;
-
-public:
- constexpr Quality quality(int32_t i) const { return (Quality)(quality_(i)); }
- constexpr Quality &quality(int32_t i) { return (Quality &)(quality_(i)); }
- constexpr Quality const *qualityData() const { return (Quality const *)(quality_.data()); }
- constexpr Quality *qualityData() { return (Quality *)(quality_.data()); }
-
- // this is chi2/ndof as not necessarely all hits are used in the fit
- eigenSoA::ScalarSoA chi2;
-
- eigenSoA::ScalarSoA nLayers;
-
- constexpr int nTracks() const { return nTracks_; }
- constexpr void setNTracks(int n) { nTracks_ = n; }
-
- constexpr int nHits(int i) const { return detIndices.size(i); }
-
- constexpr bool isTriplet(int i) const { return nLayers(i) == 3; }
-
- constexpr int computeNumberOfLayers(int32_t i) const {
- // layers are in order and we assume tracks are either forward or backward
- auto pdet = detIndices.begin(i);
- int nl = 1;
- auto ol = pixelTopology::getLayer(*pdet);
- for (; pdet < detIndices.end(i); ++pdet) {
- auto il = pixelTopology::getLayer(*pdet);
- if (il != ol)
- ++nl;
- ol = il;
- }
- return nl;
- }
-
- // State at the Beam spot
- // phi,tip,1/pt,cotan(theta),zip
- TrajectoryStateSoAT stateAtBS;
- eigenSoA::ScalarSoA eta;
- eigenSoA::ScalarSoA pt;
- constexpr float charge(int32_t i) const { return std::copysign(1.f, stateAtBS.state(i)(2)); }
- constexpr float phi(int32_t i) const { return stateAtBS.state(i)(0); }
- constexpr float tip(int32_t i) const { return stateAtBS.state(i)(1); }
- constexpr float zip(int32_t i) const { return stateAtBS.state(i)(4); }
-
- // state at the detector of the outermost hit
- // representation to be decided...
- // not yet filled on GPU
- // TrajectoryStateSoA stateAtOuterDet;
-
- HitContainer hitIndices;
- HitContainer detIndices;
-
-private:
- int nTracks_;
-};
-
-namespace pixelTrack {
-
- template
- using TrackSoAT = TrackSoAHeterogeneousT;
-
- template
- using HitContainerT = typename TrackSoAHeterogeneousT::HitContainer;
-
- //Used only to ease classes definitions
- using TrackSoAPhase1 = TrackSoAHeterogeneousT;
- using TrackSoAPhase2 = TrackSoAHeterogeneousT;
-
- template
- struct QualityCutsT {};
-
- template
- struct QualityCutsT> {
- // chi2 cut = chi2Scale * (chi2Coeff[0] + pT/GeV * (chi2Coeff[1] + pT/GeV * (chi2Coeff[2] + pT/GeV * chi2Coeff[3])))
- float chi2Coeff[4];
- float chi2MaxPt; // GeV
- float chi2Scale;
-
- struct Region {
- float maxTip; // cm
- float minPt; // GeV
- float maxZip; // cm
- };
-
- Region triplet;
- Region quadruplet;
-
- __device__ __forceinline__ bool isHP(TrackSoAHeterogeneousT const *__restrict__ tracks,
- int nHits,
- int it) const {
- // impose "region cuts" based on the fit results (phi, Tip, pt, cotan(theta)), Zip)
- // default cuts:
- // - for triplets: |Tip| < 0.3 cm, pT > 0.5 GeV, |Zip| < 12.0 cm
- // - for quadruplets: |Tip| < 0.5 cm, pT > 0.3 GeV, |Zip| < 12.0 cm
- // (see CAHitNtupletGeneratorGPU.cc)
- auto const ®ion = (nHits > 3) ? quadruplet : triplet;
- return (std::abs(tracks->tip(it)) < region.maxTip) and (tracks->pt(it) > region.minPt) and
- (std::abs(tracks->zip(it)) < region.maxZip);
- }
-
- __device__ __forceinline__ bool strictCut(TrackSoAHeterogeneousT const *__restrict__ tracks,
- int it) const {
- auto roughLog = [](float x) {
- // max diff [0.5,12] at 1.25 0.16143
- // average diff 0.0662998
- union IF {
- uint32_t i;
- float f;
- };
- IF z;
- z.f = x;
- uint32_t lsb = 1 < 21;
- z.i += lsb;
- z.i >>= 21;
- auto f = z.i & 3;
- int ex = int(z.i >> 2) - 127;
-
- // log2(1+0.25*f)
- // averaged over bins
- const float frac[4] = {0.160497f, 0.452172f, 0.694562f, 0.901964f};
- return float(ex) + frac[f];
- };
-
- float pt = std::min(tracks->pt(it), chi2MaxPt);
- float chi2Cut = chi2Scale * (chi2Coeff[0] + roughLog(pt) * chi2Coeff[1]);
- if (tracks->chi2(it) >= chi2Cut) {
-#ifdef NTUPLE_FIT_DEBUG
- printf("Bad chi2 %d pt %f eta %f chi2 %f\n", it, tracks->pt(it), tracks->eta(it), tracks->chi2(it));
-#endif
- return true;
- }
- return false;
- }
- };
-
- template
- struct QualityCutsT> {
- float maxChi2;
- float minPt;
- float maxTip;
- float maxZip;
-
- __device__ __forceinline__ bool isHP(TrackSoAHeterogeneousT const *__restrict__ tracks,
- int nHits,
- int it) const {
- return (std::abs(tracks->tip(it)) < maxTip) and (tracks->pt(it) > minPt) and (std::abs(tracks->zip(it)) < maxZip);
- }
- __device__ __forceinline__ bool strictCut(TrackSoAHeterogeneousT const *__restrict__ tracks,
- int it) const {
- return tracks->chi2(it) >= maxChi2;
- }
- };
-
-} // namespace pixelTrack
-
-#endif // CUDADataFormats_Track_TrackHeterogeneousT_H
diff --git a/CUDADataFormats/Track/interface/TrajectoryStateSoAT.h b/CUDADataFormats/Track/interface/TrajectoryStateSoAT.h
deleted file mode 100644
index 64fcd573a6991..0000000000000
--- a/CUDADataFormats/Track/interface/TrajectoryStateSoAT.h
+++ /dev/null
@@ -1,59 +0,0 @@
-#ifndef CUDADataFormats_Track_TrajectoryStateSOAT_H
-#define CUDADataFormats_Track_TrajectoryStateSOAT_H
-
-#include
-#include "HeterogeneousCore/CUDAUtilities/interface/eigenSoA.h"
-
-template
-struct TrajectoryStateSoAT {
- using Vector5f = Eigen::Matrix;
- using Vector15f = Eigen::Matrix;
-
- using Vector5d = Eigen::Matrix;
- using Matrix5d = Eigen::Matrix;
-
- static constexpr int32_t stride() { return S; }
-
- eigenSoA::MatrixSoA state;
- eigenSoA::MatrixSoA covariance;
-
- template
- __host__ __device__ inline void copyFromCircle(
- V3 const& cp, M3 const& ccov, V2 const& lp, M2 const& lcov, float b, int32_t i) {
- state(i) << cp.template cast(), lp.template cast();
- state(i)(2) *= b;
- auto cov = covariance(i);
- cov(0) = ccov(0, 0);
- cov(1) = ccov(0, 1);
- cov(2) = b * float(ccov(0, 2));
- cov(4) = cov(3) = 0;
- cov(5) = ccov(1, 1);
- cov(6) = b * float(ccov(1, 2));
- cov(8) = cov(7) = 0;
- cov(9) = b * b * float(ccov(2, 2));
- cov(11) = cov(10) = 0;
- cov(12) = lcov(0, 0);
- cov(13) = lcov(0, 1);
- cov(14) = lcov(1, 1);
- }
-
- template
- __host__ __device__ inline void copyFromDense(V5 const& v, M5 const& cov, int32_t i) {
- state(i) = v.template cast();
- for (int j = 0, ind = 0; j < 5; ++j)
- for (auto k = j; k < 5; ++k)
- covariance(i)(ind++) = cov(j, k);
- }
-
- template
- __host__ __device__ inline void copyToDense(V5& v, M5& cov, int32_t i) const {
- v = state(i).template cast();
- for (int j = 0, ind = 0; j < 5; ++j) {
- cov(j, j) = covariance(i)(ind++);
- for (auto k = j + 1; k < 5; ++k)
- cov(k, j) = cov(j, k) = covariance(i)(ind++);
- }
- }
-};
-
-#endif // CUDADataFormats_Track_TrajectoryStateSOAT_H
diff --git a/CUDADataFormats/Track/src/classes.h b/CUDADataFormats/Track/src/classes.h
index 97c116f6c88d3..2e07adddcddd0 100644
--- a/CUDADataFormats/Track/src/classes.h
+++ b/CUDADataFormats/Track/src/classes.h
@@ -3,7 +3,10 @@
#include "CUDADataFormats/Common/interface/Product.h"
#include "CUDADataFormats/Common/interface/HostProduct.h"
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousT.h"
+
+#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h"
+#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h"
+
#include "DataFormats/Common/interface/Wrapper.h"
#endif // CUDADataFormats_Track_src_classes_h
diff --git a/CUDADataFormats/Track/src/classes_def.xml b/CUDADataFormats/Track/src/classes_def.xml
index 5216c19dded65..5e3116609330a 100644
--- a/CUDADataFormats/Track/src/classes_def.xml
+++ b/CUDADataFormats/Track/src/classes_def.xml
@@ -1,15 +1,15 @@
-
-
-
-
-
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+
diff --git a/CUDADataFormats/Track/test/BuildFile.xml b/CUDADataFormats/Track/test/BuildFile.xml
index fc78783db473b..32256c87ed577 100644
--- a/CUDADataFormats/Track/test/BuildFile.xml
+++ b/CUDADataFormats/Track/test/BuildFile.xml
@@ -1,19 +1,22 @@
-
-
-
-
+
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_t.cpp b/CUDADataFormats/Track/test/TrackSoAHeterogeneous_t.cpp
deleted file mode 100644
index 9708b689dd05b..0000000000000
--- a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_t.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousT.h"
-
-#include
-#include
-
-int main() {
- // test quality
-
- auto q = pixelTrack::qualityByName("tight");
- assert(pixelTrack::Quality::tight == q);
- q = pixelTrack::qualityByName("toght");
- assert(pixelTrack::Quality::notQuality == q);
-
- for (uint32_t i = 0; i < pixelTrack::qualitySize; ++i) {
- auto const qt = static_cast(i);
- auto q = pixelTrack::qualityByName(pixelTrack::qualityName[i]);
- assert(qt == q);
- }
-
- return 0;
-}
diff --git a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cpp b/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cpp
new file mode 100644
index 0000000000000..dafa75e2e18d7
--- /dev/null
+++ b/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cpp
@@ -0,0 +1,73 @@
+/**
+ Simple test for the pixelTrack::TrackSoA data structure
+ which inherits from PortableDeviceCollection.
+
+ Creates an instance of the class (automatically allocates
+ memory on device), passes the view of the SoA data to
+ the CUDA kernels which:
+ - Fill the SoA with data.
+ - Verify that the data written is correct.
+
+ Then, the SoA data are copied back to Host, where
+ a temporary host-side view (tmp_view) is created using
+ the same Layout to access the data on host and print it.
+ */
+
+#include
+#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h"
+#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+
+#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
+
+namespace testTrackSoA {
+
+ template
+ void runKernels(TrackSoAView &tracks_view, cudaStream_t stream);
+}
+
+int main() {
+ cms::cudatest::requireDevices();
+
+ cudaStream_t stream;
+ cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
+
+ // Inner scope to deallocate memory before destroying the stream
+ {
+ // Instantiate tracks on device. PortableDeviceCollection allocates
+ // SoA on device automatically.
+ TrackSoAHeterogeneousDevice tracks_d(stream);
+ testTrackSoA::runKernels(tracks_d.view(), stream);
+
+ // Instantate tracks on host. This is where the data will be
+ // copied to from device.
+ TrackSoAHeterogeneousHost tracks_h(stream);
+
+ cudaCheck(cudaMemcpyAsync(
+ tracks_h.buffer().get(), tracks_d.const_buffer().get(), tracks_d.bufferSize(), cudaMemcpyDeviceToHost, stream));
+ cudaCheck(cudaStreamSynchronize(stream));
+
+ // Print results
+ std::cout << "pt"
+ << "\t"
+ << "eta"
+ << "\t"
+ << "chi2"
+ << "\t"
+ << "quality"
+ << "\t"
+ << "nLayers"
+ << "\t"
+ << "hitIndices off" << std::endl;
+
+ for (int i = 0; i < 10; ++i) {
+ std::cout << tracks_h.view()[i].pt() << "\t" << tracks_h.view()[i].eta() << "\t" << tracks_h.view()[i].chi2()
+ << "\t" << (int)tracks_h.view()[i].quality() << "\t" << (int)tracks_h.view()[i].nLayers() << "\t"
+ << tracks_h.view().hitIndices().off[i] << std::endl;
+ }
+ }
+ cudaCheck(cudaStreamDestroy(stream));
+
+ return 0;
+}
diff --git a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cu b/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cu
new file mode 100644
index 0000000000000..8e8595eb43e94
--- /dev/null
+++ b/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cu
@@ -0,0 +1,63 @@
+#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/OneToManyAssoc.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+
+namespace testTrackSoA {
+
+ // Kernel which fills the TrackSoAView with data
+ // to test writing to it
+ template
+ __global__ void fill(TrackSoAView tracks_view) {
+ int i = threadIdx.x;
+ if (i == 0) {
+ tracks_view.nTracks() = 420;
+ }
+
+ for (int j = i; j < tracks_view.metadata().size(); j += blockDim.x) {
+ tracks_view[j].pt() = (float)j;
+ tracks_view[j].eta() = (float)j;
+ tracks_view[j].chi2() = (float)j;
+ tracks_view[j].quality() = (pixelTrack::Quality)(j % 256);
+ tracks_view[j].nLayers() = j % 128;
+ tracks_view.hitIndices().off[j] = j;
+ }
+ }
+
+ // Kernel which reads from the TrackSoAView to verify
+ // that it was written correctly from the fill kernel
+ template
+ __global__ void verify(TrackSoAConstView tracks_view) {
+ int i = threadIdx.x;
+
+ if (i == 0) {
+ printf("SoA size: % d, block dims: % d\n", tracks_view.metadata().size(), blockDim.x);
+ assert(tracks_view.nTracks() == 420);
+ }
+ for (int j = i; j < tracks_view.metadata().size(); j += blockDim.x) {
+ assert(abs(tracks_view[j].pt() - (float)j) < .0001);
+ assert(abs(tracks_view[j].eta() - (float)j) < .0001);
+ assert(abs(tracks_view[j].chi2() - (float)j) < .0001);
+ assert(tracks_view[j].quality() == (pixelTrack::Quality)(j % 256));
+ assert(tracks_view[j].nLayers() == j % 128);
+ assert(tracks_view.hitIndices().off[j] == j);
+ }
+ }
+
+ // Host function which invokes the two kernels above
+ template
+ void runKernels(TrackSoAView& tracks_view, cudaStream_t stream) {
+ fill<<<1, 1024, 0, stream>>>(tracks_view);
+ cudaCheck(cudaGetLastError());
+ cudaCheck(cudaDeviceSynchronize());
+
+ verify<<<1, 1024, 0, stream>>>(tracks_view);
+ cudaCheck(cudaGetLastError());
+ cudaCheck(cudaDeviceSynchronize());
+ }
+
+ template void runKernels(TrackSoAView& tracks_view,
+ cudaStream_t stream);
+ template void runKernels(TrackSoAView& tracks_view,
+ cudaStream_t stream);
+
+} // namespace testTrackSoA
diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h
index 97b88873c2613..6ba0eaa5c986e 100644
--- a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h
+++ b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h
@@ -1,7 +1,11 @@
-#include "CUDADataFormats/Track/interface/TrajectoryStateSoAT.h"
+#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
+#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
+#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h"
+#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h"
using Vector5d = Eigen::Matrix;
using Matrix5d = Eigen::Matrix;
+using helper = TracksUtilities;
__host__ __device__ Matrix5d loadCov(Vector5d const& e) {
Matrix5d cov;
@@ -17,26 +21,21 @@ __host__ __device__ Matrix5d loadCov(Vector5d const& e) {
return cov;
}
-using TS = TrajectoryStateSoAT<128>;
-
-__global__ void testTSSoA(TS* pts, int n) {
- assert(n <= 128);
-
+template
+__global__ void testTSSoA(TrackSoAView ts) {
Vector5d par0;
par0 << 0.2, 0.1, 3.5, 0.8, 0.1;
Vector5d e0;
e0 << 0.01, 0.01, 0.035, -0.03, -0.01;
auto cov0 = loadCov(e0);
- TS& ts = *pts;
-
int first = threadIdx.x + blockIdx.x * blockDim.x;
- for (int i = first; i < n; i += blockDim.x * gridDim.x) {
- ts.copyFromDense(par0, cov0, i);
+ for (int i = first; i < ts.metadata().size(); i += blockDim.x * gridDim.x) {
+ helper::copyFromDense(ts, par0, cov0, i);
Vector5d par1;
Matrix5d cov1;
- ts.copyToDense(par1, cov1, i);
+ helper::copyToDense(ts, par1, cov1, i);
Vector5d delV = par1 - par0;
Matrix5d delM = cov1 - cov0;
for (int j = 0; j < 5; ++j) {
@@ -58,18 +57,29 @@ __global__ void testTSSoA(TS* pts, int n) {
int main() {
#ifdef __CUDACC__
cms::cudatest::requireDevices();
+ cudaStream_t stream;
+ cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
#endif
- TS ts;
+#ifdef __CUDACC__
+ // Since we are going to copy data from ts_d to ts_h, we
+ // need to initialize the Host collection with a stream.
+ TrackSoAHeterogeneousHost ts_h(stream);
+ TrackSoAHeterogeneousDevice ts_d(stream);
+#else
+ // If CUDA is not available, Host collection must not be initialized
+ // with a stream.
+ TrackSoAHeterogeneousHost ts_h;
+#endif
#ifdef __CUDACC__
- TS* ts_d;
- cudaCheck(cudaMalloc(&ts_d, sizeof(TS)));
- testTSSoA<<<1, 64>>>(ts_d, 128);
+ testTSSoA<<<1, 64, 0, stream>>>(ts_d.view());
+ cudaCheck(cudaGetLastError());
+ cudaCheck(cudaMemcpyAsync(
+ ts_h.buffer().get(), ts_d.const_buffer().get(), ts_d.bufferSize(), cudaMemcpyDeviceToHost, stream));
cudaCheck(cudaGetLastError());
- cudaCheck(cudaMemcpy(&ts, ts_d, sizeof(TS), cudaMemcpyDefault));
- cudaCheck(cudaDeviceSynchronize());
+ cudaCheck(cudaStreamSynchronize(stream));
#else
- testTSSoA(&ts, 128);
+ testTSSoA(ts_h.view());
#endif
}
diff --git a/CUDADataFormats/TrackingRecHit/BuildFile.xml b/CUDADataFormats/TrackingRecHit/BuildFile.xml
index 4cda8ebd306b0..e67c2227feef9 100644
--- a/CUDADataFormats/TrackingRecHit/BuildFile.xml
+++ b/CUDADataFormats/TrackingRecHit/BuildFile.xml
@@ -3,6 +3,7 @@
+
diff --git a/CUDADataFormats/TrackingRecHit/interface/SiPixelHitStatus.h b/CUDADataFormats/TrackingRecHit/interface/SiPixelHitStatus.h
index b3bdade5ec97c..13322ce3952b7 100644
--- a/CUDADataFormats/TrackingRecHit/interface/SiPixelHitStatus.h
+++ b/CUDADataFormats/TrackingRecHit/interface/SiPixelHitStatus.h
@@ -12,4 +12,9 @@ struct SiPixelHitStatus {
uint8_t qBin : 3; // ∈[0,1,...,7]
};
+struct SiPixelHitStatusAndCharge {
+ SiPixelHitStatus status;
+ uint32_t charge : 24;
+};
+
#endif
diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h
deleted file mode 100644
index ad78daa8354e2..0000000000000
--- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h
+++ /dev/null
@@ -1,384 +0,0 @@
-#ifndef CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
-#define CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h
-
-#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h"
-#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h"
-#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
-#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
-#include "DataFormats/Common/interface/CMS_CLASS_VERSION.h"
-
-namespace {
- enum class Storage32 {
- kXLocal = 0,
- kYLocal = 1,
- kXerror = 2,
- kYerror = 3,
- kCharge = 4,
- kXGlobal = 5,
- kYGlobal = 6,
- kZGlobal = 7,
- kRGlobal = 8,
- kPhiStorage = 9,
- kLayers = 10
- };
-
- enum class Storage16 {
- kDetId = 0,
- kPhi = 1,
- kXSize = 2,
- kYSize = 3,
- };
-} // namespace
-
-template
-class TrackingRecHit2DHeterogeneousT {
-public:
- template
- friend class TrackingRecHit2DHostT;
-
- template
- using unique_ptr = typename Traits::template unique_ptr;
-
- using TrackingRecHit2DSOAView = TrackingRecHit2DSOAViewT;
- using PhiBinner = typename TrackingRecHit2DSOAView::PhiBinner;
- using AverageGeometry = typename TrackingRecHit2DSOAView::AverageGeometry;
-
- TrackingRecHit2DHeterogeneousT() = default;
-
- explicit TrackingRecHit2DHeterogeneousT(uint32_t nHits,
- int32_t offsetBPIX2,
- pixelCPEforGPU::ParamsOnGPUT const* cpeParams,
- uint32_t const* hitsModuleStart,
- cudaStream_t stream = nullptr);
-
- explicit TrackingRecHit2DHeterogeneousT(cms::cuda::host::unique_ptr& store32,
- cms::cuda::host::unique_ptr& store16,
- uint32_t* modules,
- int nHits,
- cudaStream_t stream = nullptr);
- ~TrackingRecHit2DHeterogeneousT() = default;
-
- TrackingRecHit2DHeterogeneousT(const TrackingRecHit2DHeterogeneousT&) = delete;
- TrackingRecHit2DHeterogeneousT& operator=(const TrackingRecHit2DHeterogeneousT&) = delete;
- TrackingRecHit2DHeterogeneousT(TrackingRecHit2DHeterogeneousT&&) = default;
- TrackingRecHit2DHeterogeneousT& operator=(TrackingRecHit2DHeterogeneousT&&) = default;
-
- TrackingRecHit2DSOAView* view() { return m_view.get(); }
- TrackingRecHit2DSOAView const* view() const { return m_view.get(); }
-
- auto nHits() const { return m_nHits; }
- auto offsetBPIX2() const { return m_offsetBPIX2; }
-
- auto hitsModuleStart() const { return m_hitsModuleStart; }
- auto hitsLayerStart() { return m_hitsLayerStart; }
- auto phiBinner() { return m_phiBinner; }
- auto phiBinnerStorage() { return m_phiBinnerStorage; }
- auto iphi() { return m_iphi; }
-
- cms::cuda::host::unique_ptr localCoordToHostAsync(cudaStream_t stream) const;
-
- cms::cuda::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const;
-
- cms::cuda::host::unique_ptr store16ToHostAsync(cudaStream_t stream) const;
- cms::cuda::host::unique_ptr store32ToHostAsync(cudaStream_t stream) const;
-
-protected:
- static constexpr uint32_t n16 = 4; // number of elements in m_store16
- static constexpr uint32_t n32 = 10; // number of elements in m_store32
- static_assert(sizeof(uint32_t) == sizeof(float)); // just stating the obvious
- static_assert(n32 == static_cast(Storage32::kLayers));
- unique_ptr m_store16; //!
- unique_ptr m_store32; //!
-
- unique_ptr m_PhiBinnerStore; //!
- unique_ptr m_AverageGeometryStore; //!
-
- unique_ptr m_view; //!
-
- uint32_t m_nHits;
- int32_t m_offsetBPIX2;
-
- uint32_t const* m_hitsModuleStart; // needed for legacy, this is on GPU!
-
- // needed as kernel params...
- PhiBinner* m_phiBinner;
- typename PhiBinner::index_type* m_phiBinnerStorage;
- uint32_t* m_hitsLayerStart;
- int16_t* m_iphi;
-};
-
-//Inherit and overload only what we need to overload, remember to use this->
-//GPU
-template
-class TrackingRecHit2DGPUT : public TrackingRecHit2DHeterogeneousT {
-public:
- using TrackingRecHit2DHeterogeneousT::TrackingRecHit2DHeterogeneousT;
-
- cms::cuda::host::unique_ptr localCoordToHostAsync(cudaStream_t stream) const;
- cms::cuda::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const;
- cms::cuda::host::unique_ptr store16ToHostAsync(cudaStream_t stream) const;
- cms::cuda::host::unique_ptr store32ToHostAsync(cudaStream_t stream) const;
-};
-
-//CPU
-template
-class TrackingRecHit2DCPUT : public TrackingRecHit2DHeterogeneousT {
-public:
- using TrackingRecHit2DHeterogeneousT::TrackingRecHit2DHeterogeneousT;
-
- cms::cuda::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const;
- cms::cuda::host::unique_ptr store16ToHostAsync(cudaStream_t stream) const;
- cms::cuda::host::unique_ptr store32ToHostAsync(cudaStream_t stream) const;
-};
-
-//HOST
-template
-class TrackingRecHit2DHostT : public TrackingRecHit2DHeterogeneousT {
-public:
- ~TrackingRecHit2DHostT() = default;
- TrackingRecHit2DHostT() = default;
-
- explicit TrackingRecHit2DHostT(uint32_t nHits,
- int32_t offsetBPIX2,
- pixelCPEforGPU::ParamsOnGPUT const* cpeParams,
- uint32_t const* hitsModuleStart,
- cudaStream_t stream = nullptr)
- : TrackingRecHit2DHeterogeneousT(
- nHits, offsetBPIX2, cpeParams, hitsModuleStart, stream) {}
-
- explicit TrackingRecHit2DHostT(cms::cuda::host::unique_ptr& store32,
- cms::cuda::host::unique_ptr& store16,
- uint32_t* modules,
- int nHits,
- cudaStream_t stream = nullptr)
- : TrackingRecHit2DHeterogeneousT(
- store32, store16, modules, nHits, stream) {}
-
- explicit TrackingRecHit2DHostT(uint32_t nHits,
- int32_t offsetBPIX2,
- pixelCPEforGPU::ParamsOnGPUT const* cpeParams,
- uint32_t const* hitsModuleStart,
- cudaStream_t stream,
- TrackingRecHit2DHeterogeneousT const* input);
-};
-
-#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
-
-template
-TrackingRecHit2DHeterogeneousT::TrackingRecHit2DHeterogeneousT(
- uint32_t nHits,
- int32_t offsetBPIX2,
- pixelCPEforGPU::ParamsOnGPUT const* cpeParams,
- uint32_t const* hitsModuleStart,
- cudaStream_t stream)
- : m_nHits(nHits), m_offsetBPIX2(offsetBPIX2), m_hitsModuleStart(hitsModuleStart) {
- using TrackingRecHit2DSOAView = TrackingRecHit2DSOAViewT;
-
- auto view = Traits::template make_host_unique(stream);
-
- view->m_nHits = nHits;
- m_view = Traits::template make_unique(stream); // leave it on host and pass it by value?
- m_AverageGeometryStore = Traits::template make_unique(stream);
- view->m_averageGeometry = m_AverageGeometryStore.get();
- view->m_cpeParams = cpeParams;
- view->m_hitsModuleStart = hitsModuleStart;
-
- // if empy do not bother
- if (0 == nHits) {
- if constexpr (std::is_same_v) {
- cms::cuda::copyAsync(m_view, view, stream);
- } else {
- m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version
- }
- return;
- }
-
- // the single arrays are not 128 bit alligned...
- // the hits are actually accessed in order only in building
- // if ordering is relevant they may have to be stored phi-ordered by layer or so
- // this will break 1to1 correspondence with cluster and module locality
- // so unless proven VERY inefficient we keep it ordered as generated
-
- m_store16 = Traits::template make_unique(nHits * n16, stream);
- m_store32 = Traits::template make_unique(nHits * n32 + TrackerTraits::numberOfLayers + 1, stream);
- m_PhiBinnerStore = Traits::template make_unique(stream);
-
- static_assert(sizeof(typename TrackingRecHit2DSOAView::hindex_type) == sizeof(float));
- static_assert(sizeof(typename TrackingRecHit2DSOAView::hindex_type) ==
- sizeof(typename TrackingRecHit2DSOAView::PhiBinner::index_type));
-
- auto get32 = [&](Storage32 i) { return m_store32.get() + static_cast(i) * nHits; };
-
- // copy all the pointers
- m_phiBinner = view->m_phiBinner = m_PhiBinnerStore.get();
- m_phiBinnerStorage = view->m_phiBinnerStorage =
- reinterpret_cast(get32(Storage32::kPhiStorage));
-
- view->m_xl = get32(Storage32::kXLocal);
- view->m_yl = get32(Storage32::kYLocal);
- view->m_xerr = get32(Storage32::kXerror);
- view->m_yerr = get32(Storage32::kYerror);
- view->m_chargeAndStatus = reinterpret_cast(get32(Storage32::kCharge));
-
- view->m_xg = get32(Storage32::kXGlobal);
- view->m_yg = get32(Storage32::kYGlobal);
- view->m_zg = get32(Storage32::kZGlobal);
- view->m_rg = get32(Storage32::kRGlobal);
-
- auto get16 = [&](Storage16 i) { return m_store16.get() + static_cast(i) * nHits; };
- m_iphi = view->m_iphi = reinterpret_cast(get16(Storage16::kPhi));
-
- view->m_xsize = reinterpret_cast(get16(Storage16::kXSize));
- view->m_ysize = reinterpret_cast(get16(Storage16::kYSize));
- view->m_detInd = get16(Storage16::kDetId);
-
- m_phiBinner = view->m_phiBinner = m_PhiBinnerStore.get();
- m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast(get32(Storage32::kLayers));
-
- // transfer view
- if constexpr (std::is_same_v) {
- cms::cuda::copyAsync(m_view, view, stream);
- } else {
- m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version
- }
-}
-
-template
-TrackingRecHit2DHostT::TrackingRecHit2DHostT(
- uint32_t nHits,
- int32_t offsetBPIX2,
- pixelCPEforGPU::ParamsOnGPUT const* cpeParams,
- uint32_t const* hitsModuleStart,
- cudaStream_t stream,
- TrackingRecHit2DHeterogeneousT const* input) {
- using TrackingRecHit2DSOAView = TrackingRecHit2DSOAViewT;
-
- this->m_nHits = nHits;
- this->m_offsetBPIX2 = offsetBPIX2;
- this->m_hitsModuleStart = hitsModuleStart;
-
- auto view = cms::cuda::make_host_unique