@@ -14,10 +14,12 @@ class TrackingRecHit2DHeterogeneous {
14
14
15
15
TrackingRecHit2DHeterogeneous () = default ;
16
16
17
- explicit TrackingRecHit2DHeterogeneous (uint32_t nHits,
18
- pixelCPEforGPU::ParamsOnGPU const * cpeParams,
19
- uint32_t const * hitsModuleStart,
20
- cudaStream_t stream);
17
+ explicit TrackingRecHit2DHeterogeneous (
18
+ uint32_t nHits,
19
+ pixelCPEforGPU::ParamsOnGPU const * cpeParams,
20
+ uint32_t const * hitsModuleStart,
21
+ cudaStream_t stream,
22
+ TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits> const * input = nullptr );
21
23
22
24
~TrackingRecHit2DHeterogeneous () = default ;
23
25
@@ -41,6 +43,9 @@ class TrackingRecHit2DHeterogeneous {
41
43
cms::cuda::host::unique_ptr<float []> localCoordToHostAsync (cudaStream_t stream) const ;
42
44
cms::cuda::host::unique_ptr<uint32_t []> hitsModuleStartToHostAsync (cudaStream_t stream) const ;
43
45
46
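+ // copyFromGPU needs a dedicated specialization for HostTraits: the constructor calls it only in that case, passing the GPUTraits instance to copy from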
47
+ void copyFromGPU (TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits> const * input, cudaStream_t stream);
48
+
44
49
private:
45
50
static constexpr uint32_t n16 = 4 ; // number of elements in m_store16
46
51
static constexpr uint32_t n32 = 10 ; // number of elements in m_store32
@@ -65,20 +70,27 @@ class TrackingRecHit2DHeterogeneous {
65
70
int16_t * m_iphi;
66
71
};
67
72
73
+ using TrackingRecHit2DGPU = TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits>; // shorthand for the GPUTraits instantiation
74
+ using TrackingRecHit2DCUDA = TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits>; // same type as TrackingRecHit2DGPU (both name the GPUTraits instantiation)
75
+ using TrackingRecHit2DCPU = TrackingRecHit2DHeterogeneous<cms::cudacompat::CPUTraits>; // shorthand for the CPUTraits instantiation
76
+ using TrackingRecHit2DHost = TrackingRecHit2DHeterogeneous<cms::cudacompat::HostTraits>; // shorthand for the HostTraits instantiation; the constructor handles this case via copyFromGPU
77
+
68
78
#include " HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
69
79
#include " HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
70
80
71
81
template <typename Traits>
72
- TrackingRecHit2DHeterogeneous<Traits>::TrackingRecHit2DHeterogeneous(uint32_t nHits,
73
- pixelCPEforGPU::ParamsOnGPU const * cpeParams,
74
- uint32_t const * hitsModuleStart,
75
- cudaStream_t stream)
82
+ TrackingRecHit2DHeterogeneous<Traits>::TrackingRecHit2DHeterogeneous(
83
+ uint32_t nHits,
84
+ pixelCPEforGPU::ParamsOnGPU const * cpeParams,
85
+ uint32_t const * hitsModuleStart,
86
+ cudaStream_t stream,
87
+ TrackingRecHit2DHeterogeneous<cms::cudacompat::GPUTraits> const * input)
76
88
: m_nHits(nHits), m_hitsModuleStart(hitsModuleStart) {
77
89
auto view = Traits::template make_host_unique<TrackingRecHit2DSOAView>(stream);
78
90
79
91
view->m_nHits = nHits;
80
- m_view = Traits::template make_device_unique <TrackingRecHit2DSOAView>(stream);
81
- m_AverageGeometryStore = Traits::template make_device_unique <TrackingRecHit2DSOAView::AverageGeometry>(stream);
92
+ m_view = Traits::template make_unique <TrackingRecHit2DSOAView>(stream); // leave it on host and pass it by value?
93
+ m_AverageGeometryStore = Traits::template make_unique <TrackingRecHit2DSOAView::AverageGeometry>(stream);
82
94
view->m_averageGeometry = m_AverageGeometryStore.get ();
83
95
view->m_cpeParams = cpeParams;
84
96
view->m_hitsModuleStart = hitsModuleStart;
@@ -98,15 +110,21 @@ TrackingRecHit2DHeterogeneous<Traits>::TrackingRecHit2DHeterogeneous(uint32_t nH
98
110
// if ordering is relevant they may have to be stored phi-ordered by layer or so
99
111
// this will break 1to1 correspondence with cluster and module locality
100
112
// so unless proven VERY inefficient we keep it ordered as generated
101
- m_store16 = Traits::template make_device_unique<uint16_t []>(nHits * n16, stream);
102
- m_store32 =
103
- Traits::template make_device_unique<float []>(nHits * n32 + phase1PixelTopology::numberOfLayers + 1 , stream);
104
- m_PhiBinnerStore = Traits::template make_device_unique<TrackingRecHit2DSOAView::PhiBinner>(stream);
113
+
114
+ // host copy is "reduced" (to be reviewed at some point)
115
+ if constexpr (std::is_same<Traits, cms::cudacompat::HostTraits>::value) {
116
+ // it has to compile for ALL cases
117
+ copyFromGPU (input, stream);
118
+ } else {
119
+ assert (input == nullptr );
120
+ m_store16 = Traits::template make_unique<uint16_t []>(nHits * n16, stream);
121
+ m_store32 = Traits::template make_unique<float []>(nHits * n32 + phase1PixelTopology::numberOfLayers + 1 , stream);
122
+ m_PhiBinnerStore = Traits::template make_unique<TrackingRecHit2DSOAView::PhiBinner>(stream);
123
+ }
105
124
106
125
static_assert (sizeof (TrackingRecHit2DSOAView::hindex_type) == sizeof (float ));
107
126
static_assert (sizeof (TrackingRecHit2DSOAView::hindex_type) == sizeof (TrackingRecHit2DSOAView::PhiBinner::index_type));
108
127
109
- auto get16 = [&](int i) { return m_store16.get () + i * nHits; };
110
128
auto get32 = [&](int i) { return m_store32.get () + i * nHits; };
111
129
112
130
// copy all the pointers
@@ -118,20 +136,25 @@ TrackingRecHit2DHeterogeneous<Traits>::TrackingRecHit2DHeterogeneous(uint32_t nH
118
136
view->m_yl = get32 (1 );
119
137
view->m_xerr = get32 (2 );
120
138
view->m_yerr = get32 (3 );
139
+ view->m_chargeAndStatus = reinterpret_cast <uint32_t *>(get32 (4 ));
121
140
122
- view->m_xg = get32 (4 );
123
- view->m_yg = get32 (5 );
124
- view->m_zg = get32 (6 );
125
- view->m_rg = get32 (7 );
141
+ if constexpr (!std::is_same<Traits, cms::cudacompat::HostTraits>::value) {
142
+ assert (input == nullptr );
143
+ view->m_xg = get32 (5 );
144
+ view->m_yg = get32 (6 );
145
+ view->m_zg = get32 (7 );
146
+ view->m_rg = get32 (8 );
126
147
127
- m_iphi = view->m_iphi = reinterpret_cast <int16_t *>(get16 (0 ));
148
+ auto get16 = [&](int i) { return m_store16.get () + i * nHits; };
149
+ m_iphi = view->m_iphi = reinterpret_cast <int16_t *>(get16 (1 ));
128
150
129
- view->m_charge = reinterpret_cast <int32_t *>(get32 (8 ));
130
- view->m_xsize = reinterpret_cast <int16_t *>(get16 (2 ));
131
- view->m_ysize = reinterpret_cast <int16_t *>(get16 (3 ));
132
- view->m_detInd = get16 (1 );
151
+ view->m_xsize = reinterpret_cast <int16_t *>(get16 (2 ));
152
+ view->m_ysize = reinterpret_cast <int16_t *>(get16 (3 ));
153
+ view->m_detInd = get16 (0 );
133
154
134
- m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast <uint32_t *>(get32 (n32));
155
+ m_phiBinner = view->m_phiBinner = m_PhiBinnerStore.get ();
156
+ m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast <uint32_t *>(get32 (n32));
157
+ }
135
158
136
159
// transfer view
137
160
if constexpr (std::is_same<Traits, cms::cudacompat::GPUTraits>::value) {
0 commit comments