Skip to content

Commit a37530d

Browse files
VinInnfwyzard
authored andcommitted
Migrated PixelRecHit to Heterogeneous producer (#81)
Migrate PixelRecHit EDProducer to HeterogeneousEDProducer, including the cpu product. Data structures on gpu now include everything needed for Doublets, CA and fit. Layer splitting done: phi sorting (or partial sorting) requires #69. Includes some cleanup and bug fixes.
1 parent 6b9e36f commit a37530d

File tree

6 files changed

+116
-41
lines changed

6 files changed

+116
-41
lines changed

Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h

+9
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@ namespace phase1PixelTopology {
2020

2121
constexpr uint32_t numPixsInModule = uint32_t(numRowsInModule)* uint32_t(numColsInModule);
2222

23+
constexpr uint32_t numberOfModules = 1856;
24+
25+
constexpr uint32_t layerStart[11] = {0,96,320,672,1184,1296,1408,1520,1632,1744,1856};
26+
constexpr char const * layerName[10] = {"BL1","BL2","BL3","BL4",
27+
"E+1", "E+2", "E+3",
28+
"E-1", "E-2", "E-3"
29+
};
30+
31+
2332
// this is for the ROC n<512 (upgrade 1024)
2433
constexpr inline
2534
uint16_t divu52(uint16_t n) {

RecoLocalTracker/Configuration/python/RecoLocalTracker_cff.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,23 @@
1717
striptrackerlocalreco = cms.Sequence(siStripZeroSuppression*siStripClusters*siStripMatchedRecHits)
1818
trackerlocalreco = cms.Sequence(pixeltrackerlocalreco*striptrackerlocalreco*clusterSummaryProducer)
1919

20+
2021
from RecoLocalTracker.SiPixelClusterizer.siPixelClustersHeterogeneous_cfi import *
2122
from RecoLocalTracker.SiPixelClusterizer.siPixelFedCablingMapGPUWrapper_cfi import *
2223
from CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTGPU_cfi import *
2324

25+
from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneous_cfi import *
26+
from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneousConverter_cfi import siPixelRecHitHeterogeneousConverter as _siPixelRecHitHeterogeneousConverter
27+
gpu.toReplaceWith(siPixelRecHitsPreSplitting, _siPixelRecHitHeterogeneousConverter.clone())
28+
29+
30+
2431
from Configuration.ProcessModifiers.gpu_cff import gpu
2532
_pixeltrackerlocalreco_gpu = pixeltrackerlocalreco.copy()
2633
_pixeltrackerlocalreco_gpu.replace(siPixelClustersPreSplitting, siPixelClustersHeterogeneous+siPixelClustersPreSplitting)
34+
_pixeltrackerlocalreco_gpu.replace(siPixelRecHitsPreSplitting, siPixelRecHitHeterogeneous+siPixelRecHitsPreSplitting)
2735
gpu.toReplaceWith(pixeltrackerlocalreco, _pixeltrackerlocalreco_gpu)
2836

29-
3037
from RecoLocalTracker.SiPhase2Clusterizer.phase2TrackerClusterizer_cfi import *
3138
from RecoLocalTracker.Phase2TrackerRecHits.Phase2StripCPEGeometricESProducer_cfi import *
3239

RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu

+59-6
Original file line numberDiff line numberDiff line change
@@ -17,31 +17,60 @@
1717
#include "gpuPixelRecHits.h"
1818

1919
namespace pixelgpudetails {
20-
PixelRecHitGPUKernel::PixelRecHitGPUKernel() {
20+
PixelRecHitGPUKernel::PixelRecHitGPUKernel(cuda::stream_t<>& cudaStream) {
21+
22+
cudaCheck(cudaMalloc((void**) & gpu_.bs_d,3*sizeof(float)));
2123
cudaCheck(cudaMalloc((void**) & gpu_.hitsModuleStart_d,(gpuClustering::MaxNumModules+1)*sizeof(uint32_t)));
24+
cudaCheck(cudaMalloc((void**) & gpu_.hitsLayerStart_d,(11)*sizeof(uint32_t)));
2225
cudaCheck(cudaMalloc((void**) & gpu_.charge_d,(gpuClustering::MaxNumModules*256)*sizeof(float)));
26+
cudaCheck(cudaMalloc((void**) & gpu_.detInd_d,(gpuClustering::MaxNumModules*256)*sizeof(uint16_t)));
2327
cudaCheck(cudaMalloc((void**) & gpu_.xg_d,(gpuClustering::MaxNumModules*256)*sizeof(float)));
2428
cudaCheck(cudaMalloc((void**) & gpu_.yg_d,(gpuClustering::MaxNumModules*256)*sizeof(float)));
2529
cudaCheck(cudaMalloc((void**) & gpu_.zg_d,(gpuClustering::MaxNumModules*256)*sizeof(float)));
30+
cudaCheck(cudaMalloc((void**) & gpu_.rg_d,(gpuClustering::MaxNumModules*256)*sizeof(float)));
31+
cudaCheck(cudaMalloc((void**) & gpu_.xl_d,(gpuClustering::MaxNumModules*256)*sizeof(float)));
32+
cudaCheck(cudaMalloc((void**) & gpu_.yl_d,(gpuClustering::MaxNumModules*256)*sizeof(float)));
2633
cudaCheck(cudaMalloc((void**) & gpu_.xerr_d,(gpuClustering::MaxNumModules*256)*sizeof(float)));
2734
cudaCheck(cudaMalloc((void**) & gpu_.yerr_d,(gpuClustering::MaxNumModules*256)*sizeof(float)));
35+
cudaCheck(cudaMalloc((void**) & gpu_.iphi_d,(gpuClustering::MaxNumModules*256)*sizeof(int16_t)));
36+
cudaCheck(cudaMalloc((void**) & gpu_.sortIndex_d,(gpuClustering::MaxNumModules*256)*sizeof(uint16_t)));
2837
cudaCheck(cudaMalloc((void**) & gpu_.mr_d,(gpuClustering::MaxNumModules*256)*sizeof(uint16_t)));
38+
cudaCheck(cudaMalloc((void**) & gpu_.mc_d,(gpuClustering::MaxNumModules*256)*sizeof(uint16_t)));
39+
// cudaCheck(cudaMalloc((void**) & gpu_.hist_d, 10*sizeof(HitsOnGPU::Hist)));
40+
41+
cudaCheck(cudaMalloc((void**) & gpu_d, sizeof(HitsOnGPU)));
42+
cudaCheck(cudaMemcpyAsync(gpu_d, &gpu_, sizeof(HitsOnGPU), cudaMemcpyDefault,cudaStream.id()));
43+
2944
}
3045

3146
PixelRecHitGPUKernel::~PixelRecHitGPUKernel() {
3247
cudaCheck(cudaFree(gpu_.hitsModuleStart_d));
3348
cudaCheck(cudaFree(gpu_.charge_d));
49+
cudaCheck(cudaFree(gpu_.detInd_d));
3450
cudaCheck(cudaFree(gpu_.xg_d));
3551
cudaCheck(cudaFree(gpu_.yg_d));
3652
cudaCheck(cudaFree(gpu_.zg_d));
53+
cudaCheck(cudaFree(gpu_.rg_d));
54+
cudaCheck(cudaFree(gpu_.xl_d));
55+
cudaCheck(cudaFree(gpu_.yl_d));
3756
cudaCheck(cudaFree(gpu_.xerr_d));
3857
cudaCheck(cudaFree(gpu_.yerr_d));
58+
cudaCheck(cudaFree(gpu_.iphi_d));
59+
cudaCheck(cudaFree(gpu_.sortIndex_d));
3960
cudaCheck(cudaFree(gpu_.mr_d));
61+
cudaCheck(cudaFree(gpu_.mc_d));
62+
// cudaCheck(cudaFree(gpu_.hist_d));
63+
64+
cudaCheck(cudaFree(gpu_d));
4065
}
4166

4267
void PixelRecHitGPUKernel::makeHitsAsync(const siPixelRawToClusterHeterogeneousProduct::GPUProduct& input,
68+
float const * bs,
4369
pixelCPEforGPU::ParamsOnGPU const * cpeParams,
4470
cuda::stream_t<>& stream) {
71+
72+
cudaCheck(cudaMemcpyAsync(gpu_.bs_d, bs, 3*sizeof(float), cudaMemcpyDefault, stream.id()));
73+
4574
thrust::exclusive_scan(thrust::cuda::par.on(stream.id()),
4675
input.clusInModule_d,
4776
input.clusInModule_d + gpuClustering::MaxNumModules + 1,
@@ -51,6 +80,7 @@ namespace pixelgpudetails {
5180
int blocks = input.nModules; // active modules (with digis)
5281
gpuPixelRecHits::getHits<<<blocks, threadsPerBlock, 0, stream.id()>>>(
5382
cpeParams,
83+
gpu_.bs_d,
5484
input.moduleInd_d,
5585
input.xx_d, input.yy_d, input.adc_d,
5686
input.moduleStart_d,
@@ -59,27 +89,50 @@ namespace pixelgpudetails {
5989
input.nDigis,
6090
gpu_.hitsModuleStart_d,
6191
gpu_.charge_d,
62-
gpu_.xg_d, gpu_.yg_d, gpu_.zg_d,
63-
gpu_.xerr_d, gpu_.yerr_d, gpu_.mr_d,
64-
true // for the time being stay local...
92+
gpu_.detInd_d,
93+
gpu_.xg_d, gpu_.yg_d, gpu_.zg_d, gpu_.rg_d,
94+
gpu_.iphi_d,
95+
gpu_.xl_d, gpu_.yl_d,
96+
gpu_.xerr_d, gpu_.yerr_d,
97+
gpu_.mr_d, gpu_.mc_d
6598
);
6699

67100
// needed only if hits on CPU are required...
68101
cudaCheck(cudaMemcpyAsync(hitsModuleStart_, gpu_.hitsModuleStart_d, (gpuClustering::MaxNumModules+1) * sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
102+
103+
// to be moved to gpu?
104+
auto nhits = hitsModuleStart_[gpuClustering::MaxNumModules];
105+
for (int i=0;i<10;++i) hitsLayerStart_[i]=hitsModuleStart_[phase1PixelTopology::layerStart[i]];
106+
hitsLayerStart_[10]=nhits;
107+
108+
std::cout << "hit layerStart ";
109+
for (int i=0;i<10;++i) std::cout << phase1PixelTopology::layerName[i] << ':' << hitsLayerStart_[i] << ' ';
110+
std::cout << "end:" << hitsLayerStart_[10] << std::endl;
111+
112+
cudaCheck(cudaMemcpyAsync(gpu_.hitsLayerStart_d, hitsLayerStart_, (11) * sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
113+
114+
// for timing test
115+
// radixSortMultiWrapper<int16_t><<<10, 256, 0, c.stream>>>(gpu_.iphi_d,gpu_.sortIndex_d,gpu_.hitsLayerStart_d);
116+
117+
// fillManyFromVector(gpu_.hist_d,10,gpu_.iphi_d, gpu_.hitsLayerStart_d, nhits,256,c.stream);
118+
119+
69120
}
70121

71122
HitsOnCPU PixelRecHitGPUKernel::getOutput(cuda::stream_t<>& stream) const {
72123
// needed only if hits on CPU are required...
73124
auto nhits = hitsModuleStart_[gpuClustering::MaxNumModules];
74125

75126
HitsOnCPU hoc(nhits);
127+
hoc.gpu_d = gpu_d;
76128
memcpy(hoc.hitsModuleStart, hitsModuleStart_, (gpuClustering::MaxNumModules+1) * sizeof(uint32_t));
77129
cudaCheck(cudaMemcpyAsync(hoc.charge.data(), gpu_.charge_d, nhits*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
78-
cudaCheck(cudaMemcpyAsync(hoc.xl.data(), gpu_.xg_d, nhits*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
79-
cudaCheck(cudaMemcpyAsync(hoc.yl.data(), gpu_.yg_d, nhits*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
130+
cudaCheck(cudaMemcpyAsync(hoc.xl.data(), gpu_.xl_d, nhits*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
131+
cudaCheck(cudaMemcpyAsync(hoc.yl.data(), gpu_.yl_d, nhits*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
80132
cudaCheck(cudaMemcpyAsync(hoc.xe.data(), gpu_.xerr_d, nhits*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
81133
cudaCheck(cudaMemcpyAsync(hoc.ye.data(), gpu_.yerr_d, nhits*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
82134
cudaCheck(cudaMemcpyAsync(hoc.mr.data(), gpu_.mr_d, nhits*sizeof(uint16_t), cudaMemcpyDefault, stream.id()));
135+
cudaCheck(cudaMemcpyAsync(hoc.mc.data(), gpu_.mc_d, nhits*sizeof(uint16_t), cudaMemcpyDefault, stream.id()));
83136
cudaCheck(cudaStreamSynchronize(stream.id()));
84137
return hoc;
85138
}

RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.h

+9-17
Original file line numberDiff line numberDiff line change
@@ -9,32 +9,21 @@
99
#include <cstdint>
1010
#include <vector>
1111

12+
#include "RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h"
13+
14+
1215
namespace pixelCPEforGPU {
1316
struct ParamsOnGPU;
1417
}
1518

1619
namespace pixelgpudetails {
17-
struct HitsOnGPU{
18-
uint32_t * hitsModuleStart_d;
19-
int32_t * charge_d;
20-
float *xg_d, *yg_d, *zg_d;
21-
float *xerr_d, *yerr_d;
22-
uint16_t * mr_d;
23-
};
20+
using HitsOnGPU = siPixelRecHitsHeterogeneousProduct::HitsOnGPU;
2421

25-
struct HitsOnCPU {
26-
explicit HitsOnCPU(uint32_t nhits) :
27-
charge(nhits),xl(nhits),yl(nhits),xe(nhits),ye(nhits), mr(nhits){}
28-
uint32_t hitsModuleStart[2001];
29-
std::vector<int32_t> charge;
30-
std::vector<float> xl, yl;
31-
std::vector<float> xe, ye;
32-
std::vector<uint16_t> mr;
33-
};
22+
using HitsOnCPU = siPixelRecHitsHeterogeneousProduct::HitsOnCPU;
3423

3524
class PixelRecHitGPUKernel {
3625
public:
37-
PixelRecHitGPUKernel();
26+
PixelRecHitGPUKernel(cuda::stream_t<>& cudaStream);
3827
~PixelRecHitGPUKernel();
3928

4029
PixelRecHitGPUKernel(const PixelRecHitGPUKernel&) = delete;
@@ -43,14 +32,17 @@ namespace pixelgpudetails {
4332
PixelRecHitGPUKernel& operator=(PixelRecHitGPUKernel&&) = delete;
4433

4534
void makeHitsAsync(const siPixelRawToClusterHeterogeneousProduct::GPUProduct& input,
35+
float const * bs,
4636
pixelCPEforGPU::ParamsOnGPU const * cpeParams,
4737
cuda::stream_t<>& stream);
4838

4939
HitsOnCPU getOutput(cuda::stream_t<>& stream) const;
5040

5141
private:
42+
HitsOnGPU * gpu_d; // copy of the structure on the gpu itself: this is the "Product"
5243
HitsOnGPU gpu_;
5344
uint32_t hitsModuleStart_[gpuClustering::MaxNumModules+1];
45+
uint32_t hitsLayerStart_[11];
5446
};
5547
}
5648

RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h

+31-14
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,14 @@
66
#include <cstdio>
77
#include <limits>
88

9+
#include "DataFormats/Math/interface/approx_atan2.h"
910
#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
1011

1112
namespace gpuPixelRecHits {
1213

14+
15+
16+
1317
// to be moved in common namespace...
1418
constexpr uint16_t InvId=9999; // must be > MaxNumModules
1519

@@ -20,20 +24,23 @@ namespace gpuPixelRecHits {
2024

2125

2226
__global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * cpeParams,
27+
float const * bs,
2328
uint16_t const * id,
2429
uint16_t const * x,
2530
uint16_t const * y,
2631
uint16_t const * adc,
2732
uint32_t const * digiModuleStart,
2833
uint32_t const * clusInModule,
2934
uint32_t const * moduleId,
30-
int32_t const * clus,
35+
int32_t const * clus,
3136
int numElements,
3237
uint32_t const * hitsModuleStart,
3338
int32_t * chargeh,
34-
float * xh, float * yh, float * zh,
35-
float * xe, float * ye, uint16_t * mr,
36-
bool local) // if true fill just x & y in local coord
39+
uint16_t * detInd,
40+
float * xg, float * yg, float * zg, float * rg, int16_t * iph,
41+
float * xl, float * yl,
42+
float * xe, float * ye,
43+
uint16_t * mr, uint16_t * mc)
3744
{
3845
// as usual one block per module
3946
__shared__ ClusParams clusParams;
@@ -108,16 +115,26 @@ namespace gpuPixelRecHits {
108115

109116
chargeh[h] = clusParams.charge[ic];
110117

111-
if (local) {
112-
xh[h] = clusParams.xpos[ic];
113-
yh[h] = clusParams.ypos[ic];
114-
} else {
115-
cpeParams->detParams(me).frame.toGlobal(clusParams.xpos[ic], clusParams.ypos[ic],
116-
xh[h], yh[h], zh[h] );
117-
}
118-
xe[h] = clusParams.xerr[ic];
119-
ye[h] = clusParams.yerr[ic];
120-
mr[h] = clusParams.minRow[ic];
118+
detInd[h] = me;
119+
120+
xl[h]= clusParams.xpos[ic];
121+
yl[h]= clusParams.ypos[ic];
122+
123+
xe[h]= clusParams.xerr[ic];
124+
ye[h]= clusParams.yerr[ic];
125+
mr[h]= clusParams.minRow[ic];
126+
mc[h]= clusParams.minCol[ic];
127+
128+
// to global and compute phi...
129+
cpeParams->detParams(me).frame.toGlobal(xl[h],yl[h], xg[h],yg[h],zg[h]);
130+
// here correct for the beamspot...
131+
xg[h]-=bs[0];
132+
yg[h]-=bs[1];
133+
zg[h]-=bs[2];
134+
135+
rg[h] = std::sqrt(xg[h]*xg[h]+yg[h]*yg[h]);
136+
iph[h] = unsafe_atan2s<7>(yg[h],xg[h]);
137+
121138
}
122139

123140
}

RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py

-3
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@
77
VerboseLevel = cms.untracked.int32(0),
88
)
99

10-
from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHeterogeneous_cfi import siPixelRecHitHeterogeneous as _siPixelRecHitHeterogeneous
11-
gpu.toReplaceWith(siPixelRecHits, _siPixelRecHitHeterogeneous)
12-
1310
siPixelRecHitsPreSplitting = siPixelRecHits.clone(
1411
src = 'siPixelClustersPreSplitting'
1512
)

0 commit comments

Comments
 (0)