#include "gpuPixelRecHits.h"

namespace pixelgpudetails {
-  PixelRecHitGPUKernel::PixelRecHitGPUKernel() {
+  PixelRecHitGPUKernel::PixelRecHitGPUKernel(cuda::stream_t<>& cudaStream) {
+
+    cudaCheck(cudaMalloc((void**) & gpu_.bs_d, 3*sizeof(float)));
     cudaCheck(cudaMalloc((void**) & gpu_.hitsModuleStart_d, (gpuClustering::MaxNumModules+1)*sizeof(uint32_t)));
+    cudaCheck(cudaMalloc((void**) & gpu_.hitsLayerStart_d, (11)*sizeof(uint32_t)));
     cudaCheck(cudaMalloc((void**) & gpu_.charge_d, (gpuClustering::MaxNumModules*256)*sizeof(float)));
+    cudaCheck(cudaMalloc((void**) & gpu_.detInd_d, (gpuClustering::MaxNumModules*256)*sizeof(uint16_t)));
     cudaCheck(cudaMalloc((void**) & gpu_.xg_d, (gpuClustering::MaxNumModules*256)*sizeof(float)));
     cudaCheck(cudaMalloc((void**) & gpu_.yg_d, (gpuClustering::MaxNumModules*256)*sizeof(float)));
     cudaCheck(cudaMalloc((void**) & gpu_.zg_d, (gpuClustering::MaxNumModules*256)*sizeof(float)));
+    cudaCheck(cudaMalloc((void**) & gpu_.rg_d, (gpuClustering::MaxNumModules*256)*sizeof(float)));
+    cudaCheck(cudaMalloc((void**) & gpu_.xl_d, (gpuClustering::MaxNumModules*256)*sizeof(float)));
+    cudaCheck(cudaMalloc((void**) & gpu_.yl_d, (gpuClustering::MaxNumModules*256)*sizeof(float)));
     cudaCheck(cudaMalloc((void**) & gpu_.xerr_d, (gpuClustering::MaxNumModules*256)*sizeof(float)));
     cudaCheck(cudaMalloc((void**) & gpu_.yerr_d, (gpuClustering::MaxNumModules*256)*sizeof(float)));
+    cudaCheck(cudaMalloc((void**) & gpu_.iphi_d, (gpuClustering::MaxNumModules*256)*sizeof(int16_t)));
+    cudaCheck(cudaMalloc((void**) & gpu_.sortIndex_d, (gpuClustering::MaxNumModules*256)*sizeof(uint16_t)));
     cudaCheck(cudaMalloc((void**) & gpu_.mr_d, (gpuClustering::MaxNumModules*256)*sizeof(uint16_t)));
+    cudaCheck(cudaMalloc((void**) & gpu_.mc_d, (gpuClustering::MaxNumModules*256)*sizeof(uint16_t)));
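+    // note: every per-hit buffer is allocated once, up front, for the worst case
+    // of 256 hits in each of the gpuClustering::MaxNumModules modules; nothing
+    // is reallocated per event.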
+    // cudaCheck(cudaMalloc((void**) & gpu_.hist_d, 10*sizeof(HitsOnGPU::Hist)));
+
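+    // gpu_ holds the device pointers on the host side; gpu_d is its device-side
+    // mirror, so that consumers can be handed a single HitsOnGPU* instead of a
+    // long argument list of raw pointers.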
+    cudaCheck(cudaMalloc((void**) & gpu_d, sizeof(HitsOnGPU)));
+    cudaCheck(cudaMemcpyAsync(gpu_d, &gpu_, sizeof(HitsOnGPU), cudaMemcpyDefault, cudaStream.id()));
+
  }

  PixelRecHitGPUKernel::~PixelRecHitGPUKernel() {
     cudaCheck(cudaFree(gpu_.hitsModuleStart_d));
     cudaCheck(cudaFree(gpu_.charge_d));
+    cudaCheck(cudaFree(gpu_.detInd_d));
     cudaCheck(cudaFree(gpu_.xg_d));
     cudaCheck(cudaFree(gpu_.yg_d));
     cudaCheck(cudaFree(gpu_.zg_d));
+    cudaCheck(cudaFree(gpu_.rg_d));
+    cudaCheck(cudaFree(gpu_.xl_d));
+    cudaCheck(cudaFree(gpu_.yl_d));
     cudaCheck(cudaFree(gpu_.xerr_d));
     cudaCheck(cudaFree(gpu_.yerr_d));
+    cudaCheck(cudaFree(gpu_.iphi_d));
+    cudaCheck(cudaFree(gpu_.sortIndex_d));
     cudaCheck(cudaFree(gpu_.mr_d));
+    cudaCheck(cudaFree(gpu_.mc_d));
+    // cudaCheck(cudaFree(gpu_.hist_d));
+
+    cudaCheck(cudaFree(gpu_d));
  }

  void PixelRecHitGPUKernel::makeHitsAsync(const siPixelRawToClusterHeterogeneousProduct::GPUProduct& input,
+                                          float const * bs,
                                           pixelCPEforGPU::ParamsOnGPU const * cpeParams,
                                           cuda::stream_t<>& stream) {
+
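+    // bs carries three floats, presumably the beam-spot (x, y, z); it is refreshed
+    // on the device once per event and consumed by the kernel via gpu_.bs_d.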
+    cudaCheck(cudaMemcpyAsync(gpu_.bs_d, bs, 3*sizeof(float), cudaMemcpyDefault, stream.id()));
+
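     // the exclusive scan turns the per-module cluster counts into cumulative
     // offsets, so entry i becomes the index of the first hit of module i
     // (the output iterator falls in the lines elided from this hunk; the result
     // is read back below as gpu_.hitsModuleStart_d)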
     thrust::exclusive_scan(thrust::cuda::par.on(stream.id()),
                            input.clusInModule_d,
                            input.clusInModule_d + gpuClustering::MaxNumModules + 1,
@@ -51,6 +80,7 @@ namespace pixelgpudetails {
     int blocks = input.nModules;  // active modules (with digis)
     gpuPixelRecHits::getHits<<<blocks, threadsPerBlock, 0, stream.id()>>>(
         cpeParams,
+        gpu_.bs_d,
         input.moduleInd_d,
         input.xx_d, input.yy_d, input.adc_d,
         input.moduleStart_d,
@@ -59,27 +89,50 @@ namespace pixelgpudetails {
         input.nDigis,
         gpu_.hitsModuleStart_d,
         gpu_.charge_d,
-        gpu_.xg_d, gpu_.yg_d, gpu_.zg_d,
-        gpu_.xerr_d, gpu_.yerr_d, gpu_.mr_d,
-        true // for the time being stay local...
+        gpu_.detInd_d,
+        gpu_.xg_d, gpu_.yg_d, gpu_.zg_d, gpu_.rg_d,
+        gpu_.iphi_d,
+        gpu_.xl_d, gpu_.yl_d,
+        gpu_.xerr_d, gpu_.yerr_d,
+        gpu_.mr_d, gpu_.mc_d
     );
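     // getHits runs one block per module with digis and fills, per cluster: the
     // charge, the global coordinates (xg, yg, zg, rg), the local coordinates
     // (xl, yl) and their errors, the azimuthal index iphi, and the mr/mc words
     // that are copied back to the CPU in getOutput()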

     // needed only if hits on CPU are required...
     cudaCheck(cudaMemcpyAsync(hitsModuleStart_, gpu_.hitsModuleStart_d, (gpuClustering::MaxNumModules+1) * sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
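+    // NB: the loop below reads hitsModuleStart_ on the host, so it assumes this
+    // asynchronous copy has already completed (e.g. hitsModuleStart_ sits in
+    // pinned memory and the stream is synchronized upstream)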
+
+    // to be moved to gpu?
+    auto nhits = hitsModuleStart_[gpuClustering::MaxNumModules];
+    for (int i = 0; i < 10; ++i) hitsLayerStart_[i] = hitsModuleStart_[phase1PixelTopology::layerStart[i]];
+    hitsLayerStart_[10] = nhits;
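+    // hitsLayerStart_[i] is thus the index of the first hit on layer i (10
+    // layers/disks in the Phase-1 pixel topology), with entry 10 holding the
+    // total number of hits as an end marker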
+
+    std::cout << "hit layerStart ";
+    for (int i = 0; i < 10; ++i) std::cout << phase1PixelTopology::layerName[i] << ':' << hitsLayerStart_[i] << ' ';
+    std::cout << " end:" << hitsLayerStart_[10] << std::endl;
+
+    cudaCheck(cudaMemcpyAsync(gpu_.hitsLayerStart_d, hitsLayerStart_, (11) * sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
+
+    // for timing test
+    // radixSortMultiWrapper<int16_t><<<10, 256, 0, c.stream>>>(gpu_.iphi_d, gpu_.sortIndex_d, gpu_.hitsLayerStart_d);
+
+    // fillManyFromVector(gpu_.hist_d, 10, gpu_.iphi_d, gpu_.hitsLayerStart_d, nhits, 256, c.stream);
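+    // (both commented-out calls above would work per layer, using hitsLayerStart_d
+    // as the boundaries: a radix sort of the hits by iphi, and a histogram fill of
+    // the same quantity; presumably kept here for future use)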
+
+
  }

  HitsOnCPU PixelRecHitGPUKernel::getOutput(cuda::stream_t<>& stream) const {
     // needed only if hits on CPU are required...
     auto nhits = hitsModuleStart_[gpuClustering::MaxNumModules];

     HitsOnCPU hoc(nhits);
+    hoc.gpu_d = gpu_d;
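+    // the device-side HitsOnGPU descriptor travels with the CPU product, so that
+    // downstream GPU consumers can keep using the device buffers without a re-upload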
     memcpy(hoc.hitsModuleStart, hitsModuleStart_, (gpuClustering::MaxNumModules+1) * sizeof(uint32_t));
     cudaCheck(cudaMemcpyAsync(hoc.charge.data(), gpu_.charge_d, nhits*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
-    cudaCheck(cudaMemcpyAsync(hoc.xl.data(), gpu_.xg_d, nhits*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
-    cudaCheck(cudaMemcpyAsync(hoc.yl.data(), gpu_.yg_d, nhits*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
+    cudaCheck(cudaMemcpyAsync(hoc.xl.data(), gpu_.xl_d, nhits*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
+    cudaCheck(cudaMemcpyAsync(hoc.yl.data(), gpu_.yl_d, nhits*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
     cudaCheck(cudaMemcpyAsync(hoc.xe.data(), gpu_.xerr_d, nhits*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
     cudaCheck(cudaMemcpyAsync(hoc.ye.data(), gpu_.yerr_d, nhits*sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
     cudaCheck(cudaMemcpyAsync(hoc.mr.data(), gpu_.mr_d, nhits*sizeof(uint16_t), cudaMemcpyDefault, stream.id()));
+    cudaCheck(cudaMemcpyAsync(hoc.mc.data(), gpu_.mc_d, nhits*sizeof(uint16_t), cudaMemcpyDefault, stream.id()));
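     // note: the float-valued arrays are copied with nhits*sizeof(uint32_t) byte
     // counts; this is harmless only because sizeof(float) == sizeof(uint32_t)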
     cudaCheck(cudaStreamSynchronize(stream.id()));
     return hoc;
  }