Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 219ff60

Browse files
authored Dec 21, 2020
Use CMS_UNROLL_LOOP instead of #pragma unroll in ECAL code (#597)
1 parent 72676e6 commit 219ff60

File tree

3 files changed

+28
-25
lines changed

3 files changed

+28
-25
lines changed
 

‎RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationCommonKernels.cu

+4-3
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
#include "DataFormats/EcalRecHit/interface/EcalUncalibratedRecHit.h"
1111
#include "DataFormats/Math/interface/approx_exp.h"
1212
#include "DataFormats/Math/interface/approx_log.h"
13+
#include "FWCore/Utilities/interface/CMSUnrollLoop.h"
1314

1415
#include "AmplitudeComputationCommonKernels.h"
1516
#include "KernelHelpers.h"
@@ -128,7 +129,7 @@ namespace ecal {
128129

129130
// non-divergent branch (except for the last 4 threads)
130131
if (threadIdx.x <= blockDim.x - 5) {
131-
#pragma unroll
132+
CMS_UNROLL_LOOP
132133
for (int i = 0; i < 5; i++)
133134
shr_counts[threadIdx.x] += shr_hasSwitchToGain0[threadIdx.x + i];
134135
}
@@ -263,7 +264,7 @@ namespace ecal {
263264

264265
// check if samples before sample_max have true
265266
bool saturated_before_max = false;
266-
#pragma unroll
267+
CMS_UNROLL_LOOP
267268
for (char ii = 0; ii < 5; ii++)
268269
saturated_before_max = saturated_before_max || shr_hasSwitchToGain0[chStart + ii];
269270

@@ -397,7 +398,7 @@ namespace ecal {
397398
noise_value += rms_x12[hashedId] * rms_x12[hashedId] * pedestal * G12SamplesCorrelation[vidx];
398399
// non-divergent branch
399400
if (!dynamicPedestal && addPedestalUncertainty > 0.f) {
400-
noise_value += addPedestalUncertainty * addPedestalUncertainty * pedestal; // gainratio is 1
401+
noise_value += addPedestalUncertainty * addPedestalUncertainty * pedestal; // gainratio is 1
401402
}
402403

403404
//

‎RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationKernels.cu

+18-17
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
#include "DataFormats/EcalDigi/interface/EcalDigiCollections.h"
1010
#include "DataFormats/Math/interface/approx_exp.h"
1111
#include "DataFormats/Math/interface/approx_log.h"
12+
#include "FWCore/Utilities/interface/CMSUnrollLoop.h"
1213

1314
#include "AmplitudeComputationCommonKernels.h"
1415
#include "AmplitudeComputationKernels.h"
@@ -24,7 +25,7 @@ namespace ecal {
2425
constexpr int nsamples = SampleVector::RowsAtCompileTime;
2526
constexpr int npulses = BXVectorType::RowsAtCompileTime;
2627

27-
#pragma unroll
28+
CMS_UNROLL_LOOP
2829
for (unsigned int ipulse = 0; ipulse < npulses; ipulse++) {
2930
auto const amplitude = amplitudes.coeff(ipulse);
3031
if (amplitude == 0)
@@ -116,12 +117,12 @@ namespace ecal {
116117
int npassive = 0;
117118

118119
calo::multifit::ColumnVector<NPULSES, int> pulseOffsets;
119-
#pragma unroll
120+
CMS_UNROLL_LOOP
120121
for (int i = 0; i < NPULSES; ++i)
121122
pulseOffsets(i) = i;
122123

123124
calo::multifit::ColumnVector<NPULSES, DataType> resultAmplitudes;
124-
#pragma unroll
125+
CMS_UNROLL_LOOP
125126
for (int counter = 0; counter < NPULSES; counter++)
126127
resultAmplitudes(counter) = 0;
127128

@@ -141,12 +142,12 @@ namespace ecal {
141142
DataType* covMatrixStorage = shrMatrixLForFnnlsStorage;
142143
calo::multifit::MapSymM<DataType, NSAMPLES> covMatrix{covMatrixStorage};
143144
int counter = 0;
144-
#pragma unroll
145-
for (int col = 0; col < NSAMPLES; col++)
146-
#pragma unroll
145+
CMS_UNROLL_LOOP
146+
for (int col = 0; col < NSAMPLES; col++) {
147+
CMS_UNROLL_LOOP
147148
for (int row = col; row < NSAMPLES; row++)
148149
covMatrixStorage[counter++] = __ldg(&noisecov[idx].coeffRef(row, col));
149-
150+
}
150151
update_covariance(pulse_covariance[hashedId], covMatrix, resultAmplitudes);
151152

152153
// compute actual covariance decomposition
@@ -169,36 +170,36 @@ namespace ecal {
169170
calo::multifit::MapSymM<DataType, NPULSES> AtA{shrAtAStorage};
170171
//SampleMatrix AtA;
171172
SampleVector Atb;
172-
#pragma unroll
173+
CMS_UNROLL_LOOP
173174
for (int icol = 0; icol < NPULSES; icol++) {
174175
float reg_ai[NSAMPLES];
175176

176-
// load column icol
177-
#pragma unroll
177+
// load column icol
178+
CMS_UNROLL_LOOP
178179
for (int counter = 0; counter < NSAMPLES; counter++)
179180
reg_ai[counter] = A(counter, icol);
180181

181182
// compute diagonal
182183
float sum = 0.f;
183-
#pragma unroll
184+
CMS_UNROLL_LOOP
184185
for (int counter = 0; counter < NSAMPLES; counter++)
185186
sum += reg_ai[counter] * reg_ai[counter];
186187

187188
// store
188189
AtA(icol, icol) = sum;
189190

190-
// go thru the other columns
191-
#pragma unroll
191+
// go thru the other columns
192+
CMS_UNROLL_LOOP
192193
for (int j = icol + 1; j < NPULSES; j++) {
193194
// load column j
194195
float reg_aj[NSAMPLES];
195-
#pragma unroll
196+
CMS_UNROLL_LOOP
196197
for (int counter = 0; counter < NSAMPLES; counter++)
197198
reg_aj[counter] = A(counter, j);
198199

199200
// accum
200201
float sum = 0.f;
201-
#pragma unroll
202+
CMS_UNROLL_LOOP
202203
for (int counter = 0; counter < NSAMPLES; counter++)
203204
sum += reg_aj[counter] * reg_ai[counter];
204205

@@ -209,7 +210,7 @@ namespace ecal {
209210

210211
// Atb accum
211212
float sum_atb = 0.f;
212-
#pragma unroll
213+
CMS_UNROLL_LOOP
213214
for (int counter = 0; counter < NSAMPLES; counter++)
214215
sum_atb += reg_ai[counter] * reg_b[counter];
215216

@@ -251,7 +252,7 @@ namespace ecal {
251252
chi2s[inputCh] = chi2;
252253
energies[inputCh] = resultAmplitudes(5);
253254

254-
#pragma unroll
255+
CMS_UNROLL_LOOP
255256
for (int counter = 0; counter < NPULSES; counter++)
256257
amplitudes[inputCh](counter) = resultAmplitudes(counter);
257258
}

‎RecoLocalCalo/EcalRecProducers/plugins/TimeComputationKernels.cu

+6-5
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
#include "DataFormats/EcalRecHit/interface/EcalUncalibratedRecHit.h"
88
#include "DataFormats/Math/interface/approx_exp.h"
99
#include "DataFormats/Math/interface/approx_log.h"
10+
#include "FWCore/Utilities/interface/CMSUnrollLoop.h"
1011

1112
#include "Common.h"
1213
#include "TimeComputationKernels.h"
@@ -263,7 +264,7 @@ namespace ecal {
263264
if (ratio_step == 1 && ratio_value >= l_timeFitLimits_first && ratio_value <= l_timeFitLimits_second) {
264265
const auto time_max_i = static_cast<ScalarType>(ratio_index);
265266
auto u = timeFitParameters[timeFitParameters_size - 1];
266-
#pragma unroll
267+
CMS_UNROLL_LOOP
267268
for (int k = timeFitParameters_size - 2; k >= 0; k--)
268269
u = u * ratio_value + timeFitParameters[k];
269270

@@ -365,7 +366,7 @@ namespace ecal {
365366
// TODO validate/check
366367
char iter = nthreads_per_channel / 2 + nthreads_per_channel % 2;
367368
bool oddElements = nthreads_per_channel % 2;
368-
#pragma unroll
369+
CMS_UNROLL_LOOP
369370
while (iter >= 1) {
370371
if (ltx < iter)
371372
// for odd ns, the last guy will just store itself
@@ -410,7 +411,7 @@ namespace ecal {
410411
// reduce to compute time_max and time_wgt
411412
iter = nthreads_per_channel / 2 + nthreads_per_channel % 2;
412413
oddElements = nthreads_per_channel % 2;
413-
#pragma unroll
414+
CMS_UNROLL_LOOP
414415
while (iter >= 1) {
415416
if (ltx < iter) {
416417
shr_time_wgt[threadIdx.x] = oddElements && (ltx == iter - 1 && ltx > 0)
@@ -893,8 +894,8 @@ namespace ecal {
893894
sample_value = (static_cast<SampleVector::Scalar>(adc) - mean_x6[hashedId]) * gain12Over6[hashedId];
894895
sample_value_error = rms_x6[hashedId] * gain12Over6[hashedId];
895896
} else if (gainId == 3) {
896-
sample_value = (static_cast<SampleVector::Scalar>(adc) - mean_x1[hashedId]) * gain6Over1[hashedId] *
897-
gain12Over6[hashedId];
897+
sample_value =
898+
(static_cast<SampleVector::Scalar>(adc) - mean_x1[hashedId]) * gain6Over1[hashedId] * gain12Over6[hashedId];
898899
sample_value_error = rms_x1[hashedId] * gain6Over1[hashedId] * gain12Over6[hashedId];
899900
} else {
900901
sample_value = 0;

0 commit comments

Comments
 (0)
Please sign in to comment.