From aa002f52e3521574f56e525c9754cb16633b4e08 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 12 Oct 2022 17:40:05 +0200 Subject: [PATCH 1/3] Skip invalid or corrupted ROCs --- .../plugins/SiPixelRawToClusterGPUKernel.cu | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 670d5a9131b32..85933624d776c 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -353,8 +353,9 @@ namespace pixelgpudetails { } uint32_t link = sipixelconstants::getLink(ww); // Extract link - uint32_t roc = sipixelconstants::getROC(ww); // Extract Roc in link + uint32_t roc = sipixelconstants::getROC(ww); // Extract ROC in link pixelgpudetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc); + uint32_t rawId = detId.rawId; uint8_t errorType = checkROC(ww, fedId, link, cablingMap, debug); skipROC = (roc < pixelgpudetails::maxROCIndex) ? false : (errorType != 0); @@ -364,9 +365,13 @@ namespace pixelgpudetails { continue; } - uint32_t rawId = detId.rawId; - uint32_t rocIdInDetUnit = detId.rocInDet; - bool barrel = isBarrel(rawId); + // check for spurious channels + if (roc > MAX_ROC or link > MAX_LINK) { + if (debug) { + printf("spurious roc %d found on link %d, detector %d (index %d)\n", roc, link, rawId, gIndex); + } + continue; + } uint32_t index = fedId * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc; if (useQualityInfo) { @@ -381,6 +386,7 @@ namespace pixelgpudetails { uint32_t layer = 0; int side = 0, panel = 0, module = 0; + bool barrel = isBarrel(rawId); if (barrel) { layer = (rawId >> pixelgpudetails::layerStartBit) & pixelgpudetails::layerMask; module = (rawId >> pixelgpudetails::moduleStartBit) & pixelgpudetails::moduleMask; @@ -425,7 +431,7 @@ namespace pixelgpudetails { } } - pixelgpudetails::Pixel globalPix = frameConversion(barrel, side, layer, rocIdInDetUnit, localPix); + pixelgpudetails::Pixel globalPix = frameConversion(barrel, side, layer, detId.rocInDet, localPix); xx[gIndex] = globalPix.row; // origin shifting by 1 0-159 yy[gIndex] = globalPix.col; // origin shifting by 1 0-415 adc[gIndex] = sipixelconstants::getADC(ww); From b03bc2d627ca018607300908a2b6fb0a28f0562e Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 12 Oct 2022 17:46:26 +0200 Subject: [PATCH 2/3] Make if (debug) a compile-time check --- .../plugins/SiPixelRawToClusterGPUKernel.cu | 117 ++++++++++-------- 1 file changed, 66 insertions(+), 51 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 85933624d776c..d424adbfaac48 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -120,36 +120,37 @@ namespace pixelgpudetails { } // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc - __device__ uint8_t conversionError(uint8_t fedId, uint8_t status, bool debug = false) { + template + __device__ uint8_t conversionError(uint8_t fedId, uint8_t status) { uint8_t errorType = 0; switch (status) { case (1): { - if (debug) + if constexpr (debug) printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId); errorType = 35; break; } case (2): { - if (debug) + if constexpr (debug) printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId); errorType = 36; break; } case (3): { - if (debug) + if constexpr (debug) printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId); errorType = 37; break; } case (4): { - if (debug) + if constexpr (debug) printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId); errorType = 38; break; } default: - if (debug) + if constexpr (debug) printf("Cabling check returned unexpected result, status = %i\n", status); }; @@ -164,11 +165,9 @@ namespace pixelgpudetails { __device__ bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); } // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc - __device__ uint8_t checkROC(uint32_t errorWord, - uint8_t fedId, - uint32_t link, - const SiPixelROCsStatusAndMapping *cablingMap, - bool debug = false) { + template + __device__ uint8_t + checkROC(uint32_t errorWord, uint8_t fedId, uint32_t link, const SiPixelROCsStatusAndMapping *cablingMap) { uint8_t errorType = (errorWord >> sipixelconstants::ROC_shift) & sipixelconstants::ERROR_mask; if (errorType < 25) return 0; @@ -182,47 +181,48 @@ namespace pixelgpudetails { if (!(link == cablingMap->link[index] && 1 == cablingMap->roc[index])) errorFound = false; } - if (debug and errorFound) - printf("Invalid ROC = 25 found (errorType = 25)\n"); + if constexpr (debug) + if (errorFound) + printf("Invalid ROC = 25 found (errorType = 25)\n"); break; } case (26): { - if (debug) + if constexpr (debug) printf("Gap word found (errorType = 26)\n"); errorFound = true; break; } case (27): { - if (debug) + if constexpr (debug) printf("Dummy word found (errorType = 27)\n"); errorFound = true; break; } case (28): { - if (debug) + if constexpr (debug) printf("Error fifo nearly full (errorType = 28)\n"); errorFound = true; break; } case (29): { - if (debug) + if constexpr (debug) printf("Timeout on a channel (errorType = 29)\n"); if ((errorWord >> sipixelconstants::OMIT_ERR_shift) & sipixelconstants::OMIT_ERR_mask) { - if (debug) + if constexpr (debug) printf("...first errorType=29 error, this gets masked out\n"); } errorFound = true; break; } case (30): { - if (debug) + if constexpr (debug) printf("TBM error trailer (errorType = 30)\n"); int stateMatch_bits = 4; int stateMatch_shift = 8; uint32_t stateMatch_mask = ~(~uint32_t(0) << stateMatch_bits); int stateMatch = (errorWord >> stateMatch_shift) & stateMatch_mask; if (stateMatch != 1 && stateMatch != 8) { - if (debug) + if constexpr (debug) printf("FED error 30 with unexpected State Bits (errorType = 30)\n"); } if (stateMatch == 1) @@ -231,7 +231,7 @@ namespace pixelgpudetails { break; } case (31): { - if (debug) + if constexpr (debug) printf("Event number error (errorType = 31)\n"); errorFound = true; break; @@ -244,11 +244,9 @@ namespace pixelgpudetails { } // error decoding and handling copied from EventFilter/SiPixelRawToDigi/src/ErrorChecker.cc - __device__ uint32_t getErrRawID(uint8_t fedId, - uint32_t errWord, - uint32_t errorType, - const SiPixelROCsStatusAndMapping *cablingMap, - bool debug = false) { + template + __device__ uint32_t + getErrRawID(uint8_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelROCsStatusAndMapping *cablingMap) { uint32_t rID = 0xffffffff; switch (errorType) { @@ -314,6 +312,7 @@ namespace pixelgpudetails { } // Kernel to perform Raw to Digi conversion + template __global__ void RawToDigi_kernel(const SiPixelROCsStatusAndMapping *cablingMap, const unsigned char *modToUnp, const uint32_t wordCounter, @@ -327,8 +326,7 @@ namespace pixelgpudetails { uint16_t *moduleId, cms::cuda::SimpleVector *err, bool useQualityInfo, - bool includeErrors, - bool debug) { + bool includeErrors) { //if (threadIdx.x==0) printf("Event: %u blockIdx.x: %u start: %u end: %u\n", eventno, blockIdx.x, begin, end); int32_t first = threadIdx.x + blockIdx.x * blockDim.x; @@ -357,17 +355,17 @@ namespace pixelgpudetails { pixelgpudetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc); uint32_t rawId = detId.rawId; - uint8_t errorType = checkROC(ww, fedId, link, cablingMap, debug); + uint8_t errorType = checkROC(ww, fedId, link, cablingMap); skipROC = (roc < pixelgpudetails::maxROCIndex) ? false : (errorType != 0); if (includeErrors and skipROC) { - uint32_t rID = getErrRawID(fedId, ww, errorType, cablingMap, debug); + uint32_t rID = getErrRawID(fedId, ww, errorType, cablingMap); err->push_back(SiPixelErrorCompact{rID, ww, errorType, fedId}); continue; } // check for spurious channels if (roc > MAX_ROC or link > MAX_LINK) { - if (debug) { + if constexpr (debug) { printf("spurious roc %d found on link %d, detector %d (index %d)\n", roc, link, rawId, gIndex); } continue; @@ -407,9 +405,9 @@ namespace pixelgpudetails { localPix.col = col; if (includeErrors) { if (not rocRowColIsValid(row, col)) { - uint8_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays + uint8_t error = conversionError(fedId, 3); //use the device function and fill the arrays err->push_back(SiPixelErrorCompact{rawId, ww, error, fedId}); - if (debug) + if constexpr (debug) printf("BPIX1 Error status: %i\n", error); continue; } @@ -423,9 +421,9 @@ namespace pixelgpudetails { localPix.row = row; localPix.col = col; if (includeErrors and not dcolIsValid(dcol, pxid)) { - uint8_t error = conversionError(fedId, 3, debug); + uint8_t error = conversionError(fedId, 3); err->push_back(SiPixelErrorCompact{rawId, ww, error, fedId}); - if (debug) + if constexpr (debug) printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); continue; } @@ -441,6 +439,7 @@ namespace pixelgpudetails { } // end of loop (gIndex < end) } // end of Raw to Digi kernel + template __global__ void fillHitsModuleStart(uint32_t const *__restrict__ clusInModule, uint32_t *__restrict__ moduleStart, @@ -569,22 +568,38 @@ namespace pixelgpudetails { fedId_d.get(), wordFed.fedId(), wordCounter * sizeof(uint8_t) / 2, cudaMemcpyDefault, stream)); // Launch rawToDigi kernel - RawToDigi_kernel<<>>( - cablingMap, - modToUnp, - wordCounter, - word_d.get(), - fedId_d.get(), - digis_d.view().xx(), - digis_d.view().yy(), - digis_d.view().adc(), - digis_d.view().pdigi(), - digis_d.view().rawIdArr(), - digis_d.view().moduleInd(), - digiErrors_d.error(), // returns nullptr if default-constructed - useQualityInfo, - includeErrors, - debug); + if (debug) + RawToDigi_kernel<<>>( // + cablingMap, + modToUnp, + wordCounter, + word_d.get(), + fedId_d.get(), + digis_d.view().xx(), + digis_d.view().yy(), + digis_d.view().adc(), + digis_d.view().pdigi(), + digis_d.view().rawIdArr(), + digis_d.view().moduleInd(), + digiErrors_d.error(), // returns nullptr if default-constructed + useQualityInfo, + includeErrors); + else + RawToDigi_kernel<<>>( // + cablingMap, + modToUnp, + wordCounter, + word_d.get(), + fedId_d.get(), + digis_d.view().xx(), + digis_d.view().yy(), + digis_d.view().adc(), + digis_d.view().pdigi(), + digis_d.view().rawIdArr(), + digis_d.view().moduleInd(), + digiErrors_d.error(), // returns nullptr if default-constructed + useQualityInfo, + includeErrors); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG cudaCheck(cudaStreamSynchronize(stream)); From 85fea7e73e74fbddf9eb9dc7d60b2416e4e0ea6b Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 12 Oct 2022 18:41:14 +0200 Subject: [PATCH 3/3] Print the correct detector id for spurious ROCs --- .../plugins/SiPixelRawToClusterGPUKernel.cu | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index d424adbfaac48..48dfa98839d36 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -352,8 +352,6 @@ namespace pixelgpudetails { uint32_t link = sipixelconstants::getLink(ww); // Extract link uint32_t roc = sipixelconstants::getROC(ww); // Extract ROC in link - pixelgpudetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc); - uint32_t rawId = detId.rawId; uint8_t errorType = checkROC(ww, fedId, link, cablingMap); skipROC = (roc < pixelgpudetails::maxROCIndex) ? false : (errorType != 0); @@ -366,7 +364,11 @@ namespace pixelgpudetails { // check for spurious channels if (roc > MAX_ROC or link > MAX_LINK) { if constexpr (debug) { - printf("spurious roc %d found on link %d, detector %d (index %d)\n", roc, link, rawId, gIndex); + printf("spurious roc %d found on link %d, detector %d (index %d)\n", + roc, + link, + getRawId(cablingMap, fedId, link, 1).rawId, + gIndex); } continue; } @@ -381,9 +383,10 @@ namespace pixelgpudetails { if (skipROC) continue; + pixelgpudetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc); + uint32_t rawId = detId.rawId; uint32_t layer = 0; int side = 0, panel = 0, module = 0; - bool barrel = isBarrel(rawId); if (barrel) { layer = (rawId >> pixelgpudetails::layerStartBit) & pixelgpudetails::layerMask;