From 87aaae822fd84f02704777a65a32a579a94ac31c Mon Sep 17 00:00:00 2001 From: Adriano Di Florio Date: Wed, 15 Nov 2023 17:44:55 +0100 Subject: [PATCH 1/2] Port the pixel local reconstruction to Alpaka Co-authored-by: Andrea Bocci Co-authored-by: Breno Orzari Co-authored-by: Dimitris Papagiannis --- .../SiPixelGainCalibrationForHLTSoARcd.h | 14 + .../interface/SiPixelMappingSoARecord.h | 17 + .../src/SiPixelGainCalibrationForHLTSoARcd.cc | 5 + .../Records/src/SiPixelMappingSoARcd.cc | 5 + .../SiPixelESProducers/plugins/BuildFile.xml | 9 +- .../alpaka/SiPixelCablingSoAESProducer.cc | 140 +++ ...PixelGainCalibrationForHLTSoAESProducer.cc | 128 +++ CondFormats/SiPixelObjects/BuildFile.xml | 4 + .../SiPixelGainCalibrationForHLTHost.h | 9 + .../SiPixelGainCalibrationForHLTLayout.h | 42 + .../interface/SiPixelMappingHost.h | 10 + .../interface/SiPixelMappingLayout.h | 24 + .../SiPixelGainCalibrationForHLTDevice.h | 13 + .../SiPixelGainCalibrationForHLTUtilities.h | 41 + .../interface/alpaka/SiPixelMappingDevice.h | 17 + .../alpaka/SiPixelMappingUtilities.h | 53 ++ ...tSetup_SiPixelGainCalibrationForHLTHost.cc | 4 + .../src/T_EventSetup_SiPixelMappingHost.cc | 4 + ...etup_SiPixelGainCalibrationForHLTDevice.cc | 4 + .../T_EventSetup_SiPixelMappingDevice.cc | 4 + DataFormats/SiPixelClusterSoA/BuildFile.xml | 8 + .../interface/ClusteringConstants.h | 35 + .../interface/SiPixelClustersDevice.h | 38 + .../interface/SiPixelClustersHost.h | 33 + .../interface/SiPixelClustersSoA.h | 16 + .../alpaka/SiPixelClustersSoACollection.h | 35 + .../src/alpaka/classes_cuda.h | 8 + .../src/alpaka/classes_cuda_def.xml | 6 + .../src/alpaka/classes_rocm.h | 8 + .../src/alpaka/classes_rocm_def.xml | 6 + DataFormats/SiPixelClusterSoA/src/classes.cc | 4 + DataFormats/SiPixelClusterSoA/src/classes.h | 7 + .../SiPixelClusterSoA/src/classes_def.xml | 10 + .../SiPixelClusterSoA/test/BuildFile.xml | 6 + .../test/alpaka/Clusters_test.cc | 45 + .../test/alpaka/Clusters_test.dev.cc | 49 ++ 
DataFormats/SiPixelDigiSoA/BuildFile.xml | 11 + .../interface/SiPixelDigiErrorsDevice.h | 33 + .../interface/SiPixelDigiErrorsHost.h | 30 + .../interface/SiPixelDigiErrorsSoA.h | 14 + .../interface/SiPixelDigisDevice.h | 37 + .../interface/SiPixelDigisHost.h | 30 + .../interface/SiPixelDigisSoA.h | 19 + .../alpaka/SiPixelDigiErrorsSoACollection.h | 39 + .../alpaka/SiPixelDigisSoACollection.h | 36 + .../SiPixelDigiSoA/src/alpaka/classes_cuda.h | 12 + .../src/alpaka/classes_cuda_def.xml | 15 + .../SiPixelDigiSoA/src/alpaka/classes_rocm.h | 13 + .../src/alpaka/classes_rocm_def.xml | 15 + DataFormats/SiPixelDigiSoA/src/classes.cc | 6 + DataFormats/SiPixelDigiSoA/src/classes.h | 10 + .../SiPixelDigiSoA/src/classes_def.xml | 17 + DataFormats/SiPixelDigiSoA/test/BuildFile.xml | 11 + .../test/alpaka/DigiErrors_test.cc | 54 ++ .../test/alpaka/DigiErrors_test.dev.cc | 50 ++ .../SiPixelDigiSoA/test/alpaka/Digis_test.cc | 48 ++ .../test/alpaka/Digis_test.dev.cc | 49 ++ DataFormats/SiPixelRawData/src/classes.h | 11 +- .../SiPixelRawData/src/classes_def.xml | 21 +- DataFormats/TrackingRecHitSoA/BuildFile.xml | 12 + .../interface/SiPixelHitStatus.h | 20 + .../interface/TrackingRecHitsDevice.h | 44 + .../interface/TrackingRecHitsHost.h | 43 + .../interface/TrackingRecHitsSoA.h | 55 ++ .../alpaka/TrackingRecHitsSoACollection.h | 46 + .../src/alpaka/classes_cuda.h | 12 + .../src/alpaka/classes_cuda_def.xml | 16 + .../src/alpaka/classes_rocm.h | 12 + .../src/alpaka/classes_rocm_def.xml | 17 + DataFormats/TrackingRecHitSoA/src/classes.cc | 7 + DataFormats/TrackingRecHitSoA/src/classes.h | 11 + .../TrackingRecHitSoA/src/classes_def.xml | 34 + .../TrackingRecHitSoA/test/BuildFile.xml | 6 + .../test/alpaka/Hits_test.cc | 47 ++ .../test/alpaka/Hits_test.dev.cc | 65 ++ .../SiPixelRawToDigi/plugins/BuildFile.xml | 1 + .../plugins/SiPixelDigiErrorsFromSoAAlpaka.cc | 130 +++ .../python/siPixelDigis_cff.py | 6 + .../AlpakaInterface/interface/workdivision.h | 204 +++++ 
.../interface/PixelCPEFastParamsRecord.h | 27 + .../Records/src/PixelCPEFastParamsRecord.cc | 5 + .../SiPixelClusterThresholds.h | 10 +- .../SiPixelClusterizer/plugins/BuildFile.xml | 21 +- .../plugins/SiPixelDigisClustersFromSoA.cc | 20 +- .../SiPixelDigisClustersFromSoAAlpaka.cc | 240 ++++++ .../plugins/SiPixelPhase2DigiToClusterCUDA.cc | 20 +- .../plugins/SiPixelRawToClusterCUDA.cc | 4 +- .../plugins/SiPixelRawToClusterGPUKernel.cu | 7 +- .../plugins/SiPixelRawToClusterGPUKernel.h | 23 +- .../plugins/alpaka/CalibPixel.h | 136 +++ .../plugins/alpaka/ClusterChargeCut.h | 207 +++++ .../plugins/alpaka/PixelClustering.h | 454 ++++++++++ .../alpaka/SiPixelPhase2DigiToCluster.cc | 158 ++++ .../plugins/alpaka/SiPixelRawToCluster.cc | 289 +++++++ .../alpaka/SiPixelRawToClusterKernel.dev.cc | 799 ++++++++++++++++++ .../alpaka/SiPixelRawToClusterKernel.h | 199 +++++ .../plugins/gpuCalibPixel.h | 7 +- .../plugins/gpuClusterChargeCut.h | 4 +- .../python/siPixelClustersPreSplitting_cff.py | 94 ++- .../SiPixelClusterizer/test/gpuClustering_t.h | 9 +- RecoLocalTracker/SiPixelRecHits/BuildFile.xml | 5 + .../interface/PixelCPEFastParamsDevice.h | 43 + .../interface/PixelCPEFastParamsHost.h | 66 ++ .../interface/PixelCPEGenericBase.h | 3 +- .../alpaka/PixelCPEFastParamsCollection.h | 40 + .../interface/pixelCPEforDevice.h | 433 ++++++++++ .../SiPixelRecHits/plugins/BuildFile.xml | 21 +- .../plugins/SiPixelRecHitFromSoAAlpaka.cc | 187 ++++ .../PixelCPEFastParamsESProducerAlpaka.cc | 120 +++ .../plugins/alpaka/PixelRecHitKernel.h | 45 + .../plugins/alpaka/PixelRecHitKernels.dev.cc | 143 ++++ .../plugins/alpaka/PixelRecHits.h | 240 ++++++ .../plugins/alpaka/SiPixelRecHitAlpaka.cc | 100 +++ .../python/PixelCPEESProducers_cff.py | 8 + .../python/SiPixelRecHits_cfi.py | 50 +- .../src/ES_PixelCPEFastParams.cc | 9 + .../SiPixelRecHits/src/PixelCPEFastParams.cc | 9 + .../src/PixelCPEFastParamsHost.cc | 482 +++++++++++ .../src/alpaka/ES_PixelCPEFastParams.cc | 5 + 119 files changed, 6898 
insertions(+), 74 deletions(-) create mode 100644 CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h create mode 100644 CalibTracker/Records/interface/SiPixelMappingSoARecord.h create mode 100644 CalibTracker/Records/src/SiPixelGainCalibrationForHLTSoARcd.cc create mode 100644 CalibTracker/Records/src/SiPixelMappingSoARcd.cc create mode 100644 CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelCablingSoAESProducer.cc create mode 100644 CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelGainCalibrationForHLTSoAESProducer.cc create mode 100644 CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h create mode 100644 CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h create mode 100644 CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h create mode 100644 CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h create mode 100644 CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h create mode 100644 CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTUtilities.h create mode 100644 CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h create mode 100644 CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingUtilities.h create mode 100644 CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelGainCalibrationForHLTHost.cc create mode 100644 CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelMappingHost.cc create mode 100644 CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelGainCalibrationForHLTDevice.cc create mode 100644 CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelMappingDevice.cc create mode 100644 DataFormats/SiPixelClusterSoA/BuildFile.xml create mode 100644 DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h create mode 100644 DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h create mode 100644 DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h create mode 100644 
DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h create mode 100644 DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h create mode 100644 DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda.h create mode 100644 DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda_def.xml create mode 100644 DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm.h create mode 100644 DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm_def.xml create mode 100644 DataFormats/SiPixelClusterSoA/src/classes.cc create mode 100644 DataFormats/SiPixelClusterSoA/src/classes.h create mode 100644 DataFormats/SiPixelClusterSoA/src/classes_def.xml create mode 100644 DataFormats/SiPixelClusterSoA/test/BuildFile.xml create mode 100644 DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.cc create mode 100644 DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.dev.cc create mode 100644 DataFormats/SiPixelDigiSoA/BuildFile.xml create mode 100644 DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h create mode 100644 DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h create mode 100644 DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h create mode 100644 DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h create mode 100644 DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h create mode 100644 DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h create mode 100644 DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h create mode 100644 DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h create mode 100644 DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda.h create mode 100644 DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda_def.xml create mode 100644 DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm.h create mode 100644 DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm_def.xml create mode 100644 DataFormats/SiPixelDigiSoA/src/classes.cc create mode 100644 
DataFormats/SiPixelDigiSoA/src/classes.h create mode 100644 DataFormats/SiPixelDigiSoA/src/classes_def.xml create mode 100644 DataFormats/SiPixelDigiSoA/test/BuildFile.xml create mode 100644 DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.cc create mode 100644 DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.dev.cc create mode 100644 DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.cc create mode 100644 DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.dev.cc create mode 100644 DataFormats/TrackingRecHitSoA/BuildFile.xml create mode 100644 DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h create mode 100644 DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h create mode 100644 DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h create mode 100644 DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h create mode 100644 DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h create mode 100644 DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda.h create mode 100644 DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda_def.xml create mode 100644 DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm.h create mode 100644 DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm_def.xml create mode 100644 DataFormats/TrackingRecHitSoA/src/classes.cc create mode 100644 DataFormats/TrackingRecHitSoA/src/classes.h create mode 100644 DataFormats/TrackingRecHitSoA/src/classes_def.xml create mode 100644 DataFormats/TrackingRecHitSoA/test/BuildFile.xml create mode 100644 DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc create mode 100644 DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc create mode 100644 EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoAAlpaka.cc create mode 100644 RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h create mode 100644 RecoLocalTracker/Records/src/PixelCPEFastParamsRecord.cc rename RecoLocalTracker/SiPixelClusterizer/{plugins => 
interface}/SiPixelClusterThresholds.h (82%) create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoAAlpaka.cc create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelPhase2DigiToCluster.cc create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc create mode 100644 RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h create mode 100644 RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h create mode 100644 RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h create mode 100644 RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h create mode 100644 RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h create mode 100644 RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc create mode 100644 RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelCPEFastParamsESProducerAlpaka.cc create mode 100644 RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernel.h create mode 100644 RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc create mode 100644 RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h create mode 100644 RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitAlpaka.cc create mode 100644 RecoLocalTracker/SiPixelRecHits/src/ES_PixelCPEFastParams.cc create mode 100644 RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParams.cc create mode 100644 RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParamsHost.cc create mode 100644 
RecoLocalTracker/SiPixelRecHits/src/alpaka/ES_PixelCPEFastParams.cc diff --git a/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h new file mode 100644 index 0000000000000..f0f2e5f5103ab --- /dev/null +++ b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h @@ -0,0 +1,14 @@ +#ifndef CalibTracker_Records_SiPixelGainCalibrationForHLTSoARcd_h +#define CalibTracker_Records_SiPixelGainCalibrationForHLTSoARcd_h + +#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h" +#include "FWCore/Framework/interface/DependentRecordImplementation.h" +#include "FWCore/Framework/interface/EventSetupRecordImplementation.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" + +class SiPixelGainCalibrationForHLTSoARcd + : public edm::eventsetup::DependentRecordImplementation< + SiPixelGainCalibrationForHLTSoARcd, + edm::mpl::Vector> {}; + +#endif // CalibTracker_Records_SiPixelGainCalibrationForHLTSoARcd_h diff --git a/CalibTracker/Records/interface/SiPixelMappingSoARecord.h b/CalibTracker/Records/interface/SiPixelMappingSoARecord.h new file mode 100644 index 0000000000000..d8c31754cd8d9 --- /dev/null +++ b/CalibTracker/Records/interface/SiPixelMappingSoARecord.h @@ -0,0 +1,17 @@ +#ifndef CalibTracker_Records_interface_SiPixelMappingSoARecord_h +#define CalibTracker_Records_interface_SiPixelMappingSoARecord_h + +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h" +#include "CondFormats/DataRecord/interface/SiPixelQualityRcd.h" +#include "FWCore/Framework/interface/DependentRecordImplementation.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" + +class SiPixelMappingSoARecord + : public edm::eventsetup::DependentRecordImplementation> {}; + +#endif // CalibTracker_Records_interface_SiPixelMappingSoARecord_h diff --git 
a/CalibTracker/Records/src/SiPixelGainCalibrationForHLTSoARcd.cc b/CalibTracker/Records/src/SiPixelGainCalibrationForHLTSoARcd.cc new file mode 100644 index 0000000000000..6634cee007301 --- /dev/null +++ b/CalibTracker/Records/src/SiPixelGainCalibrationForHLTSoARcd.cc @@ -0,0 +1,5 @@ +#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" +#include "FWCore/Utilities/interface/typelookup.h" + +EVENTSETUP_RECORD_REG(SiPixelGainCalibrationForHLTSoARcd); diff --git a/CalibTracker/Records/src/SiPixelMappingSoARcd.cc b/CalibTracker/Records/src/SiPixelMappingSoARcd.cc new file mode 100644 index 0000000000000..fea2c978c1539 --- /dev/null +++ b/CalibTracker/Records/src/SiPixelMappingSoARcd.cc @@ -0,0 +1,5 @@ +#include "CalibTracker/Records/interface/SiPixelMappingSoARecord.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" +#include "FWCore/Utilities/interface/typelookup.h" + +EVENTSETUP_RECORD_REG(SiPixelMappingSoARecord); diff --git a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml index 05446593b6229..8de546ff8856b 100644 --- a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml @@ -1,4 +1,3 @@ - @@ -11,6 +10,14 @@ + + + + + + + + diff --git a/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelCablingSoAESProducer.cc b/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelCablingSoAESProducer.cc new file mode 100644 index 0000000000000..37f4bc6bd5945 --- /dev/null +++ b/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelCablingSoAESProducer.cc @@ -0,0 +1,140 @@ +#include "CalibTracker/Records/interface/SiPixelMappingSoARecord.h" +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include "CondFormats/DataRecord/interface/SiPixelQualityRcd.h" +#include 
"CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelQuality.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/ESGetToken.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + + class SiPixelCablingSoAESProducer : public ESProducer { + public: + SiPixelCablingSoAESProducer(edm::ParameterSet const& iConfig) + : ESProducer(iConfig), useQuality_(iConfig.getParameter("UseQualityInfo")) { + auto cc = setWhatProduced(this); + cablingMapToken_ = cc.consumes(edm::ESInputTag{"", iConfig.getParameter("CablingMapLabel")}); + if (useQuality_) { + qualityToken_ = cc.consumes(); + } + geometryToken_ = cc.consumes(); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("CablingMapLabel", "")->setComment("CablingMap label"); + desc.add("UseQualityInfo", false); + descriptions.addWithDefaultLabel(desc); + } + + std::optional produce(const SiPixelMappingSoARecord& iRecord) { + auto cablingMap = iRecord.getTransientHandle(cablingMapToken_); + 
const SiPixelQuality* quality = nullptr; + if (useQuality_) { + auto qualityInfo = iRecord.getTransientHandle(qualityToken_); + quality = qualityInfo.product(); + } + + auto geom = iRecord.getTransientHandle(geometryToken_); + SiPixelMappingHost product(pixelgpudetails::MAX_SIZE, cms::alpakatools::host()); + std::vector const& fedIds = cablingMap->fedIds(); + std::unique_ptr const& cabling = cablingMap->cablingTree(); + + unsigned int startFed = fedIds.front(); + unsigned int endFed = fedIds.back(); + + sipixelobjects::CablingPathToDetUnit path; + int index = 1; + + auto mapView = product.view(); + + mapView.hasQuality() = useQuality_; + for (unsigned int fed = startFed; fed <= endFed; fed++) { + for (unsigned int link = 1; link <= pixelgpudetails::MAX_LINK; link++) { + for (unsigned int roc = 1; roc <= pixelgpudetails::MAX_ROC; roc++) { + path = {fed, link, roc}; + const sipixelobjects::PixelROC* pixelRoc = cabling->findItem(path); + mapView[index].fed() = fed; + mapView[index].link() = link; + mapView[index].roc() = roc; + if (pixelRoc != nullptr) { + mapView[index].rawId() = pixelRoc->rawId(); + mapView[index].rocInDet() = pixelRoc->idInDetUnit(); + mapView[index].modToUnpDefault() = false; + if (quality != nullptr) + mapView[index].badRocs() = quality->IsRocBad(pixelRoc->rawId(), pixelRoc->idInDetUnit()); + else + mapView[index].badRocs() = false; + } else { // store some dummy number + mapView[index].rawId() = pixelClustering::invalidModuleId; + mapView[index].rocInDet() = pixelClustering::invalidModuleId; + mapView[index].badRocs() = true; + mapView[index].modToUnpDefault() = true; + } + index++; + } + } + } // end of FED loop + // Given FedId, Link and idinLnk; use the following formula + // to get the rawId and idinDU + // index = (FedID-1200) * MAX_LINK* MAX_ROC + (Link-1)* MAX_ROC + idinLnk; + // where, MAX_LINK = 48, MAX_ROC = 8 + // FedID varies between 1200 to 1338 (In total 108 FED's) + // Link varies between 1 to 48 + // idinLnk varies between 1 to 8 
+ + auto trackerGeom = iRecord.getTransientHandle(geometryToken_); + + for (int i = 1; i < index; i++) { + if (mapView[i].rawId() == pixelClustering::invalidModuleId) { + mapView[i].moduleId() = pixelClustering::invalidModuleId; + } else { + auto gdet = trackerGeom->idToDetUnit(mapView[i].rawId()); + if (!gdet) { + LogDebug("SiPixelCablingSoAESProducer") << " Not found: " << mapView[i].rawId() << std::endl; + continue; + } + mapView[i].moduleId() = gdet->index(); + } + LogDebug("SiPixelCablingSoAESProducer") + << "----------------------------------------------------------------------------" << std::endl; + LogDebug("SiPixelCablingSoAESProducer") << i << std::setw(20) << mapView[i].fed() << std::setw(20) + << mapView[i].link() << std::setw(20) << mapView[i].roc() << std::endl; + LogDebug("SiPixelCablingSoAESProducer") + << i << std::setw(20) << mapView[i].rawId() << std::setw(20) << mapView[i].rocInDet() << std::setw(20) + << mapView[i].moduleId() << std::endl; + LogDebug("SiPixelCablingSoAESProducer") + << i << std::setw(20) << mapView[i].badRocs() << std::setw(20) << std::endl; + LogDebug("SiPixelCablingSoAESProducer") + << "----------------------------------------------------------------------------" << std::endl; + } + + mapView.size() = index - 1; + + return product; + } + + private: + edm::ESGetToken cablingMapToken_; + edm::ESGetToken qualityToken_; + edm::ESGetToken geometryToken_; + const bool useQuality_; + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h" +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(SiPixelCablingSoAESProducer); diff --git a/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelGainCalibrationForHLTSoAESProducer.cc b/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelGainCalibrationForHLTSoAESProducer.cc new file mode 100644 index 0000000000000..935d141793a40 --- /dev/null +++ b/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelGainCalibrationForHLTSoAESProducer.cc 
@@ -0,0 +1,128 @@ +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h" +#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h" +#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "FWCore/Framework/interface/ESProducer.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/ModuleFactory.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/CommonDetUnit/interface/GeomDetType.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" + +#include + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class SiPixelGainCalibrationForHLTSoAESProducer : public ESProducer { + public: + explicit SiPixelGainCalibrationForHLTSoAESProducer(const edm::ParameterSet& iConfig); + std::unique_ptr produce(const SiPixelGainCalibrationForHLTSoARcd& iRecord); + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + private: + edm::ESGetToken gainsToken_; + edm::ESGetToken geometryToken_; + }; + + SiPixelGainCalibrationForHLTSoAESProducer::SiPixelGainCalibrationForHLTSoAESProducer(const edm::ParameterSet& iConfig) + : ESProducer(iConfig) { + auto cc = setWhatProduced(this); + gainsToken_ = cc.consumes(); + geometryToken_ = cc.consumes(); + } + + void 
SiPixelGainCalibrationForHLTSoAESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + descriptions.addWithDefaultLabel(desc); + } + + std::unique_ptr SiPixelGainCalibrationForHLTSoAESProducer::produce( + const SiPixelGainCalibrationForHLTSoARcd& iRecord) { + auto const& gains = iRecord.get(gainsToken_); + auto const& geom = iRecord.get(geometryToken_); + + auto product = std::make_unique(gains.data().size(), cms::alpakatools::host()); + + // bizarre logic (looking for first strip-det) don't ask + auto const& dus = geom.detUnits(); + unsigned int n_detectors = dus.size(); + for (unsigned int i = 1; i < 7; ++i) { + const auto offset = geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]); + if (offset != dus.size() && dus[offset]->type().isTrackerStrip()) { + if (n_detectors > offset) + n_detectors = offset; + } + } + + LogDebug("SiPixelGainCalibrationForHLTSoA") + << "caching calibs for " << n_detectors << " pixel detectors of size " << gains.data().size() << '\n' + << "sizes " << sizeof(char) << ' ' << sizeof(uint8_t) << ' ' << sizeof(siPixelGainsSoA::DecodingStructure); + + for (size_t i = 0; i < gains.data().size(); i = i + 2) { + product->view().v_pedestals()[i / 2].gain = gains.data()[i]; + product->view().v_pedestals()[i / 2].ped = gains.data()[i + 1]; + } + + //std::copy here + // do not read back from the (possibly write-combined) memory buffer + auto minPed = gains.getPedLow(); + auto maxPed = gains.getPedHigh(); + auto minGain = gains.getGainLow(); + auto maxGain = gains.getGainHigh(); + auto nBinsToUseForEncoding = 253; + + // we will simplify later (not everything is needed....) 
+ product->view().minPed() = minPed; + product->view().maxPed() = maxPed; + product->view().minGain() = minGain; + product->view().maxGain() = maxGain; + + product->view().numberOfRowsAveragedOver() = 80; + product->view().nBinsToUseForEncoding() = nBinsToUseForEncoding; + product->view().deadFlag() = 255; + product->view().noisyFlag() = 254; + + product->view().pedPrecision() = static_cast(maxPed - minPed) / nBinsToUseForEncoding; + product->view().gainPrecision() = static_cast(maxGain - minGain) / nBinsToUseForEncoding; + + LogDebug("SiPixelGainCalibrationForHLTSoA") + << "precisions g " << product->view().pedPrecision() << ' ' << product->view().gainPrecision(); + + // fill the index map + auto const& ind = gains.getIndexes(); + LogDebug("SiPixelGainCalibrationForHLTSoA") << ind.size() << " " << n_detectors; + + for (auto i = 0U; i < n_detectors; ++i) { + auto p = std::lower_bound( + ind.begin(), ind.end(), dus[i]->geographicalId().rawId(), SiPixelGainCalibrationForHLT::StrictWeakOrdering()); + assert(p != ind.end() && p->detid == dus[i]->geographicalId()); + assert(p->iend <= gains.data().size()); + assert(p->iend >= p->ibegin); + assert(0 == p->ibegin % 2); + assert(0 == p->iend % 2); + assert(p->ibegin != p->iend); + assert(p->ncols > 0); + + product->view().modStarts()[i] = p->ibegin; + product->view().modEnds()[i] = p->iend; + product->view().modCols()[i] = p->ncols; + + if (ind[i].detid != dus[i]->geographicalId()) + LogDebug("SiPixelGainCalibrationForHLTSoA") << ind[i].detid << "!=" << dus[i]->geographicalId(); + } + + return product; + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(SiPixelGainCalibrationForHLTSoAESProducer); diff --git a/CondFormats/SiPixelObjects/BuildFile.xml b/CondFormats/SiPixelObjects/BuildFile.xml index 1d9b8d6b19f53..ddd87c956d217 100644 --- a/CondFormats/SiPixelObjects/BuildFile.xml +++ b/CondFormats/SiPixelObjects/BuildFile.xml @@ -1,3 +1,4 @@ + @@ -12,6 +13,9 @@ + + + diff --git 
a/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h b/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h new file mode 100644 index 0000000000000..28361ab184073 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h @@ -0,0 +1,9 @@ +#ifndef CondFormats_SiPixelObjects_SiPixelGainCalibrationForHLTHost_h +#define CondFormats_SiPixelObjects_SiPixelGainCalibrationForHLTHost_h + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" + +using SiPixelGainCalibrationForHLTHost = PortableHostCollection; + +#endif // CondFormats_SiPixelObjects_SiPixelGainCalibrationForHLTHost_h diff --git a/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h b/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h new file mode 100644 index 0000000000000..03c1c37c61046 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h @@ -0,0 +1,42 @@ +#ifndef CondFormats_SiPixelObjects_interface_SiPixelGainCalibrationForHLTLayout_h +#define CondFormats_SiPixelObjects_interface_SiPixelGainCalibrationForHLTLayout_h + +#include +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +namespace siPixelGainsSoA { + struct DecodingStructure { + uint8_t gain; + uint8_t ped; + }; + + using Ranges = std::array; + using Cols = std::array; +} // namespace siPixelGainsSoA + +GENERATE_SOA_LAYOUT(SiPixelGainCalibrationForHLTLayout, + SOA_COLUMN(siPixelGainsSoA::DecodingStructure, v_pedestals), + + SOA_SCALAR(siPixelGainsSoA::Ranges, modStarts), + SOA_SCALAR(siPixelGainsSoA::Ranges, modEnds), + SOA_SCALAR(siPixelGainsSoA::Cols, modCols), + + SOA_SCALAR(float, minPed), + SOA_SCALAR(float, maxPed), + SOA_SCALAR(float, minGain), + SOA_SCALAR(float, maxGain), + SOA_SCALAR(float, pedPrecision), + 
SOA_SCALAR(float, gainPrecision), + + SOA_SCALAR(unsigned int, numberOfRowsAveragedOver), + SOA_SCALAR(unsigned int, nBinsToUseForEncoding), + SOA_SCALAR(unsigned int, deadFlag), + SOA_SCALAR(unsigned int, noisyFlag), + SOA_SCALAR(float, link)) + +using SiPixelGainCalibrationForHLTSoA = SiPixelGainCalibrationForHLTLayout<>; +using SiPixelGainCalibrationForHLTSoAView = SiPixelGainCalibrationForHLTSoA::View; +using SiPixelGainCalibrationForHLTSoAConstView = SiPixelGainCalibrationForHLTSoA::ConstView; + +#endif // CondFormats_SiPixelObjects_interface_SiPixelGainCalibrationForHLTLayout_h diff --git a/CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h b/CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h new file mode 100644 index 0000000000000..772a7a97e267b --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h @@ -0,0 +1,10 @@ +#ifndef CondFormats_SiPixelObjects_SiPixelMappingHost_h +#define CondFormats_SiPixelObjects_SiPixelMappingHost_h + +#include +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h" + +using SiPixelMappingHost = PortableHostCollection; + +#endif // CondFormats_SiPixelObjects_SiPixelMappingHost_h diff --git a/CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h b/CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h new file mode 100644 index 0000000000000..ef123d443c795 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h @@ -0,0 +1,24 @@ +#ifndef CondFormats_SiPixelObjects_interface_SiPixelMappingLayout_h +#define CondFormats_SiPixelObjects_interface_SiPixelMappingLayout_h + +#include +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h" + +GENERATE_SOA_LAYOUT(SiPixelMappingLayout, + SOA_COLUMN(unsigned int, fed), + SOA_COLUMN(unsigned int, link), + SOA_COLUMN(unsigned int, roc), + SOA_COLUMN(unsigned 
int, rawId), + SOA_COLUMN(unsigned int, rocInDet), + SOA_COLUMN(unsigned int, moduleId), + SOA_COLUMN(bool, badRocs), + SOA_COLUMN(unsigned char, modToUnpDefault), + SOA_SCALAR(unsigned int, size), + SOA_SCALAR(bool, hasQuality)) + +using SiPixelMappingSoA = SiPixelMappingLayout<>; +using SiPixelMappingSoAView = SiPixelMappingSoA::View; +using SiPixelMappingSoAConstView = SiPixelMappingSoA::ConstView; + +#endif // CondFormats_SiPixelObjects_interface_SiPixelMappingLayout_h diff --git a/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h new file mode 100644 index 0000000000000..3c5e7094654c6 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h @@ -0,0 +1,13 @@ +#ifndef CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTDevice_h +#define CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTDevice_h + +#include +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using SiPixelGainCalibrationForHLTDevice = PortableCollection; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTDevice_h diff --git a/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTUtilities.h b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTUtilities.h new file mode 100644 index 0000000000000..1fbce15dbe231 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTUtilities.h @@ -0,0 +1,41 @@ +#ifndef CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTUtilities_h +#define CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTUtilities_h + +#include +#include 
+#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" + +struct SiPixelGainUtilities { + ALPAKA_FN_HOST_ACC ALPAKA_FN_ACC ALPAKA_FN_INLINE static std::pair getPedAndGain( + const SiPixelGainCalibrationForHLTSoAConstView& view, + uint32_t moduleInd, + int col, + int row, + bool& isDeadColumn, + bool& isNoisyColumn) { + auto start = view.modStarts()[moduleInd]; + auto end = view.modEnds()[moduleInd]; + auto nCols = view.modCols()[moduleInd]; + // determine what averaged data block we are in (there should be 1 or 2 of these depending on if plaquette is 1 by X or 2 by X + unsigned int lengthOfColumnData = (end - start) / nCols; + unsigned int lengthOfAveragedDataInEachColumn = 2; // we always only have two values per column averaged block + unsigned int numberOfDataBlocksToSkip = row / view.numberOfRowsAveragedOver(); + + auto offset = start + col * lengthOfColumnData + lengthOfAveragedDataInEachColumn * numberOfDataBlocksToSkip; + assert(offset < end); + assert(offset < 3088384); + assert(0 == offset % 2); + + auto lp = view.v_pedestals(); + auto s = lp[offset / 2]; + + isDeadColumn = (s.ped & 0xFF) == view.deadFlag(); + isNoisyColumn = (s.ped & 0xFF) == view.noisyFlag(); + float decodeGain = float(s.gain & 0xFF) * view.gainPrecision() + view.minGain(); + float decodePed = float(s.ped & 0xFF) * view.pedPrecision() + view.minPed(); + + return std::make_pair(decodePed, decodeGain); + }; +}; + +#endif //CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTUtilities_h \ No newline at end of file diff --git a/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h new file mode 100644 index 0000000000000..8a16caa0d7368 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h @@ -0,0 +1,17 @@ +#ifndef CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingDevice_h +#define 
CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingDevice_h + +#include +#include +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using SiPixelMappingDevice = PortableCollection; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // DataFormats_SiPixelMappingSoA_alpaka_SiPixelClustersDevice_h diff --git a/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingUtilities.h b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingUtilities.h new file mode 100644 index 0000000000000..800cf0ac671cd --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingUtilities.h @@ -0,0 +1,53 @@ +#ifndef CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingUtilities_h +#define CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingUtilities_h + +#include +#include +#include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + struct SiPixelMappingUtilities { + ALPAKA_FN_HOST_ACC ALPAKA_FN_ACC ALPAKA_FN_INLINE static bool hasQuality(const SiPixelMappingSoAConstView& view) { + return view.hasQuality(); + } + + ALPAKA_FN_HOST_ACC ALPAKA_FN_ACC ALPAKA_FN_INLINE static cms::alpakatools::device_buffer + getModToUnpRegionalAsync(std::set const& modules, + const SiPixelFedCablingTree* cabling, + std::vector const& fedIds, + Queue& queue) { + auto modToUnpDevice = cms::alpakatools::make_device_buffer(queue, pixelgpudetails::MAX_SIZE); + auto modToUnpHost = cms::alpakatools::make_host_buffer(queue, 
pixelgpudetails::MAX_SIZE); + + unsigned int startFed = fedIds.front(); + unsigned int endFed = fedIds.back() - 1; + + sipixelobjects::CablingPathToDetUnit path; + int index = 1; + + for (unsigned int fed = startFed; fed <= endFed; fed++) { + for (unsigned int link = 1; link <= pixelgpudetails::MAX_LINK; link++) { + for (unsigned int roc = 1; roc <= pixelgpudetails::MAX_ROC; roc++) { + path = {fed, link, roc}; + const sipixelobjects::PixelROC* pixelRoc = cabling->findItem(path); + if (pixelRoc != nullptr) { + modToUnpHost[index] = (not modules.empty()) and (modules.find(pixelRoc->rawId()) == modules.end()); + } else { // store some dummy number + modToUnpHost[index] = true; + } + index++; + } + } + } + + alpaka::memcpy(queue, modToUnpDevice, modToUnpHost); + + return modToUnpDevice; + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif //CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingUtilities_h diff --git a/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelGainCalibrationForHLTHost.cc b/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelGainCalibrationForHLTHost.cc new file mode 100644 index 0000000000000..be54c23dd8df6 --- /dev/null +++ b/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelGainCalibrationForHLTHost.cc @@ -0,0 +1,4 @@ +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(SiPixelGainCalibrationForHLTHost); diff --git a/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelMappingHost.cc b/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelMappingHost.cc new file mode 100644 index 0000000000000..27201b65add22 --- /dev/null +++ b/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelMappingHost.cc @@ -0,0 +1,4 @@ +#include "CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(SiPixelMappingHost); \ No newline at end of file diff --git 
a/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelGainCalibrationForHLTDevice.cc b/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelGainCalibrationForHLTDevice.cc new file mode 100644 index 0000000000000..fec7ca3ba1c52 --- /dev/null +++ b/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelGainCalibrationForHLTDevice.cc @@ -0,0 +1,4 @@ +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_DATA_REG(SiPixelGainCalibrationForHLTDevice); \ No newline at end of file diff --git a/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelMappingDevice.cc b/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelMappingDevice.cc new file mode 100644 index 0000000000000..0b86fdf64978b --- /dev/null +++ b/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelMappingDevice.cc @@ -0,0 +1,4 @@ +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_DATA_REG(SiPixelMappingDevice); diff --git a/DataFormats/SiPixelClusterSoA/BuildFile.xml b/DataFormats/SiPixelClusterSoA/BuildFile.xml new file mode 100644 index 0000000000000..c9b7e4ef81817 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/BuildFile.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h b/DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h new file mode 100644 index 0000000000000..6726c1d29d5c9 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h @@ -0,0 +1,35 @@ +#ifndef DataFormats_SiPixelClusterSoA_interface_ClusteringConstants_h +#define DataFormats_SiPixelClusterSoA_interface_ClusteringConstants_h + +#include +#include + +//TODO: move this to TrackerTraits! 
+namespace pixelClustering { +#ifdef GPU_SMALL_EVENTS + // kept for testing and debugging + constexpr uint32_t maxHitsInIter() { return 64; } +#else + // optimized for real data PU 50 + // tested on MC events with 55-75 pileup events + constexpr uint32_t maxHitsInIter() { return 160; } //TODO better tuning for PU 140-200 +#endif + constexpr uint32_t maxHitsInModule() { return 1024; } + + constexpr uint16_t clusterThresholdLayerOne = 2000; + constexpr uint16_t clusterThresholdOtherLayers = 4000; + + constexpr uint16_t clusterThresholdPhase2LayerOne = 4000; + constexpr uint16_t clusterThresholdPhase2OtherLayers = 4000; + + constexpr uint32_t maxNumDigis = 3 * 256 * 1024; // @PU=200 µ=530k σ=50k this is >4σ away + constexpr uint16_t maxNumModules = 4000; + + constexpr int32_t maxNumClustersPerModules = maxHitsInModule(); + constexpr uint16_t invalidModuleId = std::numeric_limits::max() - 1; + constexpr int invalidClusterId = -9999; + static_assert(invalidModuleId > maxNumModules); // invalidModuleId must be > maxNumModules + +} // namespace pixelClustering + +#endif // DataFormats_SiPixelClusterSoA_interface_ClusteringConstants_h diff --git a/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h new file mode 100644 index 0000000000000..2593475bf5c3a --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h @@ -0,0 +1,38 @@ +#ifndef DataFormats_SiPixelClusterSoA_interface_SiPixelClustersDevice_h +#define DataFormats_SiPixelClusterSoA_interface_SiPixelClustersDevice_h + +#include +#include +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" + +template +class 
SiPixelClustersDevice : public PortableDeviceCollection { +public: + SiPixelClustersDevice() = default; + + template + explicit SiPixelClustersDevice(size_t maxModules, TQueue queue) + : PortableDeviceCollection(maxModules + 1, queue) {} + + // Constructor which specifies the SoA size + explicit SiPixelClustersDevice(size_t maxModules, TDev const &device) + : PortableDeviceCollection(maxModules + 1, device) {} + + void setNClusters(uint32_t nClusters, int32_t offsetBPIX2) { + nClusters_h = nClusters; + offsetBPIX2_h = offsetBPIX2; + } + + uint32_t nClusters() const { return nClusters_h; } + int32_t offsetBPIX2() const { return offsetBPIX2_h; } + +private: + uint32_t nClusters_h = 0; + int32_t offsetBPIX2_h = 0; +}; + +#endif // DataFormats_SiPixelClusterSoA_interface_SiPixelClustersDevice_h diff --git a/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h new file mode 100644 index 0000000000000..eb086160a6188 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h @@ -0,0 +1,33 @@ +#ifndef DataFormats_SiPixelClusterSoA_interface_SiPixelClustersHost_h +#define DataFormats_SiPixelClusterSoA_interface_SiPixelClustersHost_h + +#include +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" + +// TODO: The class is created via inheritance of the PortableCollection. +// This is generally discouraged, and should be done via composition. 
+// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 +class SiPixelClustersHost : public PortableHostCollection { +public: + SiPixelClustersHost() = default; + + template + explicit SiPixelClustersHost(size_t maxModules, TQueue queue) + : PortableHostCollection(maxModules + 1, queue) {} + + void setNClusters(uint32_t nClusters, int32_t offsetBPIX2) { + nClusters_h = nClusters; + offsetBPIX2_h = offsetBPIX2; + } + + uint32_t nClusters() const { return nClusters_h; } + int32_t offsetBPIX2() const { return offsetBPIX2_h; } + +private: + uint32_t nClusters_h = 0; + int32_t offsetBPIX2_h = 0; +}; + +#endif // DataFormats_SiPixelClusterSoA_interface_SiPixelClustersHost_h diff --git a/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h new file mode 100644 index 0000000000000..c44c0148662ff --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h @@ -0,0 +1,16 @@ +#ifndef DataFormats_SiPixelClusterSoA_SiPixelClustersLayout_h +#define DataFormats_SiPixelClusterSoA_SiPixelClustersLayout_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +GENERATE_SOA_LAYOUT(SiPixelClustersLayout, + SOA_COLUMN(uint32_t, moduleStart), + SOA_COLUMN(uint32_t, clusInModule), + SOA_COLUMN(uint32_t, moduleId), + SOA_COLUMN(uint32_t, clusModuleStart)) + +using SiPixelClustersSoA = SiPixelClustersLayout<>; +using SiPixelClustersSoAView = SiPixelClustersSoA::View; +using SiPixelClustersSoAConstView = SiPixelClustersSoA::ConstView; + +#endif // DataFormats_SiPixelClusterSoA_SiPixelClustersLayout_h diff --git a/DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h b/DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h new file mode 100644 index 0000000000000..c5e35475b5330 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h @@ -0,0 +1,35 @@ +#ifndef 
DataFormats_SiPixelClusterSoA_interface_alpaka_SiPixelClustersSoACollection_h +#define DataFormats_SiPixelClusterSoA_interface_alpaka_SiPixelClustersSoACollection_h + +#include + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using SiPixelClustersSoACollection = + std::conditional_t, SiPixelClustersHost, SiPixelClustersDevice>; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template + struct CopyToHost> { + template + static auto copyAsync(TQueue &queue, SiPixelClustersDevice const &srcData) { + SiPixelClustersHost dstData(srcData->metadata().size(), queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + dstData.setNClusters(srcData.nClusters(), srcData.offsetBPIX2()); +#ifdef GPU_DEBUG //keeping this untiil copies are in the Tracer + printf("SiPixelClustersSoACollection: I'm copying to host.\n"); +#endif + return dstData; + } + }; +} // namespace cms::alpakatools + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(SiPixelClustersSoACollection, SiPixelClustersHost); +#endif // DataFormats_SiPixelClusterSoA_interface_alpaka_SiPixelClustersSoACollection_h diff --git a/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda.h b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda.h new file mode 100644 index 0000000000000..e54864699fb73 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda.h @@ -0,0 +1,8 @@ +#ifndef DataFormats_SiPixelClusterSoA_src_alpaka_classes_cuda_h +#define DataFormats_SiPixelClusterSoA_src_alpaka_classes_cuda_h + +#include "DataFormats/Common/interface/Wrapper.h" 
+#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" + +#endif // DataFormats_SiPixelClusterSoA_src_alpaka_classes_cuda_h diff --git a/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda_def.xml new file mode 100644 index 0000000000000..b9858c3fbffdd --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda_def.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm.h b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm.h new file mode 100644 index 0000000000000..bd510fa1618b0 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm.h @@ -0,0 +1,8 @@ +#ifndef DataFormats_SiPixelClusterSoA_src_alpaka_classes_rocm_h +#define DataFormats_SiPixelClusterSoA_src_alpaka_classes_rocm_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" + +#endif // DataFormats_SiPixelClusterSoA_src_alpaka_classes_rocm_h diff --git a/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm_def.xml new file mode 100644 index 0000000000000..d27887904579c --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm_def.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/SiPixelClusterSoA/src/classes.cc b/DataFormats/SiPixelClusterSoA/src/classes.cc new file mode 100644 index 0000000000000..70b4f7b100cb4 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/classes.cc @@ -0,0 +1,4 @@ +#include "DataFormats/Portable/interface/PortableHostCollectionReadRules.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" + +SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection); \ No newline at end of file diff --git 
a/DataFormats/SiPixelClusterSoA/src/classes.h b/DataFormats/SiPixelClusterSoA/src/classes.h new file mode 100644 index 0000000000000..8514c7732375b --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/classes.h @@ -0,0 +1,7 @@ +#ifndef DataFormats_SiPixelClusterSoA_src_classes_h +#define DataFormats_SiPixelClusterSoA_src_classes_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" + +#endif // DataFormats_SiPixelClusterSoA_src_classes_h diff --git a/DataFormats/SiPixelClusterSoA/src/classes_def.xml b/DataFormats/SiPixelClusterSoA/src/classes_def.xml new file mode 100644 index 0000000000000..96b9df2725473 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/classes_def.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/DataFormats/SiPixelClusterSoA/test/BuildFile.xml b/DataFormats/SiPixelClusterSoA/test/BuildFile.xml new file mode 100644 index 0000000000000..ed54aae76ecab --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/test/BuildFile.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.cc b/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.cc new file mode 100644 index 0000000000000..d96469858b916 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.cc @@ -0,0 +1,45 @@ +#include + +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include 
"HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace testClusterSoA { + + void runKernels(SiPixelClustersSoAView clust_view, Queue& queue); + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +int main() { + const auto host = cms::alpakatools::host(); + const auto device = cms::alpakatools::devices()[0]; + Queue queue(device); + + // Inner scope to deallocate memory before destroying the stream + { + // Instantiate tracks on device. PortableDeviceCollection allocates + // SoA on device automatically. + SiPixelClustersSoACollection clusters_d(100, queue); + testClusterSoA::runKernels(clusters_d.view(), queue); + + // Instantate tracks on host. This is where the data will be + // copied to from device. + SiPixelClustersHost clusters_h(clusters_d.view().metadata().size(), queue); + + std::cout << clusters_h.view().metadata().size() << std::endl; + alpaka::memcpy(queue, clusters_h.buffer(), clusters_d.const_buffer()); + alpaka::wait(queue); + } + + return 0; +} diff --git a/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.dev.cc b/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.dev.cc new file mode 100644 index 0000000000000..684380dcbdfbc --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.dev.cc @@ -0,0 +1,49 @@ +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +using namespace alpaka; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + namespace testClusterSoA { + + class TestFillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, 
SiPixelClustersSoAView clust_view) const { + for (int32_t j : elements_with_stride(acc, clust_view.metadata().size())) { + clust_view[j].moduleStart() = j; + clust_view[j].clusInModule() = j * 2; + clust_view[j].moduleId() = j * 3; + clust_view[j].clusModuleStart() = j * 4; + } + } + }; + + class TestVerifyKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelClustersSoAConstView clust_view) const { + for (uint32_t j : elements_with_stride(acc, clust_view.metadata().size())) { + assert(clust_view[j].moduleStart() == j); + assert(clust_view[j].clusInModule() == j * 2); + assert(clust_view[j].moduleId() == j * 3); + assert(clust_view[j].clusModuleStart() == j * 4); + } + } + }; + + void runKernels(SiPixelClustersSoAView clust_view, Queue& queue) { + uint32_t items = 64; + uint32_t groups = divide_up_by(clust_view.metadata().size(), items); + auto workDiv = make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestFillKernel{}, clust_view); + alpaka::exec(queue, workDiv, TestVerifyKernel{}, clust_view); + } + + } // namespace testClusterSoA +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/DataFormats/SiPixelDigiSoA/BuildFile.xml b/DataFormats/SiPixelDigiSoA/BuildFile.xml new file mode 100644 index 0000000000000..538802f92c3ca --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/BuildFile.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h new file mode 100644 index 0000000000000..36c7d0be7e88a --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h @@ -0,0 +1,33 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsDevice_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsDevice_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include 
"DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +template +class SiPixelDigiErrorsDevice : public PortableDeviceCollection { +public: + SiPixelDigiErrorsDevice() = default; + template + explicit SiPixelDigiErrorsDevice(size_t maxFedWords, TQueue queue) + : PortableDeviceCollection(maxFedWords, queue), maxFedWords_(maxFedWords) {} + + // Constructor which specifies the SoA size + explicit SiPixelDigiErrorsDevice(size_t maxFedWords, TDev const& device) + : PortableDeviceCollection(maxFedWords, device) {} + + auto& error_data() const { return (*this->view().pixelErrors()); } + auto maxFedWords() const { return maxFedWords_; } + +private: + int maxFedWords_; +}; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsDevice_h diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h new file mode 100644 index 0000000000000..ac706dea4b544 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h @@ -0,0 +1,30 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsHost_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsHost_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +class SiPixelDigiErrorsHost : public PortableHostCollection { +public: + SiPixelDigiErrorsHost() = default; + template + explicit SiPixelDigiErrorsHost(int maxFedWords, TQueue queue) + : PortableHostCollection(maxFedWords, 
queue), maxFedWords_(maxFedWords) {} + + int maxFedWords() const { return maxFedWords_; } + + auto& error_data() { return (*view().pixelErrors()); } + auto const& error_data() const { return (*view().pixelErrors()); } + +private: + int maxFedWords_ = 0; +}; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsHost_h diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h new file mode 100644 index 0000000000000..b6398bc840c5b --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h @@ -0,0 +1,14 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsSoA_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" + +GENERATE_SOA_LAYOUT(SiPixelDigiErrorsLayout, SOA_COLUMN(SiPixelErrorCompact, pixelErrors), SOA_SCALAR(uint32_t, size)) + +using SiPixelDigiErrorsSoA = SiPixelDigiErrorsLayout<>; +using SiPixelDigiErrorsSoAView = SiPixelDigiErrorsSoA::View; +using SiPixelDigiErrorsSoAConstView = SiPixelDigiErrorsSoA::ConstView; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsSoA_h diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h new file mode 100644 index 0000000000000..1748069685923 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h @@ -0,0 +1,37 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigisDevice_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigisDevice_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + 
+template +class SiPixelDigisDevice : public PortableDeviceCollection { +public: + SiPixelDigisDevice() = default; + template + explicit SiPixelDigisDevice(size_t maxFedWords, TQueue queue) + : PortableDeviceCollection(maxFedWords + 1, queue) {} + + // Constructor which specifies the SoA size + explicit SiPixelDigisDevice(size_t maxFedWords, TDev const &device) + : PortableDeviceCollection(maxFedWords + 1, device) {} + + void setNModulesDigis(uint32_t nModules, uint32_t nDigis) { + nModules_h = nModules; + nDigis_h = nDigis; + } + + uint32_t nModules() const { return nModules_h; } + uint32_t nDigis() const { return nDigis_h; } + +private: + uint32_t nModules_h = 0; + uint32_t nDigis_h = 0; +}; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigisDevice_h diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h new file mode 100644 index 0000000000000..4e4650efac1cb --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h @@ -0,0 +1,30 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigisHost_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigisHost_h + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" + +// TODO: The class is created via inheritance of the PortableDeviceCollection. +// This is generally discouraged, and should be done via composition. 
+// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 +class SiPixelDigisHost : public PortableHostCollection { +public: + SiPixelDigisHost() = default; + template + explicit SiPixelDigisHost(size_t maxFedWords, TQueue queue) + : PortableHostCollection(maxFedWords + 1, queue) {} + + void setNModulesDigis(uint32_t nModules, uint32_t nDigis) { + nModules_h = nModules; + nDigis_h = nDigis; + } + + uint32_t nModules() const { return nModules_h; } + uint32_t nDigis() const { return nDigis_h; } + +private: + uint32_t nModules_h = 0; + uint32_t nDigis_h = 0; +}; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigisHost_h diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h new file mode 100644 index 0000000000000..2c7c5e1079513 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h @@ -0,0 +1,19 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigisSoA_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigisSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +GENERATE_SOA_LAYOUT(SiPixelDigisLayout, + SOA_COLUMN(int32_t, clus), + SOA_COLUMN(uint32_t, pdigi), + SOA_COLUMN(uint32_t, rawIdArr), + SOA_COLUMN(uint16_t, adc), + SOA_COLUMN(uint16_t, xx), + SOA_COLUMN(uint16_t, yy), + SOA_COLUMN(uint16_t, moduleId)) + +using SiPixelDigisSoA = SiPixelDigisLayout<>; +using SiPixelDigisSoAView = SiPixelDigisSoA::View; +using SiPixelDigisSoAConstView = SiPixelDigisSoA::ConstView; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigisSoA_h diff --git a/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h new file mode 100644 index 0000000000000..673a22bd23a1e --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h @@ -0,0 +1,39 @@ +#ifndef 
DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigiErrorsSoACollection_h +#define DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigiErrorsSoACollection_h + +#include + +#include + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using SiPixelDigiErrorsSoACollection = + std::conditional_t, SiPixelDigiErrorsHost, SiPixelDigiErrorsDevice>; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template + struct CopyToHost> { + template + static auto copyAsync(TQueue& queue, SiPixelDigiErrorsDevice const& srcData) { + SiPixelDigiErrorsHost dstData(srcData.maxFedWords(), queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); +#ifdef GPU_DEBUG + printf("SiPixelDigiErrorsSoACollection: I'm copying to host.\n"); +#endif + return dstData; + } + }; +} // namespace cms::alpakatools + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(SiPixelDigiErrorsSoACollection, SiPixelDigiErrorsHost); + +#endif // DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigiErrorsSoACollection_h diff --git a/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h new file mode 100644 index 0000000000000..2fe60454d553f --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h @@ -0,0 +1,36 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigisSoACollection_h +#define DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigisSoACollection_h + +#include + +#include + +#include 
"DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using SiPixelDigisSoACollection = + std::conditional_t, SiPixelDigisHost, SiPixelDigisDevice>; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template + struct CopyToHost> { + template + static auto copyAsync(TQueue &queue, SiPixelDigisDevice const &srcData) { + SiPixelDigisHost dstData(srcData.view().metadata().size(), queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + dstData.setNModulesDigis(srcData.nModules(), srcData.nDigis()); + return dstData; + } + }; +} // namespace cms::alpakatools + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(SiPixelDigisSoACollection, SiPixelDigisHost); + +#endif // DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigisSoACollection_h diff --git a/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda.h b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda.h new file mode 100644 index 0000000000000..d2fb20448545c --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda.h @@ -0,0 +1,12 @@ +#ifndef DataFormats_SiPixelDigiSoA_Alpaka_Classes_cuda_h +#define DataFormats_SiPixelDigiSoA_Alpaka_Classes_cuda_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" + +#endif // 
DataFormats_SiPixelDigiSoA_src_alpaka_classes_cuda_h diff --git a/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda_def.xml new file mode 100644 index 0000000000000..7315bc37eeb1b --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda_def.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm.h b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm.h new file mode 100644 index 0000000000000..db5bf9385f99d --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm.h @@ -0,0 +1,13 @@ +#ifndef DataFormats_SiPixelDigiSoA_src_alpaka_classes_rocm_h +#define DataFormats_SiPixelDigiSoA_src_alpaka_classes_rocm_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" + +#endif // DataFormats_SiPixelDigiSoA_src_alpaka_classes_rocm_h diff --git a/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm_def.xml new file mode 100644 index 0000000000000..21deb7bbd46dc --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm_def.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/DataFormats/SiPixelDigiSoA/src/classes.cc b/DataFormats/SiPixelDigiSoA/src/classes.cc new file mode 100644 index 0000000000000..9022a3102107e --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/classes.cc @@ -0,0 +1,6 @@ +#include 
"DataFormats/Portable/interface/PortableHostCollectionReadRules.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" + +SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection); +SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection); diff --git a/DataFormats/SiPixelDigiSoA/src/classes.h b/DataFormats/SiPixelDigiSoA/src/classes.h new file mode 100644 index 0000000000000..427a4c972863d --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/classes.h @@ -0,0 +1,10 @@ +#ifndef DataFormats_SiPixelDigisSoA_src_classes_h +#define DataFormats_SiPixelDigisSoA_src_classes_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" + +#endif // DataFormats_SiPixelClusterSoA_src_classes_h diff --git a/DataFormats/SiPixelDigiSoA/src/classes_def.xml b/DataFormats/SiPixelDigiSoA/src/classes_def.xml new file mode 100644 index 0000000000000..c68be4a01bf5a --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/classes_def.xml @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/DataFormats/SiPixelDigiSoA/test/BuildFile.xml b/DataFormats/SiPixelDigiSoA/test/BuildFile.xml new file mode 100644 index 0000000000000..b4bd8297f5011 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/test/BuildFile.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.cc b/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.cc new file mode 100644 index 0000000000000..4703e68630f35 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.cc @@ -0,0 +1,54 @@ +#include +#include + +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h" +#include 
"DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace testDigisSoA { + + void runKernels(SiPixelDigiErrorsSoAView digiErrors_view, Queue& queue); + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +int main() { + const auto host = cms::alpakatools::host(); + const auto device = cms::alpakatools::devices()[0]; + Queue queue(device); + + // Inner scope to deallocate memory before destroying the stream + { + // Instantiate tracks on device. PortableDeviceCollection allocates + // SoA on device automatically. + SiPixelDigiErrorsSoACollection digiErrors_d(1000, queue); + testDigisSoA::runKernels(digiErrors_d.view(), queue); + + // Instantate tracks on host. This is where the data will be + // copied to from device. 
+ SiPixelDigiErrorsHost digiErrors_h(digiErrors_d.view().metadata().size(), queue); + alpaka::memcpy(queue, digiErrors_h.buffer(), digiErrors_d.const_buffer()); + std::cout << "digiErrors_h.view().metadata().size(): " << digiErrors_h.view().metadata().size() << std::endl; + std::cout << "digiErrors_h.view()[100].pixelErrors().rawId: " << digiErrors_h.view()[100].pixelErrors().rawId + << std::endl; + std::cout << "digiErrors_h.view()[100].pixelErrors().word: " << digiErrors_h.view()[100].pixelErrors().word + << std::endl; + std::cout << "digiErrors_h.view()[100].pixelErrors().errorType: " + << digiErrors_h.view()[100].pixelErrors().errorType << std::endl; + std::cout << "digiErrors_h.view()[100].pixelErrors().fedId: " << digiErrors_h.view()[100].pixelErrors().fedId + << std::endl; + alpaka::wait(queue); + } + + return 0; +} diff --git a/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.dev.cc b/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.dev.cc new file mode 100644 index 0000000000000..c7add92dab018 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.dev.cc @@ -0,0 +1,50 @@ +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +using namespace alpaka; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + namespace testDigisSoA { + + class TestFillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelDigiErrorsSoAView digiErrors_view) const { + for (uint32_t j : elements_with_stride(acc, digiErrors_view.metadata().size())) { + digiErrors_view[j].pixelErrors().rawId = j; + 
digiErrors_view[j].pixelErrors().word = j; + digiErrors_view[j].pixelErrors().errorType = j; + digiErrors_view[j].pixelErrors().fedId = j; + } + } + }; + + class TestVerifyKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelDigiErrorsSoAConstView digiErrors_view) const { + for (uint32_t j : elements_with_stride(acc, digiErrors_view.metadata().size())) { + assert(digiErrors_view[j].pixelErrors().rawId == j); + assert(digiErrors_view[j].pixelErrors().word == j); + assert(digiErrors_view[j].pixelErrors().errorType == j % 256); + assert(digiErrors_view[j].pixelErrors().fedId == j % 256); + } + } + }; + + void runKernels(SiPixelDigiErrorsSoAView digiErrors_view, Queue& queue) { + uint32_t items = 64; + uint32_t groups = divide_up_by(digiErrors_view.metadata().size(), items); + auto workDiv = make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestFillKernel{}, digiErrors_view); + alpaka::exec(queue, workDiv, TestVerifyKernel{}, digiErrors_view); + } + + } // namespace testDigisSoA +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.cc b/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.cc new file mode 100644 index 0000000000000..f1d9ce9cd2b37 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.cc @@ -0,0 +1,48 @@ +#include + +#include + +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include 
"HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace testDigisSoA { + + void runKernels(SiPixelDigisSoAView digis_view, Queue& queue); + + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +int main() { + const auto host = cms::alpakatools::host(); + const auto device = cms::alpakatools::devices()[0]; + Queue queue(device); + + // Inner scope to deallocate memory before destroying the stream + { + // Instantiate tracks on device. PortableDeviceCollection allocates + // SoA on device automatically. + SiPixelDigisSoACollection digis_d(1000, queue); + testDigisSoA::runKernels(digis_d.view(), queue); + + // Instantate tracks on host. This is where the data will be + // copied to from device. + SiPixelDigisHost digis_h(digis_d.view().metadata().size(), queue); + + std::cout << digis_h.view().metadata().size() << std::endl; + alpaka::memcpy(queue, digis_h.buffer(), digis_d.const_buffer()); + alpaka::wait(queue); + } + + return 0; +} diff --git a/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.dev.cc b/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.dev.cc new file mode 100644 index 0000000000000..9bb35bfc4d7f8 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.dev.cc @@ -0,0 +1,49 @@ +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +using namespace alpaka; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + namespace testDigisSoA { + + class TestFillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelDigisSoAView digi_view) const { + for (int32_t j : 
elements_with_stride(acc, digi_view.metadata().size())) { + digi_view[j].clus() = j; + digi_view[j].rawIdArr() = j * 2; + digi_view[j].xx() = j * 3; + digi_view[j].moduleId() = j * 4; + } + } + }; + + class TestVerifyKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelDigisSoAConstView digi_view) const { + for (uint32_t j : elements_with_stride(acc, digi_view.metadata().size())) { + assert(digi_view[j].clus() == int(j)); + assert(digi_view[j].rawIdArr() == j * 2); + assert(digi_view[j].xx() == j * 3); + assert(digi_view[j].moduleId() == j * 4); + } + } + }; + + void runKernels(SiPixelDigisSoAView digi_view, Queue& queue) { + uint32_t items = 64; + uint32_t groups = divide_up_by(digi_view.metadata().size(), items); + auto workDiv = make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestFillKernel{}, digi_view); + alpaka::exec(queue, workDiv, TestVerifyKernel{}, digi_view); + } + + } // namespace testDigisSoA +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/DataFormats/SiPixelRawData/src/classes.h b/DataFormats/SiPixelRawData/src/classes.h index 7a07e9f35f388..9adc3a440e27b 100644 --- a/DataFormats/SiPixelRawData/src/classes.h +++ b/DataFormats/SiPixelRawData/src/classes.h @@ -1,10 +1,13 @@ #ifndef SIPIXELRAWDATA_CLASSES_H #define SIPIXELRAWDATA_CLASSES_H -#include "DataFormats/SiPixelRawData/interface/SiPixelRawDataError.h" -#include "DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h" -#include "DataFormats/Common/interface/Wrapper.h" -#include "DataFormats/Common/interface/DetSetVector.h" #include +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelRawDataError.h" + #endif // 
SIPIXELRAWDATA_CLASSES_H diff --git a/DataFormats/SiPixelRawData/src/classes_def.xml b/DataFormats/SiPixelRawData/src/classes_def.xml index fd2b5dcf27965..3535bbc430a53 100644 --- a/DataFormats/SiPixelRawData/src/classes_def.xml +++ b/DataFormats/SiPixelRawData/src/classes_def.xml @@ -5,16 +5,23 @@ - + + - + + + + + - - - - - + + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/BuildFile.xml b/DataFormats/TrackingRecHitSoA/BuildFile.xml new file mode 100644 index 0000000000000..a7c80171ef4df --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/BuildFile.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h b/DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h new file mode 100644 index 0000000000000..06205906d8d2f --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h @@ -0,0 +1,20 @@ +#ifndef DataFormats_TrackingRecHitSoA_SiPixelHitStatus_H +#define DataFormats_TrackingRecHitSoA_SiPixelHitStatus_H + +#include + +// more information on bit fields : https://en.cppreference.com/w/cpp/language/bit_field +struct SiPixelHitStatus { + bool isBigX : 1; // ∈[0,1] + bool isOneX : 1; // ∈[0,1] + bool isBigY : 1; // ∈[0,1] + bool isOneY : 1; // ∈[0,1] + uint8_t qBin : 3; // ∈[0,1,...,7] +}; + +struct SiPixelHitStatusAndCharge { + SiPixelHitStatus status; + uint32_t charge : 24; +}; + +#endif diff --git a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h new file mode 100644 index 0000000000000..c0fc252729df7 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h @@ -0,0 +1,44 @@ +#ifndef DataFormats_TrackingRecHitSoA_interface_TrackingRecHitSoADevice_h +#define DataFormats_TrackingRecHitSoA_interface_TrackingRecHitSoADevice_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include 
"DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +template +class TrackingRecHitDevice : public PortableDeviceCollection, TDev> { +public: + using hitSoA = TrackingRecHitSoA; + //Need to decorate the class with the inherited portable accessors being now a template + using PortableDeviceCollection, TDev>::view; + using PortableDeviceCollection, TDev>::const_view; + using PortableDeviceCollection, TDev>::buffer; + + TrackingRecHitDevice() = default; + + // Constructor which specifies the SoA size + template + explicit TrackingRecHitDevice(uint32_t nHits, int32_t offsetBPIX2, uint32_t const* hitsModuleStart, TQueue queue) + : PortableDeviceCollection, TDev>(nHits, queue) { + const auto device = alpaka::getDev(queue); + + auto start_h = cms::alpakatools::make_host_view(hitsModuleStart, TrackerTraits::numberOfModules + 1); + auto start_d = + cms::alpakatools::make_device_view(device, view().hitsModuleStart().data(), TrackerTraits::numberOfModules + 1); + alpaka::memcpy(queue, start_d, start_h); + + auto off_h = cms::alpakatools::make_host_view(offsetBPIX2); + auto off_d = cms::alpakatools::make_device_view(device, view().offsetBPIX2()); + alpaka::memcpy(queue, off_d, off_h); + alpaka::wait(queue); + } + + uint32_t nHits() const { return view().metadata().size(); } + uint32_t const* hitsModuleStart() const { return view().hitsModuleStart().data(); } +}; +#endif // DataFormats_RecHits_interface_TrackingRecHitSoADevice_h diff --git a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h new file mode 100644 index 0000000000000..ce3f57232ac93 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h @@ -0,0 +1,43 @@ +#ifndef DataFormats_TrackingRecHitSoA_interface_TrackingRecHitsHost_h +#define 
DataFormats_TrackingRecHitSoA_interface_TrackingRecHitsHost_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +template +class TrackingRecHitHost : public PortableHostCollection> { +public: + using hitSoA = TrackingRecHitSoA; + //Need to decorate the class with the inherited portable accessors being now a template + using PortableHostCollection>::view; + using PortableHostCollection>::const_view; + using PortableHostCollection>::buffer; + + TrackingRecHitHost() = default; + + template + explicit TrackingRecHitHost(uint32_t nHits, TQueue queue) + : PortableHostCollection>(nHits, queue) {} + + // Constructor which specifies the SoA size + template + explicit TrackingRecHitHost(uint32_t nHits, int32_t offsetBPIX2, uint32_t const* hitsModuleStart, TQueue queue) + : PortableHostCollection>(nHits, queue) { + std::copy(hitsModuleStart, hitsModuleStart + TrackerTraits::numberOfModules + 1, view().hitsModuleStart().data()); + view().offsetBPIX2() = offsetBPIX2; + } + + uint32_t nHits() const { return view().metadata().size(); } + uint32_t const* hitsModuleStart() const { return view().hitsModuleStart().data(); } +}; + +using TrackingRecHitHostPhase1 = TrackingRecHitHost; +using TrackingRecHitHostPhase2 = TrackingRecHitHost; +using TrackingRecHitHostHIonPhase1 = TrackingRecHitHost; + +#endif // DataFormats_TrackingRecHitSoA_interface_TrackingRecHitsHost_h diff --git a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h new file mode 100644 index 0000000000000..7e45a75043951 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h @@ -0,0 +1,55 @@ +#ifndef DataFormats_TrackingRecHitSoA_interface_TrackingRecHitsSoA_h +#define DataFormats_TrackingRecHitSoA_interface_TrackingRecHitsSoA_h + +#include + 
+#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +template +struct TrackingRecHitSoA { + using hindex_type = typename TrackerTraits::hindex_type; + using PhiBinner = cms::alpakatools::HistoContainer; //28 for phase2 geometry + using PhiBinnerView = typename PhiBinner::View; + using PhiBinnerStorageType = typename PhiBinner::index_type; + using AverageGeometry = pixelTopology::AverageGeometryT; + using HitLayerStartArray = std::array; + using HitModuleStartArray = std::array; + + GENERATE_SOA_LAYOUT(Layout, + SOA_COLUMN(float, xLocal), + SOA_COLUMN(float, yLocal), + SOA_COLUMN(float, xerrLocal), + SOA_COLUMN(float, yerrLocal), + SOA_COLUMN(float, xGlobal), + SOA_COLUMN(float, yGlobal), + SOA_COLUMN(float, zGlobal), + SOA_COLUMN(float, rGlobal), + SOA_COLUMN(int16_t, iphi), + SOA_COLUMN(SiPixelHitStatusAndCharge, chargeAndStatus), + SOA_COLUMN(int16_t, clusterSizeX), + SOA_COLUMN(int16_t, clusterSizeY), + SOA_COLUMN(uint16_t, detectorIndex), + SOA_SCALAR(int32_t, offsetBPIX2), + SOA_COLUMN(PhiBinnerStorageType, phiBinnerStorage), + SOA_SCALAR(HitModuleStartArray, hitsModuleStart), + SOA_SCALAR(HitLayerStartArray, hitsLayerStart), + SOA_SCALAR(AverageGeometry, averageGeometry), + SOA_SCALAR(PhiBinner, phiBinner)); +}; + +template +using TrackingRecHitLayout = typename TrackingRecHitSoA::template Layout<>; +template +using TrackingRecHitSoAView = typename TrackingRecHitSoA::template Layout<>::View; +template +using TrackingRecHitSoAConstView = typename TrackingRecHitSoA::template Layout<>::ConstView; + +#endif diff --git a/DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h b/DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h new file mode 100644 index 0000000000000..0e0e848afcfd9 --- 
/dev/null +++ b/DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h @@ -0,0 +1,46 @@ +#ifndef DataFormats_RecHits_interface_alpakaTrackingRecHitsSoACollection +#define DataFormats_RecHits_interface_alpakaTrackingRecHitsSoACollection + +#include +#include +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + template + using TrackingRecHitsSoACollection = std::conditional_t, + TrackingRecHitHost, + TrackingRecHitDevice>; + + //Classes definition for Phase1/Phase2, to make the classes_def lighter. Not actually used in the code. + using TrackingRecHitSoAPhase1 = TrackingRecHitsSoACollection; + using TrackingRecHitSoAPhase2 = TrackingRecHitsSoACollection; + using TrackingRecHitSoAHIonPhase1 = TrackingRecHitsSoACollection; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template + struct CopyToHost> { + template + static auto copyAsync(TQueue& queue, TrackingRecHitDevice const& deviceData) { + TrackingRecHitHost hostData(deviceData.view().metadata().size(), queue); + alpaka::memcpy(queue, hostData.buffer(), deviceData.buffer()); +#ifdef GPU_DEBUG + printf("TrackingRecHitsSoACollection: I'm copying to host.\n"); +#endif + return hostData; + } + }; +} // namespace cms::alpakatools + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(TrackingRecHitSoAPhase1, TrackingRecHitHostPhase1); +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(TrackingRecHitSoAPhase2, TrackingRecHitHostPhase2); +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(TrackingRecHitSoAHIonPhase1, TrackingRecHitHostHIonPhase1); + +#endif // 
DataFormats_RecHits_interface_alpakaTrackingRecHitsSoACollection \ No newline at end of file diff --git a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda.h b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda.h new file mode 100644 index 0000000000000..402be81b7081e --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda.h @@ -0,0 +1,12 @@ +#ifndef DataFormats_TrackingRecHitSoA_src_alpaka_classes_cuda_h +#define DataFormats_TrackingRecHitSoA_src_alpaka_classes_cuda_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" + +using namespace pixelTopology; + +#endif // DataFormats_TrackingRecHitSoA_src_alpaka_classes_cuda_h diff --git a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda_def.xml new file mode 100644 index 0000000000000..80c267b57d585 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda_def.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm.h b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm.h new file mode 100644 index 0000000000000..6af162021dd47 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm.h @@ -0,0 +1,12 @@ +#ifndef DataFormats_TrackingRecHitSoA_src_alpaka_classes_rocm_h +#define DataFormats_TrackingRecHitSoA_src_alpaka_classes_rocm_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include 
"DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" + +using namespace pixelTopology; + +#endif // DataFormats_TrackingRecHitSoA_src_alpaka_classes_rocm_h diff --git a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm_def.xml new file mode 100644 index 0000000000000..bc4c969137121 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm_def.xml @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/src/classes.cc b/DataFormats/TrackingRecHitSoA/src/classes.cc new file mode 100644 index 0000000000000..58167c21cef4f --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/classes.cc @@ -0,0 +1,7 @@ +#include "DataFormats/Portable/interface/PortableHostCollectionReadRules.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection>); +SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection>); +SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection>); \ No newline at end of file diff --git a/DataFormats/TrackingRecHitSoA/src/classes.h b/DataFormats/TrackingRecHitSoA/src/classes.h new file mode 100644 index 0000000000000..d405a88ed6ace --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/classes.h @@ -0,0 +1,11 @@ +#ifndef DataFormats_TrackingRecHitSoA_src_classes_h +#define DataFormats_TrackingRecHitSoA_src_classes_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +using namespace pixelTopology; + +#endif // DataFormats_TrackingRecHitSoA_src_classes_h diff --git a/DataFormats/TrackingRecHitSoA/src/classes_def.xml 
b/DataFormats/TrackingRecHitSoA/src/classes_def.xml new file mode 100644 index 0000000000000..f3107e8587327 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/classes_def.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/test/BuildFile.xml b/DataFormats/TrackingRecHitSoA/test/BuildFile.xml new file mode 100644 index 0000000000000..5b61a3460fb7d --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/test/BuildFile.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc new file mode 100644 index 0000000000000..378bb95db7b30 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc @@ -0,0 +1,47 @@ +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +#include +#include + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace testTrackingRecHitSoA { + + template + void runKernels(TrackingRecHitSoAView& hits, Queue& queue); + + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +int main() { + const auto host = cms::alpakatools::host(); + const auto device = cms::alpakatools::devices()[0]; + Queue queue(device); + + // inner scope to deallocate memory before destroying the queue + { + uint32_t nHits = 2000; + int32_t offset = 100; + uint32_t 
moduleStart[pixelTopology::Phase1::numberOfModules + 1]; + + for (size_t i = 0; i < pixelTopology::Phase1::numberOfModules + 1; i++) { + moduleStart[i] = i * 2; + } + TrackingRecHitsSoACollection tkhit(nHits, offset, &moduleStart[0], queue); + + testTrackingRecHitSoA::runKernels(tkhit.view(), queue); + alpaka::wait(queue); + } + return 0; +} diff --git a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc new file mode 100644 index 0000000000000..79d8bd69cbc3a --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc @@ -0,0 +1,65 @@ +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +using namespace alpaka; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + namespace testTrackingRecHitSoA { + + template + class TestFillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackingRecHitSoAView soa) const { + const uint32_t i(alpaka::getIdx(acc)[0u]); + const uint32_t j(alpaka::getIdx(acc)[0u]); + + if (i == 0 and j == 0) { + soa.offsetBPIX2() = 22; + soa[10].xLocal() = 1.11; + } + + soa[i].iphi() = i % 10; + soa.hitsLayerStart()[j] = j; + } + }; + + template + class ShowKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackingRecHitSoAConstView soa) const { + const uint32_t i(alpaka::getIdx(acc)[0u]); + const uint32_t j(alpaka::getIdx(acc)[0u]); + + if (i == 0 and j == 0) { + printf("nbins = %d \n", soa.phiBinner().nbins()); + printf("offsetBPIX %d ->%d \n", i, soa.offsetBPIX2()); + printf("nHits %d ->%d \n", i, soa.metadata().size()); + //printf("hitsModuleStart 
%d ->%d \n", i, soa.hitsModuleStart().at(28)); + } + + if (i < 10) // can be increased to soa.nHits() for debugging + printf("iPhi %d ->%d \n", i, soa[i].iphi()); + } + }; + + template + void runKernels(TrackingRecHitSoAView& view, Queue& queue) { + uint32_t items = 64; + uint32_t groups = divide_up_by(view.metadata().size(), items); + auto workDiv = make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestFillKernel{}, view); + alpaka::exec(queue, workDiv, ShowKernel{}, view); + } + + template void runKernels(TrackingRecHitSoAView& view, Queue& queue); + template void runKernels(TrackingRecHitSoAView& view, Queue& queue); + + } // namespace testTrackingRecHitSoA +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index 212738e941533..87123219d44e4 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -2,6 +2,7 @@ + diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoAAlpaka.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoAAlpaka.cc new file mode 100644 index 0000000000000..ab762b8f4d97c --- /dev/null +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoAAlpaka.cc @@ -0,0 +1,130 @@ +#include + +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/DetId/interface/DetIdCollection.h" +#include "DataFormats/FEDRawData/interface/FEDNumbering.h" +#include "DataFormats/SiPixelDetId/interface/PixelFEDChannel.h" +#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" +#include 
"EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" +#include "FWCore/Framework/interface/ESWatcher.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h" + +class SiPixelDigiErrorsFromSoAAlpaka : public edm::stream::EDProducer<> { +public: + explicit SiPixelDigiErrorsFromSoAAlpaka(const edm::ParameterSet& iConfig); + ~SiPixelDigiErrorsFromSoAAlpaka() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; + + const edm::ESGetToken cablingToken_; + const edm::EDGetTokenT digiErrorsSoAGetToken_; + const edm::EDGetTokenT fmtErrorsGetToken_; + const edm::EDPutTokenT> errorPutToken_; + const edm::EDPutTokenT tkErrorPutToken_; + const edm::EDPutTokenT userErrorPutToken_; + const edm::EDPutTokenT> disabledChannelPutToken_; + + edm::ESWatcher cablingWatcher_; + std::unique_ptr cabling_; + + const std::vector tkerrorlist_; + const std::vector usererrorlist_; + + const bool usePhase1_; +}; + +SiPixelDigiErrorsFromSoAAlpaka::SiPixelDigiErrorsFromSoAAlpaka(const edm::ParameterSet& iConfig) + : cablingToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), + digiErrorsSoAGetToken_{consumes(iConfig.getParameter("digiErrorSoASrc"))}, + fmtErrorsGetToken_{consumes(iConfig.getParameter("fmtErrorsSoASrc"))}, + errorPutToken_{produces>()}, + tkErrorPutToken_{produces()}, + userErrorPutToken_{produces("UserErrorModules")}, + disabledChannelPutToken_{produces>()}, + 
tkerrorlist_(iConfig.getParameter>("ErrorList")), + usererrorlist_(iConfig.getParameter>("UserErrorList")), + usePhase1_(iConfig.getParameter("UsePhase1")) {} + +void SiPixelDigiErrorsFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("digiErrorSoASrc", edm::InputTag("siPixelDigis")); + desc.add("fmtErrorsSoASrc", edm::InputTag("siPixelDigis")); + // the configuration parameters here are named following those in SiPixelRawToDigi + desc.add("CablingMapLabel", "")->setComment("CablingMap label"); + desc.add("UsePhase1", false)->setComment("## Use phase1"); + desc.add>("ErrorList", std::vector{29}) + ->setComment("## ErrorList: list of error codes used by tracking to invalidate modules"); + desc.add>("UserErrorList", std::vector{40}) + ->setComment("## UserErrorList: list of error codes used by Pixel experts for investigation"); + descriptions.addWithDefaultLabel(desc); +} + +void SiPixelDigiErrorsFromSoAAlpaka::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { + // pack errors into collection + + // initialize cabling map or update if necessary + if (cablingWatcher_.check(iSetup)) { + // cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) + const SiPixelFedCablingMap* cablingMap = &iSetup.getData(cablingToken_); + cabling_ = cablingMap->cablingTree(); + LogDebug("map version:") << cabling_->version(); + } + + const auto& digiErrors = iEvent.get(digiErrorsSoAGetToken_); + const auto& formatterErrors = iEvent.get(fmtErrorsGetToken_); + + edm::DetSetVector errorcollection{}; + DetIdCollection tkerror_detidcollection{}; + DetIdCollection usererror_detidcollection{}; + edmNew::DetSetVector disabled_channelcollection{}; + + PixelDataFormatter formatter(cabling_.get(), usePhase1_); // for phase 1 & 0 + auto errors = formatterErrors; // make a copy + PixelDataFormatter::DetErrors nodeterrors; + + // if (digiErrors.view().size() > 0) { // 
TODO: need to know if this size will be useful or not and how to use it + uint32_t size = digiErrors.view().metadata().size(); + for (auto i = 0U; i < size; i++) { + SiPixelErrorCompact err = digiErrors.view()[i].pixelErrors(); + if (err.errorType != 0) { + SiPixelRawDataError error(err.word, err.errorType, err.fedId + FEDNumbering::MINSiPixeluTCAFEDID); + errors[err.rawId].push_back(error); + } + } + // } + + formatter.unpackFEDErrors(errors, + tkerrorlist_, + usererrorlist_, + errorcollection, + tkerror_detidcollection, + usererror_detidcollection, + disabled_channelcollection, + nodeterrors); + + const uint32_t dummydetid = 0xffffffff; + edm::DetSet& errorDetSet = errorcollection.find_or_insert(dummydetid); + errorDetSet.data = nodeterrors; + + iEvent.emplace(errorPutToken_, std::move(errorcollection)); + iEvent.emplace(tkErrorPutToken_, std::move(tkerror_detidcollection)); + iEvent.emplace(userErrorPutToken_, std::move(usererror_detidcollection)); + iEvent.emplace(disabledChannelPutToken_, std::move(disabled_channelcollection)); +} + +DEFINE_FWK_MODULE(SiPixelDigiErrorsFromSoAAlpaka); diff --git a/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py b/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py index b5484afd2fafa..f5139f1cb418b 100644 --- a/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py +++ b/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py @@ -23,6 +23,12 @@ from EventFilter.SiPixelRawToDigi.siPixelDigiErrorsFromSoA_cfi import siPixelDigiErrorsFromSoA as _siPixelDigiErrorsFromSoA siPixelDigiErrors = _siPixelDigiErrorsFromSoA.clone() +# Alpaka modifier +from Configuration.ProcessModifiers.alpaka_cff import alpaka +from EventFilter.SiPixelRawToDigi.siPixelDigiErrorsFromSoAAlpaka_cfi import siPixelDigiErrorsFromSoAAlpaka as _siPixelDigiErrorsFromSoAAlpaka + +alpaka.toReplaceWith(siPixelDigiErrors, _siPixelDigiErrorsFromSoAAlpaka.clone()) + # use the Phase 1 settings from Configuration.Eras.Modifier_phase1Pixel_cff import 
phase1Pixel phase1Pixel.toModify(siPixelDigiErrors, diff --git a/HeterogeneousCore/AlpakaInterface/interface/workdivision.h b/HeterogeneousCore/AlpakaInterface/interface/workdivision.h index e02f4e92f813e..ad950999517f4 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/workdivision.h +++ b/HeterogeneousCore/AlpakaInterface/interface/workdivision.h @@ -806,6 +806,210 @@ namespace cms::alpakatools { return alpaka::getIdx(acc) == Vec>::zeros(); } + /********************************************* + * RANGE COMPUTATION + ********************************************/ + + /* + * Computes the half-open range [first, end) of element indices, local to the block. + * Warning: the end index is not truncated by the max number of elements of interest. + */ + template + ALPAKA_FN_ACC std::pair element_index_range_in_block(const TAcc& acc, + const Idx elementIdxShift, + const unsigned int dimIndex = 0u) { + // Take into account the thread index in block. + const Idx threadIdxLocal(alpaka::getIdx(acc)[dimIndex]); + const Idx threadDimension(alpaka::getWorkDiv(acc)[dimIndex]); + + // Compute the element indices in block. + // Obviously relevant for CPU only. + // For GPU, threadDimension == 1, and elementIdx == firstElementIdx == threadIdx + elementIdxShift. + const Idx firstElementIdxLocal = threadIdxLocal * threadDimension; + const Idx firstElementIdx = firstElementIdxLocal + elementIdxShift; // Add the shift! + const Idx endElementIdxUncut = firstElementIdx + threadDimension; + + // Return the element index range, shifted by elementIdxShift. + return {firstElementIdx, endElementIdxUncut}; + } + + /* + * Computes the half-open range [first, end) of element indices, local to the block. + * The end index is truncated by the max number of elements of interest.
+ */ + template + ALPAKA_FN_ACC std::pair element_index_range_in_block_truncated(const TAcc& acc, + const Idx maxNumberOfElements, + const Idx elementIdxShift, + const unsigned int dimIndex = 0u) { + // Check dimension + //static_assert(alpaka::Dim::value == Dim1::value, + // "Accelerator and maxNumberOfElements need to have same dimension."); + auto [firstElementIdxLocal, endElementIdxLocal] = element_index_range_in_block(acc, elementIdxShift, dimIndex); + + // Truncate the end index only; the first index is returned as computed. + endElementIdxLocal = std::min(endElementIdxLocal, maxNumberOfElements); + + // Return the element index range, shifted by elementIdxShift, with the end truncated by maxNumberOfElements. + return {firstElementIdxLocal, endElementIdxLocal}; + } + + /* + * Computes the half-open range [first, end) of element indices in grid. + * Warning: the end index is not truncated by the max number of elements of interest. + */ + template + ALPAKA_FN_ACC std::pair element_index_range_in_grid(const TAcc& acc, + Idx elementIdxShift, + const unsigned int dimIndex = 0u) { + // Take into account the block index in grid. + const Idx blockIdxInGrid(alpaka::getIdx(acc)[dimIndex]); + const Idx blockDimension(alpaka::getWorkDiv(acc)[dimIndex]); + + // Shift to get global indices in grid (instead of local to the block) + elementIdxShift += blockIdxInGrid * blockDimension; + + // Return the element index range, shifted by the (now global) elementIdxShift. + return element_index_range_in_block(acc, elementIdxShift, dimIndex); + } + + /* + * Loop on all (CPU) elements. + * Elements loop makes sense in CPU case only. In GPU case, elementIdx = firstElementIdx = threadIdx + shift. + * Indexes are local to the BLOCK.
+ */ + template + ALPAKA_FN_ACC void for_each_element_in_block(const TAcc& acc, + const Idx maxNumberOfElements, + const Idx elementIdxShift, + const Func func, + const unsigned int dimIndex = 0) { + const auto& [firstElementIdx, endElementIdx] = + element_index_range_in_block_truncated(acc, maxNumberOfElements, elementIdxShift, dimIndex); + + for (Idx elementIdx = firstElementIdx; elementIdx < endElementIdx; ++elementIdx) { + func(elementIdx); + } + } + + /* + * Overload for elementIdxShift = 0: forwards to the shifted version. + */ + template + ALPAKA_FN_ACC void for_each_element_in_block(const TAcc& acc, + const Idx maxNumberOfElements, + const Func func, + const unsigned int dimIndex = 0) { + const Idx elementIdxShift = 0; + for_each_element_in_block(acc, maxNumberOfElements, elementIdxShift, func, dimIndex); + } + + /************************************************************** + * LOOP ON ALL ELEMENTS WITH ONE LOOP + **************************************************************/ + + /* + * When the input index i has reached endElementIdx: advances firstElementIdx and endElementIdx by stride, and restarts i at firstElementIdx. + * Otherwise: does nothing and returns true. + * NB 1: This helper function is used as a trick to only have one loop (like in legacy), instead of 2 loops + * (like in all the other Alpaka helpers, 'for_each_element_in_block_strided' for example, + * because of the additional loop over elements in Alpaka model). + * This makes it possible to keep the 'continue' and 'break' statements as-is from legacy code, + * and hence avoids a lot of legacy code reshuffling. + * NB 2: Modifies i, firstElementIdx and endElementIdx. Returns false only when the strided i is >= maxNumberOfElements.
+ */ + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool next_valid_element_index_strided( + Idx& i, Idx& firstElementIdx, Idx& endElementIdx, const Idx stride, const Idx maxNumberOfElements) { + bool isNextStrideElementValid = true; + if (i == endElementIdx) { + firstElementIdx += stride; + endElementIdx += stride; + i = firstElementIdx; + if (i >= maxNumberOfElements) { + isNextStrideElementValid = false; + } + } + return isNextStrideElementValid; + } + + template + ALPAKA_FN_ACC void for_each_element_in_block_strided(const TAcc& acc, + const Idx maxNumberOfElements, + const Idx elementIdxShift, + const Func func, + const unsigned int dimIndex = 0) { + // Get thread / element indices in block. + const auto& [firstElementIdxNoStride, endElementIdxNoStride] = + element_index_range_in_block(acc, elementIdxShift, dimIndex); + + // Stride = block size. + const Idx blockDimension(alpaka::getWorkDiv(acc)[dimIndex]); + + // Strided access. + for (Idx threadIdx = firstElementIdxNoStride, endElementIdx = endElementIdxNoStride; + threadIdx < maxNumberOfElements; + threadIdx += blockDimension, endElementIdx += blockDimension) { + // (CPU) Loop on all elements, with the end index clamped to maxNumberOfElements. + if (endElementIdx > maxNumberOfElements) { + endElementIdx = maxNumberOfElements; + } + for (Idx i = threadIdx; i < endElementIdx; ++i) { + func(i); + } + } + } + + /* + * Overload for elementIdxShift = 0 + */ + template + ALPAKA_FN_ACC void for_each_element_in_block_strided(const TAcc& acc, + const Idx maxNumberOfElements, + const Func func, + const unsigned int dimIndex = 0) { + const Idx elementIdxShift = 0; + for_each_element_in_block_strided(acc, maxNumberOfElements, elementIdxShift, func, dimIndex); + } + + template + ALPAKA_FN_ACC void for_each_element_in_grid_strided(const TAcc& acc, + const Idx maxNumberOfElements, + const Idx elementIdxShift, + const Func func, + const unsigned int dimIndex = 0) { + // Get thread / element indices in grid.
+ const auto& [firstElementIdxNoStride, endElementIdxNoStride] = + element_index_range_in_grid(acc, elementIdxShift, dimIndex); + + // Stride = grid size. + const Idx gridDimension(alpaka::getWorkDiv(acc)[dimIndex]); + + // Strided access. + for (Idx threadIdx = firstElementIdxNoStride, endElementIdx = endElementIdxNoStride; + threadIdx < maxNumberOfElements; + threadIdx += gridDimension, endElementIdx += gridDimension) { + // (CPU) Loop on all elements, with the end index clamped to maxNumberOfElements. + if (endElementIdx > maxNumberOfElements) { + endElementIdx = maxNumberOfElements; + } + for (Idx i = threadIdx; i < endElementIdx; ++i) { + func(i); + } + } + } + + /* + * Overload for elementIdxShift = 0 + */ + template + ALPAKA_FN_ACC void for_each_element_in_grid_strided(const TAcc& acc, + const Idx maxNumberOfElements, + const Func func, + const unsigned int dimIndex = 0) { + const Idx elementIdxShift = 0; + for_each_element_in_grid_strided(acc, maxNumberOfElements, elementIdxShift, func, dimIndex); + } + } // namespace cms::alpakatools #endif // HeterogeneousCore_AlpakaInterface_interface_workdivision_h diff --git a/RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h b/RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h new file mode 100644 index 0000000000000..971e74f4cd683 --- /dev/null +++ b/RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h @@ -0,0 +1,27 @@ +#ifndef RecoLocalTracker_Records_PixelCPEFastParamsRecord_h +#define RecoLocalTracker_Records_PixelCPEFastParamsRecord_h + +#include "FWCore/Framework/interface/EventSetupRecordImplementation.h" +#include "FWCore/Framework/interface/DependentRecordImplementation.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/Records/interface/IdealGeometryRecord.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "CondFormats/DataRecord/interface/SiPixelLorentzAngleRcd.h" +#include
"CondFormats/DataRecord/interface/SiPixelGenErrorDBObjectRcd.h" +#include "CalibTracker/Records/interface/SiPixelTemplateDBObjectESProducerRcd.h" +#include "CalibTracker/Records/interface/SiPixel2DTemplateDBObjectESProducerRcd.h" + +#include "FWCore/Utilities/interface/mplVector.h" + +class PixelCPEFastParamsRecord + : public edm::eventsetup::DependentRecordImplementation > {}; + +#endif // RecoLocalTracker_Records_PixelCPEFastParamsRecord_h diff --git a/RecoLocalTracker/Records/src/PixelCPEFastParamsRecord.cc b/RecoLocalTracker/Records/src/PixelCPEFastParamsRecord.cc new file mode 100644 index 0000000000000..1410d7c1e66bf --- /dev/null +++ b/RecoLocalTracker/Records/src/PixelCPEFastParamsRecord.cc @@ -0,0 +1,5 @@ +#include "RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" +#include "FWCore/Utilities/interface/typelookup.h" + +EVENTSETUP_RECORD_REG(PixelCPEFastParamsRecord); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterThresholds.h b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h similarity index 82% rename from RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterThresholds.h rename to RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h index f9ebb16ea2c7c..c224483bda40a 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterThresholds.h +++ b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h @@ -1,5 +1,9 @@ -#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelClusterThresholds_h -#define RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelClusterThresholds_h +#ifndef RecoLocalTracker_SiPixelClusterizer_interface_SiPixelClusterThresholds_h +#define RecoLocalTracker_SiPixelClusterizer_interface_SiPixelClusterThresholds_h + +/* This struct is an implementation detail of this package. 
+ * It's in the interface directory because it needs to be shared by the legacy, CUDA, and Alpaka plugins. + */ struct SiPixelClusterThresholds { inline constexpr int32_t getThresholdForLayerOnCondition(bool isLayer1) const noexcept { @@ -51,4 +55,4 @@ struct SiPixelClusterThresholds { phase2KinkADC(phase2KinkADC) {} }; -#endif // RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelClusterThresholds_h +#endif // RecoLocalTracker_SiPixelClusterizer_interface_SiPixelClusterThresholds_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index 1bc0c60a0d298..83bdae62636e0 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -1,16 +1,20 @@ - - + + + - + + + + @@ -18,3 +22,14 @@ + + + + + + + + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc index 0bf734b6cd589..925ac2febcef0 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -14,13 +14,14 @@ #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "Geometry/Records/interface/TrackerTopologyRcd.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" // local include(s) #include "PixelClusterizerBase.h" -#include "SiPixelClusterThresholds.h" +// #define GPU_DEBUG template class SiPixelDigisClustersFromSoAT : public edm::global::EDProducer<> { public: @@ -122,7 +123,7 @@ void 
SiPixelDigisClustersFromSoAT::produce(edm::StreamID, for (int32_t ic = 0; ic < nclus + 1; ++ic) { auto const& acluster = aclusters[ic]; // in any case we cannot go out of sync with gpu... - if (!std::is_base_of::value and acluster.charge < clusterThreshold) + if (acluster.charge < clusterThreshold) edm::LogWarning("SiPixelDigisClustersFromSoA") << "cluster below charge Threshold " << "Layer/DetId/clusId " << layer << '/' << detId << '/' << ic << " size/charge " << acluster.isize << '/' << acluster.charge; @@ -148,6 +149,10 @@ void SiPixelDigisClustersFromSoAT::produce(edm::StreamID, spc.abort(); }; +#ifdef GPU_DEBUG + std::cout << "Dumping all digis. nDigis = " << nDigis << std::endl; +#endif + for (uint32_t i = 0; i < nDigis; i++) { // check for uninitialized digis if (digis.rawIdArr(i) == 0) @@ -161,6 +166,9 @@ void SiPixelDigisClustersFromSoAT::produce(edm::StreamID, assert(digis.rawIdArr(i) > 109999); #endif if (detId != digis.rawIdArr(i)) { +#ifdef GPU_DEBUG + std::cout << ">> Closed module --" << detId << "; nclus = " << nclus << std::endl; +#endif // new module fillClusters(detId); #ifdef EDM_ML_DEBUG @@ -178,6 +186,12 @@ void SiPixelDigisClustersFromSoAT::produce(edm::StreamID, } } PixelDigi dig(digis.pdigi(i)); + +#ifdef GPU_DEBUG + std::cout << i << ";" << digis.rawIdArr(i) << ";" << digis.clus(i) << ";" << digis.pdigi(i) << ";" << digis.adc(i) + << ";" << dig.row() << ";" << dig.column() << std::endl; +#endif + if (storeDigis_) (*detDigis).data.emplace_back(dig); // fill clusters diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoAAlpaka.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoAAlpaka.cc new file mode 100644 index 0000000000000..ad05ad3ff60c9 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoAAlpaka.cc @@ -0,0 +1,240 @@ +#include + +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/DetId/interface/DetId.h" +#include 
"DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" + +// local include(s) +#include "PixelClusterizerBase.h" + +// #define EDM_ML_DEBUG +// #define GPU_DEBUG +template +class SiPixelDigisClustersFromSoAAlpaka : public edm::global::EDProducer<> { +public: + explicit SiPixelDigisClustersFromSoAAlpaka(const edm::ParameterSet& iConfig); + ~SiPixelDigisClustersFromSoAAlpaka() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + edm::ESGetToken const topoToken_; + edm::EDGetTokenT const digisHostToken_; + const SiPixelClusterThresholds clusterThresholds_; // Cluster threshold in electrons + const bool produceDigis_; + const bool storeDigis_; + + edm::EDPutTokenT> digisPutToken_; + edm::EDPutTokenT clustersPutToken_; +}; + +template +SiPixelDigisClustersFromSoAAlpaka::SiPixelDigisClustersFromSoAAlpaka(const edm::ParameterSet& iConfig) + : topoToken_(esConsumes()), + digisHostToken_(consumes(iConfig.getParameter("src"))), + 
clusterThresholds_(iConfig.getParameter("clusterThreshold_layer1"), + iConfig.getParameter("clusterThreshold_otherLayers")), + produceDigis_(iConfig.getParameter("produceDigis")), + storeDigis_(produceDigis_ && iConfig.getParameter("storeDigis")), + clustersPutToken_(produces()) { + if (produceDigis_) + digisPutToken_ = produces>(); +} + +template +void SiPixelDigisClustersFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("src", edm::InputTag("siPixelDigisSoA")); + desc.add("clusterThreshold_layer1", pixelClustering::clusterThresholdLayerOne); + desc.add("clusterThreshold_otherLayers", pixelClustering::clusterThresholdOtherLayers); + desc.add("produceDigis", true); + desc.add("storeDigis", true); + + descriptions.addWithDefaultLabel(desc); +} + +template +void SiPixelDigisClustersFromSoAAlpaka::produce(edm::StreamID, + edm::Event& iEvent, + const edm::EventSetup& iSetup) const { + const auto& digisHost = iEvent.get(digisHostToken_); + const auto& digisView = digisHost.const_view(); + const uint32_t nDigis = digisHost.nDigis(); + + const auto& ttopo = iSetup.getData(topoToken_); + constexpr auto maxModules = TrackerTraits::numberOfModules; + + std::unique_ptr> outputDigis; + if (produceDigis_) + outputDigis = std::make_unique>(); + if (storeDigis_) + outputDigis->reserve(maxModules); + auto outputClusters = std::make_unique(); + outputClusters->reserve(maxModules, nDigis / 2); + + edm::DetSet* detDigis = nullptr; + uint32_t detId = 0; + + for (uint32_t i = 0; i < nDigis; i++) { + // check for uninitialized digis + // this is set in RawToDigi_kernel in SiPixelRawToClusterGPUKernel.cu + if (digisView[i].rawIdArr() == 0) + continue; + + // check for noisy/dead pixels (electrons set to 0) + if (digisView[i].adc() == 0) + continue; + + detId = digisView[i].rawIdArr(); + if (storeDigis_) { + detDigis = &outputDigis->find_or_insert(detId); + + if ((*detDigis).empty()) + 
(*detDigis).data.reserve(64); // avoid the first relocations + } + + break; + } + + int32_t nclus = -1; + PixelClusterizerBase::AccretionCluster aclusters[TrackerTraits::maxNumClustersPerModules]; +#ifdef EDM_ML_DEBUG + auto totClustersFilled = 0; +#endif + + auto fillClusters = [&](uint32_t detId) { + if (nclus < 0) + return; // this in reality should never happen + edmNew::DetSetVector::FastFiller spc(*outputClusters, detId); + auto layer = (DetId(detId).subdetId() == 1) ? ttopo.pxbLayer(detId) : 0; + auto clusterThreshold = clusterThresholds_.getThresholdForLayerOnCondition(layer == 1); + for (int32_t ic = 0; ic < nclus + 1; ++ic) { + auto const& acluster = aclusters[ic]; + // in any case we cannot go out of sync with gpu... + if (acluster.charge < clusterThreshold) + edm::LogWarning("SiPixelDigisClustersFromSoAAlpaka") + << "cluster below charge Threshold " + << "Layer/DetId/clusId " << layer << '/' << detId << '/' << ic << " size/charge " << acluster.isize << '/' + << acluster.charge << "\n"; + // sort by row (x) + spc.emplace_back(acluster.isize, acluster.adc, acluster.x, acluster.y, acluster.xmin, acluster.ymin, ic); + aclusters[ic].clear(); +#ifdef EDM_ML_DEBUG + ++totClustersFilled; + const auto& cluster{spc.back()}; + // LogDebug("SiPixelDigisClustersFromSoAAlpaka") + std::cout << "putting in this cluster " << ic << " " << cluster.charge() << " " << cluster.pixelADC().size() + << "\n"; +#endif + std::push_heap(spc.begin(), spc.end(), [](SiPixelCluster const& cl1, SiPixelCluster const& cl2) { + return cl1.minPixelRow() < cl2.minPixelRow(); + }); + } + nclus = -1; + // sort by row (x) + std::sort_heap(spc.begin(), spc.end(), [](SiPixelCluster const& cl1, SiPixelCluster const& cl2) { + return cl1.minPixelRow() < cl2.minPixelRow(); + }); + if (spc.empty()) + spc.abort(); + }; + +#ifdef GPU_DEBUG + std::cout << "Dumping all digis. 
nDigis = " << nDigis << std::endl; +#endif + for (uint32_t i = 0; i < nDigis; i++) { +#ifdef GPU_DEBUG + PixelDigi dig2{digisView[i].pdigi()}; + std::cout << i << ";" << digisView[i].rawIdArr() << ";" << digisView[i].clus() << ";" << digisView[i].pdigi() << ";" + << digisView[i].adc() << ";" << dig2.row() << ";" << dig2.column() << std::endl; +#endif + + // check for uninitialized digis + if (digisView[i].rawIdArr() == 0) + continue; + // check for noisy/dead pixels (electrons set to 0) + if (digisView[i].adc() == 0) + continue; + if (digisView[i].clus() >= -pixelClustering::invalidClusterId) + continue; // not in cluster; TODO add an assert for the size + if (digisView[i].clus() == pixelClustering::invalidModuleId) + continue; // from clusters killed by charge cut +#ifdef EDM_ML_DEBUG + assert(digisView[i].rawIdArr() > 109999); +#endif + if (detId != digisView[i].rawIdArr()) { +#ifdef GPU_DEBUG + std::cout << ">> Closed module --" << detId << "; nclus = " << nclus << std::endl; +#endif + // new module + fillClusters(detId); +#ifdef EDM_ML_DEBUG + assert(nclus == -1); +#endif + detId = digisView[i].rawIdArr(); + if (storeDigis_) { + detDigis = &outputDigis->find_or_insert(detId); + if ((*detDigis).empty()) + (*detDigis).data.reserve(64); // avoid the first relocations + else { + edm::LogWarning("SiPixelDigisClustersFromSoAAlpaka") + << "Problem det present twice in input! 
" << (*detDigis).detId(); + } + } + } + PixelDigi dig{digisView[i].pdigi()}; + + if (storeDigis_) + (*detDigis).data.emplace_back(dig); + // fill clusters +#ifdef EDM_ML_DEBUG + assert(digisView[i].clus() >= 0); + assert(digisView[i].clus() < static_cast(TrackerTraits::maxNumClustersPerModules)); +#endif + nclus = std::max(digisView[i].clus(), nclus); + auto row = dig.row(); + auto col = dig.column(); + SiPixelCluster::PixelPos pix(row, col); + aclusters[digisView[i].clus()].add(pix, digisView[i].adc()); + } + + // fill final clusters + if (detId > 0) + fillClusters(detId); + +#ifdef EDM_ML_DEBUG + LogDebug("SiPixelDigisClustersFromSoAAlpaka") << "filled " << totClustersFilled << " clusters"; +#endif + + if (produceDigis_) + iEvent.put(digisPutToken_, std::move(outputDigis)); + + iEvent.put(clustersPutToken_, std::move(outputClusters)); +} + +#include "FWCore/Framework/interface/MakerMacros.h" + +using SiPixelDigisClustersFromSoAAlpakaPhase1 = SiPixelDigisClustersFromSoAAlpaka; +DEFINE_FWK_MODULE(SiPixelDigisClustersFromSoAAlpakaPhase1); + +using SiPixelDigisClustersFromSoAAlpakaPhase2 = SiPixelDigisClustersFromSoAAlpaka; +DEFINE_FWK_MODULE(SiPixelDigisClustersFromSoAAlpakaPhase2); + +using SiPixelDigisClustersFromSoAAlpakaHIonPhase1 = SiPixelDigisClustersFromSoAAlpaka; +DEFINE_FWK_MODULE(SiPixelDigisClustersFromSoAAlpakaHIonPhase1); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc index e68c8074d8535..e270d31515842 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc @@ -9,37 +9,20 @@ #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include 
"CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h" -#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" -#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" -#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" -#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" -#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" #include "DataFormats/FEDRawData/interface/FEDNumbering.h" -#include "DataFormats/FEDRawData/interface/FEDRawData.h" -#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" #include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" #include "EventFilter/SiPixelRawToDigi/interface/PixelUnpackingRegions.h" #include "FWCore/Framework/interface/ConsumesCollector.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/ESTransientHandle.h" -#include "FWCore/Framework/interface/ESWatcher.h" #include "FWCore/Framework/interface/Event.h" #include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" #include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" -#include "RecoTracker/Record/interface/CkfComponentsRecord.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" // local includes -#include "SiPixelClusterThresholds.h" #include "SiPixelRawToClusterGPUKernel.h" class 
SiPixelPhase2DigiToClusterCUDA : public edm::stream::EDProducer { @@ -176,4 +159,5 @@ void SiPixelPhase2DigiToClusterCUDA::produce(edm::Event& iEvent, const edm::Even } // define as framework plugin +#include "FWCore/Framework/interface/MakerMacros.h" DEFINE_FWK_MODULE(SiPixelPhase2DigiToClusterCUDA); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc index e426661eb3c33..0a763793d35fd 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -10,8 +10,8 @@ #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h" -#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" #include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" +#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" #include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" #include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" #include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" @@ -36,10 +36,10 @@ #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" #include "RecoTracker/Record/interface/CkfComponentsRecord.h" // local includes -#include "SiPixelClusterThresholds.h" #include "SiPixelRawToClusterGPUKernel.h" template diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu 
b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 7b92dfc267e79..2d10b81af000e 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -26,11 +26,12 @@ #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h" -#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" -#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" + // local includes #include "SiPixelRawToClusterGPUKernel.h" +#include "gpuCalibPixel.h" +#include "gpuClusterChargeCut.h" +#include "gpuClustering.h" // #define GPU_DEBUG diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 802ad2eb42c7e..06b30da68c8cd 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -2,25 +2,24 @@ #define RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h #include + #include +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "DataFormats/SiPixelDetId/interface/PixelChannelIdentifier.h" #include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" #include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" -#include 
"CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" #include "FWCore/Utilities/interface/typedefs.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" // #define GPU_DEBUG -// local include(s) -#include "SiPixelClusterThresholds.h" - struct SiPixelROCsStatusAndMapping; class SiPixelGainForHLTonGPU; @@ -131,6 +130,14 @@ namespace pixelgpudetails { digis_d.setNModulesDigis(nModules_Clusters_h[0], nDigis); assert(nModules_Clusters_h[2] <= nModules_Clusters_h[1]); clusters_d.setNClusters(nModules_Clusters_h[1], nModules_Clusters_h[2]); + +#ifdef GPU_DEBUG + std::cout << "SiPixelClusterizerCUDA results:" << std::endl + << " > no. of digis: " << nDigis << std::endl + << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl + << " > no. 
of clusters: " << nModules_Clusters_h[1] << std::endl + << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl; +#endif // need to explicitly deallocate while the associated CUDA // stream is still alive // diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h new file mode 100644 index 0000000000000..ff885b5bad07f --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h @@ -0,0 +1,136 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_CalibPixel_h +#define RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_CalibPixel_h + +#include +#include +#include +#include + +#include + +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTUtilities.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" + +// #define GPU_DEBUG + +namespace calibPixel { + using namespace cms::alpakatools; + + constexpr uint16_t InvId = std::numeric_limits::max() - 1; + // must be > MaxNumModules + + struct CalibDigis { + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + SiPixelClusterThresholds clusterThresholds, + SiPixelDigisSoAView view, + SiPixelClustersSoAView clus_view, + const SiPixelGainCalibrationForHLTSoAConstView gains, + int numElements) const { + const float VCaltoElectronGain = clusterThresholds.vCaltoElectronGain; + const float VCaltoElectronGain_L1 = 
clusterThresholds.vCaltoElectronGain_L1; + const float VCaltoElectronOffset = clusterThresholds.vCaltoElectronOffset; + const float VCaltoElectronOffset_L1 = clusterThresholds.vCaltoElectronOffset_L1; + + // zero for next kernels... + if (cms::alpakatools::once_per_grid(acc)) { + clus_view[0].clusModuleStart() = clus_view[0].moduleStart() = 0; + } + + cms::alpakatools::for_each_element_in_grid_strided( + acc, phase1PixelTopology::numberOfModules, [&](uint32_t i) { clus_view[i].clusInModule() = 0; }); + cms::alpakatools::for_each_element_in_grid_strided(acc, numElements, [&](uint32_t i) { + auto dvgi = view[i]; + if (dvgi.moduleId() != InvId) { // digis already flagged with InvId are left untouched + bool isDeadColumn = false, isNoisyColumn = false; + int row = dvgi.xx(); + int col = dvgi.yy(); + auto ret = SiPixelGainUtilities::getPedAndGain(gains, dvgi.moduleId(), col, row, isDeadColumn, isNoisyColumn); + float pedestal = ret.first; + float gain = ret.second; + if (isDeadColumn | isNoisyColumn) { + printf("bad pixel at %d in %d\n", i, dvgi.moduleId()); // log the real module id before it is overwritten + dvgi.moduleId() = InvId; + dvgi.adc() = 0; + } else { + float vcal = dvgi.adc() * gain - pedestal * gain; + + float conversionFactor = dvgi.moduleId() < 96 ? VCaltoElectronGain_L1 : VCaltoElectronGain; + float offset = dvgi.moduleId() < 96 ? 
VCaltoElectronOffset_L1 : VCaltoElectronOffset; +#ifdef GPU_DEBUG + auto old_adc = dvgi.adc(); +#endif + dvgi.adc() = std::max(100, int(vcal * conversionFactor + offset)); +#ifdef GPU_DEBUG + if (cms::alpakatools::once_per_grid(acc)) { + printf( + "module %d pixel %d -> old_adc = %d; vcal = %.2f; conversionFactor = %.2f; offset = %.2f; new_adc = " + "%d \n", + dvgi.moduleId(), + i, + old_adc, + vcal, + conversionFactor, + offset, + dvgi.adc()); + } +#endif + } + } + }); + } + }; + struct CalibDigisPhase2 { + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + SiPixelClusterThresholds clusterThresholds, + SiPixelDigisSoAView view, + SiPixelClustersSoAView clus_view, + int numElements) const { + const float ElectronPerADCGain = clusterThresholds.electronPerADCGain; + const int8_t Phase2ReadoutMode = clusterThresholds.phase2ReadoutMode; + const uint16_t Phase2DigiBaseline = clusterThresholds.phase2DigiBaseline; + const uint8_t Phase2KinkADC = clusterThresholds.phase2KinkADC; + + // zero for next kernels... + if (cms::alpakatools::once_per_grid(acc)) { + clus_view[0].clusModuleStart() = clus_view[0].moduleStart() = 0; + } + + cms::alpakatools::for_each_element_in_grid_strided( + acc, phase2PixelTopology::numberOfModules, [&](uint32_t i) { clus_view[i].clusInModule() = 0; }); + cms::alpakatools::for_each_element_in_grid_strided(acc, numElements, [&](uint32_t i) { + auto dvgi = view[i]; + if (pixelClustering::invalidModuleId != dvgi.moduleId()) { + const int mode = (Phase2ReadoutMode < -1 ? -1 : Phase2ReadoutMode); + int adc_int = dvgi.adc(); + if (mode < 0) + adc_int = int(adc_int * ElectronPerADCGain); + else { + if (adc_int < Phase2KinkADC) + adc_int = int((adc_int + 0.5) * ElectronPerADCGain); + else { + const int8_t dspp = (Phase2ReadoutMode < 10 ? Phase2ReadoutMode : 10); + const int8_t ds = int8_t(dspp <= 1 ? 
1 : (dspp - 1) * (dspp - 1)); + adc_int -= Phase2KinkADC; + adc_int *= ds; + adc_int += Phase2KinkADC; + adc_int = ((adc_int + 0.5 * ds) * ElectronPerADCGain); + } + adc_int += int(Phase2DigiBaseline); + } + dvgi.adc() = std::min(adc_int, int(std::numeric_limits::max())); + } + }); + } + }; +} // namespace calibPixel + +#endif // RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_CalibPixel_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h new file mode 100644 index 0000000000000..c149707e41d9a --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h @@ -0,0 +1,207 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_alpaka_ClusterChargeCut_h +#define RecoLocalTracker_SiPixelClusterizer_alpaka_ClusterChargeCut_h + +#include +#include + +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" + +// #define GPU_DEBUG + +namespace pixelClustering { + + template + struct ClusterChargeCut { + template + ALPAKA_FN_ACC void operator()( + const TAcc& acc, + SiPixelDigisSoAView digi_view, + SiPixelClustersSoAView clus_view, + SiPixelClusterThresholds + clusterThresholds, // charge cut on cluster in electrons (for layer 1 and for other layers) + const uint32_t numElements) const { + constexpr int startBPIX2 = TrackerTraits::layerStart[1]; + constexpr int32_t maxNumClustersPerModules = TrackerTraits::maxNumClustersPerModules; + [[maybe_unused]] constexpr int nMaxModules = TrackerTraits::numberOfModules; + + const uint32_t blockIdx(alpaka::getIdx(acc)[0u]); + auto firstModule = blockIdx; + auto endModule = clus_view[0].moduleStart(); + if 
(blockIdx >= endModule) + return; + + auto& charge = alpaka::declareSharedVar(acc); + auto& ok = alpaka::declareSharedVar(acc); + auto& newclusId = alpaka::declareSharedVar(acc); + + const uint32_t gridDimension(alpaka::getWorkDiv(acc)[0u]); + + for (auto module = firstModule; module < endModule; module += gridDimension) { + auto firstPixel = clus_view[1 + module].moduleStart(); + auto thisModuleId = digi_view[firstPixel].moduleId(); + + ALPAKA_ASSERT_OFFLOAD(nMaxModules < maxNumModules); + ALPAKA_ASSERT_OFFLOAD(startBPIX2 < nMaxModules); + + uint32_t nclus = clus_view[thisModuleId].clusInModule(); + if (nclus == 0) + continue; // nothing to cut in this module; keep looping, this block may own further modules + + if (cms::alpakatools::once_per_block(acc) && nclus > maxNumClustersPerModules) + printf("Warning too many clusters in module %d in block %d: %d > %d\n", + thisModuleId, + module, + nclus, + maxNumClustersPerModules); + + // Stride = block size. + const uint32_t blockDimension(alpaka::getWorkDiv(acc)[0u]); + + // Get thread / CPU element indices in block. + const auto& [firstElementIdxNoStride, endElementIdxNoStride] = + cms::alpakatools::element_index_range_in_block(acc, firstPixel); + + if (nclus > maxNumClustersPerModules) { + uint32_t firstElementIdx = firstElementIdxNoStride; + uint32_t endElementIdx = endElementIdxNoStride; + // remove excess FIXME find a way to cut charge first.... 
+ for (uint32_t i = firstElementIdx; i < numElements; ++i) { + if (not cms::alpakatools::next_valid_element_index_strided( + i, firstElementIdx, endElementIdx, blockDimension, numElements)) + break; + if (digi_view[i].moduleId() == invalidModuleId) + continue; // not valid + if (digi_view[i].moduleId() != thisModuleId) + break; // end of module + if (digi_view[i].clus() >= maxNumClustersPerModules) { + digi_view[i].moduleId() = invalidModuleId; + digi_view[i].clus() = invalidModuleId; + } + } + nclus = maxNumClustersPerModules; + } + +#ifdef GPU_DEBUG + if (thisModuleId % 100 == 1) + if (cms::alpakatools::once_per_block(acc)) + printf("start cluster charge cut for module %d in block %d\n", thisModuleId, module); +#endif + + ALPAKA_ASSERT_OFFLOAD(nclus <= maxNumClustersPerModules); + cms::alpakatools::for_each_element_in_block_strided(acc, nclus, [&](uint32_t i) { charge[i] = 0; }); + alpaka::syncBlockThreads(acc); + + uint32_t firstElementIdx = firstElementIdxNoStride; + uint32_t endElementIdx = endElementIdxNoStride; + for (uint32_t i = firstElementIdx; i < numElements; ++i) { + if (not cms::alpakatools::next_valid_element_index_strided( + i, firstElementIdx, endElementIdx, blockDimension, numElements)) + break; + if (digi_view[i].moduleId() == invalidModuleId) + continue; // not valid + if (digi_view[i].moduleId() != thisModuleId) + break; // end of module + alpaka::atomicAdd(acc, + &charge[digi_view[i].clus()], + static_cast(digi_view[i].adc()), + alpaka::hierarchy::Threads{}); + } + alpaka::syncBlockThreads(acc); + + auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2); + bool allGood = true; + + cms::alpakatools::for_each_element_in_block_strided(acc, nclus, [&](uint32_t i) { + newclusId[i] = ok[i] = (charge[i] > chargeCut) ? 
1 : 0; + if (ok[i] == 0) + allGood = allGood && false; + + // #ifdef GPU_DEBUG + // printf("module %d -> chargeCut = %d; cluster %d; charge = %d; ok = %s\n",thisModuleId, chargeCut,i,charge[i],ok[i] > 0 ? " -> good" : "-> cut"); + // #endif + }); + alpaka::syncBlockThreads(acc); + + // if all clusters above threshold do nothing + // if (allGood) + // continue; + + // renumber + auto& ws = alpaka::declareSharedVar(acc); + constexpr uint32_t maxThreads = 1024; + auto minClust = std::min(nclus, maxThreads); + + cms::alpakatools::blockPrefixScan(acc, newclusId, minClust, ws); + + if constexpr (maxNumClustersPerModules > maxThreads) //only if needed + { + for (uint32_t offset = maxThreads; offset < nclus; offset += maxThreads) { + cms::alpakatools::blockPrefixScan(acc, newclusId + offset, nclus - offset, ws); + + cms::alpakatools::for_each_element_in_block_strided(acc, nclus - offset, [&](uint32_t i) { + uint32_t prevBlockEnd = ((i + offset / maxThreads) * maxThreads) - 1; + newclusId[i] += newclusId[prevBlockEnd]; + }); + alpaka::syncBlockThreads(acc); + } + } + + ALPAKA_ASSERT_OFFLOAD(nclus >= newclusId[nclus - 1]); + + if (nclus == newclusId[nclus - 1]) + continue; // no cluster was cut: leave this module as is and move on to the next one owned by this block + + clus_view[thisModuleId].clusInModule() = newclusId[nclus - 1]; + alpaka::syncBlockThreads(acc); + +#ifdef GPU_DEBUG + if (thisModuleId % 100 == 1) + if (cms::alpakatools::once_per_block(acc)) + printf("module %d -> chargeCut = %d; nclus (pre cut) = %d; nclus (after cut) = %d\n", + thisModuleId, + chargeCut, + nclus, + clus_view[thisModuleId].clusInModule()); +#endif + // mark bad cluster again + cms::alpakatools::for_each_element_in_block_strided(acc, nclus, [&](uint32_t i) { + if (0 == ok[i]) + newclusId[i] = invalidModuleId + 1; + }); + + alpaka::syncBlockThreads(acc); + + // reassign id + firstElementIdx = firstElementIdxNoStride; + endElementIdx = endElementIdxNoStride; + for (uint32_t i = firstElementIdx; i < numElements; ++i) { + if (not cms::alpakatools::next_valid_element_index_strided( + i, 
firstElementIdx, endElementIdx, blockDimension, numElements)) + break; + if (digi_view[i].moduleId() == invalidModuleId) + continue; // not valid + if (digi_view[i].moduleId() != thisModuleId) + break; // end of module + if (0 == ok[digi_view[i].clus()]) + digi_view[i].moduleId() = digi_view[i].clus() = invalidModuleId; + else + digi_view[i].clus() = newclusId[digi_view[i].clus()] - 1; + // digi_view[i].clus() = newclusId[digi_view[i].clus()] - 1; + // if (digi_view[i].clus() == invalidModuleId) + // digi_view[i].moduleId() = invalidModuleId; + } + + alpaka::syncBlockThreads(acc); + + //done + } + } + }; + +} // namespace pixelClustering + +#endif // diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h new file mode 100644 index 0000000000000..616ccbd3eb8c7 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h @@ -0,0 +1,454 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_alpaka_PixelClustering_h +#define RecoLocalTracker_SiPixelClusterizer_alpaka_PixelClustering_h + +#include +#include +#include +#include +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" + +// #define GPU_DEBUG + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + namespace pixelClustering { + +#ifdef GPU_DEBUG + template >> + ALPAKA_STATIC_ACC_MEM_GLOBAL uint32_t gMaxHit = 0; +#endif + + namespace pixelStatus { + // Phase-1 pixel modules + constexpr uint32_t pixelSizeX = pixelTopology::Phase1::numRowsInModule; + constexpr uint32_t pixelSizeY = pixelTopology::Phase1::numColsInModule; + + // Use 0x00, 0x01, 0x03 so each can be OR'ed on top of the previous ones + 
enum Status : uint32_t { kEmpty = 0x00, kFound = 0x01, kDuplicate = 0x03 }; + + constexpr uint32_t bits = 2; + constexpr uint32_t mask = (0x01 << bits) - 1; + constexpr uint32_t valuesPerWord = sizeof(uint32_t) * 8 / bits; + constexpr uint32_t size = pixelSizeX * pixelSizeY / valuesPerWord; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr uint32_t getIndex(uint16_t x, uint16_t y) { + return (pixelSizeX * y + x) / valuesPerWord; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr uint32_t getShift(uint16_t x, uint16_t y) { + return (x % valuesPerWord) * 2; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr Status getStatus(uint32_t const* __restrict__ status, + uint16_t x, + uint16_t y) { + uint32_t index = getIndex(x, y); + uint32_t shift = getShift(x, y); + return Status{(status[index] >> shift) & mask}; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr bool isDuplicate(uint32_t const* __restrict__ status, + uint16_t x, + uint16_t y) { + return getStatus(status, x, y) == kDuplicate; + } + + /* FIXME + * In the more general case (e.g. a multithreaded CPU backend) there is a potential race condition + * between the read of status[index] at line NNN and the atomicCas at line NNN. + * We should investigate: + * - if `status` should be read through a `volatile` pointer (CUDA/ROCm) + * - if `status` should be read with an atomic load (CPU) + */ + template >> + ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr void promote(TAcc const& acc, + uint32_t* __restrict__ status, + const uint16_t x, + const uint16_t y) { + uint32_t index = getIndex(x, y); + uint32_t shift = getShift(x, y); + uint32_t old_word = status[index]; + uint32_t expected = old_word; + do { + expected = old_word; + Status old_status{(old_word >> shift) & mask}; + if (kDuplicate == old_status) { + // nothing to do + return; + } + Status new_status = (kEmpty == old_status) ? 
kFound : kDuplicate; + uint32_t new_word = old_word | (static_cast(new_status) << shift); + old_word = alpaka::atomicCas(acc, &status[index], expected, new_word, alpaka::hierarchy::Blocks{}); + } while (expected != old_word); + } + + } // namespace pixelStatus + + template + struct CountModules { + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + SiPixelDigisSoAView digi_view, + SiPixelClustersSoAView clus_view, + const unsigned int numElements) const { + [[maybe_unused]] constexpr int nMaxModules = TrackerTraits::numberOfModules; + +#ifdef GPU_DEBUG + if (cms::alpakatools::once_per_grid(acc)) { + printf("Starting to count modules to set module starts:"); + } +#endif + cms::alpakatools::for_each_element_in_grid_strided(acc, numElements, [&](uint32_t i) { + digi_view[i].clus() = i; + if (::pixelClustering::invalidModuleId != digi_view[i].moduleId()) { + int j = i - 1; + while (j >= 0 and digi_view[j].moduleId() == ::pixelClustering::invalidModuleId) + --j; + if (j < 0 or digi_view[j].moduleId() != digi_view[i].moduleId()) { + // boundary... + auto loc = alpaka::atomicInc( + acc, clus_view.moduleStart(), std::decay_t(nMaxModules), alpaka::hierarchy::Blocks{}); +#ifdef GPU_DEBUG + printf("> New module (no. %d) found at digi %d \n", loc, i); +#endif + clus_view[loc + 1].moduleStart() = i; + } + } + }); + } + }; + + template + struct FindClus { + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + SiPixelDigisSoAView digi_view, + SiPixelClustersSoAView clus_view, + const unsigned int numElements) const { + constexpr bool isPhase2 = std::is_base_of::value; + constexpr const uint32_t pixelStatusSize = isPhase2 ? 
1 : pixelStatus::size; + + // packed words array used to store the pixelStatus of each pixel + auto& status = alpaka::declareSharedVar(acc); + + // find the index of the first pixel not belonging to this module (or invalid) + auto& msize = alpaka::declareSharedVar(acc); + + const uint32_t blockIdx(alpaka::getIdx(acc)[0u]); + if (blockIdx >= clus_view[0].moduleStart()) + return; + + auto firstModule = blockIdx; + auto endModule = clus_view[0].moduleStart(); + + const uint32_t gridDimension(alpaka::getWorkDiv(acc)[0u]); + + for (auto module = firstModule; module < endModule; module += gridDimension) { + auto firstPixel = clus_view[1 + module].moduleStart(); + auto thisModuleId = digi_view[firstPixel].moduleId(); + ALPAKA_ASSERT_OFFLOAD(thisModuleId < TrackerTraits::numberOfModules); +#ifdef GPU_DEBUG + if (thisModuleId % 100 == 1) + if (cms::alpakatools::once_per_block(acc)) + printf("start clusterizer for module %d in block %d\n", thisModuleId, module); +#endif + + msize = numElements; + alpaka::syncBlockThreads(acc); + + // Stride = block size. + const uint32_t blockDimension(alpaka::getWorkDiv(acc)[0u]); + + // Get thread / CPU element indices in block. 
+ const auto& [firstElementIdxNoStride, endElementIdxNoStride] = + cms::alpakatools::element_index_range_in_block(acc, firstPixel); + uint32_t firstElementIdx = firstElementIdxNoStride; + uint32_t endElementIdx = endElementIdxNoStride; + + // skip threads not associated to an existing pixel + for (uint32_t i = firstElementIdx; i < numElements; ++i) { + if (not cms::alpakatools::next_valid_element_index_strided( + i, firstElementIdx, endElementIdx, blockDimension, numElements)) + break; + auto id = digi_view[i].moduleId(); + if (id == ::pixelClustering::invalidModuleId) // skip invalid pixels + continue; + if (id != thisModuleId) { // find the first pixel in a different module + alpaka::atomicMin(acc, &msize, i, alpaka::hierarchy::Threads{}); + break; + } + } + //init hist (ymax=416 < 512 : 9bits) + constexpr uint32_t maxPixInModule = TrackerTraits::maxPixInModule; + constexpr auto nbins = TrackerTraits::clusterBinning; + constexpr auto nbits = TrackerTraits::clusterBits; + using Hist = cms::alpakatools::HistoContainer; + auto& hist = alpaka::declareSharedVar(acc); + auto& ws = alpaka::declareSharedVar(acc); + cms::alpakatools::for_each_element_in_block_strided( + acc, Hist::totbins(), [&](uint32_t j) { hist.off[j] = 0; }); + alpaka::syncBlockThreads(acc); + ALPAKA_ASSERT_OFFLOAD((msize == numElements) or + ((msize < numElements) and (digi_view[msize].moduleId() != thisModuleId))); + // limit to maxPixInModule (FIXME if recurrent (and not limited to simulation with low threshold) one will need to implement something cleverer) + if (cms::alpakatools::once_per_block(acc)) { // msize is block-shared: each block has to clamp its own copy + if (msize - firstPixel > maxPixInModule) { + printf("too many pixels in module %d: %d > %d\n", thisModuleId, msize - firstPixel, maxPixInModule); + msize = maxPixInModule + firstPixel; + } + } + alpaka::syncBlockThreads(acc); + ALPAKA_ASSERT_OFFLOAD(msize - firstPixel <= maxPixInModule); + +#ifdef GPU_DEBUG + auto& totGood = alpaka::declareSharedVar(acc); + totGood = 0; + 
alpaka::syncBlockThreads(acc); +#endif + // remove duplicate pixels + if constexpr (not isPhase2) { //FIXME remove THIS + if (msize > 1) { + cms::alpakatools::for_each_element_in_block_strided( + acc, pixelStatus::size, [&](uint32_t i) { status[i] = 0; }); + alpaka::syncBlockThreads(acc); + + cms::alpakatools::for_each_element_in_block_strided(acc, msize - 1, firstElementIdx, [&](uint32_t i) { + // skip invalid pixels + if (digi_view[i].moduleId() == ::pixelClustering::invalidModuleId) + return; + pixelStatus::promote(acc, status, digi_view[i].xx(), digi_view[i].yy()); + }); + alpaka::syncBlockThreads(acc); + cms::alpakatools::for_each_element_in_block_strided(acc, msize - 1, firstElementIdx, [&](uint32_t i) { + // skip invalid pixels + if (digi_view[i].moduleId() == ::pixelClustering::invalidModuleId) + return; + if (pixelStatus::isDuplicate(status, digi_view[i].xx(), digi_view[i].yy())) { + digi_view[i].moduleId() = ::pixelClustering::invalidModuleId; + digi_view[i].rawIdArr() = 0; + } + }); + alpaka::syncBlockThreads(acc); + } + } + // fill histo + cms::alpakatools::for_each_element_in_block_strided(acc, msize, firstPixel, [&](uint32_t i) { + if (digi_view[i].moduleId() != ::pixelClustering::invalidModuleId) { // skip invalid pixels + hist.count(acc, digi_view[i].yy()); +#ifdef GPU_DEBUG + alpaka::atomicAdd(acc, &totGood, 1u, alpaka::hierarchy::Blocks{}); +#endif + } + }); + alpaka::syncBlockThreads(acc); + cms::alpakatools::for_each_element_in_block(acc, 32u, [&](uint32_t i) { + ws[i] = 0; // used by prefix scan... 
+ }); + alpaka::syncBlockThreads(acc); + hist.finalize(acc, ws); + alpaka::syncBlockThreads(acc); +#ifdef GPU_DEBUG + ALPAKA_ASSERT_OFFLOAD(hist.size() == totGood); + if (thisModuleId % 100 == 1) + if (cms::alpakatools::once_per_block(acc)) + printf("histo size %d\n", hist.size()); +#endif + cms::alpakatools::for_each_element_in_block_strided(acc, msize, firstPixel, [&](uint32_t i) { + if (digi_view[i].moduleId() != ::pixelClustering::invalidModuleId) { // skip invalid pixels + hist.fill(acc, digi_view[i].yy(), i - firstPixel); + } + }); + // Assume that we can cover the whole module with up to 16 blockDimension-wide iterations. + // This maxiter value was tuned for GPU, with 256 or 512 threads per block. + // Hence, it also works for the CPU case, with 256 or 512 elements per thread. + // The real constraint is maxiter >= hist.size() / blockDimension (asserted below), + // with blockDimension = threadPerBlock * elementsPerThread. + // Hence, maxiter can be tuned according to the workdiv. + constexpr unsigned int maxiter = 16; + ALPAKA_ASSERT_OFFLOAD((hist.size() / blockDimension) <= maxiter); + + // NB: can be tuned. + constexpr uint32_t threadDimension = cms::alpakatools::requires_single_thread_per_block_v ? 1 : 256; + +#ifndef NDEBUG + [[maybe_unused]] const uint32_t runTimeThreadDimension( + alpaka::getWorkDiv(acc)[0u]); + ALPAKA_ASSERT_OFFLOAD(runTimeThreadDimension <= threadDimension); +#endif + + // nearest neighbour + // allocate space for duplicate pixels: a pixel can appear more than once with different charge in the same event + constexpr int maxNeighbours = 10; + uint16_t nn[maxiter][threadDimension][maxNeighbours]; + uint8_t nnn[maxiter][threadDimension]; // number of nn + for (uint32_t elementIdx = 0; elementIdx < threadDimension; ++elementIdx) { + for (uint32_t k = 0; k < maxiter; ++k) { + nnn[k][elementIdx] = 0; + } + } + + alpaka::syncBlockThreads(acc); // for hit filling! 
+ +#ifdef GPU_DEBUG + // look for anomalous high occupancy + auto& n40 = alpaka::declareSharedVar(acc); + auto& n60 = alpaka::declareSharedVar(acc); + n40 = n60 = 0; + alpaka::syncBlockThreads(acc); + cms::alpakatools::for_each_element_in_block_strided(acc, Hist::nbins(), [&](uint32_t j) { + if (hist.size(j) > 60) + alpaka::atomicAdd(acc, &n60, 1u, alpaka::hierarchy::Blocks{}); + if (hist.size(j) > 40) + alpaka::atomicAdd(acc, &n40, 1u, alpaka::hierarchy::Blocks{}); + }); + alpaka::syncBlockThreads(acc); + if (cms::alpakatools::once_per_block(acc)) { + if (n60 > 0) + printf("columns with more than 60 px %d in %d\n", n60, thisModuleId); + else if (n40 > 0) + printf("columns with more than 40 px %d in %d\n", n40, thisModuleId); + } + alpaka::syncBlockThreads(acc); +#endif + // fill NN + uint32_t k = 0u; + cms::alpakatools::for_each_element_in_block_strided(acc, hist.size(), [&](uint32_t j) { + const uint32_t jEquivalentClass = j % threadDimension; + k = j / blockDimension; + ALPAKA_ASSERT_OFFLOAD(k < maxiter); + auto p = hist.begin() + j; + auto i = *p + firstPixel; + ALPAKA_ASSERT_OFFLOAD(digi_view[i].moduleId() != ::pixelClustering::invalidModuleId); + ALPAKA_ASSERT_OFFLOAD(digi_view[i].moduleId() == thisModuleId); // same module + int be = Hist::bin(digi_view[i].yy() + 1); + auto e = hist.end(be); + ++p; + ALPAKA_ASSERT_OFFLOAD(0 == nnn[k][jEquivalentClass]); + for (; p < e; ++p) { + auto m = (*p) + firstPixel; + ALPAKA_ASSERT_OFFLOAD(m != i); + ALPAKA_ASSERT_OFFLOAD(int(digi_view[m].yy()) - int(digi_view[i].yy()) >= 0); + ALPAKA_ASSERT_OFFLOAD(int(digi_view[m].yy()) - int(digi_view[i].yy()) <= 1); + if (std::abs(int(digi_view[m].xx()) - int(digi_view[i].xx())) <= 1) { + auto l = nnn[k][jEquivalentClass]++; + ALPAKA_ASSERT_OFFLOAD(l < maxNeighbours); + nn[k][jEquivalentClass][l] = *p; + } + } + }); + // for each pixel, look at all the pixels until the end of the module; + // when two valid pixels within +/- 1 in x or y are found, set their id to the minimum; + // 
after the loop, all the pixel in each cluster should have the id equeal to the lowest + // pixel in the cluster ( clus[i] == i ). + bool more = true; + int nloops = 0; + while (alpaka::syncBlockThreadsPredicate(acc, more)) { + if (1 == nloops % 2) { + cms::alpakatools::for_each_element_in_block_strided(acc, hist.size(), [&](uint32_t j) { + auto p = hist.begin() + j; + auto i = *p + firstPixel; + auto m = digi_view[i].clus(); + while (m != digi_view[m].clus()) + m = digi_view[m].clus(); + digi_view[i].clus() = m; + }); + } else { + more = false; + uint32_t k = 0u; + cms::alpakatools::for_each_element_in_block_strided(acc, hist.size(), [&](uint32_t j) { + k = j / blockDimension; + const uint32_t jEquivalentClass = j % threadDimension; + auto p = hist.begin() + j; + auto i = *p + firstPixel; + for (int kk = 0; kk < nnn[k][jEquivalentClass]; ++kk) { + auto l = nn[k][jEquivalentClass][kk]; + auto m = l + firstPixel; + ALPAKA_ASSERT_OFFLOAD(m != i); + auto old = + alpaka::atomicMin(acc, &digi_view[m].clus(), digi_view[i].clus(), alpaka::hierarchy::Blocks{}); + if (old != digi_view[i].clus()) { + // end the loop only if no changes were applied + more = true; + } + alpaka::atomicMin(acc, &digi_view[i].clus(), old, alpaka::hierarchy::Blocks{}); + } // nnloop + }); // pixel loop + } + ++nloops; + } // end while +#ifdef GPU_DEBUG + { + auto& n0 = alpaka::declareSharedVar(acc); + if (cms::alpakatools::once_per_block(acc)) + n0 = nloops; + alpaka::syncBlockThreads(acc); +#ifndef NDEBUG + [[maybe_unused]] auto ok = n0 == nloops; + ALPAKA_ASSERT_OFFLOAD(alpaka::syncBlockThreadsPredicate(acc, ok)); +#endif + if (thisModuleId % 100 == 1) + if (cms::alpakatools::once_per_block(acc)) + printf("# loops %d\n", nloops); + } +#endif + auto& foundClusters = alpaka::declareSharedVar(acc); + foundClusters = 0; + alpaka::syncBlockThreads(acc); + + // find the number of different clusters, identified by a pixels with clus[i] == i; + // mark these pixels with a negative id. 
+ cms::alpakatools::for_each_element_in_block_strided(acc, msize, firstPixel, [&](uint32_t i) { + if (digi_view[i].moduleId() != ::pixelClustering::invalidModuleId) { // skip invalid pixels + if (digi_view[i].clus() == static_cast(i)) { + auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{}); + digi_view[i].clus() = -(old + 1); + } + } + }); + alpaka::syncBlockThreads(acc); + + // propagate the negative id to all the pixels in the cluster. + cms::alpakatools::for_each_element_in_block_strided(acc, msize, firstPixel, [&](uint32_t i) { + if (digi_view[i].moduleId() != ::pixelClustering::invalidModuleId) { // skip invalid pixels + if (digi_view[i].clus() >= 0) { + // mark each pixel in a cluster with the same id as the first one + digi_view[i].clus() = digi_view[digi_view[i].clus()].clus(); + } + } + }); + alpaka::syncBlockThreads(acc); + + // adjust the cluster id to be a positive value starting from 0 + cms::alpakatools::for_each_element_in_block_strided(acc, msize, firstPixel, [&](uint32_t i) { + if (digi_view[i].moduleId() == ::pixelClustering::invalidModuleId) { // skip invalid pixels + digi_view[i].clus() = ::pixelClustering::invalidClusterId; + } else { + digi_view[i].clus() = -digi_view[i].clus() - 1; + } + }); + alpaka::syncBlockThreads(acc); + if (cms::alpakatools::once_per_block(acc)) { + clus_view[thisModuleId].clusInModule() = foundClusters; + clus_view[module].moduleId() = thisModuleId; +#ifdef GPU_DEBUG + if (foundClusters > gMaxHit) { + gMaxHit = foundClusters; + if (foundClusters > 8) + printf("max hit %d in %d\n", foundClusters, thisModuleId); + } + // if (thisModuleId % 100 == 1) + printf("%d clusters in module %d\n", foundClusters, thisModuleId); +#endif + } + } // module loop + } + }; + } // namespace pixelClustering +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // plugin_SiPixelClusterizer_alpaka_PixelClustering.h diff --git 
a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelPhase2DigiToCluster.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelPhase2DigiToCluster.cc new file mode 100644 index 0000000000000..5d0b355d1eebc --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelPhase2DigiToCluster.cc @@ -0,0 +1,158 @@ +// C++ includes +#include +#include +#include +#include +#include + +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" +#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/ESGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/SynchronizingEDProducer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" + +#include "SiPixelRawToClusterKernel.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class SiPixelPhase2DigiToCluster : public stream::SynchronizingEDProducer<> { + public: + explicit SiPixelPhase2DigiToCluster(const edm::ParameterSet& iConfig); + ~SiPixelPhase2DigiToCluster() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + using Algo = 
pixelDetails::SiPixelRawToClusterKernel; + + private: + void acquire(device::Event const& iEvent, device::EventSetup const& iSetup) override; + void produce(device::Event& iEvent, device::EventSetup const& iSetup) override; + + const edm::ESGetToken geomToken_; + const edm::EDGetTokenT> pixelDigiToken_; + + device::EDPutToken digiPutToken_; + device::EDPutToken digiErrorPutToken_; + device::EDPutToken clusterPutToken_; + + Algo Algo_; + + const bool includeErrors_; + const SiPixelClusterThresholds clusterThresholds_; + uint32_t nDigis_ = 0; + + SiPixelDigisSoACollection digis_d; + }; + + SiPixelPhase2DigiToCluster::SiPixelPhase2DigiToCluster(const edm::ParameterSet& iConfig) + : geomToken_(esConsumes()), + pixelDigiToken_(consumes>(iConfig.getParameter("InputDigis"))), + digiPutToken_(produces()), + clusterPutToken_(produces()), + includeErrors_(iConfig.getParameter("IncludeErrors")), + clusterThresholds_{iConfig.getParameter("clusterThreshold_layer1"), + iConfig.getParameter("clusterThreshold_otherLayers"), + static_cast(iConfig.getParameter("ElectronPerADCGain")), + static_cast(iConfig.getParameter("Phase2ReadoutMode")), + static_cast(iConfig.getParameter("Phase2DigiBaseline")), + static_cast(iConfig.getParameter("Phase2KinkADC"))} { + if (includeErrors_) { + digiErrorPutToken_ = produces(); + } + } + + void SiPixelPhase2DigiToCluster::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("IncludeErrors", true); + desc.add("clusterThreshold_layer1", + pixelClustering::clusterThresholdPhase2LayerOne); //FIXME (fix the CUDA) + desc.add("clusterThreshold_otherLayers", pixelClustering::clusterThresholdPhase2OtherLayers); + desc.add("ElectronPerADCGain", 1500.); + desc.add("Phase2ReadoutMode", 3); + desc.add("Phase2DigiBaseline", 1000); + desc.add("Phase2KinkADC", 8); + desc.add("InputDigis", edm::InputTag("simSiPixelDigis:Pixel")); + descriptions.addWithDefaultLabel(desc); + } + + void 
SiPixelPhase2DigiToCluster::acquire(device::Event const& iEvent, device::EventSetup const& iSetup) { + auto const& input = iEvent.get(pixelDigiToken_); + + const TrackerGeometry* geom_ = &iSetup.getData(geomToken_); + + uint32_t nDigis = 0; + + for (const auto& det : input) { + nDigis += det.size(); + } + + if (nDigis_ == 0) + return; + + SiPixelDigisHost digis_h(nDigis, iEvent.queue()); + nDigis_ = nDigis; + + nDigis = 0; + for (const auto& det : input) { + unsigned int detid = det.detId(); + DetId detIdObject(detid); + const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject); + auto const gind = genericDet->index(); + for (auto const& px : det) { + digis_h.view()[nDigis].moduleId() = uint16_t(gind); + + digis_h.view()[nDigis].xx() = uint16_t(px.row()); + digis_h.view()[nDigis].yy() = uint16_t(px.column()); + digis_h.view()[nDigis].adc() = uint16_t(px.adc()); + + digis_h.view()[nDigis].pdigi() = uint32_t(px.packedData()); + + digis_h.view()[nDigis].rawIdArr() = uint32_t(detid); + + nDigis++; + } + } + + digis_d = SiPixelDigisSoACollection(nDigis, iEvent.queue()); + alpaka::memcpy(iEvent.queue(), digis_d.buffer(), digis_h.buffer()); + + Algo_.makePhase2ClustersAsync(iEvent.queue(), clusterThresholds_, digis_d.view(), nDigis); + } + + void SiPixelPhase2DigiToCluster::produce(device::Event& iEvent, device::EventSetup const& iSetup) { + if (nDigis_ == 0) { + SiPixelClustersSoACollection clusters_d{pixelTopology::Phase1::numberOfModules, iEvent.queue()}; + iEvent.emplace(digiPutToken_, std::move(digis_d)); + iEvent.emplace(clusterPutToken_, std::move(clusters_d)); + if (includeErrors_) { + iEvent.emplace(digiErrorPutToken_, SiPixelDigiErrorsSoACollection()); + } + return; + } + + digis_d.setNModulesDigis(Algo_.nModules(), nDigis_); + + iEvent.emplace(digiPutToken_, std::move(digis_d)); + iEvent.emplace(clusterPutToken_, Algo_.getClusters()); + if (includeErrors_) { + iEvent.emplace(digiErrorPutToken_, Algo_.getErrors()); + } + } + +} // namespace 
ALPAKA_ACCELERATOR_NAMESPACE + +// define as framework plugin +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(SiPixelPhase2DigiToCluster); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc new file mode 100644 index 0000000000000..f3e13bade8e10 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc @@ -0,0 +1,289 @@ +#include +#include +#include +#include +#include + +#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h" +#include "CalibTracker/Records/interface/SiPixelMappingSoARecord.h" +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingUtilities.h" +#include "DataFormats/FEDRawData/interface/FEDNumbering.h" +#include "DataFormats/FEDRawData/interface/FEDRawData.h" +#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" +#include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" +#include "EventFilter/SiPixelRawToDigi/interface/PixelUnpackingRegions.h" +#include "FWCore/Framework/interface/ESWatcher.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include 
"FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/ESGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESGetToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/SynchronizingEDProducer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" + +#include "SiPixelRawToClusterKernel.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + template + class SiPixelRawToCluster : public stream::SynchronizingEDProducer<> { + public: + explicit SiPixelRawToCluster(const edm::ParameterSet& iConfig); + ~SiPixelRawToCluster() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + using Algo = pixelDetails::SiPixelRawToClusterKernel; + + private: + void acquire(device::Event const& iEvent, device::EventSetup const& iSetup) override; + void produce(device::Event& iEvent, device::EventSetup const& iSetup) override; + + edm::EDGetTokenT rawGetToken_; + edm::EDPutTokenT fmtErrorToken_; + device::EDPutToken digiPutToken_; + device::EDPutToken digiErrorPutToken_; + device::EDPutToken clusterPutToken_; + + edm::ESWatcher recordWatcher_; + const device::ESGetToken mapToken_; + const device::ESGetToken gainsToken_; + const edm::ESGetToken cablingMapToken_; + + std::unique_ptr cabling_; + std::vector fedIds_; + const SiPixelFedCablingMap* cablingMap_ = nullptr; + std::unique_ptr regions_; + + Algo Algo_; + PixelDataFormatter::Errors errors_; + + const bool includeErrors_; + const bool useQuality_; + uint32_t nDigis_; + const SiPixelClusterThresholds 
clusterThresholds_; + }; + + template + SiPixelRawToCluster::SiPixelRawToCluster(const edm::ParameterSet& iConfig) + : rawGetToken_(consumes(iConfig.getParameter("InputLabel"))), + digiPutToken_(produces()), + clusterPutToken_(produces()), + mapToken_(esConsumes()), + gainsToken_(esConsumes()), + cablingMapToken_(esConsumes( + edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), + includeErrors_(iConfig.getParameter("IncludeErrors")), + useQuality_(iConfig.getParameter("UseQualityInfo")), + clusterThresholds_{iConfig.getParameter("clusterThreshold_layer1"), + iConfig.getParameter("clusterThreshold_otherLayers"), + static_cast(iConfig.getParameter("VCaltoElectronGain")), + static_cast(iConfig.getParameter("VCaltoElectronGain_L1")), + static_cast(iConfig.getParameter("VCaltoElectronOffset")), + static_cast(iConfig.getParameter("VCaltoElectronOffset_L1"))} { + if (includeErrors_) { + digiErrorPutToken_ = produces(); + fmtErrorToken_ = produces(); + } + + // regions + if (!iConfig.getParameter("Regions").getParameterNames().empty()) { + regions_ = std::make_unique(iConfig, consumesCollector()); + } + } + + template + void SiPixelRawToCluster::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("IncludeErrors", true); + desc.add("UseQualityInfo", false); + // Note: this parameter is obsolete: it is ignored and will have no effect. + // It is kept to avoid breaking older configurations, and will not be printed in the generated cfi.py file. 
+ desc.addOptionalNode(edm::ParameterDescription("MaxFEDWords", 0, true), false) + ->setComment("This parameter is obsolete and will be ignored."); + desc.add("clusterThreshold_layer1", pixelClustering::clusterThresholdLayerOne); + desc.add("clusterThreshold_otherLayers", pixelClustering::clusterThresholdOtherLayers); + desc.add("VCaltoElectronGain", 47.f); + desc.add("VCaltoElectronGain_L1", 50.f); + desc.add("VCaltoElectronOffset", -60.f); + desc.add("VCaltoElectronOffset_L1", -670.f); + + desc.add("InputLabel", edm::InputTag("rawDataCollector")); + { + edm::ParameterSetDescription psd0; + psd0.addOptional>("inputs"); + psd0.addOptional>("deltaPhi"); + psd0.addOptional>("maxZ"); + psd0.addOptional("beamSpot"); + desc.add("Regions", psd0) + ->setComment("## Empty Regions PSet means complete unpacking"); + } + desc.add("CablingMapLabel", "")->setComment("CablingMap label"); //Tav + descriptions.addWithDefaultLabel(desc); + } + + template + void SiPixelRawToCluster::acquire(device::Event const& iEvent, device::EventSetup const& iSetup) { + [[maybe_unused]] auto const& hMap = iSetup.getData(mapToken_); + auto const& dGains = iSetup.getData(gainsToken_); + auto gains = SiPixelGainCalibrationForHLTDevice(1, iEvent.queue()); + auto modulesToUnpackRegional = + cms::alpakatools::make_device_buffer(iEvent.queue(), ::pixelgpudetails::MAX_SIZE); + const unsigned char* modulesToUnpack; + // initialize cabling map or update if necessary + if (recordWatcher_.check(iSetup)) { + // cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) + cablingMap_ = &iSetup.getData(cablingMapToken_); + fedIds_ = cablingMap_->fedIds(); + cabling_ = cablingMap_->cablingTree(); + LogDebug("map version:") << cablingMap_->version(); + } + if (regions_) { + regions_->run(iEvent, iSetup); + LogDebug("SiPixelRawToCluster") << "region2unpack #feds: " << regions_->nFEDs(); + LogDebug("SiPixelRawToCluster") << "region2unpack #modules (BPIX,EPIX,total): " << 
regions_->nBarrelModules() + << " " << regions_->nForwardModules() << " " << regions_->nModules(); + + modulesToUnpackRegional = SiPixelMappingUtilities::getModToUnpRegionalAsync( + *(regions_->modulesToUnpack()), cabling_.get(), fedIds_, iEvent.queue()); + modulesToUnpack = modulesToUnpackRegional.data(); + } else { + modulesToUnpack = hMap->modToUnpDefault(); + } + + const auto& buffers = iEvent.get(rawGetToken_); + + errors_.clear(); + + // GPU specific: Data extraction for RawToDigi GPU + unsigned int wordCounter = 0; + unsigned int fedCounter = 0; + bool errorsInEvent = false; + std::vector index(fedIds_.size(), 0); + std::vector start(fedIds_.size(), nullptr); + std::vector words(fedIds_.size(), 0); + // In CPU algorithm this loop is part of PixelDataFormatter::interpretRawData() + ErrorChecker errorcheck; + for (uint32_t i = 0; i < fedIds_.size(); ++i) { + const int fedId = fedIds_[i]; + if (regions_ && !regions_->mayUnpackFED(fedId)) + continue; + + // for GPU + // first 150 index stores the fedId and next 150 will store the + // start index of word in that fed + assert(fedId >= FEDNumbering::MINSiPixeluTCAFEDID); + fedCounter++; + + // get event data for this fed + const FEDRawData& rawData = buffers.FEDData(fedId); + + // GPU specific + int nWords = rawData.size() / sizeof(cms_uint64_t); + if (nWords == 0) { + continue; + } + // check CRC bit + const cms_uint64_t* trailer = reinterpret_cast(rawData.data()) + (nWords - 1); + if (not errorcheck.checkCRC(errorsInEvent, fedId, trailer, errors_)) { + continue; + } + // check headers + const cms_uint64_t* header = reinterpret_cast(rawData.data()); + header--; + bool moreHeaders = true; + while (moreHeaders) { + header++; + bool headerStatus = errorcheck.checkHeader(errorsInEvent, fedId, header, errors_); + moreHeaders = headerStatus; + } + + // check trailers + bool moreTrailers = true; + trailer++; + while (moreTrailers) { + trailer--; + bool trailerStatus = errorcheck.checkTrailer(errorsInEvent, fedId, 
nWords, trailer, errors_); + moreTrailers = trailerStatus; + } + + const cms_uint32_t* bw = (const cms_uint32_t*)(header + 1); + const cms_uint32_t* ew = (const cms_uint32_t*)(trailer); + + assert(0 == (ew - bw) % 2); + index[i] = wordCounter; + start[i] = bw; + words[i] = (ew - bw); + wordCounter += (ew - bw); + + } // end of for loop + nDigis_ = wordCounter; + if (nDigis_ == 0) + return; + + // copy the FED data to a single cpu buffer + pixelDetails::WordFedAppender wordFedAppender(nDigis_); + for (uint32_t i = 0; i < fedIds_.size(); ++i) { + wordFedAppender.initializeWordFed(fedIds_[i], index[i], start[i], words[i]); + } + Algo_.makePhase1ClustersAsync(iEvent.queue(), + clusterThresholds_, + hMap.const_view(), + modulesToUnpack, + dGains.const_view(), + wordFedAppender, + wordCounter, + fedCounter, + useQuality_, + includeErrors_, + edm::MessageDrop::instance()->debugEnabled); + } + + template + void SiPixelRawToCluster::produce(device::Event& iEvent, device::EventSetup const& iSetup) { + if (nDigis_ == 0) { + // Cannot use the default constructor here, as it would not allocate memory. + // In the case of no digis, clusters_d are not being instantiated, but are + // still used downstream to initialize TrackingRecHitSoADevice. If there + // are no valid pointers to clusters' Collection columns, instantiation + // of TrackingRecHits fail. 
Example: workflow 11604.0 + + iEvent.emplace(digiPutToken_, nDigis_, iEvent.queue()); + iEvent.emplace(clusterPutToken_, pixelTopology::Phase1::numberOfModules, iEvent.queue()); + if (includeErrors_) { + iEvent.emplace(digiErrorPutToken_); + iEvent.emplace(fmtErrorToken_); + } + return; + } + + iEvent.emplace(digiPutToken_, Algo_.getDigis()); + iEvent.emplace(clusterPutToken_, Algo_.getClusters()); + if (includeErrors_) { + iEvent.emplace(digiErrorPutToken_, Algo_.getErrors()); + iEvent.emplace(fmtErrorToken_, std::move(errors_)); + } + } + + using SiPixelRawToClusterPhase1 = SiPixelRawToCluster; + using SiPixelRawToClusterHIonPhase1 = SiPixelRawToCluster; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +// define as framework plugin +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(SiPixelRawToClusterPhase1); +DEFINE_FWK_ALPAKA_MODULE(SiPixelRawToClusterHIonPhase1); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc new file mode 100644 index 0000000000000..3e7caf8b2b3a4 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc @@ -0,0 +1,799 @@ +// C++ includes +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// CMSSW includes +#include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" +#include 
"CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h" +#include "DataFormats/SiPixelDigi/interface/SiPixelDigiConstants.h" + +// local includes +#include "CalibPixel.h" +#include "ClusterChargeCut.h" +#include "PixelClustering.h" +#include "SiPixelRawToClusterKernel.h" + +// #define GPU_DEBUG + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace pixelDetails { + + //////////////////// + + ALPAKA_FN_ACC uint32_t getLink(uint32_t ww) { + return ((ww >> ::sipixelconstants::LINK_shift) & ::sipixelconstants::LINK_mask); + } + + ALPAKA_FN_ACC uint32_t getRoc(uint32_t ww) { + return ((ww >> ::sipixelconstants::ROC_shift) & ::sipixelconstants::ROC_mask); + } + + ALPAKA_FN_ACC uint32_t getADC(uint32_t ww) { + return ((ww >> ::sipixelconstants::ADC_shift) & ::sipixelconstants::ADC_mask); + } + + ALPAKA_FN_ACC bool isBarrel(uint32_t rawId) { return (1 == ((rawId >> 25) & 0x7)); } + + ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap, + uint8_t fed, + uint32_t link, + uint32_t roc) { + using namespace ::pixelDetails; + uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc; + ::pixelDetails::DetIdGPU detId = { + cablingMap.rawId()[index], cablingMap.rocInDet()[index], cablingMap.moduleId()[index]}; + return detId; + } + + //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html + //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071 + // Convert local pixel to pixelDetails::global pixel + ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion( + bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local) { + int slopeRow = 0, slopeCol = 0; + int rowOffset = 0, colOffset = 0; + + if (bpix) { + if (side == -1 && layer != 1) { // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8 
- rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1; + } else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1; + colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc; + } // if roc + } else { // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1 + if (rocIdInDetUnit < 8) { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1; + colOffset = rocIdInDetUnit * ::pixelDetails::numColsInRoc; + } else { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (16 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1; + } + } + + } else { // fpix + if (side == -1) { // pannel 1 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1; + } else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1; + colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc; + } + } else { // pannel 2 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1; + } else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1; + colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc; + } + + } // side + } + + uint32_t gRow = rowOffset + slopeRow * local.row; + uint32_t gCol = colOffset + slopeCol * local.col; + ::pixelDetails::Pixel global = {gRow, gCol}; + return global; + } + + ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status, bool debug = false) { + uint8_t errorType = 0; + + switch (status) { + case 1: { + if (debug) + printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId); + errorType = 35; + break; + } + case 2: { + if (debug) + printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId); + errorType = 36; + break; + } + case 3: { + if (debug) + printf("Error in Fed: %i, invalid 
dcol/pixel value (errorType = 37)\n", fedId); + errorType = 37; + break; + } + case 4: { + if (debug) + printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId); + errorType = 38; + break; + } + default: + if (debug) + printf("Cabling check returned unexpected result, status = %i\n", status); + }; + + return errorType; + } + + ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) { + uint32_t numRowsInRoc = 80; + uint32_t numColsInRoc = 52; + + /// row and collumn in ROC representation + return ((rocRow < numRowsInRoc) & (rocCol < numColsInRoc)); + } + + ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); } + + ALPAKA_FN_ACC uint8_t checkROC(uint32_t errorWord, + uint8_t fedId, + uint32_t link, + const SiPixelMappingSoAConstView &cablingMap, + bool debug = false) { + uint8_t errorType = (errorWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ERROR_mask; + if (errorType < 25) + return 0; + bool errorFound = false; + + switch (errorType) { + case (25): { + errorFound = true; + uint32_t index = + fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + 1; + if (index > 1 && index <= cablingMap.size()) { + if (!(link == cablingMap.link()[index] && 1 == cablingMap.roc()[index])) + errorFound = false; + } + if (debug and errorFound) + printf("Invalid ROC = 25 found (errorType = 25)\n"); + break; + } + case (26): { + if (debug) + printf("Gap word found (errorType = 26)\n"); + errorFound = true; + break; + } + case (27): { + if (debug) + printf("Dummy word found (errorType = 27)\n"); + errorFound = true; + break; + } + case (28): { + if (debug) + printf("Error fifo nearly full (errorType = 28)\n"); + errorFound = true; + break; + } + case (29): { + if (debug) + printf("Timeout on a channel (errorType = 29)\n"); + if ((errorWord >> ::pixelDetails::OMIT_ERR_shift) & ::pixelDetails::OMIT_ERR_mask) { + if (debug) + 
printf("...first errorType=29 error, this gets masked out\n"); + } + errorFound = true; + break; + } + case (30): { + if (debug) + printf("TBM error trailer (errorType = 30)\n"); + int StateMatch_bits = 4; + int StateMatch_shift = 8; + uint32_t StateMatch_mask = ~(~uint32_t(0) << StateMatch_bits); + int StateMatch = (errorWord >> StateMatch_shift) & StateMatch_mask; + if (StateMatch != 1 && StateMatch != 8) { + if (debug) + printf("FED error 30 with unexpected State Bits (errorType = 30)\n"); + } + if (StateMatch == 1) + errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30 + errorFound = true; + break; + } + case (31): { + if (debug) + printf("Event number error (errorType = 31)\n"); + errorFound = true; + break; + } + default: + errorFound = false; + }; + + return errorFound ? errorType : 0; + } + + ALPAKA_FN_ACC uint32_t getErrRawID(uint8_t fedId, + uint32_t errWord, + uint32_t errorType, + const SiPixelMappingSoAConstView &cablingMap, + bool debug = false) { + uint32_t rID = 0xffffffff; + + switch (errorType) { + case 25: + case 30: + case 31: + case 36: + case 40: { + uint32_t roc = 1; + uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask; + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; + if (rID_temp != 9999) + rID = rID_temp; + break; + } + case 29: { + int chanNmbr = 0; + const int DB0_shift = 0; + const int DB1_shift = DB0_shift + 1; + const int DB2_shift = DB1_shift + 1; + const int DB3_shift = DB2_shift + 1; + const int DB4_shift = DB3_shift + 1; + const uint32_t DataBit_mask = ~(~uint32_t(0) << 1); + + int CH1 = (errWord >> DB0_shift) & DataBit_mask; + int CH2 = (errWord >> DB1_shift) & DataBit_mask; + int CH3 = (errWord >> DB2_shift) & DataBit_mask; + int CH4 = (errWord >> DB3_shift) & DataBit_mask; + int CH5 = (errWord >> DB4_shift) & DataBit_mask; + int BLOCK_bits = 3; + int BLOCK_shift = 8; + uint32_t BLOCK_mask = ~(~uint32_t(0) << BLOCK_bits); + int BLOCK = (errWord >> BLOCK_shift) & 
BLOCK_mask; + int localCH = 1 * CH1 + 2 * CH2 + 3 * CH3 + 4 * CH4 + 5 * CH5; + if (BLOCK % 2 == 0) + chanNmbr = (BLOCK / 2) * 9 + localCH; + else + chanNmbr = ((BLOCK - 1) / 2) * 9 + 4 + localCH; + if ((chanNmbr < 1) || (chanNmbr > 36)) + break; // signifies unexpected result + + uint32_t roc = 1; + uint32_t link = chanNmbr; + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; + if (rID_temp != 9999) + rID = rID_temp; + break; + } + case 37: + case 38: { + uint32_t roc = (errWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ROC_mask; + uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask; + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; + if (rID_temp != 9999) + rID = rID_temp; + break; + } + default: + break; + }; + + return rID; + } + + // Kernel to perform Raw to Digi conversion + struct RawToDigi_kernel { + template + ALPAKA_FN_ACC void operator()(const TAcc &acc, + const SiPixelMappingSoAConstView &cablingMap, + const unsigned char *modToUnp, + const uint32_t wordCounter, + const uint32_t *word, + const uint8_t *fedIds, + SiPixelDigisSoAView digisView, + SiPixelDigiErrorsSoAView err, + bool useQualityInfo, + bool includeErrors, + bool debug) const { + cms::alpakatools::for_each_element_in_grid_strided(acc, wordCounter, [&](uint32_t iloop) { + auto gIndex = iloop; + auto dvgi = digisView[gIndex]; + dvgi.xx() = 0; + dvgi.yy() = 0; + dvgi.adc() = 0; + bool skipROC = false; + + if (gIndex == 0) + err[gIndex].size() = 0; + + err[gIndex].pixelErrors() = SiPixelErrorCompact{0, 0, 0, 0}; + + uint8_t fedId = fedIds[gIndex / 2]; // +1200; + + // initialize (too many coninue below) + dvgi.pdigi() = 0; + dvgi.rawIdArr() = 0; + constexpr uint16_t invalidModuleId = std::numeric_limits::max() - 1; + dvgi.moduleId() = invalidModuleId; + + uint32_t ww = word[gIndex]; // Array containing 32 bit raw data + if (ww == 0) { + // 0 is an indicator of a noise/dead channel, skip these pixels during clusterization + 
return; + } + + uint32_t link = getLink(ww); // Extract link + uint32_t roc = getRoc(ww); // Extract Roc in link + ::pixelDetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc); + + uint8_t errorType = checkROC(ww, fedId, link, cablingMap, debug); + skipROC = (roc < ::pixelDetails::maxROCIndex) ? false : (errorType != 0); + if (includeErrors and skipROC) { + uint32_t rID = getErrRawID(fedId, ww, errorType, cablingMap, debug); + err[gIndex].pixelErrors() = SiPixelErrorCompact{rID, ww, errorType, fedId}; + alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Threads{}); + return; + } + + uint32_t rawId = detId.RawId; + uint32_t rocIdInDetUnit = detId.rocInDet; + bool barrel = isBarrel(rawId); + + uint32_t index = + fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + roc; + if (useQualityInfo) { + skipROC = cablingMap.badRocs()[index]; + if (skipROC) + return; + } + skipROC = modToUnp[index]; + if (skipROC) + return; + + uint32_t layer = 0; //, ladder =0; + int side = 0, panel = 0, module = 0; //disk = 0, blade = 0 + + if (barrel) { + layer = (rawId >> ::pixelDetails::layerStartBit) & ::pixelDetails::layerMask; + module = (rawId >> ::pixelDetails::moduleStartBit) & ::pixelDetails::moduleMask; + side = (module < 5) ? -1 : 1; + } else { + // endcap ids + layer = 0; + panel = (rawId >> ::pixelDetails::panelStartBit) & ::pixelDetails::panelMask; + //disk = (rawId >> diskStartBit_) & diskMask_; + side = (panel == 1) ? 
-1 : 1; + //blade = (rawId >> bladeStartBit_) & bladeMask_; + } + + // ***special case of layer to 1 be handled here + ::pixelDetails::Pixel localPix; + if (layer == 1) { + uint32_t col = (ww >> ::pixelDetails::COL_shift) & ::pixelDetails::COL_mask; + uint32_t row = (ww >> ::pixelDetails::ROW_shift) & ::pixelDetails::ROW_mask; + localPix.row = row; + localPix.col = col; + if (includeErrors) { + if (not rocRowColIsValid(row, col)) { + uint8_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays + err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId}; + alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Threads{}); + if (debug) + printf("BPIX1 Error status: %i\n", error); + return; + } + } + } else { + // ***conversion rules for dcol and pxid + uint32_t dcol = (ww >> ::pixelDetails::DCOL_shift) & ::pixelDetails::DCOL_mask; + uint32_t pxid = (ww >> ::pixelDetails::PXID_shift) & ::pixelDetails::PXID_mask; + uint32_t row = ::pixelDetails::numRowsInRoc - pxid / 2; + uint32_t col = dcol * 2 + pxid % 2; + localPix.row = row; + localPix.col = col; + if (includeErrors and not dcolIsValid(dcol, pxid)) { + uint8_t error = conversionError(fedId, 3, debug); + err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId}; + alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Threads{}); + if (debug) + printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); + return; + } + } + + ::pixelDetails::Pixel globalPix = frameConversion(barrel, side, layer, rocIdInDetUnit, localPix); + dvgi.xx() = globalPix.row; // origin shifting by 1 0-159 + dvgi.yy() = globalPix.col; // origin shifting by 1 0-415 + dvgi.adc() = getADC(ww); + dvgi.pdigi() = ::pixelDetails::pack(globalPix.row, globalPix.col, dvgi.adc()); + dvgi.moduleId() = detId.moduleId; + dvgi.rawIdArr() = rawId; + }); // end of stride on grid + + } // end of Raw to Digi kernel operator() + }; // end of Raw to Digi struct + 
+ template + struct FillHitsModuleStart { + template + ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const { + ALPAKA_ASSERT_OFFLOAD(TrackerTraits::numberOfModules < 2048); // easy to extend at least till 32*1024 + + constexpr int nMaxModules = TrackerTraits::numberOfModules; + constexpr uint32_t maxHitsInModule = TrackerTraits::maxHitsInModule; + +#ifndef NDEBUG + [[maybe_unused]] const uint32_t blockIdxLocal(alpaka::getIdx(acc)[0u]); + ALPAKA_ASSERT_OFFLOAD(0 == blockIdxLocal); + [[maybe_unused]] const uint32_t gridDimension(alpaka::getWorkDiv(acc)[0u]); + ALPAKA_ASSERT_OFFLOAD(1 == gridDimension); +#endif + + // limit to maxHitsInModule; + cms::alpakatools::for_each_element_in_block_strided(acc, nMaxModules, [&](uint32_t i) { + clus_view[i + 1].clusModuleStart() = std::min(maxHitsInModule, clus_view[i].clusInModule()); + }); + + constexpr bool isPhase2 = std::is_base_of::value; + constexpr auto leftModules = isPhase2 ? 1024 : nMaxModules - 1024; + + auto &&ws = alpaka::declareSharedVar(acc); + + cms::alpakatools::blockPrefixScan( + acc, clus_view.clusModuleStart() + 1, clus_view.clusModuleStart() + 1, 1024, ws); + + cms::alpakatools::blockPrefixScan( + acc, clus_view.clusModuleStart() + 1024 + 1, clus_view.clusModuleStart() + 1024 + 1, leftModules, ws); + + if constexpr (isPhase2) { + cms::alpakatools::blockPrefixScan( + acc, clus_view.clusModuleStart() + 2048 + 1, clus_view.clusModuleStart() + 2048 + 1, 1024, ws); + cms::alpakatools::blockPrefixScan(acc, + clus_view.clusModuleStart() + 3072 + 1, + clus_view.clusModuleStart() + 3072 + 1, + nMaxModules - 3072, + ws); + } + + constexpr auto lastModule = isPhase2 ? 
2049u : nMaxModules + 1; + cms::alpakatools::for_each_element_in_block_strided(acc, lastModule, 1025u, [&](uint32_t i) { + clus_view[i].clusModuleStart() += clus_view[1024].clusModuleStart(); + }); + alpaka::syncBlockThreads(acc); + + if constexpr (isPhase2) { + cms::alpakatools::for_each_element_in_block_strided(acc, 3073u, 2049u, [&](uint32_t i) { + clus_view[i].clusModuleStart() += clus_view[2048].clusModuleStart(); + }); + alpaka::syncBlockThreads(acc); + + cms::alpakatools::for_each_element_in_block_strided(acc, nMaxModules + 1, 3073u, [&](uint32_t i) { + clus_view[i].clusModuleStart() += clus_view[3072].clusModuleStart(); + }); + alpaka::syncBlockThreads(acc); + } +#ifdef GPU_DEBUG + ALPAKA_ASSERT_OFFLOAD(0 == clus_view[0].moduleStart()); + auto c0 = std::min(maxHitsInModule, clus_view[1].clusModuleStart()); + ALPAKA_ASSERT_OFFLOAD(c0 == clus_view[1].moduleStart()); + ALPAKA_ASSERT_OFFLOAD(clus_view[1024].moduleStart() >= clus_view[1023].moduleStart()); + ALPAKA_ASSERT_OFFLOAD(clus_view[1025].moduleStart() >= clus_view[1024].moduleStart()); + ALPAKA_ASSERT_OFFLOAD(clus_view[nMaxModules].moduleStart() >= clus_view[1025].moduleStart()); + + cms::alpakatools::for_each_element_in_block_strided(acc, nMaxModules + 1, [&](uint32_t i) { + if (0 != i) + ALPAKA_ASSERT_OFFLOAD(clus_view[i].moduleStart() >= clus_view[i - i].moduleStart()); + // Check BPX2 (1), FP1 (4) + constexpr auto bpix2 = TrackerTraits::layerStart[1]; + constexpr auto fpix1 = TrackerTraits::layerStart[4]; + if (i == bpix2 || i == fpix1) + printf("moduleStart %d %d\n", i, clus_view[i].moduleStart()); + }); +#endif + // avoid overflow + constexpr auto MAX_HITS = TrackerTraits::maxNumberOfHits; + cms::alpakatools::for_each_element_in_block_strided(acc, nMaxModules + 1, [&](uint32_t i) { + if (clus_view[i].clusModuleStart() > MAX_HITS) + clus_view[i].clusModuleStart() = MAX_HITS; + }); + + } // end of FillHitsModuleStart kernel operator() + }; // end of FillHitsModuleStart struct + + // Interface to 
outside + template + void SiPixelRawToClusterKernel::makePhase1ClustersAsync( + Queue &queue, + const SiPixelClusterThresholds clusterThresholds, + const SiPixelMappingSoAConstView &cablingMap, + const unsigned char *modToUnp, + const SiPixelGainCalibrationForHLTSoAConstView &gains, + const WordFedAppender &wordFed, + const uint32_t wordCounter, + const uint32_t fedCounter, + bool useQualityInfo, + bool includeErrors, + bool debug) { + nDigis = wordCounter; + +#ifdef GPU_DEBUG + std::cout << "decoding " << wordCounter << " digis." << std::endl; +#endif + constexpr int numberOfModules = TrackerTraits::numberOfModules; + digis_d = SiPixelDigisSoACollection(wordCounter, queue); + if (includeErrors) { + digiErrors_d = SiPixelDigiErrorsSoACollection(wordCounter, queue); + } + clusters_d = SiPixelClustersSoACollection(numberOfModules, queue); + // protect in case of empty event.... + if (wordCounter) { + const int threadsPerBlockOrElementsPerThread = + cms::alpakatools::requires_single_thread_per_block_v ? 32 : 512; + // fill it all + const uint32_t blocks = cms::alpakatools::divide_up_by(wordCounter, threadsPerBlockOrElementsPerThread); + const auto workDiv = cms::alpakatools::make_workdiv(blocks, threadsPerBlockOrElementsPerThread); + assert(0 == wordCounter % 2); + // wordCounter is the total no of words in each event to be trasfered on device + auto word_d = cms::alpakatools::make_device_buffer(queue, wordCounter); + // NB: IMPORTANT: fedId_d: In legacy, wordCounter elements are allocated. + // However, only the first half of elements end up eventually used: + // hence, here, only wordCounter/2 elements are allocated. 
+ auto fedId_d = cms::alpakatools::make_device_buffer(queue, wordCounter / 2); + alpaka::memcpy(queue, word_d, wordFed.word(), wordCounter); + alpaka::memcpy(queue, fedId_d, wordFed.fedId(), wordCounter / 2); + // Launch rawToDigi kernel + alpaka::exec(queue, + workDiv, + RawToDigi_kernel{}, + cablingMap, + modToUnp, + wordCounter, + word_d.data(), + fedId_d.data(), + digis_d->view(), + digiErrors_d->view(), + useQualityInfo, + includeErrors, + debug); + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "RawToDigi_kernel was run smoothly!" << std::endl; +#endif + } + // End of Raw2Digi and passing data for clustering + + { + // clusterizer + using namespace pixelClustering; + // calibrations + using namespace calibPixel; + const int threadsPerBlockOrElementsPerThread = []() { + if constexpr (std::is_same_v) { + // NB: MPORTANT: This could be tuned to benefit from innermost loop. + return 32; + } else { + return 256; + } + }(); + const auto blocks = cms::alpakatools::divide_up_by(std::max(wordCounter, numberOfModules), + threadsPerBlockOrElementsPerThread); + const auto workDiv = cms::alpakatools::make_workdiv(blocks, threadsPerBlockOrElementsPerThread); + + alpaka::exec( + queue, workDiv, CalibDigis{}, clusterThresholds, digis_d->view(), clusters_d->view(), gains, wordCounter); + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread + << " threadsPerBlockOrElementsPerThread\n"; +#endif + + alpaka::exec( + queue, workDiv, CountModules{}, digis_d->view(), clusters_d->view(), wordCounter); + + auto moduleStartFirstElement = + cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u); + alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement); + constexpr auto threadsPerBlockFindClus = 512; + const auto workDivMaxNumModules = + cms::alpakatools::make_workdiv(numberOfModules, threadsPerBlockFindClus); + // NB: With 
present FindClus() / chargeCut() algorithm, + // threadPerBlock (GPU) or elementsPerThread (CPU) = 256 show optimal performance. + // Though, it does not have to be the same number for CPU/GPU cases. + +#ifdef GPU_DEBUG + std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << threadsPerBlockFindClus + << " threadsPerBlockOrElementsPerThread\n"; +#endif + + alpaka::exec( + queue, workDivMaxNumModules, FindClus{}, digis_d->view(), clusters_d->view(), wordCounter); + +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + + // apply charge cut + alpaka::exec(queue, + workDivMaxNumModules, + ::pixelClustering::ClusterChargeCut{}, + digis_d->view(), + clusters_d->view(), + clusterThresholds, + wordCounter); + // count the module start indices already here (instead of + // rechits) so that the number of clusters/hits can be made + // available in the rechit producer without additional points of + // synchronization/ExternalWork + + // MUST be ONE block + const auto workDivOneBlock = cms::alpakatools::make_workdiv(1u, 1024u); + alpaka::exec(queue, workDivOneBlock, FillHitsModuleStart{}, clusters_d->view()); + + // last element holds the number of all clusters + const auto clusModuleStartLastElement = cms::alpakatools::make_device_view( + alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + numberOfModules, 1u); + constexpr int startBPIX2 = TrackerTraits::layerStart[1]; + + // element startBPIX2 hold the number of clusters until BPIX2 + const auto bpix2ClusterStart = cms::alpakatools::make_device_view( + alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + startBPIX2, 1u); + auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u); + alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement); + + auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u); + alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart); + 
+#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "SiPixelClusterizerAlpaka results:" << std::endl + << " > no. of digis: " << nDigis << std::endl + << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl + << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl + << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl; +#endif + + } // end clusterizer scope + } + + template + void SiPixelRawToClusterKernel::makePhase2ClustersAsync( + Queue &queue, + const SiPixelClusterThresholds clusterThresholds, + SiPixelDigisSoAView &digis_view, + const uint32_t numDigis) { + using namespace pixelClustering; + using pixelTopology::Phase2; + nDigis = numDigis; + constexpr int numberOfModules = pixelTopology::Phase2::numberOfModules; + clusters_d = SiPixelClustersSoACollection(numberOfModules, queue); + const auto threadsPerBlockOrElementsPerThread = 512; + const auto blocks = + cms::alpakatools::divide_up_by(std::max(numDigis, numberOfModules), threadsPerBlockOrElementsPerThread); + const auto workDiv = cms::alpakatools::make_workdiv(blocks, threadsPerBlockOrElementsPerThread); + + alpaka::exec( + queue, workDiv, calibPixel::CalibDigisPhase2{}, clusterThresholds, digis_view, clusters_d->view(), numDigis); + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread + << " threadsPerBlockOrElementsPerThread\n"; +#endif + alpaka::exec( + queue, workDiv, CountModules{}, digis_view, clusters_d->view(), numDigis); + + auto moduleStartFirstElement = + cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u); + alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement); + + /// should be larger than maxPixInModule/16 aka (maxPixInModule/maxiter in the kernel) + + const auto threadsPerBlockFindClus = ((TrackerTraits::maxPixInModule / 16 + 128 - 1) / 128) * 128; + const auto workDivMaxNumModules = 
cms::alpakatools::make_workdiv(numberOfModules, threadsPerBlockFindClus); + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "FindClus kernel launch with " << numberOfModules << " blocks of " << threadsPerBlockFindClus + << " threadsPerBlockOrElementsPerThread\n"; +#endif + alpaka::exec( + queue, workDivMaxNumModules, FindClus{}, digis_view, clusters_d->view(), numDigis); +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + + // apply charge cut + alpaka::exec(queue, + workDivMaxNumModules, + ::pixelClustering::ClusterChargeCut{}, + digis_view, + clusters_d->view(), + clusterThresholds, + numDigis); + + // count the module start indices already here (instead of + // rechits) so that the number of clusters/hits can be made + // available in the rechit producer without additional points of + // synchronization/ExternalWork + + // MUST be ONE block + const auto workDivOneBlock = cms::alpakatools::make_workdiv(1u, 1024u); + alpaka::exec(queue, workDivOneBlock, FillHitsModuleStart{}, clusters_d->view()); + + // last element holds the number of all clusters + const auto clusModuleStartLastElement = cms::alpakatools::make_device_view( + alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + numberOfModules, 1u); + constexpr int startBPIX2 = pixelTopology::Phase2::layerStart[1]; + // element startBPIX2 hold the number of clusters until BPIX2 + const auto bpix2ClusterStart = cms::alpakatools::make_device_view( + alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + startBPIX2, 1u); + auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u); + alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement); + + auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u); + alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart); + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "SiPixelPhase2DigiToCluster: results \n" + << " > no. 
of digis: " << numDigis << std::endl + << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl + << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl + << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl; +#endif + } // + + template class SiPixelRawToClusterKernel; + template class SiPixelRawToClusterKernel; + template class SiPixelRawToClusterKernel; + + } // namespace pixelDetails + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h new file mode 100644 index 0000000000000..b7b9071506652 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h @@ -0,0 +1,199 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_SiPixelRawToClusterKernel_h +#define RecoLocalTracker_SiPixelClusterizer_SiPixelRawToClusterKernel_h + +#include +#include +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" + +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h" + +#include 
"DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" +#include "DataFormats/SiPixelDetId/interface/PixelChannelIdentifier.h" + +namespace pixelDetails { + + constexpr auto MAX_LINK = pixelgpudetails::MAX_LINK; + constexpr auto MAX_SIZE = pixelgpudetails::MAX_SIZE; + constexpr auto MAX_ROC = pixelgpudetails::MAX_ROC; + // Phase 1 geometry constants + constexpr uint32_t layerStartBit = 20; + constexpr uint32_t ladderStartBit = 12; + constexpr uint32_t moduleStartBit = 2; + + constexpr uint32_t panelStartBit = 10; + constexpr uint32_t diskStartBit = 18; + constexpr uint32_t bladeStartBit = 12; + + constexpr uint32_t layerMask = 0xF; + constexpr uint32_t ladderMask = 0xFF; + constexpr uint32_t moduleMask = 0x3FF; + constexpr uint32_t panelMask = 0x3; + constexpr uint32_t diskMask = 0xF; + constexpr uint32_t bladeMask = 0x3F; + + constexpr uint32_t LINK_bits = 6; + constexpr uint32_t ROC_bits = 5; + constexpr uint32_t DCOL_bits = 5; + constexpr uint32_t PXID_bits = 8; + constexpr uint32_t ADC_bits = 8; + + // special for layer 1 + constexpr uint32_t LINK_bits_l1 = 6; + constexpr uint32_t ROC_bits_l1 = 5; + constexpr uint32_t COL_bits_l1 = 6; + constexpr uint32_t ROW_bits_l1 = 7; + constexpr uint32_t OMIT_ERR_bits = 1; + + constexpr uint32_t maxROCIndex = 8; + constexpr uint32_t numRowsInRoc = 80; + constexpr uint32_t numColsInRoc = 52; + + constexpr uint32_t MAX_WORD = 2000; + + constexpr uint32_t ADC_shift = 0; + constexpr uint32_t PXID_shift = ADC_shift + ADC_bits; + constexpr uint32_t DCOL_shift = PXID_shift + PXID_bits; + constexpr uint32_t ROC_shift = DCOL_shift + DCOL_bits; + constexpr uint32_t LINK_shift = ROC_shift + ROC_bits_l1; + // special for layer 1 ROC + constexpr uint32_t ROW_shift = ADC_shift + ADC_bits; + constexpr uint32_t COL_shift = ROW_shift + ROW_bits_l1; + constexpr uint32_t OMIT_ERR_shift = 20; + + constexpr uint32_t LINK_mask = ~(~uint32_t(0) << LINK_bits_l1); + 
constexpr uint32_t ROC_mask = ~(~uint32_t(0) << ROC_bits_l1); + constexpr uint32_t COL_mask = ~(~uint32_t(0) << COL_bits_l1); + constexpr uint32_t ROW_mask = ~(~uint32_t(0) << ROW_bits_l1); + constexpr uint32_t DCOL_mask = ~(~uint32_t(0) << DCOL_bits); + constexpr uint32_t PXID_mask = ~(~uint32_t(0) << PXID_bits); + constexpr uint32_t ADC_mask = ~(~uint32_t(0) << ADC_bits); + constexpr uint32_t ERROR_mask = ~(~uint32_t(0) << ROC_bits_l1); + constexpr uint32_t OMIT_ERR_mask = ~(~uint32_t(0) << OMIT_ERR_bits); + + struct DetIdGPU { + uint32_t RawId; + uint32_t rocInDet; + uint32_t moduleId; + }; + + struct Pixel { + uint32_t row; + uint32_t col; + }; + + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr pixelchannelidentifierimpl::Packing packing() { + return PixelChannelIdentifier::thePacking; + } + + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr uint32_t pack(uint32_t row, + uint32_t col, + uint32_t adc, + uint32_t flag = 0) { + constexpr pixelchannelidentifierimpl::Packing thePacking = packing(); + adc = std::min(adc, uint32_t(thePacking.max_adc)); + + return (row << thePacking.row_shift) | (col << thePacking.column_shift) | (adc << thePacking.adc_shift); + } + + constexpr uint32_t pixelToChannel(int row, int col) { + constexpr pixelchannelidentifierimpl::Packing thePacking = packing(); + return (row << thePacking.column_width) | col; + } + +} // namespace pixelDetails + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace pixelDetails { + + class WordFedAppender { + public: + WordFedAppender(); + ~WordFedAppender() = default; + + WordFedAppender(uint32_t words) + : word_{cms::alpakatools::make_host_buffer(words)}, + fedId_{cms::alpakatools::make_host_buffer(words)} {}; + + void initializeWordFed(int fedId, unsigned int wordCounterGPU, const uint32_t* src, unsigned int length) { + std::memcpy(word_.data() + wordCounterGPU, src, sizeof(uint32_t) * length); + std::memset(fedId_.data() + wordCounterGPU / 2, fedId - 1200, length / 2); + } + auto word() const { return 
word_; } + auto fedId() const { return fedId_; } + + private: + cms::alpakatools::host_buffer word_; + cms::alpakatools::host_buffer fedId_; + }; + + template + class SiPixelRawToClusterKernel { + public: + SiPixelRawToClusterKernel() : nModules_Clusters_h{cms::alpakatools::make_host_buffer(3u)} {} + + ~SiPixelRawToClusterKernel() = default; + + SiPixelRawToClusterKernel(const SiPixelRawToClusterKernel&) = delete; + SiPixelRawToClusterKernel(SiPixelRawToClusterKernel&&) = delete; + SiPixelRawToClusterKernel& operator=(const SiPixelRawToClusterKernel&) = delete; + SiPixelRawToClusterKernel& operator=(SiPixelRawToClusterKernel&&) = delete; + + void makePhase1ClustersAsync(Queue& queue, + const SiPixelClusterThresholds clusterThresholds, + const SiPixelMappingSoAConstView& cablingMap, + const unsigned char* modToUnp, + const SiPixelGainCalibrationForHLTSoAConstView& gains, + const WordFedAppender& wordFed, + const uint32_t wordCounter, + const uint32_t fedCounter, + bool useQualityInfo, + bool includeErrors, + bool debug); + + void makePhase2ClustersAsync(Queue& queue, + const SiPixelClusterThresholds clusterThresholds, + SiPixelDigisSoAView& digis_view, + const uint32_t numDigis); + + SiPixelDigisSoACollection getDigis() { + digis_d->setNModulesDigis(nModules_Clusters_h[0], nDigis); + return std::move(*digis_d); + } + + SiPixelClustersSoACollection getClusters() { + clusters_d->setNClusters(nModules_Clusters_h[1], nModules_Clusters_h[2]); + return std::move(*clusters_d); + } + + SiPixelDigiErrorsSoACollection getErrors() { return std::move(*digiErrors_d); } + + auto nModules() { return nModules_Clusters_h[0]; } + + private: + uint32_t nDigis = 0; + + // Data to be put in the event + cms::alpakatools::host_buffer nModules_Clusters_h; + std::optional digis_d; + std::optional clusters_d; + std::optional digiErrors_d; + }; + + } // namespace pixelDetails +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // plugin_SiPixelClusterizer_alpaka_SiPixelRawToClusterKernel_h 
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h index 180b356db2c88..869beb74564b8 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h @@ -8,11 +8,9 @@ #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" - -// local include(s) -#include "SiPixelClusterThresholds.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" namespace gpuCalibPixel { @@ -52,6 +50,7 @@ namespace gpuCalibPixel { int row = x[i]; int col = y[i]; + auto ret = ped->getPedAndGain(id[i], col, row, isDeadColumn, isNoisyColumn); float pedestal = ret.first; float gain = ret.second; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h index a96cd0bcc5c15..1ff62ed1c6c57 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h @@ -8,9 +8,7 @@ #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" #include "HeterogeneousCore/CUDAUtilities/interface/prefixScan.h" - -// local include(s) -#include "SiPixelClusterThresholds.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" namespace gpuClustering { diff --git a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py index 4460dd6ab0240..8d78599d07d9c 100644 
--- a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py +++ b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py @@ -1,6 +1,7 @@ import FWCore.ParameterSet.Config as cms from Configuration.Eras.Modifier_run3_common_cff import run3_common from Configuration.ProcessModifiers.gpu_cff import gpu +from Configuration.ProcessModifiers.alpaka_cff import alpaka # conditions used *only* by the modules running on GPU from CalibTracker.SiPixelESProducers.siPixelROCsStatusAndMappingWrapperESProducer_cfi import siPixelROCsStatusAndMappingWrapperESProducer @@ -17,6 +18,7 @@ # reconstruct the pixel digis and clusters on the gpu from RecoLocalTracker.SiPixelClusterizer.siPixelRawToClusterCUDAPhase1_cfi import siPixelRawToClusterCUDAPhase1 as _siPixelRawToClusterCUDA from RecoLocalTracker.SiPixelClusterizer.siPixelRawToClusterCUDAHIonPhase1_cfi import siPixelRawToClusterCUDAHIonPhase1 as _siPixelRawToClusterCUDAHIonPhase1 + siPixelClustersPreSplittingCUDA = _siPixelRawToClusterCUDA.clone() # HIon Modifiers @@ -34,7 +36,6 @@ VCaltoElectronOffset = 0, VCaltoElectronOffset_L1 = 0) - from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAPhase1_cfi import siPixelDigisClustersFromSoAPhase1 as _siPixelDigisClustersFromSoAPhase1 from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAPhase2_cfi import siPixelDigisClustersFromSoAPhase2 as _siPixelDigisClustersFromSoAPhase2 @@ -93,3 +94,94 @@ siPixelDigisClustersPreSplitting, # SwitchProducer wrapping the legacy pixel cluster producer or an alias for the pixel clusters information converted from SoA siPixelClustersPreSplitting)) + +###################################################################### + +### Alpaka Pixel Clusters Reco + +#from CalibTracker.SiPixelESProducers.siPixelCablingSoAESProducer_cfi import siPixelCablingSoAESProducer +#from CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTSoAESProducer_cfi import 
siPixelGainCalibrationForHLTSoAESProducer + +def _addProcessCalibTrackerAlpakaES(process): + process.load("CalibTracker.SiPixelESProducers.siPixelCablingSoAESProducer_cfi") + process.load("CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTSoAESProducer_cfi") + +modifyConfigurationCalibTrackerAlpakaES_ = alpaka.makeProcessModifier(_addProcessCalibTrackerAlpakaES) + +# reconstruct the pixel digis and clusters with alpaka on the device +from RecoLocalTracker.SiPixelClusterizer.siPixelRawToClusterPhase1_cfi import siPixelRawToClusterPhase1 as _siPixelRawToClusterAlpaka +siPixelClustersPreSplittingAlpaka = _siPixelRawToClusterAlpaka.clone() + +(alpaka & run3_common).toModify(siPixelClustersPreSplittingAlpaka, + # use the pixel channel calibrations scheme for Run 3 + clusterThreshold_layer1 = 4000, + VCaltoElectronGain = 1, # all gains=1, pedestals=0 + VCaltoElectronGain_L1 = 1, + VCaltoElectronOffset = 0, + VCaltoElectronOffset_L1 = 0) + +from RecoLocalTracker.SiPixelClusterizer.siPixelPhase2DigiToCluster_cfi import siPixelPhase2DigiToCluster as _siPixelPhase2DigiToCluster + +(alpaka & phase2_tracker).toReplaceWith(siPixelClustersPreSplittingAlpaka, _siPixelPhase2DigiToCluster.clone( + Phase2ReadoutMode = PixelDigitizerAlgorithmCommon.Phase2ReadoutMode.value(), # flag to decide Readout Mode : linear TDR (-1), dual slope with slope parameters (+1,+2,+3,+4 ...) 
with threshold subtraction + Phase2DigiBaseline = int(PixelDigitizerAlgorithmCommon.ThresholdInElectrons_Barrel.value()), # same for barrel and endcap + Phase2KinkADC = 8, + ElectronPerADCGain = PixelDigitizerAlgorithmCommon.ElectronPerAdc.value() +)) + +# reconstruct the pixel digis and clusters with alpaka on the cpu, for validation +siPixelClustersPreSplittingAlpakaSerial = siPixelClustersPreSplittingAlpaka.clone( + #alpaka = dict( backend = '*' ) + alpaka = None +) +siPixelClustersPreSplittingAlpakaSerial._TypedParameterizable__type = 'alpaka_serial_sync' + siPixelClustersPreSplittingAlpaka._TypedParameterizable__type.removesuffix('@alpaka') + +from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase1_cfi import siPixelDigisClustersFromSoAAlpakaPhase1 as _siPixelDigisClustersFromSoAAlpakaPhase1 +from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase2_cfi import siPixelDigisClustersFromSoAAlpakaPhase2 as _siPixelDigisClustersFromSoAAlpakaPhase2 + +(alpaka & ~phase2_tracker).toReplaceWith(siPixelDigisClustersPreSplitting,_siPixelDigisClustersFromSoAAlpakaPhase1.clone( + src = "siPixelClustersPreSplittingAlpaka" +)) + +(alpaka & phase2_tracker).toReplaceWith(siPixelDigisClustersPreSplitting,_siPixelDigisClustersFromSoAAlpakaPhase2.clone( + clusterThreshold_layer1 = 4000, + clusterThreshold_otherLayers = 4000, + src = "siPixelClustersPreSplittingAlpaka", + storeDigis = False, + produceDigis = False +)) + +from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase1_cfi import siPixelDigisClustersFromSoAAlpakaPhase1 as _siPixelDigisClustersFromSoAAlpakaPhase1 +from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase2_cfi import siPixelDigisClustersFromSoAAlpakaPhase2 as _siPixelDigisClustersFromSoAAlpakaPhase2 + +alpaka.toModify(siPixelClustersPreSplitting, + cpu = cms.EDAlias( + siPixelDigisClustersPreSplitting = cms.VPSet( + cms.PSet(type = 
cms.string("SiPixelClusteredmNewDetSetVector")) + ) + ) +) + +# Run 3 +alpaka.toReplaceWith(siPixelClustersPreSplittingTask, cms.Task( + # reconstruct the pixel clusters with alpaka + siPixelClustersPreSplittingAlpaka, + # reconstruct the pixel clusters with alpaka on the cpu (if requested by the validation) + siPixelClustersPreSplittingAlpakaSerial, + # convert from host SoA to legacy formats (digis and clusters) + siPixelDigisClustersPreSplitting, + # EDAlias for the clusters + siPixelClustersPreSplitting) +) + +# Phase 2 +(alpaka & phase2_tracker).toReplaceWith(siPixelClustersPreSplittingTask, cms.Task( + # reconstruct the pixel clusters with alpaka from copied digis + siPixelClustersPreSplittingAlpaka, + # reconstruct the pixel clusters with alpaka from copied digis on the cpu (if requested by the validation) + siPixelClustersPreSplittingAlpakaSerial, + # convert the pixel digis (except errors) and clusters to the legacy format + siPixelDigisClustersPreSplitting, + # SwitchProducer wrapping the legacy pixel cluster producer or an alias for the pixel clusters information converted from SoA + siPixelClustersPreSplitting) +) diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index c0291ed9f32f8..5b70ded261ddf 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -16,12 +16,13 @@ #include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" #endif // __CUDACC__ -#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" -#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" -#include "RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterThresholds.h" #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" - #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include 
"RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" + +// local includes, for testing only +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" int main(void) { #ifdef __CUDACC__ diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index 70a2970420c51..62787f4c989c1 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -1,15 +1,20 @@ + + + + + diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h new file mode 100644 index 0000000000000..9a2139ab2e355 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h @@ -0,0 +1,43 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_interface_PixelCPEFastParamsDevice_h +#define RecoLocalTracker_SiPixelRecHits_interface_PixelCPEFastParamsDevice_h + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +template +class PixelCPEFastParamsDevice { +public: + using Buffer = cms::alpakatools::device_buffer>; + using ConstBuffer = cms::alpakatools::const_device_buffer>; + + template + PixelCPEFastParamsDevice(TQueue queue) + : buffer_(cms::alpakatools::make_device_buffer>(queue)) {} + + // non-copyable + PixelCPEFastParamsDevice(PixelCPEFastParamsDevice const&) = delete; + PixelCPEFastParamsDevice& operator=(PixelCPEFastParamsDevice const&) = delete; + + // movable + PixelCPEFastParamsDevice(PixelCPEFastParamsDevice&&) = default; + PixelCPEFastParamsDevice& operator=(PixelCPEFastParamsDevice&&) = default; + + // default destructor + ~PixelCPEFastParamsDevice() = default; + + // access the buffer + Buffer buffer() { return buffer_; } + ConstBuffer buffer() const { return buffer_; } + 
ConstBuffer const_buffer() const { return buffer_; } + + auto size() const { return alpaka::getExtentProduct(buffer_); } + + pixelCPEforDevice::ParamsOnDeviceT const* data() const { return buffer_.data(); } + +private: + Buffer buffer_; +}; + +#endif // RecoLocalTracker_SiPixelRecHits_interface_PixelCPEFastParamsDevice_h diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h new file mode 100644 index 0000000000000..7d57c46dd7a13 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h @@ -0,0 +1,66 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_interface_PixelCPEFastParamsHost_h +#define RecoLocalTracker_SiPixelRecHits_interface_PixelCPEFastParamsHost_h + +#include + +#include "CondFormats/SiPixelTransient/interface/SiPixelGenError.h" +#include "DataFormats/GeometrySurface/interface/SOARotation.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEGenericBase.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +template +class PixelCPEFastParamsHost : public PixelCPEGenericBase { +public: + using Buffer = cms::alpakatools::host_buffer>; + using ConstBuffer = cms::alpakatools::const_host_buffer>; + + PixelCPEFastParamsHost(edm::ParameterSet const& conf, + const MagneticField* mag, + const TrackerGeometry& geom, + const TrackerTopology& ttopo, + const SiPixelLorentzAngle* lorentzAngle, + const SiPixelGenErrorDBObject* genErrorDBObject, + const SiPixelLorentzAngle* lorentzAngleWidth); + + // non-copyable 
+ PixelCPEFastParamsHost(PixelCPEFastParamsHost const&) = delete; + PixelCPEFastParamsHost& operator=(PixelCPEFastParamsHost const&) = delete; + + // movable + PixelCPEFastParamsHost(PixelCPEFastParamsHost&&) = default; + PixelCPEFastParamsHost& operator=(PixelCPEFastParamsHost&&) = default; + + // default destructor + ~PixelCPEFastParamsHost() override = default; + + // access the buffer + Buffer buffer() { return buffer_; } + ConstBuffer buffer() const { return buffer_; } + ConstBuffer const_buffer() const { return buffer_; } + + auto size() const { return alpaka::getExtentProduct(buffer_); } + + pixelCPEforDevice::ParamsOnDeviceT const* data() const { return buffer_.data(); } + + static void fillPSetDescription(edm::ParameterSetDescription& desc); + +private: + LocalPoint localPosition(DetParam const& theDetParam, ClusterParam& theClusterParam) const override; + LocalError localError(DetParam const& theDetParam, ClusterParam& theClusterParam) const override; + + void errorFromTemplates(DetParam const& theDetParam, ClusterParamGeneric& theClusterParam, float qclus) const; + + std::vector thePixelGenError_; + + void fillParamsForDevice(); + + Buffer buffer_; +}; + +#endif // RecoLocalTracker_SiPixelRecHits_interface_PixelCPEFastParamsHost_h diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEGenericBase.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEGenericBase.h index 1c7b9646d037f..2f18d86a39944 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEGenericBase.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEGenericBase.h @@ -1,9 +1,10 @@ #ifndef RecoLocalTracker_SiPixelRecHits_PixelCPEGenericBase_H #define RecoLocalTracker_SiPixelRecHits_PixelCPEGenericBase_H -#include "PixelCPEBase.h" #include +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" + class PixelCPEGenericBase : public PixelCPEBase { public: struct ClusterParamGeneric : ClusterParam { diff --git 
a/RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h b/RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h new file mode 100644 index 0000000000000..4e66d24604aec --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h @@ -0,0 +1,40 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_interface_alpaka_PixelCPEFastParamsCollection_h +#define RecoLocalTracker_SiPixelRecHits_interface_alpaka_PixelCPEFastParamsCollection_h + +#include +#include +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" + +// TODO: The class is created via inheritance of the PortableCollection. +// This is generally discouraged, and should be done via composition. 
+// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + template + using PixelCPEFastParams = std::conditional_t, + PixelCPEFastParamsHost, + PixelCPEFastParamsDevice>; + + using PixelCPEFastParamsPhase1 = PixelCPEFastParams; + using PixelCPEFastParamsPhase2 = PixelCPEFastParams; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template + struct CopyToDevice> { + template + static auto copyAsync(TQueue& queue, PixelCPEFastParamsHost const& srcData) { + using TDevice = typename alpaka::trait::DevType::type; + PixelCPEFastParamsDevice dstData(queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + return dstData; + } + }; +} // namespace cms::alpakatools + +#endif // RecoLocalTracker_SiPixelRecHits_interface_alpaka_PixelCPEFastParamsCollection_h diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h new file mode 100644 index 0000000000000..ac99af3146904 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h @@ -0,0 +1,433 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_interface_pixelCPEforDevice_h +#define RecoLocalTracker_SiPixelRecHits_interface_pixelCPEforDevice_h + +#include +#include +#include +#include +#include + +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/GeometrySurface/interface/SOARotation.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +namespace pixelCPEforDevice { + + // From https://cmssdt.cern.ch/dxr/CMSSW/source/CondFormats/SiPixelTransient/src/SiPixelGenError.cc#485-486 + // qbin: int (0-4) describing the charge of the cluster + // [0: 1.5; +
using Rotation = SOARotation; + + // SOA (on device) + + template + struct ClusParamsT { + uint32_t minRow[N]; + uint32_t maxRow[N]; + uint32_t minCol[N]; + uint32_t maxCol[N]; + + int32_t q_f_X[N]; + int32_t q_l_X[N]; + int32_t q_f_Y[N]; + int32_t q_l_Y[N]; + + int32_t charge[N]; + + float xpos[N]; + float ypos[N]; + + float xerr[N]; + float yerr[N]; + + int16_t xsize[N]; // (*8) clipped at 127 if negative is edge.... + int16_t ysize[N]; + + Status status[N]; + }; + + // all modules are identical! + struct CommonParams { + float theThicknessB; + float theThicknessE; + float thePitchX; + float thePitchY; + + uint16_t maxModuleStride; + uint8_t numberOfLaddersInBarrel; + }; + + struct DetParams { + bool isBarrel; + bool isPosZ; + uint16_t layer; + uint16_t index; + uint32_t rawId; + + float shiftX; + float shiftY; + float chargeWidthX; + float chargeWidthY; + uint16_t pixmx; // max pix charge + + uint16_t nRowsRoc; //we don't need 2^16 columns, is worth to use 15 + 1 for sign + uint16_t nColsRoc; + uint16_t nRows; + uint16_t nCols; + + uint32_t numPixsInModule; + + float x0, y0, z0; // the vertex in the local coord of the detector + + float apeXX, apeYY; // ape^2 + uint8_t sx2, sy1, sy2; + uint8_t sigmax[kNumErrorBins], sigmax1[kNumErrorBins], + sigmay[kNumErrorBins]; // in micron + float xfact[kGenErrorQBins], yfact[kGenErrorQBins]; + int minCh[kGenErrorQBins]; + + Frame frame; + }; + + template + struct LayerGeometryT { + uint32_t layerStart[TrackerTopology::numberOfLayers + 1]; + uint8_t layer[pixelTopology::layerIndexSize]; + uint16_t maxModuleStride; + }; + + constexpr int32_t MaxHitsInIter = pixelClustering::maxHitsInIter(); + using ClusParams = ClusParamsT; + + constexpr inline void computeAnglesFromDet( + DetParams const& __restrict__ detParams, float const x, float const y, float& cotalpha, float& cotbeta) { + // x,y local position on det + auto gvx = x - detParams.x0; + auto gvy = y - detParams.y0; + auto gvz = -1.f / detParams.z0; + // normalization not 
required as only ratio used... + // calculate angles + cotalpha = gvx * gvz; + cotbeta = gvy * gvz; + } + + constexpr inline float correction(int sizeM1, + int q_f, //!< Charge in the first pixel. + int q_l, //!< Charge in the last pixel. + uint16_t upper_edge_first_pix, //!< As the name says. + uint16_t lower_edge_last_pix, //!< As the name says. + float lorentz_shift, //!< L-shift at half thickness + float theThickness, //detector thickness + float cot_angle, //!< cot of alpha_ or beta_ + float pitch, //!< thePitchX or thePitchY + bool first_is_big, //!< true if the first is big + bool last_is_big) //!< true if the last is big + { + if (0 == sizeM1) // size 1 + return 0; + + float w_eff = 0; + bool simple = true; + if (1 == sizeM1) { // size 2 + //--- Width of the clusters minus the edge (first and last) pixels. + //--- In the note, they are denoted x_F and x_L (and y_F and y_L) + // assert(lower_edge_last_pix >= upper_edge_first_pix); + auto w_inner = pitch * float(lower_edge_last_pix - upper_edge_first_pix); // in cm + + //--- Predicted charge width from geometry + auto w_pred = theThickness * cot_angle // geometric correction (in cm) + - lorentz_shift; // (in cm) &&& check fpix! + + w_eff = std::abs(w_pred) - w_inner; + + //--- If the observed charge width is inconsistent with the expectations + //--- based on the track, do *not* use w_pred-w_inner. Instead, replace + //--- it with an *average* effective charge width, which is the average + //--- length of the edge pixels. + + // this can produce "large" regressions for very small numeric differences + simple = (w_eff < 0.0f) | (w_eff > pitch); + } + + if (simple) { + //--- Total length of the two edge pixels (first+last) + float sum_of_edge = 2.0f; + if (first_is_big) + sum_of_edge += 1.0f; + if (last_is_big) + sum_of_edge += 1.0f; + w_eff = pitch * 0.5f * sum_of_edge; // ave. 
length of edge pixels (first+last) (cm) + } + + //--- Finally, compute the position in this projection + float qdiff = q_l - q_f; + float qsum = q_l + q_f; + + //--- Temporary fix for clusters with both first and last pixel with charge = 0 + if (qsum == 0) + qsum = 1.0f; + + return 0.5f * (qdiff / qsum) * w_eff; + } + + template + constexpr inline void position(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { + constexpr int maxSize = TrackerTraits::maxSizeCluster; + //--- Upper Right corner of Lower Left pixel -- in measurement frame + uint16_t llx = cp.minRow[ic] + 1; + uint16_t lly = cp.minCol[ic] + 1; + + //--- Lower Left corner of Upper Right pixel -- in measurement frame + uint16_t urx = cp.maxRow[ic]; + uint16_t ury = cp.maxCol[ic]; + + uint16_t llxl = llx, llyl = lly, urxl = urx, uryl = ury; + + llxl = TrackerTraits::localX(llx); + llyl = TrackerTraits::localY(lly); + urxl = TrackerTraits::localX(urx); + uryl = TrackerTraits::localY(ury); + + auto mx = llxl + urxl; + auto my = llyl + uryl; + + int xsize = int(urxl) + 2 - int(llxl); + int ysize = int(uryl) + 2 - int(llyl); + assert(xsize >= 0); // 0 if bigpix...
+ assert(ysize >= 0); + + if (TrackerTraits::isBigPixX(cp.minRow[ic])) + ++xsize; + if (TrackerTraits::isBigPixX(cp.maxRow[ic])) + ++xsize; + if (TrackerTraits::isBigPixY(cp.minCol[ic])) + ++ysize; + if (TrackerTraits::isBigPixY(cp.maxCol[ic])) + ++ysize; + + int unbalanceX = 8.f * std::abs(float(cp.q_f_X[ic] - cp.q_l_X[ic])) / float(cp.q_f_X[ic] + cp.q_l_X[ic]); + int unbalanceY = 8.f * std::abs(float(cp.q_f_Y[ic] - cp.q_l_Y[ic])) / float(cp.q_f_Y[ic] + cp.q_l_Y[ic]); + + xsize = 8 * xsize - unbalanceX; + ysize = 8 * ysize - unbalanceY; + + cp.xsize[ic] = std::min(xsize, maxSize); + cp.ysize[ic] = std::min(ysize, maxSize); + + if (cp.minRow[ic] == 0 || cp.maxRow[ic] == uint32_t(detParams.nRows - 1)) + cp.xsize[ic] = -cp.xsize[ic]; + + if (cp.minCol[ic] == 0 || cp.maxCol[ic] == uint32_t(detParams.nCols - 1)) + cp.ysize[ic] = -cp.ysize[ic]; + + // half of the module extent (0.5 * nRows/nCols * pitch); the lorentz offset itself is applied further below. NOTE(review): the unbalanceX/unbalanceY divisions above have no guard for q_f + q_l == 0, unlike correction(), which sets qsum to 1 in that case + float xoff = 0.5f * float(detParams.nRows) * comParams.thePitchX; + float yoff = 0.5f * float(detParams.nCols) * comParams.thePitchY; + + // correction for big pixels for phase1 + xoff = xoff + TrackerTraits::bigPixXCorrection * comParams.thePitchX; + yoff = yoff + TrackerTraits::bigPixYCorrection * comParams.thePitchY; + + // apply the lorentz offset correction + auto xPos = detParams.shiftX + (comParams.thePitchX * 0.5f * float(mx)) - xoff; + auto yPos = detParams.shiftY + (comParams.thePitchY * 0.5f * float(my)) - yoff; + + float cotalpha = 0, cotbeta = 0; + + computeAnglesFromDet(detParams, xPos, yPos, cotalpha, cotbeta); + + auto thickness = detParams.isBarrel ?
comParams.theThicknessB : comParams.theThicknessE; + + auto xcorr = correction(cp.maxRow[ic] - cp.minRow[ic], + cp.q_f_X[ic], + cp.q_l_X[ic], + llxl, + urxl, + detParams.chargeWidthX, // lorentz shift in cm + thickness, + cotalpha, + comParams.thePitchX, + TrackerTraits::isBigPixX(cp.minRow[ic]), + TrackerTraits::isBigPixX(cp.maxRow[ic])); + + auto ycorr = correction(cp.maxCol[ic] - cp.minCol[ic], + cp.q_f_Y[ic], + cp.q_l_Y[ic], + llyl, + uryl, + detParams.chargeWidthY, // lorentz shift in cm + thickness, + cotbeta, + comParams.thePitchY, + TrackerTraits::isBigPixY(cp.minCol[ic]), + TrackerTraits::isBigPixY(cp.maxCol[ic])); + + cp.xpos[ic] = xPos + xcorr; + cp.ypos[ic] = yPos + ycorr; + } + + template + constexpr inline void errorFromSize(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { + // Edge cluster errors (defaults, overridden below for the other clusters) + cp.xerr[ic] = 0.0050; + cp.yerr[ic] = 0.0085; + + // FIXME these are errors from Run1 + float xerr_barrel_l1_def = TrackerTraits::xerr_barrel_l1_def; + float yerr_barrel_l1_def = TrackerTraits::yerr_barrel_l1_def; + float xerr_barrel_ln_def = TrackerTraits::xerr_barrel_ln_def; + float yerr_barrel_ln_def = TrackerTraits::yerr_barrel_ln_def; + float xerr_endcap_def = TrackerTraits::xerr_endcap_def; + float yerr_endcap_def = TrackerTraits::yerr_endcap_def; + + constexpr float xerr_barrel_l1[] = {0.00115, 0.00120, 0.00088}; // TODO: move these tables somewhere else + constexpr float yerr_barrel_l1[] = { + 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; + constexpr float xerr_barrel_ln[] = {0.00115, 0.00120, 0.00088}; + constexpr float yerr_barrel_ln[] = { + 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; + constexpr float xerr_endcap[] = {0.0020, 0.0020}; + constexpr float yerr_endcap[] = {0.00210}; + + auto sx = cp.maxRow[ic] - cp.minRow[ic]; + auto sy = cp.maxCol[ic] - cp.minCol[ic]; + + // is edgy ?
+ bool isEdgeX = cp.xsize[ic] < 1; + bool isEdgeY = cp.ysize[ic] < 1; + + // is one and big? + bool isBig1X = ((0 == sx) && TrackerTraits::isBigPixX(cp.minRow[ic])); + bool isBig1Y = ((0 == sy) && TrackerTraits::isBigPixY(cp.minCol[ic])); + + if (!isEdgeX && !isBig1X) { + if (not detParams.isBarrel) { + cp.xerr[ic] = sx < std::size(xerr_endcap) ? xerr_endcap[sx] : xerr_endcap_def; + } else if (detParams.layer == 1) { + cp.xerr[ic] = sx < std::size(xerr_barrel_l1) ? xerr_barrel_l1[sx] : xerr_barrel_l1_def; + } else { + cp.xerr[ic] = sx < std::size(xerr_barrel_ln) ? xerr_barrel_ln[sx] : xerr_barrel_ln_def; + } + } + + if (!isEdgeY && !isBig1Y) { + if (not detParams.isBarrel) { + cp.yerr[ic] = sy < std::size(yerr_endcap) ? yerr_endcap[sy] : yerr_endcap_def; + } else if (detParams.layer == 1) { + cp.yerr[ic] = sy < std::size(yerr_barrel_l1) ? yerr_barrel_l1[sy] : yerr_barrel_l1_def; + } else { + cp.yerr[ic] = sy < std::size(yerr_barrel_ln) ? yerr_barrel_ln[sy] : yerr_barrel_ln_def; + } + } + } + + template + constexpr inline void errorFromDB(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { + // Edge cluster errors + cp.xerr[ic] = 0.0050f; + cp.yerr[ic] = 0.0085f; + + auto sx = cp.maxRow[ic] - cp.minRow[ic]; + auto sy = cp.maxCol[ic] - cp.minCol[ic]; + + // is edgy ? (size is set negative: see above) + bool isEdgeX = cp.xsize[ic] < 1; + bool isEdgeY = cp.ysize[ic] < 1; + // is one and big? 
+ bool isOneX = (0 == sx); + bool isOneY = (0 == sy); + bool isBigX = TrackerTraits::isBigPixX(cp.minRow[ic]); + bool isBigY = TrackerTraits::isBigPixY(cp.minCol[ic]); + + auto ch = cp.charge[ic]; + auto bin = 0; + for (; bin < kGenErrorQBins - 1; ++bin) + // find first bin which minimum charge exceeds cluster charge + if (ch < detParams.minCh[bin + 1]) + break; + + // in detParams qBins are reversed bin0 -> smallest charge, bin4-> largest charge + // whereas in CondFormats/SiPixelTransient/src/SiPixelGenError.cc it is the opposite + // so we reverse the bin here -> kGenErrorQBins - 1 - bin + cp.status[ic].qBin = kGenErrorQBins - 1 - bin; + cp.status[ic].isOneX = isOneX; + cp.status[ic].isBigX = (isOneX & isBigX) | isEdgeX; + cp.status[ic].isOneY = isOneY; + cp.status[ic].isBigY = (isOneY & isBigY) | isEdgeY; + + auto xoff = -float(TrackerTraits::xOffset) * comParams.thePitchX; + int low_value = 0; + int high_value = kNumErrorBins - 1; + int bin_value = float(kNumErrorBins) * (cp.xpos[ic] + xoff) / (2 * xoff); + // return estimated bin value truncated to [0, 15] + int jx = std::clamp(bin_value, low_value, high_value); + + auto toCM = [](uint8_t x) { return float(x) * 1.e-4f; }; + + if (not isEdgeX) { + cp.xerr[ic] = isOneX ? toCM(isBigX ? detParams.sx2 : detParams.sigmax1[jx]) + : detParams.xfact[bin] * toCM(detParams.sigmax[jx]); + } + + auto ey = cp.ysize[ic] > 8 ? detParams.sigmay[std::min(cp.ysize[ic] - 9, 15)] : detParams.sy1; + if (not isEdgeY) { + cp.yerr[ic] = isOneY ? toCM(isBigY ? 
detParams.sy2 : detParams.sy1) : detParams.yfact[bin] * toCM(ey); + } + } + + //for Phase2 -> fallback to error from size + template <> + constexpr inline void errorFromDB(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { + errorFromSize(comParams, detParams, cp, ic); + } + + template + struct ParamsOnDeviceT { + using LayerGeometry = LayerGeometryT; + using AverageGeometry = pixelTopology::AverageGeometryT; + + CommonParams m_commonParams; + // Will contain an array of DetParams instances + DetParams m_detParams[TrackerTopology::numberOfModules]; + LayerGeometry m_layerGeometry; + AverageGeometry m_averageGeometry; + + constexpr CommonParams const& __restrict__ commonParams() const { return m_commonParams; } + constexpr DetParams const& __restrict__ detParams(int i) const { return m_detParams[i]; } + constexpr LayerGeometry const& __restrict__ layerGeometry() const { return m_layerGeometry; } + constexpr AverageGeometry const& __restrict__ averageGeometry() const { return m_averageGeometry; } + + CommonParams& commonParams() { return m_commonParams; } + DetParams& detParams(int i) { return m_detParams[i]; } + LayerGeometry& layerGeometry() { return m_layerGeometry; } + AverageGeometry& averageGeometry() { return m_averageGeometry; } + + constexpr uint8_t layer(uint16_t id) const { return m_layerGeometry.layer[id / TrackerTopology::maxModuleStride]; }; + }; + +} // namespace pixelCPEforDevice + +#endif // RecoLocalTracker_SiPixelRecHits_interface_pixelCPEforDevice_h diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml index 00c88eadd4b51..35a973120e9fd 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml @@ -1,12 +1,10 @@ - - - - + + + + - - @@ -14,5 +12,16 @@ + + + + + + + + + + + diff --git 
a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc new file mode 100644 index 0000000000000..9881aeab46bab --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc @@ -0,0 +1,187 @@ +#include +#include + +#include "DataFormats/Common/interface/DetSetVectorNew.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +template +class SiPixelRecHitFromSoAAlpaka : public edm::global::EDProducer<> { + using HitModuleStartArray = typename TrackingRecHitSoA::HitModuleStartArray; + using hindex_type = typename TrackerTraits::hindex_type; + using HMSstorage = typename std::vector; + +public: + explicit SiPixelRecHitFromSoAAlpaka(const edm::ParameterSet& iConfig); + ~SiPixelRecHitFromSoAAlpaka() override = default; + + 
static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + // Data has been implicitly copied from Device to Host by the framework + using HitsOnHost = TrackingRecHitHost; + +private: + void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + const edm::ESGetToken geomToken_; + const edm::EDGetTokenT hitsToken_; // Alpaka hits + const edm::EDGetTokenT clusterToken_; // legacy clusters + const edm::EDPutTokenT rechitsPutToken_; // legacy rechits + const edm::EDPutTokenT hostPutToken_; +}; + +template +SiPixelRecHitFromSoAAlpaka::SiPixelRecHitFromSoAAlpaka(const edm::ParameterSet& iConfig) + : geomToken_(esConsumes()), + hitsToken_(consumes(iConfig.getParameter("pixelRecHitSrc"))), + clusterToken_(consumes(iConfig.getParameter("src"))), + rechitsPutToken_(produces()), + hostPutToken_(produces()) {} + +template +void SiPixelRecHitFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("pixelRecHitSrc", edm::InputTag("siPixelRecHitsPreSplittingAlpaka")); + desc.add("src", edm::InputTag("siPixelClustersPreSplitting")); + descriptions.addWithDefaultLabel(desc); +} + +template +void SiPixelRecHitFromSoAAlpaka::produce(edm::StreamID streamID, + edm::Event& iEvent, + const edm::EventSetup& iSetup) const { + auto const& hits = iEvent.get(hitsToken_); + auto nHits = hits.view().metadata().size(); + LogDebug("SiPixelRecHitFromSoAAlpaka") << "converting " << nHits << " Hits"; + + // allocate a buffer for the indices of the clusters + constexpr auto nMaxModules = TrackerTraits::numberOfModules; + + SiPixelRecHitCollection output; + output.reserve(nMaxModules, nHits); + + HMSstorage hmsp(nMaxModules + 1); + + if (0 == nHits) { + hmsp.clear(); + iEvent.emplace(rechitsPutToken_, std::move(output)); + iEvent.emplace(hostPutToken_, std::move(hmsp)); + return; + } + + // fill content of HMSstorage product, and put it into the Event + for 
(unsigned int idx = 0; idx < hmsp.size(); ++idx) { + hmsp[idx] = hits.view().hitsModuleStart()[idx]; + } + iEvent.emplace(hostPutToken_, std::move(hmsp)); + + auto xl = hits.view().xLocal(); + auto yl = hits.view().yLocal(); + auto xe = hits.view().xerrLocal(); + auto ye = hits.view().yerrLocal(); + + TrackerGeometry const& geom = iSetup.getData(geomToken_); + + auto const hclusters = iEvent.getHandle(clusterToken_); + + constexpr uint32_t maxHitsInModule = pixelClustering::maxHitsInModule(); + + int numberOfDetUnits = 0; + int numberOfClusters = 0; + for (auto const& dsv : *hclusters) { + numberOfDetUnits++; + unsigned int detid = dsv.detId(); + DetId detIdObject(detid); + const GeomDetUnit* genericDet = geom.idToDetUnit(detIdObject); + auto gind = genericDet->index(); + const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); + assert(pixDet); + SiPixelRecHitCollection::FastFiller recHitsOnDetUnit(output, detid); + auto fc = hits.view().hitsModuleStart()[gind]; + auto lc = hits.view().hitsModuleStart()[gind + 1]; + auto nhits = lc - fc; + + assert(lc > fc); + LogDebug("SiPixelRecHitFromSoAAlpaka") << "in det " << gind << ": conv " << nhits << " hits from " << dsv.size() + << " legacy clusters" << ' ' << fc << ',' << lc << "\n"; + if (nhits > maxHitsInModule) + edm::LogWarning("SiPixelRecHitFromSoAAlpaka") + .format("Too many clusters {} in module {}. 
Only the first {} hits will be converted", + nhits, + gind, + maxHitsInModule); + + nhits = std::min(nhits, maxHitsInModule); + + LogDebug("SiPixelRecHitFromSoAAlpaka") << "in det " << gind << "conv " << nhits << " hits from " << dsv.size() + << " legacy clusters" << ' ' << lc << ',' << fc; + + if (0 == nhits) + continue; + auto jnd = [&](int k) { return fc + k; }; + assert(nhits <= dsv.size()); + if (nhits != dsv.size()) { + edm::LogWarning("GPUHits2CPU") << "nhits!= nclus " << nhits << ' ' << dsv.size(); + } + for (auto const& clust : dsv) { + assert(clust.originalId() >= 0); + assert(clust.originalId() < dsv.size()); + if (clust.originalId() >= nhits) + continue; + auto ij = jnd(clust.originalId()); + LocalPoint lp(xl[ij], yl[ij]); + LocalError le(xe[ij], 0, ye[ij]); + SiPixelRecHitQuality::QualWordType rqw = 0; + + numberOfClusters++; + + /* cpu version.... (for reference) + std::tuple tuple = cpe_->getParameters( clust, *genericDet ); + LocalPoint lp( std::get<0>(tuple) ); + LocalError le( std::get<1>(tuple) ); + SiPixelRecHitQuality::QualWordType rqw( std::get<2>(tuple) ); + */ + + // Create a persistent edm::Ref to the cluster + edm::Ref, SiPixelCluster> cluster = edmNew::makeRefTo(hclusters, &clust); + // Make a RecHit and add it to the DetSet + recHitsOnDetUnit.emplace_back(lp, le, rqw, *genericDet, cluster); + // ============================= + + LogDebug("SiPixelRecHitFromSoAAlpaka") << "cluster " << numberOfClusters << " at " << lp << ' ' << le; + + } // <-- End loop on Clusters + + // LogDebug("SiPixelRecHitGPU") + LogDebug("SiPixelRecHitFromSoAAlpaka") << "found " << recHitsOnDetUnit.size() << " RecHits on " << detid; + + } // <-- End loop on DetUnits + + LogDebug("SiPixelRecHitFromSoAAlpaka") << "found " << numberOfDetUnits << " dets, " << numberOfClusters + << " clusters"; + + iEvent.emplace(rechitsPutToken_, std::move(output)); +} + +using SiPixelRecHitFromSoAAlpakaPhase1 = SiPixelRecHitFromSoAAlpaka; 
+DEFINE_FWK_MODULE(SiPixelRecHitFromSoAAlpakaPhase1); + +using SiPixelRecHitFromSoAAlpakaPhase2 = SiPixelRecHitFromSoAAlpaka; +DEFINE_FWK_MODULE(SiPixelRecHitFromSoAAlpakaPhase2); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelCPEFastParamsESProducerAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelCPEFastParamsESProducerAlpaka.cc new file mode 100644 index 0000000000000..73059a13dc636 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelCPEFastParamsESProducerAlpaka.cc @@ -0,0 +1,120 @@ +#include +#include +#include +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h" + +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "MagneticField/Engine/interface/MagneticField.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "RecoLocalTracker/ClusterParameterEstimator/interface/PixelClusterParameterEstimator.h" + +#include "CondFormats/DataRecord/interface/SiPixelGenErrorDBObjectRcd.h" +#include "RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + template + class PixelCPEFastParamsESProducerAlpaka : public ESProducer { + public: 
+ PixelCPEFastParamsESProducerAlpaka(edm::ParameterSet const& iConfig); + std::unique_ptr> produce(const PixelCPEFastParamsRecord& iRecord); + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + private: + edm::ESGetToken magfieldToken_; + edm::ESGetToken pDDToken_; + edm::ESGetToken hTTToken_; + edm::ESGetToken lorentzAngleToken_; + edm::ESGetToken lorentzAngleWidthToken_; + edm::ESGetToken genErrorDBObjectToken_; + + edm::ParameterSet pset_; + bool useErrorsFromTemplates_; + }; + + using namespace edm; + + template + PixelCPEFastParamsESProducerAlpaka::PixelCPEFastParamsESProducerAlpaka(const edm::ParameterSet& p) + : ESProducer(p), pset_(p) { + auto const& myname = p.getParameter("ComponentName"); + auto const& magname = p.getParameter("MagneticFieldRecord"); + useErrorsFromTemplates_ = p.getParameter("UseErrorsFromTemplates"); + + auto cc = setWhatProduced(this, myname); /* the product is registered under the ComponentName label; every input token below is booked on the same collector */ + magfieldToken_ = cc.consumes(magname); + pDDToken_ = cc.consumes(); + hTTToken_ = cc.consumes(); + lorentzAngleToken_ = cc.consumes(edm::ESInputTag("")); + lorentzAngleWidthToken_ = cc.consumes(edm::ESInputTag("", "forWidth")); + if (useErrorsFromTemplates_) { /* the GenError payload is requested only when produce() will actually read it */ + genErrorDBObjectToken_ = cc.consumes(); + } + } + + template + std::unique_ptr> PixelCPEFastParamsESProducerAlpaka::produce( + const PixelCPEFastParamsRecord& iRecord) { + // Lorentz angle payload requested with the forWidth label + const SiPixelLorentzAngle* lorentzAngleWidthProduct = &iRecord.get(lorentzAngleWidthToken_); + + const SiPixelGenErrorDBObject* genErrorDBObjectProduct = nullptr; + + // Errors are taken only from the new GenError object + if (useErrorsFromTemplates_) { // fetched only when GenErrors are needed; otherwise the null pointer set above is passed on + genErrorDBObjectProduct = &iRecord.get(genErrorDBObjectToken_); + //} else { + //std::cout<<" pass an empty GenError pointer"<>(pset_, + &iRecord.get(magfieldToken_), + iRecord.get(pDDToken_), + iRecord.get(hTTToken_), + &iRecord.get(lorentzAngleToken_), + genErrorDBObjectProduct, + lorentzAngleWidthProduct); + } + + template + void 
PixelCPEFastParamsESProducerAlpaka::fillDescriptions( + edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + // parameters declared by PixelCPEBase + PixelCPEBase::fillPSetDescription(desc); + + // parameters declared by PixelCPEFastParamsHost + PixelCPEFastParamsHost::fillPSetDescription(desc); + + // used by PixelCPEFast (UseErrorsFromTemplates is also read by this producer's constructor) + desc.add("EdgeClusterErrorX", 50.0); + desc.add("EdgeClusterErrorY", 85.0); + desc.add("UseErrorsFromTemplates", true); + desc.add("TruncatePixelCharge", true); + + // the default component name is the common prefix followed by TrackerTraits::nameModifier + std::string name = "PixelCPEFastParams"; + name += TrackerTraits::nameModifier; + desc.add("ComponentName", name); + desc.add("MagneticFieldRecord", edm::ESInputTag()); + + descriptions.addWithDefaultLabel(desc); + } + + using PixelCPEFastParamsESProducerAlpakaPhase1 = PixelCPEFastParamsESProducerAlpaka; + using PixelCPEFastParamsESProducerAlpakaPhase2 = PixelCPEFastParamsESProducerAlpaka; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(PixelCPEFastParamsESProducerAlpakaPhase1); +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(PixelCPEFastParamsESProducerAlpakaPhase2); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernel.h b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernel.h new file mode 100644 index 0000000000000..2fc1404a03bb7 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernel.h @@ -0,0 +1,45 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_PixelRecHitKernel_h +#define RecoLocalTracker_SiPixelRecHits_PixelRecHitKernel_h + +#include + +#include + +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" 
+#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace pixelgpudetails { + using namespace cms::alpakatools; /* PixelRecHitKernel is a stateless, non-copyable and non-movable driver: makeHitsAsync() enqueues the rechit kernels on the given queue and returns the resulting hit collection. */ + + template + class PixelRecHitKernel { + public: + PixelRecHitKernel() = default; + ~PixelRecHitKernel() = default; + + PixelRecHitKernel(const PixelRecHitKernel&) = delete; + PixelRecHitKernel(PixelRecHitKernel&&) = delete; + PixelRecHitKernel& operator=(const PixelRecHitKernel&) = delete; + PixelRecHitKernel& operator=(PixelRecHitKernel&&) = delete; + + using ParamsOnDevice = pixelCPEforDevice::ParamsOnDeviceT; + + TrackingRecHitsSoACollection makeHitsAsync(SiPixelDigisSoACollection const& digis_d, + SiPixelClustersSoACollection const& clusters_d, + BeamSpotPOD const* bs_d, + ParamsOnDevice const* cpeParams, + Queue queue) const; + }; + } // namespace pixelgpudetails +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // RecoLocalTracker_SiPixelRecHits_PixelRecHitKernel_h diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc new file mode 100644 index 0000000000000..f0d61a646c0ce --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc @@ -0,0 +1,143 @@ +// C++ headers +#include +#include + +// Alpaka headers +#include + +// CMSSW headers +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +#include "PixelRecHitKernel.h" +#include "PixelRecHits.h" + +// uncomment to enable the printouts and the alpaka::wait() calls guarded by GPU_DEBUG in this file +//#define GPU_DEBUG + +namespace 
ALPAKA_ACCELERATOR_NAMESPACE { + using namespace cms::alpakatools; /* setHitsLayerStart is a kernel functor: for every index i iterated up to TrackerTraits::numberOfLayers + 1 entries it stores hitsModuleStart[layerStart[i]] into hitsLayerStart[i], with layerStart taken from cpeParams->layerGeometry(). It asserts that hitsModuleStart[0] is 0. */ + template + class setHitsLayerStart { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint32_t const* __restrict__ hitsModuleStart, + pixelCPEforDevice::ParamsOnDeviceT const* __restrict__ cpeParams, + uint32_t* __restrict__ hitsLayerStart) const { + assert(0 == hitsModuleStart[0]); + + for (int32_t i : cms::alpakatools::elements_with_stride(acc, TrackerTraits::numberOfLayers + 1)) { + hitsLayerStart[i] = hitsModuleStart[cpeParams->layerGeometry().layerStart[i]]; +#ifdef GPU_DEBUG + int old = i == 0 ? 0 : hitsModuleStart[cpeParams->layerGeometry().layerStart[i - 1]]; + printf("LayerStart %d/%d at module %d: %d - %d\n", + i, + TrackerTraits::numberOfLayers, + cpeParams->layerGeometry().layerStart[i], + hitsLayerStart[i], + hitsLayerStart[i] - old); +#endif + } + } + }; + + namespace pixelgpudetails { + + template + TrackingRecHitsSoACollection PixelRecHitKernel::makeHitsAsync( + SiPixelDigisSoACollection const& digis_d, + SiPixelClustersSoACollection const& clusters_d, + BeamSpotPOD const* bs_d, + pixelCPEforDevice::ParamsOnDeviceT const* cpeParams, + Queue queue) const { + using namespace pixelRecHits; + auto nHits = clusters_d.nClusters(); /* one hit is produced per cluster */ + auto offsetBPIX2 = clusters_d.offsetBPIX2(); + + TrackingRecHitsSoACollection hits_d(nHits, offsetBPIX2, clusters_d->clusModuleStart(), queue); + + int activeModulesWithDigis = digis_d.nModules(); + + // protect from empty events + if (activeModulesWithDigis) { + // GetHits is launched with one block per module with digis, 128 threads per block + int threadsPerBlock = 128; + int blocks = activeModulesWithDigis; + const auto workDiv1D = cms::alpakatools::make_workdiv(blocks, threadsPerBlock); + +#ifdef GPU_DEBUG + std::cout << "launching GetHits kernel on " << alpaka::core::demangled << " with " << blocks << " blocks" + << std::endl; +#endif + alpaka::exec(queue, + workDiv1D, + GetHits{}, + cpeParams, + bs_d, + digis_d.view(), + digis_d.nDigis(), + clusters_d.view(), + hits_d.view()); +#ifdef GPU_DEBUG + 
alpaka::wait(queue); +#endif + + // launched as a single block of 32 threads: assuming full warp of threads is better than a smaller number... + if (nHits) { + const auto workDiv1D = cms::alpakatools::make_workdiv(1, 32); + alpaka::exec(queue, + workDiv1D, + setHitsLayerStart{}, + clusters_d->clusModuleStart(), + cpeParams, + hits_d.view().hitsLayerStart().data()); + constexpr auto nLayers = TrackerTraits::numberOfLayers; + + // Use a view since it's runtime sized (contentSize is nHits) and can't use the implicit definition + // see HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h:100 + typename TrackingRecHitSoA::PhiBinnerView hrv_d; + hrv_d.assoc = &(hits_d.view().phiBinner()); + hrv_d.offSize = -1; + hrv_d.offStorage = nullptr; + hrv_d.contentSize = nHits; + hrv_d.contentStorage = hits_d.view().phiBinnerStorage(); + + // generic form: fillManyFromVector(h_d.data(), nParts, v_d.data(), offsets_d.data(), offsets[10], 256, queue); + // older call without the explicit view, kept commented out: + /* cms::alpakatools::fillManyFromVector(&(hits_d.view().phiBinner()), + nLayers, + hits_d.view().iphi(), + hits_d.view().hitsLayerStart().data(), + nHits, + (uint32_t)256, + queue); +*/ + // fill the phi binner from the per-hit iphi values, passing hitsLayerStart as the offsets of the nLayers parts + cms::alpakatools::fillManyFromVector(&(hits_d.view().phiBinner()), + hrv_d, + nLayers, + hits_d.view().iphi(), + hits_d.view().hitsLayerStart().data(), + nHits, + (uint32_t)256, + queue); + +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + } + } + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "PixelRecHitKernel -> DONE!" 
<< std::endl; +#endif + + return hits_d; + } + + template class PixelRecHitKernel; + template class PixelRecHitKernel; + template class PixelRecHitKernel; /* three explicit instantiations; their TrackerTraits arguments are not visible in this copy of the patch */ + + } // namespace pixelgpudetails +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h new file mode 100644 index 0000000000000..220a91b85ced3 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h @@ -0,0 +1,240 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h +#define RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h + +#include +#include +#include +#include + +#include + +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/Math/interface/approx_atan2.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +//#define GPU_DEBUG 1 +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace pixelRecHits { + + // Kernel functor that builds the rechits of one module per block. + // For each cluster of the module it collects the row/column bounding box and the charges from the digis, + // runs the CPE position and error estimate, and writes the local and the beamspot-corrected global + // coordinates into the hits view. Block 0 additionally copies the beamspot-corrected average geometry. + template + class GetHits { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc& acc, + pixelCPEforDevice::ParamsOnDeviceT const* __restrict__ cpeParams, + BeamSpotPOD const* __restrict__ bs, + SiPixelDigisSoAConstView digis, + uint32_t numElements, + SiPixelClustersSoAConstView clusters, + TrackingRecHitSoAView hits) const { + // FIXME + // the compiler seems NOT to optimize loads from views (even in a simple test case) + // The whole gymnastics here of copying or not is a pure 
heuristic exercise that seems to produce the fastest code with the above signature + // not using views (passing a gazillion of array pointers) seems to produce the fastest code (but it is harder to maintain) + + ALPAKA_ASSERT_OFFLOAD(cpeParams); + + const uint32_t blockIdx(alpaka::getIdx(acc)[0u]); + + // copy the average geometry, corrected by the beamspot position (done by block 0 only). FIXME (move it somewhere else???) + if (0 == blockIdx) { + auto& agc = hits.averageGeometry(); + auto const& ag = cpeParams->averageGeometry(); + auto nLadders = TrackerTraits::numberOfLaddersInBarrel; + + cms::alpakatools::for_each_element_in_block_strided(acc, nLadders, [&](uint32_t il) { + agc.ladderZ[il] = ag.ladderZ[il] - bs->z; + agc.ladderX[il] = ag.ladderX[il] - bs->x; + agc.ladderY[il] = ag.ladderY[il] - bs->y; + agc.ladderR[il] = sqrt(agc.ladderX[il] * agc.ladderX[il] + agc.ladderY[il] * agc.ladderY[il]); + agc.ladderMinZ[il] = ag.ladderMinZ[il] - bs->z; + agc.ladderMaxZ[il] = ag.ladderMaxZ[il] - bs->z; + }); + + // the two endcap entries are written once per block + if (cms::alpakatools::once_per_block(acc)) { + agc.endCapZ[0] = ag.endCapZ[0] - bs->z; + agc.endCapZ[1] = ag.endCapZ[1] - bs->z; + } + } + + // to be moved in common namespace... + using pixelClustering::invalidModuleId; + constexpr int32_t MaxHitsInIter = pixelCPEforDevice::MaxHitsInIter; + + using ClusParams = pixelCPEforDevice::ClusParams; + + // as usual one block per module; the per-cluster scratch parameters are declared as a block-shared variable + auto& clusParams = alpaka::declareSharedVar(acc); + + auto me = clusters[blockIdx].moduleId(); + int nclus = clusters[me].clusInModule(); + + // nothing to do for a module without clusters + if (0 == nclus) + return; +#ifdef GPU_DEBUG + if (cms::alpakatools::once_per_block(acc)) { + auto k = clusters[1 + blockIdx].moduleStart(); + while (digis[k].moduleId() == invalidModuleId) + ++k; + ALPAKA_ASSERT_OFFLOAD(digis[k].moduleId() == me); + } + + if (me % 100 == 1) + if (cms::alpakatools::once_per_block(acc)) + printf( + "hitbuilder: %d clusters in module %d. 
will write at %d\n", nclus, me, clusters[me].clusModuleStart()); +#endif + + for (int startClus = 0, endClus = nclus; startClus < endClus; startClus += MaxHitsInIter) { /* the clusters of the module are processed in batches of at most MaxHitsInIter */ + auto first = clusters[1 + blockIdx].moduleStart(); + + int nClusInIter = alpaka::math::min(acc, MaxHitsInIter, endClus - startClus); + int lastClus = startClus + nClusInIter; + assert(nClusInIter <= nclus); + assert(nClusInIter > 0); + assert(lastClus <= nclus); + + assert(nclus > MaxHitsInIter || (0 == startClus && nClusInIter == nclus && lastClus == nclus)); + + // init the per-cluster accumulators of this batch + cms::alpakatools::for_each_element_in_block_strided(acc, nClusInIter, [&](uint32_t ic) { + clusParams.minRow[ic] = std::numeric_limits::max(); + clusParams.maxRow[ic] = 0; + clusParams.minCol[ic] = std::numeric_limits::max(); + clusParams.maxCol[ic] = 0; + clusParams.charge[ic] = 0; + clusParams.q_f_X[ic] = 0; + clusParams.q_l_X[ic] = 0; + clusParams.q_f_Y[ic] = 0; + clusParams.q_l_Y[ic] = 0; + }); + + alpaka::syncBlockThreads(acc); + + // first pass over the digis of the module: min/max row and column of each cluster + // one thread per "digi" + const uint32_t blockDimension(alpaka::getWorkDiv(acc)[0u]); + const auto& [firstElementIdxNoStride, endElementIdxNoStride] = + cms::alpakatools::element_index_range_in_block(acc, first); + uint32_t rowsColsFirstElementIdx = firstElementIdxNoStride; + uint32_t rowsColsEndElementIdx = endElementIdxNoStride; + for (uint32_t i = rowsColsFirstElementIdx; i < numElements; ++i) { + if (not cms::alpakatools::next_valid_element_index_strided( + i, rowsColsFirstElementIdx, rowsColsEndElementIdx, blockDimension, numElements)) + break; + auto id = digis[i].moduleId(); + if (id == invalidModuleId) + continue; // not valid + if (id != me) + break; // end of module + auto cl = digis[i].clus(); + if (cl < startClus || cl >= lastClus) + continue; // cluster is outside the current batch + cl -= startClus; + ALPAKA_ASSERT_OFFLOAD(cl >= 0); + ALPAKA_ASSERT_OFFLOAD(cl < MaxHitsInIter); + auto x = digis[i].xx(); + auto y = digis[i].yy(); + alpaka::atomicMin(acc, &clusParams.minRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{}); 
+ alpaka::atomicMax(acc, &clusParams.maxRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{}); + alpaka::atomicMin(acc, &clusParams.minCol[cl], (uint32_t)y, alpaka::hierarchy::Threads{}); + alpaka::atomicMax(acc, &clusParams.maxCol[cl], (uint32_t)y, alpaka::hierarchy::Threads{}); + } + + alpaka::syncBlockThreads(acc); /* second pass over the same digis: total charge of each cluster, plus the charge on its first and last row and column */ + + auto pixmx = cpeParams->detParams(me).pixmx; + uint32_t chargeFirstElementIdx = firstElementIdxNoStride; + uint32_t chargeEndElementIdx = endElementIdxNoStride; + for (uint32_t i = chargeFirstElementIdx; i < numElements; ++i) { + if (not cms::alpakatools::next_valid_element_index_strided( + i, chargeFirstElementIdx, chargeEndElementIdx, blockDimension, numElements)) + break; + auto id = digis[i].moduleId(); + if (id == invalidModuleId) + continue; // not valid + if (id != me) + break; // end of module + auto cl = digis[i].clus(); + if (cl < startClus || cl >= lastClus) + continue; // cluster is outside the current batch + cl -= startClus; + ALPAKA_ASSERT_OFFLOAD(cl >= 0); + ALPAKA_ASSERT_OFFLOAD(cl < MaxHitsInIter); + auto x = digis[i].xx(); + auto y = digis[i].yy(); + auto ch = digis[i].adc(); + // the total charge uses the full adc value; the edge charges below use the adc capped at pixmx + alpaka::atomicAdd(acc, &clusParams.charge[cl], (int32_t)ch, alpaka::hierarchy::Threads{}); + ch = alpaka::math::min(acc, ch, pixmx); + if (clusParams.minRow[cl] == x) + alpaka::atomicAdd(acc, &clusParams.q_f_X[cl], (int32_t)ch, alpaka::hierarchy::Threads{}); + if (clusParams.maxRow[cl] == x) + alpaka::atomicAdd(acc, &clusParams.q_l_X[cl], (int32_t)ch, alpaka::hierarchy::Threads{}); + if (clusParams.minCol[cl] == y) + alpaka::atomicAdd(acc, &clusParams.q_f_Y[cl], (int32_t)ch, alpaka::hierarchy::Threads{}); + if (clusParams.maxCol[cl] == y) + alpaka::atomicAdd(acc, &clusParams.q_l_Y[cl], (int32_t)ch, alpaka::hierarchy::Threads{}); + } + + alpaka::syncBlockThreads(acc); + + // next one cluster per thread... 
+ first = clusters[me].clusModuleStart() + startClus; /* first is reused here: it now holds the output index of the first hit of this batch */ + cms::alpakatools::for_each_element_in_block_strided(acc, nClusInIter, [&](uint32_t ic) { + auto h = first + ic; // output index in global memory + + assert(h < (uint32_t)hits.metadata().size()); + assert(h < clusters[me + 1].clusModuleStart()); + + // position and error estimate for cluster ic, both working on the shared clusParams + pixelCPEforDevice::position( + cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); + + pixelCPEforDevice::errorFromDB( + cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); + + // store the result in the output hits view + hits[h].chargeAndStatus().charge = clusParams.charge[ic]; + hits[h].chargeAndStatus().status = clusParams.status[ic]; + hits[h].detectorIndex() = me; + + float xl, yl; + hits[h].xLocal() = xl = clusParams.xpos[ic]; + hits[h].yLocal() = yl = clusParams.ypos[ic]; + + hits[h].clusterSizeX() = clusParams.xsize[ic]; + hits[h].clusterSizeY() = clusParams.ysize[ic]; + + // the local errors are stored squared, with the apeXX and apeYY terms of the detector added + hits[h].xerrLocal() = clusParams.xerr[ic] * clusParams.xerr[ic] + cpeParams->detParams(me).apeXX; + hits[h].yerrLocal() = clusParams.yerr[ic] * clusParams.yerr[ic] + cpeParams->detParams(me).apeYY; + + // keep it local for computations + float xg, yg, zg; + // to global and compute phi... + cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg); + // here correct for the beamspot... 
+ xg -= bs->x; + yg -= bs->y; + zg -= bs->z; + + hits[h].xGlobal() = xg; + hits[h].yGlobal() = yg; + hits[h].zGlobal() = zg; + + hits[h].rGlobal() = alpaka::math::sqrt(acc, xg * xg + yg * yg); + hits[h].iphi() = unsafe_atan2s<7>(yg, xg); /* radius and phi are computed from the beamspot-corrected x and y; unsafe_atan2s presumably comes from approx_atan2.h (to be confirmed) */ + }); + alpaka::syncBlockThreads(acc); + } // end loop on batches + } + }; + + } // namespace pixelRecHits +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitAlpaka.cc new file mode 100644 index 0000000000000..46fd8a6b8c2ca --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitAlpaka.cc @@ -0,0 +1,100 @@ +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/BeamSpot/interface/alpaka/BeamSpotDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include 
"HeterogeneousCore/AlpakaCore/interface/alpaka/global/EDProducer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" /* NOTE(review): the next six FWCore and Geometry includes repeat headers already listed above */ + +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h" + +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h" + +#include "PixelRecHitKernel.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { /* SiPixelRecHitAlpaka is a global EDProducer: it reads the pixel digis, the pixel clusters and the beam spot from the event and the CPE parameters from the EventSetup, and puts the hit collection returned by PixelRecHitKernel::makeHitsAsync into the event. */ + template + class SiPixelRecHitAlpaka : public global::EDProducer<> { + public: + explicit SiPixelRecHitAlpaka(const edm::ParameterSet& iConfig); + ~SiPixelRecHitAlpaka() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + private: + void produce(edm::StreamID streamID, device::Event& iEvent, const device::EventSetup& iSetup) const override; + + const device::ESGetToken, PixelCPEFastParamsRecord> cpeToken_; + const device::EDGetToken tBeamSpot; + const device::EDGetToken tokenClusters_; + const device::EDGetToken tokenDigi_; + const device::EDPutToken> tokenHit_; + + const pixelgpudetails::PixelRecHitKernel Algo_; + }; + + template + SiPixelRecHitAlpaka::SiPixelRecHitAlpaka(const edm::ParameterSet& iConfig) + : cpeToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CPE")))), + tBeamSpot(consumes(iConfig.getParameter("beamSpot"))), + tokenClusters_(consumes(iConfig.getParameter("src"))), + tokenDigi_(consumes(iConfig.getParameter("src"))), /* clusters and digis are both consumed from the same src parameter */ + 
tokenHit_(produces()) {} + + template + void SiPixelRecHitAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("beamSpot", edm::InputTag("offlineBeamSpotDevice")); + desc.add("src", edm::InputTag("siPixelClustersPreSplittingAlpaka")); + + std::string cpe = "PixelCPEFastParams"; + cpe += TrackerTraits::nameModifier; + desc.add("CPE", cpe); /* the default CPE label is built the same way as the default ComponentName of PixelCPEFastParamsESProducerAlpaka */ + + descriptions.addWithDefaultLabel(desc); + } + + template + void SiPixelRecHitAlpaka::produce(edm::StreamID streamID, + device::Event& iEvent, + const device::EventSetup& es) const { + auto& fcpe = es.getData(cpeToken_); + + auto const& clusters = iEvent.get(tokenClusters_); + + auto const& digis = iEvent.get(tokenDigi_); + + auto const& bs = iEvent.get(tBeamSpot); /* the kernel receives raw pointers to the beam spot and to the CPE parameters, and works on the queue of this event */ + + iEvent.emplace(tokenHit_, + Algo_.makeHitsAsync(digis, clusters, bs.data(), fcpe.const_buffer().data(), iEvent.queue())); + } + using SiPixelRecHitAlpakaPhase1 = SiPixelRecHitAlpaka; + using SiPixelRecHitAlpakaPhase2 = SiPixelRecHitAlpaka; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(SiPixelRecHitAlpakaPhase1); +DEFINE_FWK_ALPAKA_MODULE(SiPixelRecHitAlpakaPhase2); diff --git a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py index 686b0afc335c4..52efaece5e4df 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py +++ b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py @@ -1,4 +1,5 @@ import FWCore.ParameterSet.Config as cms +from Configuration.ProcessModifiers.alpaka_cff import alpaka # # Load all Pixel Cluster Position Estimator ESProducers @@ -18,3 +19,10 @@ # from CalibTracker.SiPixelESProducers.SiPixelTemplateDBObjectESProducer_cfi import * from CalibTracker.SiPixelESProducers.SiPixel2DTemplateDBObjectESProducer_cfi import * + +# Loads the Phase1 and Phase2 Alpaka CPE ESProducer configurations into the process; applied through the alpaka process modifier +def _addProcessCPEsAlpaka(process): + 
process.load("RecoLocalTracker.SiPixelRecHits.pixelCPEFastParamsESProducerAlpakaPhase1_cfi") + process.load("RecoLocalTracker.SiPixelRecHits.pixelCPEFastParamsESProducerAlpakaPhase2_cfi") + +modifyConfigurationForAlpakaCPEs_ = alpaka.makeProcessModifier(_addProcessCPEsAlpaka) + diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index f45b41861995d..e6b2c9832600c 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -1,6 +1,7 @@ import FWCore.ParameterSet.Config as cms from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA from Configuration.ProcessModifiers.gpu_cff import gpu +from Configuration.ProcessModifiers.alpaka_cff import alpaka # legacy pixel rechit producer siPixelRecHits = cms.EDProducer("SiPixelRecHitConverter", @@ -112,9 +113,6 @@ ) ) - -#(gpu & pixelNtupletFit & phase2_tracker).toReplaceWith(siPixelRecHitsPreSplitting , cuda = _siPixelRecHitFromCUDAPhase2.clone()) - (gpu & pixelNtupletFit).toReplaceWith(siPixelRecHitsPreSplittingTask, cms.Task( # reconstruct the pixel rechits on the gpu or on the cpu # (normally only one of the two is run because only one is consumed from later stages) @@ -125,3 +123,49 @@ # producing and converting on cpu (if needed) siPixelRecHitsPreSplittingSoA )) + +###################################################################### + +### Alpaka Pixel Hits Reco +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitAlpakaPhase1_cfi import siPixelRecHitAlpakaPhase1 as _siPixelRecHitAlpakaPhase1 +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitAlpakaPhase2_cfi import siPixelRecHitAlpakaPhase2 as _siPixelRecHitAlpakaPhase2 + +# Hit SoA producer on the device (Phase1 producer by default; replaced by the Phase2 producer under phase2_tracker) +siPixelRecHitsPreSplittingAlpaka = _siPixelRecHitAlpakaPhase1.clone( + src = "siPixelClustersPreSplittingAlpaka" +) 
+phase2_tracker.toReplaceWith(siPixelRecHitsPreSplittingAlpaka,_siPixelRecHitAlpakaPhase2.clone( + src = "siPixelClustersPreSplittingAlpaka" +)) + +# Hit SoA producer on the cpu, for validation +siPixelRecHitsPreSplittingAlpakaSerial = siPixelRecHitsPreSplittingAlpaka.clone( + src = "siPixelClustersPreSplittingAlpakaSerial", + #alpaka = dict( backend = '*' ) + alpaka = None +) +# Rewrite the C++ type name of the clone: drop the '@alpaka' suffix and prepend the serial backend name (str.removesuffix needs Python >= 3.9). +# NOTE(review): backend-specific Alpaka plugins are usually named 'alpaka_serial_sync::<Type>'; confirm that the prefix without '::' is intended. +siPixelRecHitsPreSplittingAlpakaSerial._TypedParameterizable__type = 'alpaka_serial_sync' + siPixelRecHitsPreSplittingAlpaka._TypedParameterizable__type.removesuffix('@alpaka') + +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSoAAlpakaPhase1_cfi import siPixelRecHitFromSoAAlpakaPhase1 as _siPixelRecHitFromSoAAlpakaPhase1 +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSoAAlpakaPhase2_cfi import siPixelRecHitFromSoAAlpakaPhase2 as _siPixelRecHitFromSoAAlpakaPhase2 + +# With the alpaka modifier the cpu case of the legacy-format producer becomes the SoA-to-legacy converter (Phase1 or Phase2 variant) +(alpaka & ~phase2_tracker).toModify(siPixelRecHitsPreSplitting, + cpu = _siPixelRecHitFromSoAAlpakaPhase1.clone( + pixelRecHitSrc = cms.InputTag('siPixelRecHitsPreSplittingAlpaka'), + src = cms.InputTag('siPixelClustersPreSplitting')) +) + +(alpaka & phase2_tracker).toModify(siPixelRecHitsPreSplitting, + cpu = _siPixelRecHitFromSoAAlpakaPhase2.clone( + pixelRecHitSrc = cms.InputTag('siPixelRecHitsPreSplittingAlpaka'), + src = cms.InputTag('siPixelClustersPreSplitting')) +) + + +alpaka.toReplaceWith(siPixelRecHitsPreSplittingTask, cms.Task( + # Reconstruct the pixel hits with alpaka on the device + siPixelRecHitsPreSplittingAlpaka, + # Reconstruct the pixel hits with alpaka on the cpu (if requested by the validation) + siPixelRecHitsPreSplittingAlpakaSerial, + # Convert hit soa on host to legacy formats + siPixelRecHitsPreSplitting)) diff --git a/RecoLocalTracker/SiPixelRecHits/src/ES_PixelCPEFastParams.cc b/RecoLocalTracker/SiPixelRecHits/src/ES_PixelCPEFastParams.cc new file mode 100644 index 0000000000000..804f817bdb6e0 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/src/ES_PixelCPEFastParams.cc @@ -0,0 +1,9 
@@ +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h" +#include "FWCore/Utilities/interface/typelookup.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +using PixelCPEFastParamsHostPhase1 = PixelCPEFastParamsHost; +using PixelCPEFastParamsHostPhase2 = PixelCPEFastParamsHost; + +TYPELOOKUP_DATA_REG(PixelCPEFastParamsHostPhase1); +TYPELOOKUP_DATA_REG(PixelCPEFastParamsHostPhase2); diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParams.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParams.cc new file mode 100644 index 0000000000000..d98c84e5860f4 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParams.cc @@ -0,0 +1,9 @@ +#include "FWCore/Utilities/interface/typelookup.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +using PixelCPEFastParamsPhase1 = PixelCPEFastParamsDevice; +using PixelCPEFastParamsPhase2 = PixelCPEFastParamsDevice; + +TYPELOOKUP_DATA_REG(PixelCPEFastParamsPhase1); +TYPELOOKUP_DATA_REG(PixelCPEFastParamsPhase2); \ No newline at end of file diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParamsHost.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParamsHost.cc new file mode 100644 index 0000000000000..36c127259a383 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParamsHost.cc @@ -0,0 +1,482 @@ +#include + +#include "CondFormats/SiPixelTransient/interface/SiPixelGenError.h" +#include "DataFormats/GeometrySurface/interface/SOARotation.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include 
"HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h" + +//----------------------------------------------------------------------------- +//! The constructor. +//----------------------------------------------------------------------------- +template +PixelCPEFastParamsHost::PixelCPEFastParamsHost(edm::ParameterSet const& conf, + const MagneticField* mag, + const TrackerGeometry& geom, + const TrackerTopology& ttopo, + const SiPixelLorentzAngle* lorentzAngle, + const SiPixelGenErrorDBObject* genErrorDBObject, + const SiPixelLorentzAngle* lorentzAngleWidth) + : PixelCPEGenericBase(conf, mag, geom, ttopo, lorentzAngle, genErrorDBObject, lorentzAngleWidth), + buffer_(cms::alpakatools::make_host_buffer>()) { + // Use errors from templates or from GenError + if (useErrorsFromTemplates_) { + if (!SiPixelGenError::pushfile(*genErrorDBObject_, this->thePixelGenError_)) + throw cms::Exception("InvalidCalibrationLoaded") + << "ERROR: GenErrors not filled correctly. Check the sqlite file. Using SiPixelTemplateDBObject version " + << (*genErrorDBObject_).version(); + } + + fillParamsForDevice(); +} + +template +void PixelCPEFastParamsHost::fillParamsForDevice() { + // this code executes only once per job, computation inefficiency is not an issue + // many code blocks are repeated: better keep the computation local and self consistent as blocks may in future move around, be deleted ... 
+ // It is valid only for Phase1 and the version of GenError in DB used in late 2018 and in 2021 + + buffer_->commonParams().theThicknessB = m_DetParams.front().theThickness; + buffer_->commonParams().theThicknessE = m_DetParams.back().theThickness; + buffer_->commonParams().thePitchX = m_DetParams[0].thePitchX; + buffer_->commonParams().thePitchY = m_DetParams[0].thePitchY; + + buffer_->commonParams().numberOfLaddersInBarrel = TrackerTraits::numberOfLaddersInBarrel; + + LogDebug("PixelCPEFastParamsHost") << "pitch & thickness " << buffer_->commonParams().thePitchX << ' ' + << buffer_->commonParams().thePitchY << " " + << buffer_->commonParams().theThicknessB << ' ' + << buffer_->commonParams().theThicknessE; + + // zero average geometry + memset(&buffer_->averageGeometry(), 0, sizeof(pixelTopology::AverageGeometryT)); + + uint32_t oldLayer = 0; + uint32_t oldLadder = 0; + float rl = 0; + float zl = 0; + float miz = 500, mxz = 0; + float pl = 0; + int nl = 0; + + assert(m_DetParams.size() <= TrackerTraits::numberOfModules); + for (auto i = 0U; i < m_DetParams.size(); ++i) { + auto& p = m_DetParams[i]; + auto& g = buffer_->detParams(i); + + g.nRowsRoc = p.theDet->specificTopology().rowsperroc(); + g.nColsRoc = p.theDet->specificTopology().colsperroc(); + g.nRows = p.theDet->specificTopology().rocsX() * g.nRowsRoc; + g.nCols = p.theDet->specificTopology().rocsY() * g.nColsRoc; + + g.numPixsInModule = g.nRows * g.nCols; + + assert(p.theDet->index() == int(i)); + assert(buffer_->commonParams().thePitchY == p.thePitchY); + assert(buffer_->commonParams().thePitchX == p.thePitchX); + + g.isBarrel = GeomDetEnumerators::isBarrel(p.thePart); + g.isPosZ = p.theDet->surface().position().z() > 0; + g.layer = ttopo_.layer(p.theDet->geographicalId()); + g.index = i; // better be! + g.rawId = p.theDet->geographicalId(); + auto thickness = g.isBarrel ? 
buffer_->commonParams().theThicknessB : buffer_->commonParams().theThicknessE; + assert(thickness == p.theThickness); + + auto ladder = ttopo_.pxbLadder(p.theDet->geographicalId()); + if (oldLayer != g.layer) { + oldLayer = g.layer; + LogDebug("PixelCPEFastParamsHost") << "new layer at " << i << (g.isBarrel ? " B " : (g.isPosZ ? " E+ " : " E- ")) + << g.layer << " starting at " << g.rawId << '\n' + << "old layer had " << nl << " ladders"; + nl = 0; + } + if (oldLadder != ladder) { + oldLadder = ladder; + LogDebug("PixelCPEFastParamsHost") << "new ladder at " << i + << (g.isBarrel ? " B " : (g.isPosZ ? " E+ " : " E- ")) << ladder + << " starting at " << g.rawId << '\n' + << "old ladder ave z,r,p mz " << zl / 8.f << " " << rl / 8.f << " " << pl / 8.f + << ' ' << miz << ' ' << mxz; + rl = 0; + zl = 0; + pl = 0; + miz = 500; + mxz = 0; + nl++; + } + + g.shiftX = 0.5f * p.lorentzShiftInCmX; + g.shiftY = 0.5f * p.lorentzShiftInCmY; + g.chargeWidthX = p.lorentzShiftInCmX * p.widthLAFractionX; + g.chargeWidthY = p.lorentzShiftInCmY * p.widthLAFractionY; + + g.x0 = p.theOrigin.x(); + g.y0 = p.theOrigin.y(); + g.z0 = p.theOrigin.z(); + + auto vv = p.theDet->surface().position(); + auto rr = pixelCPEforDevice::Rotation(p.theDet->surface().rotation()); + g.frame = pixelCPEforDevice::Frame(vv.x(), vv.y(), vv.z(), rr); + + zl += vv.z(); + miz = std::min(miz, std::abs(vv.z())); + mxz = std::max(mxz, std::abs(vv.z())); + rl += vv.perp(); + pl += vv.phi(); // (not obvious) + + // errors ..... + ClusterParamGeneric cp; + + cp.with_track_angle = false; + + auto lape = p.theDet->localAlignmentError(); + if (lape.invalid()) + lape = LocalError(); // zero.... 
+ + g.apeXX = lape.xx(); + g.apeYY = lape.yy(); + + auto toMicron = [&](float x) { return std::min(511, int(x * 1.e4f + 0.5f)); }; + + // average angle + auto gvx = p.theOrigin.x() + 40.f * buffer_->commonParams().thePitchX; + auto gvy = p.theOrigin.y(); + auto gvz = 1.f / p.theOrigin.z(); + //--- Note that the normalization is not required as only the ratio used + + { + // calculate angles (fed into errorFromTemplates) + cp.cotalpha = gvx * gvz; + cp.cotbeta = gvy * gvz; + + errorFromTemplates(p, cp, 20000.); + } + +#ifdef EDM_ML_DEBUG + auto m = 10000.f; + for (float qclus = 15000; qclus < 35000; qclus += 15000) { + errorFromTemplates(p, cp, qclus); + LogDebug("PixelCPEFastParamsHost") << i << ' ' << qclus << ' ' << cp.pixmx << ' ' << m * cp.sigmax << ' ' + << m * cp.sx1 << ' ' << m * cp.sx2 << ' ' << m * cp.sigmay << ' ' << m * cp.sy1 + << ' ' << m * cp.sy2; + } + LogDebug("PixelCPEFastParamsHost") << i << ' ' << m * std::sqrt(lape.xx()) << ' ' << m * std::sqrt(lape.yy()); +#endif // EDM_ML_DEBUG + + g.pixmx = std::max(0, cp.pixmx); + g.sx2 = toMicron(cp.sx2); + g.sy1 = std::max(21, toMicron(cp.sy1)); // for some angles sy1 is very small + g.sy2 = std::max(55, toMicron(cp.sy2)); // sometimes sy2 is smaller than others (due to angle?) 
+ + //sample xerr as function of position + // moduleOffsetX is the definition of TrackerTraits::xOffset, + // needs to be calculated because for Phase2 the modules are not uniform + float moduleOffsetX = -(0.5f * float(g.nRows) + TrackerTraits::bigPixXCorrection); + auto const xoff = moduleOffsetX * buffer_->commonParams().thePitchX; + + for (int ix = 0; ix < pixelCPEforDevice::kNumErrorBins; ++ix) { + auto x = xoff * (1.f - (0.5f + float(ix)) / 8.f); + auto gvx = p.theOrigin.x() - x; + auto gvy = p.theOrigin.y(); + auto gvz = 1.f / p.theOrigin.z(); + cp.cotbeta = gvy * gvz; + cp.cotalpha = gvx * gvz; + errorFromTemplates(p, cp, 20000.f); + g.sigmax[ix] = toMicron(cp.sigmax); + g.sigmax1[ix] = toMicron(cp.sx1); + LogDebug("PixelCPEFastParamsHost") << "sigmax vs x " << i << ' ' << x << ' ' << cp.cotalpha << ' ' + << int(g.sigmax[ix]) << ' ' << int(g.sigmax1[ix]) << ' ' << 10000.f * cp.sigmay + << std::endl; + } +#ifdef EDM_ML_DEBUG + // sample yerr as function of position + // moduleOffsetY is the definition of TrackerTraits::yOffset (removed) + float moduleOffsetY = 0.5f * float(g.nCols) + TrackerTraits::bigPixYCorrection; + auto const yoff = -moduleOffsetY * buffer_->commonParams().thePitchY; + + for (int ix = 0; ix < pixelCPEforDevice::kNumErrorBins; ++ix) { + auto y = yoff * (1.f - (0.5f + float(ix)) / 8.f); + auto gvx = p.theOrigin.x() + 40.f * buffer_->commonParams().thePitchY; + auto gvy = p.theOrigin.y() - y; + auto gvz = 1.f / p.theOrigin.z(); + cp.cotbeta = gvy * gvz; + cp.cotalpha = gvx * gvz; + errorFromTemplates(p, cp, 20000.f); + LogDebug("PixelCPEFastParamsHost") << "sigmay vs y " << i << ' ' << y << ' ' << cp.cotbeta << ' ' + << 10000.f * cp.sigmay << std::endl; + } +#endif // EDM_ML_DEBUG + + // calculate angles (repeated) + cp.cotalpha = gvx * gvz; + cp.cotbeta = gvy * gvz; + auto aveCB = cp.cotbeta; + + // sample x by charge + int qbin = pixelCPEforDevice::kGenErrorQBins; // low charge + int k = 0; + for (int qclus = 1000; qclus < 200000; qclus 
+= 1000) { + errorFromTemplates(p, cp, qclus); + if (cp.qBin_ == qbin) + continue; + qbin = cp.qBin_; + g.xfact[k] = cp.sigmax; + g.yfact[k] = cp.sigmay; + g.minCh[k++] = qclus; +#ifdef EDM_ML_DEBUG + LogDebug("PixelCPEFastParamsHost") << i << ' ' << g.rawId << ' ' << cp.cotalpha << ' ' << qclus << ' ' << cp.qBin_ + << ' ' << cp.pixmx << ' ' << m * cp.sigmax << ' ' << m * cp.sx1 << ' ' + << m * cp.sx2 << ' ' << m * cp.sigmay << ' ' << m * cp.sy1 << ' ' << m * cp.sy2 + << std::endl; +#endif // EDM_ML_DEBUG + } + + assert(k <= pixelCPEforDevice::kGenErrorQBins); + + // fill the rest (sometimes bin 4 is missing) + for (int kk = k; kk < pixelCPEforDevice::kGenErrorQBins; ++kk) { + g.xfact[kk] = g.xfact[k - 1]; + g.yfact[kk] = g.yfact[k - 1]; + g.minCh[kk] = g.minCh[k - 1]; + } + auto detx = 1.f / g.xfact[0]; + auto dety = 1.f / g.yfact[0]; + for (int kk = 0; kk < pixelCPEforDevice::kGenErrorQBins; ++kk) { + g.xfact[kk] *= detx; + g.yfact[kk] *= dety; + } + // sample y in "angle" (estimated from cluster size) + float ys = 8.f - 4.f; // apparent bias of half pixel (see plot) + // plot: https://indico.cern.ch/event/934821/contributions/3974619/attachments/2091853/3515041/DigilessReco.pdf page 25 + // sample yerr as function of "size" + for (int iy = 0; iy < pixelCPEforDevice::kNumErrorBins; ++iy) { + ys += 1.f; // first bin 0 is for size 9 (and size is in fixed point 2^3) + if (pixelCPEforDevice::kNumErrorBins - 1 == iy) + ys += 8.f; // last bin for "overflow" + // cp.cotalpha = ys*(buffer_->commonParams().thePitchX/(8.f*thickness)); // use this to print sampling in "x" (and comment the line below) + cp.cotbeta = std::copysign(ys * (buffer_->commonParams().thePitchY / (8.f * thickness)), aveCB); + errorFromTemplates(p, cp, 20000.f); + g.sigmay[iy] = toMicron(cp.sigmay); + LogDebug("PixelCPEFastParamsHost") << "sigmax/sigmay " << i << ' ' << (ys + 4.f) / 8.f << ' ' << cp.cotalpha + << '/' << cp.cotbeta << ' ' << 10000.f * cp.sigmax << '/' << int(g.sigmay[iy]) + <<
std::endl; + } + } // loop over det + + constexpr int numberOfModulesInLadder = TrackerTraits::numberOfModulesInLadder; + constexpr int numberOfLaddersInBarrel = TrackerTraits::numberOfLaddersInBarrel; + constexpr int numberOfModulesInBarrel = TrackerTraits::numberOfModulesInBarrel; + + constexpr float ladderFactor = 1.f / float(numberOfModulesInLadder); + + constexpr int firstEndcapPos = TrackerTraits::firstEndcapPos; + constexpr int firstEndcapNeg = TrackerTraits::firstEndcapNeg; + + // compute ladder barycenter (only in global z) for the barrel + // + auto& aveGeom = buffer_->averageGeometry(); + int il = 0; + for (int im = 0, nm = numberOfModulesInBarrel; im < nm; ++im) { + auto const& g = buffer_->detParams(im); + il = im / numberOfModulesInLadder; + assert(il < int(numberOfLaddersInBarrel)); + auto z = g.frame.z(); + aveGeom.ladderZ[il] += ladderFactor * z; + aveGeom.ladderMinZ[il] = std::min(aveGeom.ladderMinZ[il], z); + aveGeom.ladderMaxZ[il] = std::max(aveGeom.ladderMaxZ[il], z); + aveGeom.ladderX[il] += ladderFactor * g.frame.x(); + aveGeom.ladderY[il] += ladderFactor * g.frame.y(); + aveGeom.ladderR[il] += ladderFactor * sqrt(g.frame.x() * g.frame.x() + g.frame.y() * g.frame.y()); + } + assert(il + 1 == int(numberOfLaddersInBarrel)); + // add half_module and tolerance + constexpr float moduleLength = TrackerTraits::moduleLength; + constexpr float module_tolerance = 0.2f; + for (int il = 0, nl = numberOfLaddersInBarrel; il < nl; ++il) { + aveGeom.ladderMinZ[il] -= (0.5f * moduleLength - module_tolerance); + aveGeom.ladderMaxZ[il] += (0.5f * moduleLength - module_tolerance); + } + + // compute "max z" for first layer in endcap (should we restrict to the outermost ring?)
+ for (auto im = TrackerTraits::layerStart[firstEndcapPos]; im < TrackerTraits::layerStart[firstEndcapPos + 1]; ++im) { + auto const& g = buffer_->detParams(im); + aveGeom.endCapZ[0] = std::max(aveGeom.endCapZ[0], g.frame.z()); + } + for (auto im = TrackerTraits::layerStart[firstEndcapNeg]; im < TrackerTraits::layerStart[firstEndcapNeg + 1]; ++im) { + auto const& g = buffer_->detParams(im); + aveGeom.endCapZ[1] = std::min(aveGeom.endCapZ[1], g.frame.z()); + } + // correct for outer ring being closer + aveGeom.endCapZ[0] -= TrackerTraits::endcapCorrection; + aveGeom.endCapZ[1] += TrackerTraits::endcapCorrection; +#ifdef EDM_ML_DEBUG + for (int jl = 0, nl = numberOfLaddersInBarrel; jl < nl; ++jl) { + LogDebug("PixelCPEFastParamsHost") << jl << ':' << aveGeom.ladderR[jl] << '/' + << std::sqrt(aveGeom.ladderX[jl] * aveGeom.ladderX[jl] + + aveGeom.ladderY[jl] * aveGeom.ladderY[jl]) + << ',' << aveGeom.ladderZ[jl] << ',' << aveGeom.ladderMinZ[jl] << ',' + << aveGeom.ladderMaxZ[jl] << '\n'; + } + LogDebug("PixelCPEFastParamsHost") << aveGeom.endCapZ[0] << ' ' << aveGeom.endCapZ[1]; +#endif // EDM_ML_DEBUG + + // fill Layer and ladders geometry + memset(&buffer_->layerGeometry(), 0, sizeof(pixelCPEforDevice::LayerGeometryT)); + memcpy(buffer_->layerGeometry().layerStart, + TrackerTraits::layerStart, + sizeof(pixelCPEforDevice::LayerGeometryT::layerStart)); + memcpy(buffer_->layerGeometry().layer, + pixelTopology::layer.data(), + pixelTopology::layer.size()); + buffer_->layerGeometry().maxModuleStride = pixelTopology::maxModuleStride; +} + +template +void PixelCPEFastParamsHost::errorFromTemplates(DetParam const& theDetParam, + ClusterParamGeneric& theClusterParam, + float qclus) const { + float locBz = theDetParam.bz; + float locBx = theDetParam.bx; + LogDebug("PixelCPEFastParamsHost") << "PixelCPEFastParamsHost::localPosition(...) 
: locBz = " << locBz; + + theClusterParam.pixmx = std::numeric_limits::max(); // max pixel charge for truncation of 2-D cluster + + theClusterParam.sigmay = -999.9; // CPE Generic y-error for multi-pixel cluster + theClusterParam.sigmax = -999.9; // CPE Generic x-error for multi-pixel cluster + theClusterParam.sy1 = -999.9; // CPE Generic y-error for single single-pixel + theClusterParam.sy2 = -999.9; // CPE Generic y-error for single double-pixel cluster + theClusterParam.sx1 = -999.9; // CPE Generic x-error for single single-pixel cluster + theClusterParam.sx2 = -999.9; // CPE Generic x-error for single double-pixel cluster + + float dummy; + + SiPixelGenError gtempl(this->thePixelGenError_); + int gtemplID = theDetParam.detTemplateId; + + theClusterParam.qBin_ = gtempl.qbin(gtemplID, + theClusterParam.cotalpha, + theClusterParam.cotbeta, + locBz, + locBx, + qclus, + false, + theClusterParam.pixmx, + theClusterParam.sigmay, + dummy, + theClusterParam.sigmax, + dummy, + theClusterParam.sy1, + dummy, + theClusterParam.sy2, + dummy, + theClusterParam.sx1, + dummy, + theClusterParam.sx2, + dummy); + + theClusterParam.sigmax = theClusterParam.sigmax * pixelCPEforDevice::micronsToCm; + theClusterParam.sx1 = theClusterParam.sx1 * pixelCPEforDevice::micronsToCm; + theClusterParam.sx2 = theClusterParam.sx2 * pixelCPEforDevice::micronsToCm; + + theClusterParam.sigmay = theClusterParam.sigmay * pixelCPEforDevice::micronsToCm; + theClusterParam.sy1 = theClusterParam.sy1 * pixelCPEforDevice::micronsToCm; + theClusterParam.sy2 = theClusterParam.sy2 * pixelCPEforDevice::micronsToCm; +} + +template <> +void PixelCPEFastParamsHost::errorFromTemplates(DetParam const& theDetParam, + ClusterParamGeneric& theClusterParam, + float qclus) const { + theClusterParam.qBin_ = 0.0f; +} + +//----------------------------------------------------------------------------- +//! Hit position in the local frame (in cm). Unlike other CPE's, this +//! 
one converts everything from the measurement frame (in channel numbers) +//! into the local frame (in centimeters). +//----------------------------------------------------------------------------- +template +LocalPoint PixelCPEFastParamsHost::localPosition(DetParam const& theDetParam, + ClusterParam& theClusterParamBase) const { + ClusterParamGeneric& theClusterParam = static_cast(theClusterParamBase); + + if (useErrorsFromTemplates_) { + errorFromTemplates(theDetParam, theClusterParam, theClusterParam.theCluster->charge()); + } else { + theClusterParam.qBin_ = 0; + } + + int q_f_X; //!< Q of the first pixel in X + int q_l_X; //!< Q of the last pixel in X + int q_f_Y; //!< Q of the first pixel in Y + int q_l_Y; //!< Q of the last pixel in Y + collect_edge_charges(theClusterParam, q_f_X, q_l_X, q_f_Y, q_l_Y, useErrorsFromTemplates_ && truncatePixelCharge_); + + // do GPU like ... + pixelCPEforDevice::ClusParams cp; + + cp.minRow[0] = theClusterParam.theCluster->minPixelRow(); + cp.maxRow[0] = theClusterParam.theCluster->maxPixelRow(); + cp.minCol[0] = theClusterParam.theCluster->minPixelCol(); + cp.maxCol[0] = theClusterParam.theCluster->maxPixelCol(); + + cp.q_f_X[0] = q_f_X; + cp.q_l_X[0] = q_l_X; + cp.q_f_Y[0] = q_f_Y; + cp.q_l_Y[0] = q_l_Y; + + cp.charge[0] = theClusterParam.theCluster->charge(); + + auto ind = theDetParam.theDet->index(); + pixelCPEforDevice::position(buffer_->commonParams(), buffer_->detParams(ind), cp, 0); + auto xPos = cp.xpos[0]; + auto yPos = cp.ypos[0]; + + // set the error (mind ape....) 
+ pixelCPEforDevice::errorFromDB(buffer_->commonParams(), buffer_->detParams(ind), cp, 0); + theClusterParam.sigmax = cp.xerr[0]; + theClusterParam.sigmay = cp.yerr[0]; + + LogDebug("PixelCPEFastParamsHost") << " in PixelCPEFastParamsHost:localPosition - pos = " << xPos << " " << yPos + << " size " << cp.maxRow[0] - cp.minRow[0] << ' ' << cp.maxCol[0] - cp.minCol[0]; + + //--- Now put the two together + LocalPoint pos_in_local(xPos, yPos); + return pos_in_local; +} + +//============== INFLATED ERROR AND ERRORS FROM DB BELOW ================ + +//------------------------------------------------------------------------- +// Hit error in the local frame +//------------------------------------------------------------------------- +template +LocalError PixelCPEFastParamsHost::localError(DetParam const& theDetParam, + ClusterParam& theClusterParamBase) const { + ClusterParamGeneric& theClusterParam = static_cast(theClusterParamBase); + + auto xerr = theClusterParam.sigmax; + auto yerr = theClusterParam.sigmay; + + LogDebug("PixelCPEFastParamsHost") << " errors " << xerr << " " << yerr; + + auto xerr_sq = xerr * xerr; + auto yerr_sq = yerr * yerr; + + return LocalError(xerr_sq, 0, yerr_sq); +} + +template +void PixelCPEFastParamsHost::fillPSetDescription(edm::ParameterSetDescription& desc) { + // call PixelCPEGenericBase fillPSetDescription to add common rechit errors + PixelCPEGenericBase::fillPSetDescription(desc); +} + +template class PixelCPEFastParamsHost; +template class PixelCPEFastParamsHost; +template class PixelCPEFastParamsHost; diff --git a/RecoLocalTracker/SiPixelRecHits/src/alpaka/ES_PixelCPEFastParams.cc b/RecoLocalTracker/SiPixelRecHits/src/alpaka/ES_PixelCPEFastParams.cc new file mode 100644 index 0000000000000..3b4a2f74a8869 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/src/alpaka/ES_PixelCPEFastParams.cc @@ -0,0 +1,5 @@ +#include "RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h" +#include 
"HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_DATA_REG(PixelCPEFastParamsPhase1); +TYPELOOKUP_ALPAKA_DATA_REG(PixelCPEFastParamsPhase2); From 18a5acb98a55061a0e54e999eb432acf96da583b Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 24 Jan 2024 14:16:58 +0100 Subject: [PATCH 2/2] Resolve conflict between alpaka and legacy SiPixelDigisSoA Reuse the new SiPixelDigisLayout types in place of the old SiPixelDigisSoALayout and related types, and move the old SiPixelDigisSoA class into the legacy namespace. --- .../SiPixelDigi/interface/SiPixelDigisCUDA.h | 19 +---- .../SiPixelDigi/interface/SiPixelDigisSoA.h | 76 +++++++++++-------- .../SiPixelDigi/src/SiPixelDigisSoA.cc | 10 --- DataFormats/SiPixelDigi/src/classes.h | 3 +- DataFormats/SiPixelDigi/src/classes_def.xml | 4 +- .../plugins/SiPixelDigisSoAFromCUDA.cc | 9 +-- .../plugins/SiPixelDigisClustersFromSoA.cc | 4 +- .../plugins/SiPixelRawToClusterGPUKernel.cu | 2 +- .../plugins/SiPixelRecHitSoAFromLegacy.cc | 2 +- .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 2 +- 10 files changed, 60 insertions(+), 71 deletions(-) delete mode 100644 DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h index 5888cd04a6128..3beeaa4830c83 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -7,29 +7,18 @@ #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" #include "CUDADataFormats/Common/interface/PortableDeviceCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" #include "DataFormats/SoATemplate/interface/SoALayout.h" -GENERATE_SOA_LAYOUT(SiPixelDigisSoALayout, - SOA_COLUMN(int32_t, clus), - SOA_COLUMN(uint32_t, pdigi), - SOA_COLUMN(uint32_t, rawIdArr), - SOA_COLUMN(uint16_t, 
adc), - SOA_COLUMN(uint16_t, xx), - SOA_COLUMN(uint16_t, yy), - SOA_COLUMN(uint16_t, moduleId)) - -using SiPixelDigisCUDASOA = SiPixelDigisSoALayout<>; -using SiPixelDigisCUDASOAView = SiPixelDigisCUDASOA::View; -using SiPixelDigisCUDASOAConstView = SiPixelDigisCUDASOA::ConstView; - // TODO: The class is created via inheritance of the PortableDeviceCollection. // This is generally discouraged, and should be done via composition. // See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 -class SiPixelDigisCUDA : public cms::cuda::PortableDeviceCollection> { +class SiPixelDigisCUDA : public cms::cuda::PortableDeviceCollection { public: SiPixelDigisCUDA() = default; explicit SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) - : PortableDeviceCollection>(maxFedWords + 1, stream) {} + : PortableDeviceCollection(maxFedWords + 1, stream) {} + ~SiPixelDigisCUDA() = default; SiPixelDigisCUDA(SiPixelDigisCUDA &&) = default; diff --git a/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h b/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h index f352754e31d17..a97dfadea52c4 100644 --- a/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h +++ b/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h @@ -5,36 +5,46 @@ #include #include -// The main purpose of this class is to deliver digi and cluster data -// from an EDProducer that transfers the data from GPU to host to an -// EDProducer that converts the SoA to legacy data products. The class -// is independent of any GPU technology, and in prunciple could be -// produced by host code, and be used for other purposes than -// conversion-to-legacy as well. 
-class SiPixelDigisSoA { -public: - SiPixelDigisSoA() = default; - explicit SiPixelDigisSoA( - size_t nDigis, const uint32_t* pdigi, const uint32_t* rawIdArr, const uint16_t* adc, const int32_t* clus); - ~SiPixelDigisSoA() = default; - - auto size() const { return pdigi_.size(); } - - uint32_t pdigi(size_t i) const { return pdigi_[i]; } - uint32_t rawIdArr(size_t i) const { return rawIdArr_[i]; } - uint16_t adc(size_t i) const { return adc_[i]; } - int32_t clus(size_t i) const { return clus_[i]; } - - const std::vector& pdigiVector() const { return pdigi_; } - const std::vector& rawIdArrVector() const { return rawIdArr_; } - const std::vector& adcVector() const { return adc_; } - const std::vector& clusVector() const { return clus_; } - -private: - std::vector pdigi_; // packed digi (row, col, adc) of each pixel - std::vector rawIdArr_; // DetId of each pixel - std::vector adc_; // ADC of each pixel - std::vector clus_; // cluster id of each pixel -}; - -#endif +namespace legacy { + + // The main purpose of this class is to deliver digi and cluster data + // from an EDProducer that transfers the data from GPU to host to an + // EDProducer that converts the SoA to legacy data products. The class + // is independent of any GPU technology, and in principle could be + // produced by host code, and be used for other purposes than + // conversion-to-legacy as well.
+ + class SiPixelDigisSoA { + public: + SiPixelDigisSoA() = default; + explicit SiPixelDigisSoA( + size_t nDigis, const uint32_t* pdigi, const uint32_t* rawIdArr, const uint16_t* adc, const int32_t* clus) + : pdigi_(pdigi, pdigi + nDigis), + rawIdArr_(rawIdArr, rawIdArr + nDigis), + adc_(adc, adc + nDigis), + clus_(clus, clus + nDigis) {} + + ~SiPixelDigisSoA() = default; + + auto size() const { return pdigi_.size(); } + + uint32_t pdigi(size_t i) const { return pdigi_[i]; } + uint32_t rawIdArr(size_t i) const { return rawIdArr_[i]; } + uint16_t adc(size_t i) const { return adc_[i]; } + int32_t clus(size_t i) const { return clus_[i]; } + + const std::vector& pdigiVector() const { return pdigi_; } + const std::vector& rawIdArrVector() const { return rawIdArr_; } + const std::vector& adcVector() const { return adc_; } + const std::vector& clusVector() const { return clus_; } + + private: + std::vector pdigi_; // packed digi (row, col, adc) of each pixel + std::vector rawIdArr_; // DetId of each pixel + std::vector adc_; // ADC of each pixel + std::vector clus_; // cluster id of each pixel + }; + +} // namespace legacy + +#endif // DataFormats_SiPixelDigi_interface_SiPixelDigisSoA_h diff --git a/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc b/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc deleted file mode 100644 index b95c004a50a25..0000000000000 --- a/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc +++ /dev/null @@ -1,10 +0,0 @@ -#include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" - -#include - -SiPixelDigisSoA::SiPixelDigisSoA( - size_t nDigis, const uint32_t *pdigi, const uint32_t *rawIdArr, const uint16_t *adc, const int32_t *clus) - : pdigi_(pdigi, pdigi + nDigis), - rawIdArr_(rawIdArr, rawIdArr + nDigis), - adc_(adc, adc + nDigis), - clus_(clus, clus + nDigis) {} diff --git a/DataFormats/SiPixelDigi/src/classes.h b/DataFormats/SiPixelDigi/src/classes.h index 1360ee6e469d9..be707668d0dfc 100644 --- a/DataFormats/SiPixelDigi/src/classes.h +++ 
b/DataFormats/SiPixelDigi/src/classes.h @@ -1,6 +1,8 @@ #ifndef SIPIXELDIGI_CLASSES_H #define SIPIXELDIGI_CLASSES_H +#include + #include "DataFormats/SiPixelDigi/interface/PixelDigi.h" #include "DataFormats/SiPixelDigi/interface/PixelDigiCollection.h" #include "DataFormats/SiPixelDigi/interface/SiPixelCalibDigi.h" @@ -9,6 +11,5 @@ #include "DataFormats/Common/interface/Wrapper.h" #include "DataFormats/Common/interface/DetSetVector.h" #include "DataFormats/Common/interface/DetSetVectorNew.h" -#include #endif // SIPIXELDIGI_CLASSES_H diff --git a/DataFormats/SiPixelDigi/src/classes_def.xml b/DataFormats/SiPixelDigi/src/classes_def.xml index e6bc08de161fa..697b6c467d799 100755 --- a/DataFormats/SiPixelDigi/src/classes_def.xml +++ b/DataFormats/SiPixelDigi/src/classes_def.xml @@ -50,6 +50,6 @@ - - + + diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc index 5b23f2dbda104..67b1b519d4089 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc @@ -26,16 +26,16 @@ class SiPixelDigisSoAFromCUDA : public edm::stream::EDProducer> digiGetToken_; - edm::EDPutTokenT digiPutToken_; + edm::EDPutTokenT digiPutToken_; - cms::cuda::PortableHostCollection> digis_h_; + cms::cuda::PortableHostCollection digis_h_; int nDigis_; }; SiPixelDigisSoAFromCUDA::SiPixelDigisSoAFromCUDA(const edm::ParameterSet& iConfig) : digiGetToken_(consumes>(iConfig.getParameter("src"))), - digiPutToken_(produces()) {} + digiPutToken_(produces()) {} void SiPixelDigisSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; @@ -52,8 +52,7 @@ void SiPixelDigisSoAFromCUDA::acquire(const edm::Event& iEvent, const auto& digis_d = ctx.get(iEvent, digiGetToken_); nDigis_ = digis_d.nDigis(); - nDigis_ = digis_d.nDigis(); - digis_h_ = 
cms::cuda::PortableHostCollection>(digis_d.view().metadata().size(), ctx.stream()); + digis_h_ = cms::cuda::PortableHostCollection(digis_d.view().metadata().size(), ctx.stream()); cudaCheck(cudaMemcpyAsync(digis_h_.buffer().get(), digis_d.const_buffer().get(), digis_d.bufferSize(), diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc index 925ac2febcef0..820b6b237c7e5 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -35,7 +35,7 @@ class SiPixelDigisClustersFromSoAT : public edm::global::EDProducer<> { const edm::ESGetToken topoToken_; - edm::EDGetTokenT digiGetToken_; + edm::EDGetTokenT digiGetToken_; edm::EDPutTokenT> digiPutToken_; edm::EDPutTokenT clusterPutToken_; @@ -49,7 +49,7 @@ class SiPixelDigisClustersFromSoAT : public edm::global::EDProducer<> { template SiPixelDigisClustersFromSoAT::SiPixelDigisClustersFromSoAT(const edm::ParameterSet& iConfig) : topoToken_(esConsumes()), - digiGetToken_(consumes(iConfig.getParameter("src"))), + digiGetToken_(consumes(iConfig.getParameter("src"))), clusterPutToken_(produces()), clusterThresholds_(iConfig.getParameter("clusterThreshold_layer1"), iConfig.getParameter("clusterThreshold_otherLayers")), diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 2d10b81af000e..56718b4bdae14 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -289,7 +289,7 @@ namespace pixelgpudetails { const uint32_t wordCounter, const uint32_t *word, const uint8_t *fedIds, - SiPixelDigisCUDASOAView digisView, + SiPixelDigisSoA::View digisView, cms::cuda::SimpleVector *err, bool 
useQualityInfo, bool includeErrors) { diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index 8dc6ae93018ea..21da864c1c348 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -198,7 +198,7 @@ void SiPixelRecHitSoAFromLegacyT::produce(edm::StreamID streamID, ndigi += clust.size(); } - cms::cuda::PortableHostCollection> digis_h(ndigi); + cms::cuda::PortableHostCollection digis_h(ndigi); clusterRef.clear(); clusters_h.view()[0].moduleId() = gind; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 09d0b55030d9c..94ae258cc16fb 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -19,7 +19,7 @@ namespace gpuPixelRecHits { template __global__ void getHits(pixelCPEforGPU::ParamsOnGPUT const* __restrict__ cpeParams, BeamSpotPOD const* __restrict__ bs, - SiPixelDigisCUDASOAConstView digis, + SiPixelDigisSoA::ConstView digis, int numElements, SiPixelClustersCUDASOAConstView clusters, TrackingRecHitSoAView hits) {