diff --git a/Configuration/ProcessModifiers/python/alpakaValidationPixel_cff.py b/Configuration/ProcessModifiers/python/alpakaValidationPixel_cff.py new file mode 100644 index 0000000000000..ebdb7d9e6981a --- /dev/null +++ b/Configuration/ProcessModifiers/python/alpakaValidationPixel_cff.py @@ -0,0 +1,6 @@ +import FWCore.ParameterSet.Config as cms + +# This modifier chain is for turning on DQM modules used for alpaka device/host validation for pixels + +alpakaValidationPixel = cms.Modifier() + diff --git a/Configuration/ProcessModifiers/python/alpakaValidation_cff.py b/Configuration/ProcessModifiers/python/alpakaValidation_cff.py new file mode 100644 index 0000000000000..3399bdda7c4df --- /dev/null +++ b/Configuration/ProcessModifiers/python/alpakaValidation_cff.py @@ -0,0 +1,11 @@ +import FWCore.ParameterSet.Config as cms + +from Configuration.ProcessModifiers.alpaka_cff import * +from Configuration.ProcessModifiers.alpakaValidationPixel_cff import * + +# This modifier chain is for turning on DQM modules used for alpaka device/host validation + +alpakaValidation = cms.ModifierChain( + alpaka, + alpakaValidationPixel +) diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py index 56da630d8b9c8..c6fd188a22c89 100644 --- a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py +++ b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py @@ -896,6 +896,7 @@ def setup_(self, step, stepName, stepDict, k, properties): # - HLT on CPU # - Pixel-only reconstruction on CPU, with DQM and validation # - harvesting + upgradeWFs['PatatrackPixelOnlyCPU'] = PatatrackWorkflow( digi = { # the HLT menu is already set up for using GPUs if available and if the "gpu" modifier is enabled @@ -1062,8 +1063,7 @@ def setup_(self, step, stepName, stepDict, k, properties): upgradeWFs['PatatrackECALOnlyAlpaka'] = PatatrackWorkflow( digi = { # customize the ECAL Local 
Reco part of the HLT menu for Alpaka - '--procModifiers': 'alpaka', - '--customise' : 'HLTrigger/Configuration/customizeHLTforAlpaka.customizeHLTforAlpakaEcalLocalReco' + '--procModifiers': 'alpaka', # alpaka modifier activates customiseHLTForAlpaka }, reco = { '-s': 'RAW2DIGI:RawToDigi_ecalOnly,RECO:reconstruction_ecalOnly,VALIDATION:@ecalOnlyValidation,DQM:@ecalOnly', @@ -1536,6 +1536,53 @@ def setup_(self, step, stepName, stepDict, k, properties): offset = 0.597, ) + +# Alpaka workflows + +upgradeWFs['PatatrackPixelOnlyAlpaka'] = PatatrackWorkflow( + digi = { + '--procModifiers': 'alpaka', # alpaka modifier activates customiseHLTForAlpaka + }, + reco = { + '-s': 'RAW2DIGI:RawToDigi_pixelOnly,RECO:reconstruction_pixelTrackingOnly,VALIDATION:@pixelTrackingOnlyValidation,DQM:@pixelTrackingOnlyDQM', + '--procModifiers': 'alpaka' + }, + harvest = { + '-s': 'HARVESTING:@trackingOnlyValidation+@pixelTrackingOnlyDQM' + }, + suffix = 'Patatrack_PixelOnlyAlpaka', + offset = 0.402, +) + +upgradeWFs['PatatrackPixelOnlyAlpakaValidation'] = PatatrackWorkflow( + digi = { + '--procModifiers': 'alpaka', # alpaka modifier activates customiseHLTForAlpaka + }, + reco = { + '-s': 'RAW2DIGI:RawToDigi_pixelOnly,RECO:reconstruction_pixelTrackingOnly,VALIDATION:@pixelTrackingOnlyValidation,DQM:@pixelTrackingOnlyDQM', + '--procModifiers': 'alpakaValidation' + }, + harvest = { + '-s': 'HARVESTING:@trackingOnlyValidation+@pixelTrackingOnlyDQM' + }, + suffix = 'Patatrack_PixelOnlyAlpaka_Validation', + offset = 0.403, +) + +upgradeWFs['PatatrackPixelOnlyAlpakaProfiling'] = PatatrackWorkflow( + digi = { + '--procModifiers': 'alpaka', # alpaka modifier activates customiseHLTForAlpaka + }, + reco = { + '-s': 'RAW2DIGI:RawToDigi_pixelOnly,RECO:reconstruction_pixelTrackingOnly', + '--procModifiers': 'alpaka', + '--customise' : 'RecoTracker/Configuration/customizePixelOnlyForProfiling.customizePixelOnlyForProfilingGPUOnly' + }, + harvest = None, + suffix = 'Patatrack_PixelOnlyAlpaka_Profiling', + 
offset = 0.404, +) + # end of Patatrack workflows class UpgradeWorkflow_ProdLike(UpgradeWorkflow): @@ -2741,7 +2788,7 @@ def condition(self, fragment, stepList, key, hasHarvest): }, '2022HI' : { 'Geom' : 'DB:Extended', - 'GT':'auto:phase1_2022_realistic_hi', + 'GT':'auto:phase1_2022_realistic_hi', 'HLTmenu': '@fake2', 'Era':'Run3_pp_on_PbPb', 'BeamSpot': 'DBrealistic', @@ -2749,7 +2796,7 @@ def condition(self, fragment, stepList, key, hasHarvest): }, '2022HIRP' : { 'Geom' : 'DB:Extended', - 'GT':'auto:phase1_2022_realistic_hi', + 'GT':'auto:phase1_2022_realistic_hi', 'HLTmenu': '@fake2', 'Era':'Run3_pp_on_PbPb_approxSiStripClusters', 'BeamSpot': 'DBrealistic', @@ -2757,7 +2804,7 @@ def condition(self, fragment, stepList, key, hasHarvest): }, '2023HI' : { 'Geom' : 'DB:Extended', - 'GT':'auto:phase1_2023_realistic_hi', + 'GT':'auto:phase1_2023_realistic_hi', 'HLTmenu': '@fake2', 'Era':'Run3_pp_on_PbPb', 'BeamSpot': 'DBrealistic', @@ -2765,7 +2812,7 @@ def condition(self, fragment, stepList, key, hasHarvest): }, '2023HIRP' : { 'Geom' : 'DB:Extended', - 'GT':'auto:phase1_2023_realistic_hi', + 'GT':'auto:phase1_2023_realistic_hi', 'HLTmenu': '@fake2', 'Era':'Run3_pp_on_PbPb_approxSiStripClusters', 'BeamSpot': 'DBrealistic', diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTrackSoAAlpaka.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTrackSoAAlpaka.cc index 65a6dc2802831..025bdfd988ea6 100644 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTrackSoAAlpaka.cc +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTrackSoAAlpaka.cc @@ -176,9 +176,9 @@ void SiPixelCompareTrackSoAAlpaka::analyze(const edm::Event& iEvent, const ed float ptHost = tsoaHost.view()[it].pt(); float etaHost = tsoaHost.view()[it].eta(); - float phiHost = helper::phi(tsoaHost.view(), it); - float zipHost = helper::zip(tsoaHost.view(), it); - float tipHost = helper::tip(tsoaHost.view(), it); + float phiHost = reco::phi(tsoaHost.view(), it); + float zipHost = 
reco::zip(tsoaHost.view(), it); + float tipHost = reco::tip(tsoaHost.view(), it); if (!(ptHost > 0.)) continue; @@ -193,7 +193,7 @@ void SiPixelCompareTrackSoAAlpaka::analyze(const edm::Event& iEvent, const ed for (auto gid : looseTrkidxDevice) { float etaDevice = tsoaDevice.view()[gid].eta(); - float phiDevice = helper::phi(tsoaDevice.view(), gid); + float phiDevice = reco::phi(tsoaDevice.view(), gid); float dr2 = reco::deltaR2(etaHost, phiHost, etaDevice, phiDevice); if (dr2 > dr2cut_) continue; // this is arbitrary @@ -210,22 +210,22 @@ void SiPixelCompareTrackSoAAlpaka::analyze(const edm::Event& iEvent, const ed nLooseAndAboveTracksHost_matchedDevice++; hchi2_->Fill(tsoaHost.view()[it].chi2(), tsoaDevice.view()[closestTkidx].chi2()); - hCharge_->Fill(reco::charge(tsoaHost.view(), it), reco::charge(tsoaDevice.view(), closestTkidx)); + hCharge_->Fill(reco::charge(tsoaHost.view(), it), reco::charge(tsoaDevice.view(), closestTkidx)); hnHits_->Fill(helper::nHits(tsoaHost.view(), it), helper::nHits(tsoaDevice.view(), closestTkidx)); hnLayers_->Fill(tsoaHost.view()[it].nLayers(), tsoaDevice.view()[closestTkidx].nLayers()); hpt_->Fill(tsoaHost.view()[it].pt(), tsoaDevice.view()[closestTkidx].pt()); hptLogLog_->Fill(tsoaHost.view()[it].pt(), tsoaDevice.view()[closestTkidx].pt()); heta_->Fill(etaHost, tsoaDevice.view()[closestTkidx].eta()); - hphi_->Fill(phiHost, helper::phi(tsoaDevice.view(), closestTkidx)); - hz_->Fill(zipHost, helper::zip(tsoaDevice.view(), closestTkidx)); - htip_->Fill(tipHost, helper::tip(tsoaDevice.view(), closestTkidx)); + hphi_->Fill(phiHost, reco::phi(tsoaDevice.view(), closestTkidx)); + hz_->Fill(zipHost, reco::zip(tsoaDevice.view(), closestTkidx)); + htip_->Fill(tipHost, reco::tip(tsoaDevice.view(), closestTkidx)); hptdiffMatched_->Fill(ptHost - tsoaDevice.view()[closestTkidx].pt()); - hCurvdiffMatched_->Fill((reco::charge(tsoaHost.view(), it) / tsoaHost.view()[it].pt()) - - (reco::charge(tsoaDevice.view(), closestTkidx) / 
tsoaDevice.view()[closestTkidx].pt())); + hCurvdiffMatched_->Fill((reco::charge(tsoaHost.view(), it) / tsoaHost.view()[it].pt()) - + (reco::charge(tsoaDevice.view(), closestTkidx) / tsoaDevice.view()[closestTkidx].pt())); hetadiffMatched_->Fill(etaHost - tsoaDevice.view()[closestTkidx].eta()); - hphidiffMatched_->Fill(reco::deltaPhi(phiHost, helper::phi(tsoaDevice.view(), closestTkidx))); - hzdiffMatched_->Fill(zipHost - helper::zip(tsoaDevice.view(), closestTkidx)); - htipdiffMatched_->Fill(tipHost - helper::tip(tsoaDevice.view(), closestTkidx)); + hphidiffMatched_->Fill(reco::deltaPhi(phiHost, reco::phi(tsoaDevice.view(), closestTkidx))); + hzdiffMatched_->Fill(zipHost - reco::zip(tsoaDevice.view(), closestTkidx)); + htipdiffMatched_->Fill(tipHost - reco::tip(tsoaDevice.view(), closestTkidx)); hpt_eta_tkAllHostMatched_->Fill(etaHost, tsoaHost.view()[it].pt()); //matched to gpu hphi_z_tkAllHostMatched_->Fill(etaHost, zipHost); } diff --git a/DQM/SiPixelHeterogeneous/python/SiPixelHeterogenousDQM_FirstStep_cff.py b/DQM/SiPixelHeterogeneous/python/SiPixelHeterogenousDQM_FirstStep_cff.py index dfb83708c95cf..95245a3fea968 100644 --- a/DQM/SiPixelHeterogeneous/python/SiPixelHeterogenousDQM_FirstStep_cff.py +++ b/DQM/SiPixelHeterogeneous/python/SiPixelHeterogenousDQM_FirstStep_cff.py @@ -7,20 +7,35 @@ from DQM.SiPixelHeterogeneous.siPixelPhase2MonitorTrackSoA_cfi import * from DQM.SiPixelHeterogeneous.siPixelHIonPhase1MonitorTrackSoA_cfi import * from DQM.SiPixelHeterogeneous.siPixelMonitorVertexSoA_cfi import * +# Alpaka Modules +from Configuration.ProcessModifiers.alpaka_cff import alpaka +from DQM.SiPixelHeterogeneous.siPixelPhase1MonitorRecHitsSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelPhase2MonitorRecHitsSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelHIonPhase1MonitorRecHitsSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelPhase1MonitorTrackSoAAlpaka_cfi import * +from 
DQM.SiPixelHeterogeneous.siPixelPhase2MonitorTrackSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelHIonPhase1MonitorTrackSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelMonitorVertexSoAAlpaka_cfi import * # Run-3 sequence monitorpixelSoASource = cms.Sequence(siPixelPhase1MonitorRecHitsSoA * siPixelPhase1MonitorTrackSoA * siPixelMonitorVertexSoA) - +# Run-3 Alpaka sequence +monitorpixelSoASourceAlpaka = cms.Sequence(siPixelPhase1MonitorRecHitsSoAAlpaka * siPixelPhase1MonitorTrackSoAAlpaka * siPixelMonitorVertexSoAAlpaka) +alpaka.toReplaceWith(monitorpixelSoASource, monitorpixelSoASourceAlpaka) # Phase-2 sequence from Configuration.Eras.Modifier_phase2_tracker_cff import phase2_tracker _monitorpixelSoARecHitsSource = cms.Sequence(siPixelPhase2MonitorRecHitsSoA * siPixelPhase2MonitorTrackSoA * siPixelMonitorVertexSoA) -phase2_tracker.toReplaceWith(monitorpixelSoASource, _monitorpixelSoARecHitsSource) +(phase2_tracker & ~alpaka).toReplaceWith(monitorpixelSoASource, _monitorpixelSoARecHitsSource) +_monitorpixelSoARecHitsSourceAlpaka = cms.Sequence(siPixelPhase2MonitorRecHitsSoAAlpaka * siPixelPhase2MonitorTrackSoAAlpaka * siPixelMonitorVertexSoAAlpaka) +(phase2_tracker & alpaka).toReplaceWith(monitorpixelSoASource, _monitorpixelSoARecHitsSourceAlpaka) # HIon Phase 1 sequence from Configuration.ProcessModifiers.pp_on_AA_cff import pp_on_AA _monitorpixelSoARecHitsSourceHIon = cms.Sequence(siPixelHIonPhase1MonitorRecHitsSoA * siPixelHIonPhase1MonitorTrackSoA * siPixelMonitorVertexSoA) (pp_on_AA & ~phase2_tracker).toReplaceWith(monitorpixelSoASource, _monitorpixelSoARecHitsSourceHIon) +_monitorpixelSoARecHitsSourceHIonAlpaka = cms.Sequence(siPixelHIonPhase1MonitorRecHitsSoAAlpaka * siPixelHIonPhase1MonitorTrackSoAAlpaka * siPixelMonitorVertexSoAAlpaka) +(pp_on_AA & ~phase2_tracker & alpaka).toReplaceWith(monitorpixelSoASource, _monitorpixelSoARecHitsSourceHIonAlpaka) #Define the sequence for GPU vs CPU validation #This should run:- individual 
monitor for the 2 collections + comparison module @@ -33,6 +48,14 @@ from DQM.SiPixelHeterogeneous.siPixelCompareVertexSoA_cfi import * from DQM.SiPixelHeterogeneous.siPixelPhase1RawDataErrorComparator_cfi import * from DQM.SiPixelPhase1Common.SiPixelPhase1RawData_cfi import * +#Alpaka +from DQM.SiPixelHeterogeneous.siPixelPhase1CompareRecHitsSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelPhase2CompareRecHitsSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelHIonPhase1CompareRecHitsSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelPhase1CompareTrackSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelPhase2CompareTrackSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelHIonPhase1CompareTrackSoAAlpaka_cfi import * +from DQM.SiPixelHeterogeneous.siPixelCompareVertexSoAAlpaka_cfi import * # digi errors SiPixelPhase1RawDataConfForCPU = copy.deepcopy(SiPixelPhase1RawDataConf) @@ -126,6 +149,43 @@ topFolderName = 'SiPixelHeterogeneous/PixelVertexSoAGPU', ) +### Alpaka + +# PixelRecHits: monitor of CPUSerial product (Alpaka backend: 'serial_sync') +siPixelRecHitsSoAMonitorSerial = siPixelPhase1MonitorRecHitsSoAAlpaka.clone( + pixelHitsSrc = cms.InputTag( 'siPixelRecHitsPreSplittingAlpakaSerial' ), + TopFolderName = cms.string( 'SiPixelHeterogeneous/PixelRecHitsSerial' ) +) + +# PixelRecHits: monitor of Device product (Alpaka backend: '') +siPixelRecHitsSoAMonitorDevice = siPixelPhase1MonitorRecHitsSoAAlpaka.clone( + pixelHitsSrc = cms.InputTag( 'siPixelRecHitsPreSplittingAlpaka' ), + TopFolderName = cms.string( 'SiPixelHeterogeneous/PixelRecHitsDevice' ) +) + +# PixelTracks: monitor of CPUSerial product (Alpaka backend: 'serial_sync') +siPixelTrackSoAMonitorSerial = siPixelPhase1MonitorTrackSoAAlpaka.clone( + pixelTrackSrc = cms.InputTag('pixelTracksAlpakaSerial'), + topFolderName = cms.string('SiPixelHeterogeneous/PixelTrackSerial') +) + +# PixelTracks: monitor of Device product (Alpaka backend: '') 
+siPixelTrackSoAMonitorDevice = siPixelPhase1MonitorTrackSoAAlpaka.clone( + pixelTrackSrc = cms.InputTag('pixelTracksAlpaka'), + topFolderName = cms.string('SiPixelHeterogeneous/PixelTrackDevice') +) + +# PixelVertices: monitor of CPUSerial product (Alpaka backend: 'serial_sync') +siPixelVertexSoAMonitorSerial = siPixelMonitorVertexSoAAlpaka.clone( + pixelVertexSrc = cms.InputTag("pixelVerticesAlpakaSerial"), + topFolderName = cms.string('SiPixelHeterogeneous/PixelVertexSerial') +) + +siPixelVertexSoAMonitorDevice = siPixelMonitorVertexSoAAlpaka.clone( + pixelVertexSrc = cms.InputTag("pixelVerticesAlpaka"), + topFolderName = cms.string('SiPixelHeterogeneous/PixelVertexDevice') +) + # Run-3 sequence monitorpixelSoACompareSource = cms.Sequence(siPixelPhase1MonitorRawDataACPU * siPixelPhase1MonitorRawDataAGPU * @@ -139,6 +199,17 @@ siPixelMonitorVertexSoAGPU * siPixelCompareVertexSoA * siPixelPhase1RawDataErrorComparator) +# and the Alpaka version +monitorpixelSoACompareSourceAlpaka = cms.Sequence( + siPixelRecHitsSoAMonitorSerial * + siPixelRecHitsSoAMonitorDevice * + siPixelPhase1CompareRecHitsSoAAlpaka * + siPixelTrackSoAMonitorSerial * + siPixelTrackSoAMonitorDevice * + siPixelPhase1CompareTrackSoAAlpaka * + siPixelVertexSoAMonitorSerial * + siPixelVertexSoAMonitorDevice * + siPixelCompareVertexSoAAlpaka ) # Phase-2 sequence _monitorpixelSoACompareSource = cms.Sequence(siPixelPhase2MonitorRecHitsSoACPU * @@ -166,3 +237,6 @@ from Configuration.ProcessModifiers.gpuValidationPixel_cff import gpuValidationPixel gpuValidationPixel.toReplaceWith(monitorpixelSoASource, monitorpixelSoACompareSource) + +from Configuration.ProcessModifiers.alpakaValidationPixel_cff import alpakaValidationPixel +(alpakaValidationPixel & ~gpuValidationPixel).toReplaceWith(monitorpixelSoASource, monitorpixelSoACompareSourceAlpaka) diff --git a/DataFormats/TrackSoA/interface/TracksSoA.h b/DataFormats/TrackSoA/interface/TracksSoA.h index bc3a8c4be9cb5..ed4ef2e5a4c93 100644 --- 
a/DataFormats/TrackSoA/interface/TracksSoA.h +++ b/DataFormats/TrackSoA/interface/TracksSoA.h @@ -1,7 +1,10 @@ -#ifndef DataFormats_Track_interface_TrackLayout_h -#define DataFormats_Track_interface_TrackLayout_h +#ifndef DataFormats_TrackSoA_interface_TracksSoA_h +#define DataFormats_TrackSoA_interface_TracksSoA_h + +#include #include + #include "HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "DataFormats/SoATemplate/interface/SoALayout.h" @@ -29,6 +32,7 @@ namespace reco { SOA_COLUMN(int8_t, nLayers), SOA_COLUMN(float, eta), SOA_COLUMN(float, pt), + // state at the beam spot: {phi, tip, 1/pt, cotan(theta), zip} SOA_EIGEN_COLUMN(Vector5f, state), SOA_EIGEN_COLUMN(Vector15f, covariance), SOA_SCALAR(int, nTracks), @@ -43,14 +47,56 @@ namespace reco { template using TrackSoAConstView = typename reco::TrackSoA::template Layout<>::ConstView; - template - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float charge(const TrackSoAConstView &tracks, - int32_t i) { + /* Implement a type trait to identify the specialisations of TrackSoAConstView + * + * This is done explicitly for all possible pixel topologies, because we did not find a way + * to use template deduction with a partial specialisation. 
+ */ + template + struct IsTrackSoAConstView : std::false_type {}; + template <> + struct IsTrackSoAConstView> : std::true_type {}; + template <> + struct IsTrackSoAConstView> : std::true_type {}; + template <> + struct IsTrackSoAConstView> : std::true_type {}; + template <> + struct IsTrackSoAConstView> : std::true_type {}; + template <> + struct IsTrackSoAConstView> : std::true_type {}; + template <> + struct IsTrackSoAConstView> : std::true_type {}; + + template + constexpr bool isTrackSoAConstView = IsTrackSoAConstView::value; + + template >> + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float charge(ConstView const& tracks, int32_t i) { //was: std::copysign(1.f, tracks[i].state()(2)). Will be constexpr with C++23 float v = tracks[i].state()(2); return float((0.0f < v) - (v < 0.0f)); } + template >> + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float phi(ConstView const& tracks, int32_t i) { + return tracks[i].state()(0); + } + + template >> + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float tip(ConstView const& tracks, int32_t i) { + return tracks[i].state()(1); + } + + template >> + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float zip(ConstView const& tracks, int32_t i) { + return tracks[i].state()(4); + } + + template >> + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr bool isTriplet(ConstView const& tracks, int32_t i) { + return tracks[i].nLayers() == 3; + } + } // namespace reco -#endif +#endif // DataFormats_TrackSoA_interface_TracksSoA_h diff --git a/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h b/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h index 8affb29845779..6b95d2843653f 100644 --- a/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h +++ b/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h @@ -12,29 +12,7 @@ struct TracksUtilities { using TrackSoAConstView = typename reco::TrackSoA::template Layout<>::ConstView; using hindex_type = typename reco::TrackSoA::hindex_type; - // State at the Beam spot - // 
phi,tip,1/pt,cotan(theta),zip - /* ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float charge(const TrackSoAConstView &tracks, int32_t i) { - //was: std::copysign(1.f, tracks[i].state()(2)). Will be constexpr with C++23 - float v = tracks[i].state()(2); - return float((0.0f < v) - (v < 0.0f)); - } -*/ - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float phi(const TrackSoAConstView &tracks, int32_t i) { - return tracks[i].state()(0); - } - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float tip(const TrackSoAConstView &tracks, int32_t i) { - return tracks[i].state()(1); - } - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float zip(const TrackSoAConstView &tracks, int32_t i) { - return tracks[i].state()(4); - } - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr bool isTriplet(const TrackSoAConstView &tracks, int i) { - return tracks[i].nLayers() == 3; - } + // state at the beam spot: { phi, tip, 1/pt, cotan(theta), zip } template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr void copyFromCircle( @@ -109,7 +87,6 @@ namespace pixelTrack { struct QualityCutsT> { using TrackSoAView = typename reco::TrackSoA::template Layout<>::View; using TrackSoAConstView = typename reco::TrackSoA::template Layout<>::ConstView; - using tracksHelper = TracksUtilities; float chi2Coeff[4]; float chi2MaxPt; // GeV float chi2Scale; @@ -130,8 +107,8 @@ namespace pixelTrack { // - for quadruplets: |Tip| < 0.5 cm, pT > 0.3 GeV, |Zip| < 12.0 cm // (see CAHitNtupletGeneratorGPU.cc) auto const ®ion = (nHits > 3) ? 
quadruplet : triplet; - return (std::abs(tracksHelper::tip(tracks, it)) < region.maxTip) and (tracks.pt(it) > region.minPt) and - (std::abs(tracksHelper::zip(tracks, it)) < region.maxZip); + return (std::abs(reco::tip(tracks, it)) < region.maxTip) and (tracks.pt(it) > region.minPt) and + (std::abs(reco::zip(tracks, it)) < region.maxZip); } ALPAKA_FN_ACC ALPAKA_FN_INLINE bool strictCut(const TrackSoAConstView &tracks, int it) const { @@ -172,7 +149,6 @@ namespace pixelTrack { struct QualityCutsT> { using TrackSoAView = typename reco::TrackSoA::template Layout<>::View; using TrackSoAConstView = typename reco::TrackSoA::template Layout<>::ConstView; - using tracksHelper = TracksUtilities; float maxChi2; float minPt; @@ -180,8 +156,8 @@ namespace pixelTrack { float maxZip; ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isHP(const TrackSoAConstView &tracks, int nHits, int it) const { - return (std::abs(tracksHelper::tip(tracks, it)) < maxTip) and (tracks.pt(it) > minPt) and - (std::abs(tracksHelper::zip(tracks, it)) < maxZip); + return (std::abs(reco::tip(tracks, it)) < maxTip) and (tracks.pt(it) > minPt) and + (std::abs(reco::zip(tracks, it)) < maxZip); } ALPAKA_FN_ACC ALPAKA_FN_INLINE bool strictCut(const TrackSoAConstView &tracks, int it) const { return tracks.chi2(it) >= maxChi2; diff --git a/DataFormats/TrackSoA/src/alpaka/classes_cuda.h b/DataFormats/TrackSoA/src/alpaka/classes_cuda.h index 4783184611401..17f3b64498711 100644 --- a/DataFormats/TrackSoA/src/alpaka/classes_cuda.h +++ b/DataFormats/TrackSoA/src/alpaka/classes_cuda.h @@ -1,4 +1,3 @@ - #ifndef DataFormats_TrackSoA_src_alpaka_classes_cuda_h #define DataFormats_TrackSoA_src_alpaka_classes_cuda_h @@ -9,6 +8,6 @@ #include "DataFormats/TrackSoA/interface/TracksDevice.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" -using namespace reco; +using namespace pixelTopology; #endif // DataFormats_TrackSoA_src_alpaka_classes_cuda_h diff --git a/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml 
b/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml index c04ca173c49f9..9edee6439e63b 100644 --- a/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml +++ b/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml @@ -1,9 +1,10 @@ - + - + + diff --git a/DataFormats/TrackSoA/src/alpaka/classes_rocm.h b/DataFormats/TrackSoA/src/alpaka/classes_rocm.h index 38143a6058c36..0267ddeb213d5 100644 --- a/DataFormats/TrackSoA/src/alpaka/classes_rocm.h +++ b/DataFormats/TrackSoA/src/alpaka/classes_rocm.h @@ -1,4 +1,3 @@ - #ifndef DataFormats_TrackSoA_src_alpaka_classes_rocm_h #define DataFormats_TrackSoA_src_alpaka_classes_rocm_h @@ -9,6 +8,6 @@ #include "DataFormats/TrackSoA/interface/TracksDevice.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" -using namespace reco; +using namespace pixelTopology; #endif // DataFormats_TrackSoA_src_alpaka_classes_rocm_h diff --git a/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml index b7e40aedead42..772a1b385a957 100644 --- a/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml +++ b/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml @@ -1,9 +1,10 @@ - + - + + diff --git a/DataFormats/TrackSoA/src/classes.h b/DataFormats/TrackSoA/src/classes.h index 43d40e5f8f3ac..c97bb234d7e18 100644 --- a/DataFormats/TrackSoA/src/classes.h +++ b/DataFormats/TrackSoA/src/classes.h @@ -6,6 +6,5 @@ #include "DataFormats/TrackSoA/interface/TracksHost.h" using namespace pixelTopology; -using namespace reco; #endif // DataFormats_TrackSoA_src_classes_h diff --git a/DataFormats/TrackSoA/src/classes_def.xml b/DataFormats/TrackSoA/src/classes_def.xml index fd8fc0781ee25..5ae5fbf55cd8f 100644 --- a/DataFormats/TrackSoA/src/classes_def.xml +++ b/DataFormats/TrackSoA/src/classes_def.xml @@ -1,32 +1,32 @@ - - - - + + + + - + - - - - + + + + - + - - - - + + + + - + diff --git a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc 
b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc index 2c2d0961eb106..2b9807e3db054 100644 --- a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc +++ b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc @@ -17,12 +17,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { class TestFillKernel { public: template >> - ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackSoAView tracks_view) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackSoAView tracks_view, int32_t nTracks) const { if (cms::alpakatools::once_per_grid(acc)) { - tracks_view.nTracks() = 420; + tracks_view.nTracks() = nTracks; } - for (int32_t j : elements_with_stride(acc, tracks_view.metadata().size())) { + for (int32_t j : elements_with_stride(acc, nTracks)) { tracks_view[j].pt() = (float)j; tracks_view[j].eta() = (float)j; tracks_view[j].chi2() = (float)j; @@ -39,17 +39,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { class TestVerifyKernel { public: template >> - ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackSoAConstView tracks_view) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, + TrackSoAConstView tracks_view, + int32_t nTracks) const { if (cms::alpakatools::once_per_grid(acc)) { - ALPAKA_ASSERT_OFFLOAD(tracks_view.nTracks() == 420); + ALPAKA_ASSERT(tracks_view.nTracks() == nTracks); } for (int32_t j : elements_with_stride(acc, tracks_view.nTracks())) { - assert(abs(tracks_view[j].pt() - (float)j) < .0001); - assert(abs(tracks_view[j].eta() - (float)j) < .0001); - assert(abs(tracks_view[j].chi2() - (float)j) < .0001); - assert(tracks_view[j].quality() == (Quality)(j % 256)); - assert(tracks_view[j].nLayers() == j % 128); - assert(tracks_view.hitIndices().off[j] == uint32_t(j)); + ALPAKA_ASSERT(abs(tracks_view[j].pt() - (float)j) < .0001); + ALPAKA_ASSERT(abs(tracks_view[j].eta() - (float)j) < .0001); + ALPAKA_ASSERT(abs(tracks_view[j].chi2() - (float)j) < .0001); + ALPAKA_ASSERT(tracks_view[j].quality() == (Quality)(j % 256)); 
+ ALPAKA_ASSERT(tracks_view[j].nLayers() == j % 128); + ALPAKA_ASSERT(tracks_view.hitIndices().off[j] == uint32_t(j)); } } }; @@ -57,14 +59,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // Host function which invokes the two kernels above template void runKernels(TrackSoAView tracks_view, Queue& queue) { + int32_t tracks = 420; uint32_t items = 64; - uint32_t groups = divide_up_by(tracks_view.metadata().size(), items); + uint32_t groups = divide_up_by(tracks, items); auto workDiv = make_workdiv(groups, items); - alpaka::exec(queue, workDiv, TestFillKernel{}, tracks_view); - alpaka::exec(queue, - workDiv, - TestVerifyKernel{}, - tracks_view); //TODO: wait for some PR that solves this and then check it!!! + alpaka::exec(queue, workDiv, TestFillKernel{}, tracks_view, tracks); + alpaka::exec(queue, workDiv, TestVerifyKernel{}, tracks_view, tracks); } template void runKernels(TrackSoAView tracks_view, Queue& queue); diff --git a/HLTrigger/Configuration/python/customizeHLTforAlpaka.py b/HLTrigger/Configuration/python/customizeHLTforAlpaka.py index d2b8fa901461c..0b97262093e86 100644 --- a/HLTrigger/Configuration/python/customizeHLTforAlpaka.py +++ b/HLTrigger/Configuration/python/customizeHLTforAlpaka.py @@ -1,7 +1,554 @@ import FWCore.ParameterSet.Config as cms + +## Pixel HLT in Alpaka +def customizeHLTforDQMGPUvsCPUPixel(process): + '''Ad-hoc changes to test HLT config containing only DQM_PixelReconstruction_v and DQMGPUvsCPU stream + only up to the Pixel Local Reconstruction + ''' + dqmPixelRecoPathName = None + for pathName in process.paths_(): + if pathName.startswith('DQM_PixelReconstruction_v'): + dqmPixelRecoPathName = pathName + break + + if dqmPixelRecoPathName == None: + return process + + process.hltPixelConsumerGPU.eventProducts = [ + 'hltSiPixelClusters', + 'hltSiPixelClustersSoA', + 'hltSiPixelDigis', + 'hltSiPixelRecHits', + 'hltSiPixelRecHitsSoA', + 'hltPixelTracks', + 'hltPixelTracksSoA', + 'hltPixelVertices', + 'hltPixelVerticesSoA', + ] + + 
process.hltPixelConsumerCPU.eventProducts = [] + for foo in process.hltPixelConsumerGPU.eventProducts: + process.hltPixelConsumerCPU.eventProducts += [foo+'CPUSerial'] + + # modify EventContent of DQMGPUvsCPU stream + if hasattr(process, 'hltOutputDQMGPUvsCPU'): + process.hltOutputDQMGPUvsCPU.outputCommands = [ + 'drop *', + 'keep *Cluster*_hltSiPixelClusters_*_*', + 'keep *Cluster*_hltSiPixelClustersLegacyFormatCPUSerial_*_*', + 'keep *_hltSiPixelDigis_*_*', + 'keep *_hltSiPixelDigiErrorsLegacyFormatCPUSerial_*_*', + 'keep *RecHit*_hltSiPixelRecHits_*_*', + 'keep *RecHit*_hltSiPixelRecHitsLegacyFormatCPUSerial_*_*', + 'keep *_hltPixelTracks_*_*', + 'keep *_hltPixelTracksLegacyFormatCPUSerial_*_*', + 'keep *_hltPixelVertices_*_*', + 'keep *_hltPixelVerticesLegacyFormatCPUSerial_*_*', + ] + + # PixelRecHits: monitor of CPUSerial product (Alpaka backend: 'serial_sync') + process.hltPixelRecHitsSoAMonitorCPU = cms.EDProducer('SiPixelPhase1MonitorRecHitsSoAAlpaka', + pixelHitsSrc = cms.InputTag( 'hltSiPixelRecHitsCPUSerial' ), + TopFolderName = cms.string( 'SiPixelHeterogeneous/PixelRecHitsCPU' ) + ) + + # PixelRecHits: monitor of GPU product (Alpaka backend: '') + process.hltPixelRecHitsSoAMonitorGPU = cms.EDProducer('SiPixelPhase1MonitorRecHitsSoAAlpaka', + pixelHitsSrc = cms.InputTag( 'hltSiPixelRecHitsSoA' ), + TopFolderName = cms.string( 'SiPixelHeterogeneous/PixelRecHitsGPU' ) + ) + + # PixelRecHits: 'GPUvsCPU' comparisons + process.hltPixelRecHitsSoACompareGPUvsCPU = cms.EDProducer('SiPixelPhase1CompareRecHitsSoAAlpaka', + pixelHitsSrcHost = cms.InputTag( 'hltSiPixelRecHitsCPUSerial' ), + pixelHitsSrcDevice = cms.InputTag( 'hltSiPixelRecHitsSoA' ), + topFolderName = cms.string( 'SiPixelHeterogeneous/PixelRecHitsCompareGPUvsCPU' ), + minD2cut = cms.double( 1.0E-4 ) + ) + + process.hltPixelTracksSoAMonitorCPU = cms.EDProducer("SiPixelPhase1MonitorTrackSoAAlpaka", + mightGet = cms.optional.untracked.vstring, + minQuality = cms.string('loose'), + pixelTrackSrc = 
cms.InputTag('hltPixelTracksCPUSerial'), + topFolderName = cms.string('SiPixelHeterogeneous/PixelTrackCPU'), + useQualityCut = cms.bool(True) + ) + + process.hltPixelTracksSoAMonitorGPU = cms.EDProducer("SiPixelPhase1MonitorTrackSoAAlpaka", + mightGet = cms.optional.untracked.vstring, + minQuality = cms.string('loose'), + pixelTrackSrc = cms.InputTag('hltPixelTracksSoA'), + topFolderName = cms.string('SiPixelHeterogeneous/PixelTrackGPU'), + useQualityCut = cms.bool(True) + ) + + process.hltPixelTracksSoACompareGPUvsCPU = cms.EDProducer("SiPixelPhase1CompareTrackSoAAlpaka", + deltaR2cut = cms.double(0.04), + mightGet = cms.optional.untracked.vstring, + minQuality = cms.string('loose'), + pixelTrackSrcHost = cms.InputTag("hltPixelTracksCPUSerial"), + pixelTrackSrcDevice = cms.InputTag("hltPixelTracksSoA"), + topFolderName = cms.string('SiPixelHeterogeneous/PixelTrackCompareGPUvsCPU'), + useQualityCut = cms.bool(True) + ) + + process.hltPixelVertexSoAMonitorCPU = cms.EDProducer("SiPixelMonitorVertexSoAAlpaka", + beamSpotSrc = cms.InputTag("hltOnlineBeamSpot"), + mightGet = cms.optional.untracked.vstring, + pixelVertexSrc = cms.InputTag("hltPixelVerticesCPUSerial"), + topFolderName = cms.string('SiPixelHeterogeneous/PixelVertexCPU') + ) + + process.hltPixelVertexSoAMonitorGPU = cms.EDProducer("SiPixelMonitorVertexSoAAlpaka", + beamSpotSrc = cms.InputTag("hltOnlineBeamSpot"), + mightGet = cms.optional.untracked.vstring, + pixelVertexSrc = cms.InputTag("hltPixelVerticesSoA"), + topFolderName = cms.string('SiPixelHeterogeneous/PixelVertexGPU') + ) + + process.hltPixelVertexSoACompareGPUvsCPU = cms.EDProducer("SiPixelCompareVertexSoAAlpaka", + beamSpotSrc = cms.InputTag("hltOnlineBeamSpot"), + dzCut = cms.double(1), + mightGet = cms.optional.untracked.vstring, + pixelVertexSrcHost = cms.InputTag("hltPixelVerticesCPUSerial"), + pixelVertexSrcDevice = cms.InputTag("hltPixelVerticesSoA"), + topFolderName = cms.string('SiPixelHeterogeneous/PixelVertexCompareGPUvsCPU') + ) + + 
def customizeHLTforAlpakaPixelRecoLocal(process):
    '''Customisation to introduce the Local Pixel Reconstruction in Alpaka

    Attaches to `process`:
      - three ESProducers (pixel cabling map, gain calibration for HLT,
        CPE fast parameters for Phase-1),
      - the producers of the pixel local reconstruction (device beam spot,
        SoA clusters, legacy-format clusters, cluster shape cache, digi
        errors, SoA rechits, legacy-format rechits), grouped in
        `HLTDoLocalPixelTask`,
      - a second set of the same producers cloned with the alpaka backend
        set to 'serial_sync', grouped in `HLTDoLocalPixelCPUSerialTask` and
        wrapped in `HLTDoLocalPixelCPUSerialSequence`.

    Any attribute of `process` with one of these names is overwritten.
    Returns the modified `process`.
    '''
    # EventSetup producers; the empty backend string is the same
    # "autoselect the alpaka backend" setting used by the EDProducers below
    process.hltESPSiPixelCablingSoA = cms.ESProducer('SiPixelCablingSoAESProducer@alpaka',
        CablingMapLabel = cms.string(''),
        UseQualityInfo = cms.bool(False),
        appendToDataLabel = cms.string(''),
        alpaka = cms.untracked.PSet(
            backend = cms.untracked.string('')
        )
    )

    process.hltESPSiPixelGainCalibrationForHLTSoA = cms.ESProducer('SiPixelGainCalibrationForHLTSoAESProducer@alpaka',
        appendToDataLabel = cms.string(''),
        alpaka = cms.untracked.PSet(
            backend = cms.untracked.string('')
        )
    )

    process.hltESPPixelCPEFastParamsPhase1 = cms.ESProducer('PixelCPEFastParamsESProducerAlpakaPhase1@alpaka',
        appendToDataLabel = cms.string(''),
        alpaka = cms.untracked.PSet(
            backend = cms.untracked.string('')
        )
    )

    ###

    # alpaka EDProducer
    # consumes
    #  - reco::BeamSpot
    # produces
    #  - BeamSpotDevice
    process.hltOnlineBeamSpotDevice = cms.EDProducer('BeamSpotDeviceProducer@alpaka',
        src = cms.InputTag('hltOnlineBeamSpot'),
        alpaka = cms.untracked.PSet(
            backend = cms.untracked.string('')
        )
    )

    # alpaka EDProducer
    # consumes
    #  - FEDRawDataCollection
    # produces (* optional)
    #  - SiPixelClustersSoA
    #  - SiPixelDigisSoACollection
    #  - SiPixelDigiErrorsSoACollection *
    #  - SiPixelFormatterErrors *
    process.hltSiPixelClustersSoA = cms.EDProducer('SiPixelRawToClusterPhase1@alpaka',
        mightGet = cms.optional.untracked.vstring,
        IncludeErrors = cms.bool(True),
        UseQualityInfo = cms.bool(False),
        clusterThreshold_layer1 = cms.int32(4000),
        clusterThreshold_otherLayers = cms.int32(4000),
        VCaltoElectronGain = cms.double(1),  # all gains=1, pedestals=0
        VCaltoElectronGain_L1 = cms.double(1),
        VCaltoElectronOffset = cms.double(0),
        VCaltoElectronOffset_L1 = cms.double(0),
        InputLabel = cms.InputTag('rawDataCollector'),
        Regions = cms.PSet(
            inputs = cms.optional.VInputTag,
            deltaPhi = cms.optional.vdouble,
            maxZ = cms.optional.vdouble,
            beamSpot = cms.optional.InputTag
        ),
        CablingMapLabel = cms.string(''),
        # autoselect the alpaka backend
        alpaka = cms.untracked.PSet(
            backend = cms.untracked.string('')
        )
    )

    # legacy-format clusters built from the SoA clusters; the thresholds
    # repeat the values given to hltSiPixelClustersSoA above
    process.hltSiPixelClusters = cms.EDProducer('SiPixelDigisClustersFromSoAAlpakaPhase1',
        src = cms.InputTag('hltSiPixelClustersSoA'),
        clusterThreshold_layer1 = cms.int32(4000),
        clusterThreshold_otherLayers = cms.int32(4000),
        produceDigis = cms.bool(False),
        storeDigis = cms.bool(False)
    )

    process.hltSiPixelClustersCache = cms.EDProducer('SiPixelClusterShapeCacheProducer',
        src = cms.InputTag( 'hltSiPixelClusters' ),
        onDemand = cms.bool( False )
    )

    # legacy EDProducer
    # consumes
    #  - SiPixelDigiErrorsHost
    #  - SiPixelFormatterErrors
    # produces
    #  - edm::DetSetVector
    #  - DetIdCollection
    #  - DetIdCollection, 'UserErrorModules'
    #  - edmNew::DetSetVector
    process.hltSiPixelDigis = cms.EDProducer('SiPixelDigiErrorsFromSoAAlpaka',
        digiErrorSoASrc = cms.InputTag('hltSiPixelClustersSoA'),
        fmtErrorsSoASrc = cms.InputTag('hltSiPixelClustersSoA'),
        CablingMapLabel = cms.string(''),
        UsePhase1 = cms.bool(True),
        ErrorList = cms.vint32(29),
        UserErrorList = cms.vint32(40)
    )

    # alpaka EDProducer
    # consumes
    #  - BeamSpotDevice
    #  - SiPixelClustersSoA
    #  - SiPixelDigisSoACollection
    # produces
    #  - TrackingRecHitsSoACollection
    process.hltSiPixelRecHitsSoA = cms.EDProducer('SiPixelRecHitAlpakaPhase1@alpaka',
        beamSpot = cms.InputTag('hltOnlineBeamSpotDevice'),
        src = cms.InputTag('hltSiPixelClustersSoA'),
        CPE = cms.string('PixelCPEFastParams'),
        mightGet = cms.optional.untracked.vstring,
        # autoselect the alpaka backend
        alpaka = cms.untracked.PSet(
            backend = cms.untracked.string('')
        )
    )

    # legacy-format rechits built from the SoA rechits and the legacy clusters
    process.hltSiPixelRecHits = cms.EDProducer('SiPixelRecHitFromSoAAlpakaPhase1',
        pixelRecHitSrc = cms.InputTag('hltSiPixelRecHitsSoA'),
        src = cms.InputTag('hltSiPixelClusters'),
    )

    ###
    ### Task: Pixel Local Reconstruction
    ###
    process.HLTDoLocalPixelTask = cms.ConditionalTask(
        process.hltOnlineBeamSpotDevice,
        process.hltSiPixelClustersSoA,
        process.hltSiPixelClusters,   # was: hltSiPixelClusters
        process.hltSiPixelClustersCache,   # really needed ??
        process.hltSiPixelDigis,   # was: hltSiPixelDigis
        process.hltSiPixelRecHitsSoA,
        process.hltSiPixelRecHits,   # was: hltSiPixelRecHits
    )

    ###
    ### CPUSerial version of Pixel Local Reconstruction
    ###
    # each clone below only overrides the backend and/or the input labels, so
    # that the CPUSerial modules read from each other and not from the modules
    # of HLTDoLocalPixelTask; all other parameters are inherited from the original
    process.hltOnlineBeamSpotDeviceCPUSerial = process.hltOnlineBeamSpotDevice.clone(
        alpaka = dict( backend = 'serial_sync' )
    )

    process.hltSiPixelClustersCPUSerial = process.hltSiPixelClustersSoA.clone(
        alpaka = dict( backend = 'serial_sync' )
    )

    process.hltSiPixelClustersLegacyFormatCPUSerial = process.hltSiPixelClusters.clone(
        src = 'hltSiPixelClustersCPUSerial'
    )

    process.hltSiPixelDigiErrorsLegacyFormatCPUSerial = process.hltSiPixelDigis.clone(
        digiErrorSoASrc = 'hltSiPixelClustersCPUSerial',
        fmtErrorsSoASrc = 'hltSiPixelClustersCPUSerial',
    )

    process.hltSiPixelRecHitsCPUSerial = process.hltSiPixelRecHitsSoA.clone(
        beamSpot = 'hltOnlineBeamSpotDeviceCPUSerial',
        src = 'hltSiPixelClustersCPUSerial',
        alpaka = dict( backend = 'serial_sync' )
    )

    process.hltSiPixelRecHitsLegacyFormatCPUSerial = process.hltSiPixelRecHits.clone(
        pixelRecHitSrc = 'hltSiPixelRecHitsCPUSerial',
        src = 'hltSiPixelClustersLegacyFormatCPUSerial',
    )

    # NOTE: unlike HLTDoLocalPixelTask, this task has no CPUSerial counterpart
    # of hltSiPixelClustersCache
    process.HLTDoLocalPixelCPUSerialTask = cms.ConditionalTask(
        process.hltOnlineBeamSpotDeviceCPUSerial,
        process.hltSiPixelClustersCPUSerial,
        process.hltSiPixelClustersLegacyFormatCPUSerial,
        process.hltSiPixelDigiErrorsLegacyFormatCPUSerial,
        process.hltSiPixelRecHitsCPUSerial,
        process.hltSiPixelRecHitsLegacyFormatCPUSerial,
    )

    process.HLTDoLocalPixelCPUSerialSequence = cms.Sequence( process.HLTDoLocalPixelCPUSerialTask )

    return process
cms.InputTag('hltSiPixelRecHitsSoA'), + CPE = cms.string('PixelCPEFastParams'), + ptmin = cms.double(0.9), + CAThetaCutBarrel = cms.double(0.002), + CAThetaCutForward = cms.double(0.003), + hardCurvCut = cms.double(0.0328407225), + dcaCutInnerTriplet = cms.double(0.15), + dcaCutOuterTriplet = cms.double(0.25), + earlyFishbone = cms.bool(True), + lateFishbone = cms.bool(False), + fillStatistics = cms.bool(False), + minHitsPerNtuplet = cms.uint32(3), + phiCuts = cms.vint32( + 522, 730, 730, 522, 626, + 626, 522, 522, 626, 626, + 626, 522, 522, 522, 522, + 522, 522, 522, 522 + ), + maxNumberOfDoublets = cms.uint32(524288), + minHitsForSharingCut = cms.uint32(10), + fitNas4 = cms.bool(False), + doClusterCut = cms.bool(True), + doZ0Cut = cms.bool(True), + doPtCut = cms.bool(True), + useRiemannFit = cms.bool(False), + doSharedHitCut = cms.bool(True), + dupPassThrough = cms.bool(False), + useSimpleTripletCleaner = cms.bool(True), + idealConditions = cms.bool(False), + includeJumpingForwardDoublets = cms.bool(True), + trackQualityCuts = cms.PSet( + chi2MaxPt = cms.double(10), + chi2Coeff = cms.vdouble(0.9, 1.8), + chi2Scale = cms.double(8), + tripletMinPt = cms.double(0.5), + tripletMaxTip = cms.double(0.3), + tripletMaxZip = cms.double(12), + quadrupletMinPt = cms.double(0.3), + quadrupletMaxTip = cms.double(0.5), + quadrupletMaxZip = cms.double(12) + ), + # autoselect the alpaka backend + alpaka = cms.untracked.PSet( + backend = cms.untracked.string('') + ) + ) + + process.hltPixelTracksCPUSerial = process.hltPixelTracksSoA.clone( + pixelRecHitSrc = 'hltSiPixelRecHitsCPUSerial', + alpaka = dict( backend = 'serial_sync' ) + ) + + process.hltPixelTracks = cms.EDProducer("PixelTrackProducerFromSoAAlpakaPhase1", + beamSpot = cms.InputTag("hltOnlineBeamSpot"), + minNumberOfHits = cms.int32(0), + minQuality = cms.string('loose'), + pixelRecHitLegacySrc = cms.InputTag("hltSiPixelRecHits"), + trackSrc = cms.InputTag("hltPixelTracksSoA") + ) + + 
def customizeHLTforAlpakaPixelRecoVertexing(process):
    '''Customisation to introduce the Pixel-Vertex Reconstruction in Alpaka

    Attaches to `process` the SoA vertex producer and its legacy-format
    converter, a second pair cloned to run on the 'serial_sync' backend, and
    the tasks/sequence grouping them:
      - `HLTRecopixelvertexingTask`
      - `HLTRecopixelvertexingCPUSerialTask`
      - `HLTRecopixelvertexingCPUSerialSequence`

    This function reads, but does not create, `process.HLTRecoPixelTracksTask`,
    `process.HLTRecoPixelTracksCPUSerialTask` and
    `process.hltTrimmedPixelVertices`: they must already be attributes of
    `process` when it is called.

    Returns the modified `process`.
    '''

    # alpaka EDProducer
    # consumes
    #  - TkSoADevice
    # produces
    #  - ZVertexDevice
    process.hltPixelVerticesSoA = cms.EDProducer('PixelVertexProducerAlpakaPhase1@alpaka',
        oneKernel = cms.bool(True),
        useDensity = cms.bool(True),
        useDBSCAN = cms.bool(False),
        useIterative = cms.bool(False),
        minT = cms.int32(2),
        eps = cms.double(0.07),
        errmax = cms.double(0.01),
        chi2max = cms.double(9),
        PtMin = cms.double(0.5),
        PtMax = cms.double(75),
        pixelTrackSrc = cms.InputTag('hltPixelTracksSoA'),
        # autoselect the alpaka backend
        alpaka = cms.untracked.PSet(
            backend = cms.untracked.string('')
        )
    )

    # same configuration, but reading the CPUSerial tracks and with the
    # backend set explicitly to 'serial_sync'
    process.hltPixelVerticesCPUSerial = process.hltPixelVerticesSoA.clone(
        pixelTrackSrc = 'hltPixelTracksCPUSerial',
        alpaka = dict( backend = 'serial_sync' )
    )

    # legacy-format vertices built from the SoA vertices and the legacy-format tracks
    process.hltPixelVertices = cms.EDProducer("PixelVertexProducerFromSoAAlpaka",
        TrackCollection = cms.InputTag("hltPixelTracks"),
        beamSpot = cms.InputTag("hltOnlineBeamSpot"),
        src = cms.InputTag("hltPixelVerticesSoA")
    )

    # CPUSerial counterpart: only the track and vertex inputs are redirected,
    # the beamSpot input is inherited unchanged
    process.hltPixelVerticesLegacyFormatCPUSerial = process.hltPixelVertices.clone(
        TrackCollection = cms.InputTag("hltPixelTracksLegacyFormatCPUSerial"),
        src = cms.InputTag("hltPixelVerticesCPUSerial")
    )

    # hltTrimmedPixelVertices is taken as-is from the existing process
    process.HLTRecopixelvertexingTask = cms.ConditionalTask(
        process.HLTRecoPixelTracksTask,
        process.hltPixelVerticesSoA,
        process.hltPixelVertices,
        process.hltTrimmedPixelVertices
    )

    # NOTE: no trimmed-vertices module is included in the CPUSerial task
    process.HLTRecopixelvertexingCPUSerialTask = cms.ConditionalTask(
        process.HLTRecoPixelTracksCPUSerialTask,
        process.hltPixelVerticesCPUSerial,
        process.hltPixelVerticesLegacyFormatCPUSerial,
    )

    process.HLTRecopixelvertexingCPUSerialSequence = cms.Sequence( process.HLTRecopixelvertexingCPUSerialTask )

    return process
def customizeHLTforAlpakaPixelReco(process):
    '''Customisation to introduce the Pixel Local+Track+Vertex Reconstruction in Alpaka

    Runs the individual pixel customisation functions one after the other,
    feeding the process returned by each step into the next one, and returns
    the final process.
    '''
    # the order matters: later steps use modules and tasks that the earlier
    # ones attach to the process
    customisationSteps = (
        customizeHLTforAlpakaPixelRecoLocal,
        customizeHLTforAlpakaPixelRecoTracking,
        customizeHLTforAlpakaPixelRecoVertexing,
        customizeHLTforDQMGPUvsCPUPixel,
        customizeHLTforAlpakaPixelRecoTheRest,
    )

    for applyStep in customisationSteps:
        process = applyStep(process)

    return process
def makeSerialClone(module, **kwargs):
    """Return a clone of an alpaka-based module that runs on the serial_sync backend.

    The plugin type of `module` is inspected to extract the backend-independent
    plugin name; the clone gets the type 'alpaka_serial_sync::' + that name.

    Parameters:
      module: an alpaka-based module, i.e. one whose plugin type either ends
              with '@alpaka' or starts with one of the explicit backend
              namespaces 'alpaka_serial_sync::', 'alpaka_cuda_async::' or
              'alpaka_rocm_async::'.
      **kwargs: forwarded unchanged to `module.clone()`.

    Returns:
      The clone. `module` itself is not modified.

    Raises:
      TypeError: if the plugin type of `module` matches none of the forms
                 above. This is checked before cloning.
    """
    # explicit backend namespaces that can prefix the plugin name
    explicitBackendPrefixes = (
        'alpaka_serial_sync::',
        'alpaka_cuda_async::',
        'alpaka_rocm_async::',
    )

    pluginType = module._TypedParameterizable__type
    if pluginType.endswith('@alpaka'):
        # alpaka module with automatic backend selection
        base = pluginType.removesuffix('@alpaka')
    else:
        for prefix in explicitBackendPrefixes:
            if pluginType.startswith(prefix):
                # alpaka module with an explicit backend
                base = pluginType.removeprefix(prefix)
                break
        else:
            # non-alpaka module
            raise TypeError('%s is not an alpaka-based module, and cannot be used with makeSerialClone()' % str(module))

    serialClone = module.clone(**kwargs)
    serialClone._TypedParameterizable__type = 'alpaka_serial_sync::' + base
    # the backend is now part of the plugin type: drop the 'alpaka' parameter
    # (if any) from the clone
    if 'alpaka' in serialClone.parameterNames_():
        del serialClone.alpaka
    return serialClone
include(s) #include "PixelClusterizerBase.h" -// #define EDM_ML_DEBUG -// #define GPU_DEBUG +//#define EDM_ML_DEBUG +//#define GPU_DEBUG + template class SiPixelDigisClustersFromSoAAlpaka : public edm::global::EDProducer<> { public: diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 56718b4bdae14..452b0e2097071 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -33,7 +33,7 @@ #include "gpuClusterChargeCut.h" #include "gpuClustering.h" -// #define GPU_DEBUG +//#define GPU_DEBUG namespace pixelgpudetails { diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 06b30da68c8cd..fe9cc260a5853 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -18,7 +18,7 @@ #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" -// #define GPU_DEBUG +//#define GPU_DEBUG struct SiPixelROCsStatusAndMapping; class SiPixelGainForHLTonGPU; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h index ff885b5bad07f..d1f5509052468 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h @@ -18,7 +18,7 @@ #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" -// #define GPU_DEBUG +//#define GPU_DEBUG namespace calibPixel { using namespace 
cms::alpakatools; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h index c149707e41d9a..4056090517aee 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h @@ -10,7 +10,7 @@ #include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h" #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" -// #define GPU_DEBUG +//#define GPU_DEBUG namespace pixelClustering { diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h index 616ccbd3eb8c7..7da68c7b2f5da 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h @@ -5,15 +5,16 @@ #include #include #include + #include -#include "HeterogeneousCore/AlpakaInterface/interface/config.h" -#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" #include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" -// #define GPU_DEBUG +//#define GPU_DEBUG namespace ALPAKA_ACCELERATOR_NAMESPACE { @@ -140,7 +141,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // find the index of the first pixel not belonging to this module (or invalid) auto& msize = alpaka::declareSharedVar(acc); - const uint32_t blockIdx(alpaka::getIdx(acc)[0u]); + const uint32_t blockIdx = alpaka::getIdx(acc)[0u]; if (blockIdx >= clus_view[0].moduleStart()) return; @@ -274,11 +275,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { 
ALPAKA_ASSERT_OFFLOAD((hist.size() / blockDimension) <= maxiter); // NB: can be tuned. - constexpr uint32_t threadDimension = cms::alpakatools::requires_single_thread_per_block_v ? 1 : 256; + constexpr uint32_t threadDimension = cms::alpakatools::requires_single_thread_per_block_v ? 256 : 1; #ifndef NDEBUG - [[maybe_unused]] const uint32_t runTimeThreadDimension( - alpaka::getWorkDiv(acc)[0u]); + [[maybe_unused]] const uint32_t runTimeThreadDimension = + alpaka::getWorkDiv(acc)[0u]; ALPAKA_ASSERT_OFFLOAD(runTimeThreadDimension <= threadDimension); #endif diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc index 3e7caf8b2b3a4..88ad79c6af609 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc @@ -640,15 +640,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto moduleStartFirstElement = cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u); alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement); - constexpr auto threadsPerBlockFindClus = 512; + + // TODO + // - we are fixing this here since it needs to be needed + // at compile time also in the kernel (for_each_element_in_block_strided) + // - put maxIter in the Geometry traits + constexpr auto threadsOrElementsFindClus = 256; + const auto workDivMaxNumModules = - cms::alpakatools::make_workdiv(numberOfModules, threadsPerBlockFindClus); + cms::alpakatools::make_workdiv(numberOfModules, threadsOrElementsFindClus); // NB: With present FindClus() / chargeCut() algorithm, // threadPerBlock (GPU) or elementsPerThread (CPU) = 256 show optimal performance. // Though, it does not have to be the same number for CPU/GPU cases. 
#ifdef GPU_DEBUG - std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << threadsPerBlockFindClus + std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << threadsOrElementsFindClus << " threadsPerBlockOrElementsPerThread\n"; #endif @@ -659,9 +665,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::wait(queue); #endif + constexpr auto threadsPerBlockChargeCut = 256; + const auto workDivChargeCut = cms::alpakatools::make_workdiv(numberOfModules, threadsPerBlockChargeCut); // apply charge cut alpaka::exec(queue, - workDivMaxNumModules, + workDivChargeCut, ::pixelClustering::ClusterChargeCut{}, digis_d->view(), clusters_d->view(), @@ -735,7 +743,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { /// should be larger than maxPixInModule/16 aka (maxPixInModule/maxiter in the kernel) - const auto threadsPerBlockFindClus = ((TrackerTraits::maxPixInModule / 16 + 128 - 1) / 128) * 128; + const auto threadsPerBlockFindClus = 256; const auto workDivMaxNumModules = cms::alpakatools::make_workdiv(numberOfModules, threadsPerBlockFindClus); #ifdef GPU_DEBUG diff --git a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py index 8d78599d07d9c..a6dd2bea80e2a 100644 --- a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py +++ b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py @@ -1,4 +1,5 @@ import FWCore.ParameterSet.Config as cms +from HeterogeneousCore.AlpakaCore.functions import * from Configuration.Eras.Modifier_run3_common_cff import run3_common from Configuration.ProcessModifiers.gpu_cff import gpu from Configuration.ProcessModifiers.alpaka_cff import alpaka @@ -130,11 +131,7 @@ def _addProcessCalibTrackerAlpakaES(process): )) # reconstruct the pixel digis and clusters with alpaka on the cpu, for validation -siPixelClustersPreSplittingAlpakaSerial = 
siPixelClustersPreSplittingAlpaka.clone( - #alpaka = dict( backend = '*' ) - alpaka = None -) -siPixelClustersPreSplittingAlpakaSerial._TypedParameterizable__type = 'alpaka_serial_sync' + siPixelClustersPreSplittingAlpaka._TypedParameterizable__type.removesuffix('@alpaka') +siPixelClustersPreSplittingAlpakaSerial = makeSerialClone(siPixelClustersPreSplittingAlpaka) from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase1_cfi import siPixelDigisClustersFromSoAAlpakaPhase1 as _siPixelDigisClustersFromSoAAlpakaPhase1 from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase2_cfi import siPixelDigisClustersFromSoAAlpakaPhase2 as _siPixelDigisClustersFromSoAAlpakaPhase2 diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu index 61442ea9d2b8c..b1e5e1c3c90e9 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu @@ -12,7 +12,8 @@ #include "PixelRecHitGPUKernel.h" #include "gpuPixelRecHits.h" -// #define GPU_DEBUG + +//#define GPU_DEBUG namespace { template diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h index 25cc724cd4c4a..407a18be04fa9 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h @@ -10,7 +10,9 @@ #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + //#define GPU_DEBUG + namespace pixelgpudetails { template diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h index 220a91b85ced3..45587034b572b 100644 --- 
a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h @@ -19,7 +19,8 @@ #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" -//#define GPU_DEBUG 1 +//#define GPU_DEBUG + namespace ALPAKA_ACCELERATOR_NAMESPACE { namespace pixelRecHits { diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 94ae258cc16fb..55c556bd63048 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -7,13 +7,14 @@ #include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" #include "DataFormats/Math/interface/approx_atan2.h" #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -//#define GPU_DEBUG 1 +//#define GPU_DEBUG + namespace gpuPixelRecHits { template diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index e6b2c9832600c..7e8910a8e0918 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -1,4 +1,5 @@ import FWCore.ParameterSet.Config as cms +from HeterogeneousCore.AlpakaCore.functions import * from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA from Configuration.ProcessModifiers.gpu_cff import gpu from Configuration.ProcessModifiers.alpaka_cff import alpaka @@ -139,12 +140,9 @@ )) # Hit 
SoA producer on the cpu, for validation -siPixelRecHitsPreSplittingAlpakaSerial = siPixelRecHitsPreSplittingAlpaka.clone( - src = "siPixelClustersPreSplittingAlpakaSerial", - #alpaka = dict( backend = '*' ) - alpaka = None +siPixelRecHitsPreSplittingAlpakaSerial = makeSerialClone(siPixelRecHitsPreSplittingAlpaka, + src = "siPixelClustersPreSplittingAlpakaSerial" ) -siPixelRecHitsPreSplittingAlpakaSerial._TypedParameterizable__type = 'alpaka_serial_sync' + siPixelRecHitsPreSplittingAlpaka._TypedParameterizable__type.removesuffix('@alpaka') from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSoAAlpakaPhase1_cfi import siPixelRecHitFromSoAAlpakaPhase1 as _siPixelRecHitFromSoAAlpakaPhase1 from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSoAAlpakaPhase2_cfi import siPixelRecHitFromSoAAlpakaPhase2 as _siPixelRecHitFromSoAAlpakaPhase2 diff --git a/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc b/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc new file mode 100644 index 0000000000000..9772366c6b22e --- /dev/null +++ b/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc @@ -0,0 +1,822 @@ +/* + * \class L2TauTagProducer + * + * L2Tau identification using Convolutional NN. 
+ * + * \author Valeria D'Amante, Università di Siena and INFN Pisa + * Konstantin Androsov, EPFL and ETHZ +*/ +#include +#include +#include +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "DataFormats/Math/interface/deltaR.h" +#include "DataFormats/Common/interface/Handle.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "FWCore/Utilities/interface/isFinite.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "PhysicsTools/TensorFlow/interface/TensorFlow.h" +#include "Geometry/CaloGeometry/interface/CaloCellGeometry.h" +#include "Geometry/CaloGeometry/interface/CaloGeometry.h" +#include "Geometry/CaloTopology/interface/HcalTopology.h" +#include "Geometry/Records/interface/CaloGeometryRecord.h" +#include "DataFormats/CaloRecHit/interface/CaloRecHit.h" +#include "DataFormats/EcalRecHit/interface/EcalRecHit.h" +#include "DataFormats/EcalRecHit/interface/EcalRecHitCollections.h" +#include "DataFormats/EcalDetId/interface/EcalDetIdCollections.h" +#include "DataFormats/HcalDetId/interface/HcalDetId.h" +#include "DataFormats/HcalRecHit/interface/HBHERecHit.h" +#include "DataFormats/HcalRecHit/interface/HcalRecHitDefs.h" +#include "DataFormats/HcalRecHit/interface/HFRecHit.h" +#include "DataFormats/HcalRecHit/interface/HORecHit.h" +#include "DataFormats/HLTReco/interface/TriggerTypeDefs.h" +#include "DataFormats/HLTReco/interface/TriggerFilterObjectWithRefs.h" +#include "TrackingTools/TrajectoryParametrization/interface/CurvilinearTrajectoryError.h" +#include "RecoTracker/PixelTrackFitting/interface/FitUtils.h" +#include 
"TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h" +#include "DataFormats/TrackReco/interface/HitPattern.h" +#include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h" +#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h" +#include "DataFormats/GeometrySurface/interface/Plane.h" +#include "DataFormats/BeamSpot/interface/BeamSpot.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" + +#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h" +#include "DataFormats/TrackSoA/interface/TracksHost.h" +#include "DataFormats/VertexSoA/interface/ZVertexHost.h" + +namespace L2TauTagNNv1 { + constexpr int nCellEta = 5; + constexpr int nCellPhi = 5; + constexpr int nVars = 31; + constexpr float dR_max = 0.5; + enum class NNInputs { + nVertices = 0, + l1Tau_pt, + l1Tau_eta, + l1Tau_hwIso, + EcalEnergySum, + EcalSize, + EcalEnergyStdDev, + EcalDeltaEta, + EcalDeltaPhi, + EcalChi2, + EcalEnergySumForPositiveChi2, + EcalSizeForPositiveChi2, + HcalEnergySum, + HcalSize, + HcalEnergyStdDev, + HcalDeltaEta, + HcalDeltaPhi, + HcalChi2, + HcalEnergySumForPositiveChi2, + HcalSizeForPositiveChi2, + PatatrackPtSum, + PatatrackSize, + PatatrackSizeWithVertex, + PatatrackPtSumWithVertex, + PatatrackChargeSum, + PatatrackDeltaEta, + PatatrackDeltaPhi, + PatatrackChi2OverNdof, + PatatrackNdof, + PatatrackDxy, + PatatrackDz + }; + + const std::map varNameMap = { + {NNInputs::nVertices, "nVertices"}, + {NNInputs::l1Tau_pt, "l1Tau_pt"}, + {NNInputs::l1Tau_eta, "l1Tau_eta"}, + {NNInputs::l1Tau_hwIso, "l1Tau_hwIso"}, + {NNInputs::EcalEnergySum, "EcalEnergySum"}, + {NNInputs::EcalSize, "EcalSize"}, + {NNInputs::EcalEnergyStdDev, "EcalEnergyStdDev"}, + {NNInputs::EcalDeltaEta, "EcalDeltaEta"}, + {NNInputs::EcalDeltaPhi, "EcalDeltaPhi"}, + {NNInputs::EcalChi2, "EcalChi2"}, + {NNInputs::EcalEnergySumForPositiveChi2, 
"EcalEnergySumForPositiveChi2"}, + {NNInputs::EcalSizeForPositiveChi2, "EcalSizeForPositiveChi2"}, + {NNInputs::HcalEnergySum, "HcalEnergySum"}, + {NNInputs::HcalSize, "HcalSize"}, + {NNInputs::HcalEnergyStdDev, "HcalEnergyStdDev"}, + {NNInputs::HcalDeltaEta, "HcalDeltaEta"}, + {NNInputs::HcalDeltaPhi, "HcalDeltaPhi"}, + {NNInputs::HcalChi2, "HcalChi2"}, + {NNInputs::HcalEnergySumForPositiveChi2, "HcalEnergySumForPositiveChi2"}, + {NNInputs::HcalSizeForPositiveChi2, "HcalSizeForPositiveChi2"}, + {NNInputs::PatatrackPtSum, "PatatrackPtSum"}, + {NNInputs::PatatrackSize, "PatatrackSize"}, + {NNInputs::PatatrackSizeWithVertex, "PatatrackSizeWithVertex"}, + {NNInputs::PatatrackPtSumWithVertex, "PatatrackPtSumWithVertex"}, + {NNInputs::PatatrackChargeSum, "PatatrackChargeSum"}, + {NNInputs::PatatrackDeltaEta, "PatatrackDeltaEta"}, + {NNInputs::PatatrackDeltaPhi, "PatatrackDeltaPhi"}, + {NNInputs::PatatrackChi2OverNdof, "PatatrackChi2OverNdof"}, + {NNInputs::PatatrackNdof, "PatatrackNdof"}, + {NNInputs::PatatrackDxy, "PatatrackDxy"}, + {NNInputs::PatatrackDz, "PatatrackDz"}}; +} // namespace L2TauTagNNv1 +namespace { + inline float& getCellImpl( + tensorflow::Tensor& cellGridMatrix, int tau_idx, int phi_idx, int eta_idx, L2TauTagNNv1::NNInputs NNInput_idx) { + return cellGridMatrix.tensor()(tau_idx, phi_idx, eta_idx, static_cast(NNInput_idx)); + } +} // namespace +struct normDictElement { + float mean; + float std; + float min; + float max; +}; + +struct L2TauNNProducerAlpakaCacheData { + L2TauNNProducerAlpakaCacheData() : graphDef(nullptr), session(nullptr) {} + tensorflow::GraphDef* graphDef; + tensorflow::Session* session; + std::vector normVec; +}; + +class L2TauNNProducerAlpaka : public edm::stream::EDProducer> { +public: + using TracksHost = pixelTrack::TracksHostPhase1; + + struct caloRecHitCollections { + const HBHERecHitCollection* hbhe; + const HORecHitCollection* ho; + const EcalRecHitCollection* eb; + const EcalRecHitCollection* ee; + const CaloGeometry* 
geometry; + }; + + struct InputDescTau { + std::string CollectionName; + edm::EDGetTokenT inputToken_; + }; + + static constexpr float dR2_max = L2TauTagNNv1::dR_max * L2TauTagNNv1::dR_max; + static constexpr float dEta_width = 2 * L2TauTagNNv1::dR_max / static_cast(L2TauTagNNv1::nCellEta); + static constexpr float dPhi_width = 2 * L2TauTagNNv1::dR_max / static_cast(L2TauTagNNv1::nCellPhi); + + explicit L2TauNNProducerAlpaka(const edm::ParameterSet&, const L2TauNNProducerAlpakaCacheData*); + static void fillDescriptions(edm::ConfigurationDescriptions&); + static std::unique_ptr initializeGlobalCache(const edm::ParameterSet&); + static void globalEndJob(L2TauNNProducerAlpakaCacheData*); + +private: + void checknan(tensorflow::Tensor& tensor, int debugLevel); + void standardizeTensor(tensorflow::Tensor& tensor); + std::vector getTauScore(const tensorflow::Tensor& cellGridMatrix); + void produce(edm::Event& event, const edm::EventSetup& eventsetup) override; + void fillL1TauVars(tensorflow::Tensor& cellGridMatrix, const std::vector& allTaus); + void fillCaloRecHits(tensorflow::Tensor& cellGridMatrix, + const std::vector& allTaus, + const caloRecHitCollections& caloRecHits); + void fillPatatracks(tensorflow::Tensor& cellGridMatrix, + const std::vector& allTaus, + const TracksHost& patatracks_tsoa, + const ZVertexHost& patavtx_soa, + const reco::BeamSpot& beamspot, + const MagneticField* magfi); + void selectGoodTracksAndVertices(const ZVertexHost& patavtx_soa, + const TracksHost& patatracks_tsoa, + std::vector& trkGood, + std::vector& vtxGood); + std::pair impactParameter(int it, + const TracksHost& patatracks_tsoa, + float patatrackPhi, + const reco::BeamSpot& beamspot, + const MagneticField* magfi); + template + std::tuple getEtaPhiIndices(const VPos& position, const LVec& tau_p4); + template + std::tuple getEtaPhiIndices(float eta, float phi, const LVec& tau_p4); + +private: + const int debugLevel_; + const edm::EDGetTokenT tauTriggerToken_; + std::vector 
L1TauDesc_; + const edm::EDGetTokenT hbheToken_; + const edm::EDGetTokenT hoToken_; + const edm::EDGetTokenT ebToken_; + const edm::EDGetTokenT eeToken_; + const edm::ESGetToken geometryToken_; + const edm::ESGetToken bFieldToken_; + const edm::EDGetTokenT pataVerticesToken_; + const edm::EDGetTokenT pataTracksToken_; + const edm::EDGetTokenT beamSpotToken_; + const unsigned int maxVtx_; + const float fractionSumPt2_; + const float minSumPt2_; + const float trackPtMin_; + const float trackPtMax_; + const float trackChi2Max_; + std::string inputTensorName_; + std::string outputTensorName_; + const L2TauNNProducerAlpakaCacheData* L2cacheData_; +}; + +std::unique_ptr L2TauNNProducerAlpaka::initializeGlobalCache( + const edm::ParameterSet& cfg) { + std::unique_ptr cacheData = std::make_unique(); + cacheData->normVec.reserve(L2TauTagNNv1::nVars); + + auto const graphPath = edm::FileInPath(cfg.getParameter("graphPath")).fullPath(); + + cacheData->graphDef = tensorflow::loadGraphDef(graphPath); + cacheData->session = tensorflow::createSession(cacheData->graphDef); + + tensorflow::setLogging("2"); + + boost::property_tree::ptree loadPtreeRoot; + auto const normalizationDict = edm::FileInPath(cfg.getParameter("normalizationDict")).fullPath(); + boost::property_tree::read_json(normalizationDict, loadPtreeRoot); + for (const auto& [key, val] : L2TauTagNNv1::varNameMap) { + boost::property_tree::ptree var = loadPtreeRoot.get_child(val); + normDictElement current_element; + current_element.mean = var.get_child("mean").get_value(); + current_element.std = var.get_child("std").get_value(); + current_element.min = var.get_child("min").get_value(); + current_element.max = var.get_child("max").get_value(); + cacheData->normVec.push_back(current_element); + } + return cacheData; +} +void L2TauNNProducerAlpaka::globalEndJob(L2TauNNProducerAlpakaCacheData* cacheData) { + if (cacheData->graphDef != nullptr) { + delete cacheData->graphDef; + } + 
tensorflow::closeSession(cacheData->session); +} +void L2TauNNProducerAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("debugLevel", 0)->setComment("set debug level for printing out info"); + edm::ParameterSetDescription l1TausPset; + l1TausPset.add("L1CollectionName", "DoubleTau")->setComment("Name of collections"); + l1TausPset.add("L1TauTrigger", edm::InputTag("hltL1sDoubleTauBigOR")) + ->setComment("Which trigger should the L1 Taus collection pass"); + edm::ParameterSet l1TausPSetDefault; + l1TausPSetDefault.addParameter("L1CollectionName", "DoubleTau"); + l1TausPSetDefault.addParameter("L1TauTrigger", edm::InputTag("hltL1sDoubleTauBigOR")); + desc.addVPSet("L1Taus", l1TausPset, {l1TausPSetDefault}); + desc.add("hbheInput", edm::InputTag("hltHbhereco"))->setComment("HBHE recHit collection"); + desc.add("hoInput", edm::InputTag("hltHoreco"))->setComment("HO recHit Collection"); + desc.add("ebInput", edm::InputTag("hltEcalRecHit:EcalRecHitsEB"))->setComment("EB recHit Collection"); + desc.add("eeInput", edm::InputTag("hltEcalRecHit:EcalRecHitsEE"))->setComment("EE recHit Collection"); + desc.add("pataVertices", edm::InputTag("hltPixelVerticesSoA")) + ->setComment("patatrack vertices collection"); + desc.add("pataTracks", edm::InputTag("hltPixelTracksSoA"))->setComment("patatrack collection"); + desc.add("BeamSpot", edm::InputTag("hltOnlineBeamSpot"))->setComment("BeamSpot Collection"); + desc.add("maxVtx", 100)->setComment("max output collection size (number of accepted vertices)"); + desc.add("fractionSumPt2", 0.3)->setComment("threshold on sumPt2 fraction of the leading vertex"); + desc.add("minSumPt2", 0.)->setComment("min sumPt2"); + desc.add("track_pt_min", 1.0)->setComment("min track p_T"); + desc.add("track_pt_max", 10.0)->setComment("max track p_T"); + desc.add("track_chi2_max", 99999.)->setComment("max track chi2"); + desc.add("graphPath", 
"RecoTauTag/TrainingFiles/data/L2TauNNTag/L2TauTag_Run3v1.pb") + ->setComment("path to the saved CNN"); + desc.add("normalizationDict", "RecoTauTag/TrainingFiles/data/L2TauNNTag/NormalizationDict.json") + ->setComment("path to the dictionary for variable standardization"); + descriptions.addWithDefaultLabel(desc); +} + +L2TauNNProducerAlpaka::L2TauNNProducerAlpaka(const edm::ParameterSet& cfg, + const L2TauNNProducerAlpakaCacheData* cacheData) + : debugLevel_(cfg.getParameter("debugLevel")), + hbheToken_(consumes(cfg.getParameter("hbheInput"))), + hoToken_(consumes(cfg.getParameter("hoInput"))), + ebToken_(consumes(cfg.getParameter("ebInput"))), + eeToken_(consumes(cfg.getParameter("eeInput"))), + geometryToken_(esConsumes()), + bFieldToken_(esConsumes()), + pataVerticesToken_(consumes(cfg.getParameter("pataVertices"))), + pataTracksToken_(consumes(cfg.getParameter("pataTracks"))), + beamSpotToken_(consumes(cfg.getParameter("BeamSpot"))), + maxVtx_(cfg.getParameter("maxVtx")), + fractionSumPt2_(cfg.getParameter("fractionSumPt2")), + minSumPt2_(cfg.getParameter("minSumPt2")), + trackPtMin_(cfg.getParameter("track_pt_min")), + trackPtMax_(cfg.getParameter("track_pt_max")), + trackChi2Max_(cfg.getParameter("track_chi2_max")) { + if (cacheData->graphDef == nullptr) { + throw cms::Exception("InvalidCacheData") << "Invalid Cache Data."; + } + inputTensorName_ = cacheData->graphDef->node(0).name(); + outputTensorName_ = cacheData->graphDef->node(cacheData->graphDef->node_size() - 1).name(); + L2cacheData_ = cacheData; + std::vector L1TauCollections = cfg.getParameter>("L1Taus"); + L1TauDesc_.reserve(L1TauCollections.size()); + for (const auto& l1TauInput : L1TauCollections) { + InputDescTau toInsert; + toInsert.CollectionName = l1TauInput.getParameter("L1CollectionName"); + toInsert.inputToken_ = + consumes(l1TauInput.getParameter("L1TauTrigger")); + L1TauDesc_.push_back(toInsert); + } + for (const auto& desc : L1TauDesc_) + produces>(desc.CollectionName); +} + +void 
L2TauNNProducerAlpaka::checknan(tensorflow::Tensor& tensor, int debugLevel) {  /*
+   * Walk every element of a 4-dimensional tensor and emit an edm::LogWarning
+   * for each value that edm::isNotFinite() flags. Values are only read here;
+   * nothing is written back into `tensor`.
+   *
+   * tensor     - tensor to scan; its four axes are visited as
+   *              (tau, phi, eta, variable), the argument order of getCellImpl.
+   * debugLevel - when > 2, up to four following variables of the same cell
+   *              are logged next to each offending value.
+   *
+   * Throws cms::Exception("InvalidTensor") if the tensor is not 4-dimensional.
+   *
+   * NOTE(review): template argument lists (e.g. on std::vector and
+   * static_cast) appear to be missing in this copy of the patch - confirm
+   * against the upstream file before compiling.
+   */
+  using NNInputs = L2TauTagNNv1::NNInputs;
+  std::vector tensor_shape(tensor.shape().dims());  // one slot per tensor dimension
+  for (int d = 0; d < tensor.shape().dims(); d++) {
+    tensor_shape.at(d) = tensor.shape().dim_size(d);  // cache the extent of dimension d
+  }
+  if (tensor_shape.size() != 4) {  // the nested loops below hard-code four axes
+    throw cms::Exception("InvalidTensor") << "Tensor shape does not have 4 dimensions!";
+  }
+  for (int tau_idx = 0; tau_idx < tensor_shape.at(0); tau_idx++) {
+    for (int phi_idx = 0; phi_idx < tensor_shape.at(1); phi_idx++) {
+      for (int eta_idx = 0; eta_idx < tensor_shape.at(2); eta_idx++) {
+        for (int var_idx = 0; var_idx < tensor_shape.at(3); var_idx++) {
+          auto getCell = [&](NNInputs input) -> float& {  // accessor bound to the current (tau, phi, eta) cell
+            return getCellImpl(tensor, tau_idx, phi_idx, eta_idx, input);
+          };
+          auto nonstd_var = getCell(static_cast(var_idx));  // by-value copy of the entry under test
+          if (edm::isNotFinite(nonstd_var)) {
+            edm::LogWarning("InputVar") << "var is nan \nvar name= "
+                                        << L2TauTagNNv1::varNameMap.at(static_cast(var_idx))
+                                        << "\t var_idx = " << var_idx << "\t eta_idx = " << eta_idx
+                                        << "\t phi_idx = " << phi_idx << "\t tau_idx = " << tau_idx;
+            if (debugLevel > 2) {  // verbose mode: also dump the next (up to) four variables of this cell
+              edm::LogWarning("InputVar") << "other vars in same cell \n";
+              if (var_idx + 1 < tensor_shape.at(3))  // each look-ahead is bounds-checked against the last axis
+                edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 1))
+                                            << "\t = " << getCell(static_cast(var_idx + 1));
+              if (var_idx + 2 < tensor_shape.at(3))
+                edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 2))
+                                            << "\t = " << getCell(static_cast(var_idx + 2));
+              if (var_idx + 3 < tensor_shape.at(3))
+                edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 3))
+                                            << "\t = " << getCell(static_cast(var_idx + 3));
+              if (var_idx + 4 < tensor_shape.at(3))
+                edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 4))
+                                            << "\t = " << getCell(static_cast(var_idx + 4));
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+void
L2TauNNProducerAlpaka::standardizeTensor(tensorflow::Tensor& tensor) {  /*
+   * Standardize a 4-dimensional tensor in place: every entry becomes
+   * (value - mean) / std and is then clamped to [min, max], using the
+   * normalization record stored at the same variable index in
+   * L2cacheData_->normVec.
+   *
+   * tensor - tensor to rewrite; its four axes are visited as
+   *          (tau, phi, eta, variable), the argument order of getCellImpl.
+   *
+   * Throws cms::Exception("InvalidTensor") if the tensor is not 4-dimensional.
+   * normVec.at() additionally throws if the last axis is longer than normVec.
+   *
+   * NOTE(review): template argument lists (e.g. on std::vector and
+   * static_cast) appear to be missing in this copy of the patch - confirm
+   * against the upstream file before compiling.
+   */
+  using NNInputs = L2TauTagNNv1::NNInputs;
+  std::vector tensor_shape(tensor.shape().dims());  // one slot per tensor dimension
+  for (int d = 0; d < tensor.shape().dims(); d++) {
+    tensor_shape.at(d) = tensor.shape().dim_size(d);  // cache the extent of dimension d
+  }
+  if (tensor_shape.size() != 4) {  // the nested loops below hard-code four axes
+    throw cms::Exception("InvalidTensor") << "Tensor shape does not have 4 dimensions!";
+  }
+  for (int tau_idx = 0; tau_idx < tensor_shape.at(0); tau_idx++) {
+    for (int phi_idx = 0; phi_idx < tensor_shape.at(1); phi_idx++) {
+      for (int eta_idx = 0; eta_idx < tensor_shape.at(2); eta_idx++) {
+        for (int var_idx = 0; var_idx < tensor_shape.at(3); var_idx++) {
+          auto getCell = [&](NNInputs input) -> float& {  // accessor bound to the current (tau, phi, eta) cell
+            return getCellImpl(tensor, tau_idx, phi_idx, eta_idx, input);
+          };
+          float mean = L2cacheData_->normVec.at(var_idx).mean;  // normalization record is looked up by variable position
+          float std = L2cacheData_->normVec.at(var_idx).std;  // NOTE(review): no guard against std == 0 - confirm the normalization file never contains one
+          float min = L2cacheData_->normVec.at(var_idx).min;
+          float max = L2cacheData_->normVec.at(var_idx).max;
+          float nonstd_var = getCell(static_cast(var_idx));
+          float std_var = static_cast((nonstd_var - mean) / std);
+          if (std_var > max) {  // min/max bound the standardized value, not the raw one
+            std_var = static_cast(max);
+          } else if (std_var < min) {
+            std_var = static_cast(min);
+          }
+          getCell(static_cast(var_idx)) = std_var;  // write the result back through the float& accessor
+        }
+      }
+    }
+  }
+}
+
+void L2TauNNProducerAlpaka::fillL1TauVars(tensorflow::Tensor& cellGridMatrix, const std::vector& allTaus) {  /*
+   * Copy the L1 tau quantities pt(), eta() and hwIso() into the l1Tau_pt,
+   * l1Tau_eta and l1Tau_hwIso inputs of cellGridMatrix. The same three
+   * values are written to every (eta, phi) cell of a given tau's grid.
+   *
+   * cellGridMatrix - tensor to fill, addressed through getCellImpl as
+   *                  (tau, phi, eta, variable).
+   * allTaus        - tau handles, dereferenced with ->; element i fills
+   *                  index i of the tensor's first axis.
+   */
+  using NNInputs = L2TauTagNNv1::NNInputs;
+
+  const int nTaus = allTaus.size();
+  for (int tau_idx = 0; tau_idx < nTaus; tau_idx++) {
+    for (int eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) {
+      for (int phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) {
+        auto getCell = [&](NNInputs input) -> float& {  // accessor bound to the current (tau, phi, eta) cell
+          return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input);
+        };
+        getCell(NNInputs::l1Tau_pt) = allTaus[tau_idx]->pt();
+        getCell(NNInputs::l1Tau_eta) = allTaus[tau_idx]->eta();
+        getCell(NNInputs::l1Tau_hwIso) = allTaus[tau_idx]->hwIso();
+      }
+    }
+  }
+}
+
+template +std::tuple L2TauNNProducerAlpaka::getEtaPhiIndices(float eta, float phi, const LVec& tau_p4) { + const float deta = eta - tau_p4.eta(); + const float dphi = reco::deltaPhi(phi, tau_p4.phi()); + const int eta_idx = static_cast(floor((deta + L2TauTagNNv1::dR_max) / dEta_width)); + const int phi_idx = static_cast(floor((dphi + L2TauTagNNv1::dR_max) / dPhi_width)); + return std::make_tuple(deta, dphi, eta_idx, phi_idx); +} + +template +std::tuple L2TauNNProducerAlpaka::getEtaPhiIndices(const VPos& position, const LVec& tau_p4) { + return getEtaPhiIndices(position.eta(), position.phi(), tau_p4); +} + +void L2TauNNProducerAlpaka::fillCaloRecHits(tensorflow::Tensor& cellGridMatrix, + const std::vector& allTaus, + const caloRecHitCollections& caloRecHits) { + using NNInputs = L2TauTagNNv1::NNInputs; + + const int nTaus = allTaus.size(); + float deta, dphi; + int eta_idx = 0; + int phi_idx = 0; + int tau_idx = 0; + + auto getCell = [&](NNInputs input) -> float& { + return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input); + }; + for (tau_idx = 0; tau_idx < nTaus; tau_idx++) { + // calorechit_EE + for (const auto& caloRecHit_ee : *caloRecHits.ee) { + if (caloRecHit_ee.energy() <= 0) + continue; + const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_ee.id())->getPosition(); + const float eeCalEn = caloRecHit_ee.energy(); + const float eeCalChi2 = caloRecHit_ee.chi2(); + if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { + std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); + getCell(NNInputs::EcalEnergySum) += eeCalEn; + getCell(NNInputs::EcalSize) += 1.; + getCell(NNInputs::EcalEnergyStdDev) += eeCalEn * eeCalEn; + getCell(NNInputs::EcalDeltaEta) += deta * eeCalEn; + getCell(NNInputs::EcalDeltaPhi) += dphi * eeCalEn; + if (eeCalChi2 >= 0) { + getCell(NNInputs::EcalChi2) += eeCalChi2 * eeCalEn; + getCell(NNInputs::EcalEnergySumForPositiveChi2) += eeCalEn; + 
getCell(NNInputs::EcalSizeForPositiveChi2) += 1.; + } + } + } + + // calorechit_EB + for (const auto& caloRecHit_eb : *caloRecHits.eb) { + if (caloRecHit_eb.energy() <= 0) + continue; + const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_eb.id())->getPosition(); + const float ebCalEn = caloRecHit_eb.energy(); + const float ebCalChi2 = caloRecHit_eb.chi2(); + if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { + std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); + getCell(NNInputs::EcalEnergySum) += ebCalEn; + getCell(NNInputs::EcalSize) += 1.; + getCell(NNInputs::EcalEnergyStdDev) += ebCalEn * ebCalEn; + getCell(NNInputs::EcalDeltaEta) += deta * ebCalEn; + getCell(NNInputs::EcalDeltaPhi) += dphi * ebCalEn; + if (ebCalChi2 >= 0) { + getCell(NNInputs::EcalChi2) += ebCalChi2 * ebCalEn; + getCell(NNInputs::EcalEnergySumForPositiveChi2) += ebCalEn; + getCell(NNInputs::EcalSizeForPositiveChi2) += 1.; + } + } + } + + // calorechit_HBHE + for (const auto& caloRecHit_hbhe : *caloRecHits.hbhe) { + if (caloRecHit_hbhe.energy() <= 0) + continue; + const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_hbhe.id())->getPosition(); + const float hbheCalEn = caloRecHit_hbhe.energy(); + const float hbheCalChi2 = caloRecHit_hbhe.chi2(); + if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { + std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); + getCell(NNInputs::HcalEnergySum) += hbheCalEn; + getCell(NNInputs::HcalEnergyStdDev) += hbheCalEn * hbheCalEn; + getCell(NNInputs::HcalSize) += 1.; + getCell(NNInputs::HcalDeltaEta) += deta * hbheCalEn; + getCell(NNInputs::HcalDeltaPhi) += dphi * hbheCalEn; + if (hbheCalChi2 >= 0) { + getCell(NNInputs::HcalChi2) += hbheCalChi2 * hbheCalEn; + getCell(NNInputs::HcalEnergySumForPositiveChi2) += hbheCalEn; + getCell(NNInputs::HcalSizeForPositiveChi2) += 1.; + } + } + } + + // calorechit_HO + 
for (const auto& caloRecHit_ho : *caloRecHits.ho) { + if (caloRecHit_ho.energy() <= 0) + continue; + const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_ho.id())->getPosition(); + const float hoCalEn = caloRecHit_ho.energy(); + if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { + std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); + getCell(NNInputs::HcalEnergySum) += hoCalEn; + getCell(NNInputs::HcalEnergyStdDev) += hoCalEn * hoCalEn; + getCell(NNInputs::HcalSize) += 1.; + getCell(NNInputs::HcalDeltaEta) += deta * hoCalEn; + getCell(NNInputs::HcalDeltaPhi) += dphi * hoCalEn; + } + } + + // normalize to sum and define stdDev + for (eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) { + for (phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) { + /* normalize eCal vars*/ + if (getCell(NNInputs::EcalEnergySum) > 0.) { + getCell(NNInputs::EcalDeltaEta) /= getCell(NNInputs::EcalEnergySum); + getCell(NNInputs::EcalDeltaPhi) /= getCell(NNInputs::EcalEnergySum); + } + if (getCell(NNInputs::EcalEnergySumForPositiveChi2) > 0.) { + getCell(NNInputs::EcalChi2) /= getCell(NNInputs::EcalEnergySumForPositiveChi2); + } + if (getCell(NNInputs::EcalSize) > 1.) { + // (stdDev - (enSum*enSum)/size) / (size-1) + getCell(NNInputs::EcalEnergyStdDev) = + (getCell(NNInputs::EcalEnergyStdDev) - + (getCell(NNInputs::EcalEnergySum) * getCell(NNInputs::EcalEnergySum)) / getCell(NNInputs::EcalSize)) / + (getCell(NNInputs::EcalSize) - 1); + } else { + getCell(NNInputs::EcalEnergyStdDev) = 0.; + } + /* normalize hCal Vars */ + if (getCell(NNInputs::HcalEnergySum) > 0.) { + getCell(NNInputs::HcalDeltaEta) /= getCell(NNInputs::HcalEnergySum); + getCell(NNInputs::HcalDeltaPhi) /= getCell(NNInputs::HcalEnergySum); + } + if (getCell(NNInputs::HcalEnergySumForPositiveChi2) > 0.) { + getCell(NNInputs::HcalChi2) /= getCell(NNInputs::HcalEnergySumForPositiveChi2); + } + if (getCell(NNInputs::HcalSize) > 1.) 
{ + // (stdDev - (enSum*enSum)/size) / (size-1) + getCell(NNInputs::HcalEnergyStdDev) = + (getCell(NNInputs::HcalEnergyStdDev) - + (getCell(NNInputs::HcalEnergySum) * getCell(NNInputs::HcalEnergySum)) / getCell(NNInputs::HcalSize)) / + (getCell(NNInputs::HcalSize) - 1); + } else { + getCell(NNInputs::HcalEnergyStdDev) = 0.; + } + } + } + } +} + +void L2TauNNProducerAlpaka::selectGoodTracksAndVertices(const ZVertexHost& patavtx_soa, + const TracksHost& patatracks_tsoa, + std::vector& trkGood, + std::vector& vtxGood) { + using patatrackHelpers = TracksUtilities; + const auto maxTracks = patatracks_tsoa.view().metadata().size(); + const int nv = patavtx_soa.view().nvFinal(); + trkGood.clear(); + trkGood.reserve(maxTracks); + vtxGood.clear(); + vtxGood.reserve(nv); + auto const* quality = patatracks_tsoa.view().quality(); + + // No need to sort either as the algorithms is just using the max (not even the location, just the max value of pt2sum). + std::vector pTSquaredSum(nv, 0); + std::vector nTrkAssociated(nv, 0); + + for (int32_t trk_idx = 0; trk_idx < maxTracks; ++trk_idx) { + auto nHits = patatrackHelpers::nHits(patatracks_tsoa.view(), trk_idx); + if (nHits == 0) { + break; + } + int vtx_ass_to_track = patavtx_soa.view()[trk_idx].idv(); + if (vtx_ass_to_track >= 0 && vtx_ass_to_track < nv) { + auto patatrackPt = patatracks_tsoa.view()[trk_idx].pt(); + ++nTrkAssociated[vtx_ass_to_track]; + if (patatrackPt >= trackPtMin_ && patatracks_tsoa.const_view()[trk_idx].chi2() <= trackChi2Max_) { + patatrackPt = std::min(patatrackPt, trackPtMax_); + pTSquaredSum[vtx_ass_to_track] += patatrackPt * patatrackPt; + } + } + if (nHits > 0 and quality[trk_idx] >= pixelTrack::Quality::loose) { + trkGood.push_back(trk_idx); + } + } + if (nv > 0) { + const auto minFOM_fromFrac = (*std::max_element(pTSquaredSum.begin(), pTSquaredSum.end())) * fractionSumPt2_; + for (int j = nv - 1; j >= 0 && vtxGood.size() < maxVtx_; --j) { + auto vtx_idx = patavtx_soa.view()[j].sortInd(); + 
assert(vtx_idx < nv); + if (nTrkAssociated[vtx_idx] >= 2 && pTSquaredSum[vtx_idx] >= minFOM_fromFrac && + pTSquaredSum[vtx_idx] > minSumPt2_) { + vtxGood.push_back(vtx_idx); + } + } + } +} + +std::pair L2TauNNProducerAlpaka::impactParameter(int it, + const TracksHost& patatracks_tsoa, + float patatrackPhi, + const reco::BeamSpot& beamspot, + const MagneticField* magfi) { + /* dxy and dz */ + riemannFit::Vector5d ipar, opar; + riemannFit::Matrix5d icov, ocov; + TracksUtilities::copyToDense(patatracks_tsoa.view(), ipar, icov, it); + riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov); + LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.); + float sp = std::sin(patatrackPhi); + float cp = std::cos(patatrackPhi); + Surface::RotationType Rotation(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0); + GlobalPoint BeamSpotPoint(beamspot.x0(), beamspot.y0(), beamspot.z0()); + Plane impPointPlane(BeamSpotPoint, Rotation); + GlobalTrajectoryParameters gp( + impPointPlane.toGlobal(lpar.position()), impPointPlane.toGlobal(lpar.momentum()), lpar.charge(), magfi); + GlobalPoint vv = gp.position(); + math::XYZPoint pos(vv.x(), vv.y(), vv.z()); + GlobalVector pp = gp.momentum(); + math::XYZVector mom(pp.x(), pp.y(), pp.z()); + auto lambda = M_PI_2 - pp.theta(); + auto phi = pp.phi(); + float patatrackDxy = -vv.x() * std::sin(phi) + vv.y() * std::cos(phi); + float patatrackDz = + (vv.z() * std::cos(lambda) - (vv.x() * std::cos(phi) + vv.y() * std::sin(phi)) * std::sin(lambda)) / + std::cos(lambda); + return std::make_pair(patatrackDxy, patatrackDz); +} + +void L2TauNNProducerAlpaka::fillPatatracks(tensorflow::Tensor& cellGridMatrix, + const std::vector& allTaus, + const TracksHost& patatracks_tsoa, + const ZVertexHost& patavtx_soa, + const reco::BeamSpot& beamspot, + const MagneticField* magfi) { + using NNInputs = L2TauTagNNv1::NNInputs; + using patatrackHelpers = TracksUtilities; + float deta, dphi; + int eta_idx = 0; + int phi_idx = 0; + int tau_idx = 0; + + 
auto getCell = [&](NNInputs input) -> float& { + return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input); + }; + + std::vector trkGood; + std::vector vtxGood; + + selectGoodTracksAndVertices(patavtx_soa, patatracks_tsoa, trkGood, vtxGood); + + const int nTaus = allTaus.size(); + for (tau_idx = 0; tau_idx < nTaus; tau_idx++) { + const float tauEta = allTaus[tau_idx]->eta(); + const float tauPhi = allTaus[tau_idx]->phi(); + + for (const auto it : trkGood) { + const float patatrackPt = patatracks_tsoa.const_view()[it].pt(); + if (patatrackPt <= 0) + continue; + const float patatrackPhi = reco::phi(patatracks_tsoa.const_view(), it); + const float patatrackEta = patatracks_tsoa.const_view()[it].eta(); + const float patatrackCharge = reco::charge(patatracks_tsoa.const_view(), it); + const float patatrackChi2OverNdof = patatracks_tsoa.view()[it].chi2(); + const auto nHits = patatrackHelpers::nHits(patatracks_tsoa.const_view(), it); + if (nHits <= 0) + continue; + const int patatrackNdof = 2 * std::min(6, nHits) - 5; + + const int vtx_idx_assTrk = patavtx_soa.view()[it].idv(); + if (reco::deltaR2(patatrackEta, patatrackPhi, tauEta, tauPhi) < dR2_max) { + std::tie(deta, dphi, eta_idx, phi_idx) = + getEtaPhiIndices(patatrackEta, patatrackPhi, allTaus[tau_idx]->polarP4()); + getCell(NNInputs::PatatrackPtSum) += patatrackPt; + getCell(NNInputs::PatatrackSize) += 1.; + getCell(NNInputs::PatatrackChargeSum) += patatrackCharge; + getCell(NNInputs::PatatrackDeltaEta) += deta * patatrackPt; + getCell(NNInputs::PatatrackDeltaPhi) += dphi * patatrackPt; + getCell(NNInputs::PatatrackChi2OverNdof) += patatrackChi2OverNdof * patatrackPt; + getCell(NNInputs::PatatrackNdof) += patatrackNdof * patatrackPt; + std::pair impactParameters = impactParameter(it, patatracks_tsoa, patatrackPhi, beamspot, magfi); + getCell(NNInputs::PatatrackDxy) += impactParameters.first * patatrackPt; + getCell(NNInputs::PatatrackDz) += impactParameters.second * patatrackPt; + if 
((std::find(vtxGood.begin(), vtxGood.end(), vtx_idx_assTrk) != vtxGood.end())) { + getCell(NNInputs::PatatrackPtSumWithVertex) += patatrackPt; + getCell(NNInputs::PatatrackSizeWithVertex) += 1.; + } + } + } + + // normalize to sum and define stdDev + for (eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) { + for (phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) { + getCell(NNInputs::nVertices) = vtxGood.size(); + if (getCell(NNInputs::PatatrackPtSum) > 0.) { + getCell(NNInputs::PatatrackDeltaEta) /= getCell(NNInputs::PatatrackPtSum); + getCell(NNInputs::PatatrackDeltaPhi) /= getCell(NNInputs::PatatrackPtSum); + getCell(NNInputs::PatatrackChi2OverNdof) /= getCell(NNInputs::PatatrackPtSum); + getCell(NNInputs::PatatrackNdof) /= getCell(NNInputs::PatatrackPtSum); + getCell(NNInputs::PatatrackDxy) /= getCell(NNInputs::PatatrackPtSum); + getCell(NNInputs::PatatrackDz) /= getCell(NNInputs::PatatrackPtSum); + } + } + } + } +} + +std::vector L2TauNNProducerAlpaka::getTauScore(const tensorflow::Tensor& cellGridMatrix) { + std::vector pred_tensor; + tensorflow::run(L2cacheData_->session, {{inputTensorName_, cellGridMatrix}}, {outputTensorName_}, &pred_tensor); + const int nTau = cellGridMatrix.shape().dim_size(0); + std::vector pred_vector(nTau); + for (int tau_idx = 0; tau_idx < nTau; ++tau_idx) { + pred_vector[tau_idx] = pred_tensor[0].matrix()(tau_idx, 0); + } + + return pred_vector; +} + +void L2TauNNProducerAlpaka::produce(edm::Event& event, const edm::EventSetup& eventsetup) { + std::vector> TauCollectionMap(L1TauDesc_.size()); + l1t::TauVectorRef allTaus; + + for (size_t inp_idx = 0; inp_idx < L1TauDesc_.size(); inp_idx++) { + l1t::TauVectorRef l1Taus; + auto const& l1TriggeredTaus = event.get(L1TauDesc_[inp_idx].inputToken_); + l1TriggeredTaus.getObjects(trigger::TriggerL1Tau, l1Taus); + TauCollectionMap.at(inp_idx).resize(l1Taus.size()); + + for (size_t l1_idx = 0; l1_idx < l1Taus.size(); l1_idx++) { + size_t tau_idx; + const auto iter = 
std::find(allTaus.begin(), allTaus.end(), l1Taus[l1_idx]); + if (iter != allTaus.end()) { + tau_idx = std::distance(allTaus.begin(), iter); + } else { + allTaus.push_back(l1Taus[l1_idx]); + tau_idx = allTaus.size() - 1; + } + TauCollectionMap.at(inp_idx).at(l1_idx) = tau_idx; + } + } + const auto ebCal = event.getHandle(ebToken_); + const auto eeCal = event.getHandle(eeToken_); + const auto hbhe = event.getHandle(hbheToken_); + const auto ho = event.getHandle(hoToken_); + auto const& patatracks_SoA = event.get(pataTracksToken_); + auto const& vertices_SoA = event.get(pataVerticesToken_); + const auto bsHandle = event.getHandle(beamSpotToken_); + + auto const fieldESH = eventsetup.getHandle(bFieldToken_); + auto const geometry = eventsetup.getHandle(geometryToken_); + + caloRecHitCollections caloRecHits; + caloRecHits.hbhe = &*hbhe; + caloRecHits.ho = &*ho; + caloRecHits.eb = &*ebCal; + caloRecHits.ee = &*eeCal; + caloRecHits.geometry = &*geometry; + + const int nTaus = allTaus.size(); + tensorflow::Tensor cellGridMatrix(tensorflow::DT_FLOAT, + {nTaus, L2TauTagNNv1::nCellEta, L2TauTagNNv1::nCellPhi, L2TauTagNNv1::nVars}); + const int n_inputs = nTaus * L2TauTagNNv1::nCellEta * L2TauTagNNv1::nCellPhi * L2TauTagNNv1::nVars; + for (int input_idx = 0; input_idx < n_inputs; ++input_idx) { + cellGridMatrix.flat()(input_idx) = 0; + } + fillL1TauVars(cellGridMatrix, allTaus); + + fillCaloRecHits(cellGridMatrix, allTaus, caloRecHits); + + fillPatatracks(cellGridMatrix, allTaus, patatracks_SoA, vertices_SoA, *bsHandle, fieldESH.product()); + + standardizeTensor(cellGridMatrix); + + if (debugLevel_ > 0) { + checknan(cellGridMatrix, debugLevel_); + } + + std::vector tau_score = getTauScore(cellGridMatrix); + + for (size_t inp_idx = 0; inp_idx < L1TauDesc_.size(); inp_idx++) { + const size_t nTau = TauCollectionMap[inp_idx].size(); + auto tau_tags = std::make_unique>(nTau); + for (size_t tau_pos = 0; tau_pos < nTau; ++tau_pos) { + const auto tau_idx = 
TauCollectionMap[inp_idx][tau_pos]; + if (debugLevel_ > 0) { + edm::LogInfo("DebugInfo") << event.id().event() << " \t " << (allTaus[tau_idx])->pt() << " \t " + << tau_score.at(tau_idx) << std::endl; + } + (*tau_tags)[tau_pos] = tau_score.at(tau_idx); + } + event.put(std::move(tau_tags), L1TauDesc_[inp_idx].CollectionName); + } +} +//define this as a plug-in +#include "FWCore/Framework/interface/MakerMacros.h" +DEFINE_FWK_MODULE(L2TauNNProducerAlpaka); diff --git a/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py b/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py index 895ba32eca71a..f5ba3ad7df1da 100644 --- a/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py +++ b/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py @@ -1,4 +1,5 @@ import FWCore.ParameterSet.Config as cms +from HeterogeneousCore.AlpakaCore.functions import * from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA from RecoTracker.PixelTrackFitting.PixelTracks_cff import * @@ -110,11 +111,9 @@ alpaka.toReplaceWith(pixelVertices, _pixelVertexFromSoAAlpaka.clone()) # pixel vertex SoA producer with alpaka on the cpu, for validation -pixelVerticesAlpakaSerial = pixelVerticesAlpaka.clone( - pixelTrackSrc = 'pixelTracksAlpakaSerial', - alpaka = None +pixelVerticesAlpakaSerial = makeSerialClone(pixelVerticesAlpaka, + pixelTrackSrc = 'pixelTracksAlpakaSerial' ) -pixelVerticesAlpakaSerial._TypedParameterizable__type = 'alpaka_serial_sync' + pixelVerticesAlpaka._TypedParameterizable__type.removesuffix('@alpaka') alpaka.toReplaceWith(pixelVerticesTask, cms.Task( # Build the pixel vertices in SoA format with alpaka on the device diff --git a/RecoTracker/Configuration/python/customizePixelOnlyForProfiling.py b/RecoTracker/Configuration/python/customizePixelOnlyForProfiling.py index 3d121a8736f8e..55a02f83f913c 100644 --- a/RecoTracker/Configuration/python/customizePixelOnlyForProfiling.py +++ 
b/RecoTracker/Configuration/python/customizePixelOnlyForProfiling.py @@ -3,10 +3,12 @@ # Customise the Pixel-only reconstruction to run on GPU # # Run the unpacker, clustering, ntuplets, track fit and vertex reconstruction on GPU. +# CUDA and Alpaka co-living here for the moment + def customizePixelOnlyForProfilingGPUOnly(process): process.consumer = cms.EDAnalyzer("GenericConsumer", - eventProducts = cms.untracked.vstring('pixelTracksCUDA', 'pixelVerticesCUDA') + eventProducts = cms.untracked.vstring('pixelTracksCUDA', 'pixelVerticesCUDA', '*DeviceProduct_pixelTracksAlpaka_*_*', '*DeviceProduct_pixelVerticesAlpaka_*_*') ) process.consume_step = cms.EndPath(process.consumer) @@ -25,10 +27,8 @@ def customizePixelOnlyForProfilingGPUOnly(process): # tracks and vertices on the CPU in SoA format, without conversion to legacy format. def customizePixelOnlyForProfilingGPUWithHostCopy(process): - #? process.siPixelRecHitSoAFromLegacy.convertToLegacy = False - process.consumer = cms.EDAnalyzer("GenericConsumer", - eventProducts = cms.untracked.vstring('pixelTracksSoA', 'pixelVerticesSoA') + eventProducts = cms.untracked.vstring('pixelTracksSoA', 'pixelVerticesSoA', 'pixelTracksAlpaka', 'pixelVerticesAlpaka') ) process.consume_step = cms.EndPath(process.consumer) diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.cu b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.cu index efb2a2e17715c..6e07126e9e428 100644 --- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.cu +++ b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.cu @@ -1,8 +1,9 @@ -#include "RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h" #include -// #define NTUPLE_DEBUG -// #define GPU_DEBUG +#include "RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h" + +//#define GPU_DEBUG +//#define NTUPLE_DEBUG template void CAHitNtupletGeneratorKernelsGPU::launchKernels(const HitsConstView &hh, diff --git 
a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.h b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.h index 0865fa5cbc46a..250aef21c1d6a 100644 --- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.h +++ b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.h @@ -1,18 +1,17 @@ #ifndef RecoTracker_PixelSeeding_plugins_CAHitNtupletGeneratorKernels_h #define RecoTracker_PixelSeeding_plugins_CAHitNtupletGeneratorKernels_h -// #define GPU_DEBUG +//#define GPU_DEBUG +//#define DUMP_GPU_TK_TUPLES -#include "GPUCACell.h" -#include "gpuPixelDoublets.h" - -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" #include "CUDADataFormats/Common/interface/HeterogeneousSoA.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h" +#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" #include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" -// #define DUMP_GPU_TK_TUPLES +#include "GPUCACell.h" +#include "gpuPixelDoublets.h" namespace caHitNtupletGenerator { diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsAlloc.cc b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsAlloc.cc index 6acff4abbd531..64148d5f5ba81 100644 --- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsAlloc.cc +++ b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsAlloc.cc @@ -2,7 +2,8 @@ #include "CAHitNtupletGeneratorKernels.h" -// #define GPU_DEBUG +//#define GPU_DEBUG + template #ifdef __CUDACC__ void CAHitNtupletGeneratorKernelsGPU::allocateOnGPU(int32_t nHits, cudaStream_t stream) { diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h 
b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h index 540c0b92f9015..57e4ea6f9441f 100644 --- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h +++ b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h @@ -2,8 +2,8 @@ // Original Author: Felice Pantaleo, CERN // -// #define NTUPLE_DEBUG -// #define GPU_DEBUG +//#define NTUPLE_DEBUG +//#define GPU_DEBUG #include #include @@ -11,15 +11,14 @@ #include +#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" - -#include "CAStructures.h" #include "CAHitNtupletGeneratorKernels.h" +#include "CAStructures.h" #include "GPUCACell.h" #include "gpuFishbone.h" #include "gpuPixelDoublets.h" diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorOnGPU.cc b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorOnGPU.cc index faf0bae6fb0a9..5100cf734142c 100644 --- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorOnGPU.cc +++ b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorOnGPU.cc @@ -2,8 +2,8 @@ // Original Author: Felice Pantaleo, CERN // -// #define GPU_DEBUG -// #define DUMP_GPU_TK_TUPLES +//#define GPU_DEBUG +//#define DUMP_GPU_TK_TUPLES #include #include diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc index d003fd97d105a..8f898872a66f4 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc @@ -2,8 +2,8 @@ // Original Author: 
Felice Pantaleo, CERN // -#define GPU_DEBUG -// #define DUMP_GPU_TK_TUPLES +//#define GPU_DEBUG +//#define DUMP_GPU_TK_TUPLES #include #include diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc index 9209ab87c9cbc..44e3295bdb606 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc @@ -11,8 +11,8 @@ #include #endif -#define NTUPLE_DEBUG -#define GPU_DEBUG +//#define GPU_DEBUG +//#define NTUPLE_DEBUG namespace ALPAKA_ACCELERATOR_NAMESPACE { diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h index 0b8a11c63404c..d55be09e6e497 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h @@ -1,23 +1,25 @@ #ifndef RecoPixelVertexing_PixelTriplets_CAHitNtupletGeneratorKernels_h #define RecoPixelVertexing_PixelTriplets_CAHitNtupletGeneratorKernels_h -// #define GPU_DEBUG -#include -#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +//#define GPU_DEBUG +//#define DUMP_GPU_TK_TUPLES + #include -#include "CACell.h" -#include "CAPixelDoublets.h" -#include "CAStructures.h" -#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h" +#include + #include "DataFormats/TrackSoA/interface/TrackDefinitions.h" #include "DataFormats/TrackSoA/interface/TracksHost.h" +#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h" #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" #include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h" #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include 
"HeterogeneousCore/AlpakaInterface/interface/memory.h" -// #define DUMP_GPU_TK_TUPLES +#include "CACell.h" +#include "CAPixelDoublets.h" +#include "CAStructures.h" namespace ALPAKA_ACCELERATOR_NAMESPACE { namespace caHitNtupletGenerator { diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h index ff70abcad2b41..b809caa2e5736 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h @@ -2,8 +2,8 @@ // Original Author: Felice Pantaleo, CERN // -// #define NTUPLE_DEBUG -// #define GPU_DEBUG +//#define GPU_DEBUG +//#define NTUPLE_DEBUG #include #include @@ -250,7 +250,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { float mc = maxScore; uint16_t im = tkNotFound; - auto score = [&](auto it) { return std::abs(TracksUtilities::tip(tracks_view, it)); }; + auto score = [&](auto it) { return std::abs(reco::tip(tracks_view, it)); }; // full crazy combinatorics int ntr = thisCell.tracks().size(); @@ -753,7 +753,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if (hitToTuple.size(idx) < 2) continue; - auto score = [&](auto it, auto nl) { return std::abs(TracksUtilities::tip(tracks_view, it)); }; + auto score = [&](auto it, auto nl) { return std::abs(reco::tip(tracks_view, it)); }; // full combinatorics for (auto ip = hitToTuple.begin(idx); ip < hitToTuple.end(idx) - 1; ++ip) { @@ -874,7 +874,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) { if (tracks_view[*it].quality() <= good) continue; - onlyTriplets &= TracksUtilities::isTriplet(tracks_view, *it); + onlyTriplets &= reco::isTriplet(tracks_view, *it); if (!onlyTriplets) break; } @@ -886,9 +886,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // for triplets choose best tip! (should we first find best quality???) 
for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) { auto const it = *ip; - if (tracks_view[it].quality() >= good && - std::abs(TracksUtilities::tip(tracks_view, it)) < mc) { - mc = std::abs(TracksUtilities::tip(tracks_view, it)); + if (tracks_view[it].quality() >= good && std::abs(reco::tip(tracks_view, it)) < mc) { + mc = std::abs(reco::tip(tracks_view, it)); im = it; } } @@ -933,9 +932,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // choose best tip! (should we first find best quality???) for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) { auto const it = *ip; - if (tracks_view[it].quality() >= good && - std::abs(TracksUtilities::tip(tracks_view, it)) < mc) { - mc = std::abs(TracksUtilities::tip(tracks_view, it)); + if (tracks_view[it].quality() >= good && std::abs(reco::tip(tracks_view, it)) < mc) { + mc = std::abs(reco::tip(tracks_view, it)); im = it; } } @@ -946,8 +944,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // mark worse ambiguities for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) { auto const it = *ip; - if (tracks_view[it].quality() > reject && TracksUtilities::isTriplet(tracks_view, it) && - it != im) + if (tracks_view[it].quality() > reject && reco::isTriplet(tracks_view, it) && it != im) tracks_view[it].quality() = reject; //no race: simple assignment of the same constant } @@ -980,12 +977,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { nh, tracks_view[i].nLayers(), reco::charge(tracks_view, i), - //TracksUtilities::charge(tracks_view, i), tracks_view[i].pt(), tracks_view[i].eta(), - TracksUtilities::phi(tracks_view, i), - TracksUtilities::tip(tracks_view, i), - TracksUtilities::zip(tracks_view, i), + reco::phi(tracks_view, i), + reco::tip(tracks_view, i), + reco::zip(tracks_view, i), tracks_view[i].chi2(), hh[*tracks_view.hitIndices().begin(i)].zGlobal(), hh[*(tracks_view.hitIndices().begin(i) + 1)].zGlobal(), diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h 
b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h index f6373893c8e88..234b9b7527a3c 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h @@ -6,18 +6,22 @@ #include #include #include + #include -#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" -#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" -#include "HeterogeneousCore/AlpakaInterface/interface/VecArray.h" -#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" + #include "DataFormats/Math/interface/approx_atan2.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "CAStructures.h" +#include "HeterogeneousCore/AlpakaInterface/interface/VecArray.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + #include "CACell.h" +#include "CAStructures.h" -#define GPU_DEBUG +//#define GPU_DEBUG //#define NTUPLE_DEBUG + namespace ALPAKA_ACCELERATOR_NAMESPACE { namespace caPixelDoublets { using namespace cms::alpakatools; @@ -187,8 +191,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const auto& [firstElementIdxNoStrideY, endElementIdxNoStrideY] = cms::alpakatools::element_index_range_in_grid(acc, 0u, dimIndexY); uint32_t firstElementIdxY = firstElementIdxNoStrideY; + uint32_t endElementIdxY = endElementIdxNoStrideY; + + //const uint32_t incY = cms::alpakatools::requires_single_thread_per_block_v ? 1 : gridDimensionY; + for (uint32_t j = firstElementIdxY; j < ntot; j++) { + if (not cms::alpakatools::next_valid_element_index_strided( + j, firstElementIdxY, endElementIdxY, gridDimensionY, ntot)) + break; - for (uint32_t j = firstElementIdxY; j < ntot; j += gridDimensionY) { while (j >= innerLayerCumulativeSize[pairLayerId++]) ; --pairLayerId; // move to lower_bound ?? 
@@ -269,7 +279,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // Here we parallelize in X uint32_t firstElementIdxX = firstElementIdxNoStrideX; - for (uint32_t pIndex = firstElementIdxX; pIndex < maxpIndex; pIndex += blockDimensionX) { + uint32_t endElementIdxX = endElementIdxNoStrideX; + + for (uint32_t pIndex = firstElementIdxX; pIndex < maxpIndex; ++pIndex) { + if (not cms::alpakatools::next_valid_element_index_strided( + pIndex, firstElementIdxX, endElementIdxX, blockDimensionX, maxpIndex)) + break; auto oi = p[pIndex]; // auto oi = __ldg(p); is not allowed since __ldg is device-only ALPAKA_ASSERT_OFFLOAD(oi >= offsets[outer]); ALPAKA_ASSERT_OFFLOAD(oi < offsets[outer + 1]); diff --git a/RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h b/RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h index ac5975abb2dd5..583021081d534 100644 --- a/RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h +++ b/RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h @@ -9,15 +9,15 @@ #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h" #include "DataFormats/Math/interface/approx_atan2.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h" #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "CAStructures.h" #include "GPUCACell.h" -// #define GPU_DEBUG -// #define NTUPLE_DEBUG +//#define GPU_DEBUG +//#define NTUPLE_DEBUG namespace gpuPixelDoublets { diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc index e3acdd4c48938..4402a1891b2a4 100644 --- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc +++ b/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc @@ -35,7 +35,8 @@ * This class creates "legacy" reco::Track * 
objects from the output of SoA CA. */ -#define GPU_DEBUG + +//#define GPU_DEBUG template class PixelTrackProducerFromSoAAlpaka : public edm::global::EDProducer<> { @@ -198,7 +199,7 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID strea // mind: this values are respect the beamspot! float chi2 = tsoa.view()[it].chi2(); - float phi = tracksHelpers::phi(tsoa.view(), it); + float phi = reco::phi(tsoa.view(), it); riemannFit::Vector5d ipar, opar; riemannFit::Matrix5d icov, ocov; @@ -246,7 +247,7 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID strea tracks.emplace_back(track.release(), hits); } #ifdef GPU_DEBUG - std::cout << "processed " << nt << " good tuples " << tracks.size() << "out of " << indToEdm.size() << std::endl; + std::cout << "processed " << nt << " good tuples " << tracks.size() << " out of " << indToEdm.size() << std::endl; #endif // store tracks storeTracks(iEvent, tracks, httopo); diff --git a/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py b/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py index aa2fb861de818..046caa0b033f3 100644 --- a/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py +++ b/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py @@ -1,4 +1,5 @@ import FWCore.ParameterSet.Config as cms +from HeterogeneousCore.AlpakaCore.functions import * from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA from RecoLocalTracker.SiStripRecHitConverter.StripCPEfromTrackAngle_cfi import * @@ -218,11 +219,9 @@ phase2_tracker.toReplaceWith(pixelTracksAlpaka,_pixelTracksAlpakaPhase2.clone()) # pixel tracks SoA producer on the cpu, for validation -pixelTracksAlpakaSerial = pixelTracksAlpaka.clone( - pixelRecHitSrc = 'siPixelRecHitsPreSplittingAlpakaSerial', - alpaka = None +pixelTracksAlpakaSerial = makeSerialClone(pixelTracksAlpaka, + pixelRecHitSrc = 'siPixelRecHitsPreSplittingAlpakaSerial' ) -pixelTracksAlpakaSerial._TypedParameterizable__type = 'alpaka_serial_sync' + 
pixelTracksAlpaka._TypedParameterizable__type.removesuffix('@alpaka') # legacy pixel tracks from SoA from RecoTracker.PixelTrackFitting.pixelTrackProducerFromSoAAlpakaPhase1_cfi import pixelTrackProducerFromSoAAlpakaPhase1 as _pixelTrackProducerFromSoAAlpakaPhase1 diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc index 83bc8f0d84ec2..c40d9adda93c5 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc @@ -46,7 +46,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // initialize soa... soa[idx].idv() = -1; - if (helper::isTriplet(tracks_view, idx)) + if (reco::isTriplet(tracks_view, idx)) continue; // no triplets if (quality[idx] < ::pixelTrack::Quality::highPurity) continue; @@ -62,7 +62,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto& data = pws; auto it = alpaka::atomicAdd(acc, &data.ntrks(), 1u, alpaka::hierarchy::Blocks{}); data[it].itrk() = idx; - data[it].zt() = helper::zip(tracks_view, idx); + data[it].zt() = reco::zip(tracks_view, idx); data[it].ezt2() = tracks_view[idx].covariance()(14); data[it].ptt2() = pt * pt; }