From 81bc63b9abeefa5d71f1209a792ddd72b7866e87 Mon Sep 17 00:00:00 2001 From: AdrianoDee Date: Tue, 19 Apr 2022 09:18:10 +0200 Subject: [PATCH 1/2] GPUvsCPU DQM for pixels - adding pixel hits SoA from GPU to CPU copy; - updating pixel only wfs accordingly (.502,.503). --- .../interface/TrackingRecHit2DHeterogeneous.h | 142 +++++++++++++++--- .../src/TrackingRecHit2DHeterogeneous.cc | 14 ++ .../plugins/SiPixelPhase1MonitorRecHitsSoA.cc | 4 +- .../plugins/SiPixelPhase1MonitorTrackSoA.cc | 2 +- ...ixelPhase1HeterogenousDQM_FirstStep_cff.py | 6 +- .../plugins/SiPixelRecHitSoAFromCUDA.cc | 91 +++++++++++ .../python/SiPixelRecHits_cfi.py | 31 +++- .../python/RecoPixelVertexing_cff.py | 7 + .../python/PixelTracks_cff.py | 17 ++- 9 files changed, 268 insertions(+), 46 deletions(-) create mode 100644 RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromCUDA.cc diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index d85673238942b..8ce37f280ac6c 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -8,6 +8,27 @@ template class TrackingRecHit2DHeterogeneous { public: + enum class Storage32 { + kXLocal = 0, + kYLocal = 1, + kXerror = 2, + kYerror = 3, + kCharge = 4, + kXGlobal = 5, + kYGlobal = 6, + kZGlobal = 7, + kRGlobal = 8, + kPhiStorage = 9, + kLayers = 10 + }; + + enum class Storage16 { + kDetId = 0, + kPhi = 1, + kXSize = 2, + kYSize = 3, + }; + template using unique_ptr = typename Traits::template unique_ptr; @@ -24,6 +45,8 @@ class TrackingRecHit2DHeterogeneous { cudaStream_t stream, TrackingRecHit2DHeterogeneous const* input = nullptr); + explicit TrackingRecHit2DHeterogeneous( + float* store32, uint16_t* store16, uint32_t* modules, int nHits, cudaStream_t stream = nullptr); ~TrackingRecHit2DHeterogeneous() = default; TrackingRecHit2DHeterogeneous(const TrackingRecHit2DHeterogeneous&) = delete; @@ -44,10 +67,13 @@ class TrackingRecHit2DHeterogeneous { auto phiBinnerStorage() { return m_phiBinnerStorage; } auto iphi() { return m_iphi; } - // only the local coord and detector index cms::cuda::host::unique_ptr localCoordToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr store16ToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr store32ToHostAsync(cudaStream_t stream) const; + // needs specialization for Host void copyFromGPU(TrackingRecHit2DHeterogeneous const* input, cudaStream_t stream); @@ -55,7 +81,7 @@ class TrackingRecHit2DHeterogeneous { static constexpr uint32_t n16 = 4; // number of elements in m_store16 static constexpr uint32_t n32 = 10; // number of elements in m_store32 static_assert(sizeof(uint32_t) == sizeof(float)); // just stating the obvious - + static_assert(n32 == static_cast(Storage32::kLayers)); unique_ptr m_store16; //! unique_ptr m_store32; //! @@ -108,7 +134,7 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous( // if empy do not bother if (0 == nHits) { - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { cms::cuda::copyAsync(m_view, view, stream); } else { m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version @@ -123,7 +149,7 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous( // so unless proven VERY inefficient we keep it ordered as generated // host copy is "reduced" (to be reviewed at some point) - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { // it has to compile for ALL cases copyFromGPU(input, stream); } else { @@ -139,43 +165,113 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous( static_assert(sizeof(TrackingRecHit2DSOAView::hindex_type) == sizeof(float)); static_assert(sizeof(TrackingRecHit2DSOAView::hindex_type) == sizeof(TrackingRecHit2DSOAView::PhiBinner::index_type)); - auto get32 = [&](int i) { return m_store32.get() + i * nHits; }; + auto get32 = [&](Storage32 i) { return m_store32.get() + static_cast(i) * nHits; }; // copy all the pointers m_phiBinner = view->m_phiBinner = m_PhiBinnerStore.get(); m_phiBinnerStorage = view->m_phiBinnerStorage = - reinterpret_cast(get32(9)); + reinterpret_cast(get32(Storage32::kPhiStorage)); - view->m_xl = get32(0); - view->m_yl = get32(1); - view->m_xerr = get32(2); - view->m_yerr = get32(3); - view->m_chargeAndStatus = reinterpret_cast(get32(4)); + view->m_xl = get32(Storage32::kXLocal); + view->m_yl = get32(Storage32::kYLocal); + view->m_xerr = get32(Storage32::kXerror); + view->m_yerr = get32(Storage32::kYerror); + view->m_chargeAndStatus = reinterpret_cast(get32(Storage32::kCharge)); - if constexpr (!std::is_same::value) { + if constexpr (!std::is_same_v) { assert(input == nullptr); - view->m_xg = get32(5); - view->m_yg = get32(6); - view->m_zg = get32(7); - view->m_rg = get32(8); + view->m_xg = get32(Storage32::kXGlobal); + view->m_yg = get32(Storage32::kYGlobal); + view->m_zg = get32(Storage32::kZGlobal); + view->m_rg = get32(Storage32::kRGlobal); - auto get16 = [&](int i) { return m_store16.get() + i * nHits; }; - m_iphi = view->m_iphi = reinterpret_cast(get16(1)); + auto get16 = [&](Storage16 i) { return m_store16.get() + static_cast(i) * nHits; }; + m_iphi = view->m_iphi = reinterpret_cast(get16(Storage16::kPhi)); - view->m_xsize = reinterpret_cast(get16(2)); - view->m_ysize = reinterpret_cast(get16(3)); - view->m_detInd = get16(0); + view->m_xsize = reinterpret_cast(get16(Storage16::kXSize)); + view->m_ysize = reinterpret_cast(get16(Storage16::kYSize)); + view->m_detInd = get16(Storage16::kDetId); m_phiBinner = view->m_phiBinner = m_PhiBinnerStore.get(); - m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast(get32(n32)); + m_hitsLayerStart = view->m_hitsLayerStart = reinterpret_cast(get32(Storage32::kLayers)); } // transfer view - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { cms::cuda::copyAsync(m_view, view, stream); } else { m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version } } +//this is intended to be used only for CPU SoA but doesn't hurt to have it for all cases +template +TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous( + float* store32, uint16_t* store16, uint32_t* modules, int nHits, cudaStream_t stream) + : m_nHits(nHits), m_hitsModuleStart(modules) { + auto view = Traits::template make_host_unique(stream); + + m_view = Traits::template make_unique(stream); + + view->m_nHits = nHits; + + if (0 == nHits) { + if constexpr (std::is_same_v) { + cms::cuda::copyAsync(m_view, view, stream); + } else { + m_view = std::move(view); + } + return; + } + + m_store16 = Traits::template make_unique(nHits * n16, stream); + m_store32 = Traits::template make_unique(nHits * n32, stream); + m_PhiBinnerStore = Traits::template make_unique(stream); + m_AverageGeometryStore = Traits::template make_unique(stream); + + view->m_averageGeometry = m_AverageGeometryStore.get(); + view->m_hitsModuleStart = m_hitsModuleStart; + + //store transfer + if constexpr (std::is_same_v) { + cms::cuda::copyAsync(m_store16, store16, stream); + cms::cuda::copyAsync(m_store32, store32, stream); + } else { + std::copy(store32, store32 + nHits * n32, m_store32.get()); // want to copy it + std::copy(store16, store16 + nHits * n16, m_store16.get()); + } + + //getters + auto get32 = [&](Storage32 i) { return m_store32.get() + static_cast(i) * nHits; }; + auto get16 = [&](Storage16 i) { return m_store16.get() + static_cast(i) * nHits; }; + + //Store 32 + view->m_xl = get32(Storage32::kXLocal); + view->m_yl = get32(Storage32::kYLocal); + view->m_xerr = get32(Storage32::kXerror); + view->m_yerr = get32(Storage32::kYerror); + view->m_chargeAndStatus = reinterpret_cast(get32(Storage32::kCharge)); + view->m_xg = get32(Storage32::kXGlobal); + view->m_yg = get32(Storage32::kYGlobal); + view->m_zg = get32(Storage32::kZGlobal); + view->m_rg = get32(Storage32::kRGlobal); + + m_phiBinner = view->m_phiBinner = m_PhiBinnerStore.get(); + m_phiBinnerStorage = view->m_phiBinnerStorage = + reinterpret_cast(get32(Storage32::kPhiStorage)); + + //Store 16 + view->m_detInd = get16(Storage16::kDetId); + m_iphi = view->m_iphi = reinterpret_cast(get16(Storage16::kPhi)); + view->m_xsize = reinterpret_cast(get16(Storage16::kXSize)); + view->m_ysize = reinterpret_cast(get16(Storage16::kYSize)); + + // transfer view + if constexpr (std::is_same_v) { + cms::cuda::copyAsync(m_view, view, stream); + } else { + m_view = std::move(view); + } +} + #endif // CUDADataFormats_TrackingRecHit_interface_TrackingRecHit2DHeterogeneous_h diff --git a/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc index 54622fcf62553..fc6a05ba9ed3e 100644 --- a/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc +++ b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc @@ -11,6 +11,20 @@ cms::cuda::host::unique_ptr TrackingRecHit2DGPU::localCoordToHostAsync( return ret; } +template <> +cms::cuda::host::unique_ptr TrackingRecHit2DGPU::store32ToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(static_cast(n32) * nHits(), stream); + cms::cuda::copyAsync(ret, m_store32, static_cast(n32) * nHits(), stream); + return ret; +} + +template <> +cms::cuda::host::unique_ptr TrackingRecHit2DGPU::store16ToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(static_cast(n16) * nHits(), stream); + cms::cuda::copyAsync(ret, m_store16, static_cast(n16) * nHits(), stream); + return ret; +} + template <> cms::cuda::host::unique_ptr TrackingRecHit2DGPU::hitsModuleStartToHostAsync(cudaStream_t stream) const { auto ret = cms::cuda::make_host_unique(nMaxModules() + 1, stream); diff --git a/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorRecHitsSoA.cc b/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorRecHitsSoA.cc index f1d4894dc35db..df766e9156cf8 100644 --- a/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorRecHitsSoA.cc +++ b/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorRecHitsSoA.cc @@ -3,7 +3,7 @@ // Package: SiPixelPhase1MonitorRecHitsSoA // Class: SiPixelPhase1MonitorRecHitsSoA // -/**\class SiPixelPhase1MonitorRecHitsSoA SiPixelPhase1MonitorRecHitsSoA.cc +/**\class SiPixelPhase1MonitorRecHitsSoA SiPixelPhase1MonitorRecHitsSoA.cc */ // // Author: Suvankar Roy Chowdhury, Alessandro Rossi @@ -97,7 +97,6 @@ void SiPixelPhase1MonitorRecHitsSoA::analyze(const edm::Event& iEvent, const edm } auto const& rhsoa = *rhsoaHandle; const TrackingRecHit2DSOAView* soa2d = rhsoa.view(); - uint32_t nHits_ = soa2d->nHits(); hnHits->Fill(nHits_); auto detIds = tkGeom_->detUnitIds(); @@ -111,6 +110,7 @@ void SiPixelPhase1MonitorRecHitsSoA::analyze(const edm::Event& iEvent, const edm uint32_t charge = soa2d->charge(i); int16_t sizeX = std::ceil(float(std::abs(soa2d->clusterSizeX(i)) / 8.)); int16_t sizeY = std::ceil(float(std::abs(soa2d->clusterSizeY(i)) / 8.)); + hBFposZP->Fill(zG, fphi); int16_t ysign = yG >= 0 ? 1 : -1; hBFposZR->Fill(zG, rG * ysign); diff --git a/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorTrackSoA.cc b/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorTrackSoA.cc index 47bde4f171ede..aac487b0bdf71 100644 --- a/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorTrackSoA.cc +++ b/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorTrackSoA.cc @@ -3,7 +3,7 @@ // Package: SiPixelPhase1MonitorTrackSoA // Class: SiPixelPhase1MonitorTrackSoA // -/**\class SiPixelPhase1MonitorTrackSoA SiPixelPhase1MonitorTrackSoA.cc +/**\class SiPixelPhase1MonitorTrackSoA SiPixelPhase1MonitorTrackSoA.cc */ // // Author: Suvankar Roy Chowdhury diff --git a/DQM/SiPixelPhase1Heterogeneous/python/SiPixelPhase1HeterogenousDQM_FirstStep_cff.py b/DQM/SiPixelPhase1Heterogeneous/python/SiPixelPhase1HeterogenousDQM_FirstStep_cff.py index dc19a2318a08d..07915be92d413 100644 --- a/DQM/SiPixelPhase1Heterogeneous/python/SiPixelPhase1HeterogenousDQM_FirstStep_cff.py +++ b/DQM/SiPixelPhase1Heterogeneous/python/SiPixelPhase1HeterogenousDQM_FirstStep_cff.py @@ -4,13 +4,11 @@ from DQM.SiPixelPhase1Heterogeneous.siPixelPhase1MonitorRecHitsSoA_cfi import * from Configuration.ProcessModifiers.gpu_cff import gpu -gpu.toModify(siPixelPhase1MonitorRecHitsSoA, pixelHitsSrc = "siPixelRecHitsPreSplittingSoA") - +gpu.toModify(siPixelPhase1MonitorRecHitsSoA, pixelHitsSrc = "siPixelRecHitsPreSplittingSoA") #would be obsloete if .501 is dropped monitorpixelSoASource = cms.Sequence(siPixelPhase1MonitorRecHitsSoA * siPixelPhase1MonitorTrackSoA * siPixelPhase1MonitorVertexSoA) - -#Define the sequence for GPU vs CPU validation +Define the sequence for GPU vs CPU validation #This should run:- individual monitor for the 2 collections + comparison module from DQM.SiPixelPhase1Heterogeneous.siPixelPhase1CompareTrackSoA_cfi import * from DQM.SiPixelPhase1Heterogeneous.siPixelPhase1CompareVertexSoA_cfi import * diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromCUDA.cc new file mode 100644 index 0000000000000..fda418320e70a --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromCUDA.cc @@ -0,0 +1,91 @@ +#include + +#include + +#include "CUDADataFormats/Common/interface/HostProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "DataFormats/Common/interface/DetSetVectorNew.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" + +class SiPixelRecHitSoAFromCUDA : public edm::stream::EDProducer { +public: + explicit SiPixelRecHitSoAFromCUDA(const edm::ParameterSet& iConfig); + ~SiPixelRecHitSoAFromCUDA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + using HMSstorage = HostProduct; + +private: + void acquire(edm::Event const& iEvent, + edm::EventSetup const& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; + void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override; + + const edm::EDGetTokenT> hitsTokenGPU_; // CUDA hits + const edm::EDPutTokenT hitsPutTokenCPU_; + const edm::EDPutTokenT hostPutToken_; + + uint32_t nHits_; + uint32_t nMaxModules_; + + cms::cuda::host::unique_ptr store32_; + cms::cuda::host::unique_ptr store16_; + cms::cuda::host::unique_ptr hitsModuleStart_; +}; + +SiPixelRecHitSoAFromCUDA::SiPixelRecHitSoAFromCUDA(const edm::ParameterSet& iConfig) + : hitsTokenGPU_( + consumes>(iConfig.getParameter("pixelRecHitSrc"))), + hitsPutTokenCPU_(produces()), + hostPutToken_(produces()) {} + +void SiPixelRecHitSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("pixelRecHitSrc", edm::InputTag("siPixelRecHitsPreSplittingCUDA")); + descriptions.addWithDefaultLabel(desc); +} + +void SiPixelRecHitSoAFromCUDA::acquire(edm::Event const& iEvent, + edm::EventSetup const& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) { + cms::cuda::Product const& inputDataWrapped = iEvent.get(hitsTokenGPU_); + cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)}; + auto const& inputData = ctx.get(inputDataWrapped); + + nHits_ = inputData.nHits(); + LogDebug("SiPixelRecHitSoAFromCUDA") << "copying to cpu SoA" << inputData.nHits() << " Hits"; + + if (0 == nHits_) + return; + nMaxModules_ = inputData.nMaxModules(); + store32_ = inputData.store32ToHostAsync(ctx.stream()); + store16_ = inputData.store16ToHostAsync(ctx.stream()); + hitsModuleStart_ = inputData.hitsModuleStartToHostAsync(ctx.stream()); +} + +void SiPixelRecHitSoAFromCUDA::produce(edm::Event& iEvent, edm::EventSetup const& es) { + auto hmsp = std::make_unique(nMaxModules_ + 1); + std::copy(hitsModuleStart_.get(), hitsModuleStart_.get() + nMaxModules_ + 1, hmsp.get()); + + iEvent.emplace(hostPutToken_, std::move(hmsp)); + iEvent.emplace(hitsPutTokenCPU_, store32_.get(), store16_.get(), hitsModuleStart_.get(), nHits_); +} + +DEFINE_FWK_MODULE(SiPixelRecHitSoAFromCUDA); diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index b2be63a4b6216..781447c70b512 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -23,15 +23,18 @@ # convert the pixel rechits from legacy to SoA format from RecoLocalTracker.SiPixelRecHits.siPixelRecHitSoAFromLegacy_cfi import siPixelRecHitSoAFromLegacy as _siPixelRecHitsPreSplittingSoA -siPixelRecHitsPreSplittingSoA = _siPixelRecHitsPreSplittingSoA.clone(convertToLegacy=True) +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitSoAFromCUDA_cfi import siPixelRecHitSoAFromCUDA as _siPixelRecHitSoAFromCUDA + +siPixelRecHitsPreSplittingCPU = _siPixelRecHitsPreSplittingSoA.clone(convertToLegacy=True) + # phase 2 tracker modifier from Configuration.Eras.Modifier_phase2_tracker_cff import phase2_tracker -phase2_tracker.toModify(siPixelRecHitsPreSplittingSoA, +phase2_tracker.toModify(siPixelRecHitsPreSplittingCPU, isPhase2 = True) # modifier used to prompt patatrack pixel tracks reconstruction on cpu from Configuration.ProcessModifiers.pixelNtupletFit_cff import pixelNtupletFit pixelNtupletFit.toModify(siPixelRecHitsPreSplitting, - cpu = siPixelRecHitsPreSplittingSoA.clone() + cpu = _siPixelRecHitsPreSplittingSoA.clone(convertToLegacy=True) ) siPixelRecHitsPreSplittingTask = cms.Task( @@ -48,9 +51,19 @@ # transfer the pixel rechits to the host and convert them from SoA from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromCUDA_cfi import siPixelRecHitFromCUDA as _siPixelRecHitFromCUDA +#this is an alias for the SoA on GPU or CPU to be used for DQM +siPixelRecHitsPreSplittingSoA = SwitchProducerCUDA( + cpu = cms.EDAlias( + siPixelRecHitsPreSplittingCPU = cms.VPSet( + cms.PSet(type = cms.string("cmscudacompatCPUTraitsTrackingRecHit2DHeterogeneous")), + cms.PSet(type = cms.string("uintAsHostProduct")) + )), + cuda = _siPixelRecHitSoAFromCUDA.clone() +) + (gpu & pixelNtupletFit).toModify(siPixelRecHitsPreSplitting, cpu = cms.EDAlias( - siPixelRecHitsPreSplittingSoA = cms.VPSet( + siPixelRecHitsPreSplittingCPU = cms.VPSet( cms.PSet(type = cms.string("SiPixelRecHitedmNewDetSetVector")), cms.PSet(type = cms.string("uintAsHostProduct")) ) @@ -58,10 +71,12 @@ cuda = _siPixelRecHitFromCUDA.clone()) (gpu & pixelNtupletFit).toReplaceWith(siPixelRecHitsPreSplittingTask, cms.Task( - # reconstruct the pixel rechits on the gpu + # reconstruct the pixel rechits on the gpu or on the cpu + # (normally only one of the two is run because only one is consumed from later stages) siPixelRecHitsPreSplittingCUDA, - # producing and converting on cpu - siPixelRecHitsPreSplittingSoA, + siPixelRecHitsPreSplittingCPU, # SwitchProducer wrapping an EDAlias on cpu or the converter from SoA to legacy on gpu - siPixelRecHitsPreSplittingTask.copy() + siPixelRecHitsPreSplittingTask.copy(), + # producing and converting on cpu (if needed) + siPixelRecHitsPreSplittingSoA )) diff --git a/RecoPixelVertexing/Configuration/python/RecoPixelVertexing_cff.py b/RecoPixelVertexing/Configuration/python/RecoPixelVertexing_cff.py index 380586bba9bbc..6954b536aba1f 100644 --- a/RecoPixelVertexing/Configuration/python/RecoPixelVertexing_cff.py +++ b/RecoPixelVertexing/Configuration/python/RecoPixelVertexing_cff.py @@ -53,6 +53,13 @@ ) ) +## GPU vs CPU validation +# force CPU vertexing to use track SoA from CPU chain and not the converted one from GPU chain +from Configuration.ProcessModifiers.gpuValidationPixel_cff import gpuValidationPixel +(pixelNtupletFit & gpu & gpuValidationPixel).toModify(pixelVerticesSoA.cpu, + pixelTrackSrc = "pixelTracksSoA@cpu" +) + (pixelNtupletFit & gpu).toReplaceWith(pixelVerticesTask, cms.Task( # build pixel vertices in SoA format on the GPU pixelVerticesCUDA, diff --git a/RecoPixelVertexing/PixelTrackFitting/python/PixelTracks_cff.py b/RecoPixelVertexing/PixelTrackFitting/python/PixelTracks_cff.py index 449d9cdfd084d..143b062eb7c9d 100644 --- a/RecoPixelVertexing/PixelTrackFitting/python/PixelTracks_cff.py +++ b/RecoPixelVertexing/PixelTrackFitting/python/PixelTracks_cff.py @@ -114,12 +114,6 @@ )) (pixelNtupletFit & ~phase2_tracker).toReplaceWith(pixelTracksTask, cms.Task( - #pixelTracksTrackingRegions, - #pixelFitterByHelixProjections, - #pixelTrackFilterByKinematics, - #pixelTracksSeedLayers, - #pixelTracksHitDoublets, - #pixelTracksHitQuadruplets, # build the pixel ntuplets and the pixel tracks in SoA format on the GPU pixelTracksSoA, # convert the pixel tracks from SoA to legacy format @@ -129,9 +123,9 @@ # "Patatrack" sequence running on GPU (or CPU if not available) from Configuration.ProcessModifiers.gpu_cff import gpu + (pixelNtupletFit & gpu).toModify(pixelTracksSoA.cpu, - pixelRecHitSrc = "siPixelRecHitsPreSplittingSoA", -) + pixelRecHitSrc = "siPixelRecHitsPreSplittingSoA") # build the pixel ntuplets and pixel tracks in SoA format on the GPU pixelTracksCUDA = _pixelTracksCUDA.clone( @@ -159,3 +153,10 @@ # transfer the pixel tracks in SoA format to the CPU, and convert them to legacy format pixelTracksTask.copy() )) + +## GPU vs CPU validation +# force CPU vertexing to use hit SoA from CPU chain and not the converted one from GPU chain +from Configuration.ProcessModifiers.gpuValidationPixel_cff import gpuValidationPixel +(pixelNtupletFit & gpu & gpuValidationPixel).toModify(pixelTracksSoA.cpu, + pixelRecHitSrc = "siPixelRecHitsPreSplittingSoA@cpu" + ) From f8cb46e1c3c8db47e3209cdc67ca777823c1e629 Mon Sep 17 00:00:00 2001 From: AdrianoDee Date: Fri, 29 Apr 2022 12:28:26 +0200 Subject: [PATCH 2/2] Clean up --- .../plugins/SiPixelPhase1MonitorRecHitsSoA.cc | 4 ++-- .../plugins/SiPixelPhase1MonitorTrackSoA.cc | 2 +- .../python/SiPixelPhase1HeterogenousDQM_FirstStep_cff.py | 6 ++++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorRecHitsSoA.cc b/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorRecHitsSoA.cc index df766e9156cf8..f1d4894dc35db 100644 --- a/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorRecHitsSoA.cc +++ b/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorRecHitsSoA.cc @@ -3,7 +3,7 @@ // Package: SiPixelPhase1MonitorRecHitsSoA // Class: SiPixelPhase1MonitorRecHitsSoA // -/**\class SiPixelPhase1MonitorRecHitsSoA SiPixelPhase1MonitorRecHitsSoA.cc +/**\class SiPixelPhase1MonitorRecHitsSoA SiPixelPhase1MonitorRecHitsSoA.cc */ // // Author: Suvankar Roy Chowdhury, Alessandro Rossi @@ -97,6 +97,7 @@ void SiPixelPhase1MonitorRecHitsSoA::analyze(const edm::Event& iEvent, const edm } auto const& rhsoa = *rhsoaHandle; const TrackingRecHit2DSOAView* soa2d = rhsoa.view(); + uint32_t nHits_ = soa2d->nHits(); hnHits->Fill(nHits_); auto detIds = tkGeom_->detUnitIds(); @@ -110,7 +111,6 @@ void SiPixelPhase1MonitorRecHitsSoA::analyze(const edm::Event& iEvent, const edm uint32_t charge = soa2d->charge(i); int16_t sizeX = std::ceil(float(std::abs(soa2d->clusterSizeX(i)) / 8.)); int16_t sizeY = std::ceil(float(std::abs(soa2d->clusterSizeY(i)) / 8.)); - hBFposZP->Fill(zG, fphi); int16_t ysign = yG >= 0 ? 1 : -1; hBFposZR->Fill(zG, rG * ysign); diff --git a/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorTrackSoA.cc b/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorTrackSoA.cc index aac487b0bdf71..47bde4f171ede 100644 --- a/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorTrackSoA.cc +++ b/DQM/SiPixelPhase1Heterogeneous/plugins/SiPixelPhase1MonitorTrackSoA.cc @@ -3,7 +3,7 @@ // Package: SiPixelPhase1MonitorTrackSoA // Class: SiPixelPhase1MonitorTrackSoA // -/**\class SiPixelPhase1MonitorTrackSoA SiPixelPhase1MonitorTrackSoA.cc +/**\class SiPixelPhase1MonitorTrackSoA SiPixelPhase1MonitorTrackSoA.cc */ // // Author: Suvankar Roy Chowdhury diff --git a/DQM/SiPixelPhase1Heterogeneous/python/SiPixelPhase1HeterogenousDQM_FirstStep_cff.py b/DQM/SiPixelPhase1Heterogeneous/python/SiPixelPhase1HeterogenousDQM_FirstStep_cff.py index 07915be92d413..dc19a2318a08d 100644 --- a/DQM/SiPixelPhase1Heterogeneous/python/SiPixelPhase1HeterogenousDQM_FirstStep_cff.py +++ b/DQM/SiPixelPhase1Heterogeneous/python/SiPixelPhase1HeterogenousDQM_FirstStep_cff.py @@ -4,11 +4,13 @@ from DQM.SiPixelPhase1Heterogeneous.siPixelPhase1MonitorRecHitsSoA_cfi import * from Configuration.ProcessModifiers.gpu_cff import gpu -gpu.toModify(siPixelPhase1MonitorRecHitsSoA, pixelHitsSrc = "siPixelRecHitsPreSplittingSoA") #would be obsloete if .501 is dropped +gpu.toModify(siPixelPhase1MonitorRecHitsSoA, pixelHitsSrc = "siPixelRecHitsPreSplittingSoA") + monitorpixelSoASource = cms.Sequence(siPixelPhase1MonitorRecHitsSoA * siPixelPhase1MonitorTrackSoA * siPixelPhase1MonitorVertexSoA) -Define the sequence for GPU vs CPU validation + +#Define the sequence for GPU vs CPU validation #This should run:- individual monitor for the 2 collections + comparison module from DQM.SiPixelPhase1Heterogeneous.siPixelPhase1CompareTrackSoA_cfi import * from DQM.SiPixelPhase1Heterogeneous.siPixelPhase1CompareVertexSoA_cfi import *