Skip to content

Commit

Permalink
Clean up the pixel local reconstruction code (#593)
Browse files Browse the repository at this point in the history
Address the pixel local reconstruction review comments.

General clean up of the pixel local reconstruction code:
  - remove commented out and obsolete code and data members
  - use named constants more consistently
  - update variable names to follow the coding rules and for better consistency
  - use member initializer lists in the constructors
  - allow `if constexpr` in CUDA code
  - use `std::size` instead of hardcoding the array size
  - convert iterator-based loops to range-based loops
  - replace `cout` and `printf` with `LogDebug` or `LogWarning`
  - use put tokens
  - reorganise the auto-generated cfi files and use them more consistently
  - adjust code after rearranging an `#ifdef GPU_DEBUG` block
  - apply code formatting
  - other minor changes

Improve comments:
  - improve comments and remove obsolete ones
  - clarify comments and types regarding `HostProduct`
  - update comments about `GPU_SMALL_EVENTS` being kept for testing purposes
  - add notes about the original cpu code

Reuse some more common code:
  - move common pixel cluster code to `PixelClusterizerBase`
  - extend the `SiPixelCluster` constructor

Rename classes and modules for better consistency:
  - remove the `TrackingRecHit2DCUDA.h` and `gpuClusteringConstants.h` forwarding headers
  - rename `PixelRecHits` to `PixelRecHitGPUKernel`
  - rename SiPixelRecHitFromSOA to SiPixelRecHitFromCUDA
  - rename `siPixelClustersCUDAPreSplitting` to `siPixelClustersPreSplittingCUDA`
  - rename `siPixelRecHitsCUDAPreSplitting` to `siPixelRecHitsPreSplittingCUDA`
  - rename `siPixelRecHitsLegacyPreSplitting` to `siPixelRecHitsPreSplittingLegacy`
  - rename `siPixelRecHitHostSoA` to `siPixelRecHitSoAFromLegacy`

Re-apply changes from cms-sw#29805 that were lost in the Patatrack branch.
  • Loading branch information
fwyzard committed Jan 15, 2021
1 parent fa74c72 commit 3a9e550
Show file tree
Hide file tree
Showing 13 changed files with 41 additions and 48 deletions.
2 changes: 2 additions & 0 deletions CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,10 @@ class TrackSoAT {
namespace pixelTrack {

#ifdef GPU_SMALL_EVENTS
// kept for testing and debugging
constexpr uint32_t maxNumber() { return 2 * 1024; }
#else
// tested on MC events with 55-75 pileup events
constexpr uint32_t maxNumber() { return 32 * 1024; }
#endif

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ def customizePixelTracksSoAonCPU(process):
# ensure the same results when running on GPU (which supports only the 'HLT' payload) and CPU
process.siPixelClustersPreSplitting.cpu.payloadType = cms.string('HLT')

from RecoLocalTracker.SiPixelRecHits.siPixelRecHitHostSoA_cfi import siPixelRecHitHostSoA
process.siPixelRecHitsPreSplitting = siPixelRecHitHostSoA.clone(
from RecoLocalTracker.SiPixelRecHits.siPixelRecHitSoAFromLegacy_cfi import siPixelRecHitSoAFromLegacy
process.siPixelRecHitsPreSplitting = siPixelRecHitSoAFromLegacy.clone(
convertToLegacy = True
)

Expand Down Expand Up @@ -54,8 +54,8 @@ def customizePixelTracksSoAonCPUForProfiling(process):
process.MessageLogger.cerr.FwkReport.reportEvery = 100

process = customizePixelTracksSoAonCPU(process)
process.siPixelRecHitHostSoA.convertToLegacy = False
process.siPixelRecHitSoAFromLegacy.convertToLegacy = False

process.TkSoA = cms.Path(process.offlineBeamSpot+process.siPixelDigis+process.siPixelClustersPreSplitting+process.siPixelRecHitHostSoA+process.pixelTrackSoA+process.pixelVertexSoA)
process.TkSoA = cms.Path(process.offlineBeamSpot + process.siPixelDigis + process.siPixelClustersPreSplitting + process.siPixelRecHitSoAFromLegacy + process.pixelTrackSoA + process.pixelVertexSoA)
process.schedule = cms.Schedule(process.TkSoA)
return process
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ class PixelTrackProducerFromSoA : public edm::global::EDProducer<> {

static void fillDescriptions(edm::ConfigurationDescriptions &descriptions);

// using HitModuleStart = std::array<uint32_t,gpuClustering::MaxNumModules + 1>;
using HMSstorage = HostProduct<unsigned int[]>;
// using HitModuleStart = std::array<uint32_t, gpuClustering::maxNumModules + 1>;
using HMSstorage = HostProduct<uint32_t[]>;

private:
void produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &iSetup) const override;
Expand Down Expand Up @@ -77,7 +77,7 @@ void PixelTrackProducerFromSoA::fillDescriptions(edm::ConfigurationDescriptions
edm::ParameterSetDescription desc;
desc.add<edm::InputTag>("beamSpot", edm::InputTag("offlineBeamSpot"));
desc.add<edm::InputTag>("trackSrc", edm::InputTag("pixelTrackSoA"));
desc.add<edm::InputTag>("pixelRecHitLegacySrc", edm::InputTag("siPixelRecHitsLegacyPreSplitting"));
desc.add<edm::InputTag>("pixelRecHitLegacySrc", edm::InputTag("siPixelRecHitsPreSplittingLegacy"));
desc.add<int>("minNumberOfHits", 0);

descriptions.addWithDefaultLabel(desc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

#include <cuda_runtime.h>

#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
Expand Down
38 changes: 17 additions & 21 deletions RecoPixelVertexing/PixelTriplets/plugins/CAConstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,49 +10,45 @@
#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"

// #define ONLY_PHICUT
//#define ONLY_PHICUT

namespace CAConstants {

// constants
#ifndef ONLY_PHICUT
#ifdef ONLY_PHICUT
constexpr uint32_t maxNumberOfTuples() { return 48 * 1024; }
constexpr uint32_t maxNumberOfDoublets() { return 2 * 1024 * 1024; }
constexpr uint32_t maxCellsPerHit() { return 8 * 128; }
#else
#ifdef GPU_SMALL_EVENTS
// kept for testing and debugging
constexpr uint32_t maxNumberOfTuples() { return 3 * 1024; }
constexpr uint32_t maxNumberOfDoublets() { return 128 * 1024; }
constexpr uint32_t maxCellsPerHit() { return 128 / 2; }
#else
// tested on MC events with 55-75 pileup events
constexpr uint32_t maxNumberOfTuples() { return 24 * 1024; }
#endif
#else
constexpr uint32_t maxNumberOfTuples() { return 48 * 1024; }
#endif
constexpr uint32_t maxNumberOfQuadruplets() { return maxNumberOfTuples(); }
#ifndef ONLY_PHICUT
#ifndef GPU_SMALL_EVENTS
constexpr uint32_t maxNumberOfDoublets() { return 512 * 1024; }
constexpr uint32_t maxCellsPerHit() { return 128; }
#else
constexpr uint32_t maxNumberOfDoublets() { return 128 * 1024; }
constexpr uint32_t maxCellsPerHit() { return 128 / 2; }
#endif
#else
constexpr uint32_t maxNumberOfDoublets() { return 2 * 1024 * 1024; }
constexpr uint32_t maxCellsPerHit() { return 8 * 128; }
#endif
#endif // ONLY_PHICUT
constexpr uint32_t maxNumOfActiveDoublets() { return maxNumberOfDoublets() / 8; }
constexpr uint32_t maxNumberOfQuadruplets() { return maxNumberOfTuples(); }

constexpr uint32_t maxNumberOfLayerPairs() { return 20; }
constexpr uint32_t maxNumberOfLayers() { return 10; }
constexpr uint32_t maxTuples() { return maxNumberOfTuples(); }

// types
using hindex_type = uint32_t; // FIXME from siPixelRecHitsHeterogeneousProduct
using tindex_type = uint16_t; // for tuples
using tindex_type = uint16_t; // for tuples

#ifndef ONLY_PHICUT
using CellNeighbors = cms::cuda::VecArray<uint32_t, 36>;
using CellTracks = cms::cuda::VecArray<tindex_type, 48>;
#else
#ifdef ONLY_PHICUT
using CellNeighbors = cms::cuda::VecArray<uint32_t, 64>;
using CellTracks = cms::cuda::VecArray<tindex_type, 64>;
#else
using CellNeighbors = cms::cuda::VecArray<uint32_t, 36>;
using CellTracks = cms::cuda::VecArray<tindex_type, 48>;
#endif

using CellNeighborsVector = cms::cuda::SimpleVector<CellNeighbors>;
Expand Down
4 changes: 2 additions & 2 deletions RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletCUDA.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

#include "CAHitNtupletGeneratorOnGPU.h"
#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h"
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"

class CAHitNtupletCUDA : public edm::global::EDProducer<> {
public:
Expand Down Expand Up @@ -58,7 +58,7 @@ void CAHitNtupletCUDA::fillDescriptions(edm::ConfigurationDescriptions& descript
edm::ParameterSetDescription desc;

desc.add<bool>("onGPU", true);
desc.add<edm::InputTag>("pixelRecHitSrc", edm::InputTag("siPixelRecHitsCUDAPreSplitting"));
desc.add<edm::InputTag>("pixelRecHitSrc", edm::InputTag("siPixelRecHitsPreSplittingCUDA"));

CAHitNtupletGeneratorOnGPU::fillDescriptions(desc);
auto label = "caHitNtupletCUDA";
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "CAHitNtupletGeneratorKernels.h"

#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

#include "CAHitNtupletGeneratorKernels.h"

template <>
#ifdef __CUDACC__
void CAHitNtupletGeneratorKernelsGPU::allocateOnGPU(cudaStream_t stream) {
Expand All @@ -25,11 +25,7 @@ void CAHitNtupletGeneratorKernelsCPU::allocateOnGPU(cudaStream_t stream) {
device_hitToTuple_apc_ = (cms::cuda::AtomicPairCounter*)device_storage_.get() + 1;
device_nCells_ = (uint32_t*)(device_storage_.get() + 2);

if
#ifndef __CUDACC__
constexpr
#endif
(std::is_same<Traits, cms::cudacompat::GPUTraits>::value) {
if constexpr (std::is_same<Traits, cms::cudacompat::GPUTraits>::value) {
cudaCheck(cudaMemsetAsync(device_nCells_, 0, sizeof(uint32_t), stream));
} else {
*device_nCells_ = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#define RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorOnGPU_h

#include <cuda_runtime.h>
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"
#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h"

#include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h"
Expand Down
5 changes: 2 additions & 3 deletions RecoPixelVertexing/PixelTriplets/plugins/GPUCACell.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

#include <cuda_runtime.h>

#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"
#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"
#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
Expand Down Expand Up @@ -293,8 +293,7 @@ class GPUCACell {
assert(tmpNtuplet.size() <= 4);

bool last = true;
for (int j = 0; j < outerNeighbors().size(); ++j) {
auto otherCell = outerNeighbors()[j];
for (unsigned int otherCell : outerNeighbors()) {
if (cells[otherCell].theDoubletId < 0)
continue; // killed by earlyFishbone
last = false;
Expand Down
2 changes: 1 addition & 1 deletion RecoPixelVertexing/PixelTriplets/plugins/HelixFitOnGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#define RecoPixelVertexing_PixelTrackFitting_plugins_HelixFitOnGPU_h

#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h"
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"
#include "RecoPixelVertexing/PixelTrackFitting/interface/FitResult.h"

#include "CAConstants.h"
Expand Down
2 changes: 1 addition & 1 deletion RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

#include <cuda_runtime.h>

#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include <cstdio>
#include <limits>

#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h"
#include "DataFormats/Math/interface/approx_atan2.h"
#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
Expand Down Expand Up @@ -105,7 +105,7 @@ namespace gpuPixelDoublets {

// found hit corresponding to our cuda thread, now do the job
auto mi = hh.detectorIndex(i);
if (mi > 2000)
if (mi > gpuClustering::maxNumModules)
continue; // invalid

/* maybe clever, not effective when zoCut is on
Expand Down Expand Up @@ -201,7 +201,7 @@ namespace gpuPixelDoublets {
assert(oi >= offsets[outer]);
assert(oi < offsets[outer + 1]);
auto mo = hh.detectorIndex(oi);
if (mo > 2000)
if (mo > gpuClustering::maxNumModules)
continue; // invalid

if (doZ0Cut && z0cutoff(oi))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def customizePixelOnlyForProfilingGPUOnly(process):
# tracks and vertices on the CPU in SoA format, without conversion to legacy format.
def customizePixelOnlyForProfilingGPUWithHostCopy(process):

#? process.siPixelRecHitHostSoA.convertToLegacy = False
#? process.siPixelRecHitSoAFromLegacy.convertToLegacy = False

process.consumer = cms.EDAnalyzer("GenericConsumer",
eventProducts = cms.untracked.vstring('pixelTrackSoA', 'pixelVertexSoA')
Expand Down

0 comments on commit 3a9e550

Please sign in to comment.