Skip to content

Commit

Permalink
Pixel doublets on GPU (cms-sw#118)
Browse files Browse the repository at this point in the history
Pixel doublets (actually CACells) are created on GPU and fed to CA.
The whole workflow up to quadruplets candidates is now fully on GPU.
  • Loading branch information
VinInn authored and fwyzard committed Aug 8, 2018
1 parent c2aba96 commit abf1a46
Show file tree
Hide file tree
Showing 10 changed files with 510 additions and 986 deletions.
14 changes: 7 additions & 7 deletions HeterogeneousCore/CUDAUtilities/interface/GPUVecArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#ifndef GPU_VECARRAY_H_
#define GPU_VECARRAY_H_


#include <cuda.h>
#include <cuda_runtime.h>

Expand All @@ -24,8 +23,6 @@ template <class T, int maxSize> struct VecArray {
}
}



template <class... Ts> constexpr int emplace_back_unsafe(Ts &&... args) {
auto previousSize = m_size;
m_size++;
Expand All @@ -38,9 +35,7 @@ template <class T, int maxSize> struct VecArray {
}
}


__inline__ constexpr T & back() const {

if (m_size > 0) {
return m_data[m_size - 1];
} else
Expand Down Expand Up @@ -95,10 +90,15 @@ template <class T, int maxSize> struct VecArray {

__inline__ constexpr void resize(int size) { m_size = size; }

__inline__ constexpr bool empty() const { return 0 == m_size; }

__inline__ constexpr bool full() const { return maxSize == m_size; }

int m_size;
int m_size = 0;

T m_data[maxSize];
};

}
#endif

#endif // GPU_VECARRAY_H_
1 change: 1 addition & 0 deletions RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ namespace pixelgpudetails {
#endif
auto nhits = input.nClusters;
cpu_ = std::make_unique<HitsOnCPU>(nhits);
cudaCheck(cudaMemcpyAsync(cpu_->detInd.data(), gpu_.detInd_d, nhits*sizeof(int16_t), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(cpu_->charge.data(), gpu_.charge_d, nhits*sizeof(int32_t), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(cpu_->xl.data(), gpu_.xl_d, nhits*sizeof(float), cudaMemcpyDefault, stream.id()));
cudaCheck(cudaMemcpyAsync(cpu_->yl.data(), gpu_.yl_d, nhits*sizeof(float), cudaMemcpyDefault, stream.id()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ namespace siPixelRecHitsHeterogeneousProduct {
HitsOnCPU() = default;

explicit HitsOnCPU(uint32_t nhits) :
detInd(nhits),
charge(nhits),
xl(nhits),
yl(nhits),
Expand All @@ -50,6 +51,7 @@ namespace siPixelRecHitsHeterogeneousProduct {
{ }

uint32_t hitsModuleStart[2001];
std::vector<uint16_t, CUDAHostAllocator<uint16_t>> detInd;
std::vector<int32_t, CUDAHostAllocator<int32_t>> charge;
std::vector<float, CUDAHostAllocator<float>> xl, yl;
std::vector<float, CUDAHostAllocator<float>> xe, ye;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,45 +98,42 @@ void CAHitNtupletHeterogeneousEDProducer::beginStreamGPUCuda(
void CAHitNtupletHeterogeneousEDProducer::acquireGPUCuda(
const edm::HeterogeneousEvent &iEvent, const edm::EventSetup &iSetup,
cuda::stream_t<> &cudaStream) {

seedingHitSets_ = std::make_unique<RegionsSeedingHitSets>();


// FIXME: move directly to region or similar...
edm::Handle<IntermediateHitDoublets> hdoublets;
iEvent.getByToken(doubletToken_, hdoublets);
const auto &regionDoublets = *hdoublets;
assert(regionDoublets.regionSize()<=1);

const SeedingLayerSetsHits &seedingLayerHits =
regionDoublets.seedingLayerHits();
if (seedingLayerHits.numberOfLayersInSet() <
CAHitQuadrupletGeneratorGPU::minLayers) {
throw cms::Exception("LogicError")
<< "CAHitNtupletHeterogeneousEDProducer expects "
"SeedingLayerSetsHits::numberOfLayersInSet() to be >= "
<< CAHitQuadrupletGeneratorGPU::minLayers << ", got "
<< seedingLayerHits.numberOfLayersInSet()
<< ". This is likely caused by a configuration error of this module, "
"HitPairEDProducer, or SeedingLayersEDProducer.";
if (regionDoublets.empty()) {
emptyRegionDoublets = true;
return;
}

seedingHitSets_ = std::make_unique<RegionsSeedingHitSets>();
const TrackingRegion &region = (*regionDoublets.begin()).region();


edm::Handle<siPixelRecHitsHeterogeneousProduct::GPUProduct> gh;
iEvent.getByToken<siPixelRecHitsHeterogeneousProduct::HeterogeneousPixelRecHit>(tGpuHits, gh);
auto const & gHits = *gh;
// auto nhits = gHits.nHits;

GPUGenerator_.buildDoublets(gHits,0.06f,cudaStream.id());
// move inside hitNtuplets???
GPUGenerator_.buildDoublets(gHits,cudaStream.id());

if (regionDoublets.empty()) {
emptyRegionDoublets = true;
} else {
seedingHitSets_->reserve(regionDoublets.regionSize(), localRA_.upper());
GPUGenerator_.initEvent(iEvent.event(), iSetup);
seedingHitSets_->reserve(regionDoublets.regionSize(), localRA_.upper());
GPUGenerator_.initEvent(iEvent.event(), iSetup);

LogDebug("CAHitNtupletHeterogeneousEDProducer")
LogDebug("CAHitNtupletHeterogeneousEDProducer")
<< "Creating ntuplets for " << regionDoublets.regionSize()
<< " regions, and " << regionDoublets.layerPairsSize()
<< " layer pairs";

GPUGenerator_.hitNtuplets(regionDoublets, iSetup, seedingLayerHits, cudaStream.id());
}
GPUGenerator_.hitNtuplets(region, gHits, iSetup, cudaStream.id());

}

void CAHitNtupletHeterogeneousEDProducer::produceGPUCuda(
Expand All @@ -147,16 +144,18 @@ void CAHitNtupletHeterogeneousEDProducer::produceGPUCuda(
edm::Handle<IntermediateHitDoublets> hdoublets;
iEvent.getByToken(doubletToken_, hdoublets);
const auto &regionDoublets = *hdoublets;
const SeedingLayerSetsHits &seedingLayerHits =
regionDoublets.seedingLayerHits();
int index = 0;

edm::Handle<HeterogeneousProduct> gh;
iEvent.getByToken(tGpuHits, gh);
auto const & rechits = gh->get<siPixelRecHitsHeterogeneousProduct::HeterogeneousPixelRecHit>().getProduct<HeterogeneousDevice::kCPU>();

std::vector<OrderedHitSeeds> ntuplets(regionDoublets.regionSize());
for (auto &ntuplet : ntuplets)
ntuplet.reserve(localRA_.upper());
for (auto &ntuplet : ntuplets) ntuplet.reserve(localRA_.upper());
int index = 0;
for (const auto &regionLayerPairs : regionDoublets) {
const TrackingRegion &region = regionLayerPairs.region();
auto seedingHitSetsFiller = seedingHitSets_->beginRegion(&region);
GPUGenerator_.fillResults(regionDoublets, ntuplets, iSetup, seedingLayerHits, cudaStream.id());
GPUGenerator_.fillResults(region, rechits.collection, ntuplets, iSetup, cudaStream.id());
fillNtuplets(seedingHitSetsFiller, ntuplets[index]);
ntuplets[index].clear();
index++;
Expand Down Expand Up @@ -188,6 +187,7 @@ void CAHitNtupletHeterogeneousEDProducer::produceCPU(
iEvent.put(std::move(seedingHitSets));
return;
}

seedingHitSets->reserve(regionDoublets.regionSize(), localRA_.upper());
CPUGenerator_.initEvent(iEvent.event(), iSetup);

Expand Down
Loading

0 comments on commit abf1a46

Please sign in to comment.