diff --git a/Configuration/ProcessModifiers/python/alpakaValidationPixel_cff.py b/Configuration/ProcessModifiers/python/alpakaValidationPixel_cff.py
new file mode 100644
index 0000000000000..ebdb7d9e6981a
--- /dev/null
+++ b/Configuration/ProcessModifiers/python/alpakaValidationPixel_cff.py
@@ -0,0 +1,6 @@
+import FWCore.ParameterSet.Config as cms
+
+# This modifier is for turning on DQM modules used for alpaka device/host validation for pixels
+
+alpakaValidationPixel = cms.Modifier()
+
diff --git a/Configuration/ProcessModifiers/python/alpakaValidation_cff.py b/Configuration/ProcessModifiers/python/alpakaValidation_cff.py
new file mode 100644
index 0000000000000..3399bdda7c4df
--- /dev/null
+++ b/Configuration/ProcessModifiers/python/alpakaValidation_cff.py
@@ -0,0 +1,11 @@
+import FWCore.ParameterSet.Config as cms
+
+from Configuration.ProcessModifiers.alpaka_cff import *
+from Configuration.ProcessModifiers.alpakaValidationPixel_cff import *
+
+# This modifier chain is for turning on DQM modules used for alpaka device/host validation
+
+alpakaValidation =  cms.ModifierChain(
+    alpaka,
+    alpakaValidationPixel
+)
diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py
index 56da630d8b9c8..c6fd188a22c89 100644
--- a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py
+++ b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py
@@ -896,6 +896,7 @@ def setup_(self, step, stepName, stepDict, k, properties):
 #  - HLT on CPU
 #  - Pixel-only reconstruction on CPU, with DQM and validation
 #  - harvesting
+
 upgradeWFs['PatatrackPixelOnlyCPU'] = PatatrackWorkflow(
     digi = {
         # the HLT menu is already set up for using GPUs if available and if the "gpu" modifier is enabled
@@ -1062,8 +1063,7 @@ def setup_(self, step, stepName, stepDict, k, properties):
 upgradeWFs['PatatrackECALOnlyAlpaka'] = PatatrackWorkflow(
     digi = {
         # customize the ECAL Local Reco part of the HLT menu for Alpaka
-        '--procModifiers': 'alpaka',
-        '--customise' : 'HLTrigger/Configuration/customizeHLTforAlpaka.customizeHLTforAlpakaEcalLocalReco'
+        '--procModifiers': 'alpaka', # the alpaka modifier activates customizeHLTforAlpaka (HLTrigger/Configuration)
     },
     reco = {
         '-s': 'RAW2DIGI:RawToDigi_ecalOnly,RECO:reconstruction_ecalOnly,VALIDATION:@ecalOnlyValidation,DQM:@ecalOnly',
@@ -1536,6 +1536,53 @@ def setup_(self, step, stepName, stepDict, k, properties):
     offset = 0.597,
 )
 
+
+# Alpaka workflows
+
+upgradeWFs['PatatrackPixelOnlyAlpaka'] = PatatrackWorkflow(
+    digi = {
+        '--procModifiers': 'alpaka', # the alpaka modifier activates customizeHLTforAlpaka (HLTrigger/Configuration)
+    },
+    reco = {
+        '-s': 'RAW2DIGI:RawToDigi_pixelOnly,RECO:reconstruction_pixelTrackingOnly,VALIDATION:@pixelTrackingOnlyValidation,DQM:@pixelTrackingOnlyDQM',
+        '--procModifiers': 'alpaka'
+    },
+    harvest = {
+        '-s': 'HARVESTING:@trackingOnlyValidation+@pixelTrackingOnlyDQM'
+    },
+    suffix = 'Patatrack_PixelOnlyAlpaka',
+    offset = 0.402,
+)
+
+upgradeWFs['PatatrackPixelOnlyAlpakaValidation'] = PatatrackWorkflow(
+    digi = {
+        '--procModifiers': 'alpaka', # the alpaka modifier activates customizeHLTforAlpaka (HLTrigger/Configuration)
+    },
+    reco = {
+        '-s': 'RAW2DIGI:RawToDigi_pixelOnly,RECO:reconstruction_pixelTrackingOnly,VALIDATION:@pixelTrackingOnlyValidation,DQM:@pixelTrackingOnlyDQM',
+        '--procModifiers': 'alpakaValidation'
+    },
+    harvest = {
+        '-s': 'HARVESTING:@trackingOnlyValidation+@pixelTrackingOnlyDQM'
+    },
+    suffix = 'Patatrack_PixelOnlyAlpaka_Validation',
+    offset = 0.403,
+)
+
+upgradeWFs['PatatrackPixelOnlyAlpakaProfiling'] = PatatrackWorkflow(
+    digi = {
+        '--procModifiers': 'alpaka', # the alpaka modifier activates customizeHLTforAlpaka (HLTrigger/Configuration)
+    },
+    reco = {
+        '-s': 'RAW2DIGI:RawToDigi_pixelOnly,RECO:reconstruction_pixelTrackingOnly',
+        '--procModifiers': 'alpaka',
+        '--customise' : 'RecoTracker/Configuration/customizePixelOnlyForProfiling.customizePixelOnlyForProfilingGPUOnly'
+    },
+    harvest = None,
+    suffix = 'Patatrack_PixelOnlyAlpaka_Profiling',
+    offset = 0.404,
+)
+
 # end of Patatrack workflows
 
 class UpgradeWorkflow_ProdLike(UpgradeWorkflow):
@@ -2741,7 +2788,7 @@ def condition(self, fragment, stepList, key, hasHarvest):
     },
     '2022HI' : {
         'Geom' : 'DB:Extended',
-        'GT':'auto:phase1_2022_realistic_hi', 
+        'GT':'auto:phase1_2022_realistic_hi',
         'HLTmenu': '@fake2',
         'Era':'Run3_pp_on_PbPb',
         'BeamSpot': 'DBrealistic',
@@ -2749,7 +2796,7 @@ def condition(self, fragment, stepList, key, hasHarvest):
     },
     '2022HIRP' : {
         'Geom' : 'DB:Extended',
-        'GT':'auto:phase1_2022_realistic_hi', 
+        'GT':'auto:phase1_2022_realistic_hi',
         'HLTmenu': '@fake2',
         'Era':'Run3_pp_on_PbPb_approxSiStripClusters',
         'BeamSpot': 'DBrealistic',
@@ -2757,7 +2804,7 @@ def condition(self, fragment, stepList, key, hasHarvest):
     },
     '2023HI' : {
         'Geom' : 'DB:Extended',
-        'GT':'auto:phase1_2023_realistic_hi', 
+        'GT':'auto:phase1_2023_realistic_hi',
         'HLTmenu': '@fake2',
         'Era':'Run3_pp_on_PbPb',
         'BeamSpot': 'DBrealistic',
@@ -2765,7 +2812,7 @@ def condition(self, fragment, stepList, key, hasHarvest):
     },
     '2023HIRP' : {
         'Geom' : 'DB:Extended',
-        'GT':'auto:phase1_2023_realistic_hi', 
+        'GT':'auto:phase1_2023_realistic_hi',
         'HLTmenu': '@fake2',
         'Era':'Run3_pp_on_PbPb_approxSiStripClusters',
         'BeamSpot': 'DBrealistic',
diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTrackSoAAlpaka.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTrackSoAAlpaka.cc
index 65a6dc2802831..025bdfd988ea6 100644
--- a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTrackSoAAlpaka.cc
+++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTrackSoAAlpaka.cc
@@ -176,9 +176,9 @@ void SiPixelCompareTrackSoAAlpaka<T>::analyze(const edm::Event& iEvent, const ed
 
     float ptHost = tsoaHost.view()[it].pt();
     float etaHost = tsoaHost.view()[it].eta();
-    float phiHost = helper::phi(tsoaHost.view(), it);
-    float zipHost = helper::zip(tsoaHost.view(), it);
-    float tipHost = helper::tip(tsoaHost.view(), it);
+    float phiHost = reco::phi(tsoaHost.view(), it);
+    float zipHost = reco::zip(tsoaHost.view(), it);
+    float tipHost = reco::tip(tsoaHost.view(), it);
 
     if (!(ptHost > 0.))
       continue;
@@ -193,7 +193,7 @@ void SiPixelCompareTrackSoAAlpaka<T>::analyze(const edm::Event& iEvent, const ed
 
     for (auto gid : looseTrkidxDevice) {
       float etaDevice = tsoaDevice.view()[gid].eta();
-      float phiDevice = helper::phi(tsoaDevice.view(), gid);
+      float phiDevice = reco::phi(tsoaDevice.view(), gid);
       float dr2 = reco::deltaR2(etaHost, phiHost, etaDevice, phiDevice);
       if (dr2 > dr2cut_)
         continue;  // this is arbitrary
@@ -210,22 +210,22 @@ void SiPixelCompareTrackSoAAlpaka<T>::analyze(const edm::Event& iEvent, const ed
     nLooseAndAboveTracksHost_matchedDevice++;
 
     hchi2_->Fill(tsoaHost.view()[it].chi2(), tsoaDevice.view()[closestTkidx].chi2());
-    hCharge_->Fill(reco::charge<T>(tsoaHost.view(), it), reco::charge<T>(tsoaDevice.view(), closestTkidx));
+    hCharge_->Fill(reco::charge(tsoaHost.view(), it), reco::charge(tsoaDevice.view(), closestTkidx));
     hnHits_->Fill(helper::nHits(tsoaHost.view(), it), helper::nHits(tsoaDevice.view(), closestTkidx));
     hnLayers_->Fill(tsoaHost.view()[it].nLayers(), tsoaDevice.view()[closestTkidx].nLayers());
     hpt_->Fill(tsoaHost.view()[it].pt(), tsoaDevice.view()[closestTkidx].pt());
     hptLogLog_->Fill(tsoaHost.view()[it].pt(), tsoaDevice.view()[closestTkidx].pt());
     heta_->Fill(etaHost, tsoaDevice.view()[closestTkidx].eta());
-    hphi_->Fill(phiHost, helper::phi(tsoaDevice.view(), closestTkidx));
-    hz_->Fill(zipHost, helper::zip(tsoaDevice.view(), closestTkidx));
-    htip_->Fill(tipHost, helper::tip(tsoaDevice.view(), closestTkidx));
+    hphi_->Fill(phiHost, reco::phi(tsoaDevice.view(), closestTkidx));
+    hz_->Fill(zipHost, reco::zip(tsoaDevice.view(), closestTkidx));
+    htip_->Fill(tipHost, reco::tip(tsoaDevice.view(), closestTkidx));
     hptdiffMatched_->Fill(ptHost - tsoaDevice.view()[closestTkidx].pt());
-    hCurvdiffMatched_->Fill((reco::charge<T>(tsoaHost.view(), it) / tsoaHost.view()[it].pt()) -
-                            (reco::charge<T>(tsoaDevice.view(), closestTkidx) / tsoaDevice.view()[closestTkidx].pt()));
+    hCurvdiffMatched_->Fill((reco::charge(tsoaHost.view(), it) / tsoaHost.view()[it].pt()) -
+                            (reco::charge(tsoaDevice.view(), closestTkidx) / tsoaDevice.view()[closestTkidx].pt()));
     hetadiffMatched_->Fill(etaHost - tsoaDevice.view()[closestTkidx].eta());
-    hphidiffMatched_->Fill(reco::deltaPhi(phiHost, helper::phi(tsoaDevice.view(), closestTkidx)));
-    hzdiffMatched_->Fill(zipHost - helper::zip(tsoaDevice.view(), closestTkidx));
-    htipdiffMatched_->Fill(tipHost - helper::tip(tsoaDevice.view(), closestTkidx));
+    hphidiffMatched_->Fill(reco::deltaPhi(phiHost, reco::phi(tsoaDevice.view(), closestTkidx)));
+    hzdiffMatched_->Fill(zipHost - reco::zip(tsoaDevice.view(), closestTkidx));
+    htipdiffMatched_->Fill(tipHost - reco::tip(tsoaDevice.view(), closestTkidx));
     hpt_eta_tkAllHostMatched_->Fill(etaHost, tsoaHost.view()[it].pt());  //matched to gpu
     hphi_z_tkAllHostMatched_->Fill(etaHost, zipHost);
   }
diff --git a/DQM/SiPixelHeterogeneous/python/SiPixelHeterogenousDQM_FirstStep_cff.py b/DQM/SiPixelHeterogeneous/python/SiPixelHeterogenousDQM_FirstStep_cff.py
index dfb83708c95cf..95245a3fea968 100644
--- a/DQM/SiPixelHeterogeneous/python/SiPixelHeterogenousDQM_FirstStep_cff.py
+++ b/DQM/SiPixelHeterogeneous/python/SiPixelHeterogenousDQM_FirstStep_cff.py
@@ -7,20 +7,35 @@
 from DQM.SiPixelHeterogeneous.siPixelPhase2MonitorTrackSoA_cfi import *
 from DQM.SiPixelHeterogeneous.siPixelHIonPhase1MonitorTrackSoA_cfi import *
 from DQM.SiPixelHeterogeneous.siPixelMonitorVertexSoA_cfi import *
+# Alpaka Modules
+from Configuration.ProcessModifiers.alpaka_cff import alpaka
+from DQM.SiPixelHeterogeneous.siPixelPhase1MonitorRecHitsSoAAlpaka_cfi import *
+from DQM.SiPixelHeterogeneous.siPixelPhase2MonitorRecHitsSoAAlpaka_cfi import *
+from DQM.SiPixelHeterogeneous.siPixelHIonPhase1MonitorRecHitsSoAAlpaka_cfi import *
+from DQM.SiPixelHeterogeneous.siPixelPhase1MonitorTrackSoAAlpaka_cfi import *
+from DQM.SiPixelHeterogeneous.siPixelPhase2MonitorTrackSoAAlpaka_cfi import *
+from DQM.SiPixelHeterogeneous.siPixelHIonPhase1MonitorTrackSoAAlpaka_cfi import *
+from DQM.SiPixelHeterogeneous.siPixelMonitorVertexSoAAlpaka_cfi import *
 
 # Run-3 sequence
 monitorpixelSoASource = cms.Sequence(siPixelPhase1MonitorRecHitsSoA * siPixelPhase1MonitorTrackSoA * siPixelMonitorVertexSoA)
-
+# Run-3 Alpaka sequence 
+monitorpixelSoASourceAlpaka = cms.Sequence(siPixelPhase1MonitorRecHitsSoAAlpaka * siPixelPhase1MonitorTrackSoAAlpaka * siPixelMonitorVertexSoAAlpaka)
+alpaka.toReplaceWith(monitorpixelSoASource, monitorpixelSoASourceAlpaka)
 # Phase-2 sequence
 from Configuration.Eras.Modifier_phase2_tracker_cff import phase2_tracker
 _monitorpixelSoARecHitsSource = cms.Sequence(siPixelPhase2MonitorRecHitsSoA * siPixelPhase2MonitorTrackSoA * siPixelMonitorVertexSoA)
-phase2_tracker.toReplaceWith(monitorpixelSoASource, _monitorpixelSoARecHitsSource)
+(phase2_tracker & ~alpaka).toReplaceWith(monitorpixelSoASource, _monitorpixelSoARecHitsSource)
+_monitorpixelSoARecHitsSourceAlpaka = cms.Sequence(siPixelPhase2MonitorRecHitsSoAAlpaka * siPixelPhase2MonitorTrackSoAAlpaka * siPixelMonitorVertexSoAAlpaka)
+(phase2_tracker & alpaka).toReplaceWith(monitorpixelSoASource, _monitorpixelSoARecHitsSourceAlpaka)
 
 # HIon Phase 1 sequence
 from Configuration.ProcessModifiers.pp_on_AA_cff import pp_on_AA
 
 _monitorpixelSoARecHitsSourceHIon = cms.Sequence(siPixelHIonPhase1MonitorRecHitsSoA * siPixelHIonPhase1MonitorTrackSoA * siPixelMonitorVertexSoA)
 (pp_on_AA & ~phase2_tracker).toReplaceWith(monitorpixelSoASource, _monitorpixelSoARecHitsSourceHIon)
+_monitorpixelSoARecHitsSourceHIonAlpaka = cms.Sequence(siPixelHIonPhase1MonitorRecHitsSoAAlpaka * siPixelHIonPhase1MonitorTrackSoAAlpaka * siPixelMonitorVertexSoAAlpaka)
+(pp_on_AA & ~phase2_tracker & alpaka).toReplaceWith(monitorpixelSoASource, _monitorpixelSoARecHitsSourceHIonAlpaka)
 
 #Define the sequence for GPU vs CPU validation
 #This should run:- individual monitor for the 2 collections + comparison module
@@ -33,6 +48,14 @@
 from DQM.SiPixelHeterogeneous.siPixelCompareVertexSoA_cfi import *
 from DQM.SiPixelHeterogeneous.siPixelPhase1RawDataErrorComparator_cfi import *
 from DQM.SiPixelPhase1Common.SiPixelPhase1RawData_cfi import *
+#Alpaka
+from DQM.SiPixelHeterogeneous.siPixelPhase1CompareRecHitsSoAAlpaka_cfi import *
+from DQM.SiPixelHeterogeneous.siPixelPhase2CompareRecHitsSoAAlpaka_cfi import *
+from DQM.SiPixelHeterogeneous.siPixelHIonPhase1CompareRecHitsSoAAlpaka_cfi import *
+from DQM.SiPixelHeterogeneous.siPixelPhase1CompareTrackSoAAlpaka_cfi import *
+from DQM.SiPixelHeterogeneous.siPixelPhase2CompareTrackSoAAlpaka_cfi import *
+from DQM.SiPixelHeterogeneous.siPixelHIonPhase1CompareTrackSoAAlpaka_cfi import *
+from DQM.SiPixelHeterogeneous.siPixelCompareVertexSoAAlpaka_cfi import *
 
 # digi errors
 SiPixelPhase1RawDataConfForCPU = copy.deepcopy(SiPixelPhase1RawDataConf)
@@ -126,6 +149,43 @@
   topFolderName = 'SiPixelHeterogeneous/PixelVertexSoAGPU',
 )
 
+### Alpaka
+
+# PixelRecHits: monitor of CPUSerial product (Alpaka backend: 'serial_sync')
+siPixelRecHitsSoAMonitorSerial = siPixelPhase1MonitorRecHitsSoAAlpaka.clone(
+    pixelHitsSrc = cms.InputTag( 'siPixelRecHitsPreSplittingAlpakaSerial' ),
+    TopFolderName = cms.string( 'SiPixelHeterogeneous/PixelRecHitsSerial' )
+)
+
+# PixelRecHits: monitor of Device product (Alpaka backend: '')
+siPixelRecHitsSoAMonitorDevice = siPixelPhase1MonitorRecHitsSoAAlpaka.clone(
+    pixelHitsSrc = cms.InputTag( 'siPixelRecHitsPreSplittingAlpaka' ),
+    TopFolderName = cms.string( 'SiPixelHeterogeneous/PixelRecHitsDevice' )
+)
+
+# PixelTracks: monitor of CPUSerial product (Alpaka backend: 'serial_sync')
+siPixelTrackSoAMonitorSerial = siPixelPhase1MonitorTrackSoAAlpaka.clone(
+    pixelTrackSrc = cms.InputTag('pixelTracksAlpakaSerial'),
+    topFolderName = cms.string('SiPixelHeterogeneous/PixelTrackSerial')
+)
+
+# PixelTracks: monitor of Device product (Alpaka backend: '')
+siPixelTrackSoAMonitorDevice = siPixelPhase1MonitorTrackSoAAlpaka.clone(
+    pixelTrackSrc = cms.InputTag('pixelTracksAlpaka'),
+    topFolderName = cms.string('SiPixelHeterogeneous/PixelTrackDevice')
+)
+
+# PixelVertices: monitor of CPUSerial product (Alpaka backend: 'serial_sync')
+siPixelVertexSoAMonitorSerial = siPixelMonitorVertexSoAAlpaka.clone(
+    pixelVertexSrc = cms.InputTag("pixelVerticesAlpakaSerial"),
+    topFolderName = cms.string('SiPixelHeterogeneous/PixelVertexSerial')
+)
+
+siPixelVertexSoAMonitorDevice = siPixelMonitorVertexSoAAlpaka.clone(
+    pixelVertexSrc = cms.InputTag("pixelVerticesAlpaka"),
+    topFolderName = cms.string('SiPixelHeterogeneous/PixelVertexDevice')
+)
+
 # Run-3 sequence
 monitorpixelSoACompareSource = cms.Sequence(siPixelPhase1MonitorRawDataACPU *
                                             siPixelPhase1MonitorRawDataAGPU *
@@ -139,6 +199,17 @@
                                             siPixelMonitorVertexSoAGPU *
                                             siPixelCompareVertexSoA *
                                             siPixelPhase1RawDataErrorComparator)
+# and the Alpaka version
+monitorpixelSoACompareSourceAlpaka = cms.Sequence(
+                                            siPixelRecHitsSoAMonitorSerial *
+                                            siPixelRecHitsSoAMonitorDevice *
+                                            siPixelPhase1CompareRecHitsSoAAlpaka *
+                                            siPixelTrackSoAMonitorSerial *
+                                            siPixelTrackSoAMonitorDevice *
+                                            siPixelPhase1CompareTrackSoAAlpaka *
+                                            siPixelVertexSoAMonitorSerial *
+                                            siPixelVertexSoAMonitorDevice *
+                                            siPixelCompareVertexSoAAlpaka )
 
 # Phase-2 sequence
 _monitorpixelSoACompareSource =  cms.Sequence(siPixelPhase2MonitorRecHitsSoACPU *
@@ -166,3 +237,6 @@
 
 from Configuration.ProcessModifiers.gpuValidationPixel_cff import gpuValidationPixel
 gpuValidationPixel.toReplaceWith(monitorpixelSoASource, monitorpixelSoACompareSource)
+
+from Configuration.ProcessModifiers.alpakaValidationPixel_cff import alpakaValidationPixel
+(alpakaValidationPixel & ~gpuValidationPixel).toReplaceWith(monitorpixelSoASource, monitorpixelSoACompareSourceAlpaka)
diff --git a/DataFormats/TrackSoA/interface/TracksSoA.h b/DataFormats/TrackSoA/interface/TracksSoA.h
index bc3a8c4be9cb5..ed4ef2e5a4c93 100644
--- a/DataFormats/TrackSoA/interface/TracksSoA.h
+++ b/DataFormats/TrackSoA/interface/TracksSoA.h
@@ -1,7 +1,10 @@
-#ifndef DataFormats_Track_interface_TrackLayout_h
-#define DataFormats_Track_interface_TrackLayout_h
+#ifndef DataFormats_TrackSoA_interface_TracksSoA_h
+#define DataFormats_TrackSoA_interface_TracksSoA_h
+
+#include <alpaka/alpaka.hpp>
 
 #include <Eigen/Core>
+
 #include "HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h"
 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
 #include "DataFormats/SoATemplate/interface/SoALayout.h"
@@ -29,6 +32,7 @@ namespace reco {
                         SOA_COLUMN(int8_t, nLayers),
                         SOA_COLUMN(float, eta),
                         SOA_COLUMN(float, pt),
+                        // state at the beam spot: {phi, tip, 1/pt, cotan(theta), zip}
                         SOA_EIGEN_COLUMN(Vector5f, state),
                         SOA_EIGEN_COLUMN(Vector15f, covariance),
                         SOA_SCALAR(int, nTracks),
@@ -43,14 +47,56 @@ namespace reco {
   template <typename TrackerTraits>
   using TrackSoAConstView = typename reco::TrackSoA<TrackerTraits>::template Layout<>::ConstView;
 
-  template <typename TrackerTraits>
-  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float charge(const TrackSoAConstView<TrackerTraits> &tracks,
-                                                                    int32_t i) {
+  /* Implement a type trait to identify the specialisations of TrackSoAConstView<TrackerTraits>
+   * and of TrackSoAView<TrackerTraits>, so that either kind of view is accepted.
+   * This is done explicitly for all possible pixel topologies, because we did not find a way
+   * to use template deduction with a partial specialisation.
+   */
+  template <typename T>
+  struct IsTrackSoAConstView : std::false_type {};
+  template <>
+  struct IsTrackSoAConstView<TrackSoAConstView<pixelTopology::Phase1>> : std::true_type {};
+  template <>
+  struct IsTrackSoAConstView<TrackSoAView<pixelTopology::Phase1>> : std::true_type {};
+  template <>
+  struct IsTrackSoAConstView<TrackSoAConstView<pixelTopology::Phase2>> : std::true_type {};
+  template <>
+  struct IsTrackSoAConstView<TrackSoAView<pixelTopology::Phase2>> : std::true_type {};
+  template <>
+  struct IsTrackSoAConstView<TrackSoAConstView<pixelTopology::HIonPhase1>> : std::true_type {};
+  template <>
+  struct IsTrackSoAConstView<TrackSoAView<pixelTopology::HIonPhase1>> : std::true_type {};
+
+  template <typename T>
+  constexpr bool isTrackSoAConstView = IsTrackSoAConstView<T>::value;
+
+  template <typename ConstView, typename = std::enable_if_t<isTrackSoAConstView<ConstView>>>
+  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float charge(ConstView const& tracks, int32_t i) {
     //was: std::copysign(1.f, tracks[i].state()(2)). Will be constexpr with C++23
     float v = tracks[i].state()(2);
     return float((0.0f < v) - (v < 0.0f));
   }
 
+  template <typename ConstView, typename = std::enable_if_t<isTrackSoAConstView<ConstView>>>
+  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float phi(ConstView const& tracks, int32_t i) {
+    return tracks[i].state()(0);
+  }
+
+  template <typename ConstView, typename = std::enable_if_t<isTrackSoAConstView<ConstView>>>
+  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float tip(ConstView const& tracks, int32_t i) {
+    return tracks[i].state()(1);
+  }
+
+  template <typename ConstView, typename = std::enable_if_t<isTrackSoAConstView<ConstView>>>
+  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float zip(ConstView const& tracks, int32_t i) {
+    return tracks[i].state()(4);
+  }
+
+  template <typename ConstView, typename = std::enable_if_t<isTrackSoAConstView<ConstView>>>
+  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr bool isTriplet(ConstView const& tracks, int32_t i) {
+    return tracks[i].nLayers() == 3;
+  }
+
 }  // namespace reco
 
-#endif
+#endif  // DataFormats_TrackSoA_interface_TracksSoA_h
diff --git a/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h b/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h
index 8affb29845779..6b95d2843653f 100644
--- a/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h
+++ b/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h
@@ -12,29 +12,7 @@ struct TracksUtilities {
   using TrackSoAConstView = typename reco::TrackSoA<TrackerTraits>::template Layout<>::ConstView;
   using hindex_type = typename reco::TrackSoA<TrackerTraits>::hindex_type;
 
-  // State at the Beam spot
-  // phi,tip,1/pt,cotan(theta),zip
-  /*  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float charge(const TrackSoAConstView &tracks, int32_t i) {
-    //was: std::copysign(1.f, tracks[i].state()(2)). Will be constexpr with C++23
-    float v = tracks[i].state()(2);
-    return float((0.0f < v) - (v < 0.0f));
-  }
-*/
-  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float phi(const TrackSoAConstView &tracks, int32_t i) {
-    return tracks[i].state()(0);
-  }
-
-  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float tip(const TrackSoAConstView &tracks, int32_t i) {
-    return tracks[i].state()(1);
-  }
-
-  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float zip(const TrackSoAConstView &tracks, int32_t i) {
-    return tracks[i].state()(4);
-  }
-
-  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr bool isTriplet(const TrackSoAConstView &tracks, int i) {
-    return tracks[i].nLayers() == 3;
-  }
+  // state at the beam spot: { phi, tip, 1/pt, cotan(theta), zip }
 
   template <typename V3, typename M3, typename V2, typename M2>
   ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr void copyFromCircle(
@@ -109,7 +87,6 @@ namespace pixelTrack {
   struct QualityCutsT<TrackerTraits, pixelTopology::isPhase1Topology<TrackerTraits>> {
     using TrackSoAView = typename reco::TrackSoA<TrackerTraits>::template Layout<>::View;
     using TrackSoAConstView = typename reco::TrackSoA<TrackerTraits>::template Layout<>::ConstView;
-    using tracksHelper = TracksUtilities<TrackerTraits>;
     float chi2Coeff[4];
     float chi2MaxPt;  // GeV
     float chi2Scale;
@@ -130,8 +107,8 @@ namespace pixelTrack {
       //   - for quadruplets: |Tip| < 0.5 cm, pT > 0.3 GeV, |Zip| < 12.0 cm
       // (see CAHitNtupletGeneratorGPU.cc)
       auto const &region = (nHits > 3) ? quadruplet : triplet;
-      return (std::abs(tracksHelper::tip(tracks, it)) < region.maxTip) and (tracks.pt(it) > region.minPt) and
-             (std::abs(tracksHelper::zip(tracks, it)) < region.maxZip);
+      return (std::abs(reco::tip(tracks, it)) < region.maxTip) and (tracks.pt(it) > region.minPt) and
+             (std::abs(reco::zip(tracks, it)) < region.maxZip);
     }
 
     ALPAKA_FN_ACC ALPAKA_FN_INLINE bool strictCut(const TrackSoAConstView &tracks, int it) const {
@@ -172,7 +149,6 @@ namespace pixelTrack {
   struct QualityCutsT<TrackerTraits, pixelTopology::isPhase2Topology<TrackerTraits>> {
     using TrackSoAView = typename reco::TrackSoA<TrackerTraits>::template Layout<>::View;
     using TrackSoAConstView = typename reco::TrackSoA<TrackerTraits>::template Layout<>::ConstView;
-    using tracksHelper = TracksUtilities<TrackerTraits>;
 
     float maxChi2;
     float minPt;
@@ -180,8 +156,8 @@ namespace pixelTrack {
     float maxZip;
 
     ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isHP(const TrackSoAConstView &tracks, int nHits, int it) const {
-      return (std::abs(tracksHelper::tip(tracks, it)) < maxTip) and (tracks.pt(it) > minPt) and
-             (std::abs(tracksHelper::zip(tracks, it)) < maxZip);
+      return (std::abs(reco::tip(tracks, it)) < maxTip) and (tracks.pt(it) > minPt) and
+             (std::abs(reco::zip(tracks, it)) < maxZip);
     }
     ALPAKA_FN_ACC ALPAKA_FN_INLINE bool strictCut(const TrackSoAConstView &tracks, int it) const {
       return tracks.chi2(it) >= maxChi2;
diff --git a/DataFormats/TrackSoA/src/alpaka/classes_cuda.h b/DataFormats/TrackSoA/src/alpaka/classes_cuda.h
index 4783184611401..17f3b64498711 100644
--- a/DataFormats/TrackSoA/src/alpaka/classes_cuda.h
+++ b/DataFormats/TrackSoA/src/alpaka/classes_cuda.h
@@ -1,4 +1,3 @@
-
 #ifndef DataFormats_TrackSoA_src_alpaka_classes_cuda_h
 #define DataFormats_TrackSoA_src_alpaka_classes_cuda_h
 
@@ -9,6 +8,6 @@
 #include "DataFormats/TrackSoA/interface/TracksDevice.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
 
-using namespace reco;
+using namespace pixelTopology;
 
 #endif  // DataFormats_TrackSoA_src_alpaka_classes_cuda_h
diff --git a/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml
index c04ca173c49f9..9edee6439e63b 100644
--- a/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml
+++ b/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml
@@ -1,9 +1,10 @@
 <lcgdict>
-  <class name="alpaka_cuda_async::PortableCollection<TrackLayout<pixelTopology::Phase1>>" persistent="false"/>
+  <class name="alpaka_cuda_async::PortableCollection<reco::TrackLayout<pixelTopology::Phase1>>" persistent="false"/>
   <class name="alpaka_cuda_async::pixelTrack::TracksSoACollectionPhase1" persistent="false"/>
   <class name="edm::DeviceProduct<alpaka_cuda_async::pixelTrack::TracksSoACollectionPhase1>" persistent="false"/>
   <class name="edm::Wrapper<edm::DeviceProduct<alpaka_cuda_async::pixelTrack::TracksSoACollectionPhase1>>" persistent="false"/>
-  <class name="alpaka_cuda_async::PortableCollection<TrackLayout<pixelTopology::Phase2>>" persistent="false"/>
+
+  <class name="alpaka_cuda_async::PortableCollection<reco::TrackLayout<pixelTopology::Phase2>>" persistent="false"/>
   <class name="alpaka_cuda_async::pixelTrack::TracksSoACollectionPhase2" persistent="false"/>
   <class name="edm::DeviceProduct<alpaka_cuda_async::pixelTrack::TracksSoACollectionPhase2>" persistent="false"/>
   <class name="edm::Wrapper<edm::DeviceProduct<alpaka_cuda_async::pixelTrack::TracksSoACollectionPhase2>>" persistent="false"/>
diff --git a/DataFormats/TrackSoA/src/alpaka/classes_rocm.h b/DataFormats/TrackSoA/src/alpaka/classes_rocm.h
index 38143a6058c36..0267ddeb213d5 100644
--- a/DataFormats/TrackSoA/src/alpaka/classes_rocm.h
+++ b/DataFormats/TrackSoA/src/alpaka/classes_rocm.h
@@ -1,4 +1,3 @@
-
 #ifndef DataFormats_TrackSoA_src_alpaka_classes_rocm_h
 #define DataFormats_TrackSoA_src_alpaka_classes_rocm_h
 
@@ -9,6 +8,6 @@
 #include "DataFormats/TrackSoA/interface/TracksDevice.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/config.h"
 
-using namespace reco;
+using namespace pixelTopology;
 
 #endif  // DataFormats_TrackSoA_src_alpaka_classes_rocm_h
diff --git a/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml
index b7e40aedead42..772a1b385a957 100644
--- a/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml
+++ b/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml
@@ -1,9 +1,10 @@
 <lcgdict>
-  <class name="alpaka_rocm_async::PortableCollection<TrackLayout<pixelTopology::Phase1>>" persistent="false"/>
+  <class name="alpaka_rocm_async::PortableCollection<reco::TrackLayout<pixelTopology::Phase1>>" persistent="false"/>
   <class name="alpaka_rocm_async::pixelTrack::TracksSoACollectionPhase1" persistent="false"/>
   <class name="edm::DeviceProduct<alpaka_rocm_async::pixelTrack::TracksSoACollectionPhase1>" persistent="false"/>
   <class name="edm::Wrapper<edm::DeviceProduct<alpaka_rocm_async::pixelTrack::TracksSoACollectionPhase1>>" persistent="false"/>
-  <class name="alpaka_rocm_async::PortableCollection<TrackLayout<pixelTopology::Phase2>>" persistent="false"/>
+
+  <class name="alpaka_rocm_async::PortableCollection<reco::TrackLayout<pixelTopology::Phase2>>" persistent="false"/>
   <class name="alpaka_rocm_async::pixelTrack::TracksSoACollectionPhase2" persistent="false"/>
   <class name="edm::DeviceProduct<alpaka_rocm_async::pixelTrack::TracksSoACollectionPhase2>" persistent="false"/>
   <class name="edm::Wrapper<edm::DeviceProduct<alpaka_rocm_async::pixelTrack::TracksSoACollectionPhase2>>" persistent="false"/>
diff --git a/DataFormats/TrackSoA/src/classes.h b/DataFormats/TrackSoA/src/classes.h
index 43d40e5f8f3ac..c97bb234d7e18 100644
--- a/DataFormats/TrackSoA/src/classes.h
+++ b/DataFormats/TrackSoA/src/classes.h
@@ -6,6 +6,5 @@
 #include "DataFormats/TrackSoA/interface/TracksHost.h"
 
 using namespace pixelTopology;
-using namespace reco;
 
 #endif  // DataFormats_TrackSoA_src_classes_h
diff --git a/DataFormats/TrackSoA/src/classes_def.xml b/DataFormats/TrackSoA/src/classes_def.xml
index fd8fc0781ee25..5ae5fbf55cd8f 100644
--- a/DataFormats/TrackSoA/src/classes_def.xml
+++ b/DataFormats/TrackSoA/src/classes_def.xml
@@ -1,32 +1,32 @@
 <lcgdict>
-  <class name="TrackSoA<pixelTopology::Phase1>"/>
-  <class name="TrackSoA<pixelTopology::Phase1>::Layout<>"/>
-  <class name="TrackLayout<pixelTopology::Phase1>"/>
-  <class name="TrackSoAView<pixelTopology::Phase1>"/>
+  <class name="reco::TrackSoA<pixelTopology::Phase1>"/>
+  <class name="reco::TrackSoA<pixelTopology::Phase1>::Layout<>"/>
+  <class name="reco::TrackLayout<pixelTopology::Phase1>"/>
+  <class name="reco::TrackSoAView<pixelTopology::Phase1>"/>
 
-  <class name="PortableHostCollection<TrackLayout<pixelTopology::Phase1>>"/>
+  <class name="PortableHostCollection<reco::TrackLayout<pixelTopology::Phase1>>"/>
   <class name="pixelTrack::TracksHostPhase1" ClassVersion="3">
     <version ClassVersion="3" checksum="794224446"/>
   </class>
   <class name="edm::Wrapper<pixelTrack::TracksHostPhase1>" splitLevel="0"/>
 
-  <class name="TrackSoA<pixelTopology::Phase2>"/>
-  <class name="TrackSoA<pixelTopology::Phase2>::Layout<>"/>
-  <class name="TrackLayout<pixelTopology::Phase2>"/>
-  <class name="TrackSoAView<pixelTopology::Phase2>"/>
+  <class name="reco::TrackSoA<pixelTopology::Phase2>"/>
+  <class name="reco::TrackSoA<pixelTopology::Phase2>::Layout<>"/>
+  <class name="reco::TrackLayout<pixelTopology::Phase2>"/>
+  <class name="reco::TrackSoAView<pixelTopology::Phase2>"/>
 
-  <class name="PortableHostCollection<TrackLayout<pixelTopology::Phase2>>"/>
+  <class name="PortableHostCollection<reco::TrackLayout<pixelTopology::Phase2>>"/>
   <class name="pixelTrack::TracksHostPhase2" ClassVersion="3">
     <version ClassVersion="3" checksum="3813159454"/>
   </class>
   <class name="edm::Wrapper<pixelTrack::TracksHostPhase2>" splitLevel="0"/>
 
-  <class name="TrackSoA<pixelTopology::HIonPhase1>"/>
-  <class name="TrackSoA<pixelTopology::HIonPhase1>::Layout<>"/>
-  <class name="TrackLayout<pixelTopology::HIonPhase1>"/>
-  <class name="TrackSoAView<pixelTopology::HIonPhase1>"/>
+  <class name="reco::TrackSoA<pixelTopology::HIonPhase1>"/>
+  <class name="reco::TrackSoA<pixelTopology::HIonPhase1>::Layout<>"/>
+  <class name="reco::TrackLayout<pixelTopology::HIonPhase1>"/>
+  <class name="reco::TrackSoAView<pixelTopology::HIonPhase1>"/>
 
-  <class name="PortableHostCollection<TrackLayout<pixelTopology::HIonPhase1>>"/>
+  <class name="PortableHostCollection<reco::TrackLayout<pixelTopology::HIonPhase1>>"/>
   <class name="pixelTrack::TracksHostHIonPhase1" ClassVersion="3">
     <version ClassVersion="3" checksum="3935456462"/>
   </class>
diff --git a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc
index 2c2d0961eb106..2b9807e3db054 100644
--- a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc
+++ b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc
@@ -17,12 +17,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     class TestFillKernel {
     public:
       template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
-      ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackSoAView<TrackerTraits> tracks_view) const {
+      ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackSoAView<TrackerTraits> tracks_view, int32_t nTracks) const {
         if (cms::alpakatools::once_per_grid(acc)) {
-          tracks_view.nTracks() = 420;
+          tracks_view.nTracks() = nTracks;
         }
 
-        for (int32_t j : elements_with_stride(acc, tracks_view.metadata().size())) {
+        for (int32_t j : elements_with_stride(acc, nTracks)) {
           tracks_view[j].pt() = (float)j;
           tracks_view[j].eta() = (float)j;
           tracks_view[j].chi2() = (float)j;
@@ -39,17 +39,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     class TestVerifyKernel {
     public:
       template <typename TAcc, typename = std::enable_if_t<alpaka::isAccelerator<TAcc>>>
-      ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackSoAConstView<TrackerTraits> tracks_view) const {
+      ALPAKA_FN_ACC void operator()(TAcc const& acc,
+                                    TrackSoAConstView<TrackerTraits> tracks_view,
+                                    int32_t nTracks) const {
         if (cms::alpakatools::once_per_grid(acc)) {
-          ALPAKA_ASSERT_OFFLOAD(tracks_view.nTracks() == 420);
+          ALPAKA_ASSERT(tracks_view.nTracks() == nTracks);  // nTracks: number of tracks expected in the view
         }
         for (int32_t j : elements_with_stride(acc, tracks_view.nTracks())) {
-          assert(abs(tracks_view[j].pt() - (float)j) < .0001);
-          assert(abs(tracks_view[j].eta() - (float)j) < .0001);
-          assert(abs(tracks_view[j].chi2() - (float)j) < .0001);
-          assert(tracks_view[j].quality() == (Quality)(j % 256));
-          assert(tracks_view[j].nLayers() == j % 128);
-          assert(tracks_view.hitIndices().off[j] == uint32_t(j));
+          ALPAKA_ASSERT(abs(tracks_view[j].pt() - (float)j) < .0001);
+          ALPAKA_ASSERT(abs(tracks_view[j].eta() - (float)j) < .0001);
+          ALPAKA_ASSERT(abs(tracks_view[j].chi2() - (float)j) < .0001);
+          ALPAKA_ASSERT(tracks_view[j].quality() == (Quality)(j % 256));
+          ALPAKA_ASSERT(tracks_view[j].nLayers() == j % 128);
+          ALPAKA_ASSERT(tracks_view.hitIndices().off[j] == uint32_t(j));
         }
       }
     };
@@ -57,14 +59,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
     // Host function which invokes the two kernels above
     template <typename TrackerTraits>
     void runKernels(TrackSoAView<TrackerTraits> tracks_view, Queue& queue) {
+      int32_t tracks = 420;  // number of tracks passed to both kernels; also used to size the work division
       uint32_t items = 64;
-      uint32_t groups = divide_up_by(tracks_view.metadata().size(), items);
+      uint32_t groups = divide_up_by(tracks, items);
       auto workDiv = make_workdiv<Acc1D>(groups, items);
-      alpaka::exec<Acc1D>(queue, workDiv, TestFillKernel<TrackerTraits>{}, tracks_view);
-      alpaka::exec<Acc1D>(queue,
-                          workDiv,
-                          TestVerifyKernel<TrackerTraits>{},
-                          tracks_view);  //TODO: wait for some PR that solves this and then check it!!!
+      alpaka::exec<Acc1D>(queue, workDiv, TestFillKernel<TrackerTraits>{}, tracks_view, tracks);
+      alpaka::exec<Acc1D>(queue, workDiv, TestVerifyKernel<TrackerTraits>{}, tracks_view, tracks);
     }
 
     template void runKernels<pixelTopology::Phase1>(TrackSoAView<pixelTopology::Phase1> tracks_view, Queue& queue);
diff --git a/HLTrigger/Configuration/python/customizeHLTforAlpaka.py b/HLTrigger/Configuration/python/customizeHLTforAlpaka.py
index d2b8fa901461c..0b97262093e86 100644
--- a/HLTrigger/Configuration/python/customizeHLTforAlpaka.py
+++ b/HLTrigger/Configuration/python/customizeHLTforAlpaka.py
@@ -1,7 +1,554 @@
 import FWCore.ParameterSet.Config as cms
 
+
+## Pixel HLT in Alpaka
+def customizeHLTforDQMGPUvsCPUPixel(process):
+    '''Ad-hoc changes to the DQM_PixelReconstruction_v Path and to the DQMGPUvsCPU stream: monitor and compare
+       the Alpaka pixel rechits, tracks and vertices (default backend vs 'serial_sync'); no-op without that Path
+    '''
+    dqmPixelRecoPathName = None
+    for pathName in process.paths_():
+        if pathName.startswith('DQM_PixelReconstruction_v'):
+            dqmPixelRecoPathName = pathName
+            break
+
+    if dqmPixelRecoPathName is None:
+        return process
+
+    process.hltPixelConsumerGPU.eventProducts = [
+        'hltSiPixelClusters',
+        'hltSiPixelClustersSoA',
+        'hltSiPixelDigis',
+        'hltSiPixelRecHits',
+        'hltSiPixelRecHitsSoA',
+        'hltPixelTracks',
+        'hltPixelTracksSoA',
+        'hltPixelVertices',
+        'hltPixelVerticesSoA',
+    ]
+
+    # the CPU consumer gets the same list of labels, each with the 'CPUSerial' suffix appended
+    cpuProducts = [label + 'CPUSerial' for label in process.hltPixelConsumerGPU.eventProducts]
+    process.hltPixelConsumerCPU.eventProducts = cpuProducts
+
+    # modify EventContent of DQMGPUvsCPU stream
+    if hasattr(process, 'hltOutputDQMGPUvsCPU'):
+        process.hltOutputDQMGPUvsCPU.outputCommands = [
+            'drop *',
+            'keep *Cluster*_hltSiPixelClusters_*_*',
+            'keep *Cluster*_hltSiPixelClustersLegacyFormatCPUSerial_*_*',
+            'keep *_hltSiPixelDigis_*_*',
+            'keep *_hltSiPixelDigiErrorsLegacyFormatCPUSerial_*_*',
+            'keep *RecHit*_hltSiPixelRecHits_*_*',
+            'keep *RecHit*_hltSiPixelRecHitsLegacyFormatCPUSerial_*_*',
+            'keep *_hltPixelTracks_*_*',
+            'keep *_hltPixelTracksLegacyFormatCPUSerial_*_*',
+            'keep *_hltPixelVertices_*_*',
+            'keep *_hltPixelVerticesLegacyFormatCPUSerial_*_*',
+        ]
+
+    # PixelRecHits: monitor of CPUSerial product (Alpaka backend: 'serial_sync')
+    process.hltPixelRecHitsSoAMonitorCPU = cms.EDProducer('SiPixelPhase1MonitorRecHitsSoAAlpaka',
+        pixelHitsSrc = cms.InputTag( 'hltSiPixelRecHitsCPUSerial' ),
+        TopFolderName = cms.string( 'SiPixelHeterogeneous/PixelRecHitsCPU' )
+    )
+
+    # PixelRecHits: monitor of GPU product (Alpaka backend: '')
+    process.hltPixelRecHitsSoAMonitorGPU = cms.EDProducer('SiPixelPhase1MonitorRecHitsSoAAlpaka',
+        pixelHitsSrc = cms.InputTag( 'hltSiPixelRecHitsSoA' ),
+        TopFolderName = cms.string( 'SiPixelHeterogeneous/PixelRecHitsGPU' )
+    )
+
+    # PixelRecHits: 'GPUvsCPU' comparisons
+    process.hltPixelRecHitsSoACompareGPUvsCPU = cms.EDProducer('SiPixelPhase1CompareRecHitsSoAAlpaka',
+        pixelHitsSrcHost = cms.InputTag( 'hltSiPixelRecHitsCPUSerial' ),
+        pixelHitsSrcDevice = cms.InputTag( 'hltSiPixelRecHitsSoA' ),
+        topFolderName = cms.string( 'SiPixelHeterogeneous/PixelRecHitsCompareGPUvsCPU' ),
+        minD2cut = cms.double( 1.0E-4 )
+    )
+
+    process.hltPixelTracksSoAMonitorCPU = cms.EDProducer("SiPixelPhase1MonitorTrackSoAAlpaka",
+        mightGet = cms.optional.untracked.vstring,
+        minQuality = cms.string('loose'),
+        pixelTrackSrc = cms.InputTag('hltPixelTracksCPUSerial'),
+        topFolderName = cms.string('SiPixelHeterogeneous/PixelTrackCPU'),
+        useQualityCut = cms.bool(True)
+    )
+
+    process.hltPixelTracksSoAMonitorGPU = cms.EDProducer("SiPixelPhase1MonitorTrackSoAAlpaka",
+        mightGet = cms.optional.untracked.vstring,
+        minQuality = cms.string('loose'),
+        pixelTrackSrc = cms.InputTag('hltPixelTracksSoA'),
+        topFolderName = cms.string('SiPixelHeterogeneous/PixelTrackGPU'),
+        useQualityCut = cms.bool(True)
+    )
+
+    process.hltPixelTracksSoACompareGPUvsCPU = cms.EDProducer("SiPixelPhase1CompareTrackSoAAlpaka",
+        deltaR2cut = cms.double(0.04),
+        mightGet = cms.optional.untracked.vstring,
+        minQuality = cms.string('loose'),
+        pixelTrackSrcHost = cms.InputTag("hltPixelTracksCPUSerial"),
+        pixelTrackSrcDevice = cms.InputTag("hltPixelTracksSoA"),
+        topFolderName = cms.string('SiPixelHeterogeneous/PixelTrackCompareGPUvsCPU'),
+        useQualityCut = cms.bool(True)
+    )
+
+    process.hltPixelVertexSoAMonitorCPU = cms.EDProducer("SiPixelMonitorVertexSoAAlpaka",
+        beamSpotSrc = cms.InputTag("hltOnlineBeamSpot"),
+        mightGet = cms.optional.untracked.vstring,
+        pixelVertexSrc = cms.InputTag("hltPixelVerticesCPUSerial"),
+        topFolderName = cms.string('SiPixelHeterogeneous/PixelVertexCPU')
+    )
+
+    process.hltPixelVertexSoAMonitorGPU = cms.EDProducer("SiPixelMonitorVertexSoAAlpaka",
+        beamSpotSrc = cms.InputTag("hltOnlineBeamSpot"),
+        mightGet = cms.optional.untracked.vstring,
+        pixelVertexSrc = cms.InputTag("hltPixelVerticesSoA"),
+        topFolderName = cms.string('SiPixelHeterogeneous/PixelVertexGPU')
+    )
+
+    process.hltPixelVertexSoACompareGPUvsCPU = cms.EDProducer("SiPixelCompareVertexSoAAlpaka",
+        beamSpotSrc = cms.InputTag("hltOnlineBeamSpot"),
+        dzCut = cms.double(1),
+        mightGet = cms.optional.untracked.vstring,
+        pixelVertexSrcHost = cms.InputTag("hltPixelVerticesCPUSerial"),
+        pixelVertexSrcDevice = cms.InputTag("hltPixelVerticesSoA"),
+        topFolderName = cms.string('SiPixelHeterogeneous/PixelVertexCompareGPUvsCPU')
+    )
+
+    process.HLTDQMPixelReconstruction = cms.Sequence(
+        process.hltPixelRecHitsSoAMonitorCPU
+      + process.hltPixelRecHitsSoAMonitorGPU
+      + process.hltPixelRecHitsSoACompareGPUvsCPU
+      + process.hltPixelTracksSoAMonitorCPU
+      + process.hltPixelTracksSoAMonitorGPU
+      + process.hltPixelTracksSoACompareGPUvsCPU
+      + process.hltPixelVertexSoAMonitorCPU
+      + process.hltPixelVertexSoAMonitorGPU
+      + process.hltPixelVertexSoACompareGPUvsCPU
+    )
+
+    # Add CPUSerial sequences to DQM_PixelReconstruction_v Path, right after HLTRecopixelvertexingSequence
+    dqmPixelRecoPath = getattr(process, dqmPixelRecoPathName)
+    try:
+        dqmPixelRecoPathIndex = dqmPixelRecoPath.index(process.HLTRecopixelvertexingSequence) + 1
+        for cpuSeqName in [
+            'HLTDoLocalPixelCPUSerialSequence',
+            'HLTRecopixelvertexingCPUSerialSequence',
+        ]:
+            dqmPixelRecoPath.insert(dqmPixelRecoPathIndex, getattr(process, cpuSeqName))
+            dqmPixelRecoPathIndex += 1
+    except Exception:  # best effort: give up on the insertion if a sequence is missing (but let Ctrl-C through)
+        pass
+
+    return process
+
+def customizeHLTforAlpakaPixelRecoLocal(process):
+    '''Customisation to introduce the Local Pixel Reconstruction in Alpaka: defines the ESProducers, the EDProducers
+       with HLTDoLocalPixelTask, and the CPUSerial ('serial_sync') clones of the EDProducers with their Task/Sequence'''
+    process.hltESPSiPixelCablingSoA = cms.ESProducer('SiPixelCablingSoAESProducer@alpaka', 
+        CablingMapLabel = cms.string(''),
+        UseQualityInfo = cms.bool(False),
+        appendToDataLabel = cms.string(''),
+        alpaka = cms.untracked.PSet(
+            backend = cms.untracked.string('')
+        )
+    )
+
+    process.hltESPSiPixelGainCalibrationForHLTSoA = cms.ESProducer('SiPixelGainCalibrationForHLTSoAESProducer@alpaka',
+        appendToDataLabel = cms.string(''),
+        alpaka = cms.untracked.PSet(
+            backend = cms.untracked.string('')
+        )
+    )
+
+    process.hltESPPixelCPEFastParamsPhase1 = cms.ESProducer('PixelCPEFastParamsESProducerAlpakaPhase1@alpaka', 
+        appendToDataLabel = cms.string(''),
+        alpaka = cms.untracked.PSet(
+            backend = cms.untracked.string('')
+        )
+    )
+
+    ###
+
+    # alpaka EDProducer
+    # consumes
+    #  - reco::BeamSpot
+    # produces
+    #  - BeamSpotDevice
+    process.hltOnlineBeamSpotDevice = cms.EDProducer('BeamSpotDeviceProducer@alpaka',
+        src = cms.InputTag('hltOnlineBeamSpot'),
+        alpaka = cms.untracked.PSet(
+            backend = cms.untracked.string('')
+        )
+    )
+
+    # alpaka EDProducer
+    # consumes
+    #  - FEDRawDataCollection
+    # produces (* optional)
+    #  - SiPixelClustersSoA
+    #  - SiPixelDigisSoACollection
+    #  - SiPixelDigiErrorsSoACollection *
+    #  - SiPixelFormatterErrors *
+    process.hltSiPixelClustersSoA = cms.EDProducer('SiPixelRawToClusterPhase1@alpaka',
+        mightGet = cms.optional.untracked.vstring,
+        IncludeErrors = cms.bool(True),
+        UseQualityInfo = cms.bool(False),
+        clusterThreshold_layer1 = cms.int32(4000),
+        clusterThreshold_otherLayers = cms.int32(4000),
+        VCaltoElectronGain      = cms.double(1),  # all gains=1, pedestals=0
+        VCaltoElectronGain_L1   = cms.double(1),
+        VCaltoElectronOffset    = cms.double(0),
+        VCaltoElectronOffset_L1 = cms.double(0),
+        InputLabel = cms.InputTag('rawDataCollector'),
+        Regions = cms.PSet(
+            inputs = cms.optional.VInputTag,
+            deltaPhi = cms.optional.vdouble,
+            maxZ = cms.optional.vdouble,
+            beamSpot = cms.optional.InputTag
+        ),
+        CablingMapLabel = cms.string(''),
+        # autoselect the alpaka backend
+        alpaka = cms.untracked.PSet(
+            backend = cms.untracked.string('')
+        )
+    )
+
+    process.hltSiPixelClusters = cms.EDProducer('SiPixelDigisClustersFromSoAAlpakaPhase1',
+        src = cms.InputTag('hltSiPixelClustersSoA'),
+        clusterThreshold_layer1 = cms.int32(4000),
+        clusterThreshold_otherLayers = cms.int32(4000),
+        produceDigis = cms.bool(False),
+        storeDigis = cms.bool(False)
+    )
+
+    process.hltSiPixelClustersCache = cms.EDProducer('SiPixelClusterShapeCacheProducer',
+        src = cms.InputTag( 'hltSiPixelClusters' ),
+        onDemand = cms.bool( False )
+    )
+
+    # legacy EDProducer
+    # consumes
+    #  - SiPixelDigiErrorsHost
+    #  - SiPixelFormatterErrors
+    # produces
+    #  - edm::DetSetVector<SiPixelRawDataError>
+    #  - DetIdCollection
+    #  - DetIdCollection, 'UserErrorModules'
+    #  - edmNew::DetSetVector<PixelFEDChannel>
+    process.hltSiPixelDigis = cms.EDProducer('SiPixelDigiErrorsFromSoAAlpaka',
+        digiErrorSoASrc = cms.InputTag('hltSiPixelClustersSoA'),
+        fmtErrorsSoASrc = cms.InputTag('hltSiPixelClustersSoA'),
+        CablingMapLabel = cms.string(''),
+        UsePhase1 = cms.bool(True),
+        ErrorList = cms.vint32(29),
+        UserErrorList = cms.vint32(40)
+    )
+
+    # alpaka EDProducer
+    # consumes
+    #  - BeamSpotDevice
+    #  - SiPixelClustersSoA
+    #  - SiPixelDigisSoACollection
+    # produces
+    #  - TrackingRecHitsSoACollection<TrackerTraits>
+    process.hltSiPixelRecHitsSoA = cms.EDProducer('SiPixelRecHitAlpakaPhase1@alpaka',
+        beamSpot = cms.InputTag('hltOnlineBeamSpotDevice'),
+        src = cms.InputTag('hltSiPixelClustersSoA'),
+        CPE = cms.string('PixelCPEFastParams'),
+        mightGet = cms.optional.untracked.vstring,
+        # autoselect the alpaka backend
+        alpaka = cms.untracked.PSet(
+            backend = cms.untracked.string('')
+        )
+    )
+
+    process.hltSiPixelRecHits = cms.EDProducer('SiPixelRecHitFromSoAAlpakaPhase1',
+        pixelRecHitSrc = cms.InputTag('hltSiPixelRecHitsSoA'),
+        src = cms.InputTag('hltSiPixelClusters'),
+    )
+
+    ###
+    ### Task: Pixel Local Reconstruction
+    ###
+    process.HLTDoLocalPixelTask = cms.ConditionalTask(
+        process.hltOnlineBeamSpotDevice,
+        process.hltSiPixelClustersSoA,
+        process.hltSiPixelClusters,   # reads hltSiPixelClustersSoA
+        process.hltSiPixelClustersCache,          # TODO: check whether this is really needed
+        process.hltSiPixelDigis, # reads hltSiPixelClustersSoA
+        process.hltSiPixelRecHitsSoA,
+        process.hltSiPixelRecHits,    # reads hltSiPixelRecHitsSoA and hltSiPixelClusters
+    )
+
+    ###
+    ### CPUSerial version of Pixel Local Reconstruction
+    ###
+    process.hltOnlineBeamSpotDeviceCPUSerial = process.hltOnlineBeamSpotDevice.clone(
+        alpaka = dict( backend = 'serial_sync' )
+    )
+
+    process.hltSiPixelClustersCPUSerial = process.hltSiPixelClustersSoA.clone(
+        alpaka = dict( backend = 'serial_sync' )
+    )
+
+    process.hltSiPixelClustersLegacyFormatCPUSerial = process.hltSiPixelClusters.clone(
+        src = 'hltSiPixelClustersCPUSerial'
+    )
+
+    process.hltSiPixelDigiErrorsLegacyFormatCPUSerial = process.hltSiPixelDigis.clone(
+        digiErrorSoASrc = 'hltSiPixelClustersCPUSerial',
+        fmtErrorsSoASrc = 'hltSiPixelClustersCPUSerial',
+    )
+
+    process.hltSiPixelRecHitsCPUSerial = process.hltSiPixelRecHitsSoA.clone(
+        beamSpot = 'hltOnlineBeamSpotDeviceCPUSerial',
+        src = 'hltSiPixelClustersCPUSerial',
+        alpaka = dict( backend = 'serial_sync' )
+    )
+
+    process.hltSiPixelRecHitsLegacyFormatCPUSerial = process.hltSiPixelRecHits.clone(
+        pixelRecHitSrc = 'hltSiPixelRecHitsCPUSerial',
+        src = 'hltSiPixelClustersLegacyFormatCPUSerial',
+    )
+
+    process.HLTDoLocalPixelCPUSerialTask = cms.ConditionalTask(
+        process.hltOnlineBeamSpotDeviceCPUSerial,
+        process.hltSiPixelClustersCPUSerial,
+        process.hltSiPixelClustersLegacyFormatCPUSerial,
+        process.hltSiPixelDigiErrorsLegacyFormatCPUSerial,
+        process.hltSiPixelRecHitsCPUSerial,
+        process.hltSiPixelRecHitsLegacyFormatCPUSerial,
+    )
+
+    process.HLTDoLocalPixelCPUSerialSequence = cms.Sequence( process.HLTDoLocalPixelCPUSerialTask )
+
+    return process
+
+def customizeHLTforAlpakaPixelRecoTracking(process):
+    '''Customisation to introduce the Pixel-Track Reconstruction in Alpaka: defines hltPixelTracksSoA and
+       hltPixelTracks, their CPUSerial ('serial_sync') counterparts, and the Tasks/Sequence holding them'''
+
+    # alpaka EDProducer
+    # consumes
+    #  - TrackingRecHitsSoACollection<TrackerTraits>
+    # produces
+    #  - TkSoADevice
+    process.hltPixelTracksSoA = cms.EDProducer('CAHitNtupletAlpakaPhase1@alpaka',
+        pixelRecHitSrc = cms.InputTag('hltSiPixelRecHitsSoA'),
+        CPE = cms.string('PixelCPEFastParams'),
+        ptmin = cms.double(0.9),
+        CAThetaCutBarrel = cms.double(0.002),
+        CAThetaCutForward = cms.double(0.003),
+        hardCurvCut = cms.double(0.0328407225),
+        dcaCutInnerTriplet = cms.double(0.15),
+        dcaCutOuterTriplet = cms.double(0.25),
+        earlyFishbone = cms.bool(True),
+        lateFishbone = cms.bool(False),
+        fillStatistics = cms.bool(False),
+        minHitsPerNtuplet = cms.uint32(3),
+        phiCuts = cms.vint32(
+            522, 730, 730, 522, 626,
+            626, 522, 522, 626, 626,
+            626, 522, 522, 522, 522,
+            522, 522, 522, 522
+        ),
+        maxNumberOfDoublets = cms.uint32(524288),
+        minHitsForSharingCut = cms.uint32(10),
+        fitNas4 = cms.bool(False),
+        doClusterCut = cms.bool(True),
+        doZ0Cut = cms.bool(True),
+        doPtCut = cms.bool(True),
+        useRiemannFit = cms.bool(False),
+        doSharedHitCut = cms.bool(True),
+        dupPassThrough = cms.bool(False),
+        useSimpleTripletCleaner = cms.bool(True),
+        idealConditions = cms.bool(False),
+        includeJumpingForwardDoublets = cms.bool(True),
+        trackQualityCuts = cms.PSet(
+            chi2MaxPt = cms.double(10),
+            chi2Coeff = cms.vdouble(0.9, 1.8),
+            chi2Scale = cms.double(8),
+            tripletMinPt = cms.double(0.5),
+            tripletMaxTip = cms.double(0.3),
+            tripletMaxZip = cms.double(12),
+            quadrupletMinPt = cms.double(0.3),
+            quadrupletMaxTip = cms.double(0.5),
+            quadrupletMaxZip = cms.double(12)
+        ),
+        # autoselect the alpaka backend
+        alpaka = cms.untracked.PSet(
+            backend = cms.untracked.string('')
+        )
+    )
+
+    process.hltPixelTracksCPUSerial = process.hltPixelTracksSoA.clone(
+        pixelRecHitSrc = 'hltSiPixelRecHitsCPUSerial',
+        alpaka = dict( backend = 'serial_sync' )
+    )
+
+    process.hltPixelTracks = cms.EDProducer("PixelTrackProducerFromSoAAlpakaPhase1",
+        beamSpot = cms.InputTag("hltOnlineBeamSpot"),
+        minNumberOfHits = cms.int32(0),
+        minQuality = cms.string('loose'),
+        pixelRecHitLegacySrc = cms.InputTag("hltSiPixelRecHits"),
+        trackSrc = cms.InputTag("hltPixelTracksSoA")
+    )
+
+    process.hltPixelTracksLegacyFormatCPUSerial = process.hltPixelTracks.clone(
+        pixelRecHitLegacySrc = cms.InputTag("hltSiPixelRecHitsLegacyFormatCPUSerial"),
+        trackSrc = cms.InputTag("hltPixelTracksCPUSerial")
+    )
+
+    process.HLTRecoPixelTracksTask = cms.ConditionalTask(
+        process.hltPixelTracksSoA,
+        process.hltPixelTracks,
+    )
+
+    process.HLTRecoPixelTracksCPUSerialTask = cms.ConditionalTask(
+        process.hltPixelTracksCPUSerial,
+        process.hltPixelTracksLegacyFormatCPUSerial,
+    )
+
+    process.HLTRecoPixelTracksCPUSerialSequence = cms.Sequence( process.HLTRecoPixelTracksCPUSerialTask )
+
+    return process
+
+def customizeHLTforAlpakaPixelRecoVertexing(process):
+    '''Customisation to introduce the Pixel-Vertex Reconstruction in Alpaka: defines hltPixelVerticesSoA and
+       hltPixelVertices, their CPUSerial ('serial_sync') counterparts, and the Tasks/Sequence holding them'''
+
+    # alpaka EDProducer
+    # consumes
+    #  - TkSoADevice
+    # produces
+    #  - ZVertexDevice
+    process.hltPixelVerticesSoA = cms.EDProducer('PixelVertexProducerAlpakaPhase1@alpaka',
+        oneKernel = cms.bool(True),
+        useDensity = cms.bool(True),
+        useDBSCAN = cms.bool(False),
+        useIterative = cms.bool(False),
+        minT = cms.int32(2),
+        eps = cms.double(0.07),
+        errmax = cms.double(0.01),
+        chi2max = cms.double(9),
+        PtMin = cms.double(0.5),
+        PtMax = cms.double(75),
+        pixelTrackSrc = cms.InputTag('hltPixelTracksSoA'),
+        # autoselect the alpaka backend
+        alpaka = cms.untracked.PSet(
+            backend = cms.untracked.string('')
+        )
+    )
+
+    process.hltPixelVerticesCPUSerial = process.hltPixelVerticesSoA.clone(
+        pixelTrackSrc = 'hltPixelTracksCPUSerial',
+        alpaka = dict( backend = 'serial_sync' )
+    )
+
+    process.hltPixelVertices = cms.EDProducer("PixelVertexProducerFromSoAAlpaka",
+        TrackCollection = cms.InputTag("hltPixelTracks"),
+        beamSpot = cms.InputTag("hltOnlineBeamSpot"),
+        src = cms.InputTag("hltPixelVerticesSoA")
+    )
+
+    process.hltPixelVerticesLegacyFormatCPUSerial = process.hltPixelVertices.clone(
+        TrackCollection = cms.InputTag("hltPixelTracksLegacyFormatCPUSerial"),
+        src = cms.InputTag("hltPixelVerticesCPUSerial")
+    )
+
+    process.HLTRecopixelvertexingTask = cms.ConditionalTask(
+        process.HLTRecoPixelTracksTask,
+        process.hltPixelVerticesSoA,
+        process.hltPixelVertices,
+        process.hltTrimmedPixelVertices  # not defined in this function: must already exist in the process
+    )
+
+    process.HLTRecopixelvertexingCPUSerialTask = cms.ConditionalTask(
+        process.HLTRecoPixelTracksCPUSerialTask,
+        process.hltPixelVerticesCPUSerial,
+        process.hltPixelVerticesLegacyFormatCPUSerial,
+    )
+
+    process.HLTRecopixelvertexingCPUSerialSequence = cms.Sequence( process.HLTRecopixelvertexingCPUSerialTask )
+
+    return process
+
+def customizeHLTforAlpakaPixelRecoTheRest(process):
+    '''(Re)define hltL2TauTagNNProducer as an L2TauNNProducerAlpaka, reading the Alpaka pixel tracks and
+       vertices in SoA format (hltPixelTracksSoA, hltPixelVerticesSoA)'''
+    process.hltL2TauTagNNProducer = cms.EDProducer("L2TauNNProducerAlpaka",
+        BeamSpot = cms.InputTag("hltOnlineBeamSpot"),
+        L1Taus = cms.VPSet(
+            cms.PSet(
+                L1CollectionName = cms.string('DoubleTau'),
+                L1TauTrigger = cms.InputTag("hltL1sDoubleTauBigOR")
+            ),
+            cms.PSet(
+                L1CollectionName = cms.string('SingleTau'),
+                L1TauTrigger = cms.InputTag("hltL1sSingleTau")
+            ),
+            cms.PSet(
+                L1CollectionName = cms.string('MuXXTauYY'),
+                L1TauTrigger = cms.InputTag("hltL1sBigOrMuXXerIsoTauYYer")
+            ),
+            cms.PSet(
+                L1CollectionName = cms.string('Mu22Tau40'),
+                L1TauTrigger = cms.InputTag("hltL1sMu22erIsoTau40er")
+            ),
+            cms.PSet(
+                L1CollectionName = cms.string('DoubleTauJet'),
+                L1TauTrigger = cms.InputTag("hltL1sBigORDoubleTauJet")
+            ),
+            cms.PSet(
+                L1CollectionName = cms.string('VBFIsoTau'),
+                L1TauTrigger = cms.InputTag("hltL1VBFDiJetIsoTau")
+            ),
+            cms.PSet(
+                L1CollectionName = cms.string('Mu18TauXX'),
+                L1TauTrigger = cms.InputTag("hltL1sVeryBigORMu18erTauXXer2p1")
+            ),
+            cms.PSet(
+                L1CollectionName = cms.string('DoubleTauLowMass'),
+                L1TauTrigger = cms.InputTag("hltL1sDoubleTauBigORWithLowMass")
+            )
+        ),
+        debugLevel = cms.int32(0),
+        ebInput = cms.InputTag("hltEcalRecHit","EcalRecHitsEB"),
+        eeInput = cms.InputTag("hltEcalRecHit","EcalRecHitsEE"),
+        fractionSumPt2 = cms.double(0.3),
+        graphPath = cms.string('RecoTauTag/TrainingFiles/data/L2TauNNTag/L2TauTag_Run3v1.pb'),
+        hbheInput = cms.InputTag("hltHbhereco"),
+        hoInput = cms.InputTag("hltHoreco"),
+        maxVtx = cms.uint32(100),
+        minSumPt2 = cms.double(0.0),
+        normalizationDict = cms.string('RecoTauTag/TrainingFiles/data/L2TauNNTag/NormalizationDict.json'),
+        pataTracks = cms.InputTag("hltPixelTracksSoA"),
+        pataVertices = cms.InputTag("hltPixelVerticesSoA"),
+        track_chi2_max = cms.double(99999.0),
+        track_pt_max = cms.double(10.0),
+        track_pt_min = cms.double(1.0)
+    )
+    
+    return process
+
+def customizeHLTforAlpakaPixelReco(process):
+    '''Apply, in this order, the Alpaka pixel customisations: local reco, tracks, vertices, DQM, the rest'''
+    for customise in (
+        customizeHLTforAlpakaPixelRecoLocal,
+        customizeHLTforAlpakaPixelRecoTracking,
+        customizeHLTforAlpakaPixelRecoVertexing,
+        customizeHLTforDQMGPUvsCPUPixel,
+        customizeHLTforAlpakaPixelRecoTheRest,
+    ):
+        process = customise(process)
+    return process
+
+## ECAL HLT in Alpaka
+
 def customizeHLTforAlpakaEcalLocalReco(process):
-    process.load("HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi")
+    
     if hasattr(process, 'hltEcalDigisGPU'):
         process.hltEcalDigisPortable = cms.EDProducer("EcalRawToDigiPortable@alpaka",
             FEDs = process.hltEcalDigisGPU.FEDs,
@@ -96,7 +643,12 @@ def customizeHLTforAlpakaEcalLocalReco(process):
     return process
 
 def customizeHLTforAlpaka(process):
+    '''Load ProcessAcceleratorAlpaka_cfi and Accelerators_cff, then apply the ECAL and Pixel Alpaka customisations'''
+    process.load("HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi")
+    process.load('Configuration.StandardSequences.Accelerators_cff')
+    
     process = customizeHLTforAlpakaEcalLocalReco(process)
+    process = customizeHLTforAlpakaPixelReco(process)
 
     return process
 
diff --git a/HLTrigger/Configuration/python/customizeHLTforCMSSW.py b/HLTrigger/Configuration/python/customizeHLTforCMSSW.py
index 2d457ddb337a0..bebd39c86a65b 100644
--- a/HLTrigger/Configuration/python/customizeHLTforCMSSW.py
+++ b/HLTrigger/Configuration/python/customizeHLTforCMSSW.py
@@ -233,7 +233,6 @@ def customiseForOffline(process):
 
     return process
 
-
 def customizeHLTfor43025(process):
 
     for producer in producers_by_type(process, "PFClusterProducer"):
@@ -270,14 +269,18 @@ def customizeHLTfor43774(process):
                 filt.useAbs = cms.bool(True)
             
     return process
-            
-
 
 # CMSSW version specific customizations
 def customizeHLTforCMSSW(process, menuType="GRun"):
 
     process = customiseForOffline(process)
 
+    # Alpaka HLT
+    from Configuration.ProcessModifiers.alpaka_cff import alpaka 
+    from Configuration.Eras.Modifier_run3_common_cff import run3_common
+    from HLTrigger.Configuration.customizeHLTforAlpaka import customizeHLTforAlpaka
+    (alpaka & run3_common).makeProcessModifier(customizeHLTforAlpaka).apply(process)
+
     # add call to action function in proper order: newest last!
     # process = customiseFor12718(process)
 
diff --git a/HeterogeneousCore/AlpakaCore/python/functions.py b/HeterogeneousCore/AlpakaCore/python/functions.py
new file mode 100644
index 0000000000000..5b79a1b205631
--- /dev/null
+++ b/HeterogeneousCore/AlpakaCore/python/functions.py
@@ -0,0 +1,23 @@
+def makeSerialClone(module, **kwargs):
+    """Return a clone of an alpaka module that explicitly uses the serial_sync backend.
+
+    `kwargs` are forwarded to `module.clone()`. Raises TypeError if the module type has
+    neither the '@alpaka' suffix nor one of the known 'alpaka_<backend>::' prefixes.
+    """
+    moduleType = module._TypedParameterizable__type
+    explicitBackends = ('alpaka_serial_sync::', 'alpaka_cuda_async::', 'alpaka_rocm_async::')
+    if moduleType.endswith('@alpaka'):
+        # alpaka module with automatic backend selection
+        base = moduleType.removesuffix('@alpaka')
+    elif moduleType.startswith(explicitBackends):
+        # alpaka module with an explicit backend: drop the 'alpaka_<backend>::' prefix
+        base = moduleType.split('::', 1)[1]
+    else:
+        # non-alpaka module
+        raise TypeError('%s is not an alpaka-based module, and cannot be used with makeSerialClone()' % str(module))
+
+    serialClone = module.clone(**kwargs)
+    serialClone._TypedParameterizable__type = 'alpaka_serial_sync::' + base
+    if 'alpaka' in serialClone.parameterNames_():
+        del serialClone.alpaka
+    return serialClone
diff --git a/HeterogeneousCore/AlpakaTest/test/writer.py b/HeterogeneousCore/AlpakaTest/test/writer.py
index bd8d2775b31ed..d23ac528629b8 100644
--- a/HeterogeneousCore/AlpakaTest/test/writer.py
+++ b/HeterogeneousCore/AlpakaTest/test/writer.py
@@ -1,4 +1,5 @@
 import FWCore.ParameterSet.Config as cms
+from HeterogeneousCore.AlpakaCore.functions import *
 
 process = cms.Process('Writer')
 
@@ -31,16 +32,9 @@
 )
 
 # run a second producer explicitly on the cpu
-process.testProducerSerial = cms.EDProducer('alpaka_serial_sync::TestAlpakaProducer',
+process.testProducerSerial = makeSerialClone(process.testProducer,
     size = cms.int32(99)
 )
-# an alternative approach would be to use
-#process.testProducerSerial = cms.EDProducer('TestAlpakaProducer@alpaka',
-#    size = cms.int32(99),
-#    alpaka = cms.untracked.PSet(
-#        backend = cms.untracked.string("serial_sync")
-#    )
-#)
 
 # analyse the second set of products
 process.testAnalyzerSerial = cms.EDAnalyzer('TestAlpakaAnalyzer',
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc
index 820b6b237c7e5..0bfa989c92969 100644
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc
@@ -21,7 +21,8 @@
 // local include(s)
 #include "PixelClusterizerBase.h"
 
-// #define GPU_DEBUG
+//#define GPU_DEBUG
+
 template <typename TrackerTraits>
 class SiPixelDigisClustersFromSoAT : public edm::global::EDProducer<> {
 public:
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoAAlpaka.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoAAlpaka.cc
index ad05ad3ff60c9..423951f4cb74f 100644
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoAAlpaka.cc
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoAAlpaka.cc
@@ -21,8 +21,9 @@
 // local include(s)
 #include "PixelClusterizerBase.h"
 
-// #define EDM_ML_DEBUG
-// #define GPU_DEBUG
+//#define EDM_ML_DEBUG
+//#define GPU_DEBUG
+
 template <typename TrackerTraits>
 class SiPixelDigisClustersFromSoAAlpaka : public edm::global::EDProducer<> {
 public:
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu
index 56718b4bdae14..452b0e2097071 100644
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu
@@ -33,7 +33,7 @@
 #include "gpuClusterChargeCut.h"
 #include "gpuClustering.h"
 
-// #define GPU_DEBUG
+//#define GPU_DEBUG
 
 namespace pixelgpudetails {
 
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h
index 06b30da68c8cd..fe9cc260a5853 100644
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h
@@ -18,7 +18,7 @@
 #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
 #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h"
 
-// #define GPU_DEBUG
+//#define GPU_DEBUG
 
 struct SiPixelROCsStatusAndMapping;
 class SiPixelGainForHLTonGPU;
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h
index ff885b5bad07f..d1f5509052468 100644
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h
@@ -18,7 +18,7 @@
 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
 #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h"
 
-// #define GPU_DEBUG
+//#define GPU_DEBUG
 
 namespace calibPixel {
   using namespace cms::alpakatools;
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h
index c149707e41d9a..4056090517aee 100644
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h
@@ -10,7 +10,7 @@
 #include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h"
 #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h"
 
-// #define GPU_DEBUG
+//#define GPU_DEBUG
 
 namespace pixelClustering {
 
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h
index 616ccbd3eb8c7..7da68c7b2f5da 100644
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h
@@ -5,15 +5,16 @@
 #include <cstdint>
 #include <cstdio>
 #include <type_traits>
+
 #include <alpaka/alpaka.hpp>
 
-#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
-#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
 #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
 
-// #define GPU_DEBUG
+//#define GPU_DEBUG
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
@@ -140,7 +141,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
         // find the index of the first pixel not belonging to this module (or invalid)
         auto& msize = alpaka::declareSharedVar<unsigned int, __COUNTER__>(acc);
 
-        const uint32_t blockIdx(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]);
+        const uint32_t blockIdx = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u];
         if (blockIdx >= clus_view[0].moduleStart())
           return;
 
@@ -274,11 +275,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
           ALPAKA_ASSERT_OFFLOAD((hist.size() / blockDimension) <= maxiter);
 
           // NB: can be tuned.
-          constexpr uint32_t threadDimension = cms::alpakatools::requires_single_thread_per_block_v<TAcc> ? 1 : 256;
+          constexpr uint32_t threadDimension = cms::alpakatools::requires_single_thread_per_block_v<TAcc> ? 256 : 1;
 
 #ifndef NDEBUG
-          [[maybe_unused]] const uint32_t runTimeThreadDimension(
-              alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u]);
+          [[maybe_unused]] const uint32_t runTimeThreadDimension =
+              alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[0u];
           ALPAKA_ASSERT_OFFLOAD(runTimeThreadDimension <= threadDimension);
 #endif
 
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc
index 3e7caf8b2b3a4..88ad79c6af609 100644
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc
@@ -640,15 +640,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
         auto moduleStartFirstElement =
             cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u);
         alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
-        constexpr auto threadsPerBlockFindClus = 512;
+
+        // TODO
+        // - the value is hard-coded here since it must also be known
+        // at compile time also in the kernel (for_each_element_in_block_strided)
+        // - put maxIter in the Geometry traits
+        constexpr auto threadsOrElementsFindClus = 256;
+
         const auto workDivMaxNumModules =
-            cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsPerBlockFindClus);
+            cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsOrElementsFindClus);
         // NB: With present FindClus() / chargeCut() algorithm,
         // threadPerBlock (GPU) or elementsPerThread (CPU) = 256 show optimal performance.
         // Though, it does not have to be the same number for CPU/GPU cases.
 
 #ifdef GPU_DEBUG
-        std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << threadsPerBlockFindClus
+        std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << threadsOrElementsFindClus
                   << " threadsPerBlockOrElementsPerThread\n";
 #endif
 
@@ -659,9 +665,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
         alpaka::wait(queue);
 #endif
 
+        constexpr auto threadsPerBlockChargeCut = 256;
+        const auto workDivChargeCut = cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsPerBlockChargeCut);
         // apply charge cut
         alpaka::exec<Acc1D>(queue,
-                            workDivMaxNumModules,
+                            workDivChargeCut,
                             ::pixelClustering::ClusterChargeCut<TrackerTraits>{},
                             digis_d->view(),
                             clusters_d->view(),
@@ -735,7 +743,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
       /// should be larger than maxPixInModule/16 aka (maxPixInModule/maxiter in the kernel)
 
-      const auto threadsPerBlockFindClus = ((TrackerTraits::maxPixInModule / 16 + 128 - 1) / 128) * 128;
+      const auto threadsPerBlockFindClus = 256;
       const auto workDivMaxNumModules = cms::alpakatools::make_workdiv<Acc1D>(numberOfModules, threadsPerBlockFindClus);
 
 #ifdef GPU_DEBUG
diff --git a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py
index 8d78599d07d9c..a6dd2bea80e2a 100644
--- a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py
+++ b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py
@@ -1,4 +1,5 @@
 import FWCore.ParameterSet.Config as cms
+from HeterogeneousCore.AlpakaCore.functions import *
 from Configuration.Eras.Modifier_run3_common_cff import run3_common
 from Configuration.ProcessModifiers.gpu_cff import gpu
 from Configuration.ProcessModifiers.alpaka_cff import alpaka
@@ -130,11 +131,7 @@ def _addProcessCalibTrackerAlpakaES(process):
 ))
 
 # reconstruct the pixel digis and clusters with alpaka on the cpu, for validation
-siPixelClustersPreSplittingAlpakaSerial = siPixelClustersPreSplittingAlpaka.clone(
-    #alpaka = dict( backend = '*' )
-    alpaka = None
-)
-siPixelClustersPreSplittingAlpakaSerial._TypedParameterizable__type = 'alpaka_serial_sync' + siPixelClustersPreSplittingAlpaka._TypedParameterizable__type.removesuffix('@alpaka')
+siPixelClustersPreSplittingAlpakaSerial = makeSerialClone(siPixelClustersPreSplittingAlpaka)
 
 from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase1_cfi import siPixelDigisClustersFromSoAAlpakaPhase1 as _siPixelDigisClustersFromSoAAlpakaPhase1
 from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase2_cfi import siPixelDigisClustersFromSoAAlpakaPhase2 as _siPixelDigisClustersFromSoAAlpakaPhase2
diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu
index 61442ea9d2b8c..b1e5e1c3c90e9 100644
--- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu
+++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.cu
@@ -12,7 +12,8 @@
 
 #include "PixelRecHitGPUKernel.h"
 #include "gpuPixelRecHits.h"
-// #define GPU_DEBUG
+
+//#define GPU_DEBUG
 
 namespace {
   template <typename TrackerTraits>
diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h
index 25cc724cd4c4a..407a18be04fa9 100644
--- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h
+++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHitGPUKernel.h
@@ -10,7 +10,9 @@
 #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h"
 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
+
 //#define GPU_DEBUG
+
 namespace pixelgpudetails {
 
   template <typename TrackerTraits>
diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h
index 220a91b85ced3..45587034b572b 100644
--- a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h
+++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h
@@ -19,7 +19,8 @@
 #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h"
 
-//#define GPU_DEBUG 1
+//#define GPU_DEBUG
+
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
   namespace pixelRecHits {
 
diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h
index 94ae258cc16fb..55c556bd63048 100644
--- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h
+++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h
@@ -7,13 +7,14 @@
 
 #include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h"
 #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h"
+#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h"
 #include "DataFormats/Math/interface/approx_atan2.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
-#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
 
-//#define GPU_DEBUG 1
+//#define GPU_DEBUG
+
 namespace gpuPixelRecHits {
 
   template <typename TrackerTraits>
diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py
index e6b2c9832600c..7e8910a8e0918 100644
--- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py
+++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py
@@ -1,4 +1,5 @@
 import FWCore.ParameterSet.Config as cms
+from HeterogeneousCore.AlpakaCore.functions import *
 from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA
 from Configuration.ProcessModifiers.gpu_cff import gpu
 from Configuration.ProcessModifiers.alpaka_cff import alpaka
@@ -139,12 +140,9 @@
 ))
 
 # Hit SoA producer on the cpu, for validation
-siPixelRecHitsPreSplittingAlpakaSerial = siPixelRecHitsPreSplittingAlpaka.clone(
-    src = "siPixelClustersPreSplittingAlpakaSerial",
-    #alpaka = dict( backend = '*' )
-    alpaka = None
+siPixelRecHitsPreSplittingAlpakaSerial = makeSerialClone(siPixelRecHitsPreSplittingAlpaka,
+    src = "siPixelClustersPreSplittingAlpakaSerial"
 )
-siPixelRecHitsPreSplittingAlpakaSerial._TypedParameterizable__type = 'alpaka_serial_sync' + siPixelRecHitsPreSplittingAlpaka._TypedParameterizable__type.removesuffix('@alpaka')
 
 from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSoAAlpakaPhase1_cfi import siPixelRecHitFromSoAAlpakaPhase1 as _siPixelRecHitFromSoAAlpakaPhase1
 from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSoAAlpakaPhase2_cfi import siPixelRecHitFromSoAAlpakaPhase2 as _siPixelRecHitFromSoAAlpakaPhase2
diff --git a/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc b/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc
new file mode 100644
index 0000000000000..9772366c6b22e
--- /dev/null
+++ b/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc
@@ -0,0 +1,822 @@
+/*
+ * \class L2TauNNProducerAlpaka
+ *
+ * L2Tau identification using Convolutional NN.
+ *
+ * \author Valeria D'Amante, Università di Siena and INFN Pisa
+ *         Konstantin Androsov, EPFL and ETHZ
+*/
+#include <memory>
+#include <boost/property_tree/json_parser.hpp>
+#include <cmath>
+#include "FWCore/Framework/interface/stream/EDProducer.h"
+#include "FWCore/Framework/interface/ESHandle.h"
+#include "FWCore/Framework/interface/Event.h"
+#include "FWCore/Framework/interface/EventSetup.h"
+#include "FWCore/Framework/interface/Frameworkfwd.h"
+#include "FWCore/MessageLogger/interface/MessageLogger.h"
+#include "DataFormats/Math/interface/deltaR.h"
+#include "DataFormats/Common/interface/Handle.h"
+#include "FWCore/Utilities/interface/InputTag.h"
+#include "FWCore/Utilities/interface/isFinite.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "PhysicsTools/TensorFlow/interface/TensorFlow.h"
+#include "Geometry/CaloGeometry/interface/CaloCellGeometry.h"
+#include "Geometry/CaloGeometry/interface/CaloGeometry.h"
+#include "Geometry/CaloTopology/interface/HcalTopology.h"
+#include "Geometry/Records/interface/CaloGeometryRecord.h"
+#include "DataFormats/CaloRecHit/interface/CaloRecHit.h"
+#include "DataFormats/EcalRecHit/interface/EcalRecHit.h"
+#include "DataFormats/EcalRecHit/interface/EcalRecHitCollections.h"
+#include "DataFormats/EcalDetId/interface/EcalDetIdCollections.h"
+#include "DataFormats/HcalDetId/interface/HcalDetId.h"
+#include "DataFormats/HcalRecHit/interface/HBHERecHit.h"
+#include "DataFormats/HcalRecHit/interface/HcalRecHitDefs.h"
+#include "DataFormats/HcalRecHit/interface/HFRecHit.h"
+#include "DataFormats/HcalRecHit/interface/HORecHit.h"
+#include "DataFormats/HLTReco/interface/TriggerTypeDefs.h"
+#include "DataFormats/HLTReco/interface/TriggerFilterObjectWithRefs.h"
+#include "TrackingTools/TrajectoryParametrization/interface/CurvilinearTrajectoryError.h"
+#include "RecoTracker/PixelTrackFitting/interface/FitUtils.h"
+#include "TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h"
+#include "DataFormats/TrackReco/interface/HitPattern.h"
+#include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h"
+#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h"
+#include "DataFormats/GeometrySurface/interface/Plane.h"
+#include "DataFormats/BeamSpot/interface/BeamSpot.h"
+#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h"
+#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
+
+#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h"
+#include "DataFormats/TrackSoA/interface/TracksHost.h"
+#include "DataFormats/VertexSoA/interface/ZVertexHost.h"
+
+namespace L2TauTagNNv1 {
+  constexpr int nCellEta = 5;    // number of grid cells along eta (the eta cell width is 2 * dR_max / nCellEta)
+  constexpr int nCellPhi = 5;    // number of grid cells along phi (the phi cell width is 2 * dR_max / nCellPhi)
+  constexpr int nVars = 31;      // size reserved for the normalisation vector; matches the 31 NNInputs entries
+  constexpr float dR_max = 0.5;  // used squared as dR2_max and doubled as the extent divided into cells
+  enum class NNInputs {          // the enumerator value is used as the index in the last tensor dimension
+    nVertices = 0,
+    l1Tau_pt,
+    l1Tau_eta,
+    l1Tau_hwIso,
+    EcalEnergySum,
+    EcalSize,
+    EcalEnergyStdDev,
+    EcalDeltaEta,
+    EcalDeltaPhi,
+    EcalChi2,
+    EcalEnergySumForPositiveChi2,
+    EcalSizeForPositiveChi2,
+    HcalEnergySum,
+    HcalSize,
+    HcalEnergyStdDev,
+    HcalDeltaEta,
+    HcalDeltaPhi,
+    HcalChi2,
+    HcalEnergySumForPositiveChi2,
+    HcalSizeForPositiveChi2,
+    PatatrackPtSum,
+    PatatrackSize,
+    PatatrackSizeWithVertex,
+    PatatrackPtSumWithVertex,
+    PatatrackChargeSum,
+    PatatrackDeltaEta,
+    PatatrackDeltaPhi,
+    PatatrackChi2OverNdof,
+    PatatrackNdof,
+    PatatrackDxy,
+    PatatrackDz
+  };
+
+  const std::map<NNInputs, std::string> varNameMap = {  // name = key looked up in the normalisation JSON
+      {NNInputs::nVertices, "nVertices"},
+      {NNInputs::l1Tau_pt, "l1Tau_pt"},
+      {NNInputs::l1Tau_eta, "l1Tau_eta"},
+      {NNInputs::l1Tau_hwIso, "l1Tau_hwIso"},
+      {NNInputs::EcalEnergySum, "EcalEnergySum"},
+      {NNInputs::EcalSize, "EcalSize"},
+      {NNInputs::EcalEnergyStdDev, "EcalEnergyStdDev"},
+      {NNInputs::EcalDeltaEta, "EcalDeltaEta"},
+      {NNInputs::EcalDeltaPhi, "EcalDeltaPhi"},
+      {NNInputs::EcalChi2, "EcalChi2"},
+      {NNInputs::EcalEnergySumForPositiveChi2, "EcalEnergySumForPositiveChi2"},
+      {NNInputs::EcalSizeForPositiveChi2, "EcalSizeForPositiveChi2"},
+      {NNInputs::HcalEnergySum, "HcalEnergySum"},
+      {NNInputs::HcalSize, "HcalSize"},
+      {NNInputs::HcalEnergyStdDev, "HcalEnergyStdDev"},
+      {NNInputs::HcalDeltaEta, "HcalDeltaEta"},
+      {NNInputs::HcalDeltaPhi, "HcalDeltaPhi"},
+      {NNInputs::HcalChi2, "HcalChi2"},
+      {NNInputs::HcalEnergySumForPositiveChi2, "HcalEnergySumForPositiveChi2"},
+      {NNInputs::HcalSizeForPositiveChi2, "HcalSizeForPositiveChi2"},
+      {NNInputs::PatatrackPtSum, "PatatrackPtSum"},
+      {NNInputs::PatatrackSize, "PatatrackSize"},
+      {NNInputs::PatatrackSizeWithVertex, "PatatrackSizeWithVertex"},
+      {NNInputs::PatatrackPtSumWithVertex, "PatatrackPtSumWithVertex"},
+      {NNInputs::PatatrackChargeSum, "PatatrackChargeSum"},
+      {NNInputs::PatatrackDeltaEta, "PatatrackDeltaEta"},
+      {NNInputs::PatatrackDeltaPhi, "PatatrackDeltaPhi"},
+      {NNInputs::PatatrackChi2OverNdof, "PatatrackChi2OverNdof"},
+      {NNInputs::PatatrackNdof, "PatatrackNdof"},
+      {NNInputs::PatatrackDxy, "PatatrackDxy"},
+      {NNInputs::PatatrackDz, "PatatrackDz"}};
+}  // namespace L2TauTagNNv1
+namespace {  // file-local: getCellImpl returns a writable reference to one float entry of the rank-4 tensor
+  inline float& getCellImpl(
+      tensorflow::Tensor& cellGridMatrix, int tau_idx, int phi_idx, int eta_idx, L2TauTagNNv1::NNInputs NNInput_idx) {
+    return cellGridMatrix.tensor<float, 4>()(tau_idx, phi_idx, eta_idx, static_cast<int>(NNInput_idx));
+  }
+}  // namespace
+struct normDictElement {  // normalisation constants of one input variable, read from the normalizationDict JSON
+  float mean;             // subtracted from the raw value in standardizeTensor
+  float std;              // divisor applied after the mean subtraction
+  float min;              // lower limit applied to the standardized value
+  float max;              // upper limit applied to the standardized value
+};
+
+struct L2TauNNProducerAlpakaCacheData {
+  L2TauNNProducerAlpakaCacheData() = default;
+  tensorflow::GraphDef* graphDef = nullptr;  // set by initializeGlobalCache, deleted in globalEndJob
+  tensorflow::Session* session = nullptr;    // created by initializeGlobalCache, closed in globalEndJob
+  std::vector<normDictElement> normVec;      // one entry per variable of L2TauTagNNv1::varNameMap, in map order
+};
+
+class L2TauNNProducerAlpaka : public edm::stream::EDProducer<edm::GlobalCache<L2TauNNProducerAlpakaCacheData>> {
+public:
+  using TracksHost = pixelTrack::TracksHostPhase1;
+
+  struct caloRecHitCollections {  // bundle of calorimeter inputs taken by fillCaloRecHits
+    const HBHERecHitCollection* hbhe;
+    const HORecHitCollection* ho;
+    const EcalRecHitCollection* eb;
+    const EcalRecHitCollection* ee;
+    const CaloGeometry* geometry;
+  };
+
+  struct InputDescTau {
+    std::string CollectionName;  // "L1CollectionName"; also the instance label of the produced std::vector<float>
+    edm::EDGetTokenT<trigger::TriggerFilterObjectWithRefs> inputToken_;  // token of the "L1TauTrigger" input tag
+  };
+
+  static constexpr float dR2_max = L2TauTagNNv1::dR_max * L2TauTagNNv1::dR_max;  // dR_max squared
+  static constexpr float dEta_width = 2 * L2TauTagNNv1::dR_max / static_cast<float>(L2TauTagNNv1::nCellEta);
+  static constexpr float dPhi_width = 2 * L2TauTagNNv1::dR_max / static_cast<float>(L2TauTagNNv1::nCellPhi);
+
+  explicit L2TauNNProducerAlpaka(const edm::ParameterSet&, const L2TauNNProducerAlpakaCacheData*);
+  static void fillDescriptions(edm::ConfigurationDescriptions&);
+  static std::unique_ptr<L2TauNNProducerAlpakaCacheData> initializeGlobalCache(const edm::ParameterSet&);
+  static void globalEndJob(L2TauNNProducerAlpakaCacheData*);
+
+private:
+  void checknan(tensorflow::Tensor& tensor, int debugLevel);  // warns about non-finite tensor entries
+  void standardizeTensor(tensorflow::Tensor& tensor);         // in place: (x - mean) / std, limited to [min, max]
+  std::vector<float> getTauScore(const tensorflow::Tensor& cellGridMatrix);
+  void produce(edm::Event& event, const edm::EventSetup& eventsetup) override;
+  void fillL1TauVars(tensorflow::Tensor& cellGridMatrix, const std::vector<l1t::TauRef>& allTaus);
+  void fillCaloRecHits(tensorflow::Tensor& cellGridMatrix,
+                       const std::vector<l1t::TauRef>& allTaus,
+                       const caloRecHitCollections& caloRecHits);
+  void fillPatatracks(tensorflow::Tensor& cellGridMatrix,
+                      const std::vector<l1t::TauRef>& allTaus,
+                      const TracksHost& patatracks_tsoa,
+                      const ZVertexHost& patavtx_soa,
+                      const reco::BeamSpot& beamspot,
+                      const MagneticField* magfi);
+  void selectGoodTracksAndVertices(const ZVertexHost& patavtx_soa,
+                                   const TracksHost& patatracks_tsoa,
+                                   std::vector<int>& trkGood,
+                                   std::vector<int>& vtxGood);
+  std::pair<float, float> impactParameter(int it,
+                                          const TracksHost& patatracks_tsoa,
+                                          float patatrackPhi,
+                                          const reco::BeamSpot& beamspot,
+                                          const MagneticField* magfi);
+  template <typename VPos, typename LVec>
+  std::tuple<float, float, int, int> getEtaPhiIndices(const VPos& position, const LVec& tau_p4);
+  template <typename LVec>
+  std::tuple<float, float, int, int> getEtaPhiIndices(float eta, float phi, const LVec& tau_p4);
+
+private:
+  const int debugLevel_;
+  const edm::EDGetTokenT<trigger::TriggerFilterObjectWithRefs> tauTriggerToken_;  // NOTE: left default-constructed
+  std::vector<InputDescTau> L1TauDesc_;  // one entry per PSet of the "L1Taus" parameter
+  const edm::EDGetTokenT<HBHERecHitCollection> hbheToken_;
+  const edm::EDGetTokenT<HORecHitCollection> hoToken_;
+  const edm::EDGetTokenT<EcalRecHitCollection> ebToken_;
+  const edm::EDGetTokenT<EcalRecHitCollection> eeToken_;
+  const edm::ESGetToken<CaloGeometry, CaloGeometryRecord> geometryToken_;
+  const edm::ESGetToken<MagneticField, IdealMagneticFieldRecord> bFieldToken_;
+  const edm::EDGetTokenT<ZVertexHost> pataVerticesToken_;
+  const edm::EDGetTokenT<TracksHost> pataTracksToken_;
+  const edm::EDGetTokenT<reco::BeamSpot> beamSpotToken_;
+  const unsigned int maxVtx_;
+  const float fractionSumPt2_;
+  const float minSumPt2_;
+  const float trackPtMin_;
+  const float trackPtMax_;
+  const float trackChi2Max_;
+  std::string inputTensorName_;                        // name of the first node of the graph
+  std::string outputTensorName_;                       // name of the last node of the graph
+  const L2TauNNProducerAlpakaCacheData* L2cacheData_;  // cache pointer received by the constructor
+};
+
+std::unique_ptr<L2TauNNProducerAlpakaCacheData> L2TauNNProducerAlpaka::initializeGlobalCache(
+    const edm::ParameterSet& cfg) {
+  // Build the cache: the TensorFlow graph and session plus the per-variable normalisation constants.
+  auto cache = std::make_unique<L2TauNNProducerAlpakaCacheData>();
+  cache->normVec.reserve(L2TauTagNNv1::nVars);
+
+  const std::string graphFile = edm::FileInPath(cfg.getParameter<std::string>("graphPath")).fullPath();
+  cache->graphDef = tensorflow::loadGraphDef(graphFile);
+  cache->session = tensorflow::createSession(cache->graphDef);
+  tensorflow::setLogging("2");
+
+  const std::string normFile = edm::FileInPath(cfg.getParameter<std::string>("normalizationDict")).fullPath();
+  boost::property_tree::ptree normTree;
+  boost::property_tree::read_json(normFile, normTree);
+  // varNameMap is a std::map keyed by NNInputs, so normVec is filled in the map's key order.
+  for (const auto& [input, name] : L2TauTagNNv1::varNameMap) {
+    const boost::property_tree::ptree& entry = normTree.get_child(name);
+    normDictElement norm;
+    norm.mean = entry.get<float>("mean");
+    norm.std = entry.get<float>("std");
+    norm.min = entry.get<float>("min");
+    norm.max = entry.get<float>("max");
+    cache->normVec.push_back(norm);
+  }
+  return cache;
+}
+void L2TauNNProducerAlpaka::globalEndJob(L2TauNNProducerAlpakaCacheData* cacheData) {
+  // Free the graph definition set by initializeGlobalCache; deleting a null pointer is a no-op.
+  delete cacheData->graphDef;
+  // Close the TensorFlow session that was created from that graph.
+  tensorflow::closeSession(cacheData->session);
+}
+void L2TauNNProducerAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+  const edm::InputTag l1Trigger("hltL1sDoubleTauBigOR");  // default tag, used twice below
+  edm::ParameterSetDescription desc;
+  desc.add<int>("debugLevel", 0)->setComment("set debug level for printing out info");
+  edm::ParameterSetDescription l1Desc;
+  l1Desc.add<std::string>("L1CollectionName", "DoubleTau")->setComment("Name of collections");
+  l1Desc.add<edm::InputTag>("L1TauTrigger", l1Trigger)->setComment("Which trigger should the L1 Taus collection pass");
+  edm::ParameterSet l1Default;
+  l1Default.addParameter<std::string>("L1CollectionName", "DoubleTau");
+  l1Default.addParameter<edm::InputTag>("L1TauTrigger", l1Trigger);
+  desc.addVPSet("L1Taus", l1Desc, {l1Default});
+  desc.add<edm::InputTag>("hbheInput", edm::InputTag("hltHbhereco"))->setComment("HBHE recHit collection");
+  desc.add<edm::InputTag>("hoInput", edm::InputTag("hltHoreco"))->setComment("HO recHit Collection");
+  desc.add<edm::InputTag>("ebInput", edm::InputTag("hltEcalRecHit:EcalRecHitsEB"))->setComment("EB recHit Collection");
+  desc.add<edm::InputTag>("eeInput", edm::InputTag("hltEcalRecHit:EcalRecHitsEE"))->setComment("EE recHit Collection");
+  desc.add<edm::InputTag>("pataVertices", edm::InputTag("hltPixelVerticesSoA"))
+      ->setComment("patatrack vertices collection");
+  desc.add<edm::InputTag>("pataTracks", edm::InputTag("hltPixelTracksSoA"))->setComment("patatrack collection");
+  desc.add<edm::InputTag>("BeamSpot", edm::InputTag("hltOnlineBeamSpot"))->setComment("BeamSpot Collection");
+  desc.add<unsigned int>("maxVtx", 100)->setComment("max output collection size (number of accepted vertices)");
+  desc.add<double>("fractionSumPt2", 0.3)->setComment("threshold on sumPt2 fraction of the leading vertex");
+  desc.add<double>("minSumPt2", 0.)->setComment("min sumPt2");
+  desc.add<double>("track_pt_min", 1.0)->setComment("min track p_T");
+  desc.add<double>("track_pt_max", 10.0)->setComment("max track p_T");
+  desc.add<double>("track_chi2_max", 99999.)->setComment("max track chi2");
+  desc.add<std::string>("graphPath", "RecoTauTag/TrainingFiles/data/L2TauNNTag/L2TauTag_Run3v1.pb")
+      ->setComment("path to the saved CNN");
+  desc.add<std::string>("normalizationDict", "RecoTauTag/TrainingFiles/data/L2TauNNTag/NormalizationDict.json")
+      ->setComment("path to the dictionary for variable standardization");
+  descriptions.addWithDefaultLabel(desc);
+}
+
+L2TauNNProducerAlpaka::L2TauNNProducerAlpaka(const edm::ParameterSet& cfg,
+                                             const L2TauNNProducerAlpakaCacheData* cacheData)
+    : debugLevel_(cfg.getParameter<int>("debugLevel")),
+      hbheToken_(consumes<HBHERecHitCollection>(cfg.getParameter<edm::InputTag>("hbheInput"))),
+      hoToken_(consumes<HORecHitCollection>(cfg.getParameter<edm::InputTag>("hoInput"))),
+      ebToken_(consumes<EcalRecHitCollection>(cfg.getParameter<edm::InputTag>("ebInput"))),
+      eeToken_(consumes<EcalRecHitCollection>(cfg.getParameter<edm::InputTag>("eeInput"))),
+      geometryToken_(esConsumes<CaloGeometry, CaloGeometryRecord>()),
+      bFieldToken_(esConsumes<MagneticField, IdealMagneticFieldRecord>()),
+      pataVerticesToken_(consumes(cfg.getParameter<edm::InputTag>("pataVertices"))),
+      pataTracksToken_(consumes(cfg.getParameter<edm::InputTag>("pataTracks"))),
+      beamSpotToken_(consumes<reco::BeamSpot>(cfg.getParameter<edm::InputTag>("BeamSpot"))),
+      maxVtx_(cfg.getParameter<unsigned int>("maxVtx")),
+      fractionSumPt2_(cfg.getParameter<double>("fractionSumPt2")),
+      minSumPt2_(cfg.getParameter<double>("minSumPt2")),
+      trackPtMin_(cfg.getParameter<double>("track_pt_min")),
+      trackPtMax_(cfg.getParameter<double>("track_pt_max")),
+      trackChi2Max_(cfg.getParameter<double>("track_chi2_max")),
+      L2cacheData_(cacheData) {
+  if (cacheData->graphDef == nullptr) {
+    throw cms::Exception("InvalidCacheData") << "Invalid Cache Data.";
+  }
+  // The input and output tensor names are the names of the first and last graph node.
+  const auto& graph = *cacheData->graphDef;
+  inputTensorName_ = graph.node(0).name();
+  outputTensorName_ = graph.node(graph.node_size() - 1).name();
+  const auto l1TauPSets = cfg.getParameter<std::vector<edm::ParameterSet>>("L1Taus");
+  L1TauDesc_.reserve(l1TauPSets.size());
+  for (const auto& pset : l1TauPSets) {
+    const auto name = pset.getParameter<std::string>("L1CollectionName");
+    const auto l1Trigger = pset.getParameter<edm::InputTag>("L1TauTrigger");
+    L1TauDesc_.push_back({name, consumes<trigger::TriggerFilterObjectWithRefs>(l1Trigger)});
+  }
+  for (const auto& desc : L1TauDesc_)
+    produces<std::vector<float>>(desc.CollectionName);
+}
+
+void L2TauNNProducerAlpaka::checknan(tensorflow::Tensor& tensor, int debugLevel) {
+  // Debugging aid: emits a LogWarning for every non-finite entry of the rank-4 tensor; entries are only read.
+  using NNInputs = L2TauTagNNv1::NNInputs;
+  // Name of the variable stored at a given index of the last tensor dimension.
+  const auto varName = [](int idx) -> const std::string& {
+    return L2TauTagNNv1::varNameMap.at(static_cast<NNInputs>(idx));
+  };
+  const int rank = tensor.shape().dims();
+  std::vector<int> shape(rank);
+  for (int dim = 0; dim < rank; ++dim) {
+    shape.at(dim) = tensor.shape().dim_size(dim);
+  }
+  if (shape.size() != 4) {
+    throw cms::Exception("InvalidTensor") << "Tensor shape does not have 4 dimensions!";
+  }
+
+  // Visit the entries in (tau, phi, eta, variable) order.
+  for (int tau_idx = 0; tau_idx < shape[0]; tau_idx++) {
+    for (int phi_idx = 0; phi_idx < shape[1]; phi_idx++) {
+      for (int eta_idx = 0; eta_idx < shape[2]; eta_idx++) {
+        const auto cellValue = [&](int idx) -> float {
+          return getCellImpl(tensor, tau_idx, phi_idx, eta_idx, static_cast<NNInputs>(idx));
+        };
+        for (int var_idx = 0; var_idx < shape[3]; var_idx++) {
+          if (!edm::isNotFinite(cellValue(var_idx))) {
+            continue;
+          }
+          edm::LogWarning("InputVar") << "var is nan \nvar name= " << varName(var_idx)
+                                      << "\t var_idx = " << var_idx << "\t eta_idx = " << eta_idx
+                                      << "\t phi_idx = " << phi_idx << "\t tau_idx = " << tau_idx;
+          if (debugLevel <= 2) {
+            continue;
+          }
+          // At high debug level also print up to four of the following variables of the same cell.
+          edm::LogWarning("InputVar") << "other vars in same cell \n";
+          for (int next = var_idx + 1; next <= var_idx + 4 && next < shape[3]; next++) {
+            edm::LogWarning("InputVar") << varName(next) << "\t = " << cellValue(next);
+          }
+        }
+      }
+    }
+  }
+}
+
+void L2TauNNProducerAlpaka::standardizeTensor(tensorflow::Tensor& tensor) {
+  // In place: replace each entry x of the rank-4 tensor by (x - mean) / std of its variable,
+  // then limit the result to that variable's [min, max] taken from the global cache.
+  using NNInputs = L2TauTagNNv1::NNInputs;
+
+  const int rank = tensor.shape().dims();
+  std::vector<int> shape(rank);
+  for (int dim = 0; dim < rank; ++dim) {
+    shape.at(dim) = tensor.shape().dim_size(dim);
+  }
+  if (shape.size() != 4) {
+    throw cms::Exception("InvalidTensor") << "Tensor shape does not have 4 dimensions!";
+  }
+  const int nTaus = shape[0];
+  const int nPhi = shape[1];
+  const int nEta = shape[2];
+  const int nInputs = shape[3];
+
+  for (int tau_idx = 0; tau_idx < nTaus; tau_idx++) {
+    for (int phi_idx = 0; phi_idx < nPhi; phi_idx++) {
+      for (int eta_idx = 0; eta_idx < nEta; eta_idx++) {
+        for (int var_idx = 0; var_idx < nInputs; var_idx++) {
+          // Single bounds-checked lookup of the constants for this variable index.
+          const normDictElement& norm = L2cacheData_->normVec.at(var_idx);
+          float& cell = getCellImpl(tensor, tau_idx, phi_idx, eta_idx, static_cast<NNInputs>(var_idx));
+          const float scaled = (cell - norm.mean) / norm.std;
+          // The upper limit is tested first, the lower limit only if the upper one did not apply.
+          cell = (scaled > norm.max) ? norm.max : (scaled < norm.min) ? norm.min : scaled;
+        }
+      }
+    }
+  }
+}
+
+// Copies the pt, eta and hwIso of each L1 tau into every cell of the grid that belongs to that tau.
+void L2TauNNProducerAlpaka::fillL1TauVars(tensorflow::Tensor& cellGridMatrix, const std::vector<l1t::TauRef>& allTaus) {
+  using NNInputs = L2TauTagNNv1::NNInputs;
+
+  const int nTaus = allTaus.size();
+  for (int tau_idx = 0; tau_idx < nTaus; ++tau_idx) {
+    const auto& l1Tau = allTaus[tau_idx];
+    for (int eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; ++eta_idx) {
+      for (int phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; ++phi_idx) {
+        // the same three values are written to all cells of the tau
+        getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, NNInputs::l1Tau_pt) = l1Tau->pt();
+        getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, NNInputs::l1Tau_eta) = l1Tau->eta();
+        getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, NNInputs::l1Tau_hwIso) = l1Tau->hwIso();
+      }
+    }
+  }
+}
+
+// Locates the point (eta, phi) relative to the direction of tau_p4.
+// Returns (deta, dphi, eta_idx, phi_idx): the eta and phi offsets of the point from the tau, and the cell indices
+// obtained by shifting each offset by L2TauTagNNv1::dR_max and dividing by the cell width (rounded down).
+template <typename LVec>
+std::tuple<float, float, int, int> L2TauNNProducerAlpaka::getEtaPhiIndices(float eta, float phi, const LVec& tau_p4) {
+  const float etaOffset = eta - tau_p4.eta();
+  const float phiOffset = reco::deltaPhi(phi, tau_p4.phi());
+  const int etaCell = static_cast<int>(std::floor((etaOffset + L2TauTagNNv1::dR_max) / dEta_width));
+  const int phiCell = static_cast<int>(std::floor((phiOffset + L2TauTagNNv1::dR_max) / dPhi_width));
+  return std::tuple<float, float, int, int>(etaOffset, phiOffset, etaCell, phiCell);
+}
+
+// Convenience overload: takes the eta and phi of `position` and forwards them, together with tau_p4,
+// to the (eta, phi, tau_p4) overload; the returned tuple is the one produced by that overload.
+template <typename VPos, typename LVec>
+std::tuple<float, float, int, int> L2TauNNProducerAlpaka::getEtaPhiIndices(const VPos& position, const LVec& tau_p4) {
+  return getEtaPhiIndices(position.eta(), position.phi(), tau_p4);
+}
+
+// Fills the calorimeter inputs of the grid of every tau from the EE, EB, HBHE and HO rec hits (in this order).
+// A hit contributes only if its energy is positive and its deltaR2 to the tau is below dR2_max; it is added to
+// the cell returned by getEtaPhiIndices. Per cell the energy sum, the hit count, the sum of squared energies and
+// the energy-weighted deta and dphi are accumulated; EE, EB and HBHE hits with chi2 >= 0 also feed the
+// energy-weighted chi2 and its own energy sum and hit count (HO hits do not).
+// Afterwards, in every cell, the weighted sums are divided by the matching energy sum and the sum of squared
+// energies is replaced by (sumE2 - sumE * sumE / size) / (size - 1), or by 0 when size <= 1.
+void L2TauNNProducerAlpaka::fillCaloRecHits(tensorflow::Tensor& cellGridMatrix,
+                                            const std::vector<l1t::TauRef>& allTaus,
+                                            const caloRecHitCollections& caloRecHits) {
+  using NNInputs = L2TauTagNNv1::NNInputs;
+
+  // current grid coordinates: the cell accessor and the helpers below all work on these
+  int tau_idx = 0;
+  int eta_idx = 0;
+  int phi_idx = 0;
+  float deta, dphi;
+
+  auto cell = [&](NNInputs input) -> float& {
+    return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input);
+  };
+
+  // points (eta_idx, phi_idx, deta, dphi) at the cell of a rec hit;
+  // returns false, leaving them untouched, if the hit has no positive energy or is outside the cone of the tau
+  auto locate = [&](const auto& hit) {
+    if (hit.energy() <= 0)
+      return false;
+    const auto& position = caloRecHits.geometry->getGeometry(hit.id())->getPosition();
+    if (!(reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max))
+      return false;
+    std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4());
+    return true;
+  };
+
+  // accumulates one ECAL collection (EE or EB) into the ECAL variables
+  auto addEcalHits = [&](const auto& hits) {
+    for (const auto& hit : hits) {
+      if (!locate(hit))
+        continue;
+      const float energy = hit.energy();
+      const float chi2 = hit.chi2();
+      cell(NNInputs::EcalEnergySum) += energy;
+      cell(NNInputs::EcalSize) += 1.;
+      cell(NNInputs::EcalEnergyStdDev) += energy * energy;
+      cell(NNInputs::EcalDeltaEta) += deta * energy;
+      cell(NNInputs::EcalDeltaPhi) += dphi * energy;
+      if (chi2 >= 0) {
+        cell(NNInputs::EcalChi2) += chi2 * energy;
+        cell(NNInputs::EcalEnergySumForPositiveChi2) += energy;
+        cell(NNInputs::EcalSizeForPositiveChi2) += 1.;
+      }
+    }
+  };
+
+  // accumulates the energy-based HCAL variables of the hit located last
+  auto addHcalEnergy = [&](float energy) {
+    cell(NNInputs::HcalEnergySum) += energy;
+    cell(NNInputs::HcalEnergyStdDev) += energy * energy;
+    cell(NNInputs::HcalSize) += 1.;
+    cell(NNInputs::HcalDeltaEta) += deta * energy;
+    cell(NNInputs::HcalDeltaPhi) += dphi * energy;
+  };
+
+  // turns the sums accumulated in the current cell into energy-weighted averages and a spread
+  auto normalize = [&](NNInputs enSum,
+                       NNInputs size,
+                       NNInputs stdDev,
+                       NNInputs dEta,
+                       NNInputs dPhi,
+                       NNInputs chi2,
+                       NNInputs enSumForPositiveChi2) {
+    const float sum = cell(enSum);
+    if (sum > 0.) {
+      cell(dEta) /= sum;
+      cell(dPhi) /= sum;
+    }
+    const float sumForChi2 = cell(enSumForPositiveChi2);
+    if (sumForChi2 > 0.) {
+      cell(chi2) /= sumForChi2;
+    }
+    const float nHits = cell(size);
+    float& spread = cell(stdDev);
+    if (nHits > 1.) {
+      // (stdDev - (enSum*enSum)/size) / (size-1)
+      spread = (spread - (sum * sum) / nHits) / (nHits - 1);
+    } else {
+      spread = 0.;
+    }
+  };
+
+  const int nTaus = allTaus.size();
+  for (tau_idx = 0; tau_idx < nTaus; tau_idx++) {
+    // calorechit_EE
+    addEcalHits(*caloRecHits.ee);
+
+    // calorechit_EB
+    addEcalHits(*caloRecHits.eb);
+
+    // calorechit_HBHE
+    for (const auto& hit : *caloRecHits.hbhe) {
+      if (!locate(hit))
+        continue;
+      const float energy = hit.energy();
+      const float chi2 = hit.chi2();
+      addHcalEnergy(energy);
+      if (chi2 >= 0) {
+        cell(NNInputs::HcalChi2) += chi2 * energy;
+        cell(NNInputs::HcalEnergySumForPositiveChi2) += energy;
+        cell(NNInputs::HcalSizeForPositiveChi2) += 1.;
+      }
+    }
+
+    // calorechit_HO
+    for (const auto& hit : *caloRecHits.ho) {
+      if (!locate(hit))
+        continue;
+      addHcalEnergy(hit.energy());
+    }
+
+    // normalize to sum and define stdDev
+    for (eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) {
+      for (phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) {
+        /* normalize eCal vars*/
+        normalize(NNInputs::EcalEnergySum,
+                  NNInputs::EcalSize,
+                  NNInputs::EcalEnergyStdDev,
+                  NNInputs::EcalDeltaEta,
+                  NNInputs::EcalDeltaPhi,
+                  NNInputs::EcalChi2,
+                  NNInputs::EcalEnergySumForPositiveChi2);
+        /* normalize hCal Vars */
+        normalize(NNInputs::HcalEnergySum,
+                  NNInputs::HcalSize,
+                  NNInputs::HcalEnergyStdDev,
+                  NNInputs::HcalDeltaEta,
+                  NNInputs::HcalDeltaPhi,
+                  NNInputs::HcalChi2,
+                  NNInputs::HcalEnergySumForPositiveChi2);
+      }
+    }
+  }
+}
+
+// Selects the pixel tracks and pixel vertices used for the network inputs. Both output vectors are cleared first.
+//  - trkGood receives the indices of the tracks with quality at least pixelTrack::Quality::loose; the scan over
+//    the track slots stops at the first track that has no hits.
+//  - vtxGood receives at most maxVtx_ vertex indices, read from sortInd starting at its last entry, for vertices
+//    with at least 2 associated tracks and a sum of squared track pt that is > minSumPt2_ and >= fractionSumPt2_
+//    times the largest sum. Only tracks with pt >= trackPtMin_ and chi2 <= trackChi2Max_ enter the sum, with
+//    their pt capped at trackPtMax_.
+void L2TauNNProducerAlpaka::selectGoodTracksAndVertices(const ZVertexHost& patavtx_soa,
+                                                        const TracksHost& patatracks_tsoa,
+                                                        std::vector<int>& trkGood,
+                                                        std::vector<int>& vtxGood) {
+  using patatrackHelpers = TracksUtilities<pixelTopology::Phase1>;
+  const auto nTrackSlots = patatracks_tsoa.view().metadata().size();
+  const int nVertices = patavtx_soa.view().nvFinal();
+  trkGood.clear();
+  trkGood.reserve(nTrackSlots);
+  vtxGood.clear();
+  vtxGood.reserve(nVertices);
+  auto const* quality = patatracks_tsoa.view().quality();
+
+  // No sorting is needed: the algorithm only uses the maximum value of the pt2 sums, not even its location.
+  std::vector<float> sumPt2(nVertices, 0);
+  std::vector<int> nAssociated(nVertices, 0);
+
+  for (int32_t trk_idx = 0; trk_idx < nTrackSlots; ++trk_idx) {
+    const auto nHits = patatrackHelpers::nHits(patatracks_tsoa.view(), trk_idx);
+    // a track without hits ends the scan
+    if (nHits == 0) {
+      break;
+    }
+    const int vtxOfTrack = patavtx_soa.view()[trk_idx].idv();
+    if (vtxOfTrack >= 0 && vtxOfTrack < nVertices) {
+      ++nAssociated[vtxOfTrack];
+      const auto trackPt = patatracks_tsoa.view()[trk_idx].pt();
+      if (trackPt >= trackPtMin_ && patatracks_tsoa.const_view()[trk_idx].chi2() <= trackChi2Max_) {
+        const auto cappedPt = std::min(trackPt, trackPtMax_);
+        sumPt2[vtxOfTrack] += cappedPt * cappedPt;
+      }
+    }
+    if (nHits > 0 and quality[trk_idx] >= pixelTrack::Quality::loose) {
+      trkGood.push_back(trk_idx);
+    }
+  }
+
+  if (nVertices <= 0) {
+    return;
+  }
+  const auto minSumPt2FromFraction = (*std::max_element(sumPt2.begin(), sumPt2.end())) * fractionSumPt2_;
+  for (int j = nVertices - 1; j >= 0 && vtxGood.size() < maxVtx_; --j) {
+    const auto vtx_idx = patavtx_soa.view()[j].sortInd();
+    assert(vtx_idx < nVertices);
+    if (nAssociated[vtx_idx] >= 2 && sumPt2[vtx_idx] >= minSumPt2FromFraction && sumPt2[vtx_idx] > minSumPt2_) {
+      vtxGood.push_back(vtx_idx);
+    }
+  }
+}
+
+// Computes the transverse (dxy) and longitudinal (dz) impact parameters of the track with index `it`.
+// The track parameters are copied to dense form, transformed to the perigee plane and expressed in the global
+// frame through a plane centred on the beam spot position (x0, y0, z0) and rotated according to patatrackPhi;
+// dxy and dz are then derived from the resulting global position and momentum direction.
+// Returns the pair (dxy, dz).
+std::pair<float, float> L2TauNNProducerAlpaka::impactParameter(int it,
+                                                               const TracksHost& patatracks_tsoa,
+                                                               float patatrackPhi,
+                                                               const reco::BeamSpot& beamspot,
+                                                               const MagneticField* magfi) {
+  /* dxy and dz */
+  riemannFit::Vector5d ipar, opar;
+  riemannFit::Matrix5d icov, ocov;
+  TracksUtilities<pixelTopology::Phase1>::copyToDense(patatracks_tsoa.view(), ipar, icov, it);
+  riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov);
+  LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.);
+  // plane through the beam spot, oriented with the sine and cosine of the track phi
+  float sp = std::sin(patatrackPhi);
+  float cp = std::cos(patatrackPhi);
+  Surface::RotationType Rotation(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0);
+  GlobalPoint BeamSpotPoint(beamspot.x0(), beamspot.y0(), beamspot.z0());
+  Plane impPointPlane(BeamSpotPoint, Rotation);
+  // position and momentum of the track in the global frame
+  GlobalTrajectoryParameters gp(
+      impPointPlane.toGlobal(lpar.position()), impPointPlane.toGlobal(lpar.momentum()), lpar.charge(), magfi);
+  GlobalPoint vv = gp.position();
+  GlobalVector pp = gp.momentum();
+  auto lambda = M_PI_2 - pp.theta();
+  auto phi = pp.phi();
+  float patatrackDxy = -vv.x() * std::sin(phi) + vv.y() * std::cos(phi);
+  float patatrackDz =
+      (vv.z() * std::cos(lambda) - (vv.x() * std::cos(phi) + vv.y() * std::sin(phi)) * std::sin(lambda)) /
+      std::cos(lambda);
+  return std::make_pair(patatrackDxy, patatrackDz);
+}
+
+// Fills the pixel-track inputs of the grid of every tau.
+// Tracks and vertices are first selected with selectGoodTracksAndVertices. For each tau, every selected track
+// with pt > 0, at least one hit and deltaR2 to the tau below dR2_max is added to the cell returned by
+// getEtaPhiIndices: pt sum, track count, charge sum and the pt-weighted deta, dphi, chi2, ndof, dxy and dz are
+// accumulated, plus a separate pt sum and count for tracks associated to a selected vertex.
+// Afterwards every cell receives the number of selected vertices and its pt-weighted sums are divided by the
+// pt sum of the cell when that sum is positive.
+void L2TauNNProducerAlpaka::fillPatatracks(tensorflow::Tensor& cellGridMatrix,
+                                           const std::vector<l1t::TauRef>& allTaus,
+                                           const TracksHost& patatracks_tsoa,
+                                           const ZVertexHost& patavtx_soa,
+                                           const reco::BeamSpot& beamspot,
+                                           const MagneticField* magfi) {
+  using NNInputs = L2TauTagNNv1::NNInputs;
+  using patatrackHelpers = TracksUtilities<pixelTopology::Phase1>;
+
+  // current grid coordinates, shared with the cell accessor below
+  int tau_idx = 0;
+  int eta_idx = 0;
+  int phi_idx = 0;
+  float deta, dphi;
+
+  auto cell = [&](NNInputs input) -> float& {
+    return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input);
+  };
+
+  std::vector<int> trkGood;
+  std::vector<int> vtxGood;
+  selectGoodTracksAndVertices(patavtx_soa, patatracks_tsoa, trkGood, vtxGood);
+
+  const int nTaus = allTaus.size();
+  for (tau_idx = 0; tau_idx < nTaus; tau_idx++) {
+    const float tauEta = allTaus[tau_idx]->eta();
+    const float tauPhi = allTaus[tau_idx]->phi();
+
+    for (const auto it : trkGood) {
+      const float patatrackPt = patatracks_tsoa.const_view()[it].pt();
+      const auto nHits = patatrackHelpers::nHits(patatracks_tsoa.const_view(), it);
+      if (patatrackPt <= 0 || nHits <= 0)
+        continue;
+
+      // keep only the tracks inside the cone of the tau
+      const float patatrackPhi = reco::phi(patatracks_tsoa.const_view(), it);
+      const float patatrackEta = patatracks_tsoa.const_view()[it].eta();
+      if (!(reco::deltaR2(patatrackEta, patatrackPhi, tauEta, tauPhi) < dR2_max))
+        continue;
+
+      const float patatrackCharge = reco::charge(patatracks_tsoa.const_view(), it);
+      const float patatrackChi2OverNdof = patatracks_tsoa.view()[it].chi2();
+      const int patatrackNdof = 2 * std::min(6, nHits) - 5;
+      const int vtx_idx_assTrk = patavtx_soa.view()[it].idv();
+
+      std::tie(deta, dphi, eta_idx, phi_idx) =
+          getEtaPhiIndices(patatrackEta, patatrackPhi, allTaus[tau_idx]->polarP4());
+      cell(NNInputs::PatatrackPtSum) += patatrackPt;
+      cell(NNInputs::PatatrackSize) += 1.;
+      cell(NNInputs::PatatrackChargeSum) += patatrackCharge;
+      cell(NNInputs::PatatrackDeltaEta) += deta * patatrackPt;
+      cell(NNInputs::PatatrackDeltaPhi) += dphi * patatrackPt;
+      cell(NNInputs::PatatrackChi2OverNdof) += patatrackChi2OverNdof * patatrackPt;
+      cell(NNInputs::PatatrackNdof) += patatrackNdof * patatrackPt;
+      const std::pair<float, float> dxyAndDz = impactParameter(it, patatracks_tsoa, patatrackPhi, beamspot, magfi);
+      cell(NNInputs::PatatrackDxy) += dxyAndDz.first * patatrackPt;
+      cell(NNInputs::PatatrackDz) += dxyAndDz.second * patatrackPt;
+
+      const bool fromGoodVertex = std::find(vtxGood.begin(), vtxGood.end(), vtx_idx_assTrk) != vtxGood.end();
+      if (fromGoodVertex) {
+        cell(NNInputs::PatatrackPtSumWithVertex) += patatrackPt;
+        cell(NNInputs::PatatrackSizeWithVertex) += 1.;
+      }
+    }
+
+    // normalize to sum and define stdDev
+    for (eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) {
+      for (phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) {
+        cell(NNInputs::nVertices) = vtxGood.size();
+        const float ptSum = cell(NNInputs::PatatrackPtSum);
+        if (ptSum > 0.) {
+          for (const auto weighted : {NNInputs::PatatrackDeltaEta,
+                                      NNInputs::PatatrackDeltaPhi,
+                                      NNInputs::PatatrackChi2OverNdof,
+                                      NNInputs::PatatrackNdof,
+                                      NNInputs::PatatrackDxy,
+                                      NNInputs::PatatrackDz}) {
+            cell(weighted) /= ptSum;
+          }
+        }
+      }
+    }
+  }
+}
+
+// Evaluates the network held by L2cacheData_->session on the input grid.
+// Returns one score per entry of the first tensor dimension: element (tau_idx, 0) of the first output tensor.
+std::vector<float> L2TauNNProducerAlpaka::getTauScore(const tensorflow::Tensor& cellGridMatrix) {
+  std::vector<tensorflow::Tensor> outputs;
+  tensorflow::run(L2cacheData_->session, {{inputTensorName_, cellGridMatrix}}, {outputTensorName_}, &outputs);
+
+  const int nTau = cellGridMatrix.shape().dim_size(0);
+  std::vector<float> scores;
+  scores.reserve(nTau);
+  for (int tau_idx = 0; tau_idx < nTau; ++tau_idx) {
+    scores.push_back(outputs[0].matrix<float>()(tau_idx, 0));
+  }
+  return scores;
+}
+
+// Computes the network score of every L1 tau of the event.
+// The L1 taus of all input collections are merged into a single list without duplicates, the input grid is
+// built from the L1 taus, the calorimeter rec hits and the pixel tracks and vertices, standardized and passed
+// to the network. For each input collection a vector with the scores of its taus, in the order of the
+// collection, is put into the event under the CollectionName of that input.
+void L2TauNNProducerAlpaka::produce(edm::Event& event, const edm::EventSetup& eventsetup) {
+  // tauIndexMap[i][j] is the position in allTaus of the j-th L1 tau of the i-th input collection
+  std::vector<std::vector<size_t>> tauIndexMap(L1TauDesc_.size());
+  l1t::TauVectorRef allTaus;
+
+  for (size_t inp_idx = 0; inp_idx < L1TauDesc_.size(); ++inp_idx) {
+    l1t::TauVectorRef l1Taus;
+    auto const& l1TriggeredTaus = event.get(L1TauDesc_[inp_idx].inputToken_);
+    l1TriggeredTaus.getObjects(trigger::TriggerL1Tau, l1Taus);
+
+    auto& positions = tauIndexMap.at(inp_idx);
+    positions.reserve(l1Taus.size());
+    for (const auto& l1Tau : l1Taus) {
+      const auto found = std::find(allTaus.begin(), allTaus.end(), l1Tau);
+      // for a tau seen for the first time this is the index it gets once appended
+      const size_t tau_idx = std::distance(allTaus.begin(), found);
+      if (found == allTaus.end()) {
+        allTaus.push_back(l1Tau);
+      }
+      positions.push_back(tau_idx);
+    }
+  }
+
+  const auto ebCal = event.getHandle(ebToken_);
+  const auto eeCal = event.getHandle(eeToken_);
+  const auto hbhe = event.getHandle(hbheToken_);
+  const auto ho = event.getHandle(hoToken_);
+  auto const& patatracks_SoA = event.get(pataTracksToken_);
+  auto const& vertices_SoA = event.get(pataVerticesToken_);
+  const auto bsHandle = event.getHandle(beamSpotToken_);
+
+  auto const fieldESH = eventsetup.getHandle(bFieldToken_);
+  auto const geometry = eventsetup.getHandle(geometryToken_);
+
+  caloRecHitCollections caloRecHits;
+  caloRecHits.hbhe = &*hbhe;
+  caloRecHits.ho = &*ho;
+  caloRecHits.eb = &*ebCal;
+  caloRecHits.ee = &*eeCal;
+  caloRecHits.geometry = &*geometry;
+
+  // input grid, with all entries set to zero before the filling steps
+  const int nTaus = allTaus.size();
+  tensorflow::Tensor cellGridMatrix(tensorflow::DT_FLOAT,
+                                    {nTaus, L2TauTagNNv1::nCellEta, L2TauTagNNv1::nCellPhi, L2TauTagNNv1::nVars});
+  cellGridMatrix.flat<float>().setZero();
+
+  fillL1TauVars(cellGridMatrix, allTaus);
+  fillCaloRecHits(cellGridMatrix, allTaus, caloRecHits);
+  fillPatatracks(cellGridMatrix, allTaus, patatracks_SoA, vertices_SoA, *bsHandle, fieldESH.product());
+  standardizeTensor(cellGridMatrix);
+
+  if (debugLevel_ > 0) {
+    checknan(cellGridMatrix, debugLevel_);
+  }
+
+  const std::vector<float> tau_score = getTauScore(cellGridMatrix);
+
+  // one output vector per input collection, in the order of the taus of that collection
+  for (size_t inp_idx = 0; inp_idx < L1TauDesc_.size(); ++inp_idx) {
+    const auto& positions = tauIndexMap[inp_idx];
+    auto tau_tags = std::make_unique<std::vector<float>>();
+    tau_tags->reserve(positions.size());
+    for (const auto tau_idx : positions) {
+      if (debugLevel_ > 0) {
+        edm::LogInfo("DebugInfo") << event.id().event() << " \t " << (allTaus[tau_idx])->pt() << " \t "
+                                  << tau_score.at(tau_idx) << std::endl;
+      }
+      tau_tags->push_back(tau_score.at(tau_idx));
+    }
+    event.put(std::move(tau_tags), L1TauDesc_[inp_idx].CollectionName);
+  }
+}
+// Define L2TauNNProducerAlpaka as a framework plug-in module
+#include "FWCore/Framework/interface/MakerMacros.h"
+DEFINE_FWK_MODULE(L2TauNNProducerAlpaka);
diff --git a/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py b/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py
index 895ba32eca71a..f5ba3ad7df1da 100644
--- a/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py
+++ b/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py
@@ -1,4 +1,5 @@
 import FWCore.ParameterSet.Config as cms
+from HeterogeneousCore.AlpakaCore.functions import *
 from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA
 
 from RecoTracker.PixelTrackFitting.PixelTracks_cff import *
@@ -110,11 +111,9 @@
 alpaka.toReplaceWith(pixelVertices, _pixelVertexFromSoAAlpaka.clone())
 
 # pixel vertex SoA producer with alpaka on the cpu, for validation
-pixelVerticesAlpakaSerial = pixelVerticesAlpaka.clone(
-    pixelTrackSrc = 'pixelTracksAlpakaSerial',
-    alpaka = None
+pixelVerticesAlpakaSerial = makeSerialClone(pixelVerticesAlpaka,
+    pixelTrackSrc = 'pixelTracksAlpakaSerial'
 )
-pixelVerticesAlpakaSerial._TypedParameterizable__type = 'alpaka_serial_sync' + pixelVerticesAlpaka._TypedParameterizable__type.removesuffix('@alpaka')
 
 alpaka.toReplaceWith(pixelVerticesTask, cms.Task(
     # Build the pixel vertices in SoA format with alpaka on the device
diff --git a/RecoTracker/Configuration/python/customizePixelOnlyForProfiling.py b/RecoTracker/Configuration/python/customizePixelOnlyForProfiling.py
index 3d121a8736f8e..55a02f83f913c 100644
--- a/RecoTracker/Configuration/python/customizePixelOnlyForProfiling.py
+++ b/RecoTracker/Configuration/python/customizePixelOnlyForProfiling.py
@@ -3,10 +3,12 @@
 # Customise the Pixel-only reconstruction to run on GPU
 #
 # Run the unpacker, clustering, ntuplets, track fit and vertex reconstruction on GPU.
+# The CUDA and Alpaka versions coexist here for the moment
+
 def customizePixelOnlyForProfilingGPUOnly(process):
 
   process.consumer = cms.EDAnalyzer("GenericConsumer",
-      eventProducts = cms.untracked.vstring('pixelTracksCUDA', 'pixelVerticesCUDA')
+      eventProducts = cms.untracked.vstring('pixelTracksCUDA', 'pixelVerticesCUDA', '*DeviceProduct_pixelTracksAlpaka_*_*', '*DeviceProduct_pixelVerticesAlpaka_*_*')
   )
 
   process.consume_step = cms.EndPath(process.consumer)
@@ -25,10 +27,8 @@ def customizePixelOnlyForProfilingGPUOnly(process):
 # tracks and vertices on the CPU in SoA format, without conversion to legacy format.
 def customizePixelOnlyForProfilingGPUWithHostCopy(process):
 
-  #? process.siPixelRecHitSoAFromLegacy.convertToLegacy = False
-
   process.consumer = cms.EDAnalyzer("GenericConsumer",
-      eventProducts = cms.untracked.vstring('pixelTracksSoA', 'pixelVerticesSoA')
+      eventProducts = cms.untracked.vstring('pixelTracksSoA', 'pixelVerticesSoA', 'pixelTracksAlpaka', 'pixelVerticesAlpaka')
   )
 
   process.consume_step = cms.EndPath(process.consumer)
diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.cu b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.cu
index efb2a2e17715c..6e07126e9e428 100644
--- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.cu
+++ b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.cu
@@ -1,8 +1,9 @@
-#include "RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h"
 #include <mutex>
 
-// #define NTUPLE_DEBUG
-// #define GPU_DEBUG
+#include "RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h"
+
+//#define GPU_DEBUG
+//#define NTUPLE_DEBUG
 
 template <typename TrackerTraits>
 void CAHitNtupletGeneratorKernelsGPU<TrackerTraits>::launchKernels(const HitsConstView &hh,
diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.h b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.h
index 0865fa5cbc46a..250aef21c1d6a 100644
--- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.h
+++ b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernels.h
@@ -1,18 +1,17 @@
 #ifndef RecoTracker_PixelSeeding_plugins_CAHitNtupletGeneratorKernels_h
 #define RecoTracker_PixelSeeding_plugins_CAHitNtupletGeneratorKernels_h
 
-// #define GPU_DEBUG
+//#define GPU_DEBUG
+//#define DUMP_GPU_TK_TUPLES
 
-#include "GPUCACell.h"
-#include "gpuPixelDoublets.h"
-
-#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
-#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h"
 #include "CUDADataFormats/Common/interface/HeterogeneousSoA.h"
-#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h"
+#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
 #include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h"
+#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitSoADevice.h"
+#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h"
 
-// #define DUMP_GPU_TK_TUPLES
+#include "GPUCACell.h"
+#include "gpuPixelDoublets.h"
 
 namespace caHitNtupletGenerator {
 
diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsAlloc.cc b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsAlloc.cc
index 6acff4abbd531..64148d5f5ba81 100644
--- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsAlloc.cc
+++ b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsAlloc.cc
@@ -2,7 +2,8 @@
 
 #include "CAHitNtupletGeneratorKernels.h"
 
-// #define GPU_DEBUG
+//#define GPU_DEBUG
+
 template <typename TrackerTraits>
 #ifdef __CUDACC__
 void CAHitNtupletGeneratorKernelsGPU<TrackerTraits>::allocateOnGPU(int32_t nHits, cudaStream_t stream) {
diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h
index 540c0b92f9015..57e4ea6f9441f 100644
--- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h
+++ b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorKernelsImpl.h
@@ -2,8 +2,8 @@
 // Original Author: Felice Pantaleo, CERN
 //
 
-// #define NTUPLE_DEBUG
-// #define GPU_DEBUG
+//#define NTUPLE_DEBUG
+//#define GPU_DEBUG
 
 #include <cmath>
 #include <cstdint>
@@ -11,15 +11,14 @@
 
 #include <cuda_runtime.h>
 
+#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
+#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
 #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"
 
-#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
-#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h"
-
-#include "CAStructures.h"
 #include "CAHitNtupletGeneratorKernels.h"
+#include "CAStructures.h"
 #include "GPUCACell.h"
 #include "gpuFishbone.h"
 #include "gpuPixelDoublets.h"
diff --git a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorOnGPU.cc b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorOnGPU.cc
index faf0bae6fb0a9..5100cf734142c 100644
--- a/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorOnGPU.cc
+++ b/RecoTracker/PixelSeeding/plugins/CAHitNtupletGeneratorOnGPU.cc
@@ -2,8 +2,8 @@
 // Original Author: Felice Pantaleo, CERN
 //
 
-// #define GPU_DEBUG
-// #define DUMP_GPU_TK_TUPLES
+//#define GPU_DEBUG
+//#define DUMP_GPU_TK_TUPLES
 
 #include <array>
 #include <cassert>
diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc
index d003fd97d105a..8f898872a66f4 100644
--- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc
+++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc
@@ -2,8 +2,8 @@
 // Original Author: Felice Pantaleo, CERN
 //
 
-#define GPU_DEBUG
-// #define DUMP_GPU_TK_TUPLES
+//#define GPU_DEBUG
+//#define DUMP_GPU_TK_TUPLES
 
 #include <array>
 #include <cassert>
diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc
index 9209ab87c9cbc..44e3295bdb606 100644
--- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc
+++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc
@@ -11,8 +11,8 @@
 #include <mutex>
 #endif
 
-#define NTUPLE_DEBUG
-#define GPU_DEBUG
+//#define GPU_DEBUG
+//#define NTUPLE_DEBUG
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h
index 0b8a11c63404c..d55be09e6e497 100644
--- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h
+++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h
@@ -1,23 +1,25 @@
 #ifndef RecoPixelVertexing_PixelTriplets_CAHitNtupletGeneratorKernels_h
 #define RecoPixelVertexing_PixelTriplets_CAHitNtupletGeneratorKernels_h
 
-// #define GPU_DEBUG
-#include <alpaka/alpaka.hpp>
-#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+//#define GPU_DEBUG
+//#define DUMP_GPU_TK_TUPLES
+
 #include <cstdint>
-#include "CACell.h"
-#include "CAPixelDoublets.h"
-#include "CAStructures.h"
 
-#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h"
+#include <alpaka/alpaka.hpp>
+
 #include "DataFormats/TrackSoA/interface/TrackDefinitions.h"
 #include "DataFormats/TrackSoA/interface/TracksHost.h"
+#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h"
 #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/AtomicPairCounter.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
 #include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
 
-// #define DUMP_GPU_TK_TUPLES
+#include "CACell.h"
+#include "CAPixelDoublets.h"
+#include "CAStructures.h"
 
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
   namespace caHitNtupletGenerator {
diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h
index ff70abcad2b41..b809caa2e5736 100644
--- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h
+++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h
@@ -2,8 +2,8 @@
 // Original Author: Felice Pantaleo, CERN
 //
 
-// #define NTUPLE_DEBUG
-// #define GPU_DEBUG
+//#define GPU_DEBUG
+//#define NTUPLE_DEBUG
 
 #include <alpaka/alpaka.hpp>
 #include <cmath>
@@ -250,7 +250,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
           float mc = maxScore;
           uint16_t im = tkNotFound;
 
-          auto score = [&](auto it) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };
+          auto score = [&](auto it) { return std::abs(reco::tip(tracks_view, it)); };
 
           // full crazy combinatorics
           int ntr = thisCell.tracks().size();
@@ -753,7 +753,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
           if (hitToTuple.size(idx) < 2)
             continue;
 
-          auto score = [&](auto it, auto nl) { return std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)); };
+          auto score = [&](auto it, auto nl) { return std::abs(reco::tip(tracks_view, it)); };
 
           // full combinatorics
           for (auto ip = hitToTuple.begin(idx); ip < hitToTuple.end(idx) - 1; ++ip) {
@@ -874,7 +874,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
           for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) {
             if (tracks_view[*it].quality() <= good)
               continue;
-            onlyTriplets &= TracksUtilities<TrackerTraits>::isTriplet(tracks_view, *it);
+            onlyTriplets &= reco::isTriplet(tracks_view, *it);
             if (!onlyTriplets)
               break;
           }
@@ -886,9 +886,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
           // for triplets choose best tip!  (should we first find best quality???)
           for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
             auto const it = *ip;
-            if (tracks_view[it].quality() >= good &&
-                std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)) < mc) {
-              mc = std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it));
+            if (tracks_view[it].quality() >= good && std::abs(reco::tip(tracks_view, it)) < mc) {
+              mc = std::abs(reco::tip(tracks_view, it));
               im = it;
             }
           }
@@ -933,9 +932,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
           // choose best tip!  (should we first find best quality???)
           for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
             auto const it = *ip;
-            if (tracks_view[it].quality() >= good &&
-                std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it)) < mc) {
-              mc = std::abs(TracksUtilities<TrackerTraits>::tip(tracks_view, it));
+            if (tracks_view[it].quality() >= good && std::abs(reco::tip(tracks_view, it)) < mc) {
+              mc = std::abs(reco::tip(tracks_view, it));
               im = it;
             }
           }
@@ -946,8 +944,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
           // mark worse ambiguities
           for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) {
             auto const it = *ip;
-            if (tracks_view[it].quality() > reject && TracksUtilities<TrackerTraits>::isTriplet(tracks_view, it) &&
-                it != im)
+            if (tracks_view[it].quality() > reject && reco::isTriplet(tracks_view, it) && it != im)
               tracks_view[it].quality() = reject;  //no race:  simple assignment of the same constant
           }
 
@@ -980,12 +977,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
                  nh,
                  tracks_view[i].nLayers(),
                  reco::charge(tracks_view, i),
-                 //TracksUtilities<TrackerTraits>::charge(tracks_view, i),
                  tracks_view[i].pt(),
                  tracks_view[i].eta(),
-                 TracksUtilities<TrackerTraits>::phi(tracks_view, i),
-                 TracksUtilities<TrackerTraits>::tip(tracks_view, i),
-                 TracksUtilities<TrackerTraits>::zip(tracks_view, i),
+                 reco::phi(tracks_view, i),
+                 reco::tip(tracks_view, i),
+                 reco::zip(tracks_view, i),
                  tracks_view[i].chi2(),
                  hh[*tracks_view.hitIndices().begin(i)].zGlobal(),
                  hh[*(tracks_view.hitIndices().begin(i) + 1)].zGlobal(),
diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h
index f6373893c8e88..234b9b7527a3c 100644
--- a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h
+++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h
@@ -6,18 +6,22 @@
 #include <cstdint>
 #include <cstdio>
 #include <limits>
+
 #include <alpaka/alpaka.hpp>
-#include "HeterogeneousCore/AlpakaInterface/interface/traits.h"
-#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
-#include "HeterogeneousCore/AlpakaInterface/interface/VecArray.h"
-#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
+
 #include "DataFormats/Math/interface/approx_atan2.h"
+#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
 #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
-#include "CAStructures.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/VecArray.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/traits.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
+
 #include "CACell.h"
+#include "CAStructures.h"
 
-#define GPU_DEBUG
+//#define GPU_DEBUG
 //#define NTUPLE_DEBUG
+
 namespace ALPAKA_ACCELERATOR_NAMESPACE {
   namespace caPixelDoublets {
     using namespace cms::alpakatools;
@@ -187,8 +191,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
       const auto& [firstElementIdxNoStrideY, endElementIdxNoStrideY] =
           cms::alpakatools::element_index_range_in_grid(acc, 0u, dimIndexY);
       uint32_t firstElementIdxY = firstElementIdxNoStrideY;
+      uint32_t endElementIdxY = endElementIdxNoStrideY;
+
+      //const uint32_t incY = cms::alpakatools::requires_single_thread_per_block_v<TAcc> ? 1 : gridDimensionY;
+      for (uint32_t j = firstElementIdxY; j < ntot; j++) {
+        if (not cms::alpakatools::next_valid_element_index_strided(
+                j, firstElementIdxY, endElementIdxY, gridDimensionY, ntot))
+          break;
 
-      for (uint32_t j = firstElementIdxY; j < ntot; j += gridDimensionY) {
         while (j >= innerLayerCumulativeSize[pairLayerId++])
           ;
         --pairLayerId;  // move to lower_bound ??
@@ -269,7 +279,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
 
           // Here we parallelize in X
           uint32_t firstElementIdxX = firstElementIdxNoStrideX;
-          for (uint32_t pIndex = firstElementIdxX; pIndex < maxpIndex; pIndex += blockDimensionX) {
+          uint32_t endElementIdxX = endElementIdxNoStrideX;
+
+          for (uint32_t pIndex = firstElementIdxX; pIndex < maxpIndex; ++pIndex) {
+            if (not cms::alpakatools::next_valid_element_index_strided(
+                    pIndex, firstElementIdxX, endElementIdxX, blockDimensionX, maxpIndex))
+              break;
             auto oi = p[pIndex];  // auto oi = __ldg(p); is not allowed since __ldg is device-only
             ALPAKA_ASSERT_OFFLOAD(oi >= offsets[outer]);
             ALPAKA_ASSERT_OFFLOAD(oi < offsets[outer + 1]);
diff --git a/RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h b/RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h
index ac5975abb2dd5..583021081d534 100644
--- a/RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h
+++ b/RecoTracker/PixelSeeding/plugins/gpuPixelDoubletsAlgos.h
@@ -9,15 +9,15 @@
 
 #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHitsUtilities.h"
 #include "DataFormats/Math/interface/approx_atan2.h"
+#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
 
-#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
 #include "CAStructures.h"
 #include "GPUCACell.h"
 
-// #define GPU_DEBUG
-// #define NTUPLE_DEBUG
+//#define GPU_DEBUG
+//#define NTUPLE_DEBUG
 
 namespace gpuPixelDoublets {
 
diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc
index e3acdd4c48938..4402a1891b2a4 100644
--- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc
+++ b/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc
@@ -35,7 +35,8 @@
  * This class creates "legacy" reco::Track
  * objects from the output of SoA CA.
  */
-#define GPU_DEBUG
+
+//#define GPU_DEBUG
 
 template <typename TrackerTraits>
 class PixelTrackProducerFromSoAAlpaka : public edm::global::EDProducer<> {
@@ -198,7 +199,7 @@ void PixelTrackProducerFromSoAAlpaka<TrackerTraits>::produce(edm::StreamID strea
     // mind: this values are respect the beamspot!
 
     float chi2 = tsoa.view()[it].chi2();
-    float phi = tracksHelpers::phi(tsoa.view(), it);
+    float phi = reco::phi(tsoa.view(), it);
 
     riemannFit::Vector5d ipar, opar;
     riemannFit::Matrix5d icov, ocov;
@@ -246,7 +247,7 @@ void PixelTrackProducerFromSoAAlpaka<TrackerTraits>::produce(edm::StreamID strea
     tracks.emplace_back(track.release(), hits);
   }
 #ifdef GPU_DEBUG
-  std::cout << "processed " << nt << " good tuples " << tracks.size() << "out of " << indToEdm.size() << std::endl;
+  std::cout << "processed " << nt << " good tuples " << tracks.size() << " out of " << indToEdm.size() << std::endl;
 #endif
   // store tracks
   storeTracks(iEvent, tracks, httopo);
diff --git a/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py b/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py
index aa2fb861de818..046caa0b033f3 100644
--- a/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py
+++ b/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py
@@ -1,4 +1,5 @@
 import FWCore.ParameterSet.Config as cms
+from HeterogeneousCore.AlpakaCore.functions import *
 from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA
 
 from RecoLocalTracker.SiStripRecHitConverter.StripCPEfromTrackAngle_cfi import *
@@ -218,11 +219,9 @@
 phase2_tracker.toReplaceWith(pixelTracksAlpaka,_pixelTracksAlpakaPhase2.clone())
 
 # pixel tracks SoA producer on the cpu, for validation
-pixelTracksAlpakaSerial = pixelTracksAlpaka.clone(
-    pixelRecHitSrc = 'siPixelRecHitsPreSplittingAlpakaSerial',
-    alpaka = None
+pixelTracksAlpakaSerial = makeSerialClone(pixelTracksAlpaka,
+    pixelRecHitSrc = 'siPixelRecHitsPreSplittingAlpakaSerial'
 )
-pixelTracksAlpakaSerial._TypedParameterizable__type = 'alpaka_serial_sync' + pixelTracksAlpaka._TypedParameterizable__type.removesuffix('@alpaka')
 
 # legacy pixel tracks from SoA
 from  RecoTracker.PixelTrackFitting.pixelTrackProducerFromSoAAlpakaPhase1_cfi import pixelTrackProducerFromSoAAlpakaPhase1 as _pixelTrackProducerFromSoAAlpakaPhase1
diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc
index 83bc8f0d84ec2..c40d9adda93c5 100644
--- a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc
+++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc
@@ -46,7 +46,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
           // initialize soa...
           soa[idx].idv() = -1;
 
-          if (helper::isTriplet(tracks_view, idx))
+          if (reco::isTriplet(tracks_view, idx))
             continue;  // no triplets
           if (quality[idx] < ::pixelTrack::Quality::highPurity)
             continue;
@@ -62,7 +62,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
           auto& data = pws;
           auto it = alpaka::atomicAdd(acc, &data.ntrks(), 1u, alpaka::hierarchy::Blocks{});
           data[it].itrk() = idx;
-          data[it].zt() = helper::zip(tracks_view, idx);
+          data[it].zt() = reco::zip(tracks_view, idx);
           data[it].ezt2() = tracks_view[idx].covariance()(14);
           data[it].ptt2() = pt * pt;
         }