cms-patatrack · fwyzard · Oct 20, 2020 · Oct 16, 2020 · Oct 16, 2020 · Oct 16, 2020
diff --git a/CondFormats/HcalObjects/interface/HcalCondObjectContainer.h b/CondFormats/HcalObjects/interface/HcalCondObjectContainer.h
@@ -64,7 +64,7 @@ class HcalCondObjectContainer : public HcalCondObjectContainerBase {
   const Item* getValues(DetId fId, bool throwOnFail = true) const;
 
   // does the object exist ?
-  const bool exists(DetId fId) const;
+  bool exists(DetId fId) const;
 
   // set the object/fill it in:
   bool addValues(const Item& myItem);
@@ -220,7 +220,7 @@ const Item* HcalCondObjectContainer<Item>::getValues(DetId fId, bool throwOnFail
 }
 
 template <class Item>
-const bool HcalCondObjectContainer<Item>::exists(DetId fId) const {
+bool HcalCondObjectContainer<Item>::exists(DetId fId) const {
   const Item* cell = getValues(fId, false);
 
   if (cell) {

diff --git a/...ormats/Math/interface/EigenComputations.h → ...loRecHit/interface/MultifitComputations.h b/...ormats/Math/interface/EigenComputations.h → ...loRecHit/interface/MultifitComputations.h
@@ -1,5 +1,5 @@
-#ifndef DataFormats_Math_interface_EigenComputations_h
-#define DataFormats_Math_interface_EigenComputations_h
+#ifndef DataFormats_CaloRecHit_interface_MultifitComputations_h
+#define DataFormats_CaloRecHit_interface_MultifitComputations_h
 
 #include <cmath>
 #include <limits>
@@ -32,16 +32,16 @@ namespace calo {
       static constexpr int stride = Stride;
       T* data;
 
-      __forceinline__ __device__ MapSymM(T* data) : data{data} {}
+      EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC MapSymM(T* data) : data{data} {}
 
-      __forceinline__ __device__ T const& operator()(int const row, int const col) const {
+      EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC T const& operator()(int const row, int const col) const {
         auto const tmp = (Stride - col) * (Stride - col + 1) / 2;
         auto const index = total - tmp + row - col;
         return data[index];
       }
 
       template <typename U = T>
-      __forceinline__ __device__ typename std::enable_if<std::is_same<base_type, U>::value, base_type>::type&
+      EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC typename std::enable_if<std::is_same<base_type, U>::value, base_type>::type&
       operator()(int const row, int const col) {
         auto const tmp = (Stride - col) * (Stride - col + 1) / 2;
         auto const index = total - tmp + row - col;
@@ -58,17 +58,17 @@ namespace calo {
       using base_type = typename std::remove_cv<type>::type;
 
       type* data;
-      __forceinline__ __device__ MapMForPM(type* data) : data{data} {}
+      EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC MapMForPM(type* data) : data{data} {}
 
-      __forceinline__ __device__ base_type operator()(int const row, int const col) const {
+      EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC base_type operator()(int const row, int const col) const {
         auto const index = 2 - col + row;
         return index >= 0 ? data[index] : 0;
       }
     };
 
     // simple/trivial cholesky decomposition impl
     template <typename MatrixType1, typename MatrixType2>
-    __forceinline__ __device__ void compute_decomposition_unrolled(MatrixType1& L, MatrixType2 const& M) {
+    EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition_unrolled(MatrixType1& L, MatrixType2 const& M) {
       auto const sqrtm_0_0 = std::sqrt(M(0, 0));
       L(0, 0) = sqrtm_0_0;
       using T = typename MatrixType1::base_type;
@@ -94,7 +94,7 @@ namespace calo {
     }
 
     template <typename MatrixType1, typename MatrixType2>
-    __forceinline__ __device__ void compute_decomposition(MatrixType1& L, MatrixType2 const& M, int const N) {
+    EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition(MatrixType1& L, MatrixType2 const& M, int const N) {
       auto const sqrtm_0_0 = std::sqrt(M(0, 0));
       L(0, 0) = sqrtm_0_0;
       using T = typename MatrixType1::base_type;
@@ -119,7 +119,7 @@ namespace calo {
     }
 
     template <typename MatrixType1, typename MatrixType2, typename VectorType>
-    __forceinline__ __device__ void compute_decomposition_forwardsubst_with_offsets(
+    EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition_forwardsubst_with_offsets(
         MatrixType1& L,
         MatrixType2 const& M,
         float b[MatrixType1::stride],
@@ -158,7 +158,7 @@ namespace calo {
     }
 
     template <typename MatrixType1, typename MatrixType2, typename VectorType>
-    __forceinline__ __device__ void update_decomposition_forwardsubst_with_offsets(
+    EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void update_decomposition_forwardsubst_with_offsets(
         MatrixType1& L,
         MatrixType2 const& M,
         float b[MatrixType1::stride],
@@ -190,7 +190,7 @@ namespace calo {
     }
 
     template <typename MatrixType1, typename MatrixType2, typename MatrixType3>
-    __device__ void solve_forward_subst_matrix(MatrixType1& A,
+    EIGEN_DEVICE_FUNC void solve_forward_subst_matrix(MatrixType1& A,
                                                MatrixType2 const& pulseMatrixView,
                                                MatrixType3 const& matrixL) {
       // FIXME: this assumes pulses are on columns and samples on rows
@@ -205,7 +205,12 @@ namespace calo {
 // preload a column and load column 0 of cholesky
 #pragma unroll
         for (int i = 0; i < NSAMPLES; i++) {
+#ifdef __CUDA_ARCH__
+          // device code: __ldg loads through the read-only cache (the host branch below reads coeffRef directly)
           reg_b[i] = __ldg(&pulseMatrixView.coeffRef(i, icol));
+#else
+          reg_b[i] = pulseMatrixView.coeffRef(i, icol);
+#endif  // __CUDA_ARCH__
           reg_L[i] = matrixL(i, 0);
         }
 
@@ -236,7 +241,7 @@ namespace calo {
     }
 
     template <typename MatrixType1, typename MatrixType2>
-    __device__ void solve_forward_subst_vector(float reg_b[MatrixType1::RowsAtCompileTime],
+    EIGEN_DEVICE_FUNC void solve_forward_subst_vector(float reg_b[MatrixType1::RowsAtCompileTime],
                                                MatrixType1 inputAmplitudesView,
                                                MatrixType2 matrixL) {
       constexpr auto NSAMPLES = MatrixType1::RowsAtCompileTime;
@@ -276,24 +281,24 @@ namespace calo {
       }
     }
 
-    /*
     // TODO: add active bxs
     template <typename MatrixType, typename VectorType>
-    __device__ void fnnls(MatrixType const& AtA,
+    EIGEN_DEVICE_FUNC void fnnls(MatrixType const& AtA,
                           VectorType const& Atb,
                           VectorType& solution,
                           int& npassive,
                           ColumnVector<VectorType::RowsAtCompileTime, int>& pulseOffsets,
                           MapSymM<float, VectorType::RowsAtCompileTime>& matrixL,
-                          double const eps,
-                          int const maxIterations) {
+                          double eps,                    // convergence condition
+                          const int maxIterations,       // maximum number of iterations
+                          const int relaxationPeriod,    // every "relaxationPeriod" iterations
+                          const int relaxationFactor) {  // multiply "eps" by "relaxationFactor"
       // constants
       constexpr auto NPULSES = VectorType::RowsAtCompileTime;
 
       // to keep track of where to terminate if converged
       Eigen::Index w_max_idx_prev = 0;
       float w_max_prev = 0;
-      auto eps_to_use = eps;
       bool recompute = false;
 
       // used throughout
@@ -331,7 +336,7 @@ namespace calo {
           }
 
           // check for convergence
-          if (w_max < eps_to_use || w_max_idx == w_max_idx_prev && w_max == w_max_prev)
+          if (w_max < eps || w_max_idx == w_max_idx_prev && w_max == w_max_prev)
             break;
 
           if (iter >= maxIterations)
@@ -428,13 +433,12 @@ namespace calo {
 
         // as in cpu
         ++iter;
-        if (iter % 16 == 0)
-          eps_to_use *= 2;
+        if (iter % relaxationPeriod == 0)
+          eps *= relaxationFactor;
       }
     }
-    */
 
   }  // namespace multifit
 }  // namespace calo
 
-#endif  // DataFormats_Math_interface_EigenComputations_h
+#endif  // DataFormats_CaloRecHit_interface_MultifitComputations_h
diff --git a/RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationKernels.cu b/RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationKernels.cu
@@ -279,15 +279,17 @@ namespace ecal {
           //DataType matrixLForFnnlsStorage[MapSymM<DataType, NPULSES>::total];
           calo::multifit::MapSymM<DataType, NPULSES> matrixLForFnnls{shrMatrixLForFnnlsStorage};
 
-          fnnls(AtA,
-                Atb,
-                //amplitudes[idx],
-                resultAmplitudes,
-                npassive,
-                pulseOffsets,
-                matrixLForFnnls,
-                1e-11,
-                500);
+          calo::multifit::fnnls(AtA,
+                                Atb,
+                                //amplitudes[idx],
+                                resultAmplitudes,
+                                npassive,
+                                pulseOffsets,
+                                matrixLForFnnls,
+                                1e-11,
+                                500,
+                                16,
+                                2);
 
           {
             DataType accum[NSAMPLES];

diff --git a/RecoLocalCalo/EcalRecProducers/plugins/KernelHelpers.h b/RecoLocalCalo/EcalRecProducers/plugins/KernelHelpers.h
@@ -1,176 +1,14 @@
 #ifndef RecoLocalCalo_EcalRecProducers_plugins_KernelHelpers_h
 #define RecoLocalCalo_EcalRecProducers_plugins_KernelHelpers_h
 
-#include "DataFormats/Math/interface/EigenComputations.h"
+#include "DataFormats/CaloRecHit/interface/MultifitComputations.h"
 
 #include <cmath>
 #include <limits>
 #include <type_traits>
 
 #include <Eigen/Dense>
 
-namespace ecal {
-  namespace multifit {
-
-    // TODO: add active bxs
-    template <typename MatrixType, typename VectorType>
-    __device__ void fnnls(MatrixType const& AtA,
-                          VectorType const& Atb,
-                          VectorType& solution,
-                          int& npassive,
-                          calo::multifit::ColumnVector<VectorType::RowsAtCompileTime, int>& pulseOffsets,
-                          calo::multifit::MapSymM<float, VectorType::RowsAtCompileTime>& matrixL,
-                          double const eps,
-                          int const maxIterations) {
-      // constants
-      constexpr auto NPULSES = VectorType::RowsAtCompileTime;
-
-      // to keep track of where to terminate if converged
-      Eigen::Index w_max_idx_prev = 0;
-      float w_max_prev = 0;
-      auto eps_to_use = eps;
-      bool recompute = false;
-
-      // used throughout
-      VectorType s;
-      float reg_b[NPULSES];
-      //float matrixLStorage[MapSymM<float, NPULSES>::total];
-      //MapSymM<float, NPULSES> matrixL{matrixLStorage};
-
-      int iter = 0;
-      while (true) {
-        if (iter > 0 || npassive == 0) {
-          auto const nactive = NPULSES - npassive;
-          // exit if there are no more pulses to constrain
-          if (nactive == 0)
-            break;
-
-          // compute the gradient
-          //w.tail(nactive) = Atb.tail(nactive) - (AtA * solution).tail(nactive);
-          Eigen::Index w_max_idx;
-          float w_max = -std::numeric_limits<float>::max();
-          for (int icol = npassive; icol < NPULSES; icol++) {
-            auto const icol_real = pulseOffsets(icol);
-            auto const atb = Atb(icol_real);
-            float sum = 0;
-#pragma unroll
-            for (int counter = 0; counter < NPULSES; counter++)
-              sum += counter > icol_real ? AtA(counter, icol_real) * solution(counter)
-                                         : AtA(icol_real, counter) * solution(counter);
-
-            auto const w = atb - sum;
-            if (w > w_max) {
-              w_max = w;
-              w_max_idx = icol - npassive;
-            }
-          }
-
-          // check for convergence
-          if (w_max < eps_to_use || w_max_idx == w_max_idx_prev && w_max == w_max_prev)
-            break;
-
-          if (iter >= maxIterations)
-            break;
-
-          w_max_prev = w_max;
-          w_max_idx_prev = w_max_idx;
-
-          // move index to the right part of the vector
-          w_max_idx += npassive;
-
-          Eigen::numext::swap(pulseOffsets.coeffRef(npassive), pulseOffsets.coeffRef(w_max_idx));
-          ++npassive;
-        }
-
-        // inner loop
-        while (true) {
-          if (npassive == 0)
-            break;
-
-          //s.head(npassive)
-          //auto const& matrixL =
-          //    AtA.topLeftCorner(npassive, npassive)
-          //        .llt().matrixL();
-          //.solve(Atb.head(npassive));
-          if (recompute || iter == 0)
-            compute_decomposition_forwardsubst_with_offsets(matrixL, AtA, reg_b, Atb, npassive, pulseOffsets);
-          else
-            update_decomposition_forwardsubst_with_offsets(matrixL, AtA, reg_b, Atb, npassive, pulseOffsets);
-
-          // run backward substituion
-          s(npassive - 1) = reg_b[npassive - 1] / matrixL(npassive - 1, npassive - 1);
-          for (int i = npassive - 2; i >= 0; --i) {
-            float total = 0;
-            for (int j = i + 1; j < npassive; j++)
-              total += matrixL(j, i) * s(j);
-
-            s(i) = (reg_b[i] - total) / matrixL(i, i);
-          }
-
-          // done if solution values are all positive
-          bool hasNegative = false;
-          bool hasNans = false;
-          for (int counter = 0; counter < npassive; counter++) {
-            auto const s_ii = s(counter);
-            hasNegative |= s_ii <= 0;
-            hasNans |= std::isnan(s_ii);
-          }
-
-          // FIXME: temporary solution. my cholesky impl is unstable yielding nans
-          // this check removes nans - do not accept solution unless all values
-          // are stable
-          if (hasNans)
-            break;
-          if (!hasNegative) {
-            for (int i = 0; i < npassive; i++) {
-              auto const i_real = pulseOffsets(i);
-              solution(i_real) = s(i);
-            }
-            //solution.head(npassive) = s.head(npassive);
-            recompute = false;
-            break;
-          }
-
-          // there were negative values -> have to recompute the whole decomp
-          recompute = true;
-
-          auto alpha = std::numeric_limits<float>::max();
-          Eigen::Index alpha_idx = 0, alpha_idx_real = 0;
-          for (int i = 0; i < npassive; i++) {
-            if (s[i] <= 0.) {
-              auto const i_real = pulseOffsets(i);
-              auto const ratio = solution[i_real] / (solution[i_real] - s[i]);
-              if (ratio < alpha) {
-                alpha = ratio;
-                alpha_idx = i;
-                alpha_idx_real = i_real;
-              }
-            }
-          }
-
-          // upadte solution
-          for (int i = 0; i < npassive; i++) {
-            auto const i_real = pulseOffsets(i);
-            solution(i_real) += alpha * (s(i) - solution(i_real));
-          }
-          //solution.head(npassive) += alpha *
-          //    (s.head(npassive) - solution.head(npassive));
-          solution[alpha_idx_real] = 0;
-          --npassive;
-
-          Eigen::numext::swap(pulseOffsets.coeffRef(npassive), pulseOffsets.coeffRef(alpha_idx));
-        }
-
-        // as in cpu
-        ++iter;
-        if (iter % 16 == 0)
-          eps_to_use *= 2;
-      }
-    }
-
-  }  // namespace multifit
-}  // namespace ecal
-
 namespace ecal {
   namespace reconstruction {