Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move multifit/MAHI common code to DataFormats/CaloRecHit #557

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CondFormats/HcalObjects/interface/HcalCondObjectContainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class HcalCondObjectContainer : public HcalCondObjectContainerBase {
const Item* getValues(DetId fId, bool throwOnFail = true) const;

// does the object exist ?
const bool exists(DetId fId) const;
bool exists(DetId fId) const;

// set the object/fill it in:
bool addValues(const Item& myItem);
Expand Down Expand Up @@ -220,7 +220,7 @@ const Item* HcalCondObjectContainer<Item>::getValues(DetId fId, bool throwOnFail
}

template <class Item>
const bool HcalCondObjectContainer<Item>::exists(DetId fId) const {
bool HcalCondObjectContainer<Item>::exists(DetId fId) const {
const Item* cell = getValues(fId, false);

if (cell) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DataFormats_Math_interface_EigenComputations_h
#define DataFormats_Math_interface_EigenComputations_h
#ifndef DataFormats_CaloRecHit_interface_MultifitComputations_h
#define DataFormats_CaloRecHit_interface_MultifitComputations_h

#include <cmath>
#include <limits>
Expand Down Expand Up @@ -32,16 +32,16 @@ namespace calo {
static constexpr int stride = Stride;
T* data;

__forceinline__ __device__ MapSymM(T* data) : data{data} {}
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC MapSymM(T* data) : data{data} {}

__forceinline__ __device__ T const& operator()(int const row, int const col) const {
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC T const& operator()(int const row, int const col) const {
auto const tmp = (Stride - col) * (Stride - col + 1) / 2;
auto const index = total - tmp + row - col;
return data[index];
}

template <typename U = T>
__forceinline__ __device__ typename std::enable_if<std::is_same<base_type, U>::value, base_type>::type&
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC typename std::enable_if<std::is_same<base_type, U>::value, base_type>::type&
operator()(int const row, int const col) {
auto const tmp = (Stride - col) * (Stride - col + 1) / 2;
auto const index = total - tmp + row - col;
Expand All @@ -58,17 +58,17 @@ namespace calo {
using base_type = typename std::remove_cv<type>::type;

type* data;
__forceinline__ __device__ MapMForPM(type* data) : data{data} {}
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC MapMForPM(type* data) : data{data} {}

__forceinline__ __device__ base_type operator()(int const row, int const col) const {
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC base_type operator()(int const row, int const col) const {
auto const index = 2 - col + row;
return index >= 0 ? data[index] : 0;
}
};

// simple/trivial cholesky decomposition impl
template <typename MatrixType1, typename MatrixType2>
__forceinline__ __device__ void compute_decomposition_unrolled(MatrixType1& L, MatrixType2 const& M) {
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition_unrolled(MatrixType1& L, MatrixType2 const& M) {
auto const sqrtm_0_0 = std::sqrt(M(0, 0));
L(0, 0) = sqrtm_0_0;
using T = typename MatrixType1::base_type;
Expand All @@ -94,7 +94,7 @@ namespace calo {
}

template <typename MatrixType1, typename MatrixType2>
__forceinline__ __device__ void compute_decomposition(MatrixType1& L, MatrixType2 const& M, int const N) {
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition(MatrixType1& L, MatrixType2 const& M, int const N) {
auto const sqrtm_0_0 = std::sqrt(M(0, 0));
L(0, 0) = sqrtm_0_0;
using T = typename MatrixType1::base_type;
Expand All @@ -119,7 +119,7 @@ namespace calo {
}

template <typename MatrixType1, typename MatrixType2, typename VectorType>
__forceinline__ __device__ void compute_decomposition_forwardsubst_with_offsets(
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition_forwardsubst_with_offsets(
MatrixType1& L,
MatrixType2 const& M,
float b[MatrixType1::stride],
Expand Down Expand Up @@ -158,7 +158,7 @@ namespace calo {
}

template <typename MatrixType1, typename MatrixType2, typename VectorType>
__forceinline__ __device__ void update_decomposition_forwardsubst_with_offsets(
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void update_decomposition_forwardsubst_with_offsets(
MatrixType1& L,
MatrixType2 const& M,
float b[MatrixType1::stride],
Expand Down Expand Up @@ -190,7 +190,7 @@ namespace calo {
}

template <typename MatrixType1, typename MatrixType2, typename MatrixType3>
__device__ void solve_forward_subst_matrix(MatrixType1& A,
EIGEN_DEVICE_FUNC void solve_forward_subst_matrix(MatrixType1& A,
MatrixType2 const& pulseMatrixView,
MatrixType3 const& matrixL) {
// FIXME: this assumes pulses are on columns and samples on rows
Expand All @@ -205,7 +205,12 @@ namespace calo {
// preload a column and load column 0 of cholesky
#pragma unroll
for (int i = 0; i < NSAMPLES; i++) {
#ifdef __CUDA_ARCH__
// load through the read-only cache
reg_b[i] = __ldg(&pulseMatrixView.coeffRef(i, icol));
#else
reg_b[i] = pulseMatrixView.coeffRef(i, icol);
#endif // __CUDA_ARCH__
reg_L[i] = matrixL(i, 0);
}

Expand Down Expand Up @@ -236,7 +241,7 @@ namespace calo {
}

template <typename MatrixType1, typename MatrixType2>
__device__ void solve_forward_subst_vector(float reg_b[MatrixType1::RowsAtCompileTime],
EIGEN_DEVICE_FUNC void solve_forward_subst_vector(float reg_b[MatrixType1::RowsAtCompileTime],
MatrixType1 inputAmplitudesView,
MatrixType2 matrixL) {
constexpr auto NSAMPLES = MatrixType1::RowsAtCompileTime;
Expand Down Expand Up @@ -276,24 +281,24 @@ namespace calo {
}
}

/*
// TODO: add active bxs
template <typename MatrixType, typename VectorType>
__device__ void fnnls(MatrixType const& AtA,
EIGEN_DEVICE_FUNC void fnnls(MatrixType const& AtA,
VectorType const& Atb,
VectorType& solution,
int& npassive,
ColumnVector<VectorType::RowsAtCompileTime, int>& pulseOffsets,
MapSymM<float, VectorType::RowsAtCompileTime>& matrixL,
double const eps,
int const maxIterations) {
double eps, // convergence condition
const int maxIterations, // maximum number of iterations
const int relaxationPeriod, // every "relaxationPeriod" iterations
const int relaxationFactor) { // multiply "eps" by "relaxationFactor"
// constants
constexpr auto NPULSES = VectorType::RowsAtCompileTime;

// to keep track of where to terminate if converged
Eigen::Index w_max_idx_prev = 0;
float w_max_prev = 0;
auto eps_to_use = eps;
bool recompute = false;

// used throughout
Expand Down Expand Up @@ -331,7 +336,7 @@ namespace calo {
}

// check for convergence
if (w_max < eps_to_use || w_max_idx == w_max_idx_prev && w_max == w_max_prev)
if (w_max < eps || w_max_idx == w_max_idx_prev && w_max == w_max_prev)
break;

if (iter >= maxIterations)
Expand Down Expand Up @@ -428,13 +433,12 @@ namespace calo {

// as in cpu
++iter;
if (iter % 16 == 0)
eps_to_use *= 2;
if (iter % relaxationPeriod == 0)
eps *= relaxationFactor;
}
}
*/

} // namespace multifit
} // namespace calo

#endif // DataFormats_Math_interface_EigenComputations_h
#endif // DataFormats_CaloRecHit_interface_MultifitComputations_h
Original file line number Diff line number Diff line change
Expand Up @@ -279,15 +279,17 @@ namespace ecal {
//DataType matrixLForFnnlsStorage[MapSymM<DataType, NPULSES>::total];
calo::multifit::MapSymM<DataType, NPULSES> matrixLForFnnls{shrMatrixLForFnnlsStorage};

fnnls(AtA,
Atb,
//amplitudes[idx],
resultAmplitudes,
npassive,
pulseOffsets,
matrixLForFnnls,
1e-11,
500);
calo::multifit::fnnls(AtA,
Atb,
//amplitudes[idx],
resultAmplitudes,
npassive,
pulseOffsets,
matrixLForFnnls,
1e-11,
500,
16,
2);

{
DataType accum[NSAMPLES];
Expand Down
164 changes: 1 addition & 163 deletions RecoLocalCalo/EcalRecProducers/plugins/KernelHelpers.h
Original file line number Diff line number Diff line change
@@ -1,176 +1,14 @@
#ifndef RecoLocalCalo_EcalRecProducers_plugins_KernelHelpers_h
#define RecoLocalCalo_EcalRecProducers_plugins_KernelHelpers_h

#include "DataFormats/Math/interface/EigenComputations.h"
#include "DataFormats/CaloRecHit/interface/MultifitComputations.h"

#include <cmath>
#include <limits>
#include <type_traits>

#include <Eigen/Dense>

namespace ecal {
namespace multifit {

// TODO: add active bxs
//
// Iterative constrained solve (presumably "fast non-negative least squares",
// going by the name) that runs on the device for a single NPULSES-sized system.
//
// Parameters:
//   AtA           - square matrix; only elements with row >= column are read,
//                   i.e. it is treated as symmetric.
//   Atb           - right-hand-side vector.
//   solution      - in/out: read when forming the gradient, and overwritten
//                   with the accepted values.
//   npassive      - in/out: the first `npassive` entries of `pulseOffsets` form
//                   the set that is currently solved for.
//   pulseOffsets  - in/out: permutation mapping the working order to the real
//                   indices used for AtA / Atb / solution; entries are swapped
//                   as indices enter or leave the first `npassive` positions.
//   matrixL       - caller-provided storage handed to the decomposition helpers.
//   eps           - initial convergence threshold on the largest gradient term;
//                   the threshold in use is doubled every 16 outer iterations.
//   maxIterations - the outer loop stops once `iter` reaches this value.
//
// The function returns nothing; results are delivered through `solution`,
// `npassive`, `pulseOffsets` and `matrixL`.
template <typename MatrixType, typename VectorType>
__device__ void fnnls(MatrixType const& AtA,
                      VectorType const& Atb,
                      VectorType& solution,
                      int& npassive,
                      calo::multifit::ColumnVector<VectorType::RowsAtCompileTime, int>& pulseOffsets,
                      calo::multifit::MapSymM<float, VectorType::RowsAtCompileTime>& matrixL,
                      double const eps,
                      int const maxIterations) {
  // constants
  constexpr auto NPULSES = VectorType::RowsAtCompileTime;

  // to keep track of where to terminate if converged
  Eigen::Index w_max_idx_prev = 0;
  float w_max_prev = 0;
  auto eps_to_use = eps;
  bool recompute = false;

  // used throughout
  VectorType s;
  float reg_b[NPULSES];
  //float matrixLStorage[MapSymM<float, NPULSES>::total];
  //MapSymM<float, NPULSES> matrixL{matrixLStorage};

  int iter = 0;
  while (true) {
    // on the very first pass this step is skipped when the caller already
    // supplied a non-empty set (npassive > 0)
    if (iter > 0 || npassive == 0) {
      auto const nactive = NPULSES - npassive;
      // exit if there are no more pulses to constrain
      if (nactive == 0)
        break;

      // compute the gradient
      //w.tail(nactive) = Atb.tail(nactive) - (AtA * solution).tail(nactive);
      // start from a defined value so the index is never indeterminate when no
      // candidate beats the sentinel below (e.g. every gradient term is NaN)
      Eigen::Index w_max_idx = 0;
      float w_max = -std::numeric_limits<float>::max();
      for (int icol = npassive; icol < NPULSES; icol++) {
        auto const icol_real = pulseOffsets(icol);
        auto const atb = Atb(icol_real);
        float sum = 0;
        // always index AtA with row >= column
#pragma unroll
        for (int counter = 0; counter < NPULSES; counter++)
          sum += counter > icol_real ? AtA(counter, icol_real) * solution(counter)
                                     : AtA(icol_real, counter) * solution(counter);

        auto const w = atb - sum;
        if (w > w_max) {
          w_max = w;
          // stored relative to the start of the not-yet-passive range
          w_max_idx = icol - npassive;
        }
      }

      // check for convergence: either the largest gradient term is below the
      // threshold, or the same candidate with the same value was picked again
      if (w_max < eps_to_use || (w_max_idx == w_max_idx_prev && w_max == w_max_prev))
        break;

      if (iter >= maxIterations)
        break;

      w_max_prev = w_max;
      w_max_idx_prev = w_max_idx;

      // move index to the right part of the vector
      w_max_idx += npassive;

      Eigen::numext::swap(pulseOffsets.coeffRef(npassive), pulseOffsets.coeffRef(w_max_idx));
      ++npassive;
    }

    // inner loop
    while (true) {
      if (npassive == 0)
        break;

      //s.head(npassive)
      //auto const& matrixL =
      //  AtA.topLeftCorner(npassive, npassive)
      //    .llt().matrixL();
      //.solve(Atb.head(npassive));
      // full decomposition on the first pass or after an index was removed,
      // otherwise the update variant is used
      if (recompute || iter == 0)
        compute_decomposition_forwardsubst_with_offsets(matrixL, AtA, reg_b, Atb, npassive, pulseOffsets);
      else
        update_decomposition_forwardsubst_with_offsets(matrixL, AtA, reg_b, Atb, npassive, pulseOffsets);

      // run backward substitution
      s(npassive - 1) = reg_b[npassive - 1] / matrixL(npassive - 1, npassive - 1);
      for (int i = npassive - 2; i >= 0; --i) {
        float total = 0;
        for (int j = i + 1; j < npassive; j++)
          total += matrixL(j, i) * s(j);

        s(i) = (reg_b[i] - total) / matrixL(i, i);
      }

      // done if solution values are all positive (a value of exactly zero is
      // treated like a negative one)
      bool hasNegative = false;
      bool hasNans = false;
      for (int counter = 0; counter < npassive; counter++) {
        auto const s_ii = s(counter);
        hasNegative |= s_ii <= 0;
        hasNans |= std::isnan(s_ii);
      }

      // FIXME: temporary solution. my cholesky impl is unstable yielding nans
      // this check removes nans - do not accept solution unless all values
      // are stable
      if (hasNans)
        break;
      if (!hasNegative) {
        // scatter the accepted values back to their real positions
        for (int i = 0; i < npassive; i++) {
          auto const i_real = pulseOffsets(i);
          solution(i_real) = s(i);
        }
        //solution.head(npassive) = s.head(npassive);
        recompute = false;
        break;
      }

      // there were negative values -> have to recompute the whole decomp
      recompute = true;

      // smallest ratio solution / (solution - s) among the non-positive entries
      auto alpha = std::numeric_limits<float>::max();
      Eigen::Index alpha_idx = 0, alpha_idx_real = 0;
      for (int i = 0; i < npassive; i++) {
        if (s[i] <= 0.f) {
          auto const i_real = pulseOffsets(i);
          auto const ratio = solution[i_real] / (solution[i_real] - s[i]);
          if (ratio < alpha) {
            alpha = ratio;
            alpha_idx = i;
            alpha_idx_real = i_real;
          }
        }
      }

      // update solution
      for (int i = 0; i < npassive; i++) {
        auto const i_real = pulseOffsets(i);
        solution(i_real) += alpha * (s(i) - solution(i_real));
      }
      //solution.head(npassive) += alpha *
      //  (s.head(npassive) - solution.head(npassive));
      // zero the limiting entry and move it out of the first `npassive` slots
      solution[alpha_idx_real] = 0;
      --npassive;

      Eigen::numext::swap(pulseOffsets.coeffRef(npassive), pulseOffsets.coeffRef(alpha_idx));
    }

    // as in cpu
    ++iter;
    // relax the convergence threshold every 16 iterations
    if (iter % 16 == 0)
      eps_to_use *= 2;
  }
}

} // namespace multifit
} // namespace ecal

namespace ecal {
namespace reconstruction {

Expand Down
Loading