Use Eigen and CUDA preprocessor symbols
Use Eigen preprocessor symbols instead of explicit CUDA keywords.
Use CUDA preprocessor symbols to protect CUDA-only functions.
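The Eigen macros make these header-only helpers compile both as device code and as ordinary host code. A minimal sketch of the idea, assuming Eigen's usual definitions (the function name scaledSum is illustrative, not part of the file): when the header is compiled by nvcc, EIGEN_DEVICE_FUNC expands to __host__ __device__ and EIGEN_ALWAYS_INLINE forces inlining, while a host-only compiler sees plain inline C++ with no CUDA keywords.

// Sketch only (not part of the commit): how the Eigen macros keep a header
// usable from both nvcc and a plain host compiler. "scaledSum" is a made-up
// example; the real file annotates its own helpers the same way.
#include <Eigen/Core>  // defines EIGEN_ALWAYS_INLINE and EIGEN_DEVICE_FUNC

template <typename T>
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC T scaledSum(T a, T b, T scale) {
  // callable from CUDA kernels when built with nvcc, and from ordinary
  // host code otherwise, without writing __forceinline__ __device__ by hand
  return scale * (a + b);
}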
fwyzard authored Oct 19, 2020
1 parent 8528d83 commit 6a50212
Showing 1 changed file with 17 additions and 12 deletions.
29 changes: 17 additions & 12 deletions DataFormats/CaloRecHit/interface/MultifitComputations.h
@@ -32,16 +32,16 @@ namespace calo {
static constexpr int stride = Stride;
T* data;

- __forceinline__ __device__ MapSymM(T* data) : data{data} {}
+ EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC MapSymM(T* data) : data{data} {}

- __forceinline__ __device__ T const& operator()(int const row, int const col) const {
+ EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC T const& operator()(int const row, int const col) const {
auto const tmp = (Stride - col) * (Stride - col + 1) / 2;
auto const index = total - tmp + row - col;
return data[index];
}

template <typename U = T>
- __forceinline__ __device__ typename std::enable_if<std::is_same<base_type, U>::value, base_type>::type&
+ EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC typename std::enable_if<std::is_same<base_type, U>::value, base_type>::type&
operator()(int const row, int const col) {
auto const tmp = (Stride - col) * (Stride - col + 1) / 2;
auto const index = total - tmp + row - col;
@@ -58,17 +58,17 @@ namespace calo {
using base_type = typename std::remove_cv<type>::type;

type* data;
- __forceinline__ __device__ MapMForPM(type* data) : data{data} {}
+ EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC MapMForPM(type* data) : data{data} {}

- __forceinline__ __device__ base_type operator()(int const row, int const col) const {
+ EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC base_type operator()(int const row, int const col) const {
auto const index = 2 - col + row;
return index >= 0 ? data[index] : 0;
}
};

// simple/trivial cholesky decomposition impl
template <typename MatrixType1, typename MatrixType2>
- __forceinline__ __device__ void compute_decomposition_unrolled(MatrixType1& L, MatrixType2 const& M) {
+ EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition_unrolled(MatrixType1& L, MatrixType2 const& M) {
auto const sqrtm_0_0 = std::sqrt(M(0, 0));
L(0, 0) = sqrtm_0_0;
using T = typename MatrixType1::base_type;
@@ -94,7 +94,7 @@ namespace calo {
}

template <typename MatrixType1, typename MatrixType2>
- __forceinline__ __device__ void compute_decomposition(MatrixType1& L, MatrixType2 const& M, int const N) {
+ EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition(MatrixType1& L, MatrixType2 const& M, int const N) {
auto const sqrtm_0_0 = std::sqrt(M(0, 0));
L(0, 0) = sqrtm_0_0;
using T = typename MatrixType1::base_type;
@@ -119,7 +119,7 @@ namespace calo {
}

template <typename MatrixType1, typename MatrixType2, typename VectorType>
- __forceinline__ __device__ void compute_decomposition_forwardsubst_with_offsets(
+ EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void compute_decomposition_forwardsubst_with_offsets(
MatrixType1& L,
MatrixType2 const& M,
float b[MatrixType1::stride],
@@ -158,7 +158,7 @@ namespace calo {
}

template <typename MatrixType1, typename MatrixType2, typename VectorType>
- __forceinline__ __device__ void update_decomposition_forwardsubst_with_offsets(
+ EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC void update_decomposition_forwardsubst_with_offsets(
MatrixType1& L,
MatrixType2 const& M,
float b[MatrixType1::stride],
@@ -190,7 +190,7 @@ namespace calo {
}

template <typename MatrixType1, typename MatrixType2, typename MatrixType3>
- __device__ void solve_forward_subst_matrix(MatrixType1& A,
+ EIGEN_DEVICE_FUNC void solve_forward_subst_matrix(MatrixType1& A,
MatrixType2 const& pulseMatrixView,
MatrixType3 const& matrixL) {
// FIXME: this assumes pulses are on columns and samples on rows
@@ -205,7 +205,12 @@ namespace calo {
// preload a column and load column 0 of cholesky
#pragma unroll
for (int i = 0; i < NSAMPLES; i++) {
+ #ifdef __CUDA_ARCH__
+ // load through the read-only cache
reg_b[i] = __ldg(&pulseMatrixView.coeffRef(i, icol));
+ #else
+ reg_b[i] = pulseMatrixView.coeffRef(i, icol);
+ #endif // __CUDA_ARCH__
reg_L[i] = matrixL(i, 0);
}

@@ -236,7 +241,7 @@
}

template <typename MatrixType1, typename MatrixType2>
- __device__ void solve_forward_subst_vector(float reg_b[MatrixType1::RowsAtCompileTime],
+ EIGEN_DEVICE_FUNC void solve_forward_subst_vector(float reg_b[MatrixType1::RowsAtCompileTime],
MatrixType1 inputAmplitudesView,
MatrixType2 matrixL) {
constexpr auto NSAMPLES = MatrixType1::RowsAtCompileTime;
@@ -278,7 +283,7 @@

// TODO: add active bxs
template <typename MatrixType, typename VectorType>
- __device__ void fnnls(MatrixType const& AtA,
+ EIGEN_DEVICE_FUNC void fnnls(MatrixType const& AtA,
VectorType const& Atb,
VectorType& solution,
int& npassive,
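The __ldg hunk above follows the usual pattern for device-only intrinsics: __CUDA_ARCH__ is defined only during nvcc's device compilation pass, so the host pass (and any non-CUDA build) falls back to a plain load. A minimal sketch of that pattern, under the same assumptions as the commit (the helper name loadCoeff is illustrative, not from the file):

// Sketch only: read a matrix coefficient through the read-only cache on the
// device, or with an ordinary load on the host. "loadCoeff" is hypothetical
// and assumes float coefficients, as in the multifit code.
#include <Eigen/Core>

template <typename M>
EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC float loadCoeff(M const& m, int row, int col) {
#ifdef __CUDA_ARCH__
  // device pass: __ldg hints the compiler to route the load through the
  // read-only data cache
  return __ldg(&m.coeffRef(row, col));
#else
  // host pass or non-CUDA build: __ldg does not exist here, use a plain load
  return m.coeffRef(row, col);
#endif
}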
