Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Revert accidental commit to main #734

Merged
merged 1 commit into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 14 additions & 18 deletions include/matx/core/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -1428,8 +1428,6 @@ class tensor_t : public detail::tensor_impl_t<T,RANK,Desc> {
* more dimensions of a tensor. This includes completely dropping an unwanted
* dimension, or simply taking a piece of a wanted dimension. Slice() is very
* similar to indexing operations in both Python and MATLAB.
*
* *NOTE* Users should not call Slice() directly anymore. Use the slice() operator instead.
*
* @param firsts
* List of starting index into each dimension. Indexing is 0-based
Expand All @@ -1452,10 +1450,10 @@ class tensor_t : public detail::tensor_impl_t<T,RANK,Desc> {
* @returns Sliced view of tensor
*
*/
template <int N = RANK, typename StrideType>
template <int N = RANK>
__MATX_INLINE__ auto Slice([[maybe_unused]] const cuda::std::array<typename Desc::shape_type, RANK> &firsts,
[[maybe_unused]] const cuda::std::array<typename Desc::shape_type, RANK> &ends,
[[maybe_unused]] StrideType strides) const
[[maybe_unused]] const cuda::std::array<typename Desc::shape_type, RANK> &ends,
[[maybe_unused]] const cuda::std::array<typename Desc::stride_type, RANK> &strides) const
{
static_assert(N <= RANK && RANK > 0, "Must slice to a rank the same or less than current rank.");

Expand All @@ -1466,6 +1464,7 @@ class tensor_t : public detail::tensor_impl_t<T,RANK,Desc> {

T *data = this->ldata_;
int d = 0;
bool def_stride = (strides[0] == -1);

[[maybe_unused]] int end_count = 0;
for (int i = 0; i < RANK; i++) {
Expand All @@ -1487,14 +1486,9 @@ class tensor_t : public detail::tensor_impl_t<T,RANK,Desc> {

MATX_ASSERT_STR(first < end, matxInvalidSize, "Slice must be at least one element long");

[[maybe_unused]] typename Desc::stride_type stride_mult;

if constexpr (std::is_same_v<StrideType, detail::NoStride>) {
stride_mult = 1;
}
else {
stride_mult = (strides[i] == matxKeepStride) ? 1 : strides[i];
}
[[maybe_unused]] typename Desc::stride_type stride_mult = (def_stride || strides[i] == matxKeepStride)
? 1
: strides[i]; // custom stride

MATX_ASSERT_STR(first < end, matxInvalidParameter,
"Starting slice must be less than end slice");
Expand Down Expand Up @@ -1531,10 +1525,10 @@ class tensor_t : public detail::tensor_impl_t<T,RANK,Desc> {
return tensor_t<T, N, Storage, decltype(new_desc)>{storage_, std::move(new_desc), data};
}

template <typename StrideType, int N = RANK>
template <int N = RANK>
__MATX_INLINE__ auto Slice(const typename Desc::shape_type (&firsts)[RANK],
const typename Desc::shape_type (&ends)[RANK],
StrideType strides) const
const typename Desc::shape_type (&ends)[RANK],
const typename Desc::stride_type (&strides)[RANK]) const
{
return Slice<N>(detail::to_array(firsts), detail::to_array(ends), detail::to_array(strides));
}
Expand Down Expand Up @@ -1565,13 +1559,15 @@ class tensor_t : public detail::tensor_impl_t<T,RANK,Desc> {
*/
template <int N = RANK>
__MATX_INLINE__ auto Slice(const cuda::std::array<typename Desc::shape_type, RANK> &firsts,
const cuda::std::array<typename Desc::shape_type, RANK> &ends) const
const cuda::std::array<typename Desc::shape_type, RANK> &ends) const
{
static_assert(N <= RANK && RANK > 0, "Must slice to a rank the same or less than current rank.");

MATX_NVTX_START("", matx::MATX_NVTX_LOG_API)

return Slice<detail::NoStride, N>(firsts, ends, detail::NoStride{});
const cuda::std::array<typename Desc::stride_type, RANK> strides = {-1};

return Slice<N>(firsts, ends, strides);
}

template <int N = RANK>
Expand Down
1 change: 0 additions & 1 deletion include/matx/core/type_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ enum class MemoryLayout {
namespace detail {
struct NoShape{};
struct EmptyOp{};
struct NoStride{};

template <typename T>
struct is_noshape : std::integral_constant<bool, std::is_same_v<NoShape, T>> {};
Expand Down
119 changes: 22 additions & 97 deletions include/matx/operators/slice.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,20 @@ namespace matx
* Slices elements from an operator/tensor.
*/
namespace detail {

template <int DIM, typename T, typename StrideType>
class SliceOp : public BaseOp<SliceOp<DIM, T, StrideType>>
template <int DIM, typename T>
class SliceOp : public BaseOp<SliceOp<DIM, T>>
{
public:
using value_type = typename T::value_type;
using shape_type = index_t;
using self_type = SliceOp<DIM, T, StrideType>;
using self_type = SliceOp<DIM, T>;

private:
typename base_type<T>::type op_;
cuda::std::array<shape_type, DIM> sizes_;
cuda::std::array<int32_t, DIM> dims_;
cuda::std::array<shape_type, T::Rank()> starts_;
StrideType strides_; // Add [[no_unique_address]] in c++20
cuda::std::array<shape_type, T::Rank()> strides_;

public:
using matxop = bool;
Expand All @@ -69,7 +68,7 @@ namespace matx

__MATX_INLINE__ SliceOp(T op, const cuda::std::array<shape_type, T::Rank()> &starts,
const cuda::std::array<shape_type, T::Rank()> &ends,
StrideType strides) : op_(op) {
const cuda::std::array<shape_type, T::Rank()> &strides) : op_(op) {
int32_t d = 0;
for(int32_t i = 0; i < T::Rank(); i++) {
shape_type start = starts[i] < 0 ? op.Size(i) + starts[i] : starts[i];
Expand All @@ -81,10 +80,7 @@ namespace matx
"Slice end index out of range of operator");

starts_[i] = start;

if constexpr (!std::is_same_v<NoStride, StrideType>) {
strides_[i] = strides[i];
}
strides_[i] = strides[i];

// compute dims and sizes
if(end != matxDropDim) {
Expand All @@ -99,10 +95,7 @@ namespace matx
}

//adjust size by stride
if constexpr (!std::is_same_v<NoStride, StrideType>) {
sizes_[d] = (shape_type)std::ceil(static_cast<double>(sizes_[d])/ static_cast<double>(strides_[d]));
}

sizes_[d] = (shape_type)std::ceil(static_cast<double>(sizes_[d])/ static_cast<double>(strides_[d]));
d++;
}
}
Expand All @@ -115,7 +108,7 @@ namespace matx
static_assert(sizeof...(Is)==Rank());
static_assert((std::is_convertible_v<Is, index_t> && ... ));

#if 0
// copy the variadic indices into arrays so we can read/update them
cuda::std::array<index_t, Rank()> inds{indices...};
cuda::std::array<index_t, T::Rank()> ind{indices...};

Expand All @@ -128,29 +121,6 @@ namespace matx
for(int32_t i = 0; i < Rank(); i++) {
ind[dims_[i]] += inds[i] * strides_[i];
}
#else
// convert variadic type to tuple so we can read/update
cuda::std::array<index_t, T::Rank()> ind;
cuda::std::array<index_t, Rank()> inds{indices...};

#pragma unroll
for (int32_t i = 0; i < T::Rank(); i++) {
#pragma unroll
for(int32_t j = 0; j < Rank(); j++) {
if(dims_[j] == i) {
if constexpr (!std::is_same_v<NoStride, StrideType>) {
ind[i] = starts_[j] + inds[j] * strides_[i];
}
else {
ind[i] = starts_[j] + inds[j];
}
}
else {
ind[i] = starts_[i];
}
}
}
#endif

//return op_(ind);
return cuda::std::apply(op_, ind);
Expand All @@ -162,42 +132,19 @@ namespace matx
static_assert(sizeof...(Is)==Rank());
static_assert((std::is_convertible_v<Is, index_t> && ... ));

#if 0
cuda::std::array<index_t, Rank()> inds{indices...};
cuda::std::array<index_t, T::Rank()> ind{indices...};
// copy the variadic indices into arrays so we can read/update them
cuda::std::array<shape_type, Rank()> inds{indices...};
cuda::std::array<shape_type, T::Rank()> ind{indices...};

#pragma unroll
for(int32_t i = 0; i < T::Rank(); i++) {
for(int i = 0; i < T::Rank(); i++) {
ind[i] = starts_[i];
}

#pragma unroll
for(int32_t i = 0; i < Rank(); i++) {
for(int i = 0; i < Rank(); i++) {
ind[dims_[i]] += inds[i] * strides_[i];
}
#else
// convert variadic type to tuple so we can read/update
cuda::std::array<index_t, T::Rank()> ind;
cuda::std::array<index_t, Rank()> inds{indices...};

#pragma unroll
for (int32_t i = 0; i < T::Rank(); i++) {
#pragma unroll
for(int32_t j = 0; j < Rank(); j++) {
if(dims_[j] == i) {
if constexpr (!std::is_same_v<NoStride, StrideType>) {
ind[i] = starts_[j] + inds[j] * strides_[i];
}
else {
ind[i] = starts_[j] + inds[j];
}
}
else {
ind[i] = starts_[i];
}
}
}
#endif

//return op_(ind);
return cuda::std::apply(op_, ind);
Expand Down Expand Up @@ -269,23 +216,10 @@ namespace matx
if constexpr (is_tensor_view_v<OpType>) {
return op.Slice(starts, ends, strides);
} else {
return detail::SliceOp<OpType::Rank(),OpType,decltype(strides)>(op, starts, ends, strides);
return detail::SliceOp<OpType::Rank(),OpType>(op, starts, ends, strides);
}
}

template <typename OpType>
__MATX_INLINE__ auto slice( const OpType &op,
const cuda::std::array<index_t, OpType::Rank()> &starts,
const cuda::std::array<index_t, OpType::Rank()> &ends,
detail::NoStride strides)
{
if constexpr (is_tensor_view_v<OpType>) {
return op.Slice(starts, ends, strides);
} else {
return detail::SliceOp<OpType::Rank(),OpType,detail::NoStride>(op, starts, ends, detail::NoStride{});
}
}

template <typename OpType>
__MATX_INLINE__ auto slice( const OpType &op,
const index_t (&starts)[OpType::Rank()],
Expand Down Expand Up @@ -316,7 +250,10 @@ namespace matx
const cuda::std::array<index_t, OpType::Rank()> &starts,
const cuda::std::array<index_t, OpType::Rank()> &ends)
{
return slice(op, starts, ends, detail::NoStride{});
cuda::std::array<index_t, OpType::Rank()> strides;
strides.fill(1);

return slice(op, starts, ends, strides);
}
template <typename OpType>
__MATX_INLINE__ auto slice( const OpType &op,
Expand Down Expand Up @@ -354,24 +291,10 @@ namespace matx
if constexpr (is_tensor_view_v<OpType>) {
return op.template Slice<N>(starts, ends, strides);
} else {
return detail::SliceOp<N,OpType,decltype(strides)>(op, starts, ends, strides);
}
}

template <int N, typename OpType>
__MATX_INLINE__ auto slice( const OpType op,
const cuda::std::array<index_t, OpType::Rank()> &starts,
const cuda::std::array<index_t, OpType::Rank()> &ends,
detail::NoStride no_stride)
{
if constexpr (is_tensor_view_v<OpType>) {
return op.template Slice<N>(starts, ends);
} else {
return detail::SliceOp<N,OpType,detail::NoStride>(op, starts, ends, detail::NoStride{});
return detail::SliceOp<N,OpType>(op, starts, ends, strides);
}
}


template <int N, typename OpType>
__MATX_INLINE__ auto slice( const OpType op,
const index_t (&starts)[OpType::Rank()],
Expand Down Expand Up @@ -405,7 +328,9 @@ namespace matx
const cuda::std::array<index_t, OpType::Rank()> &starts,
const cuda::std::array<index_t, OpType::Rank()> &ends)
{
return slice<N,OpType,detail::NoStride>(opIn, starts, ends, detail::NoStride{});
cuda::std::array<index_t, OpType::Rank()> strides;
strides.fill(1);
return slice<N,OpType>(opIn, starts, ends, strides);
}

template <int N, typename OpType>
Expand Down