Fixed remaining nvc++ warnings (#645)
cliffburdick authored Jun 4, 2024
1 parent 2f9f075 commit 1816b44
Showing 10 changed files with 23 additions and 76 deletions.
README.md (2 changes: 1 addition & 1 deletion)
@@ -52,7 +52,7 @@ MatX support is currently limited to **Linux only** due to the time to test Wind

**Note**: CUDA 12.0.0 through 12.2.0 have an issue that causes building MatX unit tests to show a compiler error or cause a segfault in the compiler. Please use CUDA 11.5-11.8 or CUDA 12.2.1+ with MatX.

-MatX is using features in C++17 and the latest CUDA compilers and libraries. For this reason, when running with GPU support, CUDA 11.5 and g++9 or clang 17 or newer is required. You can download the CUDA Toolkit [here](https://developer.nvidia.com/cuda-downloads).
+MatX is using features in C++17 and the latest CUDA compilers and libraries. For this reason, when running with GPU support, CUDA 11.5 and g++9, nvc++ 24.5, or clang 17 or newer is required. You can download the CUDA Toolkit [here](https://developer.nvidia.com/cuda-downloads).

MatX has been tested on and supports Pascal, Turing, Volta, Ampere, Ada, and Hopper GPU architectures. Jetson products are supported with Jetpack 5.0 or above.

bench/00_operators/operators.cu (10 changes: 5 additions & 5 deletions)
@@ -87,7 +87,7 @@ NVBENCH_BENCH_TYPES(random, NVBENCH_TYPE_AXES(random_types));
template<typename T> T factorial(int N) {
  T prod = 1;
  for(int i=2; i<=N; i++) {
-   prod = prod * i;
+   prod = prod * static_cast<T>(i);
  }
  return prod;
}
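This hunk shows the pattern that recurs throughout the commit: multiplying a floating-point (or half-precision) accumulator by a plain int converts the int implicitly, which nvc++ reports as a conversion warning. A minimal standalone sketch of the fix (the host-only setup and warning flags are assumptions, not taken from the MatX build):

    #include <cstdio>

    template <typename T> T factorial(int N) {
      T prod = 1;
      for (int i = 2; i <= N; i++) {
        // prod = prod * i;              // implicit int -> T conversion; nvc++ warns
        prod = prod * static_cast<T>(i); // conversion made explicit, warning-free
      }
      return prod;
    }

    int main() {
      printf("%f\n", factorial<float>(5)); // prints 120.000000
    }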
@@ -99,7 +99,7 @@ void sphericalharmonics(nvbench::state &state, nvbench::type_list<ValueType>)
  int l = 5;
  int m = 4;
  int n = 600;
- ValueType dx = M_PI/n;
+ ValueType dx = static_cast<ValueType>(M_PI/n);

  cudaExecutor exec{};
  auto col = range<0>({n+1},ValueType(0), ValueType(dx));
@@ -109,11 +109,11 @@ void sphericalharmonics(nvbench::state &state, nvbench::type_list<ValueType>)

  auto Plm = lcollapse<3>(legendre(l, m, cos(theta)));

- ValueType a = (2*l+1)*factorial<ValueType>(l-m);
- ValueType b = 4*M_PI*factorial<ValueType>(l+m);
+ ValueType a = static_cast<ValueType>(2*l+1)*factorial<ValueType>(l-m);
+ ValueType b = static_cast<ValueType>(4*M_PI)*factorial<ValueType>(l+m);
  ValueType C = cuda::std::sqrt(a/b);

- auto Ylm = C * Plm * exp(cuda::std::complex<ValueType>(0,1)*(m*phi));
+ auto Ylm = C * Plm * exp(cuda::std::complex<ValueType>(0,1)*(static_cast<ValueType>(m)*phi));
  auto [ Xm, Ym, Zm ] = sph2cart(phi, ValueType(M_PI)/2 - theta, abs(real(Ylm)));

  // Work around C++17 restriction, structured bindings cannot be captured
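The casts above all follow the same rule: M_PI is a double constant, so expressions such as M_PI/n and 4*M_PI are computed in double, while l and m are ints. When ValueType is float, feeding those values into ValueType operands narrows implicitly, and nvc++ flags each occurrence. A small sketch of the idea, assuming ValueType = float (step_size is an illustrative helper, not part of the benchmark):

    #include <cmath>

    float step_size(int n) {
      // float dx = M_PI / n;                   // double -> float narrowing; nvc++ warns
      float dx = static_cast<float>(M_PI / n);  // narrowing is explicit and intentional
      return dx;
    }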
bench/00_transform/conv.cu (4 changes: 2 additions & 2 deletions)
@@ -130,9 +130,9 @@ void conv2d_direct_batch(nvbench::state &state,
  flops.set_string("description", "Trillions of operations per second");

  if constexpr (is_complex_v<ValueType>) {
-   flops.set_float64("value", (double)2 * out.Size(2) * out.Size(1) * out.Size(0) * bt.Size(2) * bt.Size(1) * 4 / seconds / 1e12);
+   flops.set_float64("value", static_cast<double>(2 * out.Size(2) * out.Size(1) * out.Size(0) * bt.Size(2) * bt.Size(1) * 4) / seconds / 1e12);
  } else {
-   flops.set_float64("value", (double)2 * out.Size(2) * out.Size(1) * out.Size(0) * bt.Size(2) * bt.Size(1) / seconds / 1e12);
+   flops.set_float64("value", static_cast<double>(2 * out.Size(2) * out.Size(1) * out.Size(0) * bt.Size(2) * bt.Size(1)) / seconds / 1e12);
  }
}
NVBENCH_BENCH_TYPES(conv2d_direct_batch, NVBENCH_TYPE_AXES(conv_types));
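This hunk swaps a C-style cast for static_cast, the form nvc++ can flag under old-style-cast diagnostics. Note the subtle shift in where the conversion happens: the old code promoted the leading 2 to double so the whole product was computed in floating point, while the new code computes the product in integer arithmetic (the Size() extents are 64-bit) and converts once. A sketch with an illustrative name:

    #include <cstdint>

    // Hypothetical helper mirroring the benchmark's throughput math.
    double tflops(int64_t ops, double seconds) {
      // return (double)ops / seconds / 1e12;            // C-style cast
      return static_cast<double>(ops) / seconds / 1e12;  // explicit C++ cast
    }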
bench/00_transform/qr.cu (6 changes: 3 additions & 3 deletions)
@@ -19,9 +19,9 @@ void qr_batch(nvbench::state &state,
  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream));
  cudaExecutor exec{stream};

- int batch = state.get_int64("batch");
- int m = state.get_int64("rows");
- int n = state.get_int64("cols");
+ int64_t batch = state.get_int64("batch");
+ int64_t m = state.get_int64("rows");
+ int64_t n = state.get_int64("cols");

  auto A = make_tensor<AType>({batch, m, n});
  auto Q = make_tensor<AType>({batch, m, m});
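nvbench's state.get_int64 returns a 64-bit integer, so assigning it to a plain int narrows and draws a conversion warning from nvc++; widening the locals to int64_t removes the warning and matches the tensor extents. A minimal sketch (get_param is a stand-in, not a real API):

    #include <cstdint>

    int64_t get_param() { return 1024; }  // stands in for state.get_int64("...")

    void setup() {
      // int m = get_param();   // int64_t -> int narrowing; nvc++ warns
      int64_t m = get_param();  // widths match, no warning
      (void)m;
    }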
bench/00_transform/svd_power.cu (16 changes: 8 additions & 8 deletions)
@@ -19,11 +19,11 @@ void svdpi_batch(nvbench::state &state,
  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream));
  cudaExecutor exec{stream};

- int batch = state.get_int64("batch");
- int m = state.get_int64("rows");
- int n = state.get_int64("cols");
+ int64_t batch = state.get_int64("batch");
+ int64_t m = state.get_int64("rows");
+ int64_t n = state.get_int64("cols");

- int r = std::min(n,m);
+ int64_t r = std::min(n,m);
  auto A = make_tensor<AType>({batch, m, n});
  auto U = make_tensor<AType>({batch, m, r});
  auto VT = make_tensor<AType>({batch, r, n});
@@ -68,11 +68,11 @@ void svdbpi_batch(nvbench::state &state,
  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream));
  cudaExecutor exec{stream};

- int batch = state.get_int64("batch");
- int m = state.get_int64("rows");
- int n = state.get_int64("cols");
+ int64_t batch = state.get_int64("batch");
+ int64_t m = state.get_int64("rows");
+ int64_t n = state.get_int64("cols");

- int r = std::min(n,m);
+ int64_t r = std::min(n,m);
  auto A = make_tensor<AType>({batch, m, n});
  auto U = make_tensor<AType>({batch, m, r});
  auto VT = make_tensor<AType>({batch, r, n});
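Changing r together with m and n also matters for std::min, which deduces a single template parameter, so mixed-width arguments would not compile at all. A short sketch with illustrative names:

    #include <algorithm>
    #include <cstdint>

    void sizes(int64_t m, int64_t n) {
      // auto r = std::min(n, 5);   // error: no common type deduced for int64_t vs int
      int64_t r = std::min(n, m);   // both arguments are int64_t; deduction succeeds
      (void)r;
    }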
docs_input/build.rst (2 changes: 1 addition & 1 deletion)
@@ -19,7 +19,7 @@ the CPM_ documentation or the documentation for each package for more informatio

System Requirements
-------------------
-MatX requires **CUDA 11.5** or higher, and **g++ 9.3+** or **clang 17+** for the host compiler. See the CUDA toolkit documentation
+MatX requires **CUDA 11.5** or higher, and **g++ 9.3+**, **clang 17+**, or **nvc++ 24.5** for the host compiler. See the CUDA toolkit documentation
for supported host compilers. Other requirements for optional components are listed below.

.. warning:: Using MatX with an unsupported compiler may result in compiler and/or runtime errors.
include/matx/generators/range.h (7 changes: 0 additions & 7 deletions)
@@ -60,13 +60,6 @@ namespace matx
      else {
        return first_ + T(static_cast<T>(idx) * step_);
      }
-
-     if constexpr (!is_matx_half_v<T>) {
-       return first_ + T(static_cast<T>(idx) * step_);
-     }
-     else {
-       return first_ + T(static_cast<T>((float)idx) * step_);
-     }
    }
  };
}
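The deleted block is a verbatim duplicate of the if constexpr above it; since both branches of the first block return, the copy can never execute, and nvc++ reports its statements as unreachable. The repmat.h and reverse.h changes below remove the same kind of leftover duplicate. A distilled sketch of the pattern (choose is illustrative):

    // Both constexpr branches return, so anything after them is dead code.
    template <typename T>
    T choose(T a, T b) {
      if constexpr (sizeof(T) > 4) {
        return a;
      } else {
        return b;
      }
      // return b;  // unreachable: every path above already returned; nvc++ warns
    }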
Expand Down
include/matx/operators/repmat.h (18 changes: 0 additions & 18 deletions)
@@ -105,15 +105,6 @@ namespace matx
        UpdateIndex(tup);
        return cuda::std::apply(op_, tup);
      }
-
-     if constexpr (Rank() != 0) {
-       auto tup = cuda::std::make_tuple(indices...);
-       UpdateIndex(tup);
-       return cuda::std::apply(op_, tup);
-     }
-     else {
-       return op_();
-     }
    }

template <typename... Is>
@@ -127,15 +118,6 @@ namespace matx
        UpdateIndex(tup);
        return cuda::std::apply(op_, tup);
      }
-
-     if constexpr (Rank() != 0) {
-       auto tup = cuda::std::make_tuple(indices...);
-       UpdateIndex(tup);
-       return cuda::std::apply(op_, tup);
-     }
-     else {
-       return op_();
-     }
    }

template <typename ShapeType, typename Executor>
include/matx/operators/reverse.h (18 changes: 0 additions & 18 deletions)
@@ -74,15 +74,6 @@ namespace matx
        cuda::std::get<DIM>(tup) = Size(DIM) - cuda::std::get<DIM>(tup) - 1;
        return cuda::std::apply(op_, tup);
      }
-
-     if constexpr (Rank() != 0) {
-       auto tup = cuda::std::make_tuple(indices...);
-       cuda::std::get<DIM>(tup) = Size(DIM) - cuda::std::get<DIM>(tup) - 1;
-       return cuda::std::apply(op_, tup);
-     }
-     else {
-       return op_();
-     }
    }

template <typename... Is>
@@ -96,15 +87,6 @@ namespace matx
        cuda::std::get<DIM>(tup) = Size(DIM) - cuda::std::get<DIM>(tup) - 1;
        return cuda::std::apply(op_, tup);
      }
-
-     if constexpr (Rank() != 0) {
-       auto tup = cuda::std::make_tuple(indices...);
-       cuda::std::get<DIM>(tup) = Size(DIM) - cuda::std::get<DIM>(tup) - 1;
-       return cuda::std::apply(op_, tup);
-     }
-     else {
-       return op_();
-     }
    }

static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
include/matx/operators/scalar_ops.h (16 changes: 3 additions & 13 deletions)
@@ -496,10 +496,7 @@ template <typename T1, typename T2> struct FModF {
  static std::string str(const std::string &str1, const std::string &str2) { return "(" + str1 + "%" + str2 + ")"; }

  static __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ auto op(T1 v1, T2 v2) {
-   return _internal_fmod(v1, v2);
-
-   // Unreachable, but required by the compiler
-   return typename std::invoke_result_t<decltype(op), T1, T2>{0};
+   return _internal_fmod(v1, v2);
  }
};
template <typename T1, typename T2> using FModOp = BinOp<T1, T2, FModF<T1, T2>>;
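The second return in op was annotated "Unreachable, but required by the compiler", presumably to satisfy an older compiler's return-path analysis, but it sits after an unconditional return, so nvc++ warns that it is unreachable and it is simply dropped. A distilled sketch (FModSketch is an illustrative stand-in, not the MatX definition):

    #include <cmath>

    struct FModSketch {
      static auto op(double v1, double v2) {
        return std::fmod(v1, v2);
        // return 0.0;  // dead code after an unconditional return; nvc++ warns
      }
    };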
@@ -520,10 +517,7 @@ template <typename T1, typename T2> struct Atan2F {
  static std::string str(const std::string &str1, const std::string &str2) { return "(" + str1 + "%" + str2 + ")"; }

  static __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ auto op(T1 v1, T2 v2) {
-   return _internal_atan2(v1, v2);
-
-   // Unreachable, but required by the compiler
-   return typename std::invoke_result_t<decltype(op), T1, T2>{0};
+   return _internal_atan2(v1, v2);
  }
};
template <typename T1, typename T2> using Atan2Op = BinOp<T1, T2, Atan2F<T1, T2>>;
@@ -649,8 +643,6 @@ static __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ auto _internal_isnan(T v1)
  } else {
    return cuda::std::isnan(static_cast<castType>(v1));
  }
-
- return false;
}
template <typename T>
struct IsNan {
@@ -675,9 +667,7 @@ static __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ auto _internal_isinf(T v1)
    return cuda::std::isinf(static_cast<typename castType::value_type>(v1.real())) || cuda::std::isinf(static_cast<typename castType::value_type>(v1.imag()));
  } else {
    return cuda::std::isinf(static_cast<castType>(v1));
  }
-
- return false;
}
}
template <typename T>
struct IsInf {
