From 1816b441b6d26f4ee3c48eda3099d70c3c5292e7 Mon Sep 17 00:00:00 2001
From: Cliff Burdick <30670611+cliffburdick@users.noreply.github.com>
Date: Tue, 4 Jun 2024 14:11:06 -0700
Subject: [PATCH] Fixed remaining nvc++ warnings (#645)

---
 README.md                           |  2 +-
 bench/00_operators/operators.cu     | 10 +++++-----
 bench/00_transform/conv.cu          |  4 ++--
 bench/00_transform/qr.cu            |  6 +++---
 bench/00_transform/svd_power.cu     | 16 ++++++++--------
 docs_input/build.rst                |  2 +-
 include/matx/generators/range.h     |  7 -------
 include/matx/operators/repmat.h     | 18 ------------------
 include/matx/operators/reverse.h    | 18 ------------------
 include/matx/operators/scalar_ops.h | 16 +++-------------
 10 files changed, 23 insertions(+), 76 deletions(-)

diff --git a/README.md b/README.md
index 02bbb3ff..168627f0 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@ MatX support is currently limited to **Linux only** due to the time to test Wind
 
 **Note**: CUDA 12.0.0 through 12.2.0 have an issue that causes building MatX unit tests to show a compiler error or cause a segfault in the compiler. Please use CUDA 11.5-11.8 or CUDA 12.2.1+ with MatX.
 
-MatX is using features in C++17 and the latest CUDA compilers and libraries. For this reason, when running with GPU support, CUDA 11.5 and g++9 or clang 17 or newer is required. You can download the CUDA Toolkit [here](https://developer.nvidia.com/cuda-downloads).
+MatX is using features in C++17 and the latest CUDA compilers and libraries. For this reason, when running with GPU support, CUDA 11.5 and g++9, nvc++ 24.5, or clang 17 or newer is required. You can download the CUDA Toolkit [here](https://developer.nvidia.com/cuda-downloads).
 
 MatX has been tested on and supports Pascal, Turing, Volta, Ampere, Ada, and Hopper GPU architectures. Jetson products are supported with Jetpack 5.0 or above.
 
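Most of the bench changes below follow one pattern: nvc++ diagnoses implicit integer-to-floating-point conversions that g++ and clang accept silently, so each mixed-type expression gets an explicit cast. A standalone sketch of the pattern (hypothetical file, not part of the patch), built around the same factorial helper the operators.cu hunk fixes:

    #include <cstdio>

    template <typename T> T factorial(int N) {
      T prod = 1;
      for (int i = 2; i <= N; i++) {
        // prod = prod * i;              // implicit int -> T conversion; nvc++ warns
        prod = prod * static_cast<T>(i); // explicit cast, same value, no warning
      }
      return prod;
    }

    int main() {
      printf("%f\n", factorial<double>(5)); // prints 120.000000
      return 0;
    }
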
diff --git a/bench/00_operators/operators.cu b/bench/00_operators/operators.cu
index 8bbbf342..720b0c24 100644
--- a/bench/00_operators/operators.cu
+++ b/bench/00_operators/operators.cu
@@ -87,7 +87,7 @@ NVBENCH_BENCH_TYPES(random, NVBENCH_TYPE_AXES(random_types));
 template <typename T> T factorial(int N) {
   T prod = 1;
   for(int i=2; i<=N; i++) {
-    prod = prod * i;
+    prod = prod * static_cast<T>(i);
   }
   return prod;
 }
@@ -99,7 +99,7 @@ void sphericalharmonics(nvbench::state &state, nvbench::type_list<ValueType>)
   int l = 5;
   int m = 4;
   int n = 600;
-  ValueType dx = M_PI/n;
+  ValueType dx = static_cast<ValueType>(M_PI/n);
   cudaExecutor exec{};
 
   auto col = range<0>({n+1},ValueType(0), ValueType(dx));
@@ -109,11 +109,11 @@ void sphericalharmonics(nvbench::state &state, nvbench::type_list<ValueType>)
 
   auto Plm = lcollapse<3>(legendre(l, m, cos(theta)));
 
-  ValueType a = (2*l+1)*factorial<ValueType>(l-m);
-  ValueType b = 4*M_PI*factorial<ValueType>(l+m);
+  ValueType a = static_cast<ValueType>(2*l+1)*factorial<ValueType>(l-m);
+  ValueType b = static_cast<ValueType>(4*M_PI)*factorial<ValueType>(l+m);
   ValueType C = cuda::std::sqrt(a/b);
 
-  auto Ylm = C * Plm * exp(cuda::std::complex<ValueType>(0,1)*(m*phi));
+  auto Ylm = C * Plm * exp(cuda::std::complex<ValueType>(0,1)*(static_cast<ValueType>(m)*phi));
   auto [ Xm, Ym, Zm ] = sph2cart(phi, ValueType(M_PI)/2 - theta, abs(real(Ylm)));
 
   // Work around C++17 restriction, structured bindings cannot be captured
diff --git a/bench/00_transform/conv.cu b/bench/00_transform/conv.cu
index 23b307f2..e4b545d3 100644
--- a/bench/00_transform/conv.cu
+++ b/bench/00_transform/conv.cu
@@ -130,9 +130,9 @@ void conv2d_direct_batch(nvbench::state &state,
   flops.set_string("description", "Trillions of operations per second");
 
   if constexpr (is_complex_v<ValueType>) {
-    flops.set_float64("value", (double)2 * out.Size(2) * out.Size(1) * out.Size(0) * bt.Size(2) * bt.Size(1) * 4 / seconds / 1e12);
+    flops.set_float64("value", static_cast<double>(2 * out.Size(2) * out.Size(1) * out.Size(0) * bt.Size(2) * bt.Size(1) * 4) / seconds / 1e12);
   } else {
-    flops.set_float64("value", (double)2 * out.Size(2) * out.Size(1) * out.Size(0) * bt.Size(2) * bt.Size(1) / seconds / 1e12);
+    flops.set_float64("value", static_cast<double>(2 * out.Size(2) * out.Size(1) * out.Size(0) * bt.Size(2) * bt.Size(1)) / seconds / 1e12);
   }
 }
 NVBENCH_BENCH_TYPES(conv2d_direct_batch, NVBENCH_TYPE_AXES(conv_types));
diff --git a/bench/00_transform/qr.cu b/bench/00_transform/qr.cu
index 09db6995..725c6cba 100644
--- a/bench/00_transform/qr.cu
+++ b/bench/00_transform/qr.cu
@@ -19,9 +19,9 @@ void qr_batch(nvbench::state &state,
   state.set_cuda_stream(nvbench::make_cuda_stream_view(stream));
   cudaExecutor exec{stream};
 
-  int batch = state.get_int64("batch");
-  int m = state.get_int64("rows");
-  int n = state.get_int64("cols");
+  int64_t batch = state.get_int64("batch");
+  int64_t m = state.get_int64("rows");
+  int64_t n = state.get_int64("cols");
 
   auto A = make_tensor<AType>({batch, m, n});
   auto Q = make_tensor<AType>({batch, m, m});
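The qr.cu hunks above and the svd_power.cu hunks below widen int to int64_t for a related reason: nvbench's get_int64 returns a 64-bit value, so storing it in an int is a narrowing conversion that nvc++ reports. A standalone sketch (hypothetical stand-in function, not part of the patch):

    #include <algorithm>
    #include <cstdint>

    int64_t get_int64() { return 4096; } // stands in for nvbench::state::get_int64

    int main() {
      // int m = get_int64();       // implicit int64_t -> int narrowing; nvc++ warns
      int64_t m = get_int64();      // matching width, no warning
      int64_t n = get_int64();
      int64_t r = std::min(n, m);   // both arguments int64_t, so min deduces cleanly
      return r == 4096 ? 0 : 1;
    }
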
diff --git a/bench/00_transform/svd_power.cu b/bench/00_transform/svd_power.cu
index 7736a19e..378d98c1 100644
--- a/bench/00_transform/svd_power.cu
+++ b/bench/00_transform/svd_power.cu
@@ -19,11 +19,11 @@ void svdpi_batch(nvbench::state &state,
   state.set_cuda_stream(nvbench::make_cuda_stream_view(stream));
   cudaExecutor exec{stream};
 
-  int batch = state.get_int64("batch");
-  int m = state.get_int64("rows");
-  int n = state.get_int64("cols");
+  int64_t batch = state.get_int64("batch");
+  int64_t m = state.get_int64("rows");
+  int64_t n = state.get_int64("cols");
 
-  int r = std::min(n,m);
+  int64_t r = std::min(n,m);
   auto A = make_tensor<AType>({batch, m, n});
   auto U = make_tensor<AType>({batch, m, r});
   auto VT = make_tensor<AType>({batch, r, n});
@@ -68,11 +68,11 @@ void svdbpi_batch(nvbench::state &state,
   state.set_cuda_stream(nvbench::make_cuda_stream_view(stream));
   cudaExecutor exec{stream};
 
-  int batch = state.get_int64("batch");
-  int m = state.get_int64("rows");
-  int n = state.get_int64("cols");
+  int64_t batch = state.get_int64("batch");
+  int64_t m = state.get_int64("rows");
+  int64_t n = state.get_int64("cols");
 
-  int r = std::min(n,m);
+  int64_t r = std::min(n,m);
   auto A = make_tensor<AType>({batch, m, n});
   auto U = make_tensor<AType>({batch, m, r});
   auto VT = make_tensor<AType>({batch, r, n});
diff --git a/docs_input/build.rst b/docs_input/build.rst
index 44bb3ccd..72017e36 100644
--- a/docs_input/build.rst
+++ b/docs_input/build.rst
@@ -19,7 +19,7 @@ the CPM_ documentation or the documentation for each package for more informatio
 System Requirements
 -------------------
 
-MatX requires **CUDA 11.5** or higher, and **g++ 9.3+** or **clang 17+** for the host compiler. See the CUDA toolkit documentation
+MatX requires **CUDA 11.5** or higher, and **g++ 9.3+**, **clang 17+**, or **nvc++ 24.5** for the host compiler. See the CUDA toolkit documentation
 for supported host compilers. Other requirements for optional components are listed below.
 
 .. warning:: Using MatX with an unsupported compiler may result in compiler and/or runtime errors.
diff --git a/include/matx/generators/range.h b/include/matx/generators/range.h
index 28078537..d189d53e 100644
--- a/include/matx/generators/range.h
+++ b/include/matx/generators/range.h
@@ -60,13 +60,6 @@ namespace matx
       else {
         return first_ + T(static_cast<float>(idx) * step_);
       }
-
-      if constexpr (!is_matx_half_v<T>) {
-        return first_ + T(static_cast<T>(idx) * step_);
-      }
-      else {
-        return first_ + T(static_cast<float>((float)idx) * step_);
-      }
     }
   };
 }
diff --git a/include/matx/operators/repmat.h b/include/matx/operators/repmat.h
index fecd8375..b8fd3883 100644
--- a/include/matx/operators/repmat.h
+++ b/include/matx/operators/repmat.h
@@ -105,15 +105,6 @@ namespace matx
         UpdateIndex(tup);
         return cuda::std::apply(op_, tup);
       }
-
-      if constexpr (Rank() != 0) {
-        auto tup = cuda::std::make_tuple(indices...);
-        UpdateIndex(tup);
-        return cuda::std::apply(op_, tup);
-      }
-      else {
-        return op_();
-      }
     }
 
     template <typename... Is>
@@ -127,15 +118,6 @@ namespace matx
         UpdateIndex(tup);
         return cuda::std::apply(op_, tup);
       }
-
-      if constexpr (Rank() != 0) {
-        auto tup = cuda::std::make_tuple(indices...);
-        UpdateIndex(tup);
-        return cuda::std::apply(op_, tup);
-      }
-      else {
-        return op_();
-      }
     }
 
     template <typename... Is>
diff --git a/include/matx/operators/reverse.h b/include/matx/operators/reverse.h
index 62393561..d72b03d0 100644
--- a/include/matx/operators/reverse.h
+++ b/include/matx/operators/reverse.h
@@ -74,15 +74,6 @@ namespace matx
         cuda::std::get<DIM>(tup) = Size(DIM) - cuda::std::get<DIM>(tup) - 1;
         return cuda::std::apply(op_, tup);
       }
-
-      if constexpr (Rank() != 0) {
-        auto tup = cuda::std::make_tuple(indices...);
-        cuda::std::get<DIM>(tup) = Size(DIM) - cuda::std::get<DIM>(tup) - 1;
-        return cuda::std::apply(op_, tup);
-      }
-      else {
-        return op_();
-      }
     }
 
     template <typename... Is>
@@ -96,15 +87,6 @@ namespace matx
         cuda::std::get<DIM>(tup) = Size(DIM) - cuda::std::get<DIM>(tup) - 1;
         return cuda::std::apply(op_, tup);
       }
-
-      if constexpr (Rank() != 0) {
-        auto tup = cuda::std::make_tuple(indices...);
-        cuda::std::get<DIM>(tup) = Size(DIM) - cuda::std::get<DIM>(tup) - 1;
-        return cuda::std::apply(op_, tup);
-      }
-      else {
-        return op_();
-      }
     }
 
     static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
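The range.h, repmat.h, and reverse.h hunks above all delete the same kind of dead code: a second copy of an if constexpr block sitting after an if/else chain in which every branch already returns. No instantiation can ever reach the duplicate, and nvc++ flags it as unreachable. A standalone sketch (hypothetical eval helper, not part of the patch):

    #include <cstdio>

    template <int RANK, typename Op>
    auto eval(Op op, int idx) {
      if constexpr (RANK != 0) {
        return op(idx);   // taken for every RANK != 0
      }
      else {
        return op();      // taken for RANK == 0
      }
      // Every instantiation returns above, so code placed here can never
      // execute; the patch deletes exactly this kind of duplicated block.
    }

    int main() {
      printf("%d\n", eval<1>([](int i) { return i + 1; }, 41)); // prints 42
      printf("%d\n", eval<0>([]() { return 7; }, 0));           // prints 7
      return 0;
    }
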
diff --git a/include/matx/operators/scalar_ops.h b/include/matx/operators/scalar_ops.h
index e29b788f..60ffa838 100644
--- a/include/matx/operators/scalar_ops.h
+++ b/include/matx/operators/scalar_ops.h
@@ -496,10 +496,7 @@ template <typename T1, typename T2> struct FModF {
   static std::string str(const std::string &str1, const std::string &str2) { return "(" + str1 + "%" + str2 + ")"; }
   static __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ auto op(T1 v1, T2 v2)
   {
-    return _internal_fmod(v1, v2);
-
-    // Unreachable, but required by the compiler
-    return typename std::invoke_result_t{0};
+    return _internal_fmod(v1, v2);
   }
 };
 template <typename T1, typename T2> using FModOp = BinOp<T1, T2, FModF<T1, T2>>;
@@ -520,10 +517,7 @@ template <typename T1, typename T2> struct Atan2F {
   static std::string str(const std::string &str1, const std::string &str2) { return "(" + str1 + "%" + str2 + ")"; }
   static __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ auto op(T1 v1, T2 v2)
   {
-    return _internal_atan2(v1, v2);
-
-    // Unreachable, but required by the compiler
-    return typename std::invoke_result_t{0};
+    return _internal_atan2(v1, v2);
   }
 };
 template <typename T1, typename T2> using Atan2Op = BinOp<T1, T2, Atan2F<T1, T2>>;
@@ -649,8 +643,6 @@ static __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ auto _internal_isnan(T v1)
   } else {
     return cuda::std::isnan(static_cast<castType>(v1));
   }
-
-  return false;
 }
 
 template <typename T> struct IsNan {
@@ -675,9 +667,7 @@ static __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ auto _internal_isinf(T v1)
     return cuda::std::isinf(static_cast<typename castType::value_type>(v1.real())) || cuda::std::isinf(static_cast<typename castType::value_type>(v1.imag()));
   } else {
     return cuda::std::isinf(static_cast<castType>(v1));
-  }
-
-  return false;
+  }
 }
 
 template <typename T> struct IsInf {
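The scalar_ops.h hunks above remove the "unreachable, but required by the compiler" fallbacks: with an auto return type, each if constexpr instantiation deduces its type from the one reachable return, so the dummy value and the trailing return false are dead code that nvc++ reports. A standalone sketch (hypothetical my_isnan helper mirroring the shape of _internal_isnan, not part of the patch):

    #include <cmath>
    #include <cstdio>
    #include <type_traits>

    template <typename T>
    auto my_isnan(T v) {
      if constexpr (std::is_floating_point_v<T>) {
        return std::isnan(v);
      } else {
        return false; // integral types are never NaN
      }
      // return false; // old fallback: unreachable in every instantiation
    }

    int main() {
      printf("%d %d\n", (int)my_isnan(std::nan("")), (int)my_isnan(42)); // prints 1 0
      return 0;
    }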