From 049d6feda0d850c9fde3d56f86666064679d7b2d Mon Sep 17 00:00:00 2001 From: Felix Thaler Date: Wed, 30 Oct 2024 15:36:52 +0000 Subject: [PATCH] Remove ldg_ptr and Replace Functionality by const_ptr_deref (#1810) No performance change in the GridTools performance tests visible, however this might change performance of external code that does not rely on GridTools storages but uses `sid::composite` or `sid::dimension_to_tuple_like` which now use `__ldg` but previously did not. --- include/gridtools/common/const_ptr_deref.hpp | 38 ++++++ include/gridtools/common/ldg_ptr.hpp | 112 ------------------ include/gridtools/fn/cartesian.hpp | 4 +- include/gridtools/fn/neighbor_table.hpp | 8 +- include/gridtools/fn/sid_neighbor_table.hpp | 4 +- include/gridtools/fn/unstructured.hpp | 4 +- include/gridtools/sid/composite.hpp | 5 +- include/gridtools/sid/simple_ptr_holder.hpp | 3 +- include/gridtools/stencil/gpu/entry_point.hpp | 4 +- .../stencil/gpu_horizontal/entry_point.hpp | 4 +- include/gridtools/storage/sid.hpp | 3 +- tests/unit_tests/common/CMakeLists.txt | 5 - tests/unit_tests/common/test_ldg_ptr.cpp | 51 -------- tests/unit_tests/common/test_ldg_ptr.cu | 86 -------------- 14 files changed, 57 insertions(+), 274 deletions(-) create mode 100644 include/gridtools/common/const_ptr_deref.hpp delete mode 100644 include/gridtools/common/ldg_ptr.hpp delete mode 100644 tests/unit_tests/common/test_ldg_ptr.cpp delete mode 100644 tests/unit_tests/common/test_ldg_ptr.cu diff --git a/include/gridtools/common/const_ptr_deref.hpp b/include/gridtools/common/const_ptr_deref.hpp new file mode 100644 index 000000000..fd9fc2ac2 --- /dev/null +++ b/include/gridtools/common/const_ptr_deref.hpp @@ -0,0 +1,38 @@ +/* + * GridTools + * + * Copyright (c) 2014-2023, ETH Zurich + * All rights reserved. + * + * Please, refer to the LICENSE file in the root directory. + * SPDX-License-Identifier: BSD-3-Clause + */ +#pragma once + +#include "host_device.hpp" + +#ifdef GT_CUDACC +#include "cuda_type_traits.hpp" +#endif + +namespace gridtools { + +#ifdef GT_CUDACC + + template + GT_FUNCTION constexpr std::enable_if_t::value, T> const_ptr_deref(T const *ptr) { +#ifdef GT_CUDA_ARCH + return __ldg(ptr); +#else + return *ptr; +#endif + } + +#endif + + template + GT_FUNCTION constexpr decltype(auto) const_ptr_deref(T &&ptr) { + return *ptr; + } + +} // namespace gridtools diff --git a/include/gridtools/common/ldg_ptr.hpp b/include/gridtools/common/ldg_ptr.hpp deleted file mode 100644 index fa3ff226a..000000000 --- a/include/gridtools/common/ldg_ptr.hpp +++ /dev/null @@ -1,112 +0,0 @@ -/* - * GridTools - * - * Copyright (c) 2014-2023, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. - * SPDX-License-Identifier: BSD-3-Clause - */ -#pragma once - -#include -#include -#include - -#include "defs.hpp" -#include "host_device.hpp" - -#ifdef GT_CUDACC -#include "cuda_type_traits.hpp" -#endif - -namespace gridtools { - -#ifdef GT_CUDACC - namespace impl_ { - - template - struct ldg_ptr { - T const *m_ptr; - - static_assert(is_texture_type::value); - - GT_FUNCTION constexpr T operator*() const { -#ifdef GT_CUDA_ARCH - return __ldg(m_ptr); -#else - return *m_ptr; -#endif - } - - GT_FUNCTION constexpr ldg_ptr &operator+=(std::ptrdiff_t diff) { - m_ptr += diff; - return *this; - } - - GT_FUNCTION constexpr ldg_ptr &operator-=(std::ptrdiff_t diff) { - m_ptr -= diff; - return *this; - } - - friend GT_FUNCTION constexpr bool operator==(ldg_ptr const &a, ldg_ptr const &b) { - return a.m_ptr == b.m_ptr; - } - friend GT_FUNCTION constexpr bool operator==(ldg_ptr const &a, T const *b) { return a.m_ptr == b; } - friend GT_FUNCTION constexpr bool operator==(T const *a, ldg_ptr const &b) { return a == b.m_ptr; } - - friend GT_FUNCTION constexpr bool operator!=(ldg_ptr const &a, ldg_ptr const &b) { - return a.m_ptr != b.m_ptr; - } - friend GT_FUNCTION constexpr bool operator!=(ldg_ptr const &a, T const *b) { return a.m_ptr != b; } - friend GT_FUNCTION constexpr bool operator!=(T const *a, ldg_ptr const &b) { return a != b.m_ptr; } - - friend GT_FUNCTION constexpr ldg_ptr &operator++(ldg_ptr &ptr) { - ++ptr.m_ptr; - return ptr; - } - - friend GT_FUNCTION constexpr ldg_ptr &operator--(ldg_ptr &ptr) { - --ptr.m_ptr; - return ptr; - } - - friend GT_FUNCTION constexpr ldg_ptr operator++(ldg_ptr &ptr, int) { - ldg_ptr p = ptr; - ++ptr.m_ptr; - return p; - } - - friend GT_FUNCTION constexpr ldg_ptr operator--(ldg_ptr &ptr, int) { - ldg_ptr p = ptr; - --ptr.m_ptr; - return p; - } - - friend GT_FUNCTION constexpr ldg_ptr operator+(ldg_ptr const &ptr, std::ptrdiff_t diff) { - return {ptr.m_ptr + diff}; - } - - friend GT_FUNCTION constexpr ldg_ptr operator-(ldg_ptr const &ptr, std::ptrdiff_t diff) { - return {ptr.m_ptr - diff}; - } - - friend GT_FUNCTION constexpr std::ptrdiff_t operator-(ldg_ptr const &ptr, ldg_ptr const &other) { - return ptr.m_ptr - other.m_ptr; - } - }; - } // namespace impl_ - - template - GT_FUNCTION constexpr std::enable_if_t::value, impl_::ldg_ptr> as_ldg_ptr(T const *ptr) { - return {ptr}; - } - -#endif - - template - GT_FUNCTION constexpr T &&as_ldg_ptr(T &&value) { - return std::forward(value); - } - -} // namespace gridtools diff --git a/include/gridtools/fn/cartesian.hpp b/include/gridtools/fn/cartesian.hpp index 0fbdfe2b9..3e2f0729f 100644 --- a/include/gridtools/fn/cartesian.hpp +++ b/include/gridtools/fn/cartesian.hpp @@ -11,7 +11,7 @@ #include -#include "../common/ldg_ptr.hpp" +#include "../common/const_ptr_deref.hpp" #include "../common/tuple_util.hpp" #include "../sid/concept.hpp" #include "./common_interface.hpp" @@ -45,7 +45,7 @@ namespace gridtools::fn { template GT_FUNCTION auto deref(iterator const &it) { - return *as_ldg_ptr(it.m_ptr); + return const_ptr_deref(it.m_ptr); } template diff --git a/include/gridtools/fn/neighbor_table.hpp b/include/gridtools/fn/neighbor_table.hpp index aa50b6d06..682505b5a 100644 --- a/include/gridtools/fn/neighbor_table.hpp +++ b/include/gridtools/fn/neighbor_table.hpp @@ -11,7 +11,7 @@ #include -#include "../common/ldg_ptr.hpp" +#include "../common/const_ptr_deref.hpp" #include "../common/tuple_util.hpp" #include "../meta/logical.hpp" @@ -57,12 +57,12 @@ namespace gridtools::fn::neighbor_table { template ::value, int> = 0> GT_FUNCTION T const &neighbor_table_neighbors(T const *table, int index) { - return *as_ldg_ptr(&table[index]); + return const_ptr_deref(&table[index]); } template - GT_FUNCTION constexpr auto neighbors(NeighborTable const &nt, int index) - -> decltype(neighbor_table_neighbors(nt, index)) { + GT_FUNCTION constexpr auto neighbors( + NeighborTable const &nt, int index) -> decltype(neighbor_table_neighbors(nt, index)) { return neighbor_table_neighbors(nt, index); } diff --git a/include/gridtools/fn/sid_neighbor_table.hpp b/include/gridtools/fn/sid_neighbor_table.hpp index 9763aecbc..dd936dc65 100644 --- a/include/gridtools/fn/sid_neighbor_table.hpp +++ b/include/gridtools/fn/sid_neighbor_table.hpp @@ -13,7 +13,7 @@ #include #include "../common/array.hpp" -#include "../common/ldg_ptr.hpp" +#include "../common/const_ptr_deref.hpp" #include "../fn/unstructured.hpp" #include "../sid/concept.hpp" @@ -47,7 +47,7 @@ namespace gridtools::fn::sid_neighbor_table { sid::shift(ptr, sid::get_stride(table.strides), index); for (std::size_t element_idx = 0; element_idx < MaxNumNeighbors; ++element_idx) { - neighbors[element_idx] = *as_ldg_ptr(ptr); + neighbors[element_idx] = const_ptr_deref(ptr); sid::shift(ptr, sid::get_stride(table.strides), 1_c); } return neighbors; diff --git a/include/gridtools/fn/unstructured.hpp b/include/gridtools/fn/unstructured.hpp index ff216c1b3..819b25cdb 100644 --- a/include/gridtools/fn/unstructured.hpp +++ b/include/gridtools/fn/unstructured.hpp @@ -11,9 +11,9 @@ #include +#include "../common/const_ptr_deref.hpp" #include "../common/defs.hpp" #include "../common/hymap.hpp" -#include "../common/ldg_ptr.hpp" #include "../meta/logical.hpp" #include "../sid/concept.hpp" #include "./common_interface.hpp" @@ -79,7 +79,7 @@ namespace gridtools::fn { GT_FUNCTION constexpr auto deref(iterator const &it) { GT_PROMISE(can_deref(it)); decltype(auto) stride = host_device::at_key(sid::get_stride(it.m_strides)); - return *as_ldg_ptr(sid::shifted(it.m_ptr, stride, it.m_index)); + return const_ptr_deref(sid::shifted(it.m_ptr, stride, it.m_index)); } template diff --git a/include/gridtools/sid/composite.hpp b/include/gridtools/sid/composite.hpp index f7baed34c..8c38d75ba 100644 --- a/include/gridtools/sid/composite.hpp +++ b/include/gridtools/sid/composite.hpp @@ -14,6 +14,7 @@ #include #include +#include "../common/const_ptr_deref.hpp" #include "../common/defs.hpp" #include "../common/for_each.hpp" #include "../common/host_device.hpp" @@ -211,7 +212,7 @@ namespace gridtools { #if defined(__clang__) || !defined(__GNUC__) || (__GNUC__ != 9 && __GNUC__ != 10) GT_FORCE_INLINE_LAMBDA #endif - -> decltype(auto) { return *ptr; }, + -> decltype(auto) { return const_ptr_deref(ptr); }, m_vals)); } @@ -476,5 +477,5 @@ namespace gridtools { friend values tuple_getter(values) { return {}; } }; } // namespace composite - } // namespace sid + } // namespace sid } // namespace gridtools diff --git a/include/gridtools/sid/simple_ptr_holder.hpp b/include/gridtools/sid/simple_ptr_holder.hpp index 1a77ca790..38a3b9bd5 100644 --- a/include/gridtools/sid/simple_ptr_holder.hpp +++ b/include/gridtools/sid/simple_ptr_holder.hpp @@ -16,7 +16,6 @@ #include "../common/defs.hpp" #include "../common/host_device.hpp" -#include "../common/ldg_ptr.hpp" #define GT_FILENAME #include GT_ITERATE_ON_TARGETS() @@ -39,7 +38,7 @@ namespace gridtools { simple_ptr_holder() = default; GT_TARGET GT_FORCE_INLINE constexpr simple_ptr_holder(T const &ptr) : m_val{ptr} {} #endif - GT_TARGET GT_FORCE_INLINE constexpr decltype(auto) operator()() const { return as_ldg_ptr(m_val); } + GT_TARGET GT_FORCE_INLINE constexpr T const &operator()() const { return m_val; } }; template diff --git a/include/gridtools/stencil/gpu/entry_point.hpp b/include/gridtools/stencil/gpu/entry_point.hpp index fef3d2588..57238061d 100644 --- a/include/gridtools/stencil/gpu/entry_point.hpp +++ b/include/gridtools/stencil/gpu/entry_point.hpp @@ -13,12 +13,12 @@ #include #include +#include "../../common/const_ptr_deref.hpp" #include "../../common/cuda_type_traits.hpp" #include "../../common/cuda_util.hpp" #include "../../common/defs.hpp" #include "../../common/hymap.hpp" #include "../../common/integral_constant.hpp" -#include "../../common/ldg_ptr.hpp" #include "../../common/tuple_util.hpp" #include "../../meta.hpp" #include "../../sid/allocator.hpp" @@ -136,7 +136,7 @@ namespace gridtools { template GT_FUNCTION std::enable_if_t::value, T> operator()( Key, T const *ptr) const { - return *as_ldg_ptr(ptr); + return const_ptr_deref(ptr); } template GT_FUNCTION decltype(auto) operator()(Key, Ptr ptr) const { diff --git a/include/gridtools/stencil/gpu_horizontal/entry_point.hpp b/include/gridtools/stencil/gpu_horizontal/entry_point.hpp index 0d687816c..ca2068888 100644 --- a/include/gridtools/stencil/gpu_horizontal/entry_point.hpp +++ b/include/gridtools/stencil/gpu_horizontal/entry_point.hpp @@ -12,6 +12,7 @@ #include #include +#include "../../common/const_ptr_deref.hpp" #include "../../common/cuda_type_traits.hpp" #include "../../common/cuda_util.hpp" #include "../../common/defs.hpp" @@ -20,7 +21,6 @@ #include "../../common/host_device.hpp" #include "../../common/hymap.hpp" #include "../../common/integral_constant.hpp" -#include "../../common/ldg_ptr.hpp" #include "../../common/tuple_util.hpp" #include "../../meta.hpp" #include "../../sid/as_const.hpp" @@ -45,7 +45,7 @@ namespace gridtools { template GT_FUNCTION std::enable_if_t::value, T> operator()( Key, T const *ptr) const { - return *as_ldg_ptr(ptr); + return const_ptr_deref(ptr); } template GT_FUNCTION decltype(auto) operator()(Key, Ptr ptr) const { diff --git a/include/gridtools/storage/sid.hpp b/include/gridtools/storage/sid.hpp index 61c43d204..7ee0807c2 100644 --- a/include/gridtools/storage/sid.hpp +++ b/include/gridtools/storage/sid.hpp @@ -18,7 +18,6 @@ #include "../common/hymap.hpp" #include "../common/integral_constant.hpp" #include "../common/layout_map.hpp" -#include "../common/ldg_ptr.hpp" #include "../common/tuple.hpp" #include "../common/tuple_util.hpp" #include "../meta.hpp" @@ -37,7 +36,7 @@ namespace gridtools { template struct ptr_holder { T *m_val; - GT_FUNCTION constexpr auto operator()() const { return as_ldg_ptr(m_val); } + GT_FUNCTION constexpr T *operator()() const { return m_val; } friend GT_FORCE_INLINE constexpr ptr_holder operator+(ptr_holder obj, int_t arg) { return {obj.m_val + arg}; diff --git a/tests/unit_tests/common/CMakeLists.txt b/tests/unit_tests/common/CMakeLists.txt index c3b36f13a..cdc525e10 100644 --- a/tests/unit_tests/common/CMakeLists.txt +++ b/tests/unit_tests/common/CMakeLists.txt @@ -17,7 +17,6 @@ gridtools_add_unit_test(test_gt_math SOURCES test_gt_math.cpp NO_NVCC) gridtools_add_unit_test(test_hypercube_iterator SOURCES test_hypercube_iterator.cpp NO_NVCC) gridtools_add_unit_test(test_tuple SOURCES test_tuple.cpp NO_NVCC) gridtools_add_unit_test(test_int_vector SOURCES test_int_vector.cpp NO_NVCC) -gridtools_add_unit_test(test_ldg_ptr SOURCES test_ldg_ptr.cpp NO_NVCC) if(TARGET _gridtools_cuda) gridtools_check_compilation(test_cuda_type_traits test_cuda_type_traits.cu) @@ -50,8 +49,4 @@ if(TARGET _gridtools_cuda) SOURCES test_tuple.cu LIBRARIES _gridtools_cuda LABELS cuda) - gridtools_add_unit_test(test_ldg_ptr_cuda - SOURCES test_ldg_ptr.cu - LIBRARIES _gridtools_cuda - LABELS cuda) endif() diff --git a/tests/unit_tests/common/test_ldg_ptr.cpp b/tests/unit_tests/common/test_ldg_ptr.cpp deleted file mode 100644 index f122c5274..000000000 --- a/tests/unit_tests/common/test_ldg_ptr.cpp +++ /dev/null @@ -1,51 +0,0 @@ -/* - * GridTools - * - * Copyright (c) 2014-2023, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. - * SPDX-License-Identifier: BSD-3-Clause - */ - -#include - -#include - -namespace gridtools { - namespace { - TEST(as_ldg_ptr, non_const_host) { - float data[5] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f}; - - auto ptr = as_ldg_ptr(&data[2]); - EXPECT_EQ(*ptr, 2.0f); - EXPECT_EQ(ptr + 2, as_ldg_ptr(&data[4])); - EXPECT_EQ(ptr - 2, as_ldg_ptr(&data[0])); - EXPECT_EQ(*(ptr + 2), 4.0f); - EXPECT_EQ(*(ptr - 2), 0.0f); - EXPECT_EQ(*(++ptr), 3.0f); - EXPECT_EQ(*(ptr++), 3.0f); - EXPECT_EQ(*(ptr--), 4.0f); - EXPECT_EQ(*(--ptr), 2.0f); - EXPECT_EQ((ptr + 2) - ptr, 2); - *ptr = 5.0f; - EXPECT_EQ(*ptr, 5.0f); - } - - TEST(as_ldg_ptr, const_host) { - float const data[5] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f}; - - auto ptr = as_ldg_ptr(&data[2]); - EXPECT_EQ(*ptr, 2.0f); - EXPECT_EQ(ptr + 2, as_ldg_ptr(&data[4])); - EXPECT_EQ(ptr - 2, as_ldg_ptr(&data[0])); - EXPECT_EQ(*(ptr + 2), 4.0f); - EXPECT_EQ(*(ptr - 2), 0.0f); - EXPECT_EQ(*(++ptr), 3.0f); - EXPECT_EQ(*(ptr++), 3.0f); - EXPECT_EQ(*(ptr--), 4.0f); - EXPECT_EQ(*(--ptr), 2.0f); - EXPECT_EQ((ptr + 2) - ptr, 2); - } - } // namespace -} // namespace gridtools diff --git a/tests/unit_tests/common/test_ldg_ptr.cu b/tests/unit_tests/common/test_ldg_ptr.cu deleted file mode 100644 index cb6917742..000000000 --- a/tests/unit_tests/common/test_ldg_ptr.cu +++ /dev/null @@ -1,86 +0,0 @@ -/* - * GridTools - * - * Copyright (c) 2014-2023, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. - * SPDX-License-Identifier: BSD-3-Clause - */ - -#include - -#include - -#include - -namespace gridtools { - namespace { - __device__ bool test_non_const_device() { - volatile float data[5] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f}; - - auto ptr = as_ldg_ptr(&data[2]); - if (*ptr != 2.0f) - return false; - if (ptr + 2 != as_ldg_ptr(&data[4])) - return false; - if (ptr - 2 != as_ldg_ptr(&data[0])) - return false; - if (*(ptr + 2) != 4.0f) - return false; - if (*(ptr - 2) != 0.0f) - return false; - if (*(++ptr) != 3.0f) - return false; - if (*(ptr++) != 3.0f) - return false; - if (*(ptr--) != 4.0f) - return false; - if (*(--ptr) != 2.0f) - return false; - if ((ptr + 2) - ptr != 2) - return false; - *ptr = 5.0f; - if (*ptr != 5.0f) - return false; - return true; - } - - TEST(as_ldg_ptr, non_const_device) { - EXPECT_TRUE(on_device::exec(GT_MAKE_INTEGRAL_CONSTANT_FROM_VALUE(&test_non_const_device))); - } - - __device__ bool test_const_device() { - volatile float const data[5] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f}; - - auto ptr = as_ldg_ptr(&data[2]); - if (*ptr != 2.0f) - return false; - if (ptr + 2 != as_ldg_ptr(&data[4])) - return false; - if (ptr - 2 != as_ldg_ptr(&data[0])) - return false; - if (*(ptr + 2) != 4.0f) - return false; - if (*(ptr - 2) != 0.0f) - return false; - if (*(++ptr) != 3.0f) - return false; - if (*(ptr++) != 3.0f) - return false; - if (*(ptr--) != 4.0f) - return false; - if (*(--ptr) != 2.0f) - return false; - if ((ptr + 2) - ptr != 2) - return false; - return true; - } - - TEST(as_ldg_ptr, const_device) { - EXPECT_TRUE(on_device::exec(GT_MAKE_INTEGRAL_CONSTANT_FROM_VALUE(&test_const_device))); - } - } // namespace -} // namespace gridtools - -#include "test_ldg_ptr.cpp"