Merge Add dpcpp cooperative group, ConfigSet

This PR adds the dpcpp cooperative group, ConfigSet, and some helper functions.

Summary:
- dim3 and sycl_nd_range: a CUDA-like usage for sycl range and nd_range, with tests
- helper: gives default implementation macros for simple kernel cases (no explicit template parameter and 1d block); `__WG_BOUND__` gives something like `__launch_bound__`, but it needs the 3d information, not the product; `__WG_BOUND_CONFIG__` can use ConfigSet for easy unpacking
- cooperative group implementation, with the test results set individually
- another selection for config (it allows bool, int, size_type templates by roughly going through all kernel templates)
- update format_header such that it can handle the generated dpcpp file (the script is not yet here)
- add ConfigSet and the related decode/encode information

Related PR: #757
ginkgo-project · May 28, 2021 · da19a97 · da19a97
2 parents 95c7652 + 7f72418
commit da19a97
Show file tree

Hide file tree

Showing 16 changed files with 1,582 additions and 7 deletions.
diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake
@@ -33,6 +33,7 @@ function(ginkgo_create_dpcpp_test test_name)
     add_executable(${TEST_TARGET_NAME} ${test_name}.dp.cpp)
     target_compile_features("${TEST_TARGET_NAME}" PUBLIC cxx_std_17)
     target_compile_options("${TEST_TARGET_NAME}" PRIVATE "${GINKGO_DPCPP_FLAGS}")
+    target_link_options("${TEST_TARGET_NAME}" PRIVATE -fsycl-device-code-split=per_kernel)
     if (GINKGO_DPCPP_SINGLE_MODE)
         target_compile_definitions("${TEST_TARGET_NAME}" PRIVATE GINKGO_DPCPP_SINGLE_MODE=1)
     endif()

diff --git a/core/base/types.hpp b/core/base/types.hpp
@@ -0,0 +1,209 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2021, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#ifndef GKO_CORE_BASE_TYPES_HPP_
+#define GKO_CORE_BASE_TYPES_HPP_
+
+
+#include <array>
+#include <cstdint>
+#include <type_traits>
+#include <utility>
+
+
+namespace gko {
+namespace detail {
+
+
+/**
+ * mask gives the integer with Size activated bits in the end
+ *
+ * This overload is only enabled when Size is smaller than the number of bits
+ * of ValueType (computed as sizeof(ValueType) * 8), so the shift amount used
+ * below is always smaller than that bit count.
+ *
+ * @tparam Size  the number of activated bits
+ * @tparam ValueType  the type of mask, which uses std::uint32_t as default
+ *
+ * @return the ValueType with Size activated bits in the end, i.e. 2^Size - 1
+ */
+template <int Size, typename ValueType = std::uint32_t>
+constexpr std::enable_if_t<(Size < sizeof(ValueType) * 8), ValueType> mask()
+{
+    return (ValueType{1} << Size) - 1;
+}
+
+/**
+ * @copydoc mask()
+ *
+ * @note this is the special case where Size equals the number of bits of
+ *       ValueType (sizeof(ValueType) * 8). No shift is used here: all bits
+ *       are activated by complementing a value-initialized ValueType.
+ */
+template <int Size, typename ValueType = std::uint32_t>
+constexpr std::enable_if_t<Size == sizeof(ValueType) * 8, ValueType> mask()
+{
+    return ~ValueType{};
+}
+
+
+/**
+ * shift calculates the number of bits for shifting
+ *
+ * The result for position current_shift is the sum of the entries of bits
+ * after that position, i.e. bits[current_shift + 1] + ... +
+ * bits[num_groups - 1].
+ *
+ * @tparam current_shift  the current position of shifting
+ * @tparam num_groups  the number of elements in array
+ *
+ * @param bits  the number of bits used by each position
+ *
+ * @return the number of shifting bits
+ *
+ * @note this is the last case of nested template: current_shift is the last
+ *       position, nothing follows it, so bits is not read and 0 is returned.
+ */
+template <int current_shift, int num_groups>
+constexpr std::enable_if_t<(num_groups == current_shift + 1), int> shift(
+    const std::array<unsigned char, num_groups> &bits)
+{
+    return 0;
+}
+
+/**
+ * @copydoc shift(const std::array<unsigned char, num_groups> &)
+ *
+ * @note this is the usual case of nested template: it adds the number of
+ *       bits of the next position to the shift computed for that next
+ *       position.
+ */
+template <int current_shift, int num_groups>
+constexpr std::enable_if_t<(num_groups > current_shift + 1), int> shift(
+    const std::array<unsigned char, num_groups> &bits)
+{
+    return bits[current_shift + 1] +
+           shift<(current_shift + 1), num_groups>(bits);
+}
+
+
+}  // namespace detail
+
+
+/**
+ * ConfigSet is a way to embed several pieces of information into one integer
+ * by giving each of them a certain number of bits.
+ *
+ * The usage will be the following
+ * Set the method with bits Cfg = ConfigSet<b_0, b_1, ..., b_k>
+ * Encode the given information encoded = Cfg::encode(x_0, x_1, ..., x_k)
+ * Decode the specific position information x_t = Cfg::decode<t>(encoded)
+ * The encoded result will use 32 bits to record
+ * rrrrr0..01....1...k..k, where 0/1/.../k marks the bits storing the
+ * information of position 0/1/.../k and r marks the rest of unused bits.
+ *
+ * Denote $B_t = \sum_{i = t+1}^k b_i$ and $F(X) = Cfg::encode(x_0, ..., x_k)$.
+ * Have $F(X) = \sum_{i = 0}^k (x_i << B_i) = \sum_{i = 0}^k (x_i * 2^{B_i})$.
+ * For all i, we have $0 <= x_i < 2^{b_i}$, so $x_i * 2^{B_i}$ only touches
+ * the bits $[B_i, B_i + b_i)$. These ranges are disjoint for different i
+ * because $B_i + b_i = B_{i-1}$, i.e. no position overwrites another one.
+ * Thus, every $x_i$ can be read back from $F(X)$: $F(X) = F(Y)$ -> $X = Y$.
+ * F is one-to-one function if $0 <= x_i < 2^{b_i}$ for all i.
+ * For any encoded result R, we can use the following to get the decoded series.
+ * for i = k to 0;
+ *   $x_i = R % 2^{b_i}$;
+ *   $R = R / 2^{b_i}$;
+ * endfor;
+ * For any R in the range $[0, 2^{b_0 + B_0})$, we have X such that $F(X) = R$.
+ * F is onto function.
+ * Thus, F is bijection.
+ *
+ * @tparam num_bits...  the number of bits for each position.
+ *
+ * @note the num_bits of a position must be at least
+ *       $ceil(log_2(maxval + 1))$ to hold values up to maxval. encode does
+ *       not mask its inputs, so a larger value spills into the bits above
+ *       its own range.
+ */
+template <unsigned char... num_bits>
+class ConfigSet {
+public:
+    static constexpr unsigned num_groups = sizeof...(num_bits);
+    static constexpr std::array<unsigned char, num_groups> bits{num_bits...};
+
+    /**
+     * Decodes the `position` information from encoded
+     *
+     * @tparam position  the position of desired information
+     *
+     * @param encoded  the encoded integer
+     *
+     * @return the decoded information at position
+     */
+    template <int position>
+    static constexpr std::uint32_t decode(std::uint32_t encoded)
+    {
+        static_assert(position < num_groups,
+                      "This position is over the bounds.");
+        // move the bits of `position` to the end, then drop everything else
+        constexpr int shift = detail::shift<position, num_groups>(bits);
+        constexpr auto mask = detail::mask<bits[position]>();
+        return (encoded >> shift) & mask;
+    }
+
+    /**
+     * Encodes the information with given bit set to encoded integer.
+     *
+     * @tparam current_iter  the encoded place, which equals num_groups here
+     *
+     * @return 0, because no information is left to encode
+     *
+     * @note the last case of nested template.
+     */
+    template <unsigned current_iter>
+    static constexpr std::enable_if_t<(current_iter == num_groups),
+                                      std::uint32_t>
+    encode()
+    {
+        return 0;
+    }
+
+    /**
+     * Encodes the information with given bit set to encoded integer.
+     *
+     * @tparam current_iter  the encoded place
+     * @tparam Rest...  the rest type
+     *
+     * @param first  the current encoded information
+     * @param rest...  the rest of other information waiting for encoding
+     *
+     * @return the encoded integer
+     */
+    template <unsigned current_iter = 0, typename... Rest>
+    static constexpr std::enable_if_t<(current_iter < num_groups),
+                                      std::uint32_t>
+    encode(std::uint32_t first, Rest &&... rest)
+    {
+        constexpr int shift = detail::shift<current_iter, num_groups>(bits);
+        // A static_assert is checked whenever this template is instantiated,
+        // independent of any surrounding runtime branch, so it is stated
+        // unconditionally. For current_iter == 0 the left-hand side is the
+        // total number of bits of all positions; later positions need fewer.
+        static_assert(
+            bits[current_iter] + shift <= sizeof(std::uint32_t) * 8,
+            "the total bits usage is larger than std::uint32_t bits");
+        return (first << shift) |
+               encode<current_iter + 1>(std::forward<Rest>(rest)...);
+    }
+};
+
+
+}  // namespace gko
+
+#endif  // GKO_CORE_BASE_TYPES_HPP_
diff --git a/core/synthesizer/implementation_selection.hpp b/core/synthesizer/implementation_selection.hpp
@@ -70,6 +70,38 @@ namespace syn {
         }                                                                    \
     }
 
+#define GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(_name, _callable)         \
+    template <typename Predicate, bool... BoolArgs, int... IntArgs,          \
+              gko::size_type... SizeTArgs, typename... TArgs,                \
+              typename... InferredArgs>                                      \
+    inline void _name(::gko::syn::value_list<std::uint32_t>, Predicate,      \
+                      ::gko::syn::value_list<bool, BoolArgs...>,             \
+                      ::gko::syn::value_list<int, IntArgs...>,               \
+                      ::gko::syn::value_list<gko::size_type, SizeTArgs...>,  \
+                      ::gko::syn::type_list<TArgs...>, InferredArgs...)      \
+        GKO_KERNEL_NOT_FOUND; /* value_list is empty: nothing matched */     \
+                                                                             \
+    template <std::uint32_t K, std::uint32_t... Rest, typename Predicate,    \
+              bool... BoolArgs, int... IntArgs, gko::size_type... SizeTArgs, \
+              typename... TArgs, typename... InferredArgs>                   \
+    inline void _name(                                                       \
+        ::gko::syn::value_list<std::uint32_t, K, Rest...>,                   \
+        Predicate is_eligible,                                               \
+        ::gko::syn::value_list<bool, BoolArgs...> bool_args,                 \
+        ::gko::syn::value_list<int, IntArgs...> int_args,                    \
+        ::gko::syn::value_list<gko::size_type, SizeTArgs...> size_args,      \
+        ::gko::syn::type_list<TArgs...> type_args, InferredArgs... args)     \
+    { /* K is the head of the value_list, Rest... its tail */               \
+        if (is_eligible(K)) { /* first eligible K is used; rest ignored */   \
+            _callable<BoolArgs..., IntArgs..., SizeTArgs..., TArgs..., K>(   \
+                std::forward<InferredArgs>(args)...);                        \
+        } else { /* otherwise retry with the remaining configurations */     \
+            _name(::gko::syn::value_list<std::uint32_t, Rest...>(),          \
+                  is_eligible, bool_args, int_args, size_args, type_args,    \
+                  std::forward<InferredArgs>(args)...);                      \
+        }                                                                    \
+    }
+
 
 }  // namespace syn
 }  // namespace gko

diff --git a/core/test/base/types.cpp b/core/test/base/types.cpp
@@ -33,9 +33,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <ginkgo/core/base/types.hpp>
 
 
+#include <array>
+#include <cstdint>
+#include <stdexcept>
+#include <type_traits>
+
+
 #include <gtest/gtest.h>
 
 
+#include "core/base/types.hpp"
+
+
 namespace {
 
 
@@ -100,4 +109,108 @@ TEST(PrecisionReduction, ComputesCommonEncoding)
 }
 
 
+TEST(ConfigSet, MaskCorrectly)  // checks values and types of detail::mask
+{
+    constexpr auto mask3_u = gko::detail::mask<3>();  // default ValueType
+    constexpr auto fullmask_u = gko::detail::mask<32>();  // Size == bit count
+    constexpr auto mask3_u64 = gko::detail::mask<3, std::uint64_t>();
+    constexpr auto fullmask_u64 = gko::detail::mask<64, std::uint64_t>();
+
+    ASSERT_EQ(mask3_u, 7u);  // 0b111
+    ASSERT_EQ(fullmask_u, 0xffffffffu);
+    ASSERT_TRUE((std::is_same<decltype(mask3_u), const std::uint32_t>::value));
+    ASSERT_TRUE(  // constexpr variables are const, hence the const type
+        (std::is_same<decltype(fullmask_u), const std::uint32_t>::value));
+    ASSERT_EQ(mask3_u64, 7ull);
+    ASSERT_EQ(fullmask_u64, 0xffffffffffffffffull);
+    ASSERT_TRUE(
+        (std::is_same<decltype(mask3_u64), const std::uint64_t>::value));
+    ASSERT_TRUE(
+        (std::is_same<decltype(fullmask_u64), const std::uint64_t>::value));
+}
+
+
+TEST(ConfigSet, ShiftCorrectly)  // shift = sum of the bits after a position
+{
+    constexpr std::array<unsigned char, 3> bits{3, 5, 7};
+
+
+    constexpr auto shift0 = gko::detail::shift<0, 3>(bits);
+    constexpr auto shift1 = gko::detail::shift<1, 3>(bits);
+    constexpr auto shift2 = gko::detail::shift<2, 3>(bits);
+
+    ASSERT_EQ(shift0, 12);  // 5 + 7
+    ASSERT_EQ(shift1, 7);   // only the last entry follows position 1
+    ASSERT_EQ(shift2, 0);   // nothing follows the last position
+}
+
+
+// A single position of 3 bits: the encoded integer is the value itself.
+TEST(ConfigSet, ConfigSet1Correctly)
+{
+    using Cfg = gko::ConfigSet<3>;
+
+    constexpr auto encoded = Cfg::encode(2);
+    constexpr auto decoded = Cfg::decode<0>(encoded);
+
+    // encode/decode return std::uint32_t, so compare with unsigned literals
+    // to avoid a signed/unsigned comparison inside the assertion.
+    ASSERT_EQ(encoded, 2u);
+    ASSERT_EQ(decoded, 2u);
+}
+
+
+TEST(ConfigSet, ConfigSet1FullCorrectly)  // one position using all 32 bits
+{
+    using Cfg = gko::ConfigSet<32>;
+
+    constexpr auto encoded = Cfg::encode(0xffffffff);  // all 32 bits set
+    constexpr auto decoded = Cfg::decode<0>(encoded);
+
+    ASSERT_EQ(encoded, 0xffffffff);
+    ASSERT_EQ(decoded, 0xffffffff);
+}
+
+
+// Two positions (1 + 31 bits) that together use all 32 bits: the first
+// position lands in the most significant bit.
+TEST(ConfigSet, ConfigSet2FullCorrectly)
+{
+    using Cfg = gko::ConfigSet<1, 31>;
+
+    constexpr auto encoded = Cfg::encode(1, 33);
+    constexpr auto decoded_0 = Cfg::decode<0>(encoded);
+    constexpr auto decoded_1 = Cfg::decode<1>(encoded);
+
+    ASSERT_EQ(encoded, (1u << 31) + 33);
+    // also check the round trip, like the other ConfigSet tests do
+    ASSERT_EQ(decoded_0, 1u);
+    ASSERT_EQ(decoded_1, 33u);
+}
+
+
+// Three positions (3 + 5 + 7 bits) that leave part of the 32 bits unused.
+TEST(ConfigSet, ConfigSetSomeCorrectly)
+{
+    using Cfg = gko::ConfigSet<3, 5, 7>;
+
+    constexpr auto encoded = Cfg::encode(2, 11, 13);
+    constexpr auto decoded_0 = Cfg::decode<0>(encoded);
+    constexpr auto decoded_1 = Cfg::decode<1>(encoded);
+    constexpr auto decoded_2 = Cfg::decode<2>(encoded);
+
+    // shifts are 5 + 7 = 12, 7 and 0; unsigned literals match the
+    // std::uint32_t results and avoid signed/unsigned comparisons.
+    ASSERT_EQ(encoded, (2u << 12) + (11u << 7) + 13u);
+    ASSERT_EQ(decoded_0, 2u);
+    ASSERT_EQ(decoded_1, 11u);
+    ASSERT_EQ(decoded_2, 13u);
+}
+
+
+// Four positions (2 + 6 + 7 + 17 bits) that together use all 32 bits.
+TEST(ConfigSet, ConfigSetSomeFullCorrectly)
+{
+    using Cfg = gko::ConfigSet<2, 6, 7, 17>;
+
+    constexpr auto encoded = Cfg::encode(2, 11, 13, 19);
+    constexpr auto decoded_0 = Cfg::decode<0>(encoded);
+    constexpr auto decoded_1 = Cfg::decode<1>(encoded);
+    constexpr auto decoded_2 = Cfg::decode<2>(encoded);
+    constexpr auto decoded_3 = Cfg::decode<3>(encoded);
+
+    // 2 << 30 sets bit 31, which does not fit into a signed int; build the
+    // expected value from unsigned literals so no signed shift is involved.
+    ASSERT_EQ(encoded, (2u << 30) + (11u << 24) + (13u << 17) + 19u);
+    ASSERT_EQ(decoded_0, 2u);
+    ASSERT_EQ(decoded_1, 11u);
+    ASSERT_EQ(decoded_2, 13u);
+    ASSERT_EQ(decoded_3, 19u);
+}
+
+
 }  // namespace
diff --git a/dev_tools/scripts/config b/dev_tools/scripts/config
@@ -32,6 +32,8 @@
     - RemoveTest: "true"
 - "_builder\.hpp"
     - CoreSuffix: "_builder"
+- "dpcpp/test/base/dim3\.dp\.cpp"
+    - FixInclude: "dpcpp/base/dim3.dp.hpp"
 - "components.*_kernels(\.hip|\.dp)?\.(cu|cpp|hpp|cuh)"
     - CoreSuffix: "_kernels"
     - RemoveTest: "true"