From 6bcfac5bd7f526de4080663fa97f8ed75815dec1 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Tue, 28 Feb 2023 15:11:05 -0700 Subject: [PATCH] Adds team- and thread-based lower-bound and upper-bound search and predicates. --- common/src/KokkosKernels_Iota.hpp | 2 + common/src/KokkosKernels_LowerBound.hpp | 470 ++++++++++++++++++++ common/src/KokkosKernels_Predicates.hpp | 167 +++++++ common/src/KokkosKernels_UpperBound.hpp | 101 +++++ common/unit_test/Test_Common.hpp | 2 + common/unit_test/Test_Common_Iota.hpp | 9 + common/unit_test/Test_Common_LowerBound.hpp | 256 +++++++++++ common/unit_test/Test_Common_UpperBound.hpp | 245 ++++++++++ 8 files changed, 1252 insertions(+) create mode 100644 common/src/KokkosKernels_LowerBound.hpp create mode 100644 common/src/KokkosKernels_Predicates.hpp create mode 100644 common/src/KokkosKernels_UpperBound.hpp create mode 100644 common/unit_test/Test_Common_LowerBound.hpp create mode 100644 common/unit_test/Test_Common_UpperBound.hpp diff --git a/common/src/KokkosKernels_Iota.hpp b/common/src/KokkosKernels_Iota.hpp index c5d6a8dfac..5b7e24ca24 100644 --- a/common/src/KokkosKernels_Iota.hpp +++ b/common/src/KokkosKernels_Iota.hpp @@ -135,6 +135,8 @@ template struct is_iota> : public std::true_type {}; template struct is_iota> : public std::true_type {}; +template +inline constexpr bool is_iota_v = is_iota::value; } // namespace Impl } // namespace KokkosKernels diff --git a/common/src/KokkosKernels_LowerBound.hpp b/common/src/KokkosKernels_LowerBound.hpp new file mode 100644 index 0000000000..22df9545ef --- /dev/null +++ b/common/src/KokkosKernels_LowerBound.hpp @@ -0,0 +1,470 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef _KOKKOSKERNELS_LOWERBOUND_HPP +#define _KOKKOSKERNELS_LOWERBOUND_HPP + +/*! \file KokkosKernels_LowerBound.hpp + Define thread and team-collaborative lower-bound search + + Lower-bound search takes a Kokkos::View, a search value, and a binary + predicate. + It returns an index to the first element of the view that does not + satisfy pred(element, value), or the size of the view if no such + element exists. + + All elements for which pred(element, value) is true must precede those + for which it is false. + + The default predicate is less-than, i.e. pred(a,b) = a < b. + In this case, lower-bound search returns the first index where the view + entry is >= the value. + + The type of the predicate function must be equivalent to the following: + \verbatim + bool operator()(const T &a, const T &b); + \endverbatim + KokkosKernels_Predicates.hpp defines a variety of common predicates, + available in KokkosKernels namespace. + + Examples: + \verbatim + value = 3 + view = {0,1,2,3,4} + = {t,t,t,f,f} + result = 3 + + value = -1 + view = {0,1,2,3,4} + = {f,f,f,f,f} + result = 0 + + value = 5 + view = {0,1,2,3,4} + = {t,t,t,t,t} + result = 5 + + value = 1 + view = {0,1,1,1,2} + = {t,f,f,f,f} + result = 1 + \endverbatim + + Contrast with upper-bound, which returns first index for which pred(value, + element) is true + */ + +#include + +#include "KokkosKernels_Predicates.hpp" +#include "KokkosKernels_SimpleUtils.hpp" + +namespace KokkosKernels { +namespace Impl { + +/*! 
\brief Single-thread sequential lower-bound search + + \tparam ViewLike A Kokkos::View or KokkosKernels::Impl::Iota + \tparam Pred a binary predicate function + \param view the view to search + \param value the value to search for + \param pred a binary predicate function + \returns index of first element in view where pred(element, value) is false, + or view.size if no such element exists + + At most view.size() predicate function calls +*/ +template > +KOKKOS_INLINE_FUNCTION typename ViewLike::size_type +lower_bound_sequential_thread( + const ViewLike &view, const typename ViewLike::non_const_value_type &value, + Pred pred = Pred()) { + using size_type = typename ViewLike::size_type; + static_assert(1 == ViewLike::rank, + "lower_bound_sequential_thread requires rank-1 views"); + static_assert(is_iota_v || Kokkos::is_view::value, + "lower_bound_sequential_thread requires a " + "KokkosKernels::Impl::Iota or a Kokkos::View"); + + size_type i = 0; + while (i < view.size() && pred(view(i), value)) { + ++i; + } + return i; +} + +/*! 
\brief Single-thread binary lower-bound search + + \tparam ViewLike A Kokkos::View or KokkosKernels::Impl::Iota + \tparam Pred a binary predicate function + \param view the view to search + \param value the value to search for + \param pred a binary predicate function + \returns index of first element in view where pred(element, value) is false, + or view.size if no such element exists + + At most log2(view.size()) + 1 predicate function calls +*/ +template > +KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_binary_thread( + const ViewLike &view, const typename ViewLike::non_const_value_type &value, + Pred pred = Pred()) { + using size_type = typename ViewLike::size_type; + static_assert(1 == ViewLike::rank, + "lower_bound_binary_thread requires rank-1 views"); + static_assert(is_iota_v || Kokkos::is_view::value, + "lower_bound_binary_thread requires a " + "KokkosKernels::Impl::Iota or a Kokkos::View"); + + size_type lo = 0; + size_type hi = view.size(); + while (lo < hi) { + size_type mid = (lo + hi) / 2; + const auto &ve = view(mid); + if (pred(ve, value)) { // mid satisfies predicate, look in higher half not + // including mid + lo = mid + 1; + } else { + hi = mid; + } + } + return lo; +} + +} // namespace Impl + +/*! 
\brief single-thread lower-bound search + + \tparam ViewLike A Kokkos::View or KokkosKernels::Impl::Iota + \tparam Pred a binary predicate function + \param view the view to search + \param value the value to search for + \param pred a binary predicate function + \returns index of first element in view where pred(element, value) is false, + or view.size if no such element exists + + This minimizes the calls to predicate: + for view.size() >= 8, this does a binary search, otherwise, a linear search +*/ +template > +KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_thread( + const ViewLike &view, const typename ViewLike::non_const_value_type &value, + Pred pred = Pred()) { + static_assert(1 == ViewLike::rank, + "lower_bound_thread requires rank-1 views"); + static_assert(is_iota_v || Kokkos::is_view::value, + "lower_bound_thread requires a " + "KokkosKernels::Impl::Iota or a Kokkos::View"); + /* + sequential search makes on average 0.5 * view.size memory accesses + binary search makes log2(view.size)+1 accesses + + log2(x) <= 0.5x roughly when x >= 8 + */ + if (view.size() >= 8) { + return Impl::lower_bound_binary_thread(view, value, pred); + } else { + return Impl::lower_bound_sequential_thread(view, value, pred); + } +} + +namespace Impl { + +/*! \brief Team-collaborative lower-bound search performed by a single thread + + \tparam TeamMember the team policy member type + \tparam ViewLike A Kokkos::View or KokkosKernels::Iota + \tparam Pred The type of the predicate function to call + + \param handle The Kokkos team handle + \param view The view-like to search + \param value The value to compare in the predicate + \param pred Apply pred(view(i), value) + + \returns To all team members, the smallest i for which pred(view(i), value) + is false, or view.size() if no such value + + The whole view is searched, i.e. i ranges over [0, view.size()). + + Uses a single thread to call \c lower_bound_thread, and broadcasts that + to all team members. 
+*/ +template > +KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_single_team( + const TeamMember &handle, const ViewLike &view, + const typename ViewLike::non_const_value_type &value, Pred pred = Pred()) { + typename ViewLike::size_type idx; + Kokkos::single( + Kokkos::PerTeam(handle), + [&](typename ViewLike::size_type &lidx) { + lidx = KokkosKernels::lower_bound_thread(view, value, pred); + }, + idx); + return idx; +} + +/*! \brief Team-collaborative sequential lower-bound search + + \tparam TeamMember the team policy member type + \tparam ViewLike A Kokkos::View or KokkosKernels::Iota + \tparam Pred The type of the predicate function to call + + \param handle The Kokkos team handle + \param view The view-like to search + \param value The value to compare in the predicate + \param lo The first index to search + \param hi One-past the last index to search + \param pred Apply pred(view(i), value) + + \returns To all team members, the smallest i for which pred(view(i), value) + is false for i in [lo, hi), or hi if no such value + + Apply pred(view(i), value) for i in [lo, hi) +*/ +template > +KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_sequential_team( + const TeamMember &handle, const ViewLike &view, + const typename ViewLike::non_const_value_type &value, + typename ViewLike::size_type lo, typename ViewLike::size_type hi, + Pred pred = Pred()) { + using size_type = typename ViewLike::size_type; + static_assert(1 == ViewLike::rank, + "lower_bound_sequential_team requires rank-1 views"); + static_assert(is_iota_v || Kokkos::is_view::value, + "lower_bound_sequential_team requires a " + "KokkosKernels::Impl::Iota or a Kokkos::View"); + + if (lo == hi) { + return hi; + } + size_type teamI; + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(handle, lo, hi), + [&](const size_type &i, size_type &li) { + li = KOKKOSKERNELS_MACRO_MIN(li, hi); + if (i < li) { // no need to search higher than the smallest so far + if (!pred(view(i), 
value)) { // look for the smallest index that does + // not satisfy + li = i; + } + } + }, + Kokkos::Min(teamI)); + return teamI; +} + +/*! \brief Team-collaborative sequential lower-bound search + + \tparam TeamMember the team policy member type + \tparam ViewLike A Kokkos::View or KokkosKernels::Iota + \tparam Pred The type of the predicate function to call + + \param handle The Kokkos team handle + \param view The view-like to search + \param value The value to compare in the predicate + \param pred Apply pred(view(i), value) + + \returns To all team members, the smallest i for which pred(view(i), value) + is false or view.size() if no such value +*/ +template > +KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_sequential_team( + const TeamMember &handle, const ViewLike &view, + const typename ViewLike::non_const_value_type &value, Pred pred = Pred()) { + return lower_bound_sequential_team(handle, view, value, 0, view.size(), pred); +} + +/*! \brief A range for the k-ary lower bound search + + The RangeReducer will maximize the lower bound and + minimize the upper bound +*/ +template +struct Range { + T lb; /// lower-bound + T ub; /// upper-bound + + KOKKOS_INLINE_FUNCTION + Range() { init(); } + + KOKKOS_INLINE_FUNCTION + constexpr Range(const T &_lb, const T &_ub) : lb(_lb), ub(_ub) {} + + KOKKOS_INLINE_FUNCTION + void init() { + lb = Kokkos::Experimental::finite_min_v; // will be max'd + ub = Kokkos::Experimental::finite_max_v; // will be min'd + } +}; + +/// \brief maximizes the lower bound, and minimizes the upper bound of a Range +template +struct RangeReducer { + using reducer = RangeReducer; + using value_type = Range; + using result_view_type = + Kokkos::View *, Space, Kokkos::MemoryUnmanaged>; + + private: + value_type &value; + + public: + KOKKOS_INLINE_FUNCTION + RangeReducer(value_type &value_) : value(value_) {} + + KOKKOS_INLINE_FUNCTION + void join(value_type &dst, const value_type &src) const { + dst.lb = 
KOKKOSKERNELS_MACRO_MAX(dst.lb, src.lb); + dst.ub = KOKKOSKERNELS_MACRO_MIN(dst.ub, src.ub); + } + + KOKKOS_INLINE_FUNCTION + void init(value_type &val) const { val.init(); } + + KOKKOS_INLINE_FUNCTION + value_type &reference() const { return value; } + + KOKKOS_INLINE_FUNCTION + result_view_type view() const { return result_view_type(&value, 1); } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { return true; } +}; + +/*! \brief team-collaborative K-ary lower-bound search + + \tparam TeamMember the team policy member type + \tparam ViewLike A Kokkos::View or KokkosKernels::Iota + \tparam Pred the binary predicate function type + + Actually, K+1-ary, where K is the size of the team + Split the view into K+1 segments at K points + Evaluate the predicate in parallel at each point and use a joint min-max + parallel reduction: + * The lower bound is after the max index where the predicate was true + * The upper bound is no greater than the min index where the predicate was + false. Once there are fewer values left than threads in the team, switch to + team sequential search +*/ +template > +KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_kary_team( + const TeamMember &handle, const ViewLike &view, + const typename ViewLike::non_const_value_type &value, Pred pred = Pred()) { + static_assert(1 == ViewLike::rank, + "lower_bound_kary_team requires rank-1 views"); + static_assert(is_iota_v || Kokkos::is_view::value, + "lower_bound_kary_team requires a " + "KokkosKernels::Impl::Iota or a Kokkos::View"); + + using size_type = typename ViewLike::size_type; + + size_type lo = 0; + size_type hi = view.size(); + while (lo < hi) { + // if fewer than team_size elements left, just hit them all sequentially + if (lo + handle.team_size() >= hi) { + return lower_bound_sequential_team(handle, view, value, lo, hi, pred); + } + + // otherwise, split the region up among threads + size_type mid = + lo + (hi - lo) * (handle.team_rank() + 1) / (handle.team_size() + 
1); + auto ve = view(mid); + + // reduce across threads to figure out where the new search bounds are + // if a thread satisfies the predicate, the first element that does not + // satisfy must be after that thread's search point. we want the max such + // point across all threads if a thread does not satisfy the predicate, the + // first element that does not satisfy must be before or equal. we want the + // min such point across all threads + Range teamRange; + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(handle, 0, handle.team_size()), + [&](const int &, Range &lr) { + lr.lb = KOKKOSKERNELS_MACRO_MAX(lo, lr.lb); // no lower than lo + lr.ub = KOKKOSKERNELS_MACRO_MIN(hi, lr.ub); // no higher than hi + // if pred(view(mid), value), then the lower bound is above this + if (pred(ve, value)) { + lr.lb = mid + 1; + } else { // otherwise the lower bound is no larger than this + lr.ub = mid; + } + }, + RangeReducer(teamRange)); + + // next iteration, search in the newly-discovered window + hi = teamRange.ub; + lo = teamRange.lb; + } + return lo; +} + +} // namespace Impl + +/*! \brief Team-collaborative lower-bound search + + \tparam TeamMember the team policy member type the Kokkos team handle + \tparam View the type of view + \tparam Pred the type of the predicate + + \param handle a Kokkos team handle + \param view a Kokkos::View to search + \param value the value to search for + \param pred the predicate to test entries in the view + + \returns The smallest i in range [0, view.size()) for which pred(view(i), + value) is not true, or view.size() if no such `i` exists + + default pred is `element < value`, i.e. return the index to the first + element in the view that does not satisfy `element < value`. 
For well-ordered + types this is the first element where element >= value + + Pred should be a binary function comparing two `typename + View::non_const_value_type` +*/ +template > +KOKKOS_INLINE_FUNCTION typename ViewLike::size_type lower_bound_team( + const TeamMember &handle, const ViewLike &view, + const typename ViewLike::non_const_value_type &value, Pred pred = Pred()) { + static_assert(1 == ViewLike::rank, "lower_bound_team requires rank-1 views"); + static_assert(is_iota_v || Kokkos::is_view::value, + "lower_bound_team requires a " + "KokkosKernels::Impl::Iota or a Kokkos::View"); + + /* kary search is A = (k-1) * (logk(view.size()) + 1) accesses + + sequential search is B = view.size() accesses + + A < B is true roughly when view.size() > 3 * k + */ + if (view.size() > 3 * size_t(handle.team_size())) { + return Impl::lower_bound_kary_team(handle, view, value, pred); + } else { + return Impl::lower_bound_sequential_team(handle, view, value, pred); + } +} + +} // namespace KokkosKernels + +#endif // _KOKKOSKERNELS_LOWERBOUND_HPP \ No newline at end of file diff --git a/common/src/KokkosKernels_Predicates.hpp b/common/src/KokkosKernels_Predicates.hpp new file mode 100644 index 0000000000..a741d1353a --- /dev/null +++ b/common/src/KokkosKernels_Predicates.hpp @@ -0,0 +1,167 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef _KOKKOSKERNELS_PREDICATES_HPP +#define _KOKKOSKERNELS_PREDICATES_HPP + +#include "Kokkos_ArithTraits.hpp" + +/*! 
\file KokkosKernels_Predicates.hpp + * Define predicates for KokkosKernels search functions + */ + +namespace KokkosKernels { + +/** + * @brief Struct template for a greater-than predicate + * @tparam T Type to be compared + */ +template +struct GT { + using value_type = T; + static_assert(!Kokkos::ArithTraits::is_complex, + "Please define custom predicates for ordering complex types"); + + /** + * @brief Return true if a is greater than b + * @param a First value to be compared + * @param b Second value to be compared + */ + KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, + const value_type &b) const + noexcept { + return a > b; + } +}; + +/*! \brief "Greater-than-or-equal" predicate, a >= b + \tparam T the type to compare +*/ +template +struct GTE { + using value_type = T; + static_assert(!Kokkos::ArithTraits::is_complex, + "Please define custom predicates for ordering complex types"); + + /// \brief return a >= b + KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, + const value_type &b) const + noexcept { + return a >= b; + } +}; + +/*! \brief "Less-than" predicate, a < b + \tparam T the type to compare +*/ +template +struct LT { + using value_type = T; + static_assert(!Kokkos::ArithTraits::is_complex, + "Please define custom predicates for ordering complex types"); + + /// \brief return a < b + KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, + const value_type &b) const + noexcept { + return a < b; + } +}; + +/*! \brief "Less-than-or-equal" predicate, a <= b + \tparam T the type to compare +*/ +template +struct LTE { + using value_type = T; + static_assert(!Kokkos::ArithTraits::is_complex, + "Please define custom predicates for ordering complex types"); + + /// \brief return a <= b + KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, + const value_type &b) const + noexcept { + return a <= b; + } +}; + +/*! 
\brief "Equal" predicate, a == b + \tparam T the type to compare +*/ +template +struct Equal { + using value_type = T; + + /// \brief return a == b + KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, + const value_type &b) const { + return a == b; + } +}; + +/** + * @brief Struct template for inverting a predicate + * @tparam Pred Predicate type to be inverted + */ +template +struct Neg { + using value_type = typename Pred::value_type; + + /** + * @brief Constructor + * @param pred Predicate object to be inverted + */ + KOKKOS_INLINE_FUNCTION + constexpr Neg(const Pred &pred) : pred_(pred) {} + + /** + * @brief Return the boolean inverse of the underlying predicate + * @param a First value to be compared by the predicate + * @param b Second value to be compared by the predicate + * @return Boolean inverse of the result of the predicate applied to a and b + */ + KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, + const value_type &b) const { + return !pred_(a, b); + } + + private: + Pred pred_; //< Underlying predicate object +}; + +/*! 
\brief Reflect a predicate, pred(b, a) + \tparam Pred the type of the predicate to reflect +*/ +template +struct Refl { + using value_type = typename Pred::value_type; + + KOKKOS_INLINE_FUNCTION + constexpr Refl(const Pred &pred) : pred_(pred) {} + + /// \brief return the underlying binary predicate with reversed arguments + KOKKOS_INLINE_FUNCTION constexpr bool operator()(const value_type &a, + const value_type &b) const { + return pred_(b, a); + } + + private: + Pred pred_; +}; + +} // namespace KokkosKernels + +#endif // _KOKKOSKERNELS_PREDICATES_HPP \ No newline at end of file diff --git a/common/src/KokkosKernels_UpperBound.hpp b/common/src/KokkosKernels_UpperBound.hpp new file mode 100644 index 0000000000..901c865743 --- /dev/null +++ b/common/src/KokkosKernels_UpperBound.hpp @@ -0,0 +1,101 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef _KOKKOSKERNELS_UPPERBOUND_HPP +#define _KOKKOSKERNELS_UPPERBOUND_HPP + +/*! \file KokkosKernels_UpperBound.hpp + Define thread and team-collaborative upper-bound search + + Upper-bound search takes a Kokkos::View, a search value, and a binary + predicate. + It returns an index to the first element of the view such that pred(value, + element) is true + + This is implemented by calling lower_bound functions with inverted and + reflected predicates, i.e. 
upper_bound(view, val, pred) = lower_bound(view, + val, Neg(Refl(pred))); + + Examples: + \verbatim + value = 3 + view = {0,1,2,3,4} + = {f,f,f,f,t} + result = 4 + + value = -1 + view = {0,1,2,3,4} + = {t,t,t,t,t} + result = 0 + + value = 5 + view = {0,1,2,3,4} + = {f,f,f,f,f} + result = 5 + + value = 1 + view = {0,1,1,1,2} + = {f,f,f,f,t} + result = 4 + \endverbatim + + Contrast with lower-bound, which returns first index for which pred(element, + value) is false + */ + +#include "KokkosKernels_LowerBound.hpp" + +namespace KokkosKernels { + +/*! \brief single-thread upper-bound search + + \tparam ViewLike A Kokkos::View or KokkosKernels::Impl::Iota + \tparam Pred a binary predicate function + \param view the view to search + \param value the value to search for + \param pred a binary predicate function + \returns index of first element in view where pred(value,element) is true, + or view.size if no such element exists +*/ +template > +KOKKOS_INLINE_FUNCTION typename ViewLike::size_type upper_bound_thread( + const ViewLike &view, const typename ViewLike::non_const_value_type &value, + Pred pred = Pred()) { + return lower_bound_thread(view, value, Neg(Refl(pred))); +} + +/*! 
\brief team-collaborative upper-bound search + + \tparam ViewLike A Kokkos::View or KokkosKernels::Impl::Iota + \tparam Pred a binary predicate function + \param view the view to search + \param value the value to search for + \param pred a binary predicate function + \returns index of first element in view where pred(value,element) is true, + or view.size if no such element exists +*/ +template > +KOKKOS_INLINE_FUNCTION typename ViewLike::size_type upper_bound_team( + const TeamMember &handle, const ViewLike &view, + const typename ViewLike::non_const_value_type &value, Pred pred = Pred()) { + return lower_bound_team(handle, view, value, Neg(Refl(pred))); +} + +} // namespace KokkosKernels + +#endif // _KOKKOSKERNELS_UPPERBOUND_HPP \ No newline at end of file diff --git a/common/unit_test/Test_Common.hpp b/common/unit_test/Test_Common.hpp index 9b26f9bf9e..2ccf9c2103 100644 --- a/common/unit_test/Test_Common.hpp +++ b/common/unit_test/Test_Common.hpp @@ -25,5 +25,7 @@ #include #include #include +#include +#include #endif // TEST_COMMON_HPP diff --git a/common/unit_test/Test_Common_Iota.hpp b/common/unit_test/Test_Common_Iota.hpp index 7207d6f4b1..cae207d56b 100644 --- a/common/unit_test/Test_Common_Iota.hpp +++ b/common/unit_test/Test_Common_Iota.hpp @@ -85,8 +85,17 @@ void test_iota_subview() { EXPECT_EQ(sub(1), 9); } +template +void test_is_iota() { + static_assert(KokkosKernels::Impl::is_iota_v>, + "Iota should be an Iota"); + static_assert(!KokkosKernels::Impl::is_iota_v, + "int should not be an Iota"); +} + template void test_iota() { + test_is_iota(); test_iota_constructor(); test_iota_rank(); test_iota_subview(); diff --git a/common/unit_test/Test_Common_LowerBound.hpp b/common/unit_test/Test_Common_LowerBound.hpp new file mode 100644 index 0000000000..f2b54eed32 --- /dev/null +++ b/common/unit_test/Test_Common_LowerBound.hpp @@ -0,0 +1,256 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/// \file Test_Common_LowerBound.hpp +/// \brief Tests lower bounds search routines + +#include +#include + +template +size_t std_lower_bound(const std::vector &haystack, + const Ordinal needle) { + const auto it = std::lower_bound(haystack.begin(), haystack.end(), needle); + return it - haystack.begin(); +} + +/*! \brief count the number of incorrect values */ +template +struct ThreadLowerBoundFunctor { + using hv_value_type = typename HaystackView::non_const_value_type; + using hv_size_type = typename HaystackView::size_type; + + ThreadLowerBoundFunctor(const hv_size_type &expected, + const HaystackView &haystack, + const hv_value_type &needle) + : expected_(expected), haystack_(haystack), needle_(needle) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i, int &lerrCount) const { + if (0 == i) { + hv_size_type idx = KokkosKernels::lower_bound_thread(haystack_, needle_); + if (idx != expected_) { + KOKKOS_IMPL_DO_NOT_USE_PRINTF("%s:%d thread %d expected %d got %d\n", + __FILE__, __LINE__, int(i), + int(expected_), int(idx)); + ++lerrCount; + } + } + } + + hv_size_type expected_; + HaystackView haystack_; + hv_value_type needle_; +}; + +template +void test_lower_bound_thread(const std::vector &_haystack, + const T &_needle) { + using execution_space = typename Device::execution_space; + using Policy = Kokkos::RangePolicy; + using view_t = Kokkos::View; + using u_const_view_t = Kokkos::View>; + using size_type = typename u_const_view_t::size_type; + + // get expected value + const size_type expected = 
std_lower_bound(_haystack, _needle); + + // create device views of input data + u_const_view_t uhaystack(_haystack.data(), _haystack.size()); + view_t haystack("haystack", uhaystack.size()); + Kokkos::deep_copy(haystack, uhaystack); + + // test lower_bound search + int errCount; + // run a single thread + Kokkos::parallel_reduce(Policy(0, 1), + ThreadLowerBoundFunctor(expected, haystack, _needle), + errCount); + + EXPECT_EQ(0, errCount); +} + +/*! \brief count the number of incorrect values */ +template +struct TeamLowerBoundFunctor { + using hv_value_type = typename HaystackView::non_const_value_type; + using hv_size_type = typename HaystackView::size_type; + + TeamLowerBoundFunctor(const hv_size_type &expected, + const HaystackView &haystack, + const hv_value_type &needle) + : expected_(expected), haystack_(haystack), needle_(needle) {} + + KOKKOS_INLINE_FUNCTION void operator()(const Member &handle, + int &lerrCount) const { + hv_size_type idx = + KokkosKernels::lower_bound_team(handle, haystack_, needle_); + if (idx != expected_) { + KOKKOS_IMPL_DO_NOT_USE_PRINTF("%s:%d thread %d expected %d got %d\n", + __FILE__, __LINE__, int(handle.team_rank()), + int(expected_), int(idx)); + ++lerrCount; + } + } + + hv_size_type expected_; + HaystackView haystack_; + hv_value_type needle_; +}; + +template +void test_lower_bound_team(const std::vector &_haystack, const T _needle) { + using execution_space = typename Device::execution_space; + using Policy = Kokkos::TeamPolicy; + using Member = typename Policy::member_type; + using view_t = Kokkos::View; + using u_const_view_t = Kokkos::View>; + using size_type = typename u_const_view_t::size_type; + + // get expected value + const size_type expected = std_lower_bound(_haystack, _needle); + + // create device views of input data + u_const_view_t uhaystack(_haystack.data(), _haystack.size()); + view_t haystack("haystack", uhaystack.size()); + Kokkos::deep_copy(haystack, uhaystack); + + // test lower_bound search + const int 
leagueSize = 1; + const int teamSize = + KokkosKernels::Impl::kk_is_gpu_exec_space() ? 64 : 1; + int errCount; + Kokkos::parallel_reduce( + Policy(leagueSize, teamSize), + TeamLowerBoundFunctor(expected, haystack, _needle), + errCount); + + EXPECT_EQ(0, errCount); +} + +template +void test_lower_bound(const std::vector &haystack, const T needle) { + test_lower_bound_thread(haystack, needle); + test_lower_bound_team(haystack, needle); +} + +template +T randn(T n) { + if constexpr (std::is_floating_point_v) { + return T(rand()) / T(RAND_MAX) * n; + } else { + return T(rand()) % n; + } +} + +/* define specific and random lower-bound test cases + */ +template +void test_lower_bound() { + test_lower_bound({}, T(0)); + test_lower_bound({}, T(1)); + test_lower_bound({}, T(-1)); + + test_lower_bound({0}, T(0)); + test_lower_bound({0}, T(1)); + test_lower_bound({0}, T(-1)); + + test_lower_bound({1}, T(0)); + test_lower_bound({1}, T(1)); + test_lower_bound({1}, T(-1)); + + test_lower_bound({T(-1)}, T(0)); + test_lower_bound({T(-1)}, T(1)); + test_lower_bound({T(-1)}, T(-1)); + + test_lower_bound({0, 1, T(2.5), 3, 4, 5}, T(-1)); + test_lower_bound({0, 1, T(2.5), 3, 4, 5}, T(0)); + test_lower_bound({0, 1, T(2.5), 3, 4, 5}, T(1)); + test_lower_bound({0, 1, T(2.5), 3, 4, 5}, T(2)); + test_lower_bound({0, 1, T(2.5), 3, 4, 5}, T(2.4)); + test_lower_bound({0, 1, T(2.5), 3, 4, 5}, T(2.5)); + test_lower_bound({0, 1, T(2.5), 3, 4, 5}, T(2.6)); + test_lower_bound({0, 1, T(2.5), 3, 4, 5}, T(3)); + test_lower_bound({0, 1, T(2.5), 3, 4, 5}, T(4)); + test_lower_bound({0, 1, T(2.5), 3, 4, 5}, T(5)); + test_lower_bound({0, 1, T(2.5), 3, 4, 5}, T(6)); + + auto randn = [](T n) { + if constexpr (std::is_floating_point_v) { + return T(rand()) / T(RAND_MAX) * n; + } else { + return T(rand()) % n; + } + }; + + T maxEntry = 20; + const int numTests = 100; + for (int n = 0; n < numTests; ++n) { + for (size_t sz : {10, 100, 1000}) { + // generate a sorted random vector + std::vector haystack; + for 
(size_t i = 0; i < sz; ++i) { + haystack.push_back(randn(maxEntry)); + } + std::sort(haystack.begin(), haystack.end()); + + // generate a random value to search for + const T needle = randn(maxEntry); + + // do the test + test_lower_bound(haystack, needle); + } + } +} + +#define EXECUTE_TEST(T, DEVICE) \ + TEST_F(TestCategory, common##_##lower_bound##_##T##_##DEVICE) { \ + test_lower_bound(); \ + } + +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +EXECUTE_TEST(int, TestExecSpace) +#endif + +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +EXECUTE_TEST(int64_t, TestExecSpace) +#endif + +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +EXECUTE_TEST(size_t, TestExecSpace) +#endif + +#if (defined(KOKKOSKERNELS_INST_FLOAT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +EXECUTE_TEST(float, TestExecSpace) +#endif + +#if (defined(KOKKOSKERNELS_INST_DOUBLE)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +EXECUTE_TEST(double, TestExecSpace) +#endif + +#undef EXECUTE_TEST \ No newline at end of file diff --git a/common/unit_test/Test_Common_UpperBound.hpp b/common/unit_test/Test_Common_UpperBound.hpp new file mode 100644 index 0000000000..b99ffbb0a6 --- /dev/null +++ b/common/unit_test/Test_Common_UpperBound.hpp @@ -0,0 +1,245 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/// \file Test_Common_UpperBound.hpp +/// \brief Tests upper bounds search routines + +#include +#include + +/*! \brief host reference result: offset from haystack.begin() of the iterator returned by std::upper_bound over the whole haystack */ +template +size_t std_upper_bound(const std::vector &haystack, + const Ordinal needle) { + const auto it = std::upper_bound(haystack.begin(), haystack.end(), needle); + return it - haystack.begin(); +} + +/*! \brief count the number of incorrect values: only index 0 calls KokkosKernels::upper_bound_thread(haystack_, needle_); a result that differs from expected_ is printed and added to the error count */ +template +struct ThreadUpperBoundFunctor { + using hv_value_type = typename HaystackView::non_const_value_type; + using hv_size_type = typename HaystackView::size_type; + + ThreadUpperBoundFunctor(const hv_size_type &expected, + const HaystackView &haystack, + const hv_value_type &needle) + : expected_(expected), haystack_(haystack), needle_(needle) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i, int &lerrCount) const { + if (0 == i) { + hv_size_type idx = KokkosKernels::upper_bound_thread(haystack_, needle_); + if (idx != expected_) { + printf("%s:%d thread %d expected %d got %d\n", __FILE__, __LINE__, + int(i), int(expected_), int(idx)); + ++lerrCount; + } + } + } + + hv_size_type expected_; + HaystackView haystack_; + hv_value_type needle_; +}; + +/*! \brief compare upper_bound_thread against std_upper_bound for one haystack/needle pair: copies the input into the view labeled "haystack", reduces the functor's error count over the range [0, 1), and expects that count to be zero */ +template +void test_upper_bound_thread(const std::vector &_haystack, + const T &_needle) { + using execution_space = typename Device::execution_space; + using Policy = Kokkos::RangePolicy; + using view_t = Kokkos::View; + using u_const_view_t = Kokkos::View>; + using hv_size_type = typename u_const_view_t::size_type; + + // get expected value + const hv_size_type expected = std_upper_bound(_haystack, _needle); + + // create device views of input data + u_const_view_t uhaystack(_haystack.data(), _haystack.size()); + view_t haystack("haystack", uhaystack.size()); + Kokkos::deep_copy(haystack, uhaystack); + + // test upper_bound search + 
int errCount; + // run a single thread + Kokkos::parallel_reduce(Policy(0, 1), + ThreadUpperBoundFunctor(expected, haystack, _needle), + errCount); + + EXPECT_EQ(0, errCount); +} + +/*! \brief count the number of incorrect values: each call runs KokkosKernels::upper_bound_team(handle, haystack_, needle_); a result that differs from expected_ is printed with the caller's team_rank() and added to the error count */ +template +struct TeamUpperBoundFunctor { + using hv_value_type = typename HaystackView::non_const_value_type; + using hv_size_type = typename HaystackView::size_type; + + TeamUpperBoundFunctor(const hv_size_type &expected, + const HaystackView &haystack, + const hv_value_type &needle) + : expected_(expected), haystack_(haystack), needle_(needle) {} + + KOKKOS_INLINE_FUNCTION void operator()(const Member &handle, + int &lerrCount) const { + hv_size_type idx = + KokkosKernels::upper_bound_team(handle, haystack_, needle_); + if (idx != expected_) { + printf("%s:%d thread %d expected %d got %d\n", __FILE__, __LINE__, + int(handle.team_rank()), int(expected_), int(idx)); + ++lerrCount; + } + } + + hv_size_type expected_; + HaystackView haystack_; + hv_value_type needle_; +}; + +/*! \brief compare upper_bound_team against std_upper_bound for one haystack/needle pair, using a league of 1 team whose size is 64 when kk_is_gpu_exec_space() is true and 1 otherwise; expects the reduced error count to be zero */ +template +void test_upper_bound_team(const std::vector &_haystack, const T _needle) { + using execution_space = typename Device::execution_space; + using Policy = Kokkos::TeamPolicy; + using Member = typename Policy::member_type; + using view_t = Kokkos::View; + using u_const_view_t = Kokkos::View>; + using hv_size_type = typename u_const_view_t::size_type; + + // get expected value + const hv_size_type expected = std_upper_bound(_haystack, _needle); + + // create device views of input data + u_const_view_t uhaystack(_haystack.data(), _haystack.size()); + view_t haystack("haystack", uhaystack.size()); + Kokkos::deep_copy(haystack, uhaystack); + + // test upper_bound search + const int leagueSize = 1; + const int teamSize = + KokkosKernels::Impl::kk_is_gpu_exec_space() ? 
64 : 1; + int errCount; + Kokkos::parallel_reduce( + Policy(leagueSize, teamSize), + TeamUpperBoundFunctor(expected, haystack, _needle), + errCount); + + EXPECT_EQ(0, errCount); +} + +/*! \brief run the thread test and then the team test on the same haystack and needle */ +template +void test_upper_bound(const std::vector &haystack, const T needle) { + test_upper_bound_thread(haystack, needle); + test_upper_bound_team(haystack, needle); +} + +/* define specific and random upper-bound test cases + * + * Fixed cases search empty, one-element, and six-element haystacks. + * Random cases run 100 rounds over sorted haystacks of 10, 100, and 1000 + * entries; entries and needle are both drawn from randn(maxEntry), maxEntry = 20. + */ +template +void test_upper_bound() { + test_upper_bound({}, T(0)); + test_upper_bound({}, T(1)); + test_upper_bound({}, T(-1)); + + test_upper_bound({0}, T(0)); + test_upper_bound({0}, T(1)); + test_upper_bound({0}, T(-1)); + + test_upper_bound({1}, T(0)); + test_upper_bound({1}, T(1)); + test_upper_bound({1}, T(-1)); + + test_upper_bound({T(-1)}, T(0)); + test_upper_bound({T(-1)}, T(1)); + test_upper_bound({T(-1)}, T(-1)); + + test_upper_bound({0, 1, T(2.5), 3, 4, 5}, T(-1)); + test_upper_bound({0, 1, T(2.5), 3, 4, 5}, T(0)); + test_upper_bound({0, 1, T(2.5), 3, 4, 5}, T(1)); + test_upper_bound({0, 1, T(2.5), 3, 4, 5}, T(2)); + test_upper_bound({0, 1, T(2.5), 3, 4, 5}, T(2.4)); + test_upper_bound({0, 1, T(2.5), 3, 4, 5}, T(2.5)); + test_upper_bound({0, 1, T(2.5), 3, 4, 5}, T(2.6)); + test_upper_bound({0, 1, T(2.5), 3, 4, 5}, T(3)); + test_upper_bound({0, 1, T(2.5), 3, 4, 5}, T(4)); + test_upper_bound({0, 1, T(2.5), 3, 4, 5}, T(5)); + test_upper_bound({0, 1, T(2.5), 3, 4, 5}, T(6)); + + /* random value: T(rand()) / T(RAND_MAX) * n for floating-point T, T(rand()) % n otherwise */ + auto randn = [](T n) { + if constexpr (std::is_floating_point_v) { + return T(rand()) / T(RAND_MAX) * n; + } else { + return T(rand()) % n; + } + }; + + constexpr T maxEntry = 20; + const int numTests = 100; + for (int n = 0; n < numTests; ++n) { + for (size_t sz : {10, 100, 1000}) { + // generate a sorted random vector + std::vector haystack; + for (size_t i = 0; i < sz; ++i) { + haystack.push_back(randn(maxEntry)); + } + std::sort(haystack.begin(), haystack.end()); + + // generate a random value to search for + const T needle = randn(maxEntry); + + // do the test + 
test_upper_bound(haystack, needle); + } + } +} + +/* EXECUTE_TEST defines one TEST_F in TestCategory, named by pasting T and DEVICE into common_upper_bound_T_DEVICE, whose body calls test_upper_bound(). Each instantiation below is compiled when its KOKKOSKERNELS_INST_* macro is defined, or when neither KOKKOSKERNELS_ETI_ONLY nor KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS is defined. */ +#define EXECUTE_TEST(T, DEVICE) \ + TEST_F(TestCategory, common##_##upper_bound##_##T##_##DEVICE) { \ + test_upper_bound(); \ + } + +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +EXECUTE_TEST(int, TestExecSpace) +#endif + +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +EXECUTE_TEST(int64_t, TestExecSpace) +#endif + +/* NOTE(review): the size_t case below is guarded by KOKKOSKERNELS_INST_ORDINAL_INT, the same macro as the int case - confirm this is intended */ +#if (defined(KOKKOSKERNELS_INST_ORDINAL_INT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +EXECUTE_TEST(size_t, TestExecSpace) +#endif + +#if (defined(KOKKOSKERNELS_INST_FLOAT)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +EXECUTE_TEST(float, TestExecSpace) +#endif + +#if (defined(KOKKOSKERNELS_INST_DOUBLE)) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +EXECUTE_TEST(double, TestExecSpace) +#endif + +#undef EXECUTE_TEST \ No newline at end of file