diff --git a/lapack/unit_test/Test_Lapack_svd.hpp b/lapack/unit_test/Test_Lapack_svd.hpp index a0a1f31ab0..032b9f86c6 100644 --- a/lapack/unit_test/Test_Lapack_svd.hpp +++ b/lapack/unit_test/Test_Lapack_svd.hpp @@ -477,7 +477,8 @@ int impl_test_svd(const int m, const int n) { std::cout << "Running impl_test_svd with sizes: " << m << "x" << n << std::endl; - const mag_type tol = 1000 * KAT_S::eps(); + const mag_type max_val = 10; /* magnitude handed to Test::getRandomBounds below; tol is scaled by the same factor */ + const mag_type tol = 1000 * max_val * KAT_S::eps(); AMatrix A("A", m, n), U("U", m, m), Vt("Vt", n, n), Aref("A ref", m, n); vector_type S("S", Kokkos::min(m, n)); @@ -488,11 +489,23 @@ int impl_test_svd(const int m, const int n) { // Initialize A with random numbers scalar_type randStart = 0, randEnd = 0; - Test::getRandomBounds(10.0, randStart, randEnd); + Test::getRandomBounds(max_val, randStart, randEnd); Kokkos::fill_random(A, rand_pool, randStart, randEnd); Kokkos::deep_copy(Aref, A); + // Working around CUSOLVER constraint for m >= n +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) + if constexpr (std::is_same_v) { + if (m >= n) { + KokkosLapack::svd("A", "A", A, S, U, Vt); + } + } else { + KokkosLapack::svd("A", "A", A, S, U, Vt); + } +#else KokkosLapack::svd("A", "A", A, S, U, Vt); +#endif /* NOTE(review): in the CUSOLVER branch above the svd call is skipped when m < n, yet the checks that follow still run on U and Vt - confirm this is intended */ check_unitary_orthogonal_matrix(U, tol); check_unitary_orthogonal_matrix(Vt, tol); diff --git a/perf_test/CMakeLists.txt b/perf_test/CMakeLists.txt index cf1905d6d4..28271dfb0d 100644 --- a/perf_test/CMakeLists.txt +++ b/perf_test/CMakeLists.txt @@ -49,6 +49,7 @@ if (KokkosKernels_ENABLE_PERFTESTS) ADD_COMPONENT_SUBDIRECTORY(sparse) ADD_COMPONENT_SUBDIRECTORY(blas) ADD_COMPONENT_SUBDIRECTORY(ode) + ADD_COMPONENT_SUBDIRECTORY(lapack) ADD_SUBDIRECTORY(performance) #ADD_SUBDIRECTORY(common) diff --git a/perf_test/lapack/CMakeLists.txt b/perf_test/lapack/CMakeLists.txt new file mode 100644 index 0000000000..478703d38a --- /dev/null +++ b/perf_test/lapack/CMakeLists.txt @@ -0,0 +1,8 @@ +KOKKOSKERNELS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) 
+KOKKOSKERNELS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +if(KOKKOSKERNELS_ENABLE_BENCHMARK) + KOKKOSKERNELS_ADD_BENCHMARK( + lapack_svd SOURCES KokkosLapack_SVD_benchmark.cpp + ) +endif() diff --git a/perf_test/lapack/KokkosLapack_SVD_benchmark.cpp b/perf_test/lapack/KokkosLapack_SVD_benchmark.cpp new file mode 100644 index 0000000000..1ac9381ff8 --- /dev/null +++ b/perf_test/lapack/KokkosLapack_SVD_benchmark.cpp @@ -0,0 +1,124 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include "KokkosLapack_svd.hpp" + +#include "KokkosKernels_TestUtils.hpp" +#include "KokkosKernels_perf_test_utilities.hpp" + +#include +#include "Benchmark_Context.hpp" +/* Settings for one SVD benchmark run: the dimensions of the input matrix A (numRows x numCols) and a verbose flag. The values are filled in from the command line by parse_inputs. */ +struct svd_parameters { + int numRows, numCols; + bool verbose; + + svd_parameters(const int numRows_, const int numCols_, const bool verbose_) + : numRows(numRows_), numCols(numCols_), verbose(verbose_){}; +}; +/* Writes the usage text to std::cerr: the common perf-test options first, then the three options specific to this benchmark (--verbose, --m, --n). */ +void print_options() { + std::cerr << "Options\n" << std::endl; + + std::cerr << perf_test::list_common_options(); + + std::cerr << "\t[Optional] --verbose :: enable verbose output" + << std::endl; + std::cerr << "\t[Optional] --m :: number of rows of A" << std::endl; + std::cerr << "\t[Optional] --n :: number of columns of A" + << std::endl; +} // print_options +/* Scans argv[1] .. argv[argc-1] for --m, --n and --verbose and stores what it finds in params. Returns 0 when every argument was recognized; on the first unrecognized argument it reports that argument on std::cerr, prints the usage text and returns 1. */ +int parse_inputs(svd_parameters& params, int argc, char** argv) { + for (int i = 1; i < argc; ++i) { + if (perf_test::check_arg_int(i, argc, argv, "--m", params.numRows)) { + ++i; /* presumably steps over the value that followed the flag - confirm against check_arg_int */ + } else if (perf_test::check_arg_int(i, argc, argv, "--n", params.numCols)) 
{ + ++i; + } else if (perf_test::check_arg_bool(i, argc, argv, "--verbose", + params.verbose)) { + } else { + std::cerr << "Unrecognized command line argument #" << i << ": " + << argv[i] << std::endl; + print_options(); + return 1; + } + } + return 0; +} // parse_inputs +/* Benchmark body. Allocates A (m x n), U (m x m), Vt (n x n) and S (min(m, n)) with m = svd_params.numRows and n = svd_params.numCols, fills A with random values once, then on every benchmark iteration calls KokkosLapack::svd("A", "A", A, S, U, Vt) followed by Kokkos::fence(). NOTE(review): A is only filled before the loop; if svd overwrites A (LAPACK gesvd does - confirm for each backend) the iterations after the first decompose different data. */ +template +void run_svd_benchmark(benchmark::State& state, + const svd_parameters& svd_params) { + using mat_type = Kokkos::View; + using vec_type = Kokkos::View; + + const int m = svd_params.numRows; + const int n = svd_params.numCols; + + mat_type A("A", m, n), U("U", m, m), Vt("Vt", n, n); + vec_type S("S", Kokkos::min(m, n)); + + const uint64_t seed = + std::chrono::high_resolution_clock::now().time_since_epoch().count(); /* clock-derived seed, so the contents of A differ from run to run */ + Kokkos::Random_XorShift64_Pool rand_pool(seed); + + // Initialize A with random numbers + double randStart = 0, randEnd = 0; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(A, rand_pool, randStart, randEnd); + + for (auto _ : state) { + (void)_; /* the loop variable is intentionally unused */ + KokkosLapack::svd("A", "A", A, S, U, Vt); + Kokkos::fence(); /* presumably so the measured time covers completion of the svd work - confirm */ + } +} +/* Entry point. Initializes Kokkos and the benchmark library, reads the common and the SVD-specific command-line options, registers a single benchmark named "KokkosLapack_SVD" (with a fixed iteration count when common_params.repeat is positive), runs it, then shuts the benchmark library and Kokkos down. Always returns 0. */ +int main(int argc, char** argv) { + Kokkos::initialize(argc, argv); + + benchmark::Initialize(&argc, argv); + benchmark::SetDefaultTimeUnit(benchmark::kMillisecond); + KokkosKernelsBenchmark::add_benchmark_context(true); + + perf_test::CommonInputParams common_params; + perf_test::parse_common_options(argc, argv, common_params); + svd_parameters svd_params(0, 0, false); /* NOTE(review): sizes stay 0 x 0 unless --m and --n are given - confirm that an empty problem is an acceptable default */ + parse_inputs(svd_params, argc, argv); /* NOTE(review): the return value is ignored, so the benchmark still runs after an unrecognized argument - confirm this is intended */ + + std::string bench_name = "KokkosLapack_SVD"; + + if (0 < common_params.repeat) { + benchmark::RegisterBenchmark( + bench_name.c_str(), run_svd_benchmark, + svd_params) + ->UseRealTime() + ->Iterations(common_params.repeat); + } else { + benchmark::RegisterBenchmark( + bench_name.c_str(), run_svd_benchmark, + svd_params) + ->UseRealTime(); + } + + benchmark::RunSpecifiedBenchmarks(); + + benchmark::Shutdown(); + Kokkos::finalize(); + + return 0; +}