From d3ffe8214f9f1b5287c093fc98ed1d917ef74e43 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Mon, 6 Feb 2023 17:13:01 -0700 Subject: [PATCH] Perf Tests: adding utilities and instantiation wrapper The goal of this work is to create a common core infrastructure for the performance tests in order to simplify maintenance. Here two ideas are introduced: 1. the instantiation wrapper 2. the common input parser both are trying to capture some of the implementation of our performance tests in generic functions that can be called instead of duplicating logic around instantiation and command line input parsing. The new parsing routine checks the parameter name and that the associated value can be cast properly. It also adds some logic to remove the arguments from argv and argc once they are parsed properly. --- .../KokkosKernels_perf_test_instantiation.hpp | 133 +++++++++ .../KokkosKernels_perf_test_utilities.hpp | 146 ++++++++++ perf_test/sparse/KokkosSparse_mdf.cpp | 268 ++++++------------ 3 files changed, 364 insertions(+), 183 deletions(-) create mode 100644 perf_test/KokkosKernels_perf_test_instantiation.hpp create mode 100644 perf_test/KokkosKernels_perf_test_utilities.hpp diff --git a/perf_test/KokkosKernels_perf_test_instantiation.hpp b/perf_test/KokkosKernels_perf_test_instantiation.hpp new file mode 100644 index 0000000000..9ed5ec23bc --- /dev/null +++ b/perf_test/KokkosKernels_perf_test_instantiation.hpp @@ -0,0 +1,133 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +// +// Created by Berger-Vergiat, Luc on 2/6/23. +// + +#ifndef KOKKOSKERNELS_PERF_TEST_INSTANTIATION_HPP +#define KOKKOSKERNELS_PERF_TEST_INSTANTIATION_HPP + +#include "KokkosKernels_perf_test_utilities.hpp" + +#ifndef KOKKOSKERNELS_PERF_TEST_NAME +#error "The macro KOKKOSKERNELS_PERF_TEST_NAME was not defined" +#endif + +int main_instantiation(int argc, char** argv) { + perf_test::CommonInputParams params; + perf_test::parse_common_options(argc, argv, params); + + /* Assumption is that use_openmp/use_threads variables are */ + /* provided as numbers of threads */ + int num_threads = 1; + if (params.use_openmp) { + num_threads = params.use_openmp; + } else if (params.use_threads) { + num_threads = params.use_threads; + } + + int device_id = 0; + if (params.use_cuda) + device_id = params.use_cuda - 1; + else if (params.use_hip) + device_id = params.use_hip - 1; + else if (params.use_sycl) + device_id = params.use_sycl - 1; + + Kokkos::initialize(Kokkos::InitializationSettings() + .set_num_threads(num_threads) + .set_device_id(device_id)); + Kokkos::print_configuration(std::cout); + std::cout << '\n'; + + bool ran = false; + + if (params.use_openmp) { +#if defined(KOKKOS_ENABLE_OPENMP) + std::cout << "Running on OpenMP backend.\n"; + KOKKOSKERNELS_PERF_TEST_NAME(argc, argv, params); + ran = true; +#else + std::cout << "ERROR: OpenMP requested, but not available.\n"; + Kokkos::finalize(); + return 1; +#endif + } + if (params.use_threads) { +#if defined(KOKKOS_ENABLE_THREADS) + std::cout << "Running on Threads backend.\n"; + KOKKOSKERNELS_PERF_TEST_NAME(argc, argv, params); + ran = true; +#else + std::cout << "ERROR: Threads requested, but not available.\n"; + Kokkos::finalize(); + return 1; +#endif + } + if (params.use_cuda) { +#if defined(KOKKOS_ENABLE_CUDA) + std::cout << "Running on Cuda backend.\n"; + KOKKOSKERNELS_PERF_TEST_NAME(argc, argv, params); + ran = true; +#else + std::cout << 
"ERROR: CUDA requested, but not available.\n"; + Kokkos::finalize(); + return 1; +#endif + } + if (params.use_hip) { +#if defined(KOKKOS_ENABLE_HIP) + std::cout << "Running on HIP backend.\n"; + KOKKOSKERNELS_PERF_TEST_NAME(argc, argv, params); + ran = true; +#else + std::cout << "ERROR: HIP requested, but not available.\n"; + Kokkos::finalize(); + return 1; +#endif + } + if (params.use_sycl) { +#if defined(KOKKOS_ENABLE_SYCL) + std::cout << "Running on SYCL backend.\n"; + KOKKOSKERNELS_PERF_TEST_NAME(argc, argv, + params); + ran = true; +#else + std::cout << "ERROR: SYCL requested, but not available.\n"; + Kokkos::finalize(); + return 1; +#endif + } + if (!ran) { +#if defined(KOKKOS_ENABLE_SERIAL) + std::cout << "Running on Serial backend.\n"; + KOKKOSKERNELS_PERF_TEST_NAME(argc, argv, params); +#else + std::cout << "ERROR: Tried to run on Serial device (as no parallel" + " backends requested), but Serial is not enabled.\n"; + Kokkos::finalize(); + return 1; +#endif + } + Kokkos::finalize(); + return 0; +} + +// Undefine the macro to avoid potential bad interaction +// with other parts of the code... +#undef KOKKOSKERNELS_PERF_TEST_NAME + +#endif // KOKKOSKERNELS_PERF_TEST_INSTANTIATION_HPP diff --git a/perf_test/KokkosKernels_perf_test_utilities.hpp b/perf_test/KokkosKernels_perf_test_utilities.hpp new file mode 100644 index 0000000000..b798d55a8e --- /dev/null +++ b/perf_test/KokkosKernels_perf_test_utilities.hpp @@ -0,0 +1,146 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +// +// Created by Berger-Vergiat, Luc on 2/6/23. +// + +#ifndef KOKKOSKERNELS_PERF_TEST_UTILITIES_HPP +#define KOKKOSKERNELS_PERF_TEST_UTILITIES_HPP + +// Namespace that defines common utilities +// for performance tests +namespace perf_test { + +struct CommonInputParams { + int use_cuda = 0; + int use_hip = 0; + int use_sycl = 0; + int use_openmp = 0; + int use_threads = 0; +}; + +std::string list_common_options() { + std::ostringstream common_options; + common_options + << "\t[Required] BACKEND:\n" + << "\t\t'--threads [numThreads]' |\n" + << "\t\t'--openmp [numThreads]' |\n" + << "\t\t'--cuda [deviceIndex]' |\n" + << "\t\t'--hip [deviceIndex]' |\n" + << "\t\t'--sycl [deviceIndex]'\n\n" + << "\tIf no parallel backend is requested, Serial will be used " + "(if enabled)\n\n"; + + return common_options.str(); +} + +void process_arg_int(char const* str_val, int& val) { + errno = 0; + char* ptr_end; + val = std::strtol(str_val, &ptr_end, 10); + + if (str_val == ptr_end) { + std::stringstream ss; + ss << "Error: cannot convert command line argument '" << str_val + << "' to an integer.\n"; + throw std::invalid_argument(ss.str()); + } + + if (errno == ERANGE) { + std::stringstream ss; + ss << "Error: converted value for command line argument '" << str_val + << "' falls out of range.\n"; + throw std::invalid_argument(ss.str()); + } +} + +bool check_arg_int(int const i, int const argc, char** argv, char const* name, + int& val) { + if (0 != Test::string_compare_no_case(argv[i], name)) { + return false; + } + + if (i < argc - 1) { + process_arg_int(argv[i + 1], val); + } else { + std::stringstream msg; + msg << name << " input argument needs to be followed by an int"; + throw std::invalid_argument(msg.str()); + } + return true; +} + +bool check_arg_bool(int const i, int const /*argc*/, char** argv, + char const* name, bool& val) { + if (0 != Test::string_compare_no_case(argv[i], name)) { + return 
false; + } + val = true; + return true; +} + +bool check_arg_str(int const i, int const argc, char** argv, char const* name, + std::string& val) { + if (0 != Test::string_compare_no_case(argv[i], name)) { + return false; + } + + if (i < argc - 1) { + val = std::string(argv[i + 1]); + } else { + std::stringstream msg; + msg << name << " input argument needs to be followed by a string"; + throw std::invalid_argument(msg.str()); + } + return true; +} + +void parse_common_options(int& argc, char** argv, CommonInputParams& params) { + // Skip the program name, start with argIdx=1 + int argIdx = 1; + while (argIdx < argc) { + bool remove_flag = false; + if (check_arg_int(argIdx, argc, argv, "--threads", params.use_threads)) { + remove_flag = true; + } else if (check_arg_int(argIdx, argc, argv, "--openmp", + params.use_openmp)) { + remove_flag = true; + } else if (check_arg_int(argIdx, argc, argv, "--cuda", params.use_cuda)) { + remove_flag = true; + } else if (check_arg_int(argIdx, argc, argv, "--hip", params.use_hip)) { + remove_flag = true; + } else if (check_arg_int(argIdx, argc, argv, "--sycl", params.use_sycl)) { + remove_flag = true; + } + + if (remove_flag) { + // Shift the remainder of the argv list by one. Note that argv has + // (argc + 1) arguments, the last one always being nullptr. The following + // loop moves the trailing nullptr element as well + for (int k = argIdx; k < argc - 1; ++k) { + argv[k] = argv[k + 2]; + argv[k + 1] = argv[k + 3]; + } + argc = argc - 2; + } else { + ++argIdx; + } + } +} // parse_common_options() + +} // namespace perf_test + +#endif // KOKKOSKERNELS_PERF_TEST_UTILITIES_HPP diff --git a/perf_test/sparse/KokkosSparse_mdf.cpp b/perf_test/sparse/KokkosSparse_mdf.cpp index ca48df8fd2..8f1ddc4e14 100644 --- a/perf_test/sparse/KokkosSparse_mdf.cpp +++ b/perf_test/sparse/KokkosSparse_mdf.cpp @@ -1,4 +1,4 @@ -//@HEADER +//@HEADERA // ************************************************************************ // // Kokkos v. 
4.0 @@ -19,15 +19,14 @@ #include "KokkosKernels_Handle.hpp" #include "KokkosSparse_IOUtils.hpp" #include "KokkosSparse_Utils_cusparse.hpp" -#include "KokkosSparse_mdf.hpp" #include "KokkosKernels_TestUtils.hpp" +#include "KokkosKernels_perf_test_utilities.hpp" + +#include "KokkosSparse_mdf.hpp" + +using perf_test::CommonInputParams; -struct Params { - int use_cuda = 0; - int use_hip = 0; - int use_sycl = 0; - int use_openmp = 0; - int use_threads = 0; +struct LocalParams { std::string amtx; int m = 10000; int n = 10000; @@ -54,8 +53,61 @@ struct diag_generator_functor { } }; -template -void run_experiment(const Params& params) { +void print_options() { + std::cerr << "Options\n" << std::endl; + + std::cerr << perf_test::list_common_options(); + + std::cerr << "\t[Optional] --amtx :: input matrix" << std::endl; + std::cerr << "\t[Optional] --repeat :: how many times to repeat overall " + "MDF" + << std::endl; + std::cerr << "\t[Optional] --verbose :: enable verbose output" + << std::endl; + std::cerr << "\nSettings for randomly generated A matrix" << std::endl; + std::cerr << "\t[Optional] --m :: number of rows to generate" + << std::endl; + std::cerr << "\t[Optional] --n :: number of cols to generate" + << std::endl; + std::cerr + << "\t[Optional] --nnz :: number of entries per row to generate" + << std::endl; + std::cerr << "\t[Optional] --diag :: generate a diagonal matrix" + << std::endl; +} // print_options + +int parse_inputs(LocalParams& params, int argc, char** argv) { + for (int i = 1; i < argc; ++i) { + if (perf_test::check_arg_str(i, argc, argv, "--amtx", params.amtx)) { + ++i; + } else if (perf_test::check_arg_int(i, argc, argv, "--m", params.m)) { + ++i; + } else if (perf_test::check_arg_int(i, argc, argv, "--n", params.n)) { + ++i; + } else if (perf_test::check_arg_int(i, argc, argv, "--nnz", + params.nnzPerRow)) { + ++i; + } else if (perf_test::check_arg_bool(i, argc, argv, "--diag", + params.diag)) { + } else if (perf_test::check_arg_int(i, argc, 
argv, "--repeat", + params.repeat)) { + ++i; + } else if (perf_test::check_arg_bool(i, argc, argv, "--verbose", + params.verbose)) { + } else { + std::cerr << "Unrecognized command line argument #" << i << ": " + << argv[i] << std::endl; + print_options(); + return 1; + } + } + return 0; +} // parse_inputs + +template +void run_experiment(int argc, char** argv, CommonInputParams /*params*/) { + using crsMat_t = + KokkosSparse::CrsMatrix; using size_type = typename crsMat_t::size_type; using lno_t = typename crsMat_t::ordinal_type; using scalar_t = typename crsMat_t::value_type; @@ -67,19 +119,22 @@ void run_experiment(const Params& params) { using entries_t = typename graph_t::entries_type::non_const_type; using values_t = typename crsMat_t::values_type::non_const_type; + LocalParams localParams; + parse_inputs(localParams, argc, argv); + std::cout << "************************************* \n"; std::cout << "************************************* \n"; crsMat_t A; - lno_t m = params.m; - lno_t n = params.n; - if (params.amtx.length()) { - std::cout << "Loading A from " << params.amtx << '\n'; + lno_t m = localParams.m; + lno_t n = localParams.n; + if (localParams.amtx.length()) { + std::cout << "Loading A from " << localParams.amtx << '\n'; A = KokkosSparse::Impl::read_kokkos_crst_matrix( - params.amtx.c_str()); + localParams.amtx.c_str()); m = A.numRows(); n = A.numCols(); } else { - if (params.diag) { + if (localParams.diag) { std::cout << "Randomly generating diag matrix\n"; rowmap_t rowmapA("A row map", m + 1); entries_t entriesA("A entries", m); @@ -100,13 +155,13 @@ void run_experiment(const Params& params) { A = crsMat_t("A matrix", m, valuesA, graph); } else { std::cout << "Randomly generating matrix\n"; - size_type nnzUnused = m * params.nnzPerRow; + size_type nnzUnused = m * localParams.nnzPerRow; A = KokkosSparse::Impl::kk_generate_sparse_matrix( m, n, nnzUnused, 0, (n + 3) / 3); } } - if (params.verbose) { + if (localParams.verbose) { std::cout << "Matrix 
A" << std::endl; std::cout << " row_map A:" << std::endl; KokkosKernels::Impl::print_1Dview(A.graph.row_map); @@ -125,9 +180,12 @@ void run_experiment(const Params& params) { timer.reset(); KokkosSparse::Experimental::MDF_handle handle(A); handle.set_verbosity(0); + if (localParams.verbose) { + handle.set_verbosity(1); + } handleTime += timer.seconds(); - for (int sumRep = 0; sumRep < params.repeat; sumRep++) { + for (int sumRep = 0; sumRep < localParams.repeat; sumRep++) { timer.reset(); KokkosSparse::Experimental::mdf_symbolic(A, handle); Kokkos::fence(); @@ -140,16 +198,16 @@ void run_experiment(const Params& params) { } std::cout << "Mean total time: " - << handleTime + (symbolicTime / params.repeat) + - (numericTime / params.repeat) + << handleTime + (symbolicTime / localParams.repeat) + + (numericTime / localParams.repeat) << std::endl << "Handle time: " << handleTime << std::endl - << "Mean symbolic time: " << (symbolicTime / params.repeat) + << "Mean symbolic time: " << (symbolicTime / localParams.repeat) << std::endl - << "Mean numeric time: " << (numericTime / params.repeat) + << "Mean numeric time: " << (numericTime / localParams.repeat) << std::endl; - if (params.verbose) { + if (localParams.verbose) { entries_t permutation = handle.get_permutation(); std::cout << "MDF permutation:" << std::endl; @@ -157,164 +215,8 @@ void run_experiment(const Params& params) { } } // run_experiment -void print_options() { - std::cerr << "Options\n" << std::endl; - - std::cerr - << "\t[Required] BACKEND: '--threads[numThreads]' | '--openmp " - "[numThreads]' | '--cuda [cudaDeviceIndex]' | '--hip [hipDeviceIndex]'" - " | '--sycl [syclDeviceIndex]'" - << std::endl; - - std::cerr << "\t[Optional] --amtx :: input matrix" << std::endl; - std::cerr << "\t[Optional] --repeat :: how many times to repeat overall " - "MDF" - << std::endl; - std::cerr << "\t[Optional] --verbose :: enable verbose output" - << std::endl; - std::cerr << "\nSettings for randomly generated A matrix" << 
std::endl; - std::cerr << "\t[Optional] --m :: number of rows to generate" - << std::endl; - std::cerr << "\t[Optional] --n :: number of cols to generate" - << std::endl; - std::cerr - << "\t[Optional] --nnz :: number of entries per row to generate" - << std::endl; - std::cerr << "\t[Optional] --diag :: generate a diagonal matrix" - << std::endl; -} // print_options - -int parse_inputs(Params& params, int argc, char** argv) { - for (int i = 1; i < argc; ++i) { - if (0 == Test::string_compare_no_case(argv[i], "--threads")) { - params.use_threads = atoi(argv[++i]); - } else if (0 == Test::string_compare_no_case(argv[i], "--openmp")) { - params.use_openmp = atoi(argv[++i]); - } else if (0 == Test::string_compare_no_case(argv[i], "--cuda")) { - params.use_cuda = atoi(argv[++i]) + 1; - } else if (0 == Test::string_compare_no_case(argv[i], "--hip")) { - params.use_hip = atoi(argv[++i]) + 1; - } else if (0 == Test::string_compare_no_case(argv[i], "--sycl")) { - params.use_sycl = atoi(argv[++i]) + 1; - } else if (0 == Test::string_compare_no_case(argv[i], "--amtx")) { - params.amtx = argv[++i]; - } else if (0 == Test::string_compare_no_case(argv[i], "--m")) { - params.m = atoi(argv[++i]); - } else if (0 == Test::string_compare_no_case(argv[i], "--n")) { - params.n = atoi(argv[++i]); - } else if (0 == Test::string_compare_no_case(argv[i], "--nnz")) { - params.nnzPerRow = atoi(argv[++i]); - } else if (0 == Test::string_compare_no_case(argv[i], "--diag")) { - params.diag = true; - } else if (0 == Test::string_compare_no_case(argv[i], "--repeat")) { - params.repeat = atoi(argv[++i]); - } else if (0 == Test::string_compare_no_case(argv[i], "--verbose")) { - params.verbose = true; - } else { - std::cerr << "Unrecognized command line argument #" << i << ": " - << argv[i] << std::endl; - print_options(); - return 1; - } - } - return 0; -} // parse_inputs - +#define KOKKOSKERNELS_PERF_TEST_NAME run_experiment +#include "KokkosKernels_perf_test_instantiation.hpp" int main(int argc, 
char** argv) { - Params params; - - if (parse_inputs(params, argc, argv)) { - return 1; - } - const int num_threads = - std::max(params.use_openmp, - params.use_threads); // Assumption is that use_openmp variable - // is provided as number of threads - - // If cuda, hip or sycl is used, set device_id - int device_id = 0; - if (params.use_cuda > 0) { - device_id = params.use_cuda - 1; - } - if (params.use_hip > 0) { - device_id = params.use_hip - 1; - } - if (params.use_sycl > 0) { - device_id = params.use_sycl - 1; - } - - Kokkos::initialize(Kokkos::InitializationSettings() - .set_num_threads(num_threads) - .set_device_id(device_id)); - - bool useOMP = params.use_openmp != 0; - bool useThreads = params.use_threads != 0; - bool useCUDA = params.use_cuda != 0; - bool useHIP = params.use_hip != 0; - bool useSYCL = params.use_sycl != 0; - bool useSerial = !useOMP && !useCUDA && !useHIP && !useSYCL; - - if (useOMP) { -#if defined(KOKKOS_ENABLE_OPENMP) - using crsMat_t = - KokkosSparse::CrsMatrix; - run_experiment(params); -#else - std::cout << "ERROR: OpenMP requested, but not available.\n"; - return 1; -#endif - } - if (useThreads) { -#if defined(KOKKOS_ENABLE_THREADS) - using crsMat_t = - KokkosSparse::CrsMatrix; - run_experiment(params); -#else - std::cout << "ERROR: OpenMP requested, but not available.\n"; - return 1; -#endif - } - if (useCUDA) { -#if defined(KOKKOS_ENABLE_CUDA) - using crsMat_t = - KokkosSparse::CrsMatrix; - run_experiment(params); -#else - std::cout << "ERROR: CUDA requested, but not available.\n"; - return 1; -#endif - } - if (useHIP) { -#if defined(KOKKOS_ENABLE_HIP) - using crsMat_t = - KokkosSparse::CrsMatrix; - run_experiment(params); -#else - std::cout << "ERROR: HIP requested, but not available.\n"; - return 1; -#endif - } - if (useSYCL) { -#if defined(KOKKOS_ENABLE_SYCL) - using crsMat_t = - KokkosSparse::CrsMatrix; - run_experiment(params); -#else - std::cout << "ERROR: SYCL requested, but not available.\n"; - return 1; -#endif - } - if 
(useSerial) { -#if defined(KOKKOS_ENABLE_SERIAL) - using crsMat_t = - KokkosSparse::CrsMatrix; - run_experiment(params); -#else - std::cout << "ERROR: Serial device requested, but not available.\n"; - return 1; -#endif - } - Kokkos::finalize(); - return 0; + return main_instantiation(argc, argv); } // main