diff --git a/CHANGELOG.md b/CHANGELOG.md index 661a1d804..06f40d34d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ Full documentation for rocSOLVER is available at [rocsolver.readthedocs.io](http ### Added ### Optimized ### Changed +- Changed `ROCSOLVER_EMBED_FMT` default to `ON` for users building directly with CMake. + This matches the existing default when building with install.sh or rmake.py. ### Deprecated ### Removed ### Fixed diff --git a/CMakeLists.txt b/CMakeLists.txt index 2fad03547..02386e670 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,7 +31,7 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_STANDARD_REQUIRED ON) -option(ROCSOLVER_EMBED_FMT "Hide libfmt symbols" OFF) +option(ROCSOLVER_EMBED_FMT "Hide libfmt symbols" ON) option(OPTIMAL "Build specialized kernels for small matrix sizes" ON) option(ROCSOLVER_FIND_PACKAGE_LAPACK_CONFIG "Skip module mode search for LAPACK" ON) diff --git a/clients/CMakeLists.txt b/clients/CMakeLists.txt index 8e1b528d0..522b46aad 100755 --- a/clients/CMakeLists.txt +++ b/clients/CMakeLists.txt @@ -104,7 +104,7 @@ if(BUILD_CLIENTS_BENCHMARKS OR BUILD_CLIENTS_TESTS) ) set(common_source_files common/lapack_host_reference.cpp - rocblascommon/utility.cpp + rocblascommon/clients_utility.cpp ${explicit_inst_files} ) diff --git a/clients/gtest/rocsolver_gtest_main.cpp b/clients/gtest/rocsolver_gtest_main.cpp index 1edae9cfa..afc2a1bcb 100644 --- a/clients/gtest/rocsolver_gtest_main.cpp +++ b/clients/gtest/rocsolver_gtest_main.cpp @@ -1,11 +1,12 @@ /* ************************************************************************ - * Copyright (c) 2020-2021 Advanced Micro Devices, Inc. + * Copyright (c) 2020-2022 Advanced Micro Devices, Inc. * ************************************************************************ */ #include #include #include +#include #include #include diff --git a/clients/include/clientcommon.hpp b/clients/include/clientcommon.hpp index 96570a6fa..c3d7045d5 100644 --- a/clients/include/clientcommon.hpp +++ b/clients/include/clientcommon.hpp @@ -8,9 +8,9 @@ #include "common_host_helpers.hpp" #include "rocsolver_datatype2string.hpp" // rocblas common +#include "rocblascommon/clients_utility.hpp" #include "rocblascommon/rocblas_test.hpp" #include "rocblascommon/rocblas_vector.hpp" -#include "rocblascommon/utility.hpp" //#include "rocblascommon/device_vector.hpp" //#include "rocblascommon/device_batch_vector.hpp" //#include "rocblascommon/device_strided_batch_vector.hpp" diff --git a/clients/include/rocsolver_arguments.hpp b/clients/include/rocsolver_arguments.hpp index 54fcfeb80..f7710dec5 100644 --- a/clients/include/rocsolver_arguments.hpp +++ b/clients/include/rocsolver_arguments.hpp @@ -7,6 +7,7 @@ #include #include +#include #include #include diff --git a/clients/rocblascommon/utility.cpp b/clients/rocblascommon/clients_utility.cpp similarity index 95% rename from clients/rocblascommon/utility.cpp rename to clients/rocblascommon/clients_utility.cpp index c1224e308..86f95d526 100644 --- a/clients/rocblascommon/utility.cpp +++ b/clients/rocblascommon/clients_utility.cpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2022 Advanced Micro Devices, Inc. * ************************************************************************ */ #include @@ -7,9 +7,10 @@ #include #include +#include +#include "clients_utility.hpp" #include "rocblas_random.hpp" -#include "utility.hpp" // Random number generator // Note: We do not use random_device to initialize the RNG, because we want diff --git a/clients/rocblascommon/utility.hpp b/clients/rocblascommon/clients_utility.hpp similarity index 98% rename from clients/rocblascommon/utility.hpp rename to clients/rocblascommon/clients_utility.hpp index 52ecff66a..e977ba213 100644 --- a/clients/rocblascommon/utility.hpp +++ b/clients/rocblascommon/clients_utility.hpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2022 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once diff --git a/clients/rocblascommon/d_vector.hpp b/clients/rocblascommon/d_vector.hpp index 015222ec9..074b96273 100644 --- a/clients/rocblascommon/d_vector.hpp +++ b/clients/rocblascommon/d_vector.hpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2022 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once @@ -8,6 +8,7 @@ #include #include +#include #include #include "rocblas_init.hpp" diff --git a/clients/rocblascommon/rocblas_init.hpp b/clients/rocblascommon/rocblas_init.hpp index fe3d4272d..d95c6437a 100644 --- a/clients/rocblascommon/rocblas_init.hpp +++ b/clients/rocblascommon/rocblas_init.hpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2022 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once @@ -9,6 +9,7 @@ #include #include +#include #include #include "rocblas_math.hpp" diff --git a/clients/rocblascommon/rocblas_test.hpp b/clients/rocblascommon/rocblas_test.hpp index fba95e0a2..94c78efaa 100644 --- a/clients/rocblascommon/rocblas_test.hpp +++ b/clients/rocblascommon/rocblas_test.hpp @@ -1,5 +1,5 @@ /* ************************************************************************ - * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2022 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -17,6 +16,7 @@ #include #include +#include #include // Suppress warnings about hipMalloc(), hipFree() except in rocblas-test and diff --git a/common/include/common_host_helpers.hpp b/common/include/common_host_helpers.hpp index 53cca19c5..615b74b0e 100644 --- a/common/include/common_host_helpers.hpp +++ b/common/include/common_host_helpers.hpp @@ -14,7 +14,8 @@ #include #include -#include "rocblascommon/utility.hpp" +#include "fmt_rocblas_types.hpp" +#include "rocblas_utility.hpp" /* * =========================================================================== @@ -81,8 +82,8 @@ void pairs_to_string(std::string& str, const char* sep, T1 arg1, T2 arg2, Ts... /** Set of helpers to print out data hosted in the CPU and/or the GPU **/ /***********************************************************************/ -/*! \brief Print provided data into specified stream (real case)*/ -template , int> = 0> +/*! \brief Print provided data into specified stream */ +template void print_to_stream(std::ostream& os, const std::string name, const rocblas_int m, @@ -148,73 +149,6 @@ void print_to_stream(std::ostream& os, os.flush(); } -/*! \brief Print provided data into specified stream (complex cases)*/ -template , int> = 0> -void print_to_stream(std::ostream& os, - const std::string name, - const rocblas_int m, - const rocblas_int n, - T* A, - const rocblas_int lda, - const rocblas_fill uplo) -{ - std::string s; - bool empty = name.empty(); - if(!empty) - s += fmt::format("{}-by-{} matrix: {}\n", m, n, name); - - if(uplo == rocblas_fill_full) - { - // normal case - for(int i = 0; i < m; i++) - { - if(!empty) - s += " "; - for(int j = 0; j < n; j++) - { - s += fmt::format("{}+{}*i", A[j * lda + i].real(), A[j * lda + i].imag()); - if(j < n - 1) - s += ", "; - } - s += '\n'; - } - } - else - { - // symmetric case - for(int i = 0; i < min(m, n); i++) - { - if(!empty) - s += " "; - for(int j = 0; j < min(m, n); j++) - { - if(uplo == rocblas_fill_upper) - { - if(i < j) - s += fmt::format("{}+{}*i", A[j * lda + i].real(), A[j * lda + i].imag()); - else - s += fmt::format("{}+{}*i", A[i * lda + j].real(), A[i * lda + j].imag()); - } - else - { - if(i > j) - s += fmt::format("{}+{}*i", A[j * lda + i].real(), A[j * lda + i].imag()); - else - s += fmt::format("{}+{}*i", A[i * lda + j].real(), A[i * lda + j].imag()); - } - - if(j < n - 1) - s += ", "; - } - s += '\n'; - } - } - - s += '\n'; - os << s; - os.flush(); -} - /*! \brief Print data from a normal or strided_batched array on the GPU to screen*/ template void print_device_matrix(std::ostream& os, @@ -380,37 +314,7 @@ void print_host_matrix(std::ostream& os, os.flush(); } -template , int> = 0> -void print_host_matrix(std::ostream& os, - const std::string name, - const rocblas_int m, - const rocblas_int n, - T* CPU_result, - T* GPU_result, - const rocblas_int lda, - double error_tolerance) -{ - std::string s; - bool empty = name.empty(); - if(!empty) - s += fmt::format("{}-by-{} matrix: {}\n", m, n, name); - - for(size_t j = 0; j < n; j++) - { - for(size_t i = 0; i < m; i++) - { - T comp = (CPU_result[j * lda + i] - GPU_result[j * lda + i]) / CPU_result[j * lda + i]; - if(abs(comp) > error_tolerance) - s += fmt::format("matrix row {}, col {}, CPU result={}, GPU result={}\n", i, j, - CPU_result[j * lda + i], GPU_result[j * lda + i]); - } - } - s += '\n'; - os << s; - os.flush(); -} - -template , int> = 0> +template void print_host_matrix(std::ostream& os, const std::string name, const rocblas_int m, @@ -430,7 +334,7 @@ void print_host_matrix(std::ostream& os, for(size_t i = 0; i < m; i++) { T comp = (CPU_result[j * lda + i] - GPU_result[j * lda + i]) / CPU_result[j * lda + i]; - if(sqrt(comp.real() * comp.real() + comp.imag() * comp.imag()) > error_tolerance) + if(std::abs(comp) > error_tolerance) s += fmt::format("matrix row {}, col {}, CPU result={}, GPU result={}\n", i, j, CPU_result[j * lda + i], GPU_result[j * lda + i]); } diff --git a/common/include/fmt_rocblas_types.hpp b/common/include/fmt_rocblas_types.hpp new file mode 100644 index 000000000..5fb816528 --- /dev/null +++ b/common/include/fmt_rocblas_types.hpp @@ -0,0 +1,34 @@ +/* ************************************************************************ + * Copyright (c) 2021-2022 Advanced Micro Devices, Inc. + * ************************************************************************ */ + +#pragma once + +#include +#include +#include + +/* The format function for user-defined types cannot be const before fmt v8.0 + but must be const in fmt v8.1 if the type is used in a tuple. */ +#if FMT_VERSION < 80000 +#define ROCSOLVER_FMT_CONST +#else +#define ROCSOLVER_FMT_CONST const +#endif + +namespace fmt +{ +template +struct formatter> : formatter +{ + template + auto format(const rocblas_complex_num& value, FormatCtx& ctx) ROCSOLVER_FMT_CONST + { + formatter::format(value.real(), ctx); + format_to(ctx.out(), "+"); + formatter::format(value.imag(), ctx); + format_to(ctx.out(), "*i"); + return ctx.out(); + } +}; +} diff --git a/library/src/rocblascommon/utility.hpp b/common/include/rocblas_utility.hpp similarity index 99% rename from library/src/rocblascommon/utility.hpp rename to common/include/rocblas_utility.hpp index bac182a93..8bb874eaa 100644 --- a/library/src/rocblascommon/utility.hpp +++ b/common/include/rocblas_utility.hpp @@ -4,16 +4,16 @@ #pragma once -#include "definitions.hpp" -#include "rocblas/rocblas.h" -#include "rocsolver/rocsolver.h" #include #include #include -#include #include #include +#include +#include +#include + #pragma STDC CX_LIMITED_RANGE ON // half vectors diff --git a/install.sh b/install.sh index 2bf658a8b..d43ce7a6b 100755 --- a/install.sh +++ b/install.sh @@ -540,7 +540,6 @@ cmake_common_options+=( '-DCPACK_SET_DESTDIR=OFF' "-DCMAKE_INSTALL_PREFIX=${lib_dir}" "-DCPACK_PACKAGING_INSTALL_PREFIX=${install_dir}" - '-DROCSOLVER_EMBED_FMT=ON' "-DCMAKE_BUILD_TYPE=${build_type}" ) diff --git a/library/src/common/rocsolver_logger.cpp b/library/src/common/rocsolver_logger.cpp index 1bc830603..6c8aadab1 100644 --- a/library/src/common/rocsolver_logger.cpp +++ b/library/src/common/rocsolver_logger.cpp @@ -8,7 +8,7 @@ #include #include -#include "rocblascommon/utility.hpp" +#include "rocblas_utility.hpp" #include "rocsolver_logger.hpp" #define STRINGIFY(s) STRINGIFY_HELPER(s) diff --git a/library/src/include/libcommon.hpp b/library/src/include/libcommon.hpp index 5ef896ea7..a3a3bada1 100644 --- a/library/src/include/libcommon.hpp +++ b/library/src/include/libcommon.hpp @@ -1,9 +1,10 @@ /* ************************************************************************ - * Copyright (c) 2019-2021 Advanced Micro Devices, Inc. + * Copyright (c) 2019-2022 Advanced Micro Devices, Inc. * ************************************************************************ */ #pragma once -#include "rocblascommon/utility.hpp" #include #include + +#include "rocblas_utility.hpp" diff --git a/library/src/include/rocsolver_logvalue.hpp b/library/src/include/rocsolver_logvalue.hpp index 2f656660c..569e66373 100644 --- a/library/src/include/rocsolver_logvalue.hpp +++ b/library/src/include/rocsolver_logvalue.hpp @@ -5,17 +5,11 @@ #pragma once #include +#include +#include "fmt_rocblas_types.hpp" #include "rocsolver_datatype2string.hpp" -/* The format function for user-defined types cannot be const before fmt v8.0 - but must be const in fmt v8.1 if the type is used in a tuple. */ -#if FMT_VERSION < 80000 -#define ROCSOLVER_FMT_CONST -#else -#define ROCSOLVER_FMT_CONST const -#endif - /*************************************************************************** * Wrapper for types passed to logger, so we can more easily adjust the * default way of printing built-in types without doing it globally. (e.g. diff --git a/library/src/rocblascommon/definitions.hpp b/library/src/rocblascommon/definitions.hpp deleted file mode 100644 index 62c3a5ece..000000000 --- a/library/src/rocblascommon/definitions.hpp +++ /dev/null @@ -1,89 +0,0 @@ -/* ************************************************************************ - * Copyright (c) 2016-2021 Advanced Micro Devices, Inc. - * - * ************************************************************************ */ - -#pragma once - -#include -#include - -/******************************************************************************* - * Definitions - ******************************************************************************/ -#define RETURN_IF_HIP_ERROR(INPUT_STATUS_FOR_CHECK) \ - do \ - { \ - hipError_t TMP_STATUS_FOR_CHECK = INPUT_STATUS_FOR_CHECK; \ - if(TMP_STATUS_FOR_CHECK != hipSuccess) \ - { \ - return get_rocblas_status_for_hip_status(TMP_STATUS_FOR_CHECK); \ - } \ - } while(0) - -#define RETURN_IF_ROCBLAS_ERROR(INPUT_STATUS_FOR_CHECK) \ - do \ - { \ - rocblas_status TMP_STATUS_FOR_CHECK = INPUT_STATUS_FOR_CHECK; \ - if(TMP_STATUS_FOR_CHECK != rocblas_status_success) \ - { \ - return TMP_STATUS_FOR_CHECK; \ - } \ - } while(0) - -#define THROW_IF_HIP_ERROR(INPUT_STATUS_FOR_CHECK) \ - do \ - { \ - hipError_t TMP_STATUS_FOR_CHECK = INPUT_STATUS_FOR_CHECK; \ - if(TMP_STATUS_FOR_CHECK != hipSuccess) \ - { \ - throw get_rocblas_status_for_hip_status(TMP_STATUS_FOR_CHECK); \ - } \ - } while(0) - -#define THROW_IF_ROCBLAS_ERROR(INPUT_STATUS_FOR_CHECK) \ - do \ - { \ - rocblas_status TMP_STATUS_FOR_CHECK = INPUT_STATUS_FOR_CHECK; \ - if(TMP_STATUS_FOR_CHECK != rocblas_status_success) \ - { \ - throw TMP_STATUS_FOR_CHECK; \ - } \ - } while(0) - -#define PRINT_IF_HIP_ERROR(INPUT_STATUS_FOR_CHECK) \ - do \ - { \ - hipError_t TMP_STATUS_FOR_CHECK = INPUT_STATUS_FOR_CHECK; \ - if(TMP_STATUS_FOR_CHECK != hipSuccess) \ - { \ - fmt::print(stderr, "hip error code: '{}':{} at {}:{}\n", \ - hipGetErrorName(TMP_STATUS_FOR_CHECK), TMP_STATUS_FOR_CHECK, __FILE__, \ - __LINE__); \ - } \ - } while(0) - -#define PRINT_IF_ROCBLAS_ERROR(INPUT_STATUS_FOR_CHECK) \ - do \ - { \ - rocblas_status TMP_STATUS_FOR_CHECK = INPUT_STATUS_FOR_CHECK; \ - if(TMP_STATUS_FOR_CHECK != rocblas_status_success) \ - { \ - fmt::print(stderr, "rocblas error: '{}':{} at {}:{}\n", \ - rocblas_status_to_string(TMP_STATUS_FOR_CHECK), TMP_STATUS_FOR_CHECK, \ - __FILE__, __LINE__); \ - } \ - } while(0) - -#define PRINT_AND_RETURN_IF_ROCBLAS_ERROR(INPUT_STATUS_FOR_CHECK) \ - do \ - { \ - rocblas_status TMP_STATUS_FOR_CHECK = INPUT_STATUS_FOR_CHECK; \ - if(TMP_STATUS_FOR_CHECK != rocblas_status_success) \ - { \ - fmt::print(stderr, "rocblas error: '{}':{} at {}:{}\n", \ - rocblas_status_to_string(TMP_STATUS_FOR_CHECK), TMP_STATUS_FOR_CHECK, \ - __FILE__, __LINE__); \ - return TMP_STATUS_FOR_CHECK; \ - } \ - } while(0) diff --git a/rmake.py b/rmake.py index 617a0b513..f89a13ae6 100755 --- a/rmake.py +++ b/rmake.py @@ -92,7 +92,7 @@ def config_cmd(): global OS_info cwd_path = os.getcwd() cmake_executable = "" - cmake_options = ['-DROCSOLVER_EMBED_FMT=ON'] + cmake_options = [] src_path = cmake_path(cwd_path) cmake_platform_opts = [] cmake_prefix_path = []