From 9e1bdb29ebf5a104228486109dff2d8c1d1ebdec Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Tue, 27 Aug 2024 11:21:47 -0600 Subject: [PATCH 1/4] Add KokkosKernels::eager_initialize() to common --- cmake/KokkosKernels_config.h.in | 8 ++ cmake/kokkoskernels_components.cmake | 11 ++- common/CMakeLists.txt | 2 + common/src/KokkosKernels_EagerInitialize.cpp | 83 +++++++++++++++++++ common/src/KokkosKernels_EagerInitialize.hpp | 40 +++++++++ common/unit_test/Test_Common.hpp | 1 + .../unit_test/Test_Common_EagerInitialize.hpp | 28 +++++++ 7 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 common/src/KokkosKernels_EagerInitialize.cpp create mode 100644 common/src/KokkosKernels_EagerInitialize.hpp create mode 100644 common/unit_test/Test_Common_EagerInitialize.hpp diff --git a/cmake/KokkosKernels_config.h.in b/cmake/KokkosKernels_config.h.in index 9f6a0b85d5..fa9f556847 100644 --- a/cmake/KokkosKernels_config.h.in +++ b/cmake/KokkosKernels_config.h.in @@ -174,4 +174,12 @@ #define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY false #endif +/* Enabled components */ +#cmakedefine KOKKOSKERNELS_ENABLE_COMPONENT_BATCHED +#cmakedefine KOKKOSKERNELS_ENABLE_COMPONENT_BLAS +#cmakedefine KOKKOSKERNELS_ENABLE_COMPONENT_LAPACK +#cmakedefine KOKKOSKERNELS_ENABLE_COMPONENT_SPARSE +#cmakedefine KOKKOSKERNELS_ENABLE_COMPONENT_GRAPH +#cmakedefine KOKKOSKERNELS_ENABLE_COMPONENT_ODE + #endif // KOKKOSKERNELS_CONFIG_H diff --git a/cmake/kokkoskernels_components.cmake b/cmake/kokkoskernels_components.cmake index 16a784bd1f..28386f0557 100644 --- a/cmake/kokkoskernels_components.cmake +++ b/cmake/kokkoskernels_components.cmake @@ -102,4 +102,13 @@ IF ( KokkosKernels_ENABLE_COMPONENT_BATCHED ELSE() SET(KOKKOSKERNELS_ALL_COMPONENTS_ENABLED OFF CACHE BOOL "" FORCE) ENDIF() -mark_as_advanced(FORCE KOKKOSKERNELS_ALL_COMPONENTS_ENABLED) \ No newline at end of file +mark_as_advanced(FORCE KOKKOSKERNELS_ALL_COMPONENTS_ENABLED) +# Now that component enables are finalized, also set upper-case +# versions of component enables for the config.h +SET(KOKKOSKERNELS_ENABLE_COMPONENT_BATCHED KokkosKernels_ENABLE_COMPONENT_BATCHED) +SET(KOKKOSKERNELS_ENABLE_COMPONENT_BLAS KokkosKernels_ENABLE_COMPONENT_BLAS) +SET(KOKKOSKERNELS_ENABLE_COMPONENT_LAPACK KokkosKernels_ENABLE_COMPONENT_LAPACK) +SET(KOKKOSKERNELS_ENABLE_COMPONENT_GRAPH KokkosKernels_ENABLE_COMPONENT_GRAPH) +SET(KOKKOSKERNELS_ENABLE_COMPONENT_SPARSE KokkosKernels_ENABLE_COMPONENT_SPARSE) +SET(KOKKOSKERNELS_ENABLE_COMPONENT_ODE KokkosKernels_ENABLE_COMPONENT_ODE) + diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index b065869296..fd180f7827 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -2,3 +2,5 @@ LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/common/src) LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/common/impl) LIST(APPEND KK_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/common/unit_test) + +LIST(APPEND SOURCES common/src/KokkosKernels_EagerInitialize.cpp) diff --git a/common/src/KokkosKernels_EagerInitialize.cpp b/common/src/KokkosKernels_EagerInitialize.cpp new file mode 100644 index 0000000000..f1c07e8c22 --- /dev/null +++ b/common/src/KokkosKernels_EagerInitialize.cpp @@ -0,0 +1,83 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include "KokkosKernels_EagerInitialize.hpp" +#include "KokkosKernels_config.h" +#include "Kokkos_Core.hpp" + +// Include the minimal set of headers that declare all TPL singletons +#ifdef KOKKOSKERNELS_ENABLE_COMPONENT_BLAS +#include "KokkosBlas_tpl_spec.hpp" //cuBLAS, rocBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA +#include "KokkosBlas_Magma_tpl.hpp" +#endif +#endif + +#ifdef KOKKOSKERNELS_ENABLE_COMPONENT_SPARSE +//note: this file declares both cuSPARSE and rocSPARSE singletons +#include "KokkosKernels_tpl_handles_decl.hpp" +#endif + +#ifdef KOKKOSKERNELS_ENABLE_COMPONENT_LAPACK +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#include "KokkosLapack_cusolver.hpp" +#endif +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA +#include "KokkosLapack_magma.hpp" +#endif +#endif + +namespace KokkosKernels +{ + void eager_initialize() + { + if(!Kokkos::is_initialized()) + { + throw std::runtime_error("Kokkos::intialize must be called before KokkosKernels::eager_initialize"); + } +#ifdef KOKKOSKERNELS_ENABLE_COMPONENT_BLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS + (void) KokkosBlas::Impl::CudaBlasSingleton::singleton(); +#endif +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS + (void) KokkosBlas::Impl::RocBlasSingleton::singleton(); +#endif +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA +#include "KokkosBlas_Magma_tpl.hpp" + (void) KokkosBlas::Impl::MagmaSingleton::singleton(); +#endif +#endif + +#ifdef KOKKOSKERNELS_ENABLE_COMPONENT_SPARSE +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE + (void) KokkosKernels::Impl::CusparseSingleton::singleton(); +#endif +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE + (void) KokkosKernels::Impl::RocsparseSingleton::singleton(); +#endif +#endif + +#ifdef KOKKOSKERNELS_ENABLE_COMPONENT_LAPACK +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER + (void) KokkosLapack::Impl::CudaLapackSingleton::singleton(); +#endif +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA + (void) KokkosLapack::Impl::MagmaSingleton::singleton(); +#endif +#endif + } +} + diff --git a/common/src/KokkosKernels_EagerInitialize.hpp b/common/src/KokkosKernels_EagerInitialize.hpp new file mode 100644 index 0000000000..1a5627b7db --- /dev/null +++ b/common/src/KokkosKernels_EagerInitialize.hpp @@ -0,0 +1,40 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOKERNELS_EAGER_INITIALIZE_HPP +#define KOKKOKERNELS_EAGER_INITIALIZE_HPP + +namespace KokkosKernels +{ + // \brief Eagerly initialize handles for all enabled TPLs, as well + // as any other globally shared resources that would otherwise be lazily initialized. + // + // Eagerly initializing a TPL means that it doesn't have to be + // lazily initialized when first calling a kernel that uses it. + // For example, \c eager_initialize() will call \c cusparseCreate() upfront + // so that the first call to \c KokkosSparse::spmv doesn't have to. + // This can add a significant amount of apparent runtime to that first kernel + // call, even though the added time isn't really spent in the kernel. + // + // Calling this before using any kernels/TPLs is optional. + // This function is idempotent (any calls after the first have no effect). + // + // \pre \c Kokkos::initialize() has been called. + void eager_initialize(); +} + +#endif + diff --git a/common/unit_test/Test_Common.hpp b/common/unit_test/Test_Common.hpp index fb93a494d6..eb18918c5c 100644 --- a/common/unit_test/Test_Common.hpp +++ b/common/unit_test/Test_Common.hpp @@ -28,5 +28,6 @@ #include #include #include +#include #endif // TEST_COMMON_HPP diff --git a/common/unit_test/Test_Common_EagerInitialize.hpp b/common/unit_test/Test_Common_EagerInitialize.hpp new file mode 100644 index 0000000000..cfb6f6a740 --- /dev/null +++ b/common/unit_test/Test_Common_EagerInitialize.hpp @@ -0,0 +1,28 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KK_EAGERINIT_TEST_HPP +#define KK_EAGERINIT_TEST_HPP + +#include "KokkosKernels_EagerInitialize.hpp" + +TEST_F(TestCategory, common_eager_initialize) +{ + KokkosKernels::eager_initialize(); + KokkosKernels::eager_initialize(); +}; + +#endif From 6b656c8f40d43dfe09655da298f339375cd0c2d1 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Tue, 27 Aug 2024 11:43:08 -0600 Subject: [PATCH 2/4] Formatting --- common/src/KokkosKernels_EagerInitialize.cpp | 34 ++++++++---------- common/src/KokkosKernels_EagerInitialize.hpp | 36 +++++++++---------- .../unit_test/Test_Common_EagerInitialize.hpp | 3 +- 3 files changed, 33 insertions(+), 40 deletions(-) diff --git a/common/src/KokkosKernels_EagerInitialize.cpp b/common/src/KokkosKernels_EagerInitialize.cpp index f1c07e8c22..ec4c6e1225 100644 --- a/common/src/KokkosKernels_EagerInitialize.cpp +++ b/common/src/KokkosKernels_EagerInitialize.cpp @@ -20,14 +20,14 @@ // Include the minimal set of headers that declare all TPL singletons #ifdef KOKKOSKERNELS_ENABLE_COMPONENT_BLAS -#include "KokkosBlas_tpl_spec.hpp" //cuBLAS, rocBLAS +#include "KokkosBlas_tpl_spec.hpp" //cuBLAS, rocBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA #include "KokkosBlas_Magma_tpl.hpp" #endif #endif #ifdef KOKKOSKERNELS_ENABLE_COMPONENT_SPARSE -//note: this file declares both cuSPARSE and rocSPARSE singletons +// note: this file declares both cuSPARSE and rocSPARSE singletons #include "KokkosKernels_tpl_handles_decl.hpp" #endif @@ -40,44 +40,40 @@ #endif #endif -namespace KokkosKernels -{ - void eager_initialize() - { - if(!Kokkos::is_initialized()) - { - throw std::runtime_error("Kokkos::intialize must be called before KokkosKernels::eager_initialize"); - } +namespace KokkosKernels { +void eager_initialize() { + if (!Kokkos::is_initialized()) { + throw std::runtime_error("Kokkos::intialize must be called before KokkosKernels::eager_initialize"); + } #ifdef KOKKOSKERNELS_ENABLE_COMPONENT_BLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS - (void) KokkosBlas::Impl::CudaBlasSingleton::singleton(); + (void)KokkosBlas::Impl::CudaBlasSingleton::singleton(); #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS - (void) KokkosBlas::Impl::RocBlasSingleton::singleton(); + (void)KokkosBlas::Impl::RocBlasSingleton::singleton(); #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA #include "KokkosBlas_Magma_tpl.hpp" - (void) KokkosBlas::Impl::MagmaSingleton::singleton(); + (void)KokkosBlas::Impl::MagmaSingleton::singleton(); #endif #endif #ifdef KOKKOSKERNELS_ENABLE_COMPONENT_SPARSE #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE - (void) KokkosKernels::Impl::CusparseSingleton::singleton(); + (void)KokkosKernels::Impl::CusparseSingleton::singleton(); #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE - (void) KokkosKernels::Impl::RocsparseSingleton::singleton(); + (void)KokkosKernels::Impl::RocsparseSingleton::singleton(); #endif #endif #ifdef KOKKOSKERNELS_ENABLE_COMPONENT_LAPACK #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER - (void) KokkosLapack::Impl::CudaLapackSingleton::singleton(); + (void)KokkosLapack::Impl::CudaLapackSingleton::singleton(); #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA - (void) KokkosLapack::Impl::MagmaSingleton::singleton(); + (void)KokkosLapack::Impl::MagmaSingleton::singleton(); #endif #endif - } } - +} // namespace KokkosKernels diff --git a/common/src/KokkosKernels_EagerInitialize.hpp b/common/src/KokkosKernels_EagerInitialize.hpp index 1a5627b7db..83ddba74ee 100644 --- a/common/src/KokkosKernels_EagerInitialize.hpp +++ b/common/src/KokkosKernels_EagerInitialize.hpp @@ -17,24 +17,22 @@ #ifndef KOKKOKERNELS_EAGER_INITIALIZE_HPP #define KOKKOKERNELS_EAGER_INITIALIZE_HPP -namespace KokkosKernels -{ - // \brief Eagerly initialize handles for all enabled TPLs, as well - // as any other globally shared resources that would otherwise be lazily initialized. - // - // Eagerly initializing a TPL means that it doesn't have to be - // lazily initialized when first calling a kernel that uses it. - // For example, \c eager_initialize() will call \c cusparseCreate() upfront - // so that the first call to \c KokkosSparse::spmv doesn't have to. - // This can add a significant amount of apparent runtime to that first kernel - // call, even though the added time isn't really spent in the kernel. - // - // Calling this before using any kernels/TPLs is optional. - // This function is idempotent (any calls after the first have no effect). - // - // \pre \c Kokkos::initialize() has been called. - void eager_initialize(); -} +namespace KokkosKernels { +// \brief Eagerly initialize handles for all enabled TPLs, as well +// as any other globally shared resources that would otherwise be lazily initialized. +// +// Eagerly initializing a TPL means that it doesn't have to be +// lazily initialized when first calling a kernel that uses it. +// For example, \c eager_initialize() will call \c cusparseCreate() upfront +// so that the first call to \c KokkosSparse::spmv doesn't have to. +// This can add a significant amount of apparent runtime to that first kernel +// call, even though the added time isn't really spent in the kernel. +// +// Calling this before using any kernels/TPLs is optional. +// This function is idempotent (any calls after the first have no effect). +// +// \pre \c Kokkos::initialize() has been called. +void eager_initialize(); +} // namespace KokkosKernels #endif - diff --git a/common/unit_test/Test_Common_EagerInitialize.hpp b/common/unit_test/Test_Common_EagerInitialize.hpp index cfb6f6a740..a6f55b7022 100644 --- a/common/unit_test/Test_Common_EagerInitialize.hpp +++ b/common/unit_test/Test_Common_EagerInitialize.hpp @@ -19,8 +19,7 @@ #include "KokkosKernels_EagerInitialize.hpp" -TEST_F(TestCategory, common_eager_initialize) -{ +TEST_F(TestCategory, common_eager_initialize) { KokkosKernels::eager_initialize(); KokkosKernels::eager_initialize(); }; From 6ea703c18d103bebf057889d88dbde746bf63d10 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Fri, 27 Sep 2024 18:16:50 -0600 Subject: [PATCH 3/4] TPL singletons: allow query of whether initialized And test KokkosKernels::eager_initialize() using this --- blas/tpls/KokkosBlas_Cuda_tpl.hpp | 18 ++- blas/tpls/KokkosBlas_Magma_tpl.hpp | 18 ++- blas/tpls/KokkosBlas_Rocm_tpl.hpp | 20 +++- blas/tpls/KokkosBlas_magma.hpp | 4 + blas/tpls/KokkosBlas_tpl_spec.hpp | 9 ++ common/src/KokkosKernels_EagerInitialize.cpp | 1 - common/unit_test/CMakeLists.txt | 7 ++ common/unit_test/Test_Common.hpp | 1 - .../unit_test/Test_Common_EagerInitialize.cpp | 113 ++++++++++++++++++ .../unit_test/Test_Common_EagerInitialize.hpp | 27 ----- lapack/tpls/KokkosLapack_Cuda_tpl.hpp | 18 ++- lapack/tpls/KokkosLapack_Magma_tpl.hpp | 18 ++- lapack/tpls/KokkosLapack_cusolver.hpp | 5 + lapack/tpls/KokkosLapack_magma.hpp | 5 + .../tpls/KokkosKernels_tpl_handles_decl.hpp | 8 ++ sparse/tpls/KokkosKernels_tpl_handles_def.hpp | 40 +++++-- 16 files changed, 256 insertions(+), 56 deletions(-) create mode 100644 common/unit_test/Test_Common_EagerInitialize.cpp delete mode 100644 common/unit_test/Test_Common_EagerInitialize.hpp diff --git a/blas/tpls/KokkosBlas_Cuda_tpl.hpp b/blas/tpls/KokkosBlas_Cuda_tpl.hpp index d80e3a23d8..fa2749c980 100644 --- a/blas/tpls/KokkosBlas_Cuda_tpl.hpp +++ b/blas/tpls/KokkosBlas_Cuda_tpl.hpp @@ -25,12 +25,24 @@ namespace Impl { CudaBlasSingleton::CudaBlasSingleton() { cublasStatus_t stat = cublasCreate(&handle); if (stat != CUBLAS_STATUS_SUCCESS) Kokkos::abort("CUBLAS initialization failed\n"); - - Kokkos::push_finalize_hook([&]() { cublasDestroy(handle); }); } CudaBlasSingleton& CudaBlasSingleton::singleton() { - static CudaBlasSingleton s; + std::unique_ptr& instance = get_instance(); + if (!instance) { + instance = std::make_unique(); + Kokkos::push_finalize_hook([&]() { + cublasDestroy(instance->handle); + instance.reset(); + }); + } + return *instance; +} + +bool CudaBlasSingleton::is_initialized() { return get_instance() != nullptr; } + +std::unique_ptr& CudaBlasSingleton::get_instance() { + static std::unique_ptr s; return s; } diff --git a/blas/tpls/KokkosBlas_Magma_tpl.hpp b/blas/tpls/KokkosBlas_Magma_tpl.hpp index f149a790df..bce5d4057a 100644 --- a/blas/tpls/KokkosBlas_Magma_tpl.hpp +++ b/blas/tpls/KokkosBlas_Magma_tpl.hpp @@ -25,12 +25,24 @@ namespace Impl { MagmaSingleton::MagmaSingleton() { magma_int_t stat = magma_init(); if (stat != MAGMA_SUCCESS) Kokkos::abort("MAGMA initialization failed\n"); - - Kokkos::push_finalize_hook([&]() { magma_finalize(); }); } MagmaSingleton& MagmaSingleton::singleton() { - static MagmaSingleton s; + std::unique_ptr& instance = get_instance(); + if (!instance) { + instance = std::make_unique(); + Kokkos::push_finalize_hook([&]() { + magma_finalize(); + instance.reset(); + }); + } + return *instance; +} + +bool MagmaSingleton::is_initialized() { return get_instance() != nullptr; } + +std::unique_ptr& MagmaSingleton::get_instance() { + static std::unique_ptr s; return s; } diff --git a/blas/tpls/KokkosBlas_Rocm_tpl.hpp b/blas/tpls/KokkosBlas_Rocm_tpl.hpp index b5a7dabf6f..699c0b5db5 100644 --- a/blas/tpls/KokkosBlas_Rocm_tpl.hpp +++ b/blas/tpls/KokkosBlas_Rocm_tpl.hpp @@ -22,14 +22,24 @@ namespace KokkosBlas { namespace Impl { -RocBlasSingleton::RocBlasSingleton() { - KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_create_handle(&handle)); +RocBlasSingleton::RocBlasSingleton() { KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_create_handle(&handle)); } - Kokkos::push_finalize_hook([&]() { KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_destroy_handle(handle)); }); +RocBlasSingleton& RocBlasSingleton::singleton() { + std::unique_ptr& instance = get_instance(); + if (!instance) { + instance = std::make_unique(); + Kokkos::push_finalize_hook([&]() { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_destroy_handle(instance->handle)); + instance.reset(); + }); + } + return *instance; } -RocBlasSingleton& RocBlasSingleton::singleton() { - static RocBlasSingleton s; +bool RocBlasSingleton::is_initialized() { return get_instance() != nullptr; } + +std::unique_ptr& RocBlasSingleton::get_instance() { + static std::unique_ptr s; return s; } diff --git a/blas/tpls/KokkosBlas_magma.hpp b/blas/tpls/KokkosBlas_magma.hpp index 5f5fcfe4e1..e86d2ee2cd 100644 --- a/blas/tpls/KokkosBlas_magma.hpp +++ b/blas/tpls/KokkosBlas_magma.hpp @@ -27,7 +27,11 @@ namespace Impl { struct MagmaSingleton { MagmaSingleton(); + static bool is_initialized(); static MagmaSingleton& singleton(); + + private: + static std::unique_ptr& get_instance(); }; } // namespace Impl diff --git a/blas/tpls/KokkosBlas_tpl_spec.hpp b/blas/tpls/KokkosBlas_tpl_spec.hpp index 7f40edf435..8ad70595fc 100644 --- a/blas/tpls/KokkosBlas_tpl_spec.hpp +++ b/blas/tpls/KokkosBlas_tpl_spec.hpp @@ -29,7 +29,11 @@ struct CudaBlasSingleton { CudaBlasSingleton(); + static bool is_initialized(); static CudaBlasSingleton& singleton(); + + private: + static std::unique_ptr& get_instance(); }; inline void cublas_internal_error_throw(cublasStatus_t cublasState, const char* name, const char* file, @@ -111,7 +115,12 @@ struct RocBlasSingleton { RocBlasSingleton(); + static bool is_initialized(); + static RocBlasSingleton& singleton(); + + private: + static std::unique_ptr& get_instance(); }; inline void rocblas_internal_error_throw(rocblas_status rocblasState, const char* name, const char* file, diff --git a/common/src/KokkosKernels_EagerInitialize.cpp b/common/src/KokkosKernels_EagerInitialize.cpp index ec4c6e1225..a8c8a39f48 100644 --- a/common/src/KokkosKernels_EagerInitialize.cpp +++ b/common/src/KokkosKernels_EagerInitialize.cpp @@ -53,7 +53,6 @@ void eager_initialize() { (void)KokkosBlas::Impl::RocBlasSingleton::singleton(); #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA -#include "KokkosBlas_Magma_tpl.hpp" (void)KokkosBlas::Impl::MagmaSingleton::singleton(); #endif #endif diff --git a/common/unit_test/CMakeLists.txt b/common/unit_test/CMakeLists.txt index c0d8fc116f..c963e908e5 100644 --- a/common/unit_test/CMakeLists.txt +++ b/common/unit_test/CMakeLists.txt @@ -95,3 +95,10 @@ IF (KOKKOS_ENABLE_THREADS) ) ENDIF () +# Add eager_initialize test, which is not backend-specific +KOKKOSKERNELS_ADD_UNIT_TEST( + common_eager_initialize + SOURCES Test_Common_EagerInitialize.cpp + COMPONENTS common +) + diff --git a/common/unit_test/Test_Common.hpp b/common/unit_test/Test_Common.hpp index eb18918c5c..fb93a494d6 100644 --- a/common/unit_test/Test_Common.hpp +++ b/common/unit_test/Test_Common.hpp @@ -28,6 +28,5 @@ #include #include #include -#include #endif // TEST_COMMON_HPP diff --git a/common/unit_test/Test_Common_EagerInitialize.cpp b/common/unit_test/Test_Common_EagerInitialize.cpp new file mode 100644 index 0000000000..20910069fc --- /dev/null +++ b/common/unit_test/Test_Common_EagerInitialize.cpp @@ -0,0 +1,113 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KK_EAGERINIT_TEST_HPP +#define KK_EAGERINIT_TEST_HPP + +#include +#include "Kokkos_Core.hpp" +#include "KokkosKernels_config.h" +#include "KokkosKernels_EagerInitialize.hpp" + +#ifdef KOKKOSKERNELS_ENABLE_COMPONENT_BLAS +#include "KokkosBlas_tpl_spec.hpp" //cuBLAS, rocBLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA +#include "KokkosBlas_Magma_tpl.hpp" +#endif +#endif + +#ifdef KOKKOSKERNELS_ENABLE_COMPONENT_SPARSE +// note: this file declares both cuSPARSE and rocSPARSE singletons +#include "KokkosKernels_tpl_handles_decl.hpp" +#endif + +#ifdef KOKKOSKERNELS_ENABLE_COMPONENT_LAPACK +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#include "KokkosLapack_cusolver.hpp" +#endif +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA +#include "KokkosLapack_magma.hpp" +#endif +#endif + +// Count the number of singletons which are currently initialized, +// and the numInitialized number of singleton classes that are currently enabled +// (based on which TPLs and components were enabled at configure-time) +void countSingletons(int& numInitialized, int& numEnabled) { + numInitialized = 0; + numEnabled = 0; +#ifdef KOKKOSKERNELS_ENABLE_COMPONENT_BLAS +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUBLAS + numEnabled++; + if (KokkosBlas::Impl::CudaBlasSingleton::is_initialized()) numInitialized++; +#endif +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS + numEnabled++; + if (KokkosBlas::Impl::RocBlasSingleton::is_initialized()) numInitialized++; +#endif +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA + numEnabled++; + if (KokkosBlas::Impl::MagmaSingleton::is_initialized()) numInitialized++; +#endif +#endif + +#ifdef KOKKOSKERNELS_ENABLE_COMPONENT_SPARSE +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE + numEnabled++; + if (KokkosKernels::Impl::CusparseSingleton::is_initialized()) numInitialized++; +#endif +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE + numEnabled++; + if (KokkosKernels::Impl::RocsparseSingleton::is_initialized()) numInitialized++; +#endif +#endif + +#ifdef KOKKOSKERNELS_ENABLE_COMPONENT_LAPACK +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER + numEnabled++; + if (KokkosLapack::Impl::CudaLapackSingleton::is_initialized()) numInitialized++; +#endif +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA + numEnabled++; + if (KokkosLapack::Impl::MagmaSingleton::is_initialized()) numInitialized++; +#endif +#endif +} + +int main() { + int numInitialized, numEnabled; + Kokkos::initialize(); + { + // Check that no singletons are already initialized. + countSingletons(numInitialized, numEnabled); + if (numInitialized != 0) + throw std::runtime_error("At least one singleton was initialized before it should have been"); + KokkosKernels::eager_initialize(); + // Check that all singletons are now initialized. + countSingletons(numInitialized, numEnabled); + std::cout << "Kokkos::eager_initialize() set up " << numInitialized << " of " << numEnabled << " TPL singletons.\n"; + if (numInitialized != numEnabled) + throw std::runtime_error("At least one singleton was not initialized by eager_initialize()"); + } + Kokkos::finalize(); + // Finally, make sure that all singletons were finalized during Kokkos::finalize(). + countSingletons(numInitialized, numEnabled); + if (numInitialized != 0) + throw std::runtime_error("At least one singleton was not correctly finalized by Kokkos::finalize()"); + return 0; +} + +#endif diff --git a/common/unit_test/Test_Common_EagerInitialize.hpp b/common/unit_test/Test_Common_EagerInitialize.hpp deleted file mode 100644 index a6f55b7022..0000000000 --- a/common/unit_test/Test_Common_EagerInitialize.hpp +++ /dev/null @@ -1,27 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#ifndef KK_EAGERINIT_TEST_HPP -#define KK_EAGERINIT_TEST_HPP - -#include "KokkosKernels_EagerInitialize.hpp" - -TEST_F(TestCategory, common_eager_initialize) { - KokkosKernels::eager_initialize(); - KokkosKernels::eager_initialize(); -}; - -#endif diff --git a/lapack/tpls/KokkosLapack_Cuda_tpl.hpp b/lapack/tpls/KokkosLapack_Cuda_tpl.hpp index 3ead12d5f4..e3191ea93b 100644 --- a/lapack/tpls/KokkosLapack_Cuda_tpl.hpp +++ b/lapack/tpls/KokkosLapack_Cuda_tpl.hpp @@ -25,12 +25,24 @@ namespace Impl { CudaLapackSingleton::CudaLapackSingleton() { cusolverStatus_t stat = cusolverDnCreate(&handle); if (stat != CUSOLVER_STATUS_SUCCESS) Kokkos::abort("CUSOLVER initialization failed\n"); - - Kokkos::push_finalize_hook([&]() { cusolverDnDestroy(handle); }); } CudaLapackSingleton& CudaLapackSingleton::singleton() { - static CudaLapackSingleton s; + std::unique_ptr& instance = get_instance(); + if (!instance) { + instance = std::make_unique(); + Kokkos::push_finalize_hook([&]() { + cusolverDnDestroy(instance->handle); + instance.reset(); + }); + } + return *instance; +} + +bool CudaLapackSingleton::is_initialized() { return get_instance() != nullptr; } + +std::unique_ptr& CudaLapackSingleton::get_instance() { + static std::unique_ptr s; return s; } diff --git a/lapack/tpls/KokkosLapack_Magma_tpl.hpp b/lapack/tpls/KokkosLapack_Magma_tpl.hpp index 636c40735d..542f681281 100644 --- a/lapack/tpls/KokkosLapack_Magma_tpl.hpp +++ b/lapack/tpls/KokkosLapack_Magma_tpl.hpp @@ -25,12 +25,24 @@ namespace Impl { MagmaSingleton::MagmaSingleton() { magma_int_t stat = magma_init(); if (stat != MAGMA_SUCCESS) Kokkos::abort("MAGMA initialization failed\n"); - - Kokkos::push_finalize_hook([&]() { magma_finalize(); }); } MagmaSingleton& MagmaSingleton::singleton() { - static MagmaSingleton s; + std::unique_ptr& instance = get_instance(); + if (!instance) { + instance = std::make_unique(); + Kokkos::push_finalize_hook([&]() { + magma_finalize(); + instance.reset(); + }); + } + return *instance; +} + +bool MagmaSingleton::is_initialized() { return get_instance() != nullptr; } + +std::unique_ptr& MagmaSingleton::get_instance() { + static std::unique_ptr s; return s; } diff --git a/lapack/tpls/KokkosLapack_cusolver.hpp b/lapack/tpls/KokkosLapack_cusolver.hpp index 272fb8b3b8..15a88714af 100644 --- a/lapack/tpls/KokkosLapack_cusolver.hpp +++ b/lapack/tpls/KokkosLapack_cusolver.hpp @@ -32,6 +32,11 @@ struct CudaLapackSingleton { CudaLapackSingleton(); static CudaLapackSingleton& singleton(); + + static bool is_initialized(); + + private: + static std::unique_ptr& get_instance(); }; inline void cusolver_internal_error_throw(cusolverStatus_t cusolverStatus, const char* name, const char* file, diff --git a/lapack/tpls/KokkosLapack_magma.hpp b/lapack/tpls/KokkosLapack_magma.hpp index dfde113fa6..b1b7bb1ab6 100644 --- a/lapack/tpls/KokkosLapack_magma.hpp +++ b/lapack/tpls/KokkosLapack_magma.hpp @@ -30,6 +30,11 @@ struct MagmaSingleton { MagmaSingleton(); static MagmaSingleton& singleton(); + + static bool is_initialized(); + + private: + static std::unique_ptr& get_instance(); }; } // namespace Impl diff --git a/sparse/tpls/KokkosKernels_tpl_handles_decl.hpp b/sparse/tpls/KokkosKernels_tpl_handles_decl.hpp index a1cd3c97f5..4a14d43df0 100644 --- a/sparse/tpls/KokkosKernels_tpl_handles_decl.hpp +++ b/sparse/tpls/KokkosKernels_tpl_handles_decl.hpp @@ -30,7 +30,11 @@ struct CusparseSingleton { CusparseSingleton(); + static bool is_initialized(); static CusparseSingleton& singleton(); + + private: + static std::unique_ptr& get_instance(); }; } // namespace Impl @@ -48,7 +52,11 @@ struct RocsparseSingleton { RocsparseSingleton(); + static bool is_initialized(); static RocsparseSingleton& singleton(); + + private: + static std::unique_ptr& get_instance(); }; } // namespace Impl diff --git a/sparse/tpls/KokkosKernels_tpl_handles_def.hpp b/sparse/tpls/KokkosKernels_tpl_handles_def.hpp index a88ad12130..68757841dc 100644 --- a/sparse/tpls/KokkosKernels_tpl_handles_def.hpp +++ b/sparse/tpls/KokkosKernels_tpl_handles_def.hpp @@ -25,14 +25,24 @@ namespace KokkosKernels { namespace Impl { -CusparseSingleton::CusparseSingleton() { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreate(&cusparseHandle)); +CusparseSingleton::CusparseSingleton() { KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreate(&cusparseHandle)); } - Kokkos::push_finalize_hook([&]() { cusparseDestroy(cusparseHandle); }); +CusparseSingleton& CusparseSingleton::singleton() { + std::unique_ptr& instance = get_instance(); + if (!instance) { + instance = std::make_unique(); + Kokkos::push_finalize_hook([&]() { + KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroy(instance->cusparseHandle)); + instance.reset(); + }); + } + return *instance; } -CusparseSingleton& CusparseSingleton::singleton() { - static CusparseSingleton s; +bool CusparseSingleton::is_initialized() { return get_instance() != nullptr; } + +std::unique_ptr& CusparseSingleton::get_instance() { + static std::unique_ptr s; return s; } @@ -46,14 +56,24 @@ CusparseSingleton& CusparseSingleton::singleton() { namespace KokkosKernels { namespace Impl { -RocsparseSingleton::RocsparseSingleton() { - KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_create_handle(&rocsparseHandle)); +RocsparseSingleton::RocsparseSingleton() { KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_create_handle(&rocsparseHandle)); } - Kokkos::push_finalize_hook([&]() { KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_destroy_handle(rocsparseHandle)); }); +RocsparseSingleton& RocsparseSingleton::singleton() { + std::unique_ptr& instance = get_instance(); + if (!instance) { + instance = std::make_unique(); + Kokkos::push_finalize_hook([&]() { + KOKKOS_ROCSPARSE_SAFE_CALL_IMPL(rocsparse_destroy_handle(instance->rocsparseHandle)); + instance.reset(); + }); + } + return *instance; } -RocsparseSingleton& RocsparseSingleton::singleton() { - static RocsparseSingleton s; +bool RocsparseSingleton::is_initialized() { return get_instance() != nullptr; } + +std::unique_ptr& RocsparseSingleton::get_instance() { + static std::unique_ptr s; return s; } From 73b6bef11e6ff3909761ebe9c807c8ed672a8ef4 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Fri, 27 Sep 2024 21:50:58 -0600 Subject: [PATCH 4/4] Fix magma build - build TPL singleton file into library when magma's enabled - include correct magma TPL header with declarations, not definitions --- blas/CMakeLists.txt | 2 +- common/src/KokkosKernels_EagerInitialize.cpp | 2 +- common/unit_test/Test_Common_EagerInitialize.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/blas/CMakeLists.txt b/blas/CMakeLists.txt index 5bc7217cfd..08263826ab 100644 --- a/blas/CMakeLists.txt +++ b/blas/CMakeLists.txt @@ -36,7 +36,7 @@ IF (KOKKOSKERNELS_ENABLE_TPL_BLAS OR KOKKOSKERNELS_ENABLE_TPL_MKL OR KOKKOSKERNE ENDIF() # Include cuda blas TPL source file -IF (KOKKOSKERNELS_ENABLE_TPL_CUBLAS) +IF (KOKKOSKERNELS_ENABLE_TPL_CUBLAS OR KOKKOSKERNELS_ENABLE_TPL_MAGMA) LIST(APPEND SOURCES blas/tpls/KokkosBlas_Cuda_tpl.cpp ) diff --git a/common/src/KokkosKernels_EagerInitialize.cpp b/common/src/KokkosKernels_EagerInitialize.cpp index a8c8a39f48..214de93109 100644 --- a/common/src/KokkosKernels_EagerInitialize.cpp +++ b/common/src/KokkosKernels_EagerInitialize.cpp @@ -22,7 +22,7 @@ #ifdef KOKKOSKERNELS_ENABLE_COMPONENT_BLAS #include "KokkosBlas_tpl_spec.hpp" //cuBLAS, rocBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA -#include "KokkosBlas_Magma_tpl.hpp" +#include "KokkosBlas_magma.hpp" #endif #endif diff --git a/common/unit_test/Test_Common_EagerInitialize.cpp b/common/unit_test/Test_Common_EagerInitialize.cpp index 20910069fc..fc495e78fc 100644 --- a/common/unit_test/Test_Common_EagerInitialize.cpp +++ b/common/unit_test/Test_Common_EagerInitialize.cpp @@ -25,7 +25,7 @@ #ifdef KOKKOSKERNELS_ENABLE_COMPONENT_BLAS #include "KokkosBlas_tpl_spec.hpp" //cuBLAS, rocBLAS #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA -#include "KokkosBlas_Magma_tpl.hpp" +#include "KokkosBlas_magma.hpp" #endif #endif