diff --git a/.gitignore b/.gitignore index f4b3db8..3f2ca06 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ build* +*build* bin* 3rdparty* tests/data diff --git a/CMakeLists.txt b/CMakeLists.txt index d544398..83134a0 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,6 @@ # Detects whether this is a top-level project get_directory_property(HAS_PARENT PARENT_DIRECTORY) + if(HAS_PARENT) set(POLYSOLVE_TOPLEVEL_PROJECT OFF) else() @@ -8,6 +9,7 @@ endif() # Check required CMake version set(REQUIRED_CMAKE_VERSION "3.14.0") + if(POLYSOLVE_TOPLEVEL_PROJECT) cmake_minimum_required(VERSION ${REQUIRED_CMAKE_VERSION}) else() @@ -24,41 +26,46 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/PolySolveOptions.cmake) include(${CMAKE_CURRENT_SOURCE_DIR}/PolySolveOptions.cmake) endif() -################################################################################ +# ############################################################################### # CMake Policies -################################################################################ - +# ############################################################################### if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24") cmake_policy(SET CMP0135 NEW) # https://cmake.org/cmake/help/latest/policy/CMP0135.html endif() -################################################################################ - +# ############################################################################### project(PolySolve - DESCRIPTION "Easy-to-use wrapper for linear solver" - LANGUAGES CXX) + DESCRIPTION "Easy-to-use wrapper for linear solver" + LANGUAGES CXX) # Polysolve options -option(POLYSOLVE_WITH_SANITIZERS "Enable sanitizers in compilation targets" OFF) +option(POLYSOLVE_WITH_SANITIZERS "Enable sanitizers in compilation targets" OFF) + # Polysolve options for enabling/disabling optional libraries -option(POLYSOLVE_WITH_CHOLMOD "Enable Cholmod library" ON) -option(POLYSOLVE_WITH_UMFPACK "Enable UmfPack library" ON) 
-option(POLYSOLVE_WITH_SUPERLU "Enable SuperLU library" ON) -option(POLYSOLVE_WITH_MKL "Enable MKL library" ON) -option(POLYSOLVE_WITH_CUSOLVER "Enable cuSOLVER library" OFF) - -option(POLYSOLVE_WITH_PARDISO "Enable Pardiso library" OFF) -option(POLYSOLVE_WITH_HYPRE "Enable hypre" ON) -option(POLYSOLVE_WITH_AMGCL "Use AMGCL" ON) -option(POLYSOLVE_WITH_SPECTRA "Enable computing spectrum" ON) +option(POLYSOLVE_WITH_CHOLMOD "Enable Cholmod library" ON) +option(POLYSOLVE_WITH_UMFPACK "Enable UmfPack library" ON) +option(POLYSOLVE_WITH_SUPERLU "Enable SuperLU library" ON) +option(POLYSOLVE_WITH_MKL "Enable MKL library" ON) + +option(POLYSOLVE_WITH_CUSOLVER "Enable cuSOLVER library" OFF) + +option(POLYSOLVE_WITH_CUDA "Enable CUDA" OFF) +option(POLYSOLVE_WITH_PETSC "Enable PETSC" OFF) +option(POLYSOLVE_WITH_PARDISO "Enable Pardiso library" OFF) +option(POLYSOLVE_WITH_HYPRE "Enable hypre" OFF) +option(HYPRE_WITH_MPI "Enable hypre MPI" OFF) +option(POLYSOLVE_WITH_AMGCL "Use AMGCL" ON) +option(POLYSOLVE_WITH_SPECTRA "Enable computing spectrum" ON) + # Sanitizer options -option(POLYSOLVE_SANITIZE_ADDRESS "Sanitize Address" OFF) -option(POLYSOLVE_SANITIZE_MEMORY "Sanitize Memory" OFF) -option(POLYSOLVE_SANITIZE_THREAD "Sanitize Thread" OFF) -option(POLYSOLVE_SANITIZE_UNDEFINED "Sanitize Undefined" OFF) +option(POLYSOLVE_SANITIZE_ADDRESS "Sanitize Address" OFF) +option(POLYSOLVE_SANITIZE_MEMORY "Sanitize Memory" OFF) +option(POLYSOLVE_SANITIZE_THREAD "Sanitize Thread" OFF) +option(POLYSOLVE_SANITIZE_UNDEFINED "Sanitize Undefined" OFF) + # Misc. 
-option(POLYSOLVE_LARGE_INDEX "Build for large indices" OFF) -option(POLYSOLVE_WITH_TESTS "Build unit-tests" ${POLYSOLVE_TOPLEVEL_PROJECT}) +option(POLYSOLVE_LARGE_INDEX "Build for large indices" OFF) +option(POLYSOLVE_WITH_TESTS "Build unit-tests" ${POLYSOLVE_TOPLEVEL_PROJECT}) include(CMakeDependentOption) cmake_dependent_option(SUITE_SPARSE_WITH_MKL "Build SuiteSparse using MKL" ON "POLYSOLVE_WITH_MKL" OFF) @@ -71,11 +78,16 @@ if(POLYSOLVE_TOPLEVEL_PROJECT) set(CMAKE_CXX_EXTENSIONS OFF) endif() -if (MSVC) +if(NOT DEFINED CMAKE_CUDA_STANDARD) + set(CMAKE_CUDA_STANDARD 14) + set(CMAKE_CUDA_STANDARD_REQUIRED ON) +endif() + +if(MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj") endif() -### Configuration +# ## Configuration list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/polysolve/") list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/recipes/") list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/find/") @@ -93,9 +105,9 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON) # Generate position independent code by default set(CMAKE_POSITION_INDEPENDENT_CODE ON) -################################################################################ +# ############################################################################### # PolySolve Library -################################################################################ +# ############################################################################### # Add an empty library and fill in the list of sources in `src/CMakeLists.txt`. 
add_library(polysolve) @@ -106,10 +118,9 @@ add_subdirectory(src) # Public include directory for Polysolve target_include_directories(polysolve PUBLIC ${PROJECT_SOURCE_DIR}/src) -################################################################################ +# ############################################################################### # Definitions -################################################################################ - +# ############################################################################### if(POLYSOLVE_LARGE_INDEX) target_compile_definitions(polysolve PUBLIC -DPOLYSOLVE_LARGE_INDEX) endif() @@ -120,9 +131,9 @@ target_compile_definitions(polysolve PUBLIC -DEIGEN_STACK_ALLOCATION_LIMIT=0) # 8MB # target_compile_definitions(polysolve PUBLIC -DEIGEN_STACK_ALLOCATION_LIMIT=8388608) -################################################################################ +# ############################################################################### # Dependencies -################################################################################ +# ############################################################################### # Extra warnings include(polysolve_warnings) @@ -137,11 +148,58 @@ endif() include(eigen) target_link_libraries(polysolve PUBLIC Eigen3::Eigen) +# CUDA +if(POLYSOLVE_WITH_CUDA) + include(CheckLanguage) + check_language(CUDA) + set(CMAKE_CUDA_ARCHITECTURES "61") + find_package(CUDAToolkit) + + if(CUDAToolkit_FOUND) + message(STATUS "Found CUDAToolkit") + set(CMAKE_CUDA_COMPILER ${CUDAToolkit_NVCC_EXECUTABLE}) + enable_language(CUDA) + target_compile_definitions(polysolve PUBLIC -DPOLYSOLVE_WITH_CUDA) + else() + message(FATAL_ERROR "No CUDA support found!") + endif() + + # We need to explicitly state that we need all CUDA files in the particle + # library to be built with -dc as the member functions could be called by + # other libraries and executables. 
+ set_target_properties(polysolve PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + + # Nvidia RTX8000 -> compute_75 + # Nvidia V100 -> compute_70 + # Nvidia 1080/1080Ti -> compute_61 + # Nvidia 3080Ti -> compute_86 + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 60 70 75 86) + endif() + + set_target_properties(polysolve PROPERTIES CUDA_ARCHITECTURES "60;70;75;86") + + if(APPLE) + # We need to add the path to the driver (libcuda.dylib) as an rpath, + # so that the static cuda runtime can find it at runtime. + set_property(TARGET polysolve + PROPERTY + BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) + endif() + + target_link_libraries(polysolve PUBLIC -lcudart -lcusparse) +endif() + # Hypre (GNU Lesser General Public License) if(POLYSOLVE_WITH_HYPRE) include(hypre) target_link_libraries(polysolve PUBLIC HYPRE::HYPRE) target_compile_definitions(polysolve PUBLIC -DPOLYSOLVE_WITH_HYPRE) + + if(HYPRE_WITH_MPI) + target_compile_definitions(polysolve PUBLIC HYPRE_WITH_MPI) + message(STATUS "HYPRE WITH MPI.") + endif() endif() # Json (MIT) @@ -164,6 +222,7 @@ endif() # Pardiso solver if(POLYSOLVE_WITH_PARDISO) include(pardiso) + if(TARGET Pardiso::Pardiso) target_link_libraries(polysolve PUBLIC Pardiso::Pardiso) target_compile_definitions(polysolve PUBLIC -DPOLYSOLVE_WITH_PARDISO) @@ -175,6 +234,7 @@ endif() # UmfPack solver if(POLYSOLVE_WITH_UMFPACK) include(umfpack) + if(TARGET UMFPACK::UMFPACK) target_link_libraries(polysolve PUBLIC UMFPACK::UMFPACK) target_compile_definitions(polysolve PUBLIC -DPOLYSOLVE_WITH_UMFPACK) @@ -186,6 +246,7 @@ endif() # SuperLU solver if(POLYSOLVE_WITH_SUPERLU) include(superlu) + if(TARGET SuperLU::SuperLU) target_link_libraries(polysolve PUBLIC SuperLU::SuperLU) target_compile_definitions(polysolve PUBLIC -DPOLYSOLVE_WITH_SUPERLU) @@ -194,7 +255,6 @@ if(POLYSOLVE_WITH_SUPERLU) endif() endif() - # AMGCL solver if(POLYSOLVE_WITH_AMGCL) include(amgcl) @@ -209,27 +269,48 @@ if(POLYSOLVE_WITH_SPECTRA) 
target_compile_definitions(polysolve PUBLIC -DPOLYSOLVE_WITH_SPECTRA) endif() +# ############################################################################### + # cuSolver solvers if(POLYSOLVE_WITH_CUSOLVER) include(cusolverdn) + if(TARGET CUDA::cusolver) target_link_libraries(polysolve PUBLIC CUDA::cusolver) target_compile_definitions(polysolve PUBLIC -DPOLYSOLVE_WITH_CUSOLVER) + message(STATUS "cuSOLVER found!") else() message(WARNING "cuSOLVER not found, solver will not be available.") endif() endif() -################################################################################ +# ############################################################################### + +# PETSC solvers (REQUIRES MPI) +if(POLYSOLVE_WITH_PETSC) + find_package(MPI) + + # TODO: CHECK IF PETSC IS PRESENT BEFOREHAND + if(MPI_FOUND) + target_link_libraries(polysolve PUBLIC -lpetsc MPI::MPI_CXX) + target_compile_definitions(polysolve PUBLIC -DPOLYSOLVE_WITH_PETSC) + message(STATUS "PETSc found!") + else() + message(FATAL_ERROR "PETSc requires MPI!") + endif() +endif() + +# ############################################################################### + # Compiler options -################################################################################ +# ############################################################################### # Use C++14 target_compile_features(polysolve PUBLIC cxx_std_14) -################################################################################ +# ############################################################################### # Tests -################################################################################ +# ############################################################################### # Compile extras only if this is a top-level project if(POLYSOLVE_WITH_TESTS) @@ -243,4 +324,9 @@ if(POLYSOLVE_WITH_TESTS) include("${catch2_SOURCE_DIR}/contrib/Catch.cmake") add_subdirectory(tests) + + # Cuda test + if(POLYSOLVE_WITH_CUDA) + 
add_subdirectory(cudatest) + endif() endif() diff --git a/README.md b/README.md index 1240da1..c763030 100644 --- a/README.md +++ b/README.md @@ -45,27 +45,27 @@ Polysolve uses a json file to provide parameters to the individual solvers. The { "Eigen::LeastSquaresConjugateGradient": { "max_iter": 1000, - "tolerance": 1e-6 + "tolerance": 1e-10 }, "Eigen::DGMRES": { "max_iter": 1000, - "tolerance": 1e-6 + "tolerance": 1e-10 }, "Eigen::ConjugateGradient": { "max_iter": 1000, - "tolerance": 1e-6 + "tolerance": 1e-10 }, "Eigen::BiCGSTAB": { "max_iter": 1000, - "tolerance": 1e-6 + "tolerance": 1e-10 }, "Eigen::GMRES": { "max_iter": 1000, - "tolerance": 1e-6 + "tolerance": 1e-10 }, "Eigen::MINRES": { "max_iter": 1000, - "tolerance": 1e-6 + "tolerance": 1e-10 }, "Pardiso": { "mtype": -1 @@ -73,7 +73,7 @@ Polysolve uses a json file to provide parameters to the individual solvers. The "Hypre": { "max_iter": 1000, "pre_max_iter": 1000, - "tolerance": 1e-6 + "tolerance": 1e-10 }, "AMGCL": { "precond": { diff --git a/cmake/polysolve/polysolve_warnings.cmake b/cmake/polysolve/polysolve_warnings.cmake index af69b22..8a67d58 100644 --- a/cmake/polysolve/polysolve_warnings.cmake +++ b/cmake/polysolve/polysolve_warnings.cmake @@ -1,7 +1,7 @@ -################################################################################ +# ############################################################################### # See comments and discussions here: # http://stackoverflow.com/questions/5088460/flags-to-enable-thorough-and-verbose-g-warnings -################################################################################ +# ############################################################################### if(TARGET polysolve::warnings) return() @@ -13,7 +13,7 @@ set(POLYSOLVE_WARNING_FLAGS -pedantic # -Wconversion - #-Wunsafe-loop-optimizations # broken with C++11 loops + # -Wunsafe-loop-optimizations # broken with C++11 loops -Wunused -Wno-long-long @@ -43,11 +43,11 @@ 
set(POLYSOLVE_WARNING_FLAGS -Wunused-but-set-variable -Wno-unused-parameter - #-Weffc++ + # -Weffc++ -Wno-old-style-cast - # -Wno-sign-conversion - #-Wsign-conversion + # -Wno-sign-conversion + # -Wsign-conversion -Wshadow -Wstrict-null-sentinel @@ -62,8 +62,10 @@ set(POLYSOLVE_WARNING_FLAGS -Wcast-align -Wdisabled-optimization - #-Winline # produces warning on default implicit destructor + + # -Winline # produces warning on default implicit destructor -Winvalid-pch + # -Wmissing-include-dirs -Wpacked -Wno-padded @@ -74,8 +76,8 @@ set(POLYSOLVE_WARNING_FLAGS -Wlogical-op -Wnoexcept -Woverloaded-virtual - # -Wundef + # -Wundef -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Werror=non-virtual-dtor @@ -83,62 +85,61 @@ set(POLYSOLVE_WARNING_FLAGS -Wno-sign-compare - ########### + # ########## # GCC 6.1 # - ########### - + # ########## -Wnull-dereference -fdelete-null-pointer-checks -Wduplicated-cond -Wmisleading-indentation - #-Weverything + # -Weverything - ########################### + # ########################## # Enabled by -Weverything # - ########################### - - #-Wdocumentation - #-Wdocumentation-unknown-command - #-Wfloat-equal - #-Wcovered-switch-default - - #-Wglobal-constructors - #-Wexit-time-destructors - #-Wmissing-variable-declarations - #-Wextra-semi - #-Wweak-vtables - #-Wno-source-uses-openmp - #-Wdeprecated - #-Wnewline-eof - #-Wmissing-prototypes - - #-Wno-c++98-compat - #-Wno-c++98-compat-pedantic - - ########################### + # ########################## + + # -Wdocumentation + # -Wdocumentation-unknown-command + # -Wfloat-equal + # -Wcovered-switch-default + + # -Wglobal-constructors + # -Wexit-time-destructors + # -Wmissing-variable-declarations + # -Wextra-semi + # -Wweak-vtables + # -Wno-source-uses-openmp + # -Wdeprecated + # -Wnewline-eof + # -Wmissing-prototypes + + # -Wno-c++98-compat + # -Wno-c++98-compat-pedantic + + # ########################## # Need to check if those are still valid today - ########################### + # 
########################## - #-Wimplicit-atomic-properties - #-Wmissing-declarations - #-Wmissing-prototypes - #-Wstrict-selector-match - #-Wundeclared-selector - #-Wunreachable-code + # -Wimplicit-atomic-properties + # -Wmissing-declarations + # -Wmissing-prototypes + # -Wstrict-selector-match + # -Wundeclared-selector + # -Wunreachable-code # Not a warning, but enable link-time-optimization # TODO: Check out modern CMake version of setting this flag # https://cmake.org/cmake/help/latest/module/CheckIPOSupported.html - #-flto + # -flto # Gives meaningful stack traces -fno-omit-frame-pointer -fno-optimize-sibling-calls - ##################### + # #################### # Disabled warnings # - ##################### + # #################### -Wno-missing-noreturn -Wno-shadow -Wno-switch-enum @@ -162,9 +163,11 @@ add_library(polysolve::warnings ALIAS polysolve_warnings) foreach(FLAG IN ITEMS ${POLYSOLVE_WARNING_FLAGS}) string(REPLACE "=" "-" FLAG_VAR "${FLAG}") + if(NOT DEFINED IS_SUPPORTED_${FLAG_VAR}) check_cxx_compiler_flag("${FLAG}" IS_SUPPORTED_${FLAG_VAR}) endif() + if(IS_SUPPORTED_${FLAG_VAR}) target_compile_options(polysolve_warnings INTERFACE $<$:${FLAG}>) endif() diff --git a/cmake/recipes/hypre.cmake b/cmake/recipes/hypre.cmake index 498bddb..f0fd537 100644 --- a/cmake/recipes/hypre.cmake +++ b/cmake/recipes/hypre.cmake @@ -6,67 +6,23 @@ endif() message(STATUS "Third-party: creating target 'HYPRE::HYPRE'") -include(FetchContent) -FetchContent_Declare( - hypre - GIT_REPOSITORY https://github.com/hypre-space/hypre.git - GIT_TAG v2.15.1 - GIT_SHALLOW TRUE -) - -FetchContent_GetProperties(hypre) -if(NOT hypre_POPULATED) - FetchContent_Populate(hypre) - file(REMOVE ${hypre_SOURCE_DIR}/src/utilities/version) -endif() - -################################################################################ - set(HYPRE_SEQUENTIAL ON CACHE INTERNAL "" FORCE) set(HYPRE_PRINT_ERRORS ON CACHE INTERNAL "" FORCE) set(HYPRE_BIGINT ON CACHE INTERNAL "" FORCE) set(HYPRE_USING_FEI 
OFF CACHE INTERNAL "" FORCE) set(HYPRE_USING_OPENMP OFF CACHE INTERNAL "" FORCE) set(HYPRE_SHARED OFF CACHE INTERNAL "" FORCE) -# set(HYPRE_LONG_DOUBLE ON) -set(HYPRE_BUILD_TYPE "${CMAKE_BUILD_TYPE}" CACHE INTERNAL "" FORCE) -add_subdirectory(${hypre_SOURCE_DIR}/src ${hypre_BINARY_DIR}) -add_library(HYPRE::HYPRE ALIAS HYPRE) - -set_property(TARGET HYPRE PROPERTY FOLDER "dependencies") - -target_include_directories(HYPRE PUBLIC ${hypre_BINARY_DIR}) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/blas) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/lapack) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/utilities) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/multivector) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/krylov) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/seq_mv) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/parcsr_mv) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/parcsr_block_mv) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/distributed_matrix) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/IJ_mv) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/matrix_matrix) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/distributed_ls) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/distributed_ls/Euclid) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/distributed_ls/ParaSails) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/parcsr_ls) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/struct_mv) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/struct_ls) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/sstruct_mv) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/sstruct_ls) 
+include(FetchContent) +FetchContent_Declare( + hypre + GIT_REPOSITORY https://github.com/hypre-space/hypre.git + GIT_TAG v2.25.0 + GIT_SHALLOW TRUE +) -if(HYPRE_USING_OPENMP) - find_package(OpenMP QUIET REQUIRED) - target_link_libraries(HYPRE PUBLIC OpenMP::OpenMP_CXX) -endif() +FetchContent_MakeAvailable(hypre) -if(NOT HYPRE_SEQUENTIAL) - find_package(MPI) - if(MPI_CXX_FOUND) - target_link_libraries(HYPRE PUBLIC MPI::MPI_CXX) - endif() -endif() +add_subdirectory("${hypre_SOURCE_DIR}/src") +file(REMOVE "${hypre_SOURCE_DIR}/src/utilities/version") \ No newline at end of file diff --git a/cudatest/CMakeLists.txt b/cudatest/CMakeLists.txt new file mode 100644 index 0000000..8b0febb --- /dev/null +++ b/cudatest/CMakeLists.txt @@ -0,0 +1,37 @@ +# ############################################################################### +# Tests +# ############################################################################### +set(test_sources + main.cpp + cudatest.cpp +) +add_executable(cuda_test ${test_sources}) + +# ############################################################################### +# Required Libraries +# ############################################################################### +include(catch2) +target_link_libraries(cuda_test PUBLIC Catch2::Catch2) + +target_link_libraries(cuda_test PUBLIC polysolve::polysolve) + +include(polysolve_warnings) +target_link_libraries(cuda_test PRIVATE polysolve::warnings) + +# ############################################################################### +# Register tests +# ############################################################################### +foreach(source IN ITEMS ${test_sources}) + source_group("tests" FILES "${source}") +endforeach() + +# Register tests +set(PARSE_CATCH_TESTS_ADD_TO_CONFIGURE_DEPENDS ON) +catch_discover_tests(cuda_test) + +# ############################################################################### +# Data +# ############################################################################### 
+set(DATA_DIR "${CMAKE_SOURCE_DIR}/tests/data") +target_compile_definitions(cuda_test PUBLIC -DPOLYSOLVE_DATA_DIR=\"${DATA_DIR}\") +target_link_libraries(cuda_test PUBLIC -lcudart -lcusparse) \ No newline at end of file diff --git a/cudatest/cudatest.cpp b/cudatest/cudatest.cpp new file mode 100644 index 0000000..f59cc00 --- /dev/null +++ b/cudatest/cudatest.cpp @@ -0,0 +1,239 @@ +#include +#include +#include +#include +#include +#include +#include +#include +////////////////////////////////////////////////////////////////////////// +#include + +using namespace polysolve; + +void loadSymmetric(Eigen::SparseMatrix &A, std::string PATH) +{ + std::ifstream fin(PATH); + long int M, N, L; + while (fin.peek() == '%') + { + fin.ignore(2048, '\n'); + } + fin >> M >> N >> L; + A.resize(M, N); + A.reserve(L * 2 - M); + std::vector> triple; + for (size_t i = 0; i < L; i++) + { + int m, n; + double data; + fin >> m >> n >> data; + triple.push_back(Eigen::Triplet(m - 1, n - 1, data)); + if (m != n) + { + triple.push_back(Eigen::Triplet(n - 1, m - 1, data)); + } + } + fin.close(); + A.setFromTriplets(triple.begin(), triple.end()); +}; +#ifdef POLYSOLVE_WITH_PETSC +TEST_CASE("PETSC-DEFAULTWITHCUDA", "[solver]") +{ + const std::string path = POLYSOLVE_DATA_DIR; + Eigen::SparseMatrix A; + const bool ok = loadMarket(A, path + "/A_2.mat"); + REQUIRE(ok); + + auto solver = LinearSolver::create("PETSC_Solver", ""); + // solver->setParameters(params); + Eigen::VectorXd b(A.rows()); + b.setRandom(); + Eigen::VectorXd x(b.size()); + x.setZero(); + + solver->analyzePattern(A, A.rows()); + solver->factorize(A, 1, 99); + solver->solve(b, x); + + // std::cout<<"Solver error: "< A; + loadSymmetric(A, path + "/" + MatrixName); + + std::cout << "Matrix Load OK" << std::endl; + + Eigen::VectorXd b(A.rows()); + b.setOnes(); + Eigen::VectorXd x(A.rows()); + x.setZero(); + { + amgcl::profiler<> prof("crystm03_GPU"); + json solver_info; + auto solver = LinearSolver::create("AMGCL_cuda", ""); + 
prof.tic("setup"); + json params; + params["AMGCL"]["tolerance"] = 1e-8; + params["AMGCL"]["max_iter"] = 10000; + params["AMGCL"]["solver_type"] = "cg"; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + prof.toc("setup"); + prof.tic("solve"); + solver->solve(b, x); + prof.toc("solve"); + solver->getInfo(solver_info); + REQUIRE(solver_info["num_iterations"] > 0); + std::cout << prof << std::endl; + } + REQUIRE((A * x - b).norm() / b.norm() < 1e-7); +} + +TEST_CASE("amgcl_crystm03_bicgstab", "[solver]") +{ + const std::string path = POLYSOLVE_DATA_DIR; + std::string MatrixName = "crystm03.mtx"; + Eigen::SparseMatrix A; + loadSymmetric(A, path + "/" + MatrixName); + + std::cout << "Matrix Load OK" << std::endl; + + Eigen::VectorXd b(A.rows()); + b.setOnes(); + Eigen::VectorXd x(A.rows()); + x.setZero(); + { + amgcl::profiler<> prof("crystm03_GPU"); + json solver_info; + auto solver = LinearSolver::create("AMGCL_cuda", ""); + prof.tic("setup"); + json params; + params["AMGCL"]["tolerance"] = 1e-8; + params["AMGCL"]["max_iter"] = 10000; + params["AMGCL"]["solver_type"] = "bicgstab"; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + prof.toc("setup"); + prof.tic("solve"); + solver->solve(b, x); + prof.toc("solve"); + solver->getInfo(solver_info); + REQUIRE(solver_info["num_iterations"] > 0); + std::cout << prof << std::endl; + } + REQUIRE((A * x - b).norm() / b.norm() < 1e-7); +} + +TEST_CASE("cusolverdn", "[solver]") +{ + const std::string path = POLYSOLVE_DATA_DIR; + Eigen::SparseMatrix A; + const bool ok = loadMarket(A, path + "/A_2.mat"); + REQUIRE(ok); + + auto solver = LinearSolver::create("cuSolverDN", ""); + // solver->setParameters(params); + Eigen::VectorXd b(A.rows()); + b.setRandom(); + Eigen::VectorXd x(b.size()); + x.setZero(); + + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + solver->solve(b, x); + + // std::cout<<"Solver error: 
"<setParameters(params); + Eigen::VectorXd b(A.rows()); + b.setRandom(); + Eigen::VectorXd x(b.size()); + x.setZero(); + + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + solver->solve(b, x); + + // std::cout<<"Solver error: "<> A(m, n); + } + } + + Eigen::VectorXd b(A.rows()); + std::string gradient_path = path + "/matrixdata-5cubes/gradient" + std::to_string(i) + ".txt"; + std::ifstream gradient_file(gradient_path); + for (int m = 0; m < 120; m++) + { + gradient_file >> b(m); + } + + Eigen::VectorXd x(b.size()); + x.setZero(); + + solver->analyzePattern(A, A.rows()); + + // std::chrono::steady_clock::time_point beginf = std::chrono::steady_clock::now(); + solver->factorize(A); + // std::chrono::steady_clock::time_point endf = std::chrono::steady_clock::now(); + // std::cout << "time to factorize: " << std::chrono::duration_cast(endf-beginf).count() << std::endl; + // factorize_times_file << std::chrono::duration_cast(endf-beginf).count() << " "; + + // std::chrono::steady_clock::time_point begins = std::chrono::steady_clock::now(); + solver->solve(b, x); + // std::chrono::steady_clock::time_point ends = std::chrono::steady_clock::now(); + // std::cout << "time to solve: " << std::chrono::duration_cast(ends-begins).count() << std::endl; + // solve_times_file << std::chrono::duration_cast(ends-begins).count() << " "; + + // std::cout << "Ax norm: " << (A*x).norm() << std::endl; + // std::cout << "b norm: " << b.norm() << std::endl; + + const double err = (A * x - b).norm(); + REQUIRE(err < 1e-8); + } +} \ No newline at end of file diff --git a/cudatest/main.cpp b/cudatest/main.cpp new file mode 100644 index 0000000..f30959d --- /dev/null +++ b/cudatest/main.cpp @@ -0,0 +1,6 @@ +//////////////////////////////////////////////////////////////////////////////// +// Keep this file empty, and implement unit tests in separate compilation units! 
+//////////////////////////////////////////////////////////////////////////////// + +#define CATCH_CONFIG_MAIN +#include \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cfba8a3..9e8fec9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,7 +7,9 @@ set(SOURCES polysolve/LinearSolverAMGCL.cpp polysolve/LinearSolverAMGCL.hpp polysolve/LinearSolverCuSolverDN.cu + polysolve/LinearSolverPETSC.cpp polysolve/LinearSolverCuSolverDN.cuh + polysolve/LinearSolverPETSC.hpp polysolve/LinearSolverEigen.hpp polysolve/LinearSolverEigen.tpp polysolve/LinearSolverHypre.cpp @@ -16,8 +18,11 @@ set(SOURCES polysolve/LinearSolverPardiso.hpp polysolve/SaddlePointSolver.cpp polysolve/SaddlePointSolver.hpp + polysolve/LinearSolverAMGCL_cuda.cu + polysolve/LinearSolverAMGCL_cuda.hpp ) polysolve_prepend_current_path(SOURCES) polysolve_set_source_group(${SOURCES}) + target_sources(polysolve PRIVATE ${SOURCES}) diff --git a/src/polysolve/LinearSolver.cpp b/src/polysolve/LinearSolver.cpp index 6c38a5b..3822522 100644 --- a/src/polysolve/LinearSolver.cpp +++ b/src/polysolve/LinearSolver.cpp @@ -25,10 +25,16 @@ #endif #ifdef POLYSOLVE_WITH_AMGCL #include +#ifdef POLYSOLVE_WITH_CUDA +#include +#endif #endif #ifdef POLYSOLVE_WITH_CUSOLVER #include #endif +#ifdef POLYSOLVE_WITH_PETSC +#include +#endif #include //////////////////////////////////////////////////////////////////////////////// @@ -227,6 +233,12 @@ namespace polysolve { return std::make_unique(); #endif +#ifdef POLYSOLVE_WITH_PETSC + } + else if (solver == "PETSC_Solver") + { + return std::make_unique(); +#endif #ifdef POLYSOLVE_WITH_HYPRE } else if (solver == "Hypre") @@ -238,7 +250,14 @@ namespace polysolve else if (solver == "AMGCL") { return std::make_unique(); +#ifdef POLYSOLVE_WITH_CUDA + } + else if (solver == "AMGCL_cuda") + { + return std::make_unique(); +#endif #endif + #if EIGEN_VERSION_AT_LEAST(3, 3, 0) // Available only with Eigen 3.3.0 and newer #ifndef POLYSOLVE_LARGE_INDEX 
@@ -306,11 +325,17 @@ namespace polysolve #ifdef POLYSOLVE_WITH_CUSOLVER "cuSolverDN", #endif +#ifdef POLYSOLVE_WITH_PETSC + "PETSC_Solver", +#endif #ifdef POLYSOLVE_WITH_HYPRE "Hypre", #endif #ifdef POLYSOLVE_WITH_AMGCL "AMGCL", +#ifdef POLYSOLVE_WITH_CUDA + "AMGCL_cuda", +#endif #endif #if EIGEN_VERSION_AT_LEAST(3, 3, 0) #ifndef POLYSOLVE_LARGE_INDEX diff --git a/src/polysolve/LinearSolver.hpp b/src/polysolve/LinearSolver.hpp index 9654542..78b9e02 100644 --- a/src/polysolve/LinearSolver.hpp +++ b/src/polysolve/LinearSolver.hpp @@ -9,9 +9,9 @@ using json = nlohmann::json; #include #define POLYSOLVE_DELETE_MOVE_COPY(Base) \ - Base(Base &&) = delete; \ - Base &operator=(Base &&) = delete; \ - Base(const Base &) = delete; \ + Base(Base &&) = delete; \ + Base &operator=(Base &&) = delete; \ + Base(const Base &) = delete; \ Base &operator=(const Base &) = delete; //////////////////////////////////////////////////////////////////////////////// @@ -30,8 +30,8 @@ namespace polysolve typedef Eigen::SparseMatrix StiffnessMatrix; #endif /** - * @brief Base class for linear solver. - */ + * @brief Base class for linear solver. 
+ */ class LinearSolver { @@ -85,6 +85,9 @@ namespace polysolve // Factorize system matrix virtual void factorize(const StiffnessMatrix &A) {} + // Factorize system matrix (for PETSC) + virtual void factorize(StiffnessMatrix &A, int AIJ_CUSPARSE, int SOLVER_INDEX) {} + // Analyze sparsity pattern of a dense matrix virtual void analyzePattern(const Eigen::MatrixXd &A, const int precond_num) {} diff --git a/src/polysolve/LinearSolverAMGCL_cuda.cu b/src/polysolve/LinearSolverAMGCL_cuda.cu new file mode 100644 index 0000000..5a5246d --- /dev/null +++ b/src/polysolve/LinearSolverAMGCL_cuda.cu @@ -0,0 +1,173 @@ +#ifdef POLYSOLVE_WITH_AMGCL +#ifdef POLYSOLVE_WITH_CUDA + +//////////////////////////////////////////////////////////////////////////////// +#include + +#include +#include +//////////////////////////////////////////////////////////////////////////////// + +namespace polysolve +{ + + namespace + { + /* https://stackoverflow.com/questions/15904896/range-based-for-loop-on-a-dynamic-array */ + template + struct WrappedArray + { + WrappedArray(const T *first, const T *last) + : begin_{first}, end_{last} {} + WrappedArray(const T *first, std::ptrdiff_t size) + : WrappedArray{first, first + size} {} + + const T *begin() const noexcept { return begin_; } + const T *end() const noexcept { return end_; } + const T &operator[](const size_t i) const { return begin_[i]; } + + const T *begin_; + const T *end_; + }; + + json default_params() + { + json params = R"({ + "precond": { + "relax": { + "type": "spai0" + }, + "class": "amg", + "max_levels": 6, + "direct_coarse": false, + "ncycle": 2, + "coarsening": { + "type": "smoothed_aggregation", + "estimate_spectral_radius": true, + "relax": 1, + "aggr": { + "eps_strong": 0 + } + } + }, + "solver": { + "tol": 1e-10, + "maxiter": 1000, + "type": "cg" + } + })"_json; + + return params; + } + + void set_params(const json ¶ms, json &out) + { + if (params.contains("AMGCL_cuda")) + { + // Patch the stored params with input ones + if 
(params["AMGCL_cuda"].contains("precond")) + out["precond"].merge_patch(params["AMGCL_cuda"]["precond"]); + if (params["AMGCL_cuda"].contains("solver")) + out["solver"].merge_patch(params["AMGCL_cuda"]["solver"]); + + if (out["precond"]["class"] == "schur_pressure_correction") + { + // Initialize the u and p solvers with a tolerance that is comparable to the main solver's + if (!out["precond"].contains("usolver")) + { + out["precond"]["usolver"] = R"({"solver": {"maxiter": 100}})"_json; + out["precond"]["usolver"]["solver"]["tol"] = 10 * out["solver"]["tol"].get(); + } + if (!out["precond"].contains("psolver")) /* psolver gets its own default when the caller did not supply one */ + { + out["precond"]["psolver"] = R"({"solver": {"maxiter": 100}})"_json; + out["precond"]["psolver"]["solver"]["tol"] = 10 * out["solver"]["tol"].get(); + } + } + } + } + } // namespace + + //////////////////////////////////////////////////////////////////////////////// + + LinearSolverAMGCL_cuda::LinearSolverAMGCL_cuda() + { + params_ = default_params(); + // NOTE: usolver and psolver parameters are only used if the + // preconditioner class is "schur_pressure_correction" + precond_num_ = 0; + cusparseCreate(&backend_params_.cusparse_handle); + } + + // Set solver parameters + void LinearSolverAMGCL_cuda::setParameters(const json &params) + { + if (params.contains("AMGCL_cuda")) + { + set_params(params, params_); + } + } + + void LinearSolverAMGCL_cuda::getInfo(json &params) const + { + params["num_iterations"] = iterations_; + params["final_res_norm"] = residual_error_; + } + + //////////////////////////////////////////////////////////////////////////////// + + void LinearSolverAMGCL_cuda::factorize(const StiffnessMatrix &Ain) + { + assert(precond_num_ > 0); + + int numRows = Ain.rows(); + + WrappedArray ia(Ain.outerIndexPtr(), numRows + 1); + WrappedArray ja(Ain.innerIndexPtr(), Ain.nonZeros()); + WrappedArray a(Ain.valuePtr(), Ain.nonZeros()); + if (params_["precond"]["class"] == "schur_pressure_correction") + { + std::vector pmask(numRows, 0); + for (size_t i 
= precond_num_; i < numRows; ++i) + pmask[i] = 1; + params_["precond"]["pmask"] = pmask; + } + + // AMGCL takes the parameters as a Boost property_tree (i.e., another JSON data structure) + std::stringstream ss_params; + ss_params << params_; + boost::property_tree::ptree pt_params; + boost::property_tree::read_json(ss_params, pt_params); + auto A = std::tie(numRows, ia, ja, a); + solver_ = std::make_unique(A, pt_params, backend_params_); + // std::cout << *solver_.get() << std::endl; + iterations_ = 0; + residual_error_ = 0; + } + + //////////////////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////////////////// + + void LinearSolverAMGCL_cuda::solve(const Eigen::Ref rhs, Eigen::Ref result) + { + assert(result.size() == rhs.size()); + std::vector _rhs(rhs.data(), rhs.data() + rhs.size()); + std::vector x(result.data(), result.data() + result.size()); + auto rhs_b = Backend::copy_vector(_rhs, backend_params_); + auto x_b = Backend::copy_vector(x, backend_params_); + + std::tie(iterations_, residual_error_) = (*solver_)(*rhs_b, *x_b); + thrust::copy(x_b->begin(), x_b->end(), result.data()); + } + + //////////////////////////////////////////////////////////////////////////////// + + LinearSolverAMGCL_cuda::~LinearSolverAMGCL_cuda() + { + } + +} // namespace polysolve + +#endif +#endif diff --git a/src/polysolve/LinearSolverAMGCL_cuda.hpp b/src/polysolve/LinearSolverAMGCL_cuda.hpp new file mode 100644 index 0000000..a71bf78 --- /dev/null +++ b/src/polysolve/LinearSolverAMGCL_cuda.hpp @@ -0,0 +1,113 @@ +#pragma once + +#ifdef POLYSOLVE_WITH_AMGCL +#ifdef POLYSOLVE_WITH_CUDA + +//////////////////////////////////////////////////////////////////////////////// +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// SET THIS AS AN OPTIONAL HEADER +#ifdef 
POLYSOLVE_WITH_CUSPARSEILU0 +#include +#endif + +#include +#include +#include +#include +#include +// #include +#include +#include +#include +#include +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////// +// +// WARNING: +// The matrix is assumed to be in row-major format, since AMGCL assumes that the +// outer index is for row. If the matrix is symmetric, you are fine, because CSR +// and CSC are the same. If the matrix is not symmetric and you pass in a +// column-major matrix, the solver will actually solve A^T x = b. +// + +namespace polysolve +{ + class LinearSolverAMGCL_cuda : public LinearSolver + { + + public: + LinearSolverAMGCL_cuda(); + ~LinearSolverAMGCL_cuda(); + + private: + POLYSOLVE_DELETE_MOVE_COPY(LinearSolverAMGCL_cuda) + + public: + ////////////////////// + // Public interface // + ////////////////////// + + // Set solver parameters + virtual void setParameters(const json ¶ms) override; + + // Retrieve information + virtual void getInfo(json ¶ms) const override; + + // Analyze sparsity pattern + virtual void analyzePattern(const StiffnessMatrix &A, const int precond_num) override + { + precond_num_ = precond_num; + } + + // Factorize system matrix + virtual void factorize(const StiffnessMatrix &A) override; + + // Solve the linear system Ax = b + virtual void solve(const Ref b, Ref x) override; + + // Name of the solver type (for debugging purposes) + virtual std::string name() const override { return "AMGCL_cuda"; } + + private: + using Backend = amgcl::backend::cuda; + using Solver = amgcl::make_solver< + amgcl::runtime::preconditioner, + amgcl::runtime::solver::wrapper>; + std::unique_ptr solver_; + json params_; + typename Backend::params backend_params_; + + int precond_num_; + int block_size_ = 1; + + // Output info + size_t iterations_; + double residual_error_; + }; + +} // namespace polysolve + +#endif +#endif diff --git a/src/polysolve/LinearSolverCuSolverDN.cu 
b/src/polysolve/LinearSolverCuSolverDN.cu index 9f1b67e..cf04336 100644 --- a/src/polysolve/LinearSolverCuSolverDN.cu +++ b/src/polysolve/LinearSolverCuSolverDN.cu @@ -6,10 +6,18 @@ #include #include -inline void gpuErrchk(cudaError_t code) { - if (code != cudaSuccess) { - throw cudaGetErrorString(code); - } +#define gpuErrchk(ans) \ + { \ + gpuAssert((ans), __FILE__, __LINE__); \ + } +inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true) +{ + if (code != cudaSuccess) + { + fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); + if (abort) + exit(code); + } } //////////////////////////////////////////////////////////////////////////////// @@ -30,24 +38,20 @@ namespace polysolve void LinearSolverCuSolverDN::setParameters(const json ¶ms) { - } //////////////////////////////////////////////////////////////////////////////// void LinearSolverCuSolverDN::getInfo(json ¶ms) const { - } void LinearSolverCuSolverDN::analyzePattern(const StiffnessMatrix &A, const int precond_num) { - } void LinearSolverCuSolverDN::analyzePattern(const Eigen::MatrixXd &A, const int precond_num) { - } void LinearSolverCuSolverDN::factorize(const StiffnessMatrix &A) @@ -61,10 +65,10 @@ namespace polysolve { numrows = (int)A.rows(); - //copy A to device + // copy A to device gpuErrchk(cudaMalloc(reinterpret_cast(&d_A), sizeof(double) * A.size())); gpuErrchk(cudaMemcpy(d_A, (const void *)A.data(), sizeof(double) * A.size(), cudaMemcpyHostToDevice)); - + cusolverDnXgetrf_bufferSize(cuHandle, cuParams, numrows, numrows, CUDA_R_64F, d_A, numrows, CUDA_R_64F, &d_lwork, &h_lwork); gpuErrchk(cudaMalloc(reinterpret_cast(&d_work), sizeof(double) * d_lwork)); @@ -72,12 +76,13 @@ namespace polysolve gpuErrchk(cudaMalloc(reinterpret_cast(&d_info), sizeof(int))); gpuErrchk(cudaMalloc(reinterpret_cast(&d_Ipiv), sizeof(int64_t) * numrows)); int info = 0; - - //factorize - cusolverStatus_t solvererr = cusolverDnXgetrf(cuHandle, cuParams, numrows, 
numrows, CUDA_R_64F, d_A, - numrows, d_Ipiv, CUDA_R_64F, d_work, d_lwork, h_work, h_lwork, d_info); - if(solvererr == CUSOLVER_STATUS_INVALID_VALUE){ + // factorize + cusolverStatus_t solvererr = cusolverDnXgetrf(cuHandle, cuParams, numrows, numrows, CUDA_R_64F, d_A, + numrows, d_Ipiv, CUDA_R_64F, d_work, d_lwork, h_work, h_lwork, d_info); + + if (solvererr == CUSOLVER_STATUS_INVALID_VALUE) + { throw std::invalid_argument("CUDA returned invalid value"); } @@ -86,21 +91,22 @@ namespace polysolve void LinearSolverCuSolverDN::solve(const Ref b, Ref x) { - //copy b to device + // copy b to device gpuErrchk(cudaMalloc(reinterpret_cast(&d_b), sizeof(double) * b.size())); gpuErrchk(cudaMemcpy(d_b, (const void *)b.data(), sizeof(double) * b.size(), cudaMemcpyHostToDevice)); - //solve + // solve cusolverStatus_t solvererr = cusolverDnXgetrs(cuHandle, cuParams, CUBLAS_OP_N, numrows, 1, - CUDA_R_64F, d_A, numrows, d_Ipiv, - CUDA_R_64F, d_b, numrows, d_info); - if(solvererr == CUSOLVER_STATUS_INVALID_VALUE){ + CUDA_R_64F, d_A, numrows, d_Ipiv, + CUDA_R_64F, d_b, numrows, d_info); + if (solvererr == CUSOLVER_STATUS_INVALID_VALUE) + { throw std::invalid_argument("CUDA returned invalid value"); } int info = 0; gpuErrchk(cudaMemcpyAsync(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost, stream)); - //copy result to x + // copy result to x gpuErrchk(cudaMemcpy(x.data(), d_b, sizeof(double) * x.size(), cudaMemcpyDeviceToHost)); } @@ -117,9 +123,9 @@ namespace polysolve cusolverDnDestroyParams(cuParams); cusolverDnDestroy(cuHandle); cudaStreamDestroy(stream); - cudaDeviceReset(); + // cudaDeviceReset(); //not suitable for polyfem } - -}//namespace polysolve + +} // namespace polysolve #endif \ No newline at end of file diff --git a/src/polysolve/LinearSolverHypre.cpp b/src/polysolve/LinearSolverHypre.cpp index 243bd80..912a7f1 100644 --- a/src/polysolve/LinearSolverHypre.cpp +++ b/src/polysolve/LinearSolverHypre.cpp @@ -4,6 +4,7 @@ #include #include +#include 
//////////////////////////////////////////////////////////////////////////////// namespace polysolve @@ -14,16 +15,22 @@ namespace polysolve LinearSolverHypre::LinearSolverHypre() { precond_num_ = 0; -#ifdef MPI_VERSION - /* Initialize MPI */ - int argc = 1; - char name[] = ""; - char *argv[] = {name}; - char **argvv = &argv[0]; - int myid, num_procs; - MPI_Init(&argc, &argvv); - MPI_Comm_rank(MPI_COMM_WORLD, &myid); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); +#ifdef HYPRE_WITH_MPI + int done_already; + + MPI_Initialized(&done_already); + if (!done_already) + { + /* Initialize MPI */ + int argc = 1; + char name[] = ""; + char *argv[] = {name}; + char **argvv = &argv[0]; + int myid, num_procs; + MPI_Init(&argc, &argvv); + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + } #endif } @@ -32,6 +39,13 @@ namespace polysolve { if (params.contains("Hypre")) { + if (params["Hypre"].contains("block_size")) + { + if (params["Hypre"]["block_size"]==2 || params["Hypre"]["block_size"]==3) + { + dimension_ = params["Hypre"]["block_size"]; + } + } if (params["Hypre"].contains("max_iter")) { max_iter_ = params["Hypre"]["max_iter"]; @@ -68,8 +82,11 @@ namespace polysolve has_matrix_ = true; const HYPRE_Int rows = Ain.rows(); const HYPRE_Int cols = Ain.cols(); - +#ifdef HYPRE_WITH_MPI HYPRE_IJMatrixCreate(MPI_COMM_WORLD, 0, rows - 1, 0, cols - 1, &A); +#else + HYPRE_IJMatrixCreate(hypre_MPI_COMM_WORLD, 0, rows - 1, 0, cols - 1, &A); +#endif // HYPRE_IJMatrixSetPrintLevel(A, 2); HYPRE_IJMatrixSetObjectType(A, HYPRE_PARCSR); HYPRE_IJMatrixInitialize(A); @@ -180,11 +197,18 @@ namespace polysolve HYPRE_IJVector x; HYPRE_ParVector par_x; +#ifdef HYPRE_WITH_MPI HYPRE_IJVectorCreate(MPI_COMM_WORLD, 0, rhs.size() - 1, &b); +#else + HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, 0, rhs.size() - 1, &b); +#endif HYPRE_IJVectorSetObjectType(b, HYPRE_PARCSR); HYPRE_IJVectorInitialize(b); - +#ifdef HYPRE_WITH_MPI HYPRE_IJVectorCreate(MPI_COMM_WORLD, 0, rhs.size() 
- 1, &x); +#else + HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, 0, rhs.size() - 1, &x); /* create the solution vector x, mirroring the MPI branch */ +#endif HYPRE_IJVectorSetObjectType(x, HYPRE_PARCSR); HYPRE_IJVectorInitialize(x); @@ -210,7 +234,11 @@ namespace polysolve /* Create solver */ HYPRE_Solver solver, precond; +#ifdef HYPRE_WITH_MPI HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); +#else + HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &solver); +#endif /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_PCGSetMaxIter(solver, max_iter_); /* max iterations */ diff --git a/src/polysolve/LinearSolverHypre.hpp b/src/polysolve/LinearSolverHypre.hpp index ffc4e6f..6458296 100644 --- a/src/polysolve/LinearSolverHypre.hpp +++ b/src/polysolve/LinearSolverHypre.hpp @@ -12,7 +12,6 @@ #include #include #include -#include //////////////////////////////////////////////////////////////////////////////// // diff --git a/src/polysolve/LinearSolverPETSC.cpp b/src/polysolve/LinearSolverPETSC.cpp new file mode 100644 index 0000000..1334c4d --- /dev/null +++ b/src/polysolve/LinearSolverPETSC.cpp @@ -0,0 +1,197 @@ +#ifdef POLYSOLVE_WITH_PETSC + +//////////////////////////////////////////////////////////////////////////////// +#include +#include +#include +#include + +namespace polysolve +{ + LinearSolverPETSC::LinearSolverPETSC() + { + init(); + } + + int LinearSolverPETSC::init() + { + PetscCall(PetscInitialize(NULL, NULL, NULL, NULL)); + PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); + return 0; + } + + void LinearSolverPETSC::setParameters(const json &params) + { + } + + //////////////////////////////////////////////////////////////////////////////// + + void LinearSolverPETSC::getInfo(json &params) const + { + } + + void LinearSolverPETSC::analyzePattern(const StiffnessMatrix &A, const int precond_num) + { + } + + void LinearSolverPETSC::analyzePattern(const Eigen::MatrixXd &A, const int precond_num) + { + } + + void LinearSolverPETSC::factorize(StiffnessMatrix &A, int AIJ_CUSPARSE, int SOLVER_INDEX) + { + GPU_vec = 
AIJ_CUSPARSE; + /*CHOLMOD requires 64-bit int indices for GPU backend support*/ +#ifdef CHOLMOD_WITH_GPU + std::vector outer_(A.outerSize() + 1); + std::vector inner_(A.nonZeros()); + for (int k = 0; k < A.outerSize() + 1; ++k) + { + outer_[k] = A.outerIndexPtr()[k]; + } + for (int j = 0; j < A.nonZeros(); ++j) + { + inner_[j] = A.innerIndexPtr()[j]; + } + MatCreateSeqAIJWithArrays(PETSC_COMM_WORLD, A.rows(), A.cols(), outer_.data(), inner_.data(), A.valuePtr(), &A_petsc); + MatConvert(A_petsc, MATAIJCUSPARSE, MAT_INPLACE_MATRIX, &A_petsc); +#else + MatCreateSeqAIJWithArrays(PETSC_COMM_WORLD, A.rows(), A.cols(), A.outerIndexPtr(), A.innerIndexPtr(), A.valuePtr(), &A_petsc); + if (AIJ_CUSPARSE) + MatConvert(A_petsc, MATAIJCUSPARSE, MAT_INPLACE_MATRIX, &A_petsc); +#endif + + // IF EIGEN MATRIX IS ROW MAJOR WE DO A TRANSPOSE + // MatTranspose(A_petsc, MAT_INPLACE_MATRIX, &A_petsc); + + MatCreateVecs(A_petsc, &x_petsc, NULL); + + KSPCreate(PETSC_COMM_WORLD, &ksp); + KSPSetOperators(ksp, A_petsc, A_petsc); + if (SOLVER_INDEX == 5) + { + KSPSetType(ksp, KSPGMRES); + KSPSetTolerances(ksp, PETSC_DEFAULT, 1e-50, PETSC_DEFAULT, PETSC_DEFAULT); + } + else + KSPSetType(ksp, KSPPREONLY); + + KSPGetPC(ksp, &pc); + + switch (SOLVER_INDEX) + { + case 0: + PCSetType(pc, PCLU); + PCFactorSetMatSolverType(pc, MATSOLVERMKL_PARDISO); + break; + case 1: + PCSetType(pc, PCLU); + PCFactorSetMatSolverType(pc, MATSOLVERSUPERLU_DIST); + break; + case 2: + PCSetType(pc, PCCHOLESKY); + PCFactorSetMatSolverType(pc, MATSOLVERCHOLMOD); + break; + case 3: + PCSetType(pc, PCLU); + PCFactorSetMatSolverType(pc, MATSOLVERMUMPS); + break; + case 4: + PCSetType(pc, PCLU); + PCFactorSetMatSolverType(pc, MATSOLVERCUSPARSE); + break; + case 5: + PCSetType(pc, PCLU); + PCFactorSetMatSolverType(pc, MATSOLVERSTRUMPACK); + break; + // case 6: + // TODO : FIX HYPRE PARAMETERS + // PCSetType(pc, PCHYPRE); + // PCHYPRESetType(pc, "boomeramg"); + // break; + default: + PCSetType(pc, PCLU); + 
PCFactorSetMatSolverType(pc, MATSOLVERPETSC); + } + + PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */ + PCFactorGetMatrix(pc, &F); + + /*Parameters obtained from https://petsc.org/release/src/ksp/ksp/tutorials/ex52.c.html*/ + if (SOLVER_INDEX == 3) + { + MatMumpsSetIcntl(F, 7, 2); + /* threshold for row pivot detection */ + MatMumpsSetIcntl(F, 24, 1); + MatMumpsSetCntl(F, 3, 1.e-6); + } + if (SOLVER_INDEX == 1) + { + MatSuperluSetILUDropTol(F, 1.e-8); + } + if (SOLVER_INDEX == 5) + { + /* Set the fill-reducing reordering. */ + MatSTRUMPACKSetReordering(F, MAT_STRUMPACK_METIS); + /* Since this is a simple discretization, the diagonal is always */ + /* nonzero, and there is no need for the extra MC64 permutation. */ + MatSTRUMPACKSetColPerm(F, PETSC_FALSE); + /* The compression tolerance used when doing low-rank compression */ + /* in the preconditioner. This is problem specific! */ + // MatSTRUMPACKSetHSSRelTol(F, 1.e-3); + /* Set minimum matrix size for HSS compression to 15 in order to */ + /* demonstrate preconditioner on small problems. For performance */ + /* a value of say 500 is better. */ + MatSTRUMPACKSetHSSMinSepSize(F, 500); + /* You can further limit the fill in the preconditioner by */ + /* setting a maximum rank */ + MatSTRUMPACKSetHSSMaxRank(F, 100); + /* Set the size of the diagonal blocks (the leafs) in the HSS */ + /* approximation. The default value should be better for real */ + /* problems. This is mostly for illustration on a small problem. 
*/ + // MatSTRUMPACKSetHSSLeafSize(F, 4); + } + return; + } + + void LinearSolverPETSC::solve(const Ref b, Ref x) + { + if (GPU_vec) + VecCreateSeqCUDAWithArrays(PETSC_COMM_WORLD, 1, b.rows() * b.cols(), b.data(), NULL, &b_petsc); + else + VecCreateSeqWithArray(PETSC_COMM_WORLD, 1, b.rows() * b.cols(), b.data(), &b_petsc); + + /*USEFUL FOR VARIABLE TEST CASES*/ + // KSPSetFromOptions(ksp); + // KSPSetUp(ksp); + KSPSolve(ksp, b_petsc, x_petsc); + + x.resize(b.rows() * b.cols(), 1); + + int pidx = 0; + for (int i = 0; i < b.rows() * b.cols(); ++i) + { + PetscInt ix[] = {pidx}; + PetscScalar y[] = {0}; + VecGetValues(x_petsc, 1, ix, y); + x(i, 0) = y[0]; + pidx++; + } + } + + //////////////////////////////////////////////////////////////////////////////// + + LinearSolverPETSC::~LinearSolverPETSC() + { + KSPDestroy(&ksp); + MatDestroy(&A_petsc); + VecDestroy(&b_petsc); + VecDestroy(&x_petsc); + // PetscFinalize(); + // TODO: FIX THIS FOR POLYFEM + // MISSING: SET A EXTERNAL FINALIZE + } + +} // namespace polysolve + +#endif \ No newline at end of file diff --git a/src/polysolve/LinearSolverPETSC.hpp b/src/polysolve/LinearSolverPETSC.hpp new file mode 100644 index 0000000..b2b08c3 --- /dev/null +++ b/src/polysolve/LinearSolverPETSC.hpp @@ -0,0 +1,83 @@ +#pragma once + +#ifdef POLYSOLVE_WITH_PETSC + +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////// +// +// https://docs.nvidia.com/cuda/cusolver/index.html#cuSolverDN-function-reference +// + +namespace polysolve +{ + class LinearSolverPETSC : public LinearSolver + { + + public: + LinearSolverPETSC(); + ~LinearSolverPETSC(); + + private: + POLYSOLVE_DELETE_MOVE_COPY(LinearSolverPETSC) + + public: + ////////////////////// + // Public interface // + ////////////////////// + + // Set solver parameters + virtual void setParameters(const json ¶ms) override; + + 
// Retrieve memory information from cuSolverDN + virtual void getInfo(json ¶ms) const override; + + // Analyze sparsity pattern (sparse) + virtual void analyzePattern(const StiffnessMatrix &A, const int precond_num) override; + + // Factorize system matrix for PETSC (MATRIX TYPE AIJ_CUSPARSE: TRUE OR FALSE FOR ONLY SEQAIJ) + /*SOLVER INDEX + 0 = PARDISO + 1 = SUPERLU_DIST + 2 = CHOLMOD + 3 = MUMPS + 4 = CUSPARSE + 5 = STRUMPACK + 6 = HYPRE // NOT FULLY IMPLEMENTED YET + */ + virtual void factorize(StiffnessMatrix &A, int AIJ_CUSPARSE, int SOLVER_INDEX) override; + + // Analyze sparsity pattern (dense, preferred) + virtual void analyzePattern(const Eigen::MatrixXd &A, const int precond_num) override; + + // Solve the linear system Ax = b + virtual void solve(const Ref b, Ref x) override; + + // Name of the solver type (for debugging purposes) + virtual std::string name() const override { return "PETSC_Solver"; } + + protected: + int init(); + + protected: + // PETSC variables + Vec b_petsc, x_petsc, y_petsc; + Mat A_petsc, F; + KSP ksp; + PC pc; + PetscReal norm; + PetscInt its, GPU_vec; + + // Eigen variables + Eigen::SparseMatrix A; + }; + +} // namespace polysolve + +#endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f9e005f..226578e 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,7 +7,6 @@ set(test_sources test_solver.cpp ) add_executable(unit_tests ${test_sources}) - ################################################################################ # Required Libraries ################################################################################ diff --git a/tests/test_solver.cpp b/tests/test_solver.cpp index 358b59d..fcefffb 100644 --- a/tests/test_solver.cpp +++ b/tests/test_solver.cpp @@ -50,6 +50,8 @@ TEST_CASE("all", "[solver]") { if (s == "Eigen::DGMRES") continue; + if (s == "PETSC_Solver") + continue; #ifdef WIN32 if (s == "Eigen::ConjugateGradient" || s == "Eigen::BiCGSTAB" || s == "Eigen::GMRES" || s == 
"Eigen::MINRES") continue; @@ -127,6 +129,8 @@ TEST_CASE("pre_factor", "[solver]") { if (s == "Eigen::DGMRES") continue; + if (s == "PETSC_Solver") + continue; #ifdef WIN32 if (s == "Eigen::ConjugateGradient" || s == "Eigen::BiCGSTAB" || s == "Eigen::GMRES" || s == "Eigen::MINRES") continue; @@ -459,7 +463,7 @@ TEST_CASE("amgcl_blocksolver_crystm03_CG", "[solver]") json solver_info; auto solver = LinearSolver::create("AMGCL", ""); prof.tic("setup"); - json params; + json params; params["AMGCL"]["tolerance"] = 1e-8; params["AMGCL"]["max_iter"] = 1000; params["AMGCL"]["block_size"] = 3; @@ -481,7 +485,7 @@ TEST_CASE("amgcl_blocksolver_crystm03_CG", "[solver]") json solver_info; auto solver = LinearSolver::create("AMGCL", ""); prof.tic("setup"); - json params; + json params; params["AMGCL"]["tolerance"] = 1e-8; params["AMGCL"]["max_iter"] = 10000; solver->setParameters(params); @@ -572,6 +576,185 @@ TEST_CASE("amgcl_blocksolver_crystm03_Bicgstab", "[solver]") } #endif +#ifdef POLYSOLVE_WITH_HYPRE +TEST_CASE("Hyprel_b2", "[solver]") +{ + const std::string path = POLYSOLVE_DATA_DIR; + std::string MatrixName = "gr_30_30.mtx"; + Eigen::SparseMatrix A; + loadSymmetric(A, path + "/" + MatrixName); + std::cout << "Matrix Load OK" << std::endl; + Eigen::VectorXd b(A.rows()); + b.setOnes(); + Eigen::VectorXd x(b.size()); + x.setZero(); + Eigen::VectorXd x_b(b.size()); + x_b.setZero(); + { + clock_t start, end; + json solver_info; + start = clock(); + auto solver = LinearSolver::create("Hypre", ""); + json params; + params["Hypre"]["tolerance"] = 1e-8; + params["Hypre"]["max_iter"] = 1000; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + solver->solve(b, x); + end = clock(); + solver->getInfo(solver_info); + std::cout << "Scalar Running time is " << double(end - start) / CLOCKS_PER_SEC << std::endl; + std::cout << solver_info["num_iterations"] << std::endl; + std::cout << solver_info["final_res_norm"] << std::endl; + } + { + 
clock_t start, end; + json solver_info; + start = clock(); + auto solver = LinearSolver::create("Hypre", ""); + json params; + params["Hypre"]["block_size"] = 2; + params["Hypre"]["tolerance"] = 1e-8; + params["Hypre"]["max_iter"] = 1000; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + solver->solve(b, x_b); + end = clock(); + solver->getInfo(solver_info); + std::cout << "Block Running time is " << double(end - start) / CLOCKS_PER_SEC << std::endl; + std::cout << solver_info["num_iterations"] << std::endl; + std::cout << solver_info["final_res_norm"] << std::endl; + } + const double err = (A * x - b).norm() / b.norm(); + const double err_b = (A * x_b - b).norm() / b.norm(); + std::cout << "Scalar relative error " << err << std::endl; + std::cout << "Block relative error " << err_b << std::endl; + REQUIRE(err < 1e-8); + REQUIRE(err_b < 1e-8); +} +#endif + +#ifdef POLYSOLVE_WITH_HYPRE +TEST_CASE("Hybre_crystm03", "[solver]") +{ + const std::string path = POLYSOLVE_DATA_DIR; + std::string MatrixName = "crystm03.mtx"; + Eigen::SparseMatrix A; + loadSymmetric(A, path + "/" + MatrixName); + std::cout << "Matrix Load OK" << std::endl; + Eigen::VectorXd b(A.rows()); + b.setOnes(); + Eigen::VectorXd x(b.size()); + x.setZero(); + Eigen::VectorXd x_b(b.size()); + x_b.setZero(); + { + clock_t start, end; + json solver_info; + start = clock(); + auto solver = LinearSolver::create("Hypre", ""); + json params; + params["Hypre"]["tolerance"] = 1e-8; + params["Hypre"]["max_iter"] = 1000; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + solver->solve(b, x); + end = clock(); + solver->getInfo(solver_info); + std::cout << "Scalar Running time is " << double(end - start) / CLOCKS_PER_SEC << std::endl; + std::cout << solver_info["num_iterations"] << std::endl; + std::cout << solver_info["final_res_norm"] << std::endl; + } + { + clock_t start, end; + json solver_info; + start = clock(); + 
auto solver = LinearSolver::create("Hypre", ""); + json params; + params["Hypre"]["block_size"] = 3; + params["Hypre"]["tolerance"] = 1e-8; + params["Hypre"]["max_iter"] = 1000; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + solver->solve(b, x_b); + end = clock(); + solver->getInfo(solver_info); + std::cout << "Block Running time is " << double(end - start) / CLOCKS_PER_SEC << std::endl; + std::cout << solver_info["num_iterations"] << std::endl; + std::cout << solver_info["final_res_norm"] << std::endl; + } + const double err = (A * x - b).norm() / b.norm(); + const double err_b = (A * x_b - b).norm() / b.norm(); + std::cout << "Scalar relative error " << err << std::endl; + std::cout << "Block relative error " << err_b << std::endl; + REQUIRE(err < 1e-8); + REQUIRE(err_b < 1e-8); +} +#endif + +#ifdef POLYSOLVE_WITH_HYPRE +TEST_CASE("hypre_smallscale", "[solver]") +{ + const std::string path = POLYSOLVE_DATA_DIR; + Eigen::SparseMatrix A; + const bool ok = loadMarket(A, path + "/A_2.mat"); + REQUIRE(ok); + Eigen::VectorXd b(A.rows()); + b.setOnes(); + Eigen::VectorXd x(b.size()); + x.setZero(); + Eigen::VectorXd x_b(b.size()); + x_b.setZero(); + { + clock_t start, end; + json solver_info; + start = clock(); + auto solver = LinearSolver::create("Hypre", ""); + json params; + params["Hypre"]["tolerance"] = 1e-8; + params["Hypre"]["max_iter"] = 1000; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + solver->solve(b, x); + end = clock(); + solver->getInfo(solver_info); + std::cout << "Scalar Running time is " << double(end - start) / CLOCKS_PER_SEC << std::endl; + std::cout << solver_info["num_iterations"] << std::endl; + std::cout << solver_info["final_res_norm"] << std::endl; + } + { + clock_t start, end; + json solver_info; + start = clock(); + auto solver = LinearSolver::create("Hypre", ""); + json params; + params["Hypre"]["block_size"] = 3; + 
params["Hypre"]["tolerance"] = 1e-8; + params["Hypre"]["max_iter"] = 1000; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + solver->solve(b, x_b); + end = clock(); + solver->getInfo(solver_info); + std::cout << "Block Running time is " << double(end - start) / CLOCKS_PER_SEC << std::endl; + std::cout << solver_info["num_iterations"] << std::endl; + std::cout << solver_info["final_res_norm"] << std::endl; + } + const double err = (A * x - b).norm() / b.norm(); + const double err_b = (A * x_b - b).norm() / b.norm(); + std::cout << "Scalar relative error " << err << std::endl; + std::cout << "Block relative error " << err_b << std::endl; + REQUIRE(err < 1e-8); + REQUIRE(err_b < 1e-8); +} +#endif + #ifdef POLYSOLVE_WITH_CUSOLVER TEST_CASE("cusolverdn", "[solver]") { @@ -600,11 +783,12 @@ TEST_CASE("cusolverdn_dense", "[solver]") const std::string path = POLYSOLVE_DATA_DIR; Eigen::MatrixXd A(4, 4); - for(int i = 0; i < 4; i++){ - A(i,i) = 1.0; + for (int i = 0; i < 4; i++) + { + A(i, i) = 1.0; } - A(0,1) = 1.0; - A(3,0) = 1.0; + A(0, 1) = 1.0; + A(3, 0) = 1.0; auto solver = LinearSolver::create("cuSolverDN", ""); // solver->setParameters(params); @@ -626,24 +810,28 @@ TEST_CASE("cusolverdn_5cubes", "[solver]") { const std::string path = POLYSOLVE_DATA_DIR; auto solver = LinearSolver::create("cuSolverDN", ""); - - //std::ofstream factorize_times_file(path+"/factorize_times_5cubes.txt"); - //std::ofstream solve_times_file(path+"/solve_times_5cubes.txt"); - for(int i = 0; i <= 1091; i++){ + // std::ofstream factorize_times_file(path+"/factorize_times_5cubes.txt"); + // std::ofstream solve_times_file(path+"/solve_times_5cubes.txt"); + + for (int i = 0; i <= 1091; i++) + { Eigen::MatrixXd A(120, 120); std::string hessian_path = path + "/matrixdata-5cubes/hessian" + std::to_string(i) + ".txt"; std::ifstream hessian_file(hessian_path); - for(int m = 0; m < 120; m++){ - for(int n = 0; n < 120; n++){ - hessian_file >> A(m,n); + 
for (int m = 0; m < 120; m++) + { + for (int n = 0; n < 120; n++) + { + hessian_file >> A(m, n); } } Eigen::VectorXd b(A.rows()); std::string gradient_path = path + "/matrixdata-5cubes/gradient" + std::to_string(i) + ".txt"; std::ifstream gradient_file(gradient_path); - for(int m = 0; m < 120; m++){ + for (int m = 0; m < 120; m++) + { gradient_file >> b(m); } @@ -651,24 +839,48 @@ TEST_CASE("cusolverdn_5cubes", "[solver]") x.setZero(); solver->analyzePattern(A, A.rows()); - - //std::chrono::steady_clock::time_point beginf = std::chrono::steady_clock::now(); + + // std::chrono::steady_clock::time_point beginf = std::chrono::steady_clock::now(); solver->factorize(A); - //std::chrono::steady_clock::time_point endf = std::chrono::steady_clock::now(); - //std::cout << "time to factorize: " << std::chrono::duration_cast(endf-beginf).count() << std::endl; - //factorize_times_file << std::chrono::duration_cast(endf-beginf).count() << " "; + // std::chrono::steady_clock::time_point endf = std::chrono::steady_clock::now(); + // std::cout << "time to factorize: " << std::chrono::duration_cast(endf-beginf).count() << std::endl; + // factorize_times_file << std::chrono::duration_cast(endf-beginf).count() << " "; - //std::chrono::steady_clock::time_point begins = std::chrono::steady_clock::now(); + // std::chrono::steady_clock::time_point begins = std::chrono::steady_clock::now(); solver->solve(b, x); - //std::chrono::steady_clock::time_point ends = std::chrono::steady_clock::now(); - //std::cout << "time to solve: " << std::chrono::duration_cast(ends-begins).count() << std::endl; - //solve_times_file << std::chrono::duration_cast(ends-begins).count() << " "; + // std::chrono::steady_clock::time_point ends = std::chrono::steady_clock::now(); + // std::cout << "time to solve: " << std::chrono::duration_cast(ends-begins).count() << std::endl; + // solve_times_file << std::chrono::duration_cast(ends-begins).count() << " "; - //std::cout << "Ax norm: " << (A*x).norm() << std::endl; 
- //std::cout << "b norm: " << b.norm() << std::endl; + // std::cout << "Ax norm: " << (A*x).norm() << std::endl; + // std::cout << "b norm: " << b.norm() << std::endl; const double err = (A * x - b).norm(); REQUIRE(err < 1e-8); } } +#endif + +#ifdef POLYSOLVE_WITH_PETSC +TEST_CASE("PETSC-DEFAULT", "[solver]") +{ + const std::string path = POLYSOLVE_DATA_DIR; + Eigen::SparseMatrix A; + const bool ok = loadMarket(A, path + "/A_2.mat"); + REQUIRE(ok); + + auto solver = LinearSolver::create("PETSC_Solver", ""); + + Eigen::VectorXd b(A.rows()); + b.setRandom(); + Eigen::VectorXd x(b.size()); + x.setZero(); + + solver->analyzePattern(A, A.rows()); + solver->factorize(A, 1, 99); + solver->solve(b, x); + + const double err = (A * x - b).norm(); + REQUIRE(err < 1e-8); +} #endif \ No newline at end of file