-
Notifications
You must be signed in to change notification settings - Fork 94
/
Copy pathCMakeLists.txt
138 lines (132 loc) · 5.5 KB
/
CMakeLists.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# CUDA backend library. It is assembled from the object files of the
# ginkgo_cuda_device target plus the sources attached below.
# NOTE(review): the trailing "" source argument is kept exactly as it was;
# presumably a placeholder entry - confirm before removing it.
add_library(ginkgo_cuda $<TARGET_OBJECTS:ginkgo_cuda_device> "")
target_sources(ginkgo_cuda
    PRIVATE
        # base
        base/device.cpp
        base/device_matrix_data_kernels.cu
        base/exception.cpp
        base/executor.cpp
        base/index_set_kernels.cpp
        base/memory.cpp
        base/nvtx.cpp
        base/scoped_device_id.cpp
        base/stream.cpp
        base/timer.cpp
        base/version.cpp
        # components
        components/prefix_sum_kernels.cu
        # distributed
        distributed/matrix_kernels.cu
        distributed/partition_kernels.cu
        distributed/vector_kernels.cu
        # factorization
        factorization/cholesky_kernels.cu
        factorization/factorization_kernels.cu
        factorization/ic_kernels.cu
        factorization/ilu_kernels.cu
        factorization/lu_kernels.cu
        factorization/par_ic_kernels.cu
        factorization/par_ict_kernels.cu
        factorization/par_ilu_kernels.cu
        factorization/par_ilut_approx_filter_kernel.cu
        factorization/par_ilut_filter_kernel.cu
        factorization/par_ilut_select_common.cu
        factorization/par_ilut_select_kernel.cu
        factorization/par_ilut_spgeam_kernel.cu
        factorization/par_ilut_sweep_kernel.cu
        # matrix
        matrix/coo_kernels.cu
        matrix/csr_kernels.cu
        matrix/dense_kernels.cu
        matrix/diagonal_kernels.cu
        matrix/ell_kernels.cu
        matrix/fbcsr_kernels.cu
        matrix/fft_kernels.cu
        matrix/sellp_kernels.cu
        matrix/sparsity_csr_kernels.cu
        # multigrid
        multigrid/pgm_kernels.cu
        # preconditioner
        preconditioner/isai_kernels.cu
        preconditioner/jacobi_advanced_apply_kernel.cu
        preconditioner/jacobi_generate_kernel.cu
        preconditioner/jacobi_kernels.cu
        preconditioner/jacobi_simple_apply_kernel.cu
        # reorder
        reorder/rcm_kernels.cu
        # solver
        solver/cb_gmres_kernels.cu
        solver/idr_kernels.cu
        solver/lower_trs_kernels.cu
        solver/multigrid_kernels.cu
        solver/upper_trs_kernels.cu
        # stop
        stop/criterion_kernels.cu
        stop/residual_norm_kernels.cu
        # sources shared between backends, provided by the including scope
        ${GKO_UNIFIED_COMMON_SOURCES}
)
# The common sources must not use the default language mapping: mark every
# entry of GKO_UNIFIED_COMMON_SOURCES as a CUDA source.
foreach(gko_common_source IN LISTS GKO_UNIFIED_COMMON_SOURCES)
    set_source_files_properties(${gko_common_source} PROPERTIES LANGUAGE CUDA)
endforeach()
# Choose the Jacobi block sizes to instantiate: a fixed subset by default, or
# every size from 1 to 32 when GINKGO_JACOBI_FULL_OPTIMIZATIONS is enabled.
if(NOT GINKGO_JACOBI_FULL_OPTIMIZATIONS)
    set(GKO_CUDA_JACOBI_BLOCK_SIZES 1 2 4 8 13 16 32)
else()
    set(GKO_CUDA_JACOBI_BLOCK_SIZES)
    foreach(gko_block_size RANGE 1 32)
        list(APPEND GKO_CUDA_JACOBI_BLOCK_SIZES ${gko_block_size})
    endforeach()
endif()
# For each selected block size, generate one instantiation source per Jacobi
# kernel family (generate, simple_apply, advanced_apply) from its .inc.cu
# template and collect the generated files in GKO_CUDA_JACOBI_SOURCES.
# GKO_JACOBI_BLOCK_SIZE is the variable visible to configure_file() while the
# templates are processed, so the outer loop variable keeps that name.
set(GKO_CUDA_JACOBI_SOURCES)
foreach(GKO_JACOBI_BLOCK_SIZE IN LISTS GKO_CUDA_JACOBI_BLOCK_SIZES)
    foreach(gko_jacobi_kernel IN ITEMS generate simple_apply advanced_apply)
        # output path relative to the current binary directory
        set(gko_jacobi_output
            preconditioner/jacobi_${gko_jacobi_kernel}_instantiate.${GKO_JACOBI_BLOCK_SIZE}.cu)
        configure_file(
            preconditioner/jacobi_${gko_jacobi_kernel}_instantiate.inc.cu
            ${gko_jacobi_output})
        list(APPEND GKO_CUDA_JACOBI_SOURCES
            ${CMAKE_CURRENT_BINARY_DIR}/${gko_jacobi_output})
    endforeach()
endforeach()
target_sources(ginkgo_cuda PRIVATE ${GKO_CUDA_JACOBI_SOURCES})
# Comma-separated form of the block-size list; presumably substituted into
# jacobi_common.hpp.in by the configure_file() call below - verify against the
# template.
string(REPLACE ";" "," GKO_CUDA_JACOBI_BLOCK_SIZES_CODE "${GKO_CUDA_JACOBI_BLOCK_SIZES}")
configure_file(preconditioner/jacobi_common.hpp.in preconditioner/jacobi_common.hpp)
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
    # The extended-lambda option is spelled differently under MSVC.
    if(MSVC)
        set(gko_cuda_lambda_flag --extended-lambda)
    else()
        set(gko_cuda_lambda_flag --expt-extended-lambda)
    endif()
    # --expt-relaxed-constexpr removes false positive CUDA warnings when
    # calling one<T>() and zero<T>() and allows the usage of std::array for
    # nvidia GPUs
    target_compile_options(ginkgo_cuda
        PRIVATE
            $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
            $<$<COMPILE_LANGUAGE:CUDA>:${gko_cuda_lambda_flag}>)
endif()
# Apply the configured flag sets per language: GINKGO_CUDA_COMPILER_FLAGS to
# CUDA sources, GINKGO_COMPILER_FLAGS to C++ sources.
target_compile_options(ginkgo_cuda
    PRIVATE
        $<$<COMPILE_LANGUAGE:CUDA>:${GINKGO_CUDA_COMPILER_FLAGS}>
        $<$<COMPILE_LANGUAGE:CXX>:${GINKGO_COMPILER_FLAGS}>)
# ginkgo_compile_features is a project helper defined outside this file.
ginkgo_compile_features(ginkgo_cuda)
# Private definition visible only while compiling this target's own sources.
target_compile_definitions(ginkgo_cuda PRIVATE GKO_COMPILING_CUDA)
# CUDA toolkit headers are added as SYSTEM include directories.
target_include_directories(ginkgo_cuda
    SYSTEM PRIVATE
        ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
# include path for generated headers like jacobi_common.hpp
target_include_directories(ginkgo_cuda
    PRIVATE
        ${CMAKE_CURRENT_BINARY_DIR}/..)
# Private dependencies: the CUDA runtime and math libraries plus NVTX.
# Public dependencies: ginkgo_device and the platform's dynamic-loading libs.
target_link_libraries(ginkgo_cuda
    PRIVATE
        ${CUDA_RUNTIME_LIBS}
        ${CUBLAS}
        ${CUSPARSE}
        ${CURAND}
        ${CUFFT}
        nvtx::nvtx
    # NVTX3 is header-only and requires dlopen/dlclose in static builds
    PUBLIC
        ginkgo_device
        ${CMAKE_DL_LIBS})
# Pass the project-selected architecture flags to CUDA compilations only.
target_compile_options(ginkgo_cuda
    PRIVATE
        "$<$<COMPILE_LANGUAGE:CUDA>:${GINKGO_CUDA_ARCH_FLAGS}>")
# we handle CUDA architecture flags for now, disable CMake handling
# (the property is only set on CMake 3.18 and newer)
if(NOT CMAKE_VERSION VERSION_LESS 3.18)
    set_property(TARGET ginkgo_cuda PROPERTY CUDA_ARCHITECTURES OFF)
endif()
# Take the directory that contains the first entry of CUDA_RUNTIME_LIBS and
# hand it to the project's install helper together with the target.
# NOTE(review): list(GET ... 0) assumes CUDA_RUNTIME_LIBS is non-empty -
# confirm that the code populating it guarantees this.
list(GET CUDA_RUNTIME_LIBS 0 CUDA_FIRST_LIB)
get_filename_component(GKO_CUDA_LIBDIR "${CUDA_FIRST_LIB}" DIRECTORY)
# ginkgo_default_includes and ginkgo_install_library are defined elsewhere in
# the project; their exact effect is not visible from this file.
ginkgo_default_includes(ginkgo_cuda)
ginkgo_install_library(ginkgo_cuda "${GKO_CUDA_LIBDIR}")
# Optional header check for this target; ginkgo_check_headers is a project
# helper defined outside this file.
if(GINKGO_CHECK_CIRCULAR_DEPS)
    ginkgo_check_headers(ginkgo_cuda GKO_COMPILING_CUDA)
endif()
# Descend into the test directory only when tests are requested.
if(GINKGO_BUILD_TESTS)
    add_subdirectory(test)
endif()