Intel compilation #337

Merged · 9 commits · Aug 21, 2019
92 changes: 90 additions & 2 deletions .gitlab-ci.yml
@@ -157,6 +157,18 @@ build/cuda91/clang/all/release/shared:
BUILD_TYPE: Release
EXTRA_CMAKE_FLAGS: *cuda_flags_shared

build/cuda91/intel/all/debug/shared:
<<: *default_build
image: localhost:5000/gko-cuda91-gnu6-llvm40
variables:
<<: *default_variables
C_COMPILER: icc
CXX_COMPILER: icpc
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_TYPE: Debug
EXTRA_CMAKE_FLAGS: *cuda_flags_shared

# cuda 9.2 and friends
build/cuda92/gcc/all/release/shared:
<<: *default_build
@@ -180,6 +192,18 @@ build/cuda92/clang/all/debug/static:
BUILD_TYPE: Debug
EXTRA_CMAKE_FLAGS: *cuda_flags_static

build/cuda92/intel/all/release/static:
<<: *default_build
image: localhost:5000/gko-cuda92-gnu7-llvm50
variables:
<<: *default_variables
C_COMPILER: icc
CXX_COMPILER: icpc
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_TYPE: Release
EXTRA_CMAKE_FLAGS: *cuda_flags_static

# cuda 10.0 and friends
build/cuda100/gcc/all/debug/shared:
<<: *default_build
@@ -203,6 +227,18 @@ build/cuda100/clang/all/release/static:
BUILD_TYPE: Release
EXTRA_CMAKE_FLAGS: *cuda_flags_static

build/cuda100/intel/all/release/shared:
<<: *default_build
image: localhost:5000/gko-cuda100-gnu7-llvm60
variables:
<<: *default_variables
C_COMPILER: icc
CXX_COMPILER: icpc
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_TYPE: Release
EXTRA_CMAKE_FLAGS: *cuda_flags_shared

# no cuda but latest gcc and "soon" clang 7
build/nocuda/gcc/core/debug/static:
<<: *default_build
@@ -226,13 +262,23 @@ build/nocuda/clang/core/release/shared:
EXTRA_CMAKE_FLAGS: &flags_shared
-DBUILD_SHARED_LIBS=ON

build/nocuda/intel/core/debug/shared:
<<: *default_build
image: localhost:5000/gko-nocuda-gnu8-llvm70
variables:
<<: *default_variables
C_COMPILER: icc
CXX_COMPILER: icpc
BUILD_REFERENCE: "OFF"
BUILD_TYPE: Debug
EXTRA_CMAKE_FLAGS: *flags_shared

build/nocuda/gcc/omp/release/shared:
<<: *default_build
image: localhost:5000/gko-nocuda-gnu8-llvm70
variables:
<<: *default_variables
BUILD_OMP: "ON"
BUILD_REFERENCE: "ON"
BUILD_TYPE: Release
EXTRA_CMAKE_FLAGS: *flags_shared

@@ -241,11 +287,23 @@ build/nocuda/clang/omp/debug/static:
image: localhost:5000/gko-nocuda-gnu8-llvm70
variables:
<<: *default_variables
C_COMPILER: clang
CXX_COMPILER: clang++
BUILD_OMP: "ON"
BUILD_REFERENCE: "ON"
BUILD_TYPE: Debug
EXTRA_CMAKE_FLAGS: *flags_static

build/nocuda/intel/omp/release/static:
<<: *default_build
image: localhost:5000/gko-nocuda-gnu8-llvm70
variables:
<<: *default_variables
C_COMPILER: icc
CXX_COMPILER: icpc
BUILD_OMP: "ON"
BUILD_TYPE: Release
EXTRA_CMAKE_FLAGS: *flags_static

# Test jobs
test/cuda90/gcc/all/debug/shared:
<<: *default_test
@@ -272,6 +330,12 @@ test/cuda91/clang/all/release/shared:
dependencies:
- build/cuda91/clang/all/release/shared

test/cuda91/intel/all/debug/shared:
<<: *default_test
image: localhost:5000/gko-cuda91-gnu6-llvm40
dependencies:
- build/cuda91/intel/all/debug/shared

# cuda 9.2 and friends
test/cuda92/gcc/all/release/shared:
<<: *default_test
@@ -285,6 +349,12 @@ test/cuda92/clang/all/debug/static:
dependencies:
- build/cuda92/clang/all/debug/static

test/cuda92/intel/all/release/static:
<<: *default_test
image: localhost:5000/gko-cuda92-gnu7-llvm50
dependencies:
- build/cuda92/intel/all/release/static

# cuda 10.0 and friends
test/cuda100/gcc/all/debug/shared:
<<: *default_test
@@ -298,6 +368,12 @@ test/cuda100/clang/all/release/static:
dependencies:
- build/cuda100/clang/all/release/static

test/cuda100/intel/all/release/shared:
<<: *default_test
image: localhost:5000/gko-cuda100-gnu7-llvm60
dependencies:
- build/cuda100/intel/all/release/shared

# no cuda but latest gcc and "soon" clang 7
test/nocuda/gcc/core/debug/static:
<<: *default_test
@@ -311,6 +387,12 @@ test/nocuda/clang/core/release/shared:
dependencies:
- build/nocuda/clang/core/release/shared

test/nocuda/intel/core/debug/shared:
<<: *default_test
image: localhost:5000/gko-nocuda-gnu8-llvm70
dependencies:
- build/nocuda/intel/core/debug/shared

test/nocuda/gcc/omp/release/shared:
<<: *default_test
image: localhost:5000/gko-nocuda-gnu8-llvm70
@@ -323,6 +405,12 @@ test/nocuda/clang/omp/debug/static:
dependencies:
- build/nocuda/clang/omp/debug/static

test/nocuda/intel/omp/release/static:
<<: *default_test
image: localhost:5000/gko-nocuda-gnu8-llvm70
dependencies:
- build/nocuda/intel/omp/release/static


# Job with important warnings as error
warnings:
9 changes: 5 additions & 4 deletions CMakeLists.txt
@@ -14,20 +14,21 @@ option(GINKGO_BUILD_OMP "Compile OpenMP kernels for CPU" OFF)
option(GINKGO_BUILD_CUDA "Compile kernels for NVIDIA GPUs" OFF)
option(GINKGO_BUILD_DOC "Generate documentation" OFF)
option(GINKGO_SKIP_DEPENDENCY_UPDATE
"Do not update dependencies each time the project is rebuilt" ON)
"Do not update dependencies each time the project is rebuilt" ON)
option(GINKGO_EXPORT_BUILD_DIR
"Make Ginkgo export its build directory to the CMake package registry."
OFF)
option(GINKGO_WITH_CLANG_TIDY "Make Ginkgo call `clang-tidy` to find programming issues." OFF)
option(GINKGO_WITH_IWYU "Make Ginkgo call `iwyu` (Include What You Use) to find include issues." OFF)
set(GINKGO_VERBOSE_LEVEL "1" CACHE STRING
"Verbosity level. Put 0 to turn off. 1 activates a few important messages.")
"Verbosity level. Put 0 to turn off. 1 activates a few important messages.")
set(GINKGO_COMPILER_FLAGS "-Wpedantic" CACHE STRING
"Set the required CXX compiler flags, mainly used for warnings. Current default is `-Wpedantic`")
"Set the required CXX compiler flags, mainly used for warnings. Current default is `-Wpedantic`")
set(GINKGO_CUDA_COMPILER_FLAGS "" CACHE STRING
"Set the required NVCC compiler flags, mainly used for warnings. Current default is an empty string")
"Set the required NVCC compiler flags, mainly used for warnings. Current default is an empty string")
set(GINKGO_CUDA_ARCHITECTURES "Auto" CACHE STRING
"A list of target NVIDIA GPU architectures. See README.md for more detail.")
option(GINKGO_CUDA_DEFAULT_HOST_COMPILER "Tell Ginkgo to not automatically set the CUDA host compiler" OFF)
option(GINKGO_JACOBI_FULL_OPTIMIZATIONS "Use all the optimizations for the CUDA Jacobi algorithm" OFF)
option(BUILD_SHARED_LIBS "Build shared (.so, .dylib, .dll) libraries" ON)
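As an aside, here is a minimal sketch of how these options might be preselected for an Intel build (an editorial illustration, not part of this PR; the file name and chosen values are assumptions, and it presumes `icc`/`icpc` are on the `PATH`, mirroring the `C_COMPILER`/`CXX_COMPILER` settings in the CI jobs above):

```cmake
# intel.cmake -- hypothetical cache preset, loaded with `cmake -C intel.cmake <ginkgo-source-dir>`
set(CMAKE_C_COMPILER "icc" CACHE STRING "C compiler")
set(CMAKE_CXX_COMPILER "icpc" CACHE STRING "C++ compiler")
set(GINKGO_BUILD_OMP ON CACHE BOOL "Compile OpenMP kernels for CPU")
set(GINKGO_BUILD_CUDA ON CACHE BOOL "Compile kernels for NVIDIA GPUs")
# Keep nvcc's own default host compiler instead of forcing CMAKE_CXX_COMPILER onto it;
# see the cuda/CMakeLists.txt hunk further down for the logic this toggles.
set(GINKGO_CUDA_DEFAULT_HOST_COMPILER ON CACHE BOOL "Tell Ginkgo to not automatically set the CUDA host compiler")
```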

1 change: 1 addition & 0 deletions README.md
@@ -34,6 +34,7 @@ For Ginkgo core library:
* C++11 compliant compiler, one of:
* _gcc 5.3+, 6.3+, 7.3+, 8.1+_
* _clang 3.9+_
* _Intel compiler 2017+_
* _Apple LLVM 8.0+_ (__TODO__: verify)

The Ginkgo CUDA module has the following __additional__ requirements:
16 changes: 9 additions & 7 deletions benchmark/utils/spmv_common.hpp
@@ -80,9 +80,11 @@ void validate_option_object(const rapidjson::Value &value)
* @param data the data represented in the intermediate representation format
*
* @tparam MatrixType the Ginkgo matrix type (such as `gko::matrix::Csr<>`)
*
* @return a `unique_ptr` to the created matrix
*/
template <typename MatrixType>
std::unique_ptr<gko::LinOp> read_matrix_from_data(
std::unique_ptr<MatrixType> read_matrix_from_data(
std::shared_ptr<const gko::Executor> exec, const gko::matrix_data<> &data)
{
auto mat = MatrixType::create(std::move(exec));
@@ -96,10 +98,10 @@ std::unique_ptr<gko::LinOp> read_matrix_from_data(
*
* @param MATRIX_TYPE the Ginkgo matrix type (such as `gko::matrix::Csr<>`)
*/
#define READ_MATRIX(MATRIX_TYPE, ...) \
[](std::shared_ptr<const gko::Executor> exec, \
const gko::matrix_data<> &data) -> std::unique_ptr<gko::LinOp> { \
auto mat = MATRIX_TYPE::create(std::move(exec), __VA_ARGS__); \
mat->read(data); \
return mat; \
#define READ_MATRIX(MATRIX_TYPE, ...) \
[](std::shared_ptr<const gko::Executor> exec, \
const gko::matrix_data<> &data) -> std::unique_ptr<MATRIX_TYPE> { \
auto mat = MATRIX_TYPE::create(std::move(exec), __VA_ARGS__); \
mat->read(data); \
return mat; \
}
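With the return type now deduced from `MatrixType`, callers receive the concrete matrix type instead of a plain `gko::LinOp`. A short usage sketch (an editorial illustration, not part of this PR; it assumes the benchmark header and its third-party dependencies such as RapidJSON and gflags are on the include path):

```cpp
#include <ginkgo/ginkgo.hpp>

#include "benchmark/utils/spmv_common.hpp"  // assumed include path from the repository root

int main()
{
    auto exec = gko::ReferenceExecutor::create();
    // A small 2x2 matrix in Ginkgo's intermediate representation format.
    gko::matrix_data<> data{gko::dim<2>{2, 2}, {{0, 0, 1.0}, {1, 1, 2.0}}};

    // Now returns std::unique_ptr<gko::matrix::Csr<>> rather than
    // std::unique_ptr<gko::LinOp>, so Csr-specific members are usable directly.
    auto csr = read_matrix_from_data<gko::matrix::Csr<>>(exec, data);
    return csr->get_num_stored_elements() == 2 ? 0 : 1;
}
```

The `READ_MATRIX` macro above wraps the same call pattern in a lambda, forwarding extra constructor arguments (for example a `Csr` strategy) to `create`.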
4 changes: 4 additions & 0 deletions cmake/CTestCustom.cmake.in
@@ -9,6 +9,10 @@ list(APPEND CTEST_CUSTOM_COVERAGE_EXCLUDE

"test"

"benchmark"

"examples"

"c\\+\\+"
)

12 changes: 6 additions & 6 deletions core/factorization/par_ilu.cpp
@@ -123,14 +123,14 @@ ParIlu<ValueType, IndexType>::generate_l_u(
// directly created with it
Array<IndexType> l_col_idxs{exec, l_nnz};
Array<ValueType> l_vals{exec, l_nnz};
auto l_factor = l_matrix_type::create(exec, matrix_size, std::move(l_vals),
std::move(l_col_idxs),
std::move(l_row_ptrs), csr_strategy);
std::shared_ptr<CsrMatrix> l_factor = l_matrix_type::create(
exec, matrix_size, std::move(l_vals), std::move(l_col_idxs),
std::move(l_row_ptrs), csr_strategy);
Array<IndexType> u_col_idxs{exec, u_nnz};
Array<ValueType> u_vals{exec, u_nnz};
auto u_factor = u_matrix_type::create(exec, matrix_size, std::move(u_vals),
std::move(u_col_idxs),
std::move(u_row_ptrs), csr_strategy);
std::shared_ptr<CsrMatrix> u_factor = u_matrix_type::create(
exec, matrix_size, std::move(u_vals), std::move(u_col_idxs),
std::move(u_row_ptrs), csr_strategy);

exec->run(par_ilu_factorization::make_initialize_l_u(
csr_system_matrix, l_factor.get(), u_factor.get()));
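The two declarations above replace `auto` (which would deduce the `std::unique_ptr` returned by `create`) with an explicit `std::shared_ptr<CsrMatrix>`, relying on `shared_ptr`'s converting constructor. A self-contained sketch of just that conversion, using a placeholder type rather than Ginkgo's matrix classes (an editorial illustration, not part of this PR):

```cpp
#include <memory>

// Placeholder stand-in for the CsrMatrix alias used in the hunk above.
struct CsrMatrix {};

// A factory returning unique ownership, analogous to l_matrix_type::create.
std::unique_ptr<CsrMatrix> create_factor()
{
    return std::make_unique<CsrMatrix>();
}

int main()
{
    // Spelling out the left-hand type converts the returned unique_ptr into a
    // shared_ptr at the declaration; with `auto`, ownership would stay unique.
    std::shared_ptr<CsrMatrix> factor = create_factor();
    return factor ? 0 : 1;
}
```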
6 changes: 6 additions & 0 deletions cuda/CMakeLists.txt
@@ -59,6 +59,12 @@ if(NOT CMAKE_CUDA_COMPILER_VERSION MATCHES "9.0")
PRIVATE
$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)
endif()

if (NOT CMAKE_CUDA_HOST_COMPILER AND NOT GINKGO_CUDA_DEFAULT_HOST_COMPILER)
set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}" CACHE STRING "" FORCE)
elseif(GINKGO_CUDA_DEFAULT_HOST_COMPILER)
unset(CMAKE_CUDA_HOST_COMPILER CACHE)
endif()
target_compile_options(ginkgo_cuda PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:${GINKGO_CUDA_COMPILER_FLAGS}>)
target_compile_options(ginkgo_cuda PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${GINKGO_COMPILER_FLAGS}>)
ginkgo_compile_features(ginkgo_cuda)
14 changes: 9 additions & 5 deletions dev_tools/containers/README.md
@@ -42,11 +42,13 @@ There are minor differences, but all of Ginkgo's recipes install the following
packages:
+ GNU compilers
+ LLVM/Clang
+ Intel Compilers
+ OpenMP
+ Python 2 and 3
+ cmake
+ git, openssh, doxygen, curl (these are required for some synchronization or
documentation building jobs)
+ valgrind, graphviz, jq (documentation and debugging)

### CUDA recipes
Every container is tailored to have matching CUDA, GNU Compilers and LLVM/Clang
@@ -60,16 +62,17 @@ documentation](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.
default limit is gcc 5.4).
+ Arguments can be provided for CUDA, GNU and LLVM version.
+ It is required to use `libomp-dev` library for Clang+OpenMP to work.
+ hwloc is built and the server's topology is added to the container.
+ Finally, `LIBRARY_PATH` and `LD_LIBRARY_PATH` are properly set up for the CUDA
library. For proper CMake detection of the GPUs, this may need to be
extended.


The dockerfiles and container images already generated are:
+ CUDA 9.0, GNU 5.5, LLVM 3.9
+ CUDA 9.1, GNU 6, LLVM 4.0
+ CUDA 9.2, GNU 7, LLVM 5.0
+ CUDA 10.0, GNU 7, LLVM 6.0
+ CUDA 9.0, GNU 5.5, LLVM 3.9, no Intel
+ CUDA 9.1, GNU 6, LLVM 4.0, Intel 2017 update 4
+ CUDA 9.2, GNU 7, LLVM 5.0, Intel 2017 update 4
+ CUDA 10.0, GNU 7, LLVM 6.0, Intel 2018 update 1

### No CUDA recipe
Because CUDA limits the versions of compilers it can work with, it is good
@@ -92,7 +95,8 @@ container from a folder named `papi/` with the following format:
+ `papi/bin`: papi pre-built binary files

The dockerfiles and container images already generated are:
+ GNU 8, LLVM 6 (7 will replace this as soon as it is available)
+ GNU 8, LLVM 6 (7 will replace this as soon as it is available), Intel 2019
update 4.
## Using HPCCM recipes and docker to create containers
The following explains how to use recipes and docker to create new containers.
### Generate the Dockerfile
23 changes: 17 additions & 6 deletions dev_tools/containers/ginkgo-cuda-base.py
@@ -4,6 +4,7 @@
CUDA version set by the user
GNU compilers version set by the user
LLVM/Clang clang-tidy version set by the user
Intel ICC and ICPC version set according to the CUDA version
OpenMP latest apt version for Clang+OpenMP
Python 2 and 3 (upstream)
cmake (upstream)
@@ -17,12 +18,8 @@

cuda_version = USERARG.get('cuda', '10.0')

if float(cuda_version) < float(9.2):
image = 'nvidia/cuda:{}-devel-ubuntu16.04'.format(cuda_version)
Stage0.baseimage(image)
else:
image = 'nvidia/cuda:{}-devel-ubuntu18.04'.format(cuda_version)
Stage0.baseimage(image)
image = 'nvidia/cuda:{}-devel-ubuntu16.04'.format(cuda_version)
Stage0.baseimage(image)


# Correctly set the LIBRARY_PATH
@@ -82,3 +79,17 @@
Stage0 += copy(src='topology/fineci.xml', dest='/')
Stage0 += environment(variables={'HWLOC_XMLFILE': '/fineci.xml'})
Stage0 += environment(variables={'HWLOC_THISSYSTEM': '1'})


# Convert from CUDA version to Intel Compiler years
intel_versions = {'9.0' : '2017', '9.1' : '2017', '9.2' : '2017', '10.0' : '2018'}
intel_path = 'intel/parallel_studio_xe_{}/compilers_and_libraries/linux/'.format(intel_versions.get(cuda_version))
if os.path.isdir(intel_path):
Stage0 += copy(src=intel_path+'bin/intel64/', dest='/opt/intel/bin/')
Stage0 += copy(src=intel_path+'lib/intel64/', dest='/opt/intel/lib/')
Stage0 += copy(src=intel_path+'include/', dest='/opt/intel/include/')
Stage0 += environment(variables={'INTEL_LICENSE_FILE': '28518@scclic1.scc.kit.edu'})
Stage0 += environment(variables={'PATH': '$PATH:/opt/intel/bin'})
Stage0 += environment(variables={'LIBRARY_PATH': '$LIBRARY_PATH:/opt/intel/lib'})
Stage0 += environment(variables={'LD_LIBRARY_PATH': '$LD_LIBRARY_PATH:/opt/intel/lib'})
Stage0 += environment(variables={'LD_RUN_PATH': '$LD_RUN_PATH:/opt/intel/lib'})
20 changes: 16 additions & 4 deletions dev_tools/containers/ginkgo-nocuda-base.py
@@ -3,6 +3,7 @@
Contents:
GNU compilers version set by the user
LLVM/Clang version set by the user
Intel ICC and ICPC version set to the latest available version
OpenMP latest apt version for Clang+OpenMP
Python 2 and 3 (upstream)
cmake (upstream)
@@ -42,7 +43,18 @@
# Copy PAPI libs
add_papi = USERARG.get('papi', 'False')
if os.path.isdir('papi/') and add_papi == 'True':
Stage0 += apt_get(ospackages=['libpfm4'])
Stage0 += copy(src='papi/include/*', dest='/usr/include/')
Stage0 += copy(src='papi/lib/*', dest='/usr/lib/')
Stage0 += copy(src='papi/bin/*', dest='/usr/bin/')
Stage0 += apt_get(ospackages=['libpfm4'])
Stage0 += copy(src='papi/include/*', dest='/usr/include/')
Stage0 += copy(src='papi/lib/*', dest='/usr/lib/')
Stage0 += copy(src='papi/bin/*', dest='/usr/bin/')

intel_path = 'intel/parallel_studio_xe_2019/compilers_and_libraries/linux/'
if os.path.isdir(intel_path):
Stage0 += copy(src=intel_path+'bin/intel64/', dest='/opt/intel/bin/')
Stage0 += copy(src=intel_path+'lib/intel64/', dest='/opt/intel/lib/')
Stage0 += copy(src=intel_path+'include/', dest='/opt/intel/include/')
Stage0 += environment(variables={'INTEL_LICENSE_FILE': '28518@scclic1.scc.kit.edu'})
Stage0 += environment(variables={'PATH': '$PATH:/opt/intel/bin'})
Stage0 += environment(variables={'LIBRARY_PATH': '$LIBRARY_PATH:/opt/intel/lib'})
Stage0 += environment(variables={'LD_LIBRARY_PATH': '$LD_LIBRARY_PATH:/opt/intel/lib'})
Stage0 += environment(variables={'LD_RUN_PATH': '$LD_RUN_PATH:/opt/intel/lib'})