Intel compilation #337

Merged · 9 commits · Aug 21, 2019
92 changes: 90 additions & 2 deletions .gitlab-ci.yml
@@ -157,6 +157,18 @@ build/cuda91/clang/all/release/shared:
BUILD_TYPE: Release
EXTRA_CMAKE_FLAGS: *cuda_flags_shared

build/cuda91/intel/all/debug/shared:
<<: *default_build
image: localhost:5000/gko-cuda91-gnu6-llvm40
variables:
<<: *default_variables
C_COMPILER: icc
CXX_COMPILER: icpc
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_TYPE: Debug
EXTRA_CMAKE_FLAGS: *cuda_flags_shared

# cuda 9.2 and friends
build/cuda92/gcc/all/release/shared:
<<: *default_build
@@ -180,6 +192,18 @@ build/cuda92/clang/all/debug/static:
BUILD_TYPE: Debug
EXTRA_CMAKE_FLAGS: *cuda_flags_static

build/cuda92/intel/all/release/static:
<<: *default_build
image: localhost:5000/gko-cuda92-gnu7-llvm50
variables:
<<: *default_variables
C_COMPILER: icc
CXX_COMPILER: icpc
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_TYPE: Release
EXTRA_CMAKE_FLAGS: *cuda_flags_static

# cuda 10.0 and friends
build/cuda100/gcc/all/debug/shared:
<<: *default_build
@@ -203,6 +227,18 @@ build/cuda100/clang/all/release/static:
BUILD_TYPE: Release
EXTRA_CMAKE_FLAGS: *cuda_flags_static

build/cuda100/intel/all/release/shared:
<<: *default_build
image: localhost:5000/gko-cuda100-gnu7-llvm60
variables:
<<: *default_variables
C_COMPILER: icc
CXX_COMPILER: icpc
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_TYPE: Release
EXTRA_CMAKE_FLAGS: *cuda_flags_shared

# no cuda but latest gcc and "soon" clang 7
build/nocuda/gcc/core/debug/static:
<<: *default_build
@@ -226,13 +262,23 @@ build/nocuda/clang/core/release/shared:
EXTRA_CMAKE_FLAGS: &flags_shared
-DBUILD_SHARED_LIBS=ON

build/nocuda/intel/core/debug/shared:
<<: *default_build
image: localhost:5000/gko-nocuda-gnu8-llvm70
variables:
<<: *default_variables
C_COMPILER: icc
CXX_COMPILER: icpc
BUILD_REFERENCE: "OFF"
BUILD_TYPE: Debug
EXTRA_CMAKE_FLAGS: *flags_shared

build/nocuda/gcc/omp/release/shared:
<<: *default_build
image: localhost:5000/gko-nocuda-gnu8-llvm70
variables:
<<: *default_variables
BUILD_OMP: "ON"
BUILD_REFERENCE: "ON"
BUILD_TYPE: Release
EXTRA_CMAKE_FLAGS: *flags_shared

@@ -241,11 +287,23 @@ build/nocuda/clang/omp/debug/static:
image: localhost:5000/gko-nocuda-gnu8-llvm70
variables:
<<: *default_variables
C_COMPILER: clang
CXX_COMPILER: clang++
BUILD_OMP: "ON"
BUILD_REFERENCE: "ON"
BUILD_TYPE: Debug
EXTRA_CMAKE_FLAGS: *flags_static

build/nocuda/intel/omp/release/static:
<<: *default_build
image: localhost:5000/gko-nocuda-gnu8-llvm70
variables:
<<: *default_variables
C_COMPILER: icc
CXX_COMPILER: icpc
BUILD_OMP: "ON"
BUILD_TYPE: Release
EXTRA_CMAKE_FLAGS: *flags_static

# Test jobs
test/cuda90/gcc/all/debug/shared:
<<: *default_test
@@ -272,6 +330,12 @@ test/cuda91/clang/all/release/shared:
dependencies:
- build/cuda91/clang/all/release/shared

test/cuda91/intel/all/debug/shared:
<<: *default_test
image: localhost:5000/gko-cuda91-gnu6-llvm40
dependencies:
- build/cuda91/intel/all/debug/shared

# cuda 9.2 and friends
test/cuda92/gcc/all/release/shared:
<<: *default_test
@@ -285,6 +349,12 @@ test/cuda92/clang/all/debug/static:
dependencies:
- build/cuda92/clang/all/debug/static

test/cuda92/intel/all/release/static:
<<: *default_test
image: localhost:5000/gko-cuda92-gnu7-llvm50
dependencies:
- build/cuda92/intel/all/release/static

# cuda 10.0 and friends
test/cuda100/gcc/all/debug/shared:
<<: *default_test
@@ -298,6 +368,12 @@ test/cuda100/clang/all/release/static:
dependencies:
- build/cuda100/clang/all/release/static

test/cuda100/intel/all/release/shared:
<<: *default_test
image: localhost:5000/gko-cuda100-gnu7-llvm60
dependencies:
- build/cuda100/intel/all/release/shared

# no cuda but latest gcc and "soon" clang 7
test/nocuda/gcc/core/debug/static:
<<: *default_test
@@ -311,6 +387,12 @@ test/nocuda/clang/core/release/shared:
dependencies:
- build/nocuda/clang/core/release/shared

test/nocuda/intel/core/debug/shared:
<<: *default_test
image: localhost:5000/gko-nocuda-gnu8-llvm70
dependencies:
- build/nocuda/intel/core/debug/shared

test/nocuda/gcc/omp/release/shared:
<<: *default_test
image: localhost:5000/gko-nocuda-gnu8-llvm70
@@ -323,6 +405,12 @@ test/nocuda/clang/omp/debug/static:
dependencies:
- build/nocuda/clang/omp/debug/static

test/nocuda/intel/omp/release/static:
<<: *default_test
image: localhost:5000/gko-nocuda-gnu8-llvm70
dependencies:
- build/nocuda/intel/omp/release/static


# Job with important warnings as error
warnings:
9 changes: 5 additions & 4 deletions CMakeLists.txt
@@ -14,20 +14,21 @@ option(GINKGO_BUILD_OMP "Compile OpenMP kernels for CPU" OFF)
option(GINKGO_BUILD_CUDA "Compile kernels for NVIDIA GPUs" OFF)
option(GINKGO_BUILD_DOC "Generate documentation" OFF)
option(GINKGO_SKIP_DEPENDENCY_UPDATE
"Do not update dependencies each time the project is rebuilt" ON)
"Do not update dependencies each time the project is rebuilt" ON)
option(GINKGO_EXPORT_BUILD_DIR
"Make Ginkgo export its build directory to the CMake package registry."
OFF)
option(GINKGO_WITH_CLANG_TIDY "Make Ginkgo call `clang-tidy` to find programming issues." OFF)
option(GINKGO_WITH_IWYU "Make Ginkgo call `iwyu` (Include What You Use) to find include issues." OFF)
set(GINKGO_VERBOSE_LEVEL "1" CACHE STRING
"Verbosity level. Put 0 to turn off. 1 activates a few important messages.")
"Verbosity level. Put 0 to turn off. 1 activates a few important messages.")
set(GINKGO_COMPILER_FLAGS "-Wpedantic" CACHE STRING
"Set the required CXX compiler flags, mainly used for warnings. Current default is `-Wpedantic`")
"Set the required CXX compiler flags, mainly used for warnings. Current default is `-Wpedantic`")
set(GINKGO_CUDA_COMPILER_FLAGS "" CACHE STRING
"Set the required NVCC compiler flags, mainly used for warnings. Current default is an empty string")
"Set the required NVCC compiler flags, mainly used for warnings. Current default is an empty string")
set(GINKGO_CUDA_ARCHITECTURES "Auto" CACHE STRING
"A list of target NVIDIA GPU architectures. See README.md for more detail.")
option(GINKGO_CUDA_DEFAULT_HOST_COMPILER "Tell Ginkgo to not automatically set the CUDA host compiler" OFF)
option(GINKGO_JACOBI_FULL_OPTIMIZATIONS "Use all the optimizations for the CUDA Jacobi algorithm" OFF)
option(BUILD_SHARED_LIBS "Build shared (.so, .dylib, .dll) libraries" ON)
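As an aside, here is a minimal sketch of how these options might be preselected for an Intel build (an editorial illustration, not part of this PR; the file name and chosen values are assumptions, and it presumes `icc`/`icpc` are on the `PATH`, mirroring the `C_COMPILER`/`CXX_COMPILER` settings in the CI jobs above):

```cmake
# intel.cmake -- hypothetical cache preset, loaded with `cmake -C intel.cmake <ginkgo-source-dir>`
set(CMAKE_C_COMPILER "icc" CACHE STRING "C compiler")
set(CMAKE_CXX_COMPILER "icpc" CACHE STRING "C++ compiler")
set(GINKGO_BUILD_OMP ON CACHE BOOL "Compile OpenMP kernels for CPU")
set(GINKGO_BUILD_CUDA ON CACHE BOOL "Compile kernels for NVIDIA GPUs")
# Keep nvcc's own default host compiler instead of forcing CMAKE_CXX_COMPILER onto it;
# see the cuda/CMakeLists.txt hunk further down for the logic this toggles.
set(GINKGO_CUDA_DEFAULT_HOST_COMPILER ON CACHE BOOL "Tell Ginkgo to not automatically set the CUDA host compiler")
```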

1 change: 1 addition & 0 deletions README.md
@@ -34,6 +34,7 @@ For Ginkgo core library:
* C++11 compliant compiler, one of:
* _gcc 5.3+, 6.3+, 7.3+, 8.1+_
* _clang 3.9+_
* _Intel compiler 2017+_
* _Apple LLVM 8.0+_ (__TODO__: verify)

The Ginkgo CUDA module has the following __additional__ requirements:
16 changes: 9 additions & 7 deletions benchmark/utils/spmv_common.hpp
@@ -80,9 +80,11 @@ void validate_option_object(const rapidjson::Value &value)
* @param data the data represented in the intermediate representation format
*
* @tparam MatrixType the Ginkgo matrix type (such as `gko::matrix::Csr<>`)
*
* @return a `unique_ptr` to the created matrix
*/
template <typename MatrixType>
std::unique_ptr<gko::LinOp> read_matrix_from_data(
std::unique_ptr<MatrixType> read_matrix_from_data(
std::shared_ptr<const gko::Executor> exec, const gko::matrix_data<> &data)
{
auto mat = MatrixType::create(std::move(exec));
@@ -96,10 +98,10 @@ std::unique_ptr<gko::LinOp> read_matrix_from_data(
*
* @param MATRIX_TYPE the Ginkgo matrix type (such as `gko::matrix::Csr<>`)
*/
#define READ_MATRIX(MATRIX_TYPE, ...) \
[](std::shared_ptr<const gko::Executor> exec, \
const gko::matrix_data<> &data) -> std::unique_ptr<gko::LinOp> { \
auto mat = MATRIX_TYPE::create(std::move(exec), __VA_ARGS__); \
mat->read(data); \
return mat; \
#define READ_MATRIX(MATRIX_TYPE, ...) \
[](std::shared_ptr<const gko::Executor> exec, \
const gko::matrix_data<> &data) -> std::unique_ptr<MATRIX_TYPE> { \
auto mat = MATRIX_TYPE::create(std::move(exec), __VA_ARGS__); \
mat->read(data); \
return mat; \
}
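With the return type now deduced from `MatrixType`, callers receive the concrete matrix type instead of a plain `gko::LinOp`. A short usage sketch (an editorial illustration, not part of this PR; it assumes the benchmark header and its third-party dependencies such as RapidJSON and gflags are on the include path):

```cpp
#include <ginkgo/ginkgo.hpp>

#include "benchmark/utils/spmv_common.hpp"  // assumed include path from the repository root

int main()
{
    auto exec = gko::ReferenceExecutor::create();
    // A small 2x2 matrix in Ginkgo's intermediate representation format.
    gko::matrix_data<> data{gko::dim<2>{2, 2}, {{0, 0, 1.0}, {1, 1, 2.0}}};

    // Now returns std::unique_ptr<gko::matrix::Csr<>> rather than
    // std::unique_ptr<gko::LinOp>, so Csr-specific members are usable directly.
    auto csr = read_matrix_from_data<gko::matrix::Csr<>>(exec, data);
    return csr->get_num_stored_elements() == 2 ? 0 : 1;
}
```

The `READ_MATRIX` macro above wraps the same call pattern in a lambda, forwarding extra constructor arguments (for example a `Csr` strategy) to `create`.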
4 changes: 4 additions & 0 deletions cmake/CTestCustom.cmake.in
@@ -9,6 +9,10 @@ list(APPEND CTEST_CUSTOM_COVERAGE_EXCLUDE

"test"

"benchmark"

"examples"

"c\\+\\+"
)

12 changes: 6 additions & 6 deletions core/factorization/par_ilu.cpp
@@ -123,14 +123,14 @@ ParIlu<ValueType, IndexType>::generate_l_u(
// directly created with it
Array<IndexType> l_col_idxs{exec, l_nnz};
Array<ValueType> l_vals{exec, l_nnz};
auto l_factor = l_matrix_type::create(exec, matrix_size, std::move(l_vals),
std::move(l_col_idxs),
std::move(l_row_ptrs), csr_strategy);
std::shared_ptr<CsrMatrix> l_factor = l_matrix_type::create(
exec, matrix_size, std::move(l_vals), std::move(l_col_idxs),
std::move(l_row_ptrs), csr_strategy);
Array<IndexType> u_col_idxs{exec, u_nnz};
Array<ValueType> u_vals{exec, u_nnz};
auto u_factor = u_matrix_type::create(exec, matrix_size, std::move(u_vals),
std::move(u_col_idxs),
std::move(u_row_ptrs), csr_strategy);
std::shared_ptr<CsrMatrix> u_factor = u_matrix_type::create(
exec, matrix_size, std::move(u_vals), std::move(u_col_idxs),
std::move(u_row_ptrs), csr_strategy);

exec->run(par_ilu_factorization::make_initialize_l_u(
csr_system_matrix, l_factor.get(), u_factor.get()));
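The two declarations above replace `auto` (which would deduce the `std::unique_ptr` returned by `create`) with an explicit `std::shared_ptr<CsrMatrix>`, relying on `shared_ptr`'s converting constructor. A self-contained sketch of just that conversion, using a placeholder type rather than Ginkgo's matrix classes (an editorial illustration, not part of this PR):

```cpp
#include <memory>

// Placeholder stand-in for the CsrMatrix alias used in the hunk above.
struct CsrMatrix {};

// A factory returning unique ownership, analogous to l_matrix_type::create.
std::unique_ptr<CsrMatrix> create_factor()
{
    return std::make_unique<CsrMatrix>();
}

int main()
{
    // Spelling out the left-hand type converts the returned unique_ptr into a
    // shared_ptr at the declaration; with `auto`, ownership would stay unique.
    std::shared_ptr<CsrMatrix> factor = create_factor();
    return factor ? 0 : 1;
}
```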
6 changes: 6 additions & 0 deletions cuda/CMakeLists.txt
@@ -59,6 +59,12 @@ if(NOT CMAKE_CUDA_COMPILER_VERSION MATCHES "9.0")
PRIVATE
$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)
endif()

if (NOT CMAKE_CUDA_HOST_COMPILER AND NOT GINKGO_CUDA_DEFAULT_HOST_COMPILER)
set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}" CACHE STRING "" FORCE)
elseif(GINKGO_CUDA_DEFAULT_HOST_COMPILER)
unset(CMAKE_CUDA_HOST_COMPILER CACHE)
endif()
target_compile_options(ginkgo_cuda PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:${GINKGO_CUDA_COMPILER_FLAGS}>)
target_compile_options(ginkgo_cuda PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${GINKGO_COMPILER_FLAGS}>)
ginkgo_compile_features(ginkgo_cuda)
14 changes: 9 additions & 5 deletions dev_tools/containers/README.md
@@ -42,11 +42,13 @@ There are minor differences, but all of Ginkgo's recipes install the following
packages:
+ GNU compilers
+ LLVM/Clang
+ Intel Compilers
+ OpenMP
+ Python 2 and 3
+ cmake
+ git, openssh, doxygen, curl (these are required for some synchronization or
documentation building jobs)
+ valgrind, graphviz, jq (documentation and debugging)

### CUDA recipes
Every container is tailored to have matching CUDA, GNU Compilers and LLVM/Clang
@@ -60,16 +62,17 @@ documentation](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.
default limit is gcc 5.4).
+ Arguments can be provided for CUDA, GNU and LLVM version.
+ It is required to use `libomp-dev` library for Clang+OpenMP to work.
+ hwloc is built and the server's topology is added to the container.
+ Finally, `LIBRARY_PATH` and `LD_LIBRARY_PATH` are properly set up for the CUDA
library. For proper CMake detection of the GPUs, this may need to be
extended.


The dockerfiles and container images already generated are:
+ CUDA 9.0, GNU 5.5, LLVM 3.9
+ CUDA 9.1, GNU 6, LLVM 4.0
+ CUDA 9.2, GNU 7, LLVM 5.0
+ CUDA 10.0, GNU 7, LLVM 6.0
+ CUDA 9.0, GNU 5.5, LLVM 3.9, no Intel
+ CUDA 9.1, GNU 6, LLVM 4.0, Intel 2017 update 4
+ CUDA 9.2, GNU 7, LLVM 5.0, Intel 2017 update 4
+ CUDA 10.0, GNU 7, LLVM 6.0, Intel 2018 update 1

### No CUDA recipe
Because CUDA limits the versions of compilers it can work with, it is good
@@ -92,7 +95,8 @@ container from a folder named `papi/` with the following format:
+ `papi/bin`: papi pre-built binary files

The dockerfiles and container images already generated are:
+ GNU 8, LLVM 6 (7 will replace this as soon as it is available)
+ GNU 8, LLVM 6 (7 will replace this as soon as it is available), Intel 2019
update 4.
## Using HPCCM recipes and docker to create containers
The following explains how to use recipes and docker to create new containers.
### Generate the Dockerfile
23 changes: 17 additions & 6 deletions dev_tools/containers/ginkgo-cuda-base.py
@@ -4,6 +4,7 @@
CUDA version set by the user
GNU compilers version set by the user
LLVM/Clang clang-tidy version set by the user
Intel ICC and ICPC version set according to the CUDA version
OpenMP latest apt version for Clang+OpenMP
Python 2 and 3 (upstream)
cmake (upstream)
@@ -17,12 +18,8 @@

cuda_version = USERARG.get('cuda', '10.0')

if float(cuda_version) < float(9.2):
image = 'nvidia/cuda:{}-devel-ubuntu16.04'.format(cuda_version)
Stage0.baseimage(image)
else:
image = 'nvidia/cuda:{}-devel-ubuntu18.04'.format(cuda_version)
Stage0.baseimage(image)
image = 'nvidia/cuda:{}-devel-ubuntu16.04'.format(cuda_version)
Stage0.baseimage(image)


# Correctly set the LIBRARY_PATH
@@ -82,3 +79,17 @@
Stage0 += copy(src='topology/fineci.xml', dest='/')
Stage0 += environment(variables={'HWLOC_XMLFILE': '/fineci.xml'})
Stage0 += environment(variables={'HWLOC_THISSYSTEM': '1'})


# Convert from CUDA version to Intel Compiler years
intel_versions = {'9.0' : '2017', '9.1' : '2017', '9.2' : '2017', '10.0' : '2018'}
intel_path = 'intel/parallel_studio_xe_{}/compilers_and_libraries/linux/'.format(intel_versions.get(cuda_version))
if os.path.isdir(intel_path):
Stage0 += copy(src=intel_path+'bin/intel64/', dest='/opt/intel/bin/')
Stage0 += copy(src=intel_path+'lib/intel64/', dest='/opt/intel/lib/')
Stage0 += copy(src=intel_path+'include/', dest='/opt/intel/include/')
Stage0 += environment(variables={'INTEL_LICENSE_FILE': '28518@scclic1.scc.kit.edu'})
Stage0 += environment(variables={'PATH': '$PATH:/opt/intel/bin'})
Stage0 += environment(variables={'LIBRARY_PATH': '$LIBRARY_PATH:/opt/intel/lib'})
Stage0 += environment(variables={'LD_LIBRARY_PATH': '$LD_LIBRARY_PATH:/opt/intel/lib'})
Stage0 += environment(variables={'LD_RUN_PATH': '$LD_RUN_PATH:/opt/intel/lib'})
20 changes: 16 additions & 4 deletions dev_tools/containers/ginkgo-nocuda-base.py
@@ -3,6 +3,7 @@
Contents:
GNU compilers version set by the user
LLVM/Clang version set by the user
Intel ICC and ICPC version set to the latest available version
OpenMP latest apt version for Clang+OpenMP
Python 2 and 3 (upstream)
cmake (upstream)
@@ -42,7 +43,18 @@
# Copy PAPI libs
add_papi = USERARG.get('papi', 'False')
if os.path.isdir('papi/') and add_papi == 'True':
Stage0 += apt_get(ospackages=['libpfm4'])
Stage0 += copy(src='papi/include/*', dest='/usr/include/')
Stage0 += copy(src='papi/lib/*', dest='/usr/lib/')
Stage0 += copy(src='papi/bin/*', dest='/usr/bin/')
Stage0 += apt_get(ospackages=['libpfm4'])
Stage0 += copy(src='papi/include/*', dest='/usr/include/')
Stage0 += copy(src='papi/lib/*', dest='/usr/lib/')
Stage0 += copy(src='papi/bin/*', dest='/usr/bin/')

intel_path = 'intel/parallel_studio_xe_2019/compilers_and_libraries/linux/'
if os.path.isdir(intel_path):
Stage0 += copy(src=intel_path+'bin/intel64/', dest='/opt/intel/bin/')
Stage0 += copy(src=intel_path+'lib/intel64/', dest='/opt/intel/lib/')
Stage0 += copy(src=intel_path+'include/', dest='/opt/intel/include/')
Stage0 += environment(variables={'INTEL_LICENSE_FILE': '28518@scclic1.scc.kit.edu'})
Stage0 += environment(variables={'PATH': '$PATH:/opt/intel/bin'})
Stage0 += environment(variables={'LIBRARY_PATH': '$LIBRARY_PATH:/opt/intel/lib'})
Stage0 += environment(variables={'LD_LIBRARY_PATH': '$LD_LIBRARY_PATH:/opt/intel/lib'})
Stage0 += environment(variables={'LD_RUN_PATH': '$LD_RUN_PATH:/opt/intel/lib'})