Squashed 'thirdParty/alpaka/' changes from 655eb708f..90ae873b9
90ae873b9 Merge pull request ComputationalRadiationPhysics#382 from BenjaminW3/topic-CMake-3_9_followup
aad3f6a5e add missing changes to alpaka_add_library
d421e9a20 Merge pull request ComputationalRadiationPhysics#381 from BenjaminW3/topic-cmake-3_9
71fc4e7ed support CMake 3.9.0
441032e7f Merge pull request ComputationalRadiationPhysics#378 from ax3l/fix-uninitMemberMemcpy
dea0bd0f4 Merge pull request ComputationalRadiationPhysics#379 from ax3l/fix-warningCudaBraces
8643a97f0 cudaMemcpy3DParams: Fix Uninit Members
182d1b64f Refactor: double-braces for std::array init
a22e42a42 CUDA: Missing Initializer Warning
ddb2993aa Merge pull request ComputationalRadiationPhysics#375 from BenjaminW3/topic-update-readme
cf311c6a5 remove boost.context from ReadMe requirements
e93d5ae49 Merge pull request ComputationalRadiationPhysics#374 from BenjaminW3/topic-remove-boost-atomic-dependency
fe898716f remove unused boost atomic dependency
7a66700f2 Merge pull request ComputationalRadiationPhysics#371 from BenjaminW3/topic-duplicate-CUDA_NVCC_FLAGS
2f902cd70 Merge pull request ComputationalRadiationPhysics#372 from ax3l/topic-cmakeOptionalNoWarning
3f95afa83 CMake Config: Optional Deps
83a1bf3e6 Merge pull request ComputationalRadiationPhysics#369 from BenjaminW3/topic-clean-up-assertions
a932c02d7 clean up CUDA_NVCC_FLAGS
9753ef6fa clean up assertions
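
One of the squashed commits above, 8643a97f0, fixes uninitialized members of the CUDA copy descriptor; its diff is not rendered on this page. As a hedged illustration only (not the verbatim patch), value-initializing the struct is one common way to achieve this:

```cpp
// Hypothetical sketch: value-initialize cudaMemcpy3DParms so all members
// start zeroed before the relevant fields are filled in.
#include <cuda_runtime.h>

cudaMemcpy3DParms makeCopyParms(
    cudaPitchedPtr src, cudaPitchedPtr dst, cudaExtent extent)
{
    cudaMemcpy3DParms p = {};            // was: cudaMemcpy3DParms p; -> indeterminate members
    p.srcPtr = src;
    p.dstPtr = dst;
    p.extent = extent;
    p.kind   = cudaMemcpyDeviceToDevice; // illustrative choice of copy kind
    return p;
}
```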

git-subtree-dir: thirdParty/alpaka
git-subtree-split: 90ae873b9984071e64343995e7721bde6a13ba7f
Third Party authored and ax3l committed Aug 16, 2017
1 parent 160af5c commit 3076485
Showing 26 changed files with 67 additions and 59 deletions.
8 changes: 4 additions & 4 deletions .travis.yml
@@ -50,7 +50,7 @@ compiler:
# CMAKE_BUILD_TYPE : {Debug, Release}
# ALPAKA_CI : {ON}
# ALPAKA_CI_BOOST_BRANCH : {boost-1.62.0, boost-1.63.0, boost-1.64.0}
- # ALPAKA_CI_CMAKE_VER : {3.7.2, 3.8.2}
+ # ALPAKA_CI_CMAKE_VER : {3.7.2, 3.8.2, 3.9.1}
# ALPAKA_CI_SANITIZERS : {ASan+UBsan+ESan, TSan+UBsan+ESan, MSan+UBsan+ESan}
# ALPAKA_CI_ANALYSIS : {ON, OFF}
# ALPAKA_DEBUG : {0, 1, 2}
@@ -86,17 +86,17 @@ env:

matrix:
# Analysis builds
- - ALPAKA_GCC_VER=4.9 ALPAKA_CLANG_VER=3.9.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.62.0 ALPAKA_CI_CMAKE_VER=3.8.2 ALPAKA_CI_SANITIZERS= ALPAKA_CI_ANALYSIS=ON ALPAKA_DEBUG=2 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VER=7.5 ALPAKA_CUDA_COMPILER=clang
+ - ALPAKA_GCC_VER=4.9 ALPAKA_CLANG_VER=3.9.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.62.0 ALPAKA_CI_CMAKE_VER=3.9.1 ALPAKA_CI_SANITIZERS= ALPAKA_CI_ANALYSIS=ON ALPAKA_DEBUG=2 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VER=7.5 ALPAKA_CUDA_COMPILER=clang
- ALPAKA_GCC_VER=7 ALPAKA_CLANG_VER=4.0.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.64.0 ALPAKA_CI_CMAKE_VER=3.7.2 ALPAKA_CI_SANITIZERS= ALPAKA_CI_ANALYSIS=ON ALPAKA_DEBUG=2

# Debug builds
- ALPAKA_GCC_VER=4.9 ALPAKA_CLANG_VER=3.8.1 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.63.0 ALPAKA_CI_CMAKE_VER=3.8.2 ALPAKA_CI_SANITIZERS= ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VER=7.5 ALPAKA_CUDA_COMPILER=clang ALPAKA_CUDA_ARCH="20;35" ALPAKA_ACC_GPU_CUDA_ONLY_MODE=ON ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE=OFF ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLE=OFF ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE=OFF ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE=OFF ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE=OFF ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE=OFF ALPAKA_ACC_CPU_BT_OMP4_ENABLE=OFF ALPAKA_ACC_CPU_BT_OPENACC2_ENABLE=OFF
- ALPAKA_GCC_VER=4.9 ALPAKA_CLANG_VER=3.7.1 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.62.0 ALPAKA_CI_CMAKE_VER=3.7.2 ALPAKA_CI_SANITIZERS=TSan+UBSan+ESan OMP_NUM_THREADS=3 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VER=8.0 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="20;35"
- - ALPAKA_GCC_VER=5 ALPAKA_CLANG_VER=3.8.1 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.64.0 ALPAKA_CI_CMAKE_VER=3.8.2 ALPAKA_CI_SANITIZERS=ASan+UBSan+ESan OMP_NUM_THREADS=2 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VER=8.0 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="35"
+ - ALPAKA_GCC_VER=5 ALPAKA_CLANG_VER=3.8.1 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.64.0 ALPAKA_CI_CMAKE_VER=3.9.1 ALPAKA_CI_SANITIZERS=ASan+UBSan+ESan OMP_NUM_THREADS=2 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VER=8.0 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="35"
- ALPAKA_GCC_VER=6 ALPAKA_CLANG_VER=4.0.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.63.0 ALPAKA_CI_CMAKE_VER=3.7.2 ALPAKA_CI_SANITIZERS=TSan+UBSan+ESan OMP_NUM_THREADS=4

# Release builds
- - ALPAKA_GCC_VER=4.9 ALPAKA_CLANG_VER=3.9.0 CMAKE_BUILD_TYPE=Release ALPAKA_CI_BOOST_BRANCH=boost-1.64.0 ALPAKA_CI_CMAKE_VER=3.8.2 ALPAKA_CI_SANITIZERS=ASan+UBSan+ESan OMP_NUM_THREADS=4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VER=7.0 ALPAKA_CUDA_COMPILER=clang
+ - ALPAKA_GCC_VER=4.9 ALPAKA_CLANG_VER=3.9.0 CMAKE_BUILD_TYPE=Release ALPAKA_CI_BOOST_BRANCH=boost-1.64.0 ALPAKA_CI_CMAKE_VER=3.9.1 ALPAKA_CI_SANITIZERS=ASan+UBSan+ESan OMP_NUM_THREADS=4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VER=7.0 ALPAKA_CUDA_COMPILER=clang
- ALPAKA_GCC_VER=4.9 ALPAKA_CLANG_VER=3.6.2 CMAKE_BUILD_TYPE=Release ALPAKA_CI_BOOST_BRANCH=boost-1.62.0 ALPAKA_CI_CMAKE_VER=3.7.2 ALPAKA_CI_SANITIZERS=TSan+UBSan+ESan OMP_NUM_THREADS=3 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VER=7.5 ALPAKA_CUDA_COMPILER=nvcc
- ALPAKA_GCC_VER=7 ALPAKA_CLANG_VER=3.5.2 CMAKE_BUILD_TYPE=Release ALPAKA_CI_BOOST_BRANCH=boost-1.63.0 ALPAKA_CI_CMAKE_VER=3.8.2 ALPAKA_CI_SANITIZERS=ASan+UBSan+ESan OMP_NUM_THREADS=2
- ALPAKA_GCC_VER=5 ALPAKA_CLANG_VER=3.9.0 CMAKE_BUILD_TYPE=Release ALPAKA_CI_BOOST_BRANCH=boost-1.62.0 ALPAKA_CI_CMAKE_VER=3.7.2 ALPAKA_CI_SANITIZERS= OMP_NUM_THREADS=4
2 changes: 1 addition & 1 deletion README.md
@@ -82,7 +82,7 @@ Dependencies
The **alpaka** library itself just requires header-only libraries.
However some of the accelerator back-end implementations require different boost libraries to be built.

- When an accelerator back-end using *Boost.Fiber* is enabled, `boost-fiber`, `boost-context` and all of its dependencies are required to be build in C++11 mode `./b2 cxxflags="-std=c++11"`.
+ When an accelerator back-end using *Boost.Fiber* is enabled, `boost-fiber` and all of its dependencies are required to be built in C++11 mode: `./b2 cxxflags="-std=c++11"`.

When an accelerator back-end using *CUDA* is enabled, version *7.0* of the *CUDA SDK* is the minimum requirement.
*NOTE*: When using nvcc as *CUDA* compiler, the *CUDA accelerator back-end* can not be enabled together with the *Boost.Fiber accelerator back-end* due to bugs in the nvcc compiler.
13 changes: 8 additions & 5 deletions alpakaConfig.cmake
@@ -134,9 +134,9 @@ IF(${ALPAKA_DEBUG} GREATER 1)
SET(Boost_DETAILED_FAILURE_MSG ON)
ENDIF()
IF(ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE)
- FIND_PACKAGE(Boost ${_ALPAKA_BOOST_MIN_VER} QUIET COMPONENTS fiber context system thread atomic chrono date_time)
+ FIND_PACKAGE(Boost ${_ALPAKA_BOOST_MIN_VER} QUIET COMPONENTS fiber context system thread chrono date_time)
IF(NOT Boost_FIBER_FOUND)
- MESSAGE(WARNING "Optional alpaka dependency Boost fiber could not be found! Fibers back-end disabled!")
+ MESSAGE(STATUS "Optional alpaka dependency Boost fiber could not be found! Fibers back-end disabled!")
SET(ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE OFF CACHE BOOL "Enable the Fibers CPU back-end" FORCE)
FIND_PACKAGE(Boost ${_ALPAKA_BOOST_MIN_VER} QUIET)
ENDIF()
@@ -213,7 +213,7 @@ ENDIF()
IF(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE)
FIND_PACKAGE(TBB 2.2)
IF(NOT TBB_FOUND)
- MESSAGE(WARNING "Optional alpaka dependency TBB could not be found! TBB grid block back-end disabled!")
+ MESSAGE(STATUS "Optional alpaka dependency TBB could not be found! TBB grid block back-end disabled!")
SET(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE OFF CACHE BOOL "Enable the TBB grid block back-end" FORCE)
ELSE()
LIST(APPEND _ALPAKA_LINK_LIBRARIES_PUBLIC ${TBB_LIBRARIES})
@@ -243,7 +243,7 @@ IF(ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE OR ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE OR A
ENDIF()

IF(NOT OPENMP_FOUND)
- MESSAGE(WARNING "Optional alpaka dependency OpenMP could not be found! OpenMP back-ends disabled!")
+ MESSAGE(STATUS "Optional alpaka dependency OpenMP could not be found! OpenMP back-ends disabled!")
SET(ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE OFF CACHE BOOL "Enable the OpenMP 2.0 CPU grid block back-end" FORCE)
SET(ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE OFF CACHE BOOL "Enable the OpenMP 2.0 CPU block thread back-end" FORCE)
SET(ALPAKA_ACC_CPU_BT_OMP4_ENABLE OFF CACHE BOOL "Enable the OpenMP 4.0 CPU block and thread back-end" FORCE)
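
All of the WARNING-to-STATUS changes in this file follow the same optional-dependency pattern; a condensed sketch with placeholder names:

```cmake
# Probe quietly; a missing *optional* back-end is expected, so report it
# at STATUS level instead of WARNING and force the cached option OFF.
FIND_PACKAGE(SomeDep QUIET)  # 'SomeDep' and the option below are placeholders
IF(NOT SomeDep_FOUND)
    MESSAGE(STATUS "Optional alpaka dependency SomeDep could not be found! Back-end disabled!")
    SET(ALPAKA_ACC_SOMEDEP_ENABLE OFF CACHE BOOL "Enable the SomeDep back-end" FORCE)
ENDIF()
```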
@@ -276,7 +276,7 @@ IF(ALPAKA_ACC_GPU_CUDA_ENABLE)
ELSE()
FIND_PACKAGE(CUDA "${ALPAKA_CUDA_VERSION}")
IF(NOT CUDA_FOUND)
- MESSAGE(WARNING "Optional alpaka dependency CUDA could not be found! CUDA back-end disabled!")
+ MESSAGE(STATUS "Optional alpaka dependency CUDA could not be found! CUDA back-end disabled!")
SET(ALPAKA_ACC_GPU_CUDA_ENABLE OFF CACHE BOOL "Enable the CUDA GPU back-end" FORCE)

ELSE()
@@ -323,6 +323,9 @@ IF(ALPAKA_ACC_GPU_CUDA_ENABLE)
ENDIF()

ELSE()
+ # Clean up the flags. Else, multiple find calls would result in duplicate flags. Furthermore, other modules may have set different settings.
+ SET(CUDA_NVCC_FLAGS)

IF(${ALPAKA_DEBUG} GREATER 1)
SET(CUDA_VERBOSE_BUILD ON)
ENDIF()
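
The newly added SET(CUDA_NVCC_FLAGS) guards against flag accumulation: CUDA_NVCC_FLAGS is an ordinary list variable, so every pass through this config would otherwise append again. A small sketch with an illustrative flag:

```cmake
# Without the reset, re-running the configuration duplicates entries:
LIST(APPEND CUDA_NVCC_FLAGS "-lineinfo")  # first inclusion
LIST(APPEND CUDA_NVCC_FLAGS "-lineinfo")  # second inclusion -> "-lineinfo;-lineinfo"

SET(CUDA_NVCC_FLAGS)                      # clearing first keeps the config idempotent
```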
6 changes: 5 additions & 1 deletion cmake/addExecutable.cmake
@@ -41,7 +41,11 @@ FUNCTION(ALPAKA_ADD_EXECUTABLE In_Name)
SET_SOURCE_FILES_PROPERTIES(${_file} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
ENDIF()
ENDFOREACH()
- CMAKE_POLICY(SET CMP0023 OLD) # CUDA_ADD_EXECUTABLE calls TARGET_LINK_LIBRARIES without keywords.
+ IF (CMAKE_VERSION VERSION_LESS 3.9.0)
+     CMAKE_POLICY(SET CMP0023 OLD) # CUDA_ADD_EXECUTABLE calls TARGET_LINK_LIBRARIES without keywords.
+ ELSE()
+     SET(CUDA_LINK_LIBRARIES_KEYWORD "PUBLIC")
+ ENDIF()
CUDA_ADD_EXECUTABLE(
${In_Name}
${ARGN})
6 changes: 5 additions & 1 deletion cmake/addLibrary.cmake
@@ -119,7 +119,11 @@ FUNCTION(ALPAKA_ADD_LIBRARY libraryName)
SET_SOURCE_FILES_PROPERTIES( ${_file} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ )
ENDIF()
ENDFOREACH()
- CMAKE_POLICY(SET CMP0023 OLD) # CUDA_ADD_LIBRARY calls TARGET_LINK_LIBRARIES without keywords.
+ IF (CMAKE_VERSION VERSION_LESS 3.9.0)
+     CMAKE_POLICY(SET CMP0023 OLD) # CUDA_ADD_LIBRARY calls TARGET_LINK_LIBRARIES without keywords.
+ ELSE()
+     SET(CUDA_LINK_LIBRARIES_KEYWORD "PUBLIC")
+ ENDIF()
CUDA_ADD_LIBRARY(
${libraryName}
${sourceFileNames}
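
addExecutable.cmake and addLibrary.cmake now share the same version dispatch; a condensed sketch of the idea behind it:

```cmake
# FindCUDA's CUDA_ADD_EXECUTABLE/CUDA_ADD_LIBRARY call TARGET_LINK_LIBRARIES
# internally. Before CMake 3.9 those calls carry no PUBLIC/PRIVATE keyword,
# so mixing them with keyworded calls elsewhere trips policy CMP0023.
# CMake 3.9 introduced CUDA_LINK_LIBRARIES_KEYWORD so FindCUDA can emit
# keyworded calls instead.
IF(CMAKE_VERSION VERSION_LESS 3.9.0)
    CMAKE_POLICY(SET CMP0023 OLD)              # tolerate keywordless linking
ELSE()
    SET(CUDA_LINK_LIBRARIES_KEYWORD "PUBLIC")  # let FindCUDA use keywords
ENDIF()
```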
2 changes: 1 addition & 1 deletion doc/markdown/user/implementation/library/Rationale.md
@@ -74,7 +74,7 @@ By itself this is no problem, but how can be assured that a two-dimensional kern
How can it be assured that a kernel which only uses `threadIdx.x` or equivalently calls `get_global_id(0)` will not get called with two dimensional grid and block extents?
Because the result in such a case is undefined, and most of the time not wanted by the kernel author, this should be easy to check and reject at compile-time.
In *alpaka* all accelerators are templatized on the dimensionality.
- This allows a two-dimensional image filter to assert that it is only called with a two dimensional accelerator.
+ This allows a two-dimensional image filter to assert that it is only called with a two dimensional accelerator.
Thereby the algorithms can check for supported dimensionality of the accelerator at compile time instead of runtime.
Furthermore with the dimension being a template parameter, the CPU back-end implementations are able to use only the number of nested loops really necessary instead of the 6 loops (2 x 3 loops for grid blocks and block threads), which are mandatory to emulate the *CUDA* threaded blocking scheme.
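
A hedged sketch of the compile-time check this paragraph describes (the kernel and its body are hypothetical; `alpaka::dim::Dim` is the dimensionality trait):

```cpp
#include <alpaka/alpaka.hpp>

// A two-dimensional image filter that rejects non-2D accelerators at
// compile time instead of producing undefined results at runtime.
struct ImageFilter2D
{
    template<typename TAcc>
    ALPAKA_FN_ACC auto operator()(TAcc const & acc) const
    -> void
    {
        static_assert(
            alpaka::dim::Dim<TAcc>::value == 2u,
            "ImageFilter2D requires a two-dimensional accelerator!");
        // ... filter implementation ...
    }
};
```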
2 changes: 1 addition & 1 deletion example/bufferCopy/src/bufferCopy.cpp
@@ -24,7 +24,7 @@

#include <iostream>
#include <cstdint>
- #include <cassert>
+ #include <cassert> // assert

/**
* Prints all elements of the buffer.
1 change: 0 additions & 1 deletion include/alpaka/acc/AccCpuFibers.hpp
@@ -52,7 +52,6 @@
#include <boost/core/ignore_unused.hpp> // boost::ignore_unused
#include <boost/predef.h> // workarounds

- #include <cassert> // assert
#include <memory> // std::unique_ptr
#include <typeinfo> // typeid

1 change: 0 additions & 1 deletion include/alpaka/acc/AccCpuThreads.hpp
@@ -49,7 +49,6 @@
#include <boost/core/ignore_unused.hpp> // boost::ignore_unused
#include <boost/predef.h> // workarounds

- #include <cassert> // assert
#include <memory> // std::unique_ptr
#include <thread> // std::thread
#include <typeinfo> // typeid
1 change: 1 addition & 0 deletions include/alpaka/core/Assert.hpp
@@ -28,6 +28,7 @@
#endif
#include <boost/predef.h> // workarounds

+ #include <cassert> // assert
#include <type_traits> // std::enable_if

namespace alpaka
12 changes: 3 additions & 9 deletions include/alpaka/core/Cuda.hpp
@@ -114,15 +114,9 @@ namespace alpaka
{
if(error != cudaSuccess)
{
- // Disable the incorrect warning see: http://stackoverflow.com/questions/13905200/is-it-wise-to-ignore-gcc-clangs-wmissing-braces-warning
- #if BOOST_COMP_CLANG
- #pragma clang diagnostic push
- #pragma clang diagnostic ignored "-Wmissing-braces"
- #endif
- std::array<cudaError_t, sizeof...(ignoredErrorCodes)> const aIgnoredErrorCodes{std::forward<TErrors>(ignoredErrorCodes)...};
- #if BOOST_COMP_CLANG
- #pragma clang diagnostic pop
- #endif
+ // https://stackoverflow.com/questions/18792731/can-we-omit-the-double-braces-for-stdarray-in-c14/18792782#18792782
+ std::array<cudaError_t, sizeof...(ignoredErrorCodes)> const aIgnoredErrorCodes{{ignoredErrorCodes...}};

// If the error code is not one of the ignored ones.
if(std::find(aIgnoredErrorCodes.cbegin(), aIgnoredErrorCodes.cend(), error) == aIgnoredErrorCodes.cend())
{
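
The replacement avoids the pragma dance entirely because fully braced initialization never triggers `-Wmissing-braces`; a standalone sketch of the rule:

```cpp
#include <array>

// std::array is an aggregate wrapping a built-in array, so the fully
// braced form uses an inner brace pair for that array member.
std::array<int, 3> a{{1, 2, 3}};  // never warns
std::array<int, 3> b{1, 2, 3};    // legal via brace elision, but some
                                  // clang/gcc versions warn with -Wmissing-braces
```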
3 changes: 2 additions & 1 deletion include/alpaka/event/EventCpu.hpp
@@ -34,6 +34,7 @@
#include <boost/uuid/uuid_generators.hpp> // boost::uuids::random_generator
#include <boost/core/ignore_unused.hpp> // boost::ignore_unused

+ #include <cassert> // assert
#include <mutex> // std::mutex
#include <condition_variable> // std::condition_variable
#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
@@ -88,7 +89,7 @@ namespace alpaka
ALPAKA_FN_HOST ~EventCpuImpl() noexcept
#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
{
- // If a event is enqueued to a stream and gets waited on but destructed before it is completed it is kept alive until completed.
+ // If an event is enqueued to a stream and gets waited on but destructed before it is completed it is kept alive until completed.
// This can never happen.
assert(!m_bIsWaitedFor);
}
5 changes: 3 additions & 2 deletions include/alpaka/exec/ExecCpuOmp2Blocks.hpp
@@ -45,7 +45,8 @@
#include <alpaka/core/OpenMp.hpp>
#include <alpaka/meta/ApplyTuple.hpp> // meta::apply

- #include <cassert> // assert
+ #include <boost/assert.hpp> // BOOST_VERIFY

#include <stdexcept> // std::runtime_error
#include <tuple> // std::tuple
#include <type_traits> // std::decay
@@ -159,7 +160,7 @@ namespace alpaka
// The number of blocks in the grid.
TSize const numBlocksInGrid(gridBlockExtent.prod());
// There is only ever one thread in a block in the OpenMP 2.0 block accelerator.
- assert(blockThreadExtent.prod() == static_cast<TSize>(1u));
+ BOOST_VERIFY(blockThreadExtent.prod() == static_cast<TSize>(1u));

// Force the environment to use the given number of threads.
int const ompIsDynamic(::omp_get_dynamic());
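
This assert-to-BOOST_VERIFY swap (repeated in the serial, TBB, and CUDA executors below) has a common motivation; a hedged sketch:

```cpp
#include <boost/assert.hpp>  // BOOST_VERIFY
#include <cassert>           // assert

void checkSingleThreadBlock(int blockThreadCount)
{
    // Under NDEBUG, assert() discards its argument entirely, so the
    // checked expression is never evaluated and, here, the parameter
    // becomes unused (typically drawing a compiler warning).
    assert(blockThreadCount == 1);

    // BOOST_VERIFY always evaluates its argument; in debug builds it
    // additionally asserts that the result is true.
    BOOST_VERIFY(blockThreadCount == 1);
}
```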
1 change: 0 additions & 1 deletion include/alpaka/exec/ExecCpuOmp2Threads.hpp
@@ -47,7 +47,6 @@

#include <boost/core/ignore_unused.hpp> // boost::ignore_unused

- #include <cassert> // assert
#include <stdexcept> // std::runtime_error
#include <tuple> // std::tuple
#include <type_traits> // std::decay
1 change: 0 additions & 1 deletion include/alpaka/exec/ExecCpuOmp4.hpp
@@ -45,7 +45,6 @@
#include <alpaka/core/OpenMp.hpp>
#include <alpaka/meta/ApplyTuple.hpp> // meta::apply

- #include <cassert> // assert
#include <stdexcept> // std::runtime_error
#include <tuple> // std::tuple
#include <type_traits> // std::decay
4 changes: 2 additions & 2 deletions include/alpaka/exec/ExecCpuSerial.hpp
@@ -41,8 +41,8 @@
#include <alpaka/meta/ApplyTuple.hpp> // meta::apply

#include <boost/core/ignore_unused.hpp> // boost::ignore_unused
+ #include <boost/assert.hpp> // BOOST_VERIFY

- #include <cassert> // assert
#include <tuple> // std::tuple
#include <type_traits> // std::decay
#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
@@ -156,7 +156,7 @@
blockSharedMemDynSizeBytes);

// There is only ever one thread in a block in the serial accelerator.
- assert(blockThreadExtent.prod() == static_cast<TSize>(1u));
+ BOOST_VERIFY(blockThreadExtent.prod() == static_cast<TSize>(1u));

// Execute the blocks serially.
meta::ndLoopIncIdx(
5 changes: 3 additions & 2 deletions include/alpaka/exec/ExecCpuTbbBlocks.hpp
@@ -41,7 +41,8 @@
#include <alpaka/meta/NdLoop.hpp> // meta::ndLoopIncIdx
#include <alpaka/meta/ApplyTuple.hpp> // meta::apply

- #include <cassert> // assert
+ #include <boost/assert.hpp> // BOOST_VERIFY

#include <stdexcept> // std::runtime_error
#include <tuple> // std::tuple
#include <type_traits> // std::decay
@@ -159,7 +160,7 @@ namespace alpaka
TSize const numBlocksInGrid(gridBlockExtent.prod());

// There is only ever one thread in a block in the TBB accelerator.
- assert(blockThreadExtent.prod() == static_cast<TSize>(1u));
+ BOOST_VERIFY(blockThreadExtent.prod() == static_cast<TSize>(1u));

tbb::parallel_for(
static_cast<TSize>(0),
13 changes: 7 additions & 6 deletions include/alpaka/exec/ExecGpuCudaRt.hpp
@@ -57,6 +57,7 @@
#include <alpaka/meta/Metafunctions.hpp> // meta::Conjunction

#include <boost/predef.h> // workarounds
+ #include <boost/assert.hpp> // BOOST_VERIFY

#include <stdexcept> // std::runtime_error
#include <tuple> // std::tuple
@@ -335,9 +336,9 @@ namespace alpaka
// Assert that all extent of the higher dimensions are 1!
for(auto i(std::min(static_cast<typename TDim::value_type>(3), TDim::value)); i<TDim::value; ++i)
{
- assert(gridBlockExtent[TDim::value-1u-i] == 1);
- assert(blockThreadExtent[TDim::value-1u-i] == 1);
- assert(threadElemExtent[TDim::value-1u-i] == 1);
+ BOOST_VERIFY(gridBlockExtent[TDim::value-1u-i] == 1);
+ BOOST_VERIFY(blockThreadExtent[TDim::value-1u-i] == 1);
+ BOOST_VERIFY(threadElemExtent[TDim::value-1u-i] == 1);
}

#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
@@ -472,9 +473,9 @@ namespace alpaka
// Assert that all extent of the higher dimensions are 1!
for(auto i(std::min(static_cast<typename TDim::value_type>(3), TDim::value)); i<TDim::value; ++i)
{
- assert(gridBlockExtent[TDim::value-1u-i] == 1);
- assert(blockThreadExtent[TDim::value-1u-i] == 1);
- assert(threadElemExtent[TDim::value-1u-i] == 1);
+ BOOST_VERIFY(gridBlockExtent[TDim::value-1u-i] == 1);
+ BOOST_VERIFY(blockThreadExtent[TDim::value-1u-i] == 1);
+ BOOST_VERIFY(threadElemExtent[TDim::value-1u-i] == 1);
}

#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
2 changes: 2 additions & 0 deletions include/alpaka/idx/bt/IdxBtOmp.hpp
@@ -31,6 +31,8 @@

#include <boost/core/ignore_unused.hpp> // boost::ignore_unused

+ #include <cassert> // assert

namespace alpaka
{
namespace idx
1 change: 1 addition & 0 deletions include/alpaka/idx/bt/IdxBtRefFiberIdMap.hpp
@@ -30,6 +30,7 @@
#include <boost/core/ignore_unused.hpp> // boost::ignore_unused

#include <map> // std::map
+ #include <cassert> // assert

namespace alpaka
{
1 change: 1 addition & 0 deletions include/alpaka/idx/bt/IdxBtRefThreadIdMap.hpp
@@ -29,6 +29,7 @@

#include <thread> // std::thread
#include <map> // std::map
+ #include <cassert> // assert

namespace alpaka
{
1 change: 0 additions & 1 deletion include/alpaka/mem/buf/BufCpu.hpp
@@ -42,7 +42,6 @@
#include <boost/core/ignore_unused.hpp> // boost::ignore_unused
#endif

- #include <cassert> // assert
#include <memory> // std::shared_ptr

namespace alpaka