From 843c81c90ecbd2283630149a1587fb46809abacf Mon Sep 17 00:00:00 2001 From: waytrue17 <52505574+waytrue17@users.noreply.github.com> Date: Tue, 26 May 2020 10:16:30 -0700 Subject: [PATCH] [v1.7.x] update jetson dockerfile to support CUDA 10.0 (#18339) * update dockerfile for jetson * add toolchain files * update build_jetson function * update ubuntu_julia.sh * update FindCUDAToolkit.cmake * Update centos7_python.sh * revert changes on ubuntu_julia.sh * disable TVM for gpu build * Disable TVM_OP on GPU builds Co-authored-by: Wei Chu Co-authored-by: Leonard Lausen --- ci/docker/Dockerfile.build.jetson | 96 ++++---- ci/docker/install/centos7_python.sh | 2 +- ci/docker/runtime_functions.sh | 68 +++--- .../aarch64-linux-gnu-toolchain.cmake | 28 +++ .../arm-linux-gnueabihf-toolchain.cmake | 27 +++ ci/jenkins/Jenkins_steps.groovy | 44 ++-- ci/jenkins/Jenkinsfile_unix_gpu | 7 +- cmake/Modules/FindCUDAToolkit.cmake | 205 +++++++++++++----- 8 files changed, 288 insertions(+), 189 deletions(-) create mode 100644 ci/docker/toolchains/aarch64-linux-gnu-toolchain.cmake create mode 100644 ci/docker/toolchains/arm-linux-gnueabihf-toolchain.cmake diff --git a/ci/docker/Dockerfile.build.jetson b/ci/docker/Dockerfile.build.jetson index e31ee43a93d8..93fe5e0a5b0d 100644 --- a/ci/docker/Dockerfile.build.jetson +++ b/ci/docker/Dockerfile.build.jetson @@ -20,68 +20,58 @@ # This script assumes /work/mxnet exists and contains the mxnet code you wish to compile and # that /work/build exists and is the target for your output. -FROM nvidia/cuda:9.0-cudnn7-devel as cudabuilder +FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 -FROM dockcross/linux-arm64 +ENV ARCH=aarch64 \ + HOSTCC=gcc \ + TARGET=ARMV8 -ENV ARCH aarch64 -ENV HOSTCC gcc -ENV TARGET ARMV8 +WORKDIR /usr/local -# gh issue #11567 https://github.com/apache/incubator-mxnet/issues/11567 -#RUN sed -i '\#deb http://cdn-fastly.deb.debian.org/debian-security jessie/updates main#d' /etc/apt/sources.list -#RUN sed -i 's/cdn-fastly.//' /etc/apt/sources.list +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + ninja-build \ + git \ + curl \ + zip \ + unzip \ + python3 \ + python3-pip \ + awscli \ + crossbuild-essential-arm64 \ + && rm -rf /var/lib/apt/lists/* +# cmake on Ubuntu 18.04 is too old +RUN python3 -m pip install cmake -WORKDIR /work/deps - -COPY install/ubuntu_arm.sh /work/ -RUN /work/ubuntu_arm.sh - -COPY install/arm_openblas.sh /work/ -RUN /work/arm_openblas.sh - -ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} - +# ccache on Ubuntu 18.04 is too old to support Cuda correctly COPY install/deb_ubuntu_ccache.sh /work/ RUN /work/deb_ubuntu_ccache.sh -# Setup CUDA build env (including configuring and copying nvcc) -COPY --from=cudabuilder /usr/local/cuda /usr/local/cuda -ENV TARGET_ARCH aarch64 -ENV TARGET_OS linux +COPY toolchains/aarch64-linux-gnu-toolchain.cmake /usr +ENV CMAKE_TOOLCHAIN_FILE=/usr/aarch64-linux-gnu-toolchain.cmake + +RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ + cd /usr/local/OpenBLAS && \ + make NOFORTRAN=1 CC=aarch64-linux-gnu-gcc && \ + make PREFIX=/usr/aarch64-linux-gnu install && \ + cd /usr/local && \ + rm -rf OpenBLAS -# Install ARM depedencies based on Jetpack 3.3 -RUN JETPACK_DOWNLOAD_PREFIX=https://developer.download.nvidia.com/devzone/devcenter/mobile/jetpack_l4t/3.3/lw.xd42/JetPackL4T_33_b39 && \ - CUDA_REPO_PREFIX=/var/cuda-repo-9-0-local && \ - ARM_CUDA_INSTALLER_PACKAGE=cuda-repo-l4t-9-0-local_9.0.252-1_arm64.deb && \ - ARM_CUDNN_INSTALLER_PACKAGE=libcudnn7_7.1.5.14-1+cuda9.0_arm64.deb && \ - ARM_CUDNN_DEV_INSTALLER_PACKAGE=libcudnn7-dev_7.1.5.14-1+cuda9.0_arm64.deb && \ - ARM_LICENSE_INSTALLER=cuda-license-9-0_9.0.252-1_arm64.deb && \ - ARM_CUBLAS_INSTALLER=cuda-cublas-9-0_9.0.252-1_arm64.deb && \ - ARM_NVINFER_INSTALLER_PACKAGE=libnvinfer4_4.1.3-1+cuda9.0_arm64.deb && \ - ARM_NVINFER_DEV_INSTALLER_PACKAGE=libnvinfer-dev_4.1.3-1+cuda9.0_arm64.deb && \ - dpkg --add-architecture arm64 && \ - wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDA_INSTALLER_PACKAGE && \ - wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDNN_INSTALLER_PACKAGE && \ - wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDNN_DEV_INSTALLER_PACKAGE && \ - wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_NVINFER_INSTALLER_PACKAGE && \ - wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_NVINFER_DEV_INSTALLER_PACKAGE && \ - dpkg -i --force-architecture $ARM_CUDA_INSTALLER_PACKAGE && \ - apt-key add $CUDA_REPO_PREFIX/7fa2af80.pub && \ - dpkg -i --force-architecture $ARM_CUDNN_INSTALLER_PACKAGE && \ - dpkg -i --force-architecture $ARM_CUDNN_DEV_INSTALLER_PACKAGE && \ - dpkg -i --force-architecture $CUDA_REPO_PREFIX/$ARM_LICENSE_INSTALLER && \ - dpkg -i --force-architecture $CUDA_REPO_PREFIX/$ARM_CUBLAS_INSTALLER && \ - dpkg -i --force-architecture $ARM_NVINFER_INSTALLER_PACKAGE && \ - dpkg -i --force-architecture $ARM_NVINFER_DEV_INSTALLER_PACKAGE && \ - apt update -y || true && apt install -y cuda-libraries-dev-9-0 libcudnn7-dev libnvinfer-dev -RUN ln -s /usr/include/aarch64-linux-gnu/cudnn_v7.h /usr/include/aarch64-linux-gnu/cudnn.h -ENV PATH $PATH:/usr/local/cuda/bin -ENV NVCCFLAGS "-m64" -ENV CUDA_ARCH "-gencode arch=compute_53,code=sm_53 -gencode arch=compute_62,code=sm_62" -ENV NVCC /usr/local/cuda/bin/nvcc +# Install aarch64 cross depedencies based on Jetpack 4.3 +# Manually downloaded using SDK Manager tool and placed in a private S3 bucket. +# We're not allowed to redistribute these files and there is no public version. +RUN aws s3 cp s3://mxnet-ci-prod-private-slave-data/nvidia/sdkm_downloads/cuda-repo-ubuntu1804-10-0-local-10.0.326-410.108_1.0-1_amd64.deb . && \ + dpkg -i cuda-repo-ubuntu1804-10-0-local-10.0.326-410.108_1.0-1_amd64.deb && \ + rm cuda-repo-ubuntu1804-10-0-local-10.0.326-410.108_1.0-1_amd64.deb && \ + apt-key add /var/cuda-repo-10-0-local-10.0.326-410.108/7fa2af80.pub && \ + aws s3 cp s3://mxnet-ci-prod-private-slave-data/nvidia/sdkm_downloads/cuda-repo-cross-aarch64-10-0-local-10.0.326_1.0-1_all.deb . && \ + dpkg -i cuda-repo-cross-aarch64-10-0-local-10.0.326_1.0-1_all.deb && \ + rm cuda-repo-cross-aarch64-10-0-local-10.0.326_1.0-1_all.deb && \ + apt-get update && \ + apt-get install -y -f && \ + apt-get install -y cuda-cross-aarch64 cuda-cross-aarch64-10-0 && \ + rm -rf /var/lib/apt/lists/* ARG USER_ID=0 ARG GROUP_ID=0 diff --git a/ci/docker/install/centos7_python.sh b/ci/docker/install/centos7_python.sh index 734f05c3830a..5ca08b8fedf8 100755 --- a/ci/docker/install/centos7_python.sh +++ b/ci/docker/install/centos7_python.sh @@ -23,7 +23,7 @@ set -ex # Python 2.7 is installed by default, install 3.6 on top -yum -y install https://repo.ius.io/ius-release-el7.rpm +yum -y install https://repo.ius.io/ius-release-el7.rpm yum -y install python36u # Install PIP diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index d5de024e49ec..d8f0cbed7186 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -249,15 +249,22 @@ build_dynamic_libmxnet() { build_jetson() { set -ex - pushd . - - #build_ccache_wrappers - - cp make/crosscompile.jetson.mk ./config.mk - make -j$(nproc) - - build_wheel /work/mxnet/python /work/mxnet/lib - popd + cd /work/build + cmake \ + -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \ + -DUSE_CUDA=ON \ + -DMXNET_CUDA_ARCH="5.2" \ + -DENABLE_CUDA_RTC=OFF \ + -DSUPPORT_F16C=OFF \ + -DUSE_OPENCV=OFF \ + -DUSE_OPENMP=ON \ + -DUSE_LAPACK=OFF \ + -DUSE_SIGNAL_HANDLER=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DUSE_MKL_IF_AVAILABLE=OFF \ + -G Ninja /work/mxnet + ninja + build_wheel } # @@ -772,7 +779,7 @@ build_ubuntu_gpu_mkldnn() { USE_CUDA=1 \ USE_CUDA_PATH=/usr/local/cuda \ USE_CUDNN=1 \ - USE_TVM_OP=1 \ + USE_TVM_OP=0 \ CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \ USE_SIGNAL_HANDLER=1 \ -j$(nproc) @@ -789,34 +796,13 @@ build_ubuntu_gpu_mkldnn_nocudnn() { USE_CUDA=1 \ USE_CUDA_PATH=/usr/local/cuda \ USE_CUDNN=0 \ - USE_TVM_OP=1 \ + USE_TVM_OP=0 \ CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \ USE_SIGNAL_HANDLER=1 \ -j$(nproc) } build_ubuntu_gpu_cuda101_cudnn7() { - set -ex - build_ccache_wrappers - make \ - DEV=1 \ - USE_BLAS=openblas \ - USE_MKLDNN=0 \ - USE_CUDA=1 \ - USE_CUDA_PATH=/usr/local/cuda \ - USE_CUDNN=1 \ - USE_TVM_OP=1 \ - USE_CPP_PACKAGE=1 \ - USE_DIST_KVSTORE=1 \ - CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \ - USE_SIGNAL_HANDLER=1 \ - -j$(nproc) - - make cython PYTHON=python2 - make cython PYTHON=python3 -} - -build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op() { set -ex build_ccache_wrappers make \ @@ -867,7 +853,7 @@ build_ubuntu_gpu_cmake_mkldnn() { -DUSE_SIGNAL_HANDLER=ON \ -DUSE_CUDA=1 \ -DUSE_CUDNN=1 \ - -DUSE_TVM_OP=1 \ + -DUSE_TVM_OP=0 \ -DPython3_EXECUTABLE=/usr/bin/python3 \ -DUSE_MKLML_MKL=1 \ -DCMAKE_BUILD_TYPE=Release \ @@ -892,7 +878,7 @@ build_ubuntu_gpu_cmake() { -DUSE_SIGNAL_HANDLER=ON \ -DUSE_CUDA=ON \ -DUSE_CUDNN=ON \ - -DUSE_TVM_OP=ON \ + -DUSE_TVM_OP=OFF \ -DPython3_EXECUTABLE=/usr/bin/python3 \ -DUSE_MKL_IF_AVAILABLE=OFF \ -DUSE_MKLML_MKL=OFF \ @@ -904,17 +890,14 @@ build_ubuntu_gpu_cmake() { -G Ninja \ /work/mxnet - ninja -v + ninja } -build_ubuntu_gpu_cmake_no_tvm_op() { +build_ubuntu_gpu_cmake_no_rtc() { set -ex cd /work/build build_ccache_wrappers cmake \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \ -DUSE_SIGNAL_HANDLER=ON \ -DUSE_CUDA=ON \ -DUSE_CUDNN=ON \ @@ -922,15 +905,16 @@ build_ubuntu_gpu_cmake_no_tvm_op() { -DPython3_EXECUTABLE=/usr/bin/python3 \ -DUSE_MKL_IF_AVAILABLE=OFF \ -DUSE_MKLML_MKL=OFF \ - -DUSE_MKLDNN=OFF \ + -DUSE_MKLDNN=ON \ -DUSE_DIST_KVSTORE=ON \ -DCMAKE_BUILD_TYPE=Release \ -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \ -DBUILD_CYTHON_MODULES=1 \ + -DENABLE_CUDA_RTC=OFF \ -G Ninja \ /work/mxnet - ninja -v + ninja } build_ubuntu_cpu_large_tensor() { @@ -964,7 +948,7 @@ build_ubuntu_gpu_large_tensor() { -DUSE_SIGNAL_HANDLER=ON \ -DUSE_CUDA=ON \ -DUSE_CUDNN=ON \ - -DUSE_TVM_OP=ON \ + -DUSE_TVM_OP=OFF \ -DPython3_EXECUTABLE=/usr/bin/python3 \ -DUSE_MKL_IF_AVAILABLE=OFF \ -DUSE_MKLML_MKL=OFF \ diff --git a/ci/docker/toolchains/aarch64-linux-gnu-toolchain.cmake b/ci/docker/toolchains/aarch64-linux-gnu-toolchain.cmake new file mode 100644 index 000000000000..3780415c4b15 --- /dev/null +++ b/ci/docker/toolchains/aarch64-linux-gnu-toolchain.cmake @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR "aarch64") +set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc) +set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++) +set(CMAKE_CUDA_HOST_COMPILER aarch64-linux-gnu-gcc) +set(CMAKE_FIND_ROOT_PATH "/usr/aarch64-linux-gnu") + +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) diff --git a/ci/docker/toolchains/arm-linux-gnueabihf-toolchain.cmake b/ci/docker/toolchains/arm-linux-gnueabihf-toolchain.cmake new file mode 100644 index 000000000000..62038ecee16a --- /dev/null +++ b/ci/docker/toolchains/arm-linux-gnueabihf-toolchain.cmake @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR "armv7l") +set(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc) +set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++) +set(CMAKE_FIND_ROOT_PATH "/usr/arm-linux-gnueabihf" "/usr/local/arm-linux-gnueabihf") + +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index be66350b99d9..5345c78a9e7e 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -261,14 +261,14 @@ def compile_unix_full_gpu() { }] } -def compile_unix_full_gpu_no_tvm_op() { - return ['GPU: CUDA10.1+cuDNN7 TVM_OP OFF': { +def compile_unix_full_gpu_mkldnn_cpp_test() { + return ['GPU: CUDA10.1+cuDNN7+MKLDNN+CPPTEST': { node(NODE_LINUX_CPU) { - ws('workspace/build-gpu-no-tvm-op') { + ws('workspace/build-gpu-mkldnn-cpp') { timeout(time: max_time, unit: 'MINUTES') { utils.init_git() - utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_no_tvm_op', false) - utils.pack_lib('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op) + utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_mkldnn_cpp_test', false) + utils.pack_lib('gpu_mkldnn_cpp_test', mx_lib_cpp_capi) } } } @@ -303,16 +303,16 @@ def compile_unix_cmake_gpu() { }] } -def compile_unix_cmake_gpu_no_tvm_op() { - return ['GPU: CMake TVM_OP OFF': { - node(NODE_LINUX_CPU) { - ws('workspace/build-cmake-gpu-no-tvm-op') { - timeout(time: max_time, unit: 'MINUTES') { - utils.init_git() - utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_tvm_op', false) - } +def compile_unix_cmake_gpu_no_rtc() { + return ['GPU: CMake CUDA RTC OFF': { + node(NODE_LINUX_CPU) { + ws('workspace/build-cmake-gpu-no-rtc') { + timeout(time: max_time, unit: 'MINUTES') { + utils.init_git() + utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_rtc', false) + } + } } - } }] } @@ -799,22 +799,6 @@ def test_unix_python3_gpu() { }] } -def test_unix_python3_gpu_no_tvm_op() { - return ['Python3: GPU TVM_OP OFF': { - node(NODE_LINUX_GPU) { - ws('workspace/ut-python3-gpu-no-tvm-op') { - try { - utils.unpack_and_init('gpu_no_tvm_op', mx_lib_cpp_examples_no_tvm_op) - python3_gpu_ut_cython('ubuntu_gpu_cu101') - utils.publish_test_coverage() - } finally { - utils.collect_test_results_unix('nosetests_gpu.xml', 'nosetests_python3_gpu.xml') - } - } - } - }] -} - def test_unix_python3_quantize_gpu() { return ['Python3: Quantize GPU': { node(NODE_LINUX_GPU_P3) { diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu index 18e27198c330..e3ff31933125 100644 --- a/ci/jenkins/Jenkinsfile_unix_gpu +++ b/ci/jenkins/Jenkinsfile_unix_gpu @@ -41,8 +41,8 @@ core_logic: { custom_steps.compile_unix_cmake_gpu(), custom_steps.compile_unix_tensorrt_gpu(), custom_steps.compile_unix_int64_gpu(), - custom_steps.compile_unix_full_gpu_no_tvm_op(), - custom_steps.compile_unix_cmake_gpu_no_tvm_op(), + custom_steps.compile_unix_cmake_gpu_no_rtc(), + custom_steps.compile_unix_full_gpu_mkldnn_cpp_test() ]) utils.parallel_stage('Tests', [ @@ -63,7 +63,8 @@ core_logic: { custom_steps.test_unix_scala_gpu(), custom_steps.test_unix_distributed_kvstore_gpu(), custom_steps.test_static_python_gpu(), - custom_steps.test_unix_python3_gpu_no_tvm_op(), + custom_steps.test_static_python_gpu_cmake(), + custom_steps.test_unix_capi_cpp_package(), // Disabled due to: https://github.com/apache/incubator-mxnet/issues/11407 //custom_steps.test_unix_caffe_gpu() diff --git a/cmake/Modules/FindCUDAToolkit.cmake b/cmake/Modules/FindCUDAToolkit.cmake index d37c44d9c782..fee4f3f4f698 100644 --- a/cmake/Modules/FindCUDAToolkit.cmake +++ b/cmake/Modules/FindCUDAToolkit.cmake @@ -132,6 +132,7 @@ of the following libraries that are part of the CUDAToolkit: - :ref:`cuRAND` - :ref:`cuSOLVER` - :ref:`cuSPARSE` +- :ref:`cuPTI` - :ref:`NPP` - :ref:`nvBLAS` - :ref:`nvGRAPH` @@ -149,7 +150,6 @@ CUDA Runtime Library The CUDA Runtime library (cudart) are what most applications will typically need to link against to make any calls such as `cudaMalloc`, and `cudaFree`. -They are an explicit dependency of almost every library. Targets Created: @@ -230,6 +230,18 @@ Targets Created: - ``CUDA::cusparse`` - ``CUDA::cusparse_static`` +.. _`cuda_toolkit_cupti`: + +cupti +""""" + +The `NVIDIA CUDA Profiling Tools Interface `_. + +Targets Created: + +- ``CUDA::cupti`` +- ``CUDA::cupti_static`` + .. _`cuda_toolkit_NPP`: NPP @@ -361,8 +373,6 @@ Targets Created: - ``CUDA::nvml`` -.. _`cuda_toolkit_opencl`: - .. _`cuda_toolkit_nvToolsExt`: nvToolsExt @@ -375,6 +385,8 @@ Targets Created: - ``CUDA::nvToolsExt`` +.. _`cuda_toolkit_opencl`: + OpenCL """""" @@ -436,6 +448,11 @@ Result variables The path to the CUDA Toolkit library directory that contains the CUDA Runtime library ``cudart``. +``CUDAToolkit_TARGET_DIR`` + The path to the CUDA Toolkit directory including the target architecture + when cross-compiling. When not cross-compiling this will be equivalant to + ``CUDAToolkit_ROOT_DIR``. + ``CUDAToolkit_NVCC_EXECUTABLE`` The path to the NVIDIA CUDA compiler ``nvcc``. Note that this path may **not** be the same as @@ -487,6 +504,7 @@ if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR) get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY) # use the already detected cuda compiler set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "") + mark_as_advanced(CUDAToolkit_BIN_DIR) unset(cuda_dir) endif() @@ -641,6 +659,7 @@ endif() if(NOT CUDAToolkit_BIN_DIR AND CUDAToolkit_NVCC_EXECUTABLE) get_filename_component(cuda_dir "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY) set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE) + mark_as_advanced(CUDAToolkit_BIN_DIR) unset(cuda_dir) endif() @@ -669,8 +688,47 @@ endif() get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE) -# Now that we have the real ROOT_DIR, find components inside it. -list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR}) +# Handle cross compilation +if(CMAKE_CROSSCOMPILING) + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a") + # Support for NVPACK + set (CUDAToolkit_TARGET_NAME "armv7-linux-androideabi") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") + # Support for arm cross compilation + set(CUDAToolkit_TARGET_NAME "armv7-linux-gnueabihf") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") + # Support for aarch64 cross compilation + if (ANDROID_ARCH_NAME STREQUAL "arm64") + set(CUDAToolkit_TARGET_NAME "aarch64-linux-androideabi") + else() + set(CUDAToolkit_TARGET_NAME "aarch64-linux") + endif (ANDROID_ARCH_NAME STREQUAL "arm64") + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + set(CUDAToolkit_TARGET_NAME "x86_64-linux") + endif() + + if (EXISTS "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}") + set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}") + # add known CUDA target root path to the set of directories we search for programs, libraries and headers + list(PREPEND CMAKE_FIND_ROOT_PATH "${CUDAToolkit_TARGET_DIR}") + + # Mark that we need to pop the root search path changes after we have + # found all cuda libraries so that searches for our cross-compilation + # libraries work when another cuda sdk is in CMAKE_PREFIX_PATH or + # PATh + set(_CUDAToolkit_Pop_ROOT_PATH True) + endif() +else() + # Not cross compiling + set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}") + # Now that we have the real ROOT_DIR, find components inside it. + list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR}) + + # Mark that we need to pop the prefix path changes after we have + # found the cudart library. + set(_CUDAToolkit_Pop_Prefix True) +endif() + # Find the include/ directory find_path(CUDAToolkit_INCLUDE_DIR @@ -680,14 +738,17 @@ find_path(CUDAToolkit_INCLUDE_DIR # And find the CUDA Runtime Library libcudart find_library(CUDA_CUDART NAMES cudart - PATH_SUFFIXES lib64 lib/x64 + PATH_SUFFIXES lib64 lib64/stubs lib/x64 ) if (NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY) message(STATUS "Unable to find cudart library.") endif() unset(CUDAToolkit_ROOT_DIR) -list(REMOVE_AT CMAKE_PREFIX_PATH -1) +if(_CUDAToolkit_Pop_Prefix) + list(REMOVE_AT CMAKE_PREFIX_PATH -1) + unset(_CUDAToolkit_Pop_Prefix) +endif() #----------------------------------------------------------------------------- # Perform version comparison and validate all required variables are set. @@ -702,6 +763,10 @@ find_package_handle_standard_args(CUDAToolkit VERSION_VAR CUDAToolkit_VERSION ) +mark_as_advanced(CUDA_CUDART + CUDAToolkit_INCLUDE_DIR + CUDAToolkit_NVCC_EXECUTABLE + ) #----------------------------------------------------------------------------- # Construct result variables @@ -714,78 +779,103 @@ endif() # Construct import targets if(CUDAToolkit_FOUND) - function(find_and_add_cuda_import_lib lib_name) + function(_CUDAToolkit_find_and_add_import_lib lib_name) + cmake_parse_arguments(arg "" "" "ALT;DEPS;EXTRA_PATH_SUFFIXES" ${ARGN}) - if(ARGC GREATER 1) - set(search_names ${ARGN}) - else() - set(search_names ${lib_name}) - endif() + set(search_names ${lib_name} ${arg_ALT}) find_library(CUDA_${lib_name}_LIBRARY NAMES ${search_names} - PATHS ${CUDAToolkit_LIBRARY_DIR} + HINTS ${CUDAToolkit_LIBRARY_DIR} ENV CUDA_PATH - PATH_SUFFIXES nvidia/current lib64 lib/x64 lib + PATH_SUFFIXES nvidia/current lib64 lib64/stubs lib/x64 lib lib/stubs stubs + ${arg_EXTRA_PATH_SUFFIXES} ) + mark_as_advanced(CUDA_${lib_name}_LIBRARY) - if (NOT CUDA::${lib_name} AND CUDA_${lib_name}_LIBRARY) + if (NOT TARGET CUDA::${lib_name} AND CUDA_${lib_name}_LIBRARY) add_library(CUDA::${lib_name} IMPORTED INTERFACE) target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") target_link_libraries(CUDA::${lib_name} INTERFACE "${CUDA_${lib_name}_LIBRARY}") + foreach(dep ${arg_DEPS}) + if(TARGET CUDA::${dep}) + target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dep}) + endif() + endforeach() endif() endfunction() - function(add_cuda_link_dependency lib_name) - foreach(dependency IN LISTS ${ARGN}) - target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dependency}) - endforeach() - endfunction() + if(NOT TARGET CUDA::toolkit) + add_library(CUDA::toolkit IMPORTED INTERFACE) + target_include_directories(CUDA::toolkit SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") + target_link_directories(CUDA::toolkit INTERFACE "${CUDAToolkit_LIBRARY_DIR}") + endif() - add_library(CUDA::toolkit IMPORTED INTERFACE) - target_include_directories(CUDA::toolkit SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") - target_link_directories(CUDA::toolkit INTERFACE "${CUDAToolkit_LIBRARY_DIR}") + _CUDAToolkit_find_and_add_import_lib(cuda_driver ALT cuda) + _CUDAToolkit_find_and_add_import_lib(cudart) + _CUDAToolkit_find_and_add_import_lib(cudart_static) - find_and_add_cuda_import_lib(cuda_driver cuda) + # setup dependencies that are required for cudart_static when building + # on linux. These are generally only required when using the CUDA toolkit + # when CUDA language is disabled + if(NOT TARGET CUDA::cudart_static_deps + AND TARGET CUDA::cudart_static) - find_and_add_cuda_import_lib(cudart) - find_and_add_cuda_import_lib(cudart_static) + add_library(CUDA::cudart_static_deps IMPORTED INTERFACE) + target_link_libraries(CUDA::cudart_static INTERFACE CUDA::cudart_static_deps) - foreach (cuda_lib cublas cufft cufftw curand cusolver cusparse nvgraph nvjpeg) - find_and_add_cuda_import_lib(${cuda_lib}) - add_cuda_link_dependency(${cuda_lib} cudart) + if(UNIX AND (CMAKE_C_COMPILER OR CMAKE_CXX_COMPILER)) + find_package(Threads REQUIRED) + target_link_libraries(CUDA::cudart_static_deps INTERFACE Threads::Threads ${CMAKE_DL_LIBS}) + endif() - find_and_add_cuda_import_lib(${cuda_lib}_static) - add_cuda_link_dependency(${cuda_lib}_static cudart_static) + if(UNIX AND NOT APPLE) + # On Linux, you must link against librt when using the static cuda runtime. + find_library(CUDAToolkit_rt_LIBRARY rt) + mark_as_advanced(CUDAToolkit_rt_LIBRARY) + if(NOT CUDAToolkit_rt_LIBRARY) + message(WARNING "Could not find librt library, needed by CUDA::cudart_static") + else() + target_link_libraries(CUDA::cudart_static_deps INTERFACE ${CUDAToolkit_rt_LIBRARY}) + endif() + endif() + endif() + + _CUDAToolkit_find_and_add_import_lib(culibos) # it's a static library + foreach (cuda_lib cublas cufft curand cusparse nppc nvjpeg) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS culibos) endforeach() + # cuFFTW depends on cuFFT + _CUDAToolkit_find_and_add_import_lib(cufftw DEPS cufft) + _CUDAToolkit_find_and_add_import_lib(cufftw DEPS cufft_static) + # cuSOLVER depends on cuBLAS, and cuSPARSE - add_cuda_link_dependency(cusolver cublas cusparse) - add_cuda_link_dependency(cusolver_static cublas_static cusparse) + _CUDAToolkit_find_and_add_import_lib(cusolver DEPS cublas cusparse) + _CUDAToolkit_find_and_add_import_lib(cusolver_static DEPS cublas_static cusparse_static culibos) # nvGRAPH depends on cuRAND, and cuSOLVER. - add_cuda_link_dependency(nvgraph curand cusolver) - add_cuda_link_dependency(nvgraph_static curand_static cusolver_static) - - find_and_add_cuda_import_lib(nppc) - find_and_add_cuda_import_lib(nppc_static) - - add_cuda_link_dependency(nppc cudart) - add_cuda_link_dependency(nppc_static cudart_static culibos) + _CUDAToolkit_find_and_add_import_lib(nvgraph DEPS curand cusolver) + _CUDAToolkit_find_and_add_import_lib(nvgraph_static DEPS curand_static cusolver_static) # Process the majority of the NPP libraries. foreach (cuda_lib nppial nppicc nppidei nppif nppig nppim nppist nppitc npps nppicom nppisu) - find_and_add_cuda_import_lib(${cuda_lib}) - find_and_add_cuda_import_lib(${cuda_lib}_static) - add_cuda_link_dependency(${cuda_lib} nppc) - add_cuda_link_dependency(${cuda_lib}_static nppc_static) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib} DEPS nppc) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS nppc_static) endforeach() - find_and_add_cuda_import_lib(nvrtc) - add_cuda_link_dependency(nvrtc cuda_driver) + _CUDAToolkit_find_and_add_import_lib(cupti + EXTRA_PATH_SUFFIXES ../extras/CUPTI/lib64/ + ../extras/CUPTI/lib/) + _CUDAToolkit_find_and_add_import_lib(cupti_static + EXTRA_PATH_SUFFIXES ../extras/CUPTI/lib64/ + ../extras/CUPTI/lib/) + + _CUDAToolkit_find_and_add_import_lib(nvrtc DEPS cuda_driver) - find_and_add_cuda_import_lib(nvml nvidia-ml nvml) + _CUDAToolkit_find_and_add_import_lib(nvml ALT nvidia-ml nvml) if(WIN32) # nvtools can be installed outside the CUDA toolkit directory @@ -798,17 +888,12 @@ if(CUDAToolkit_FOUND) PATH_SUFFIXES lib/x64 lib ) endif() - find_and_add_cuda_import_lib(nvToolsExt nvToolsExt nvToolsExt64) + _CUDAToolkit_find_and_add_import_lib(nvToolsExt ALT nvToolsExt64) - add_cuda_link_dependency(nvToolsExt cudart) - - find_and_add_cuda_import_lib(OpenCL) - - find_and_add_cuda_import_lib(culibos) - if(TARGET CUDA::culibos) - foreach (cuda_lib cublas cufft cusparse curand nvjpeg) - add_cuda_link_dependency(${cuda_lib}_static culibos) - endforeach() - endif() + _CUDAToolkit_find_and_add_import_lib(OpenCL) +endif() +if(_CUDAToolkit_Pop_ROOT_PATH) + list(REMOVE_AT CMAKE_FIND_ROOT_PATH 0) + unset(_CUDAToolkit_Pop_ROOT_PATH) endif()