diff --git a/CMakeLists.txt b/CMakeLists.txt index 4cf24bbcae53..1ca92ff19a93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -180,7 +180,7 @@ endif() if(NOT mxnet_LINKER_LIBS) set(mxnet_LINKER_LIBS "") -endif(NOT mxnet_LINKER_LIBS) +endif() if(USE_GPROF) message(STATUS "Using GPROF") diff --git a/ci/build.py b/ci/build.py index a21ec44942a8..cbc41218f042 100755 --- a/ci/build.py +++ b/ci/build.py @@ -70,7 +70,8 @@ def get_docker_binary(use_nvidia_docker: bool) -> str: return "nvidia-docker" if use_nvidia_docker else "docker" -def build_docker(platform: str, docker_binary: str, registry: str, num_retries: int, no_cache: bool) -> str: +def build_docker(platform: str, docker_binary: str, registry: str, num_retries: int, no_cache: bool, + cache_intermediate: bool) -> str: """ Build a container for the given platform :param platform: Platform @@ -104,6 +105,8 @@ def build_docker(platform: str, docker_binary: str, registry: str, num_retries: "--build-arg", "GROUP_ID={}".format(os.getgid())] if no_cache: cmd.append("--no-cache") + if cache_intermediate: + cmd.append("--rm=false") elif registry: cmd.extend(["--cache-from", tag]) cmd.extend(["-t", tag, get_dockerfiles_path()]) @@ -330,6 +333,9 @@ def main() -> int: parser.add_argument("--no-cache", action="store_true", help="passes --no-cache to docker build") + parser.add_argument("--cache-intermediate", action="store_true", + help="passes --rm=false to docker build") + parser.add_argument("-e", "--environment", nargs="*", default=[], help="Environment variables for the docker container. " "Specify with a list containing either names or name=value") @@ -361,7 +367,8 @@ def main() -> int: load_docker_cache(tag=tag, docker_registry=args.docker_registry) if not args.run_only: build_docker(platform=platform, docker_binary=docker_binary, registry=args.docker_registry, - num_retries=args.docker_build_retries, no_cache=args.no_cache) + num_retries=args.docker_build_retries, no_cache=args.no_cache, + cache_intermediate=args.cache_intermediate) else: logging.info("Skipping docker build step.") diff --git a/ci/docker/Dockerfile.build.android_armv7 b/ci/docker/Dockerfile.build.android_armv7 index 2c923a015b63..96ca04e9f5e6 100644 --- a/ci/docker/Dockerfile.build.android_armv7 +++ b/ci/docker/Dockerfile.build.android_armv7 @@ -18,62 +18,41 @@ # # Dockerfile to build MXNet for Android ARMv7 -FROM dockcross/base -MAINTAINER Pedro Larroy "pllarroy@amazon.com" - -# The cross-compiling emulator -RUN apt-get update && apt-get install -y \ - unzip - -ENV CROSS_TRIPLE=arm-linux-androideabi -ENV CROSS_ROOT=/usr/${CROSS_TRIPLE} -ENV AS=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-as \ - AR=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ar \ - CC=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-gcc \ - CPP=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-cpp \ - CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-g++ \ - LD=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ld - -ENV ANDROID_NDK_REVISION 17b -ENV ANDROID_NDK_API 27 -ENV ANDROID_NDK_ARCH arm -WORKDIR /work/deps -COPY install/android_ndk.sh /work/deps -RUN /work/deps/android_ndk.sh - -ENV DEFAULT_DOCKCROSS_IMAGE dockcross/android-arm - -# Build-time metadata as defined at http://label-schema.org -ARG BUILD_DATE -ARG IMAGE -ARG VCS_REF -ARG VCS_URL -LABEL org.label-schema.build-date=$BUILD_DATE \ - org.label-schema.name=$IMAGE \ - org.label-schema.vcs-ref=$VCS_REF \ - org.label-schema.vcs-url=$VCS_URL \ - org.label-schema.schema-version="1.0" - - -ENV CC=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-clang -ENV CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-clang++ - -WORKDIR /work/deps - -COPY 
install/deb_ubuntu_ccache.sh /work/ -RUN /work/deb_ubuntu_ccache.sh -WORKDIR /work -COPY install/ubuntu_arm.sh /work/ -RUN /work/ubuntu_arm.sh - -COPY install/arm_openblas.sh /work/ -COPY install/android_armv7_openblas.sh /work/deps -RUN /work/deps/android_armv7_openblas.sh - -ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} - -WORKDIR /work +FROM ubuntu:20.04 + +ENV ARCH=armv7l \ + HOSTCC=gcc \ + HOSTCXX=g++ \ + TARGET=ARMV7 + +WORKDIR /usr/local + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + ninja-build \ + cmake \ + ccache \ + git \ + curl \ + unzip \ + && rm -rf /var/lib/apt/lists/* + +RUN curl -o android-ndk-r19-linux-x86_64.zip -L https://dl.google.com/android/repository/android-ndk-r19-linux-x86_64.zip && \ + unzip android-ndk-r19-linux-x86_64.zip && \ + rm android-ndk-r19-linux-x86_64.zip +ENV CMAKE_TOOLCHAIN_FILE=/usr/local/android-ndk-r19/build/cmake/android.toolchain.cmake + +RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ + mkdir /usr/local/openblas-android && \ + cd /usr/local/OpenBLAS && \ + export TOOLCHAIN=/usr/local/android-ndk-r19/toolchains/llvm/prebuilt/linux-x86_64 && \ + make NOFORTRAN=1 ARM_SOFTFP_ABI=1 \ + LDFLAGS="-L/usr/local/android-ndk-r19/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/lib/gcc/arm-linux-androideabi/4.9.x -lm" \ + CC=$TOOLCHAIN/bin/armv7a-linux-androideabi16-clang AR=$TOOLCHAIN/bin/arm-linux-androideabi-ar && \ + make PREFIX=/usr/local/openblas-android install && \ + cd /usr/local && \ + rm -rf OpenBLAS +ENV OpenBLAS_HOME=/usr/local/openblas-android ARG USER_ID=0 ARG GROUP_ID=0 @@ -81,5 +60,4 @@ COPY install/ubuntu_adduser.sh /work/ RUN /work/ubuntu_adduser.sh COPY runtime_functions.sh /work/ -WORKDIR /work/mxnet - +WORKDIR /work/build diff --git a/ci/docker/Dockerfile.build.android_armv8 b/ci/docker/Dockerfile.build.android_armv8 index ca62288129bb..81adc80edf14 100644 --- a/ci/docker/Dockerfile.build.android_armv8 +++ b/ci/docker/Dockerfile.build.android_armv8 @@ -18,62 +18,41 @@ # # Dockerfile to build MXNet for Android ARM64/ARMv8 -FROM dockcross/base -MAINTAINER Pedro Larroy "pllarroy@amazon.com" - -RUN apt-get update && apt-get install -y \ - unzip - -WORKDIR /work/deps - -# Build x86 dependencies. -COPY install/deb_ubuntu_ccache.sh /work/ -RUN /work/deb_ubuntu_ccache.sh - -# Setup Android cross-compilation environment. -ENV CROSS_TRIPLE=aarch64-linux-android -ENV CROSS_ROOT=/usr/${CROSS_TRIPLE} -ENV AS=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-as \ - AR=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ar \ - CC=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-gcc \ - CPP=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-cpp \ - CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-g++ \ - LD=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ld - - -ENV DEFAULT_DOCKCROSS_IMAGE dockcross/android-arm - -# Build-time metadata as defined at http://label-schema.org -ARG BUILD_DATE -ARG IMAGE -ARG VCS_REF -ARG VCS_URL -LABEL org.label-schema.build-date=$BUILD_DATE \ - org.label-schema.name=$IMAGE \ - org.label-schema.vcs-ref=$VCS_REF \ - org.label-schema.vcs-url=$VCS_URL \ - org.label-schema.schema-version="1.0" - -ENV ARCH aarch64 -ENV ANDROID_NDK_REVISION 17b -ENV ANDROID_NDK_API 27 -ENV ANDROID_NDK_ARCH arm64 -WORKDIR /work/deps -COPY install/android_ndk.sh /work/deps -RUN /work/deps/android_ndk.sh - - -WORKDIR /work/deps -COPY install/android_ndk.sh /work/ -RUN /work/android_ndk.sh - -ENV CC=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-clang -ENV CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-clang++ - -# Build ARM dependencies. 
-COPY install/android_arm64_openblas.sh /work/ -RUN /work/android_arm64_openblas.sh -ENV CPLUS_INCLUDE_PATH /work/deps/OpenBLAS +FROM ubuntu:20.04 + +ENV ARCH=aarch64 \ + HOSTCC=gcc \ + HOSTCXX=g++ \ + TARGET=ARMV8 + +WORKDIR /usr/local + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + ninja-build \ + cmake \ + ccache \ + git \ + curl \ + unzip \ + && rm -rf /var/lib/apt/lists/* + +RUN curl -o android-ndk-r19-linux-x86_64.zip -L https://dl.google.com/android/repository/android-ndk-r19-linux-x86_64.zip && \ + unzip android-ndk-r19-linux-x86_64.zip && \ + rm android-ndk-r19-linux-x86_64.zip +ENV CMAKE_TOOLCHAIN_FILE=/usr/local/android-ndk-r19/build/cmake/android.toolchain.cmake + +RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ + mkdir /usr/local/openblas-android && \ + cd /usr/local/OpenBLAS && \ + export TOOLCHAIN=/usr/local/android-ndk-r19/toolchains/llvm/prebuilt/linux-x86_64 && \ + make NOFORTRAN=1 \ + LDFLAGS="-L/usr/local/android-ndk-r19/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/lib/gcc/aarch64-linux-android/4.9.x -lm" \ + CC=$TOOLCHAIN/bin/aarch64-linux-android21-clang AR=$TOOLCHAIN/bin/aarch64-linux-android-ar && \ + make PREFIX=/usr/local/openblas-android install && \ + cd /usr/local && \ + rm -rf OpenBLAS +ENV OpenBLAS_HOME=/usr/local/openblas-android ARG USER_ID=0 ARG GROUP_ID=0 @@ -81,5 +60,4 @@ COPY install/ubuntu_adduser.sh /work/ RUN /work/ubuntu_adduser.sh COPY runtime_functions.sh /work/ - WORKDIR /work/build diff --git a/ci/docker/Dockerfile.build.armv6 b/ci/docker/Dockerfile.build.armv6 index 98414d8c2aff..02e16da11616 100644 --- a/ci/docker/Dockerfile.build.armv6 +++ b/ci/docker/Dockerfile.build.armv6 @@ -18,26 +18,42 @@ # # Dockerfile to build MXNet for ARMv6 -FROM dockcross/linux-armv6 +FROM ubuntu:20.04 -ENV ARCH armv6l -ENV HOSTCC gcc-6 -ENV HOSTCXX g++-6 -ENV TARGET ARMV6 +ENV ARCH=armv6l \ + HOSTCC=gcc \ + HOSTCXX=g++ \ + TARGET=ARMV6 -WORKDIR /work/deps +WORKDIR /usr/local -COPY install/ubuntu_arm.sh /work/ -RUN /work/ubuntu_arm.sh +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + ninja-build \ + cmake \ + ccache \ + git \ + curl \ + zip \ + python3 \ + python3-pip \ + && rm -rf /var/lib/apt/lists/* -COPY install/arm_openblas.sh /work/ -RUN /work/arm_openblas.sh +# We use a toolchain from toolchains.bootlin.com instead of the Debian / Ubuntu +# crossbuild-essential-armel toolchain, as the latter targets ARM architecture +# versions 4T, 5T, and 6, whereas we only wish to target ARMV6 and would like to use +# ARMV6-specific features.
https://wiki.debian.org/ArmEabiPort +RUN curl -o armv6-eabihf--glibc--stable-2020.02-2.tar.bz2 -L https://toolchains.bootlin.com/downloads/releases/toolchains/armv6-eabihf/tarballs/armv6-eabihf--glibc--stable-2020.02-2.tar.bz2 && \ + tar xf armv6-eabihf--glibc--stable-2020.02-2.tar.bz2 && \ + rm armv6-eabihf--glibc--stable-2020.02-2.tar.bz2 +ENV CMAKE_TOOLCHAIN_FILE=/usr/local/armv6-eabihf--glibc--stable-2020.02-2/share/buildroot/toolchainfile.cmake -ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} - -COPY install/deb_ubuntu_ccache.sh /work/ -RUN /work/deb_ubuntu_ccache.sh +RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ + cd /usr/local/OpenBLAS && \ + make NOFORTRAN=1 CC=/usr/local/armv6-eabihf--glibc--stable-2020.02-2/bin/arm-linux-gcc && \ + make PREFIX=/usr/local/armv6-eabihf--glibc--stable-2020.02-2/arm-buildroot-linux-gnueabihf/sysroot install && \ + cd /usr/local && \ + rm -rf OpenBLAS ARG USER_ID=0 ARG GROUP_ID=0 diff --git a/ci/docker/Dockerfile.build.armv7 b/ci/docker/Dockerfile.build.armv7 index f0cdd90322f0..a9cc6d1e83a4 100644 --- a/ci/docker/Dockerfile.build.armv7 +++ b/ci/docker/Dockerfile.build.armv7 @@ -16,28 +16,39 @@ # specific language governing permissions and limitations # under the License. # -# Dockerfile to build MXNet for Android ARMv7 - -FROM dockcross/linux-armv7 - -ENV ARCH armv7l -ENV HOSTCC gcc-7 -ENV HOSTCXX g++-7 -ENV TARGET ARMV7 - -WORKDIR /work/deps - -COPY install/ubuntu_arm.sh /work/ -RUN /work/ubuntu_arm.sh - -COPY install/arm_openblas.sh /work/ -RUN /work/arm_openblas.sh - -ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} - -COPY install/deb_ubuntu_ccache.sh /work/ -RUN /work/deb_ubuntu_ccache.sh +# Dockerfile to build MXNet for ARMv7 + +FROM ubuntu:20.04 + +ENV ARCH=armv7l \ + HOSTCC=gcc \ + HOSTCXX=g++ \ + TARGET=ARMV7 + +WORKDIR /usr/local + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + ninja-build \ + cmake \ + ccache \ + git \ + curl \ + zip \ + python3 \ + python3-pip \ + crossbuild-essential-armhf \ + && rm -rf /var/lib/apt/lists/* + +COPY toolchains/arm-linux-gnueabihf-toolchain.cmake /usr/local +ENV CMAKE_TOOLCHAIN_FILE=/usr/local/arm-linux-gnueabihf-toolchain.cmake + +RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ + cd /usr/local/OpenBLAS && \ + make NOFORTRAN=1 CC=arm-linux-gnueabihf-gcc && \ + make PREFIX=/usr/local/arm-linux-gnueabihf install && \ + cd /usr/local && \ + rm -rf OpenBLAS ARG USER_ID=0 ARG GROUP_ID=0 diff --git a/ci/docker/Dockerfile.build.armv8 b/ci/docker/Dockerfile.build.armv8 index 2f918e55d991..adf6873fb40c 100644 --- a/ci/docker/Dockerfile.build.armv8 +++ b/ci/docker/Dockerfile.build.armv8 @@ -18,26 +18,37 @@ # # Dockerfile to build MXNet for ARM64/ARMv8 -FROM dockcross/linux-arm64 - -ENV ARCH aarch64 -ENV HOSTCC gcc-7 -ENV HOSTCXX g++-7 -ENV TARGET ARMV8 - -WORKDIR /work/deps - -COPY install/ubuntu_arm.sh /work/ -RUN /work/ubuntu_arm.sh - -COPY install/arm_openblas.sh /work/ -RUN /work/arm_openblas.sh - -ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} - -COPY install/deb_ubuntu_ccache.sh /work/ -RUN /work/deb_ubuntu_ccache.sh +FROM ubuntu:20.04 + +ENV ARCH=aarch64 \ + HOSTCC=gcc \ + HOSTCXX=g++ \ + TARGET=ARMV8 + +WORKDIR /usr/local + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + ninja-build \ + cmake \ + ccache \ + git \ + curl \ + zip \ + python3 \ + python3-pip \ + crossbuild-essential-arm64 \ + && rm -rf 
/var/lib/apt/lists/* + +COPY toolchains/aarch64-linux-gnu-toolchain.cmake /usr +ENV CMAKE_TOOLCHAIN_FILE=/usr/aarch64-linux-gnu-toolchain.cmake + +RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ + cd /usr/local/OpenBLAS && \ + make NOFORTRAN=1 CC=aarch64-linux-gnu-gcc && \ + make PREFIX=/usr/aarch64-linux-gnu install && \ + cd /usr/local && \ + rm -rf OpenBLAS ARG USER_ID=0 ARG GROUP_ID=0 @@ -45,4 +56,4 @@ COPY install/ubuntu_adduser.sh /work/ RUN /work/ubuntu_adduser.sh COPY runtime_functions.sh /work/ -WORKDIR /work/build +WORKDIR /work/mxnet diff --git a/ci/docker/Dockerfile.build.jetson b/ci/docker/Dockerfile.build.jetson index 55575d17970f..93fe5e0a5b0d 100644 --- a/ci/docker/Dockerfile.build.jetson +++ b/ci/docker/Dockerfile.build.jetson @@ -20,63 +20,58 @@ # This script assumes /work/mxnet exists and contains the mxnet code you wish to compile and # that /work/build exists and is the target for your output. -FROM nvidia/cuda:9.0-cudnn7-devel as cudabuilder +FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 -FROM dockcross/linux-arm64 +ENV ARCH=aarch64 \ + HOSTCC=gcc \ + TARGET=ARMV8 -ENV ARCH aarch64 -ENV HOSTCC gcc -ENV TARGET ARMV8 +WORKDIR /usr/local -WORKDIR /work/deps +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + ninja-build \ + git \ + curl \ + zip \ + unzip \ + python3 \ + python3-pip \ + awscli \ + crossbuild-essential-arm64 \ + && rm -rf /var/lib/apt/lists/* -COPY install/ubuntu_arm.sh /work/ -RUN /work/ubuntu_arm.sh - -COPY install/arm_openblas.sh /work/ -RUN /work/arm_openblas.sh - -ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} +# cmake on Ubuntu 18.04 is too old +RUN python3 -m pip install cmake +# ccache on Ubuntu 18.04 is too old to support Cuda correctly COPY install/deb_ubuntu_ccache.sh /work/ RUN /work/deb_ubuntu_ccache.sh -# Setup CUDA build env (including configuring and copying nvcc) -COPY --from=cudabuilder /usr/local/cuda /usr/local/cuda -ENV TARGET_ARCH aarch64 -ENV TARGET_OS linux +COPY toolchains/aarch64-linux-gnu-toolchain.cmake /usr +ENV CMAKE_TOOLCHAIN_FILE=/usr/aarch64-linux-gnu-toolchain.cmake + +RUN git clone --recursive -b v0.3.9 https://github.com/xianyi/OpenBLAS.git && \ + cd /usr/local/OpenBLAS && \ + make NOFORTRAN=1 CC=aarch64-linux-gnu-gcc && \ + make PREFIX=/usr/aarch64-linux-gnu install && \ + cd /usr/local && \ + rm -rf OpenBLAS -# Install ARM depedencies based on Jetpack 3.3 -RUN JETPACK_DOWNLOAD_PREFIX=https://developer.download.nvidia.com/devzone/devcenter/mobile/jetpack_l4t/3.3/lw.xd42/JetPackL4T_33_b39 && \ - CUDA_REPO_PREFIX=/var/cuda-repo-9-0-local && \ - ARM_CUDA_INSTALLER_PACKAGE=cuda-repo-l4t-9-0-local_9.0.252-1_arm64.deb && \ - ARM_CUDNN_INSTALLER_PACKAGE=libcudnn7_7.1.5.14-1+cuda9.0_arm64.deb && \ - ARM_CUDNN_DEV_INSTALLER_PACKAGE=libcudnn7-dev_7.1.5.14-1+cuda9.0_arm64.deb && \ - ARM_LICENSE_INSTALLER=cuda-license-9-0_9.0.252-1_arm64.deb && \ - ARM_CUBLAS_INSTALLER=cuda-cublas-9-0_9.0.252-1_arm64.deb && \ - ARM_NVINFER_INSTALLER_PACKAGE=libnvinfer4_4.1.3-1+cuda9.0_arm64.deb && \ - ARM_NVINFER_DEV_INSTALLER_PACKAGE=libnvinfer-dev_4.1.3-1+cuda9.0_arm64.deb && \ - dpkg --add-architecture arm64 && \ - wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDA_INSTALLER_PACKAGE && \ - wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDNN_INSTALLER_PACKAGE && \ - wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_CUDNN_DEV_INSTALLER_PACKAGE && \ - wget -nv $JETPACK_DOWNLOAD_PREFIX/$ARM_NVINFER_INSTALLER_PACKAGE && \ - wget -nv 
$JETPACK_DOWNLOAD_PREFIX/$ARM_NVINFER_DEV_INSTALLER_PACKAGE && \ - dpkg -i --force-architecture $ARM_CUDA_INSTALLER_PACKAGE && \ - apt-key add $CUDA_REPO_PREFIX/7fa2af80.pub && \ - dpkg -i --force-architecture $ARM_CUDNN_INSTALLER_PACKAGE && \ - dpkg -i --force-architecture $ARM_CUDNN_DEV_INSTALLER_PACKAGE && \ - dpkg -i --force-architecture $CUDA_REPO_PREFIX/$ARM_LICENSE_INSTALLER && \ - dpkg -i --force-architecture $CUDA_REPO_PREFIX/$ARM_CUBLAS_INSTALLER && \ - dpkg -i --force-architecture $ARM_NVINFER_INSTALLER_PACKAGE && \ - dpkg -i --force-architecture $ARM_NVINFER_DEV_INSTALLER_PACKAGE && \ - apt update -y || true && apt install -y cuda-libraries-dev-9-0 libcudnn7-dev libnvinfer-dev -RUN ln -s /usr/include/aarch64-linux-gnu/cudnn_v7.h /usr/include/aarch64-linux-gnu/cudnn.h -ENV PATH $PATH:/usr/local/cuda/bin -ENV NVCCFLAGS "-m64" -ENV CUDA_ARCH "-gencode arch=compute_53,code=sm_53 -gencode arch=compute_62,code=sm_62" -ENV NVCC /usr/local/cuda/bin/nvcc +# Install aarch64 cross dependencies based on Jetpack 4.3 +# Manually downloaded using SDK Manager tool and placed in a private S3 bucket. +# We're not allowed to redistribute these files and there is no public version. +RUN aws s3 cp s3://mxnet-ci-prod-private-slave-data/nvidia/sdkm_downloads/cuda-repo-ubuntu1804-10-0-local-10.0.326-410.108_1.0-1_amd64.deb . && \ + dpkg -i cuda-repo-ubuntu1804-10-0-local-10.0.326-410.108_1.0-1_amd64.deb && \ + rm cuda-repo-ubuntu1804-10-0-local-10.0.326-410.108_1.0-1_amd64.deb && \ + apt-key add /var/cuda-repo-10-0-local-10.0.326-410.108/7fa2af80.pub && \ + aws s3 cp s3://mxnet-ci-prod-private-slave-data/nvidia/sdkm_downloads/cuda-repo-cross-aarch64-10-0-local-10.0.326_1.0-1_all.deb . && \ + dpkg -i cuda-repo-cross-aarch64-10-0-local-10.0.326_1.0-1_all.deb && \ + rm cuda-repo-cross-aarch64-10-0-local-10.0.326_1.0-1_all.deb && \ + apt-get update && \ + apt-get install -y -f && \ + apt-get install -y cuda-cross-aarch64 cuda-cross-aarch64-10-0 && \ + rm -rf /var/lib/apt/lists/* ARG USER_ID=0 ARG GROUP_ID=0 diff --git a/ci/docker/install/android_armv7_openblas.sh b/ci/docker/install/android_armv7_openblas.sh deleted file mode 100755 index 8642df6d9450..000000000000 --- a/ci/docker/install/android_armv7_openblas.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -set -ex -pushd .
-git clone https://github.com/xianyi/OpenBLAS.git -cd OpenBLAS -make TARGET=ARMV7 HOSTCC=gcc-7 HOSTCXX=g++-7 NOFORTRAN=1 ARM_SOFTFP_ABI=1 -j$(nproc) libs -#make PREFIX=${CROSS_ROOT} TARGET=ARMV7 HOSTCC=gcc NOFORTRAN=1 ARM_SOFTFP_ABI=1 install -cp *.h ${CROSS_ROOT}/include -cp libopenblas*.a ${CROSS_ROOT}/lib -popd diff --git a/ci/docker/install/android_ndk.sh b/ci/docker/install/android_ndk.sh deleted file mode 100755 index cb83aa65639a..000000000000 --- a/ci/docker/install/android_ndk.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -set -ex -pushd . -# This environment variable comes from the docker file -echo "Downloading android SDK rev ${ANDROID_NDK_REVISION}" -curl -O https://dl.google.com/android/repository/android-ndk-r${ANDROID_NDK_REVISION}-linux-x86_64.zip && \ -unzip ./android-ndk-r${ANDROID_NDK_REVISION}-linux-x86_64.zip && \ -cd android-ndk-r${ANDROID_NDK_REVISION} && \ -./build/tools/make_standalone_toolchain.py \ - --stl=libc++ \ - --arch ${ANDROID_NDK_ARCH}\ - --api ${ANDROID_NDK_API}\ - --install-dir=${CROSS_ROOT} && \ - -find ${CROSS_ROOT} -exec chmod a+r '{}' \; && \ -find ${CROSS_ROOT} -executable -exec chmod a+x '{}' \; -popd diff --git a/ci/docker/install/arm64_openblas.sh b/ci/docker/install/arm64_openblas.sh deleted file mode 100755 index 88f2e98cd65b..000000000000 --- a/ci/docker/install/arm64_openblas.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -set -ex -pushd . 
-wget -nv https://api.github.com/repos/xianyi/OpenBLAS/git/refs/heads/master -O openblas_version.json -echo "Using openblas:" -cat openblas_version.json -git clone https://github.com/xianyi/OpenBLAS.git -cd OpenBLAS -make -j$(nproc) TARGET=ARMV8 -make install -ln -s /opt/OpenBLAS/lib/libopenblas.so /usr/lib/libopenblas.so -ln -s /opt/OpenBLAS/lib/libopenblas.a /usr/lib/libopenblas.a -ln -s /opt/OpenBLAS/lib/libopenblas.a /usr/lib/liblapack.a -popd diff --git a/ci/docker/install/deb_ubuntu_ccache.sh b/ci/docker/install/deb_ubuntu_ccache.sh index cdc9354e220f..ef913ba36e55 100755 --- a/ci/docker/install/deb_ubuntu_ccache.sh +++ b/ci/docker/install/deb_ubuntu_ccache.sh @@ -23,7 +23,7 @@ set -ex pushd . -apt update || true +apt update apt install -y \ autoconf \ gperf \ @@ -32,31 +32,9 @@ apt install -y \ mkdir -p /work/deps cd /work/deps -# Unset ARM toolchain cross-compilation configuration on dockcross -unset ARCH -unset DEFAULT_DOCKCROSS_IMAGE -unset CROSS_TRIPLE -unset CC -unset AS -unset AR -unset FC -unset CXX -unset CROSS_ROOT -unset CROSS_COMPILE -unset PKG_CONFIG_PATH -unset CMAKE_TOOLCHAIN_FILE -unset CPP -unset LD -export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin - git clone --recursive https://github.com/ccache/ccache.git cd ccache -git checkout v3.7.8 -# Backport cuda related fixes: https://github.com/ccache/ccache/pull/381 -git config user.name "MXNet CI" -git config user.email "MXNetCI@example.com" -git cherry-pick --strategy-option=theirs c4fffda031034f930df2cf188878b8f9160027df -git cherry-pick 0dec5c2df3e3ebc1fbbf33f74c992bef6264f37a +git checkout v3.7.9 ./autogen.sh ./configure --disable-man diff --git a/ci/docker/install/ubuntu_arm.sh b/ci/docker/install/ubuntu_arm.sh deleted file mode 100755 index 608d0362f138..000000000000 --- a/ci/docker/install/ubuntu_arm.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -apt update || true -apt install -y \ - unzip \ - python3 \ - python3-pip - -pip3 install setuptools diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 03f8b2c1c3d8..4e7b0db1b648 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -216,15 +216,22 @@ build_dynamic_libmxnet() { build_jetson() { set -ex - pushd . 
- - export CC=gcc-6 - export CXX=g++-6 - cp make/crosscompile.jetson.mk ./config.mk - make -j$(nproc) - - build_wheel /work/mxnet/python /work/mxnet/lib - popd + cd /work/build + cmake \ + -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \ + -DUSE_CUDA=ON \ + -DMXNET_CUDA_ARCH="5.2" \ + -DENABLE_CUDA_RTC=OFF \ + -DSUPPORT_F16C=OFF \ + -DUSE_OPENCV=OFF \ + -DUSE_OPENMP=ON \ + -DUSE_LAPACK=OFF \ + -DUSE_SIGNAL_HANDLER=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DUSE_MKL_IF_AVAILABLE=OFF \ + -G Ninja /work/mxnet + ninja + build_wheel } # @@ -252,7 +259,7 @@ build_armv6() { -DUSE_MKL_IF_AVAILABLE=OFF \ -DUSE_LAPACK=OFF \ -DBUILD_CPP_EXAMPLES=OFF \ - -Dmxnet_LINKER_LIBS=-lgfortran \ + -Dmxnet_LINKER_LIBS=-latomic \ -G Ninja /work/mxnet ninja @@ -279,7 +286,6 @@ build_armv7() { -DUSE_MKL_IF_AVAILABLE=OFF \ -DUSE_LAPACK=OFF \ -DBUILD_CPP_EXAMPLES=OFF \ - -Dmxnet_LINKER_LIBS=-lgfortran \ -G Ninja /work/mxnet ninja @@ -289,14 +295,15 @@ build_armv7() { build_armv8() { cd /work/build cmake \ - -DUSE_CUDA=OFF\ - -DSUPPORT_F16C=OFF\ - -DUSE_OPENCV=OFF\ + -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \ + -DUSE_CUDA=OFF \ + -DSUPPORT_F16C=OFF \ + -DUSE_OPENCV=OFF \ -DUSE_OPENMP=ON \ - -DUSE_LAPACK=OFF\ - -DUSE_SIGNAL_HANDLER=ON\ - -DCMAKE_BUILD_TYPE=Release\ - -DUSE_MKL_IF_AVAILABLE=OFF\ + -DUSE_LAPACK=OFF \ + -DUSE_SIGNAL_HANDLER=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DUSE_MKL_IF_AVAILABLE=OFF \ -G Ninja /work/mxnet ninja build_wheel @@ -311,15 +318,18 @@ build_android_armv7() { set -ex cd /work/build cmake \ - -DANDROID=ON\ - -DUSE_CUDA=OFF\ - -DUSE_SSE=OFF\ - -DSUPPORT_F16C=OFF\ - -DUSE_LAPACK=OFF\ - -DUSE_OPENCV=OFF\ - -DUSE_OPENMP=OFF\ - -DUSE_SIGNAL_HANDLER=ON\ - -DUSE_MKL_IF_AVAILABLE=OFF\ + -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \ + -DANDROID_ABI="armeabi-v7a" \ + -DANDROID_STL="c++_shared" \ + -DANDROID=ON \ + -DUSE_CUDA=OFF \ + -DUSE_SSE=OFF \ + -DSUPPORT_F16C=OFF \ + -DUSE_LAPACK=OFF \ + -DUSE_OPENCV=OFF \ + -DUSE_OPENMP=OFF \ + -DUSE_SIGNAL_HANDLER=ON \ + -DUSE_MKL_IF_AVAILABLE=OFF \ -G Ninja /work/mxnet ninja } @@ -328,14 +338,17 @@ build_android_armv8() { set -ex cd /work/build cmake \ + -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \ + -DANDROID_ABI="arm64-v8a" \ + -DANDROID_STL="c++_shared" \ -DANDROID=ON \ - -DUSE_CUDA=OFF\ - -DUSE_SSE=OFF\ - -DUSE_LAPACK=OFF\ - -DUSE_OPENCV=OFF\ - -DUSE_OPENMP=OFF\ - -DUSE_SIGNAL_HANDLER=ON\ - -DUSE_MKL_IF_AVAILABLE=OFF\ + -DUSE_CUDA=OFF \ + -DUSE_SSE=OFF \ + -DUSE_LAPACK=OFF \ + -DUSE_OPENCV=OFF \ + -DUSE_OPENMP=OFF \ + -DUSE_SIGNAL_HANDLER=ON \ + -DUSE_MKL_IF_AVAILABLE=OFF \ -G Ninja /work/mxnet ninja } diff --git a/ci/docker/install/android_arm64_openblas.sh b/ci/docker/toolchains/aarch64-linux-gnu-toolchain.cmake old mode 100755 new mode 100644 similarity index 64% rename from ci/docker/install/android_arm64_openblas.sh rename to ci/docker/toolchains/aarch64-linux-gnu-toolchain.cmake index f9647d969bf2..3780415c4b15 --- a/ci/docker/install/android_arm64_openblas.sh +++ b/ci/docker/toolchains/aarch64-linux-gnu-toolchain.cmake @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -17,16 +15,14 @@ # specific language governing permissions and limitations # under the License. 
-# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR "aarch64") +set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc) +set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++) +set(CMAKE_CUDA_HOST_COMPILER aarch64-linux-gnu-gcc) +set(CMAKE_FIND_ROOT_PATH "/usr/aarch64-linux-gnu") -set -ex -pushd . -git clone https://github.com/xianyi/OpenBLAS.git -cd OpenBLAS -make -j$(nproc) TARGET=ARMV8 ARM_SOFTFP_ABI=1 HOSTCC=gcc-7 HOSTCXX=g++-7 NOFORTRAN=1 libs -# Can't be run (utility not compiled for the target platform) -#make install -cp *.h /usr/include -cp libopenblas.a /usr/local/lib -popd +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) diff --git a/ci/docker/install/arm_openblas.sh b/ci/docker/toolchains/arm-linux-gnueabihf-toolchain.cmake old mode 100755 new mode 100644 similarity index 65% rename from ci/docker/install/arm_openblas.sh rename to ci/docker/toolchains/arm-linux-gnueabihf-toolchain.cmake index fa2e5cae9cba..62038ecee16a --- a/ci/docker/install/arm_openblas.sh +++ b/ci/docker/toolchains/arm-linux-gnueabihf-toolchain.cmake @@ -1,5 +1,3 @@ -#!/usr/bin/env bash - # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -17,14 +15,13 @@ # specific language governing permissions and limitations # under the License. -set -ex - -git clone --recursive -b v0.2.20 https://github.com/xianyi/OpenBLAS.git - -cd OpenBLAS -make -j$(nproc) -PREFIX=${CROSS_ROOT} make install - -cd .. +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR "armv7l") +set(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc) +set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++) +set(CMAKE_FIND_ROOT_PATH "/usr/arm-linux-gnueabihf" "/usr/local/arm-linux-gnueabihf") -rm -rf OpenBLAS +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) diff --git a/cmake/upstream/FindCUDAToolkit.cmake b/cmake/upstream/FindCUDAToolkit.cmake index d37c44d9c782..fee4f3f4f698 100644 --- a/cmake/upstream/FindCUDAToolkit.cmake +++ b/cmake/upstream/FindCUDAToolkit.cmake @@ -132,6 +132,7 @@ of the following libraries that are part of the CUDAToolkit: - :ref:`cuRAND` - :ref:`cuSOLVER` - :ref:`cuSPARSE` +- :ref:`cuPTI` - :ref:`NPP` - :ref:`nvBLAS` - :ref:`nvGRAPH` @@ -149,7 +150,6 @@ CUDA Runtime Library The CUDA Runtime library (cudart) are what most applications will typically need to link against to make any calls such as `cudaMalloc`, and `cudaFree`. -They are an explicit dependency of almost every library. Targets Created: @@ -230,6 +230,18 @@ Targets Created: - ``CUDA::cusparse`` - ``CUDA::cusparse_static`` +.. _`cuda_toolkit_cupti`: + +cupti +""""" + +The `NVIDIA CUDA Profiling Tools Interface `_. + +Targets Created: + +- ``CUDA::cupti`` +- ``CUDA::cupti_static`` + .. _`cuda_toolkit_NPP`: NPP @@ -361,8 +373,6 @@ Targets Created: - ``CUDA::nvml`` -.. _`cuda_toolkit_opencl`: - .. _`cuda_toolkit_nvToolsExt`: nvToolsExt @@ -375,6 +385,8 @@ Targets Created: - ``CUDA::nvToolsExt`` +.. _`cuda_toolkit_opencl`: + OpenCL """""" @@ -436,6 +448,11 @@ Result variables The path to the CUDA Toolkit library directory that contains the CUDA Runtime library ``cudart``. 
+``CUDAToolkit_TARGET_DIR`` + The path to the CUDA Toolkit directory including the target architecture + when cross-compiling. When not cross-compiling this will be equivalent to + ``CUDAToolkit_ROOT_DIR``. + ``CUDAToolkit_NVCC_EXECUTABLE`` The path to the NVIDIA CUDA compiler ``nvcc``. Note that this path may **not** be the same as @@ -487,6 +504,7 @@ if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR) get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY) # use the already detected cuda compiler set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "") + mark_as_advanced(CUDAToolkit_BIN_DIR) unset(cuda_dir) endif() @@ -641,6 +659,7 @@ endif() if(NOT CUDAToolkit_BIN_DIR AND CUDAToolkit_NVCC_EXECUTABLE) get_filename_component(cuda_dir "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY) set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE) + mark_as_advanced(CUDAToolkit_BIN_DIR) unset(cuda_dir) endif() @@ -669,8 +688,47 @@ endif() get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE) -# Now that we have the real ROOT_DIR, find components inside it. -list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR}) +# Handle cross compilation +if(CMAKE_CROSSCOMPILING) + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a") + # Support for NVPACK + set (CUDAToolkit_TARGET_NAME "armv7-linux-androideabi") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") + # Support for arm cross compilation + set(CUDAToolkit_TARGET_NAME "armv7-linux-gnueabihf") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") + # Support for aarch64 cross compilation + if (ANDROID_ARCH_NAME STREQUAL "arm64") + set(CUDAToolkit_TARGET_NAME "aarch64-linux-androideabi") + else() + set(CUDAToolkit_TARGET_NAME "aarch64-linux") + endif (ANDROID_ARCH_NAME STREQUAL "arm64") + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + set(CUDAToolkit_TARGET_NAME "x86_64-linux") + endif() + + if (EXISTS "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}") + set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}") + # add known CUDA target root path to the set of directories we search for programs, libraries and headers + list(PREPEND CMAKE_FIND_ROOT_PATH "${CUDAToolkit_TARGET_DIR}") + + # Mark that we need to pop the root search path changes after we have + # found all cuda libraries so that searches for our cross-compilation + # libraries work when another cuda sdk is in CMAKE_PREFIX_PATH or + # PATH + set(_CUDAToolkit_Pop_ROOT_PATH True) + endif() +else() + # Not cross compiling + set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}") + # Now that we have the real ROOT_DIR, find components inside it. + list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR}) + + # Mark that we need to pop the prefix path changes after we have + # found the cudart library.
+ set(_CUDAToolkit_Pop_Prefix True) +endif() + # Find the include/ directory find_path(CUDAToolkit_INCLUDE_DIR @@ -680,14 +738,17 @@ find_path(CUDAToolkit_INCLUDE_DIR # And find the CUDA Runtime Library libcudart find_library(CUDA_CUDART NAMES cudart - PATH_SUFFIXES lib64 lib/x64 + PATH_SUFFIXES lib64 lib64/stubs lib/x64 ) if (NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY) message(STATUS "Unable to find cudart library.") endif() unset(CUDAToolkit_ROOT_DIR) -list(REMOVE_AT CMAKE_PREFIX_PATH -1) +if(_CUDAToolkit_Pop_Prefix) + list(REMOVE_AT CMAKE_PREFIX_PATH -1) + unset(_CUDAToolkit_Pop_Prefix) +endif() #----------------------------------------------------------------------------- # Perform version comparison and validate all required variables are set. @@ -702,6 +763,10 @@ find_package_handle_standard_args(CUDAToolkit VERSION_VAR CUDAToolkit_VERSION ) +mark_as_advanced(CUDA_CUDART + CUDAToolkit_INCLUDE_DIR + CUDAToolkit_NVCC_EXECUTABLE + ) #----------------------------------------------------------------------------- # Construct result variables @@ -714,78 +779,103 @@ endif() # Construct import targets if(CUDAToolkit_FOUND) - function(find_and_add_cuda_import_lib lib_name) + function(_CUDAToolkit_find_and_add_import_lib lib_name) + cmake_parse_arguments(arg "" "" "ALT;DEPS;EXTRA_PATH_SUFFIXES" ${ARGN}) - if(ARGC GREATER 1) - set(search_names ${ARGN}) - else() - set(search_names ${lib_name}) - endif() + set(search_names ${lib_name} ${arg_ALT}) find_library(CUDA_${lib_name}_LIBRARY NAMES ${search_names} - PATHS ${CUDAToolkit_LIBRARY_DIR} + HINTS ${CUDAToolkit_LIBRARY_DIR} ENV CUDA_PATH - PATH_SUFFIXES nvidia/current lib64 lib/x64 lib + PATH_SUFFIXES nvidia/current lib64 lib64/stubs lib/x64 lib lib/stubs stubs + ${arg_EXTRA_PATH_SUFFIXES} ) + mark_as_advanced(CUDA_${lib_name}_LIBRARY) - if (NOT CUDA::${lib_name} AND CUDA_${lib_name}_LIBRARY) + if (NOT TARGET CUDA::${lib_name} AND CUDA_${lib_name}_LIBRARY) add_library(CUDA::${lib_name} IMPORTED INTERFACE) target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") target_link_libraries(CUDA::${lib_name} INTERFACE "${CUDA_${lib_name}_LIBRARY}") + foreach(dep ${arg_DEPS}) + if(TARGET CUDA::${dep}) + target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dep}) + endif() + endforeach() endif() endfunction() - function(add_cuda_link_dependency lib_name) - foreach(dependency IN LISTS ${ARGN}) - target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dependency}) - endforeach() - endfunction() + if(NOT TARGET CUDA::toolkit) + add_library(CUDA::toolkit IMPORTED INTERFACE) + target_include_directories(CUDA::toolkit SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") + target_link_directories(CUDA::toolkit INTERFACE "${CUDAToolkit_LIBRARY_DIR}") + endif() - add_library(CUDA::toolkit IMPORTED INTERFACE) - target_include_directories(CUDA::toolkit SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}") - target_link_directories(CUDA::toolkit INTERFACE "${CUDAToolkit_LIBRARY_DIR}") + _CUDAToolkit_find_and_add_import_lib(cuda_driver ALT cuda) + _CUDAToolkit_find_and_add_import_lib(cudart) + _CUDAToolkit_find_and_add_import_lib(cudart_static) - find_and_add_cuda_import_lib(cuda_driver cuda) + # setup dependencies that are required for cudart_static when building + # on linux. 
These are generally only required when using the CUDA toolkit + # when CUDA language is disabled + if(NOT TARGET CUDA::cudart_static_deps + AND TARGET CUDA::cudart_static) - find_and_add_cuda_import_lib(cudart) - find_and_add_cuda_import_lib(cudart_static) + add_library(CUDA::cudart_static_deps IMPORTED INTERFACE) + target_link_libraries(CUDA::cudart_static INTERFACE CUDA::cudart_static_deps) - foreach (cuda_lib cublas cufft cufftw curand cusolver cusparse nvgraph nvjpeg) - find_and_add_cuda_import_lib(${cuda_lib}) - add_cuda_link_dependency(${cuda_lib} cudart) + if(UNIX AND (CMAKE_C_COMPILER OR CMAKE_CXX_COMPILER)) + find_package(Threads REQUIRED) + target_link_libraries(CUDA::cudart_static_deps INTERFACE Threads::Threads ${CMAKE_DL_LIBS}) + endif() - find_and_add_cuda_import_lib(${cuda_lib}_static) - add_cuda_link_dependency(${cuda_lib}_static cudart_static) + if(UNIX AND NOT APPLE) + # On Linux, you must link against librt when using the static cuda runtime. + find_library(CUDAToolkit_rt_LIBRARY rt) + mark_as_advanced(CUDAToolkit_rt_LIBRARY) + if(NOT CUDAToolkit_rt_LIBRARY) + message(WARNING "Could not find librt library, needed by CUDA::cudart_static") + else() + target_link_libraries(CUDA::cudart_static_deps INTERFACE ${CUDAToolkit_rt_LIBRARY}) + endif() + endif() + endif() + + _CUDAToolkit_find_and_add_import_lib(culibos) # it's a static library + foreach (cuda_lib cublas cufft curand cusparse nppc nvjpeg) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS culibos) endforeach() + # cuFFTW depends on cuFFT + _CUDAToolkit_find_and_add_import_lib(cufftw DEPS cufft) + _CUDAToolkit_find_and_add_import_lib(cufftw_static DEPS cufft_static) + # cuSOLVER depends on cuBLAS, and cuSPARSE - add_cuda_link_dependency(cusolver cublas cusparse) - add_cuda_link_dependency(cusolver_static cublas_static cusparse) + _CUDAToolkit_find_and_add_import_lib(cusolver DEPS cublas cusparse) + _CUDAToolkit_find_and_add_import_lib(cusolver_static DEPS cublas_static cusparse_static culibos) # nvGRAPH depends on cuRAND, and cuSOLVER. - add_cuda_link_dependency(nvgraph curand cusolver) - add_cuda_link_dependency(nvgraph_static curand_static cusolver_static) - - find_and_add_cuda_import_lib(nppc) - find_and_add_cuda_import_lib(nppc_static) - - add_cuda_link_dependency(nppc cudart) - add_cuda_link_dependency(nppc_static cudart_static culibos) + _CUDAToolkit_find_and_add_import_lib(nvgraph DEPS curand cusolver) + _CUDAToolkit_find_and_add_import_lib(nvgraph_static DEPS curand_static cusolver_static) # Process the majority of the NPP libraries.
foreach (cuda_lib nppial nppicc nppidei nppif nppig nppim nppist nppitc npps nppicom nppisu) - find_and_add_cuda_import_lib(${cuda_lib}) - find_and_add_cuda_import_lib(${cuda_lib}_static) - add_cuda_link_dependency(${cuda_lib} nppc) - add_cuda_link_dependency(${cuda_lib}_static nppc_static) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib} DEPS nppc) + _CUDAToolkit_find_and_add_import_lib(${cuda_lib}_static DEPS nppc_static) endforeach() - find_and_add_cuda_import_lib(nvrtc) - add_cuda_link_dependency(nvrtc cuda_driver) + _CUDAToolkit_find_and_add_import_lib(cupti + EXTRA_PATH_SUFFIXES ../extras/CUPTI/lib64/ + ../extras/CUPTI/lib/) + _CUDAToolkit_find_and_add_import_lib(cupti_static + EXTRA_PATH_SUFFIXES ../extras/CUPTI/lib64/ + ../extras/CUPTI/lib/) + + _CUDAToolkit_find_and_add_import_lib(nvrtc DEPS cuda_driver) - find_and_add_cuda_import_lib(nvml nvidia-ml nvml) + _CUDAToolkit_find_and_add_import_lib(nvml ALT nvidia-ml nvml) if(WIN32) # nvtools can be installed outside the CUDA toolkit directory @@ -798,17 +888,12 @@ if(CUDAToolkit_FOUND) PATH_SUFFIXES lib/x64 lib ) endif() - find_and_add_cuda_import_lib(nvToolsExt nvToolsExt nvToolsExt64) + _CUDAToolkit_find_and_add_import_lib(nvToolsExt ALT nvToolsExt64) - add_cuda_link_dependency(nvToolsExt cudart) - - find_and_add_cuda_import_lib(OpenCL) - - find_and_add_cuda_import_lib(culibos) - if(TARGET CUDA::culibos) - foreach (cuda_lib cublas cufft cusparse curand nvjpeg) - add_cuda_link_dependency(${cuda_lib}_static culibos) - endforeach() - endif() + _CUDAToolkit_find_and_add_import_lib(OpenCL) +endif() +if(_CUDAToolkit_Pop_ROOT_PATH) + list(REMOVE_AT CMAKE_FIND_ROOT_PATH 0) + unset(_CUDAToolkit_Pop_ROOT_PATH) endif() diff --git a/make/crosscompile.jetson.mk b/make/crosscompile.jetson.mk deleted file mode 100644 index 880e2cf5b466..000000000000 --- a/make/crosscompile.jetson.mk +++ /dev/null @@ -1,216 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -#------------------------------------------------------------------------------- -# Template configuration for compiling mxnet -# -# If you want to change the configuration, please use the following -# steps. Assume you are on the root directory of mxnet. First copy the this -# file so that any local changes will be ignored by git -# -# $ cp make/config.mk . -# -# Next modify the according entries, and then compile by -# -# $ make -# -# or build in parallel with 8 threads -# -# $ make -j8 -#------------------------------------------------------------------------------- - -#--------------------- -# For cross compilation we only explictily set a compiler when one is not already present. 
-#-------------------- - -ifndef CC -export CC = gcc -endif -ifndef CXX -export CXX = g++ -endif -ifndef NVCC -export NVCC = nvcc -endif - -# whether compile with options for MXNet developer -DEV = 0 - -# whether compile with debug -DEBUG = 0 - -# whether to turn on segfault signal handler to log the stack trace -USE_SIGNAL_HANDLER = 1 - -# the additional link flags you want to add -ADD_LDFLAGS = -L${CROSS_ROOT}/lib -L/usr/lib/aarch64-linux-gnu/ - -# the additional compile flags you want to add -ADD_CFLAGS = -I${CROSS_ROOT}/include -I/usr/include/aarch64-linux-gnu/ - -#--------------------------------------------- -# matrix computation libraries for CPU/GPU -#--------------------------------------------- - -# whether use CUDA during compile -USE_CUDA = 1 - -# add the path to CUDA library to link and compile flag -# if you have already add them to environment variable, leave it as NONE -# USE_CUDA_PATH = /usr/local/cuda -USE_CUDA_PATH = /usr/local/cuda-9.0/targets/aarch64-linux - -# whether to enable CUDA runtime compilation -ENABLE_CUDA_RTC = 0 - -# whether use CuDNN R3 library -USE_CUDNN = 1 - -#whether to use NCCL library -USE_NCCL = 0 -#add the path to NCCL library -USE_NCCL_PATH = NONE - -# whether use opencv during compilation -# you can disable it, however, you will not able to use -# imbin iterator -USE_OPENCV = 0 -# Add OpenCV include path, in which the directory `opencv2` exists -USE_OPENCV_INC_PATH = NONE -# Add OpenCV shared library path, in which the shared library exists -USE_OPENCV_LIB_PATH = NONE - -#whether use libjpeg-turbo for image decode without OpenCV wrapper -USE_LIBJPEG_TURBO = 0 -#add the path to libjpeg-turbo library -USE_LIBJPEG_TURBO_PATH = NONE - -# use openmp for parallelization -USE_OPENMP = 1 - -# whether use MKL-DNN library -USE_MKLDNN = 0 - -# whether use NNPACK library -USE_NNPACK = 0 - -# choose the version of blas you want to use -# can be: mkl, blas, atlas, openblas -# in default use atlas for linux while apple for osx -UNAME_S := $(shell uname -s) -USE_BLAS = openblas - -# whether use lapack during compilation -# only effective when compiled with blas versions openblas/apple/atlas/mkl -USE_LAPACK = 1 - -# path to lapack library in case of a non-standard installation -USE_LAPACK_PATH = - -# add path to intel library, you may need it for MKL, if you did not add the path -# to environment variable -USE_INTEL_PATH = NONE - -# If use MKL only for BLAS, choose static link automatically to allow python wrapper -ifeq ($(USE_BLAS), mkl) -USE_STATIC_MKL = 1 -else -USE_STATIC_MKL = NONE -endif - -#---------------------------- -# Settings for power and arm arch -#---------------------------- -USE_SSE=0 - -# Turn off F16C instruction set support -USE_F16C=0 - -#---------------------------- -# distributed computing -#---------------------------- - -# whether or not to enable multi-machine supporting -USE_DIST_KVSTORE = 0 - -# whether or not allow to read and write HDFS directly. If yes, then hadoop is -# required -USE_HDFS = 0 - -# path to libjvm.so. required if USE_HDFS=1 -LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server - -# whether or not allow to read and write AWS S3 directly. 
If yes, then -# libcurl4-openssl-dev is required, it can be installed on Ubuntu by -# sudo apt-get install -y libcurl4-openssl-dev -USE_S3 = 0 - -#---------------------------- -# performance settings -#---------------------------- -# Use operator tuning -USE_OPERATOR_TUNING = 1 - -# Use gperftools if found -# Disable because of #8968 -USE_GPERFTOOLS = 0 - -# path to gperftools (tcmalloc) library in case of a non-standard installation -USE_GPERFTOOLS_PATH = - -# Use JEMalloc if found, and not using gperftools -USE_JEMALLOC = 1 - -# path to jemalloc library in case of a non-standard installation -USE_JEMALLOC_PATH = - -#---------------------------- -# additional operators -#---------------------------- - -# path to folders containing projects specific operators that you don't want to put in src/operators -EXTRA_OPERATORS = - -#---------------------------- -# other features -#---------------------------- - -# Create C++ interface package -USE_CPP_PACKAGE = 0 - -# Use int64_t type to represent the total number of elements in the tensor -# This will cause performance degradation reported in issue #14496 -# Set to 1 for large tensor with tensor size greater than INT32_MAX i.e. 2147483647 -# Note: the size of each dimension is still bounded by INT32_MAX -USE_INT64_TENSOR_SIZE = 0 - -#---------------------------- -# plugins -#---------------------------- - -# whether to use caffe integration. This requires installing caffe. -# You also need to add CAFFE_PATH/build/lib to your LD_LIBRARY_PATH -# CAFFE_PATH = $(HOME)/caffe -# MXNET_PLUGINS += plugin/caffe/caffe.mk - -# WARPCTC_PATH = $(HOME)/warp-ctc -# MXNET_PLUGINS += plugin/warpctc/warpctc.mk - -# whether to use sframe integration. This requires build sframe -# git@github.com:dato-code/SFrame.git -# SFRAME_PATH = $(HOME)/SFrame -# MXNET_PLUGINS += plugin/sframe/plugin.mk diff --git a/src/operator/random/shuffle_op.cc b/src/operator/random/shuffle_op.cc index 3f94cca530c3..c81d90689d58 100644 --- a/src/operator/random/shuffle_op.cc +++ b/src/operator/random/shuffle_op.cc @@ -23,7 +23,7 @@ * \brief Operator to shuffle elements of an NDArray */ #if ((__GNUC__ > 4 && !defined(__clang__major__)) || (__clang_major__ > 4 && __linux__)) && \ - defined(_OPENMP) + defined(_OPENMP) && !defined(__ANDROID__) #define USE_GNU_PARALLEL_SHUFFLE #endif
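A minimal sketch of how the new --cache-intermediate option and the cross-compilation images might be exercised locally. It assumes ci/build.py keeps its existing -p/--platform option and positional container command (this patch only adds --cache-intermediate), and that runtime_functions.sh is available at /work/runtime_functions.sh inside the image as in the current CI setup:

    # Build the armv8 cross-compile image; --cache-intermediate passes --rm=false to
    # docker build so intermediate layers are kept when debugging a failing Dockerfile step,
    # then run the corresponding build function from runtime_functions.sh in the container.
    ci/build.py --cache-intermediate -p armv8 /work/runtime_functions.sh build_armv8

    # Inside the container, build_armv8 configures CMake against the toolchain file baked
    # into the image and builds with Ninja, roughly:
    #   cmake -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} ... -G Ninja /work/mxnet && ninja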