From feccddff1621a872afa384ca7702f924df451530 Mon Sep 17 00:00:00 2001 From: Justin Luitjens Date: Wed, 31 Oct 2018 07:16:21 -0700 Subject: [PATCH 1/3] [src] Add configure option for cuda_arch Allow CUDA_ARCH to be overridden with a configure flag --- src/configure | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/configure b/src/configure index d4509a04c05..928923ee9f5 100755 --- a/src/configure +++ b/src/configure @@ -22,6 +22,8 @@ # ./configure --atlas-root=../tools/ATLAS/build # ./configure --use-cuda=no # disable CUDA detection (will build cpu-only # # version of kaldi even on CUDA-enabled machine +# ./configure --use-cuda --cudatk-dir=/usr/local/cuda/ --cuda-arch=-sm_70 +# # Use cuda in /usr/local/cuda and set the arch to -sm_70 # ./configure --static --fst-root=/opt/cross/armv8hf \ # --atlas-root=/opt/cross/armv8hf --host=armv8-rpi3-linux-gnueabihf # # Cross compile for armv8hf, this assumes that you have openfst built @@ -65,6 +67,7 @@ Configuration options: --shared Build and link against shared libraries [default=no] --use-cuda Build with CUDA [default=yes] --cudatk-dir=DIR CUDA toolkit directory + --cuda-arch=FLAGS Override the default CUDA_ARCH flags. See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#nvcc-examples. --double-precision Build with BaseFloat set to double if yes [default=no], mostly useful for testing purposes. 
--static-fst Build with static OpenFst libraries [default=no] @@ -121,6 +124,11 @@ function read_dirname { echo $retval } +function read_value { + local val=`expr "X$1" : '[^=]*=\(.*\)'`; + echo $val +} + function is_set { local myvar=${1:-notset} if [ "$myvar" == "notset" ]; then @@ -421,15 +429,17 @@ function configure_cuda { fi fi - case $CUDA_VERSION in - 5_5) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35" ;; - 6_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50" ;; - 7_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53" ;; - 8_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62" ;; - 9_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62 -gencode arch=compute_70,code=sm_70" ;; - 10_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_72,code=sm_72 -gencode arch=compute_75,code=sm_75" ;; - *) echo "Unsupported CUDA_VERSION (CUDA_VERSION=$CUDA_VERSION), please report it to Kaldi mailing list, together with 'nvcc -h' or 'ptxas -h' which lists allowed -gencode values..."; exit 1 ;; - esac + if [ -z "$CUDA_ARCH" ]; then + case $CUDA_VERSION in + 5_5) 
CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35" ;; + 6_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50" ;; + 7_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53" ;; + 8_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62" ;; + 9_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62 -gencode arch=compute_70,code=sm_70" ;; + 10_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_72,code=sm_72 -gencode arch=compute_75,code=sm_75" ;; + *) echo "Unsupported CUDA_VERSION (CUDA_VERSION=$CUDA_VERSION), please report it to Kaldi mailing list, together with 'nvcc -h' or 'ptxas -h' which lists allowed -gencode values..."; exit 1 ;; + esac + fi echo "Using CUDA toolkit $CUDATKDIR (nvcc compiler and runtime libraries)" echo >> kaldi.mk @@ -975,6 +985,9 @@ do --cudatk-dir=*) CUDATKDIR=`read_dirname $1`; shift ;; #CUDA is used in src/cudamatrix and src/nnet{,bin} only + --cuda-arch=*) + CUDA_ARCH=`read_value $1`; + shift;; --fst-version=*) OPENFST_VER=`expr "X$1" : '[^=]*=\(.*\)'`; shift;; From 89b29908dda0e4b23584a52cba14b76f515a4c30 Mon Sep 17 00:00:00 2001 From: Ryan Leary Date: 
Wed, 31 Oct 2018 10:55:51 -0400 Subject: [PATCH 2/3] Fix documentation bug --- src/configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/configure b/src/configure index 928923ee9f5..9c790e85e20 100755 --- a/src/configure +++ b/src/configure @@ -22,7 +22,7 @@ # ./configure --atlas-root=../tools/ATLAS/build # ./configure --use-cuda=no # disable CUDA detection (will build cpu-only # # version of kaldi even on CUDA-enabled machine -# ./configure --use-cuda --cudatk-dir=/usr/local/cuda/ --cuda-arch=-sm_70 +# ./configure --use-cuda --cudatk-dir=/usr/local/cuda/ --cuda-arch=-arch=sm_70 # # Use cuda in /usr/local/cuda and set the arch to -sm_70 # ./configure --static --fst-root=/opt/cross/armv8hf \ # --atlas-root=/opt/cross/armv8hf --host=armv8-rpi3-linux-gnueabihf From 3464e902453a7d257071c0e934a18dc667927f3b Mon Sep 17 00:00:00 2001 From: Ryan Leary Date: Wed, 31 Oct 2018 11:21:31 -0400 Subject: [PATCH 3/3] Reduce duplicated code --- src/configure | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/configure b/src/configure index 9c790e85e20..4a03f7ade87 100755 --- a/src/configure +++ b/src/configure @@ -23,7 +23,7 @@ # ./configure --use-cuda=no # disable CUDA detection (will build cpu-only # # version of kaldi even on CUDA-enabled machine # ./configure --use-cuda --cudatk-dir=/usr/local/cuda/ --cuda-arch=-arch=sm_70 -# # Use cuda in /usr/local/cuda and set the arch to -sm_70 +# # Use cuda in /usr/local/cuda and set the arch to sm_70 # ./configure --static --fst-root=/opt/cross/armv8hf \ # --atlas-root=/opt/cross/armv8hf --host=armv8-rpi3-linux-gnueabihf # # Cross compile for armv8hf, this assumes that you have openfst built @@ -117,18 +117,18 @@ function rel2abs { fi } +function read_value { + local val=`expr "X$1" : '[^=]*=\(.*\)'`; + echo $val +} + function read_dirname { - local dir_name=`expr "X$1" : '[^=]*=\(.*\)'`; + local dir_name=`read_value $1` local retval=`rel2abs $dir_name` [ -z $retval ] 
&& echo "Bad option '$1': no such directory" && exit 1; echo $retval } -function read_value { - local val=`expr "X$1" : '[^=]*=\(.*\)'`; - echo $val -} - function is_set { local myvar=${1:-notset} if [ "$myvar" == "notset" ]; then @@ -949,7 +949,7 @@ do mkl_threading=sequential; shift ;; --mkl-threading=*) - mkl_threading=`expr "X$1" : '[^=]*=\(.*\)'`; + mkl_threading=`read_value $1`; threaded_atlas=true; shift ;; --fst-root=*) @@ -980,7 +980,7 @@ do OMPLIBDIR=`read_dirname $1`; shift ;; --mathlib=*) - MATHLIB=`expr "X$1" : '[^=]*=\(.*\)'`; + MATHLIB=`read_value $1`; shift ;; --cudatk-dir=*) CUDATKDIR=`read_dirname $1`; @@ -989,13 +989,13 @@ do CUDA_ARCH=`read_value $1`; shift;; --fst-version=*) - OPENFST_VER=`expr "X$1" : '[^=]*=\(.*\)'`; + OPENFST_VER=`read_value $1`; shift;; --host=*) # The type of system where built programs and libraries will run. # It should be in the format cpu-vendor-os. If specified, this script # will infer the target architecture from the specified host triple. - HOST=`expr "X$1" : '[^=]*=\(.*\)'`; + HOST=`read_value $1`; shift ;; --android-incdir=*) android=true;