Fix build issue with USE_CUDNN=0 (apache#11470)
* Fix build issue with CUDNN=0

* Fix nocudnn func name

* Remove python2 tests

* Remove CPP package test

* Check assert raises when cudnn disabled for op tests on gpu

* Add line

* Remove whitespace

* add decorator for other ops

* Add and remove assert

* Fix op and common

* Fix merge issue

* Remove C API

* Fix

* Fix lint

* Add init git

* Rename CUDNN_DISABLED env variable

* Add a runtime function for nocudnn

* Remove MXCudnnIsenabled

* Add comment for disabled test

* Add full link in comment
anirudh2290 authored Jul 12, 2018
1 parent b759c7b commit 1d8fc79
Showing 9 changed files with 118 additions and 16 deletions.
32 changes: 32 additions & 0 deletions Jenkinsfile
@@ -163,6 +163,13 @@ def python3_gpu_ut(docker_container_name) {
  }
}

// Python 3 NOCUDNN
def python3_gpu_ut_nocudnn(docker_container_name) {
  timeout(time: max_time, unit: 'MINUTES') {
    docker_run(docker_container_name, 'unittest_ubuntu_python3_gpu_nocudnn', true)
  }
}

try {
  stage('Sanity Check') {
    parallel 'Lint': {
@@ -292,6 +299,17 @@ try {
        }
      }
    },
    'GPU: MKLDNN_CUDNNOFF': {
      node('mxnetlinux-cpu') {
        ws('workspace/build-mkldnn-gpu-nocudnn') {
          timeout(time: max_time, unit: 'MINUTES') {
            init_git()
            docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn_nocudnn', false)
            pack_lib('mkldnn_gpu_nocudnn', mx_mkldnn_lib)
          }
        }
      }
    },
    'GPU: CUDA9.1+cuDNN7': {
      node('mxnetlinux-cpu') {
        ws('workspace/build-gpu') {
@@ -667,6 +685,20 @@ try {
        }
      }
    },
    'Python3: MKLDNN-GPU-NOCUDNN': {
      node('mxnetlinux-gpu') {
        ws('workspace/ut-python3-mkldnn-gpu-nocudnn') {
          try {
            init_git()
            unpack_lib('mkldnn_gpu_nocudnn', mx_mkldnn_lib)
            python3_gpu_ut_nocudnn('ubuntu_gpu')
            publish_test_coverage()
          } finally {
            collect_test_results_unix('nosetests_gpu.xml', 'nosetests_python3_mkldnn_gpu_nocudnn.xml')
          }
        }
      }
    },
    'Python3: CentOS 7 CPU': {
      node('mxnetlinux-cpu') {
        ws('workspace/build-centos7-cpu') {
25 changes: 25 additions & 0 deletions ci/docker/runtime_functions.sh
@@ -452,6 +452,23 @@ build_ubuntu_gpu_mkldnn() {
    report_ccache_usage
}

build_ubuntu_gpu_mkldnn_nocudnn() {
    set -ex

    build_ccache_wrappers

    make \
        DEV=1 \
        USE_BLAS=openblas \
        USE_MKLDNN=1 \
        USE_CUDA=1 \
        USE_CUDA_PATH=/usr/local/cuda \
        USE_CUDNN=0 \
        -j$(nproc)

    report_ccache_usage
}

build_ubuntu_gpu_cuda91_cudnn7() {
    set -ex
    # unfortunately this build has problems in 3rdparty dependencies with ccache and make
@@ -611,6 +628,14 @@ unittest_ubuntu_python3_gpu() {
    nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu
}

unittest_ubuntu_python3_gpu_nocudnn() {
    set -ex
    export PYTHONPATH=./python/
    export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
    export CUDNN_OFF_TEST_ONLY=true
    nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu
}

# quantization gpu currently only runs on P3 instances
# need to separate it from unittest_ubuntu_python2_gpu()
unittest_ubuntu_python2_quantization_gpu() {
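Outside of Jenkins, the same functions can be invoked by name — docker_run passes the target function name as the script's first argument — so a local reproduction of the two new jobs looks roughly like this (an illustrative sketch, not part of this commit; it assumes a CUDA toolchain at /usr/local/cuda but no cuDNN, run from the repository root):

# Build the GPU+MKLDNN library with cuDNN compiled out,
# then run the GPU test suite in "expect cuDNN ops to fail" mode.
ci/docker/runtime_functions.sh build_ubuntu_gpu_mkldnn_nocudnn
ci/docker/runtime_functions.sh unittest_ubuntu_python3_gpu_nocudnn   # exports CUDNN_OFF_TEST_ONLY=true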
1 change: 0 additions & 1 deletion src/c_api/c_api.cc
@@ -1319,7 +1319,6 @@ int MXRtcCudaKernelCall(CudaKernelHandle handle, int dev_id, void** args,
  API_END();
}

-
int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid, int* shared_id) {
  API_BEGIN();
  NDArray* arr = reinterpret_cast<NDArray*>(handle);
12 changes: 6 additions & 6 deletions src/operator/nn/convolution.cu
@@ -90,9 +90,9 @@ void ConvolutionCompute<gpu>(const nnvm::NodeAttrs& attrs,
  int dtype = inputs[conv::kData].type_flag_;

#if CUDNN_MAJOR < 5
-  if (param_.layout.value() != kNCW &&
-      param_.layout.value() != kNCHW &&
-      param_.layout.value() != kNCDHW) {
+  if (param.layout.value() != kNCW &&
+      param.layout.value() != kNCHW &&
+      param.layout.value() != kNCDHW) {
    // Need CuDNN > 5.0 for layout support. use MXNet implementation
    MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
      ConvolutionOp<gpu, DType> op;
@@ -168,9 +168,9 @@ void ConvolutionGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
  int dtype = out_grad.type_flag_;

#if CUDNN_MAJOR < 5
-  if (param_.layout.value() != kNCW &&
-      param_.layout.value() != kNCHW &&
-      param_.layout.value() != kNCDHW) {
+  if (param.layout.value() != kNCW &&
+      param.layout.value() != kNCHW &&
+      param.layout.value() != kNCDHW) {
    // Need CuDNN > 5.0 for layout support. use MXNet implementation
    MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
      ConvolutionOp<gpu, DType> op;
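Why this only broke with USE_CUDNN=0: when cuDNN is disabled, cudnn.h is never included, so CUDNN_MAJOR is an undefined identifier, which the C preprocessor evaluates as 0 inside #if expressions. The "old cuDNN" fallback branch above is therefore compiled into a no-cuDNN build, and its stale references to the member-style name param_ (instead of the local param) failed to resolve. A minimal illustration of the pitfall, not taken from the source:

// cudnn.h not included => CUDNN_MAJOR is an undefined identifier,
// and the preprocessor substitutes 0 for it in #if arithmetic:
#if CUDNN_MAJOR < 5  // in a no-cuDNN build this is 0 < 5, i.e. true
  // ... so this branch is compiled even without cuDNN and may only
  // reference names that actually exist in this scope.
#endif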
10 changes: 9 additions & 1 deletion tests/python/gpu/test_operator_gpu.py
@@ -32,7 +32,7 @@

curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
sys.path.insert(0, os.path.join(curr_path, '../unittest'))
-from common import setup_module, with_seed, teardown
+from common import setup_module, with_seed, teardown, assert_raises_cudnn_disabled
from test_operator import *
from test_optimizer import *
from test_random import *
@@ -422,6 +422,7 @@ def test_3d_batchnorm(fix_gamma, use_global_stats):


@with_seed(1234)
@assert_raises_cudnn_disabled()
def test_convolution_with_type():
    sym1 = mx.sym.Convolution(num_filter=3, kernel=(3,3), name='conv')

@@ -1316,6 +1317,7 @@ def check_rnn_consistency(cell1, cell2):
    assert_allclose(mod1.get_outputs()[0].asnumpy(), mod2.get_outputs()[0].asnumpy(), rtol=1e-2, atol=1e-4)

@with_seed()
@assert_raises_cudnn_disabled()
def test_rnn():
    fused = mx.rnn.FusedRNNCell(100, num_layers=2, mode='rnn_relu', prefix='')

@@ -1327,6 +1329,7 @@ def test_rnn():
    check_rnn_consistency(stack, fused)

@with_seed()
@assert_raises_cudnn_disabled()
def test_lstm_forget_bias():
    forget_bias = 2.0
    fused = mx.rnn.FusedRNNCell(10, forget_bias=forget_bias, num_layers=2, mode='lstm', prefix='')
@@ -1348,6 +1351,7 @@ def test_lstm_forget_bias():
    assert_allclose(args[bias_name].asnumpy(), expected_bias)

@with_seed()
@assert_raises_cudnn_disabled()
def test_gru():
    fused = mx.rnn.FusedRNNCell(100, num_layers=2, mode='gru', prefix='')

@@ -1359,6 +1363,7 @@ def test_gru():
    check_rnn_consistency(stack, fused)

@with_seed()
@assert_raises_cudnn_disabled()
def test_bidirectional():
    fused = mx.rnn.FusedRNNCell(100, num_layers=2, mode='gru', prefix='',
                                bidirectional=True)
@@ -1377,6 +1382,7 @@ def test_bidirectional():
    check_rnn_consistency(stack, fused)

@with_seed()
@assert_raises_cudnn_disabled()
def test_unfuse():
    for mode in ['rnn_tanh', 'rnn_relu', 'lstm', 'gru']:
        fused = mx.rnn.FusedRNNCell(
@@ -1548,6 +1554,7 @@ def test_deformable_convolution_options():
                                name='deformable_conv')

@with_seed()
@assert_raises_cudnn_disabled()
def test_residual_fused():
    cell = mx.rnn.ResidualCell(
        mx.rnn.FusedRNNCell(50, num_layers=3, mode='lstm',
@@ -1603,6 +1610,7 @@ def check_rnn_layer_w_rand_inputs(layer):
    assert_almost_equal(g.asnumpy(), c.asnumpy(), rtol=1e-2, atol=1e-6)

@with_seed()
@assert_raises_cudnn_disabled()
def test_rnn_layer():
    check_rnn_layer(gluon.rnn.RNN(100, num_layers=3))
    check_rnn_layer(gluon.rnn.RNN(100, activation='tanh', num_layers=3))
20 changes: 19 additions & 1 deletion tests/python/unittest/common.py
@@ -20,13 +20,14 @@
import numpy as np
import random
import shutil
from mxnet.base import MXNetError
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
sys.path.append(os.path.join(curr_path, '../common/'))
sys.path.insert(0, os.path.join(curr_path, '../../../python'))

import models
from contextlib import contextmanager
-from nose.tools import make_decorator
+from nose.tools import make_decorator, assert_raises
import tempfile

def assertRaises(expected_exception, func, *args, **kwargs):
@@ -94,6 +95,23 @@ def random_seed(seed=None):
        random.seed(next_seed)


def assert_raises_cudnn_disabled(assertion_error=False):
    def test_helper(orig_test):
        @make_decorator(orig_test)
        def test_new(*args, **kwargs):
            # CUDNN_OFF_TEST_ONLY is exported by the no-cuDNN CI run
            # (see unittest_ubuntu_python3_gpu_nocudnn above).
            cudnn_disabled = (os.getenv('CUDNN_OFF_TEST_ONLY') == "true")
            if not cudnn_disabled or mx.context.current_context().device_type == 'cpu':
                orig_test(*args, **kwargs)
            else:
                # Without cuDNN, the decorated GPU test is expected to fail.
                if assertion_error:
                    errors = (MXNetError, RuntimeError, AssertionError)
                else:
                    errors = (MXNetError, RuntimeError)
                assert_raises(errors, orig_test, *args, **kwargs)
        return test_new
    return test_helper


def with_seed(seed=None):
    """
    A decorator for nosetests test functions that manages rng seeds.
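Typical usage, as in the test files in this commit: stacked under @with_seed(), the decorated test runs normally when cuDNN is present and is asserted to raise once the no-cuDNN CI run exports CUDNN_OFF_TEST_ONLY=true (a sketch with the test body elided):

@with_seed()
@assert_raises_cudnn_disabled()
def test_rnn():
    # mx.rnn.FusedRNNCell requires cuDNN on GPU, so without cuDNN
    # this test is expected to raise MXNetError or RuntimeError.
    fused = mx.rnn.FusedRNNCell(100, num_layers=2, mode='rnn_relu', prefix='')
    ...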
3 changes: 2 additions & 1 deletion tests/python/unittest/test_gluon.py
@@ -20,7 +20,7 @@
from mxnet.gluon import nn
from mxnet.test_utils import assert_almost_equal
from mxnet.ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID
-from common import setup_module, with_seed, assertRaises, teardown
+from common import setup_module, with_seed, assertRaises, teardown, assert_raises_cudnn_disabled
import numpy as np
from numpy.testing import assert_array_equal
from nose.tools import raises, assert_raises
@@ -1259,6 +1259,7 @@ def record_name(block):


@with_seed()
@assert_raises_cudnn_disabled()
def test_summary():
    net = gluon.model_zoo.vision.resnet50_v1()
    net.initialize()
9 changes: 9 additions & 0 deletions tests/python/unittest/test_gluon_rnn.py
@@ -22,8 +22,10 @@
from numpy.testing import assert_allclose
import unittest
from mxnet.test_utils import almost_equal, assert_almost_equal
from common import assert_raises_cudnn_disabled


@assert_raises_cudnn_disabled()
def test_rnn():
    cell = gluon.rnn.RNNCell(100, prefix='rnn_')
    inputs = [mx.sym.Variable('rnn_t%d_data'%i) for i in range(3)]
@@ -104,6 +106,7 @@ def test_lstm():
    assert outs == [(10, 100), (10, 100), (10, 100)]


@assert_raises_cudnn_disabled()
def test_lstm_forget_bias():
    forget_bias = 2.0
    stack = gluon.rnn.SequentialRNNCell()
@@ -124,6 +127,8 @@ def test_lstm_forget_bias():
                                 forget_bias * np.ones(100, ), np.zeros((2 * 100,))])
    assert_allclose(mod.get_params()[0][bias_argument].asnumpy(), expected_bias)


@assert_raises_cudnn_disabled()
def test_lstm_cpu_inference():
    # should behave the same as lstm cell
    EXPECTED_LSTM_OUTPUT = np.array([[[0.72045636, 0.72045636, 0.95215213, 0.95215213],
@@ -139,6 +144,7 @@ def test_lstm_cpu_inference():
                        rtol=1e-3, atol=1e-5)


@assert_raises_cudnn_disabled()
def test_gru():
    cell = gluon.rnn.GRUCell(100, prefix='rnn_')
    inputs = [mx.sym.Variable('rnn_t%d_data'%i) for i in range(3)]
@@ -419,6 +425,7 @@ def check_rnn_layer_forward(layer, inputs, states=None, run_only=False):
        mx.test_utils.assert_almost_equal(np_dx, inputs.grad.asnumpy(), rtol=1e-3, atol=1e-5)


@assert_raises_cudnn_disabled()
def test_rnn_layers():
    check_rnn_layer_forward(gluon.rnn.RNN(10, 2), mx.nd.ones((8, 3, 20)))
    check_rnn_layer_forward(gluon.rnn.RNN(10, 2, bidirectional=True), mx.nd.ones((8, 3, 20)), mx.nd.ones((4, 3, 10)))
@@ -531,6 +538,8 @@ def test_cell_fill_shape():
    check_rnn_forward(cell, mx.nd.ones((2, 3, 7)))
    assert cell.i2h_weight.shape[1] == 7, cell.i2h_weight.shape[1]


@assert_raises_cudnn_disabled()
def test_layer_fill_shape():
    layer = gluon.rnn.LSTM(10)
    layer.hybridize()
