diff --git a/src/operator/nn/cudnn/cudnn_softmax_activation-inl.h b/src/operator/nn/cudnn/cudnn_softmax_activation-inl.h
index 239da023668d..0845eb79fd60 100644
--- a/src/operator/nn/cudnn/cudnn_softmax_activation-inl.h
+++ b/src/operator/nn/cudnn/cudnn_softmax_activation-inl.h
@@ -48,7 +48,7 @@ class CuDNNSoftmaxActivationOp {
   }
 
   void Forward(const OpContext &ctx, const TBlob &in_data,
-      const OpReqType &req, const TBlob &out_data) {
+               const OpReqType &req, const TBlob &out_data) {
     using namespace mshadow;
     using namespace mshadow::expr;
     Stream<gpu> *s = ctx.get_stream<gpu>();
@@ -102,14 +102,14 @@ class CuDNNSoftmaxActivationOp {
   }
 
   void Backward(const OpContext &ctx, const TBlob &out_grad,
-      const TBlob &out_data, const OpReqType &req, const TBlob &in_grad) {
+                const TBlob &out_data, const OpReqType &req,
+                const TBlob &in_grad) {
     using namespace mshadow;
     using namespace mshadow::expr;
     float alpha = 1.0f;
     float beta = 0.0f;
     Stream<gpu> *s = ctx.get_stream<gpu>();
     Tensor<gpu, 4> grad;
-    Tensor<gpu, 4> data;
     Tensor<gpu, 4> output_data;
     Tensor<gpu, 4> input_grad;
     cudnnSoftmaxMode_t softmax_mode;
@@ -141,6 +141,13 @@ class CuDNNSoftmaxActivationOp {
       softmax_mode = CUDNN_SOFTMAX_MODE_CHANNEL;
     }
     CHECK_EQ(s->dnn_handle_ownership_, mshadow::Stream<gpu>::OwnHandle);
+    CUDNN_CALL(cudnnSetTensor4dDescriptor(shape_desc_,
+                                          CUDNN_TENSOR_NCHW,
+                                          dtype_,
+                                          input_grad.shape_[0],
+                                          input_grad.shape_[1],
+                                          input_grad.shape_[2],
+                                          input_grad.shape_[3]));
     CUDNN_CALL(cudnnSoftmaxBackward(s->dnn_handle_,
                                     CUDNN_SOFTMAX_ACCURATE,
                                     softmax_mode,
diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py
index c7df2eae9b26..43c62e18845b 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -1855,6 +1855,26 @@ def test_create_sparse_ndarray_gpu_to_cpu():
     assert(same(rsp_copy.asnumpy(), rsp_created.asnumpy()))
 
 
+@with_seed()
+def test_softmax_activation():
+    gpu_a = mx.nd.array([[3., 0.5, -0.5, 2., 7.],
+                         [2., -.4, 7., 3., 0.2]], ctx=mx.gpu(0))
+    cpu_a = mx.nd.array([[3., 0.5, -0.5, 2., 7.],
+                         [2., -.4, 7., 3., 0.2]], ctx=mx.cpu())
+
+    cpu_a.attach_grad()
+    gpu_a.attach_grad()
+    with mx.autograd.record():
+        gpu_y = mx.nd.SoftmaxActivation(data = gpu_a)
+        cpu_y = mx.nd.SoftmaxActivation(data = cpu_a)
+        assert_almost_equal(cpu_y.asnumpy(), gpu_y.asnumpy(), atol = 1e-3, rtol = 1e-3)
+
+        gpu_y.backward()
+        cpu_y.backward()
+        assert_almost_equal(cpu_a.grad.asnumpy(), gpu_a.grad.asnumpy(),
+                            atol = 1e-3, rtol = 1e-3)
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()
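
Note (not part of the patch): the core of the fix is that `shape_desc_` is now filled in from the incoming gradient's shape before `cudnnSoftmaxBackward` runs, instead of relying on whatever the descriptor last described. Below is a minimal standalone sketch of that cuDNN call pattern, assuming a float NCHW tensor of shape 2x5x1x1 (the same 2x5 shape as the test input) and instance-mode softmax; the `CHECK_CUDNN` macro and all buffer names are illustrative and not taken from MXNet.

// sketch.cpp: describe the gradient tensor to cuDNN, then run softmax backward.
#include <cudnn.h>
#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>

// Illustrative error-checking macro (assumption, not MXNet's CUDNN_CALL).
#define CHECK_CUDNN(call)                                                   \
  do {                                                                      \
    cudnnStatus_t st = (call);                                              \
    if (st != CUDNN_STATUS_SUCCESS) {                                       \
      std::fprintf(stderr, "cuDNN error: %s\n", cudnnGetErrorString(st));   \
      std::exit(1);                                                         \
    }                                                                       \
  } while (0)

int main() {
  const int n = 2, c = 5, h = 1, w = 1;         // hypothetical shape, matches the 2x5 test data
  const size_t bytes = size_t(n) * c * h * w * sizeof(float);

  cudnnHandle_t handle;
  CHECK_CUDNN(cudnnCreate(&handle));

  // Set the descriptor from the gradient shape first; this is the step the
  // patch adds in Backward before calling cudnnSoftmaxBackward.
  cudnnTensorDescriptor_t desc;
  CHECK_CUDNN(cudnnCreateTensorDescriptor(&desc));
  CHECK_CUDNN(cudnnSetTensor4dDescriptor(desc, CUDNN_TENSOR_NCHW,
                                         CUDNN_DATA_FLOAT, n, c, h, w));

  // Device buffers for the softmax output y, its gradient dy, and the result dx.
  float *y = nullptr, *dy = nullptr, *dx = nullptr;
  cudaMalloc(&y, bytes);
  cudaMalloc(&dy, bytes);
  cudaMalloc(&dx, bytes);
  cudaMemset(y, 0, bytes);
  cudaMemset(dy, 0, bytes);

  const float alpha = 1.0f, beta = 0.0f;
  // Same descriptor describes y, dy and dx, exactly as the operator reuses shape_desc_.
  CHECK_CUDNN(cudnnSoftmaxBackward(handle, CUDNN_SOFTMAX_ACCURATE,
                                   CUDNN_SOFTMAX_MODE_INSTANCE,
                                   &alpha, desc, y, desc, dy,
                                   &beta, desc, dx));

  cudaFree(y); cudaFree(dy); cudaFree(dx);
  CHECK_CUDNN(cudnnDestroyTensorDescriptor(desc));
  CHECK_CUDNN(cudnnDestroy(handle));
  return 0;
}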