diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index 4b1cbbe7057b..aa38c18a73ca 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -685,6 +685,7 @@ class CuDNNConvolutionOp { const int kMaxAlgos = 10; int nalgo = kMaxAlgos; int i = 0; + size_t min_memory_needs = 0; // Forward Algorithm Find/Get, v6 and earlier if (CUDNN_MAJOR == 6 && param_.layout.value() == mshadow::kNHWC) { // In cuDNNv6, for kNHWC, only CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM is @@ -715,10 +716,16 @@ class CuDNNConvolutionOp { while (i < nalgo && (fwd_algo[i].status != CUDNN_STATUS_SUCCESS || (param_.cudnn_tune.value() == conv::kLimited - && fwd_algo[i].memory > workspace_byte))) + && fwd_algo[i].memory > workspace_byte))) { ++i; + min_memory_needs = + (i == 0) ? fwd_algo[i].memory : std::min(min_memory_needs, fwd_algo[i].memory); + } if (i == nalgo) { - LOG(FATAL) << "Failed to find a forward convolution algorithm."; + LOG(FATAL) << nalgo << " forward algorithms with minimum memory requirement " + << min_memory_needs << " bytes have been tried. Workspace size is set to " + << workspace_byte << " bytes, please consider reducing the batch/model size, " + << "or increasing workspace size."; } else { forward_algo_.Set(fwd_algo[i].algo, false); } @@ -749,10 +756,17 @@ class CuDNNConvolutionOp { while (i < nalgo && (bwd_filter_algo[i].status != CUDNN_STATUS_SUCCESS || (param_.cudnn_tune.value() == conv::kLimited - && bwd_filter_algo[i].memory > workspace_byte))) + && bwd_filter_algo[i].memory > workspace_byte))) { ++i; + min_memory_needs = (i == 0) ? + bwd_filter_algo[i].memory : + std::min(min_memory_needs, bwd_filter_algo[i].memory); + } if (i == nalgo) { - LOG(FATAL) << "Failed to find a backward filter convolution algorithm."; + LOG(FATAL) << nalgo << " backward filter algorithms with minimum memory requirement " + << min_memory_needs << " bytes have been tried. Workspace size is set to " + << workspace_byte << " bytes, please consider reducing the batch/model size, " + << "or increasing workspace size."; } else { back_algo_w_.Set(bwd_filter_algo[i].algo, false); } @@ -783,10 +797,17 @@ class CuDNNConvolutionOp { while (i < nalgo && (bwd_data_algo[i].status != CUDNN_STATUS_SUCCESS || (param_.cudnn_tune.value() == conv::kLimited - && bwd_data_algo[i].memory > workspace_byte))) + && bwd_data_algo[i].memory > workspace_byte))) { ++i; + min_memory_needs = (i == 0) ? + bwd_data_algo[i].memory : + std::min(min_memory_needs, bwd_data_algo[i].memory); + } if (i == nalgo) { - LOG(FATAL) << "Failed to find a backward data convolution algorithm."; + LOG(FATAL) << nalgo << " backward data algorithms with minimum memory requirement " + << min_memory_needs << " bytes have been tried. Workspace size is set to " + << workspace_byte << " bytes, please consider reducing the batch/model size, " + << "or increasing workspace size."; } else { back_algo_.Set(bwd_data_algo[i].algo, false); } @@ -833,7 +854,9 @@ class CuDNNConvolutionOp { } } auto mode = param_.cudnn_tune.value() == conv::kOff ? " get " : " find "; - LOG(FATAL) << "Failed to" << mode << "any " << kernel_name << " convolution algorithm."; + LOG(FATAL) << "Failed to" << mode << "any " << kernel_name << " convolution algorithm. " << " with workspace size of " << workspace_byte << " bytes," << " please consider reducing batch/model size or increasing the workspace size"; } void GetTempSize(const OpContext& ctx) {
diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index cb0de4c961bf..74baab8f3b26 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -618,6 +618,7 @@ class CuDNNDeconvolutionOp { const int kMaxAlgos = 10; int nalgo = kMaxAlgos; int i = 0; + size_t min_memory_needs = 0; // Forward Algorithm Find/Get, v6 and earlier if (CUDNN_MAJOR == 6 && param_.layout.value() == mshadow::kNHWC) { // In cuDNNv6, for kNHWC, only CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM is @@ -648,11 +649,19 @@ class CuDNNDeconvolutionOp { while (i < nalgo && (fwd_algo[i].status != CUDNN_STATUS_SUCCESS || (param_.cudnn_tune.value() == deconv::kLimited - && fwd_algo[i].memory > workspace_byte))) + && fwd_algo[i].memory > workspace_byte))) { ++i; + min_memory_needs = (i == 0) ? + fwd_algo[i].memory : + std::min(min_memory_needs, fwd_algo[i].memory); + } if (i == nalgo) { - LOG(FATAL) << "Failed to find a 'forward' convolution algorithm " << - "(for use in deconvolution operator backprop-to-data)."; + LOG(FATAL) << nalgo << " forward algorithms" + << " (for use in deconvolution operator backprop-to-data)" + << " with minimum memory requirement " << min_memory_needs + << " bytes have been tried. Workspace size is set to " << workspace_byte + << " bytes, please consider reducing the batch/model size," + << " or increasing workspace size."; } else { forward_algo_.Set(fwd_algo[i].algo, false); } @@ -683,11 +692,19 @@ class CuDNNDeconvolutionOp { while (i < nalgo && (bwd_filter_algo[i].status != CUDNN_STATUS_SUCCESS || (param_.cudnn_tune.value() == deconv::kLimited - && bwd_filter_algo[i].memory > workspace_byte))) + && bwd_filter_algo[i].memory > workspace_byte))) { ++i; + min_memory_needs = (i == 0) ? + bwd_filter_algo[i].memory : + std::min(min_memory_needs, bwd_filter_algo[i].memory); + } if (i == nalgo) { - LOG(FATAL) << "Failed to find a backward filter convolution algorithm " << - "(for use in deconvolution operator backprop-to-filter)."; + LOG(FATAL) << nalgo << " backward filter algorithms" + << " (for use in deconvolution operator backprop-to-filter)" + << " with minimum memory requirement " << min_memory_needs + << " bytes have been tried. Workspace size is set to " << workspace_byte + << " bytes, please consider reducing the batch/model size," + << " or increasing workspace size."; } else { back_algo_w_.Set(bwd_filter_algo[i].algo, false); } @@ -718,11 +735,19 @@ class CuDNNDeconvolutionOp { while (i < nalgo && (bwd_data_algo[i].status != CUDNN_STATUS_SUCCESS || (param_.cudnn_tune.value() == deconv::kLimited - && bwd_data_algo[i].memory > workspace_byte))) + && bwd_data_algo[i].memory > workspace_byte))) { ++i; + min_memory_needs = (i == 0) ? + bwd_data_algo[i].memory : + std::min(min_memory_needs, bwd_data_algo[i].memory); + } if (i == nalgo) { - LOG(FATAL) << "Failed to find a backward data convolution algorithm." << - "(for use in deconvolution operator forward inference)."; + LOG(FATAL) << nalgo << " backward data algorithms" + << " (for use in deconvolution operator forward inference) with" + << " minimum memory requirement " << min_memory_needs + << " bytes have been tried. Workspace size is set to " << workspace_byte + << " bytes, please consider reducing the batch/model size," + << " or increasing workspace size."; } else { back_algo_.Set(bwd_data_algo[i].algo, false); } @@ -774,7 +799,9 @@ class CuDNNDeconvolutionOp { } } auto mode = param_.cudnn_tune.value() == conv::kOff ? " get " : " find "; - LOG(FATAL) << "Failed to" << mode << "any " << kernel_name << " deconvolution algorithm."; + LOG(FATAL) << "Failed to" << mode << "any " << kernel_name << " deconvolution algorithm" + << " with workspace size of " << workspace_byte << " bytes," + << " please consider reducing batch/model size or increasing the workspace size"; } void GetTempSize(const OpContext& ctx) {