Skip to content

Commit

Permalink
Keep the cuFFT 2D plans (cufftPlan2d) across ConvolutionImpl::convolve calls
Browse files Browse the repository at this point in the history
On some CUDA versions, creating and destroying plans with cufftPlan2d is very time-consuming.
We now create the plans in ConvolutionImpl::create() and destroy them in the destructor.

this solves issue #3385
  • Loading branch information
r2d3 committed Nov 30, 2022
1 parent d6102ef commit 4cba139
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions modules/cudaarithm/src/arithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ namespace
{
public:
explicit ConvolutionImpl(Size user_block_size_) : user_block_size(user_block_size_) {}
~ConvolutionImpl();

void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null());

Expand All @@ -452,6 +453,8 @@ namespace
Size user_block_size;
Size dft_size;

// cuFFT plan handles kept as members so they outlive a single convolve() call:
// planR2C is built with CUFFT_R2C and planC2R with CUFFT_C2R, and both are
// released in the destructor.
// NOTE(review): the visible constructor does not give these handles an initial
// value -- confirm they are always set before the destructor runs.
cufftHandle planR2C, planC2R;

GpuMat image_spect, templ_spect, result_spect;
GpuMat image_block, templ_block, result_data;
};
Expand Down Expand Up @@ -491,6 +494,15 @@ namespace
// Use maximum result matrix block size for the estimated DFT block size
block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);

// Build the two 2D cuFFT plans for the current dft_size once, storing them in
// the member handles so later convolve() calls can reuse them.
// NOTE(review): if this function can run more than once on the same object,
// the handles are overwritten here without a prior cufftDestroy, which would
// leak the previously created plans -- confirm and release them first if so.
cufftSafeCall( cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R) );
cufftSafeCall( cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C) );
}

// Releases the two cuFFT plans held in planR2C and planC2R.
//
// The cufftDestroy results are deliberately not routed through cufftSafeCall:
// destructors are implicitly noexcept since C++11, so an exception raised here
// (cufftSafeCall presumably throws on a failing status -- confirm) would end in
// std::terminate, and a failure on the first plan would also skip the release
// of the second one.
//
// NOTE(review): this assumes both handles were set by cufftPlan2d before the
// object is destroyed. If an instance can be destroyed without the plans ever
// being created, the handles should be given a known value at construction
// and checked here -- confirm.
ConvolutionImpl::~ConvolutionImpl()
{
    // Best-effort cleanup: always attempt both releases and never throw.
    static_cast<void>(cufftDestroy(planR2C));
    static_cast<void>(cufftDestroy(planC2R));
}

Size ConvolutionImpl::estimateBlockSize(Size result_size)
Expand All @@ -516,10 +528,6 @@ namespace

cudaStream_t stream = StreamAccessor::getStream(_stream);

cufftHandle planR2C, planC2R;
cufftSafeCall( cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R) );
cufftSafeCall( cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C) );

cufftSafeCall( cufftSetStream(planR2C, stream) );
cufftSafeCall( cufftSetStream(planC2R, stream) );

Expand Down Expand Up @@ -559,9 +567,6 @@ namespace
}
}

cufftSafeCall( cufftDestroy(planR2C) );
cufftSafeCall( cufftDestroy(planC2R) );

syncOutput(result, _result, _stream);
}
}
Expand Down

0 comments on commit 4cba139

Please sign in to comment.