Skip to content

Commit

Permalink
add interleaved versions of phase/cartToPolar/polarToCart
Browse files Browse the repository at this point in the history
This PR is for performance only (at the cost of more template code and increased GPU code size).
The additional variants can help the caller skip the creation of temporary GPU mats (where memory is more likely to be a critical resource), and can even allow in-place processing.
magnitude/angles/x/y are often already interleaved when dealing with DFTs.
  • Loading branch information
chacha21 committed Dec 12, 2023
1 parent 3c5635e commit f19a582
Show file tree
Hide file tree
Showing 4 changed files with 458 additions and 0 deletions.
53 changes: 53 additions & 0 deletions modules/cudaarithm/include/opencv2/cudaarithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,17 @@ CV_EXPORTS_W void magnitudeSqr(InputArray x, InputArray y, OutputArray magnitude
*/
CV_EXPORTS_W void phase(InputArray x, InputArray y, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null());

/** @brief Computes polar angles of complex matrix elements stored in a single interleaved matrix.

This overload reads both components from one two-channel matrix instead of two separate
single-channel matrices.
@param xy Source matrix containing real and imaginary components ( CV_32FC2 ).
@param angle Destination matrix of angles; it has the same size as xy ( CV_32FC1 ).
@param angleInDegrees Flag for angles that must be evaluated in degrees.
@param stream Stream for the asynchronous version.
@sa phase
*/
CV_EXPORTS_W void phase(InputArray xy, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null());

/** @brief Converts Cartesian coordinates into polar.
@param x Source matrix containing real components ( CV_32FC1 ).
Expand All @@ -446,6 +457,29 @@ CV_EXPORTS_W void phase(InputArray x, InputArray y, OutputArray angle, bool angl
*/
CV_EXPORTS_W void cartToPolar(InputArray x, InputArray y, OutputArray magnitude, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null());

/** @brief Converts Cartesian coordinates into polar, reading the Cartesian components from a single interleaved matrix.

This overload reads both components from one two-channel matrix and writes magnitudes and angles
to two separate single-channel matrices.
@param xy Source matrix containing real and imaginary components ( CV_32FC2 ).
@param magnitude Destination matrix of float magnitudes; it has the same size as xy ( CV_32FC1 ).
@param angle Destination matrix of angles; it has the same size as xy ( CV_32FC1 ).
@param angleInDegrees Flag for angles that must be evaluated in degrees.
@param stream Stream for the asynchronous version.
@sa cartToPolar
*/
CV_EXPORTS_W void cartToPolar(InputArray xy, OutputArray magnitude, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null());

/** @brief Converts Cartesian coordinates into polar, with both the source and the destination interleaved.

This overload reads both Cartesian components from one two-channel matrix and writes magnitudes
and angles into one two-channel matrix.
@param xy Source matrix containing real and imaginary components ( CV_32FC2 ).
@param magnitudeAngle Destination matrix of float magnitudes and angles; it has the same size as xy ( CV_32FC2 ).
@param angleInDegrees Flag for angles that must be evaluated in degrees.
@param stream Stream for the asynchronous version.
@sa cartToPolar
*/
CV_EXPORTS_W void cartToPolar(InputArray xy, OutputArray magnitudeAngle, bool angleInDegrees = false, Stream& stream = Stream::Null());

/** @brief Converts polar coordinates into Cartesian.
@param magnitude Source matrix containing magnitudes ( CV_32FC1 or CV_64FC1 ).
Expand All @@ -457,6 +491,25 @@ CV_EXPORTS_W void cartToPolar(InputArray x, InputArray y, OutputArray magnitude,
*/
CV_EXPORTS_W void polarToCart(InputArray magnitude, InputArray angle, OutputArray x, OutputArray y, bool angleInDegrees = false, Stream& stream = Stream::Null());

/** @brief Converts polar coordinates into Cartesian, writing the result to a single interleaved matrix.

For every element the first destination channel receives magnitude * cos(angle) and the second
receives magnitude * sin(angle).
@param magnitude Source matrix containing magnitudes ( CV_32FC1 or CV_64FC1 ). It may be empty, in
which case every magnitude is taken to be 1; otherwise it must have the same type and size as angle.
@param angle Source matrix containing angles ( same type as magnitude ).
@param xy Destination matrix of real and imaginary components; it has the same size as angle
( same depth as magnitude, i.e. CV_32FC2 or CV_64FC2 ).
@param angleInDegrees Flag that indicates angles in degrees.
@param stream Stream for the asynchronous version.
*/
CV_EXPORTS_W void polarToCart(InputArray magnitude, InputArray angle, OutputArray xy, bool angleInDegrees = false, Stream& stream = Stream::Null());

/** @brief Converts polar coordinates into Cartesian, with both the source and the destination interleaved.

The first source channel is read as the magnitude and the second as the angle. For every element
the first destination channel receives magnitude * cos(angle) and the second receives
magnitude * sin(angle).
@param magnitudeAngle Source matrix containing magnitudes and angles ( CV_32FC2 or CV_64FC2 ).
@param xy Destination matrix of real and imaginary components; it has the same size and type as
the source ( same depth as source ).
@param angleInDegrees Flag that indicates angles in degrees.
@param stream Stream for the asynchronous version.
*/
CV_EXPORTS_W void polarToCart(InputArray magnitudeAngle, OutputArray xy, bool angleInDegrees = false, Stream& stream = Stream::Null());

//! @} cudaarithm_elem

//! @addtogroup cudaarithm_core
Expand Down
208 changes: 208 additions & 0 deletions modules/cudaarithm/src/cuda/polar_cart.cu
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,25 @@ void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleI
syncOutput(dst, _dst, stream);
}

void cv::cuda::phase(InputArray _xy, OutputArray _dst, bool angleInDegrees, Stream& stream)
{
    // Interleaved overload: both source components arrive in one
    // two-channel float matrix.
    GpuMat xy = getInputMat(_xy, stream);

    CV_Assert( xy.type() == CV_32FC2 );

    // One float angle is written per source element.
    GpuMat dst = getOutputMat(_dst, xy.size(), CV_32FC1, stream);

    // Typed views handed to the grid transform.
    GpuMat_<float2> srcView(xy.reshape(2));
    GpuMat_<float> angleView(dst.reshape(1));

    // The degrees flag is a compile-time template argument of the functor,
    // so the runtime flag selects between two instantiations.
    if (!angleInDegrees)
    {
        gridTransformUnary(srcView, angleView, direction_interleaved_func<float2, false>(), stream);
    }
    else
    {
        gridTransformUnary(srcView, angleView, direction_interleaved_func<float2, true>(), stream);
    }

    syncOutput(dst, _dst, stream);
}

void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream)
{
GpuMat x = getInputMat(_x, stream);
Expand Down Expand Up @@ -155,6 +174,71 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu
syncOutput(angle, _angle, stream);
}

void cv::cuda::cartToPolar(InputArray _xy, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream)
{
    // Interleaved-source overload: one two-channel float input, two
    // single-channel float outputs of the same size.
    GpuMat xy = getInputMat(_xy, stream);

    CV_Assert( xy.type() == CV_32FC2 );

    GpuMat mag = getOutputMat(_mag, xy.size(), CV_32FC1, stream);
    GpuMat angle = getOutputMat(_angle, xy.size(), CV_32FC1, stream);

    // Typed views handed to the grid transform.
    GpuMat_<float2> srcView(xy.reshape(2));
    GpuMat_<float> magView(mag.reshape(1));
    GpuMat_<float> angleView(angle.reshape(1));

    // A single tuple transform fills both destinations. The degrees flag is
    // a template argument of the direction functor, hence the two branches.
    if (!angleInDegrees)
    {
        gridTransformTuple(
            srcView,
            tie(magView, angleView),
            make_tuple(magnitude_interleaved_func<float2>(), direction_interleaved_func<float2, false>()),
            stream);
    }
    else
    {
        gridTransformTuple(
            srcView,
            tie(magView, angleView),
            make_tuple(magnitude_interleaved_func<float2>(), direction_interleaved_func<float2, true>()),
            stream);
    }

    syncOutput(mag, _mag, stream);
    syncOutput(angle, _angle, stream);
}

void cv::cuda::cartToPolar(InputArray _xy, OutputArray _magAngle, bool angleInDegrees, Stream& stream)
{
    // Fully interleaved overload: two-channel float input and a two-channel
    // float output of the same size.
    GpuMat xy = getInputMat(_xy, stream);

    CV_Assert( xy.type() == CV_32FC2 );

    GpuMat magAngle = getOutputMat(_magAngle, xy.size(), CV_32FC2, stream);

    // Typed views handed to the grid transform.
    GpuMat_<float2> srcView(xy.reshape(2));
    GpuMat_<float2> dstView(magAngle.reshape(2));

    // The degrees flag is a template argument of the functor, so the runtime
    // flag selects between two instantiations.
    if (!angleInDegrees)
    {
        gridTransformUnary(srcView, dstView, magnitude_direction_interleaved_func<float2, false>(), stream);
    }
    else
    {
        gridTransformUnary(srcView, dstView, magnitude_direction_interleaved_func<float2, true>(), stream);
    }

    syncOutput(magAngle, _magAngle, stream);
}

namespace
{
template <typename T> struct sincos_op
Expand Down Expand Up @@ -192,6 +276,49 @@ namespace
ymat(y, x) = mag_val * sin_a;
}

// Kernel: converts separate magnitude and angle inputs into one interleaved
// two-component output.
// Launch layout: 2D; each thread derives one (row, col) element from its
// block and thread indices, and threads falling outside rows x cols do nothing.
// When useMag is false the magnitude input is never read and 1 is used instead.
// scale is multiplied into the angle before the sine and cosine are taken.
template <typename T, bool useMag>
__global__ void polarToCartDstInterleavedImpl_(const GlobPtr<T> mag, const GlobPtr<T> angle, GlobPtr<typename MakeVec<T, 2>::type > xymat, const T scale, const int rows, const int cols)
{
    typedef typename MakeVec<T, 2>::type T2;

    const int col = blockDim.x * blockIdx.x + threadIdx.x;
    const int row = blockDim.y * blockIdx.y + threadIdx.y;

    if (col < cols && row < rows)
    {
        const T radius = useMag ? mag(row, col) : static_cast<T>(1.0);
        const T theta = angle(row, col);

        T sinVal, cosVal;
        sincos_op<T> sincosFn;
        sincosFn(scale * theta, &sinVal, &cosVal);

        // First component is the cosine term, second is the sine term.
        T2 out;
        out.x = radius * cosVal;
        out.y = radius * sinVal;
        xymat(row, col) = out;
    }
}

// Kernel: converts an interleaved (magnitude, angle) input into an interleaved
// two-component output.
// Launch layout: 2D; each thread derives one (row, col) element from its
// block and thread indices, and threads falling outside rows x cols do nothing.
// When useMag is false the stored magnitude is ignored and 1 is used instead.
// scale is multiplied into the angle before the sine and cosine are taken.
template <typename T, bool useMag>
__global__ void polarToCartInterleavedImpl_(const GlobPtr<typename MakeVec<T, 2>::type > magAngle, GlobPtr<typename MakeVec<T, 2>::type > xymat, const T scale, const int rows, const int cols)
{
    typedef typename MakeVec<T, 2>::type T2;

    const int col = blockDim.x * blockIdx.x + threadIdx.x;
    const int row = blockDim.y * blockIdx.y + threadIdx.y;

    if (col < cols && row < rows)
    {
        // A single load fetches both polar components of this element.
        const T2 polar = magAngle(row, col);
        const T radius = useMag ? polar.x : static_cast<T>(1.0);
        const T theta = polar.y;

        T sinVal, cosVal;
        sincos_op<T> sincosFn;
        sincosFn(scale * theta, &sinVal, &cosVal);

        // First component is the cosine term, second is the sine term.
        T2 out;
        out.x = radius * cosVal;
        out.y = radius * sinVal;
        xymat(row, col) = out;
    }
}

template <typename T>
void polarToCartImpl(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t& stream)
{
Expand All @@ -210,6 +337,43 @@ namespace
else
polarToCartImpl_<T, true> << <grid, block, 0, stream >> >(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xc), shrinkPtr(yc), scale, anglec.rows, anglec.cols);
}

// Host launcher for polarToCartDstInterleavedImpl_.
// Builds typed views of the matrices, sizes a 2D grid of 32x8 blocks from the
// angle matrix, and launches on the given stream. An empty magnitude matrix
// selects the kernel instantiation that does not read magnitudes.
// The launch status is not checked here.
template <typename T>
void polarToCartDstInterleavedImpl(const GpuMat& mag, const GpuMat& angle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream)
{
    typedef typename MakeVec<T, 2>::type T2;

    GpuMat_<T2> xyView(xy.reshape(2));
    GpuMat_<T> magView(mag.reshape(1));
    GpuMat_<T> angleView(angle.reshape(1));

    const int rows = angleView.rows;
    const int cols = angleView.cols;

    const dim3 block(32, 8);
    const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));

    // Factor applied to every angle in the kernel: degrees-to-radians, or 1.
    T scale = static_cast<T>(1.0);
    if (angleInDegrees)
        scale = static_cast<T>(CV_PI / 180.0);

    if (!magView.empty())
    {
        polarToCartDstInterleavedImpl_<T, true><<<grid, block, 0, stream>>>(
            shrinkPtr(magView), shrinkPtr(angleView), shrinkPtr(xyView), scale, rows, cols);
    }
    else
    {
        polarToCartDstInterleavedImpl_<T, false><<<grid, block, 0, stream>>>(
            shrinkPtr(magView), shrinkPtr(angleView), shrinkPtr(xyView), scale, rows, cols);
    }
}

// Host launcher for polarToCartInterleavedImpl_.
//
// magAngle       two-channel source; the kernel reads channel 0 as the
//                magnitude and channel 1 as the angle.
// xy             two-channel destination, already allocated by the caller.
// angleInDegrees when true, angles are scaled by pi/180 before the sine and
//                cosine are taken.
// stream         CUDA stream the kernel is launched on.
//
// A 2D grid of 32x8 blocks is sized from the source dimensions. The launch
// status is not checked here.
template <typename T>
void polarToCartInterleavedImpl(const GpuMat& magAngle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream)
{
    typedef typename MakeVec<T, 2>::type T2;

    // Magnitude and angle share one matrix, so "no magnitude" can only mean
    // "no data at all". There is then nothing to convert, and a grid with a
    // zero dimension is not a valid launch configuration, so return without
    // launching.
    if (magAngle.empty() || magAngle.rows <= 0 || magAngle.cols <= 0)
        return;

    GpuMat_<T2> xyc(xy.reshape(2));
    GpuMat_<T2> magAnglec(magAngle.reshape(2));

    const dim3 block(32, 8);
    const dim3 grid(divUp(magAnglec.cols, block.x), divUp(magAnglec.rows, block.y));

    const T scale = angleInDegrees ? static_cast<T>(CV_PI / 180.0) : static_cast<T>(1.0);

    // The magnitude channel is always present in the interleaved layout, so
    // only the useMag = true instantiation is ever needed.
    polarToCartInterleavedImpl_<T, true><<<grid, block, 0, stream>>>(
        shrinkPtr(magAnglec), shrinkPtr(xyc), scale, magAnglec.rows, magAnglec.cols);
}
}

void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, OutputArray _y, bool angleInDegrees, Stream& _stream)
Expand Down Expand Up @@ -237,4 +401,48 @@ void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, O
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}

void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _xy, bool angleInDegrees, Stream& _stream)
{
    typedef void(*impl_fn)(const GpuMat& mag, const GpuMat& angle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream);

    // Indexed by matrix depth; only the float and double slots are populated.
    static const impl_fn implByDepth[7] =
    {
        0, 0, 0, 0, 0,
        polarToCartDstInterleavedImpl<float>,
        polarToCartDstInterleavedImpl<double>
    };

    GpuMat mag = getInputMat(_mag, _stream);
    GpuMat angle = getInputMat(_angle, _stream);

    // The magnitude is optional; when present it must match the angles.
    CV_Assert(angle.depth() == CV_32F || angle.depth() == CV_64F);
    CV_Assert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) );

    // Destination: same size and depth as the angles, with two channels.
    const int depth = angle.depth();
    GpuMat xy = getOutputMat(_xy, angle.size(), CV_MAKETYPE(depth, 2), _stream);

    cudaStream_t cudaStream = StreamAccessor::getStream(_stream);
    const impl_fn impl = implByDepth[depth];
    impl(mag, angle, xy, angleInDegrees, cudaStream);
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    syncOutput(xy, _xy, _stream);

    // On the default stream, wait for the device before returning.
    if (cudaStream == 0)
    {
        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    }
}

void cv::cuda::polarToCart(InputArray _magAngle, OutputArray _xy, bool angleInDegrees, Stream& _stream)
{
    typedef void(*impl_fn)(const GpuMat& magAngle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream);

    // Indexed by matrix depth; only the float and double slots are populated.
    static const impl_fn implByDepth[7] =
    {
        0, 0, 0, 0, 0,
        polarToCartInterleavedImpl<float>,
        polarToCartInterleavedImpl<double>
    };

    GpuMat magAngle = getInputMat(_magAngle, _stream);

    CV_Assert(magAngle.type() == CV_32FC2 || magAngle.type() == CV_64FC2);

    // Destination mirrors the source in both size and type.
    GpuMat xy = getOutputMat(_xy, magAngle.size(), magAngle.type(), _stream);

    cudaStream_t cudaStream = StreamAccessor::getStream(_stream);
    const impl_fn impl = implByDepth[magAngle.depth()];
    impl(magAngle, xy, angleInDegrees, cudaStream);
    CV_CUDEV_SAFE_CALL( cudaGetLastError() );

    syncOutput(xy, _xy, _stream);

    // On the default stream, wait for the device before returning.
    if (cudaStream == 0)
    {
        CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    }
}

#endif
Loading

0 comments on commit f19a582

Please sign in to comment.