Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mel Filter Bank HIP - Address Review comments, Merge Develop changes and resolve conflicts #324

2 changes: 1 addition & 1 deletion docs/sphinx/requirements.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
rocm-docs-core[api_reference]==1.7.1
rocm-docs-core[api_reference]==1.7.2
2 changes: 1 addition & 1 deletion docs/sphinx/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ requests==2.32.3
# via
# pygithub
# sphinx
rocm-docs-core[api-reference]==1.7.1
rocm-docs-core[api-reference]==1.7.2
# via -r requirements.in
smmap==5.0.1
# via gitdb
Expand Down
44 changes: 42 additions & 2 deletions include/rppdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ SOFTWARE.
#define RPP_MAX_8U ( 255 )
/*! \brief RPP maximum dimensions in tensor \ingroup group_rppdefs \page subpage_rppt */
#define RPPT_MAX_DIMS ( 5 )
/*! \brief RPP maximum channels in audio tensor \ingroup group_rppdefs \page subpage_rppt */
#define RPPT_MAX_AUDIO_CHANNELS ( 16 )

#define CHECK_RETURN_STATUS(x) do { \
int retval = (x); \
Expand All @@ -62,6 +64,7 @@ SOFTWARE.
} while (0)

#ifdef HIP_COMPILE
#include <hip/hip_runtime.h>
#define RPP_HOST_DEVICE __host__ __device__
#else
#define RPP_HOST_DEVICE
Expand Down Expand Up @@ -708,7 +711,7 @@ typedef struct GenericFilter
*/
typedef struct RpptResamplingWindow
{
inline void input_range(Rpp32f x, Rpp32s *loc0, Rpp32s *loc1)
inline RPP_HOST_DEVICE void input_range(Rpp32f x, Rpp32s *loc0, Rpp32s *loc1)
{
Rpp32s xc = std::ceil(x);
*loc0 = xc - lobes;
Expand Down Expand Up @@ -742,7 +745,7 @@ typedef struct RpptResamplingWindow
Rpp32f scale = 1, center = 1;
Rpp32s lobes = 0, coeffs = 0;
Rpp32s lookupSize = 0;
std::vector<Rpp32f> lookup;
Rpp32f *lookup = nullptr;
__m128 pCenter, pScale;
} RpptResamplingWindow;

Expand Down Expand Up @@ -806,6 +809,43 @@ struct SlaneyMelScale : public BaseMelScale
public:
~SlaneyMelScale() {};
};
// Normalized sinc: evaluates sin(pi * x) / (pi * x).
// Close to zero the quotient is replaced by the short series 1 - (pi * x)^2 / 6,
// which avoids dividing by a vanishing argument.
inline Rpp32f sinc(Rpp32f x)
{
    x *= M_PI;
    if (std::abs(x) < 1e-5f)
        return 1.f - (x * x * 0.16666667);
    return std::sin(x) / x;
}

// Hann window term: returns 0.5 * (1 + cos(pi * x)) for the given x.
inline Rpp64f hann(Rpp64f x)
{
    const Rpp64f phase = x * M_PI;
    return 0.5 * (1 + std::cos(phase));
}

// Initialization function for the resampling window (RpptResamplingWindow) used by the
// Resample audio augmentation. From the coeffs and lobes values it generates a LUT
// (look up table) of sinc(x) * hann(y) samples.
//   window - resampling window to fill; window.lookup is allocated here with hipHostMalloc
//            when GPU_SUPPORT is defined, otherwise with malloc
//   coeffs - number of window coefficients written to the LUT
//   lobes  - number of lobes, used to derive the sinc scale
// The LUT holds coeffs + 5 entries: the coefficients live at indices 1..coeffs and every
// remaining (padding) entry is explicitly set to zero, since raw allocations are not
// zero-initialized.
// If the host allocation fails, window.lookup stays null, window.lookupSize is set to 0 and
// the function returns without filling the remaining fields.
// NOTE(review): the buffer is allocated on every call and not released here - presumably the
// caller frees it; confirm against the call sites.
inline void windowed_sinc(RpptResamplingWindow &window, Rpp32s coeffs, Rpp32s lobes)
{
    Rpp32f scale = 2.0f * lobes / (coeffs - 1);
    Rpp32f scale_envelope = 2.0f / coeffs;
    window.coeffs = coeffs;
    window.lobes = lobes;
    window.lookupSize = coeffs + 5;
#ifdef GPU_SUPPORT
    CHECK_RETURN_STATUS(hipHostMalloc(&(window.lookup), window.lookupSize * sizeof(Rpp32f)));
#else
    window.lookup = static_cast<Rpp32f *>(malloc(window.lookupSize * sizeof(Rpp32f)));
#endif
    // Guard against a failed allocation instead of writing through a null pointer below
    if (window.lookup == nullptr)
    {
        window.lookupSize = 0;
        return;
    }
    // Clear the whole table first so the padding entries around the coefficients read as zero
    for (Rpp32s i = 0; i < window.lookupSize; i++)
        window.lookup[i] = 0.0f;
    Rpp32s center = (coeffs - 1) * 0.5f;
    for (int i = 0; i < coeffs; i++) {
        Rpp32f x = (i - center) * scale;
        Rpp32f y = (i - center) * scale_envelope;
        Rpp32f w = sinc(x) * hann(y);
        window.lookup[i + 1] = w;   // coefficients are stored with an offset of one entry
    }
    window.center = center + 1;     // matches the one-entry offset used when filling the LUT
    window.scale = 1 / scale;
    window.pCenter = _mm_set1_ps(window.center);
    window.pScale = _mm_set1_ps(window.scale);
}

/******************** HOST memory typedefs ********************/

Expand Down
27 changes: 23 additions & 4 deletions include/rppt_tensor_audio_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ extern "C" {
\n Finds the starting index and length of non silent region in the audio buffer by comparing the
calculated short-term power with cutoff value passed
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2, offsetInBytes >= 0, dataType = F32)
* \param [in] srcLengthTensor source audio buffer length (1D tensor in HOST memory, of size batchSize)
* \param [out] detectedIndexTensor beginning index of non silent region (1D tensor in HOST memory, of size batchSize)
* \param [out] detectionLengthTensor length of non silent region (1D tensor in HOST memory, of size batchSize)
Expand All @@ -70,7 +70,7 @@ RppStatus rppt_non_silent_region_detection_host(RppPtr_t srcPtr, RpptDescPtr src
\n Finds the starting index and length of non silent region in the audio buffer by comparing the
calculated short-term power with cutoff value passed
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2, offsetInBytes >= 0, dataType = F32)
* \param [in] srcLengthTensor source audio buffer length (1D tensor in Pinned/HIP memory, of size batchSize)
* \param [out] detectedIndexTensor beginning index of non silent region (1D tensor in Pinned/HIP memory, of size batchSize)
* \param [out] detectionLengthTensor length of non silent region (1D tensor in Pinned/HIP memory, of size batchSize)
Expand Down Expand Up @@ -254,9 +254,9 @@ RppStatus rppt_mel_filter_bank_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppP
/*! \brief Resample augmentation on HOST backend
* \details Resample augmentation for audio data
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HOST memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
* \param [in] inRateTensor Input sampling rate (1D tensor in HOST memory, of size batchSize)
* \param [in] outRateTensor Output sampling rate (1D tensor in HOST memory, of size batchSize)
* \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2)
Expand All @@ -268,6 +268,25 @@ RppStatus rppt_mel_filter_bank_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppP
*/
RppStatus rppt_resample_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *inRateTensor, Rpp32f *outRateTensor, Rpp32s *srcDimsTensor, RpptResamplingWindow &window, rppHandle_t rppHandle);

#ifdef GPU_SUPPORT
/*! \brief Resample augmentation on HIP backend
* \details Resample augmentation for audio data
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HIP memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
* \param [in] inRateTensor Input sampling rate (1D tensor in Pinned memory, of size batchSize)
* \param [in] outRateTensor Output sampling rate (1D tensor in Pinned memory, of size batchSize)
* \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in Pinned memory, of size batchSize * 2)
* \param [in] window Resampling window (struct of type RpptResamplingWindow in HIP/Pinned memory)
* \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_resample_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *inRateTensor, Rpp32f *outRateTensor, Rpp32s *srcDimsTensor, RpptResamplingWindow &window, rppHandle_t rppHandle);
#endif // GPU_SUPPORT

/*! @}
*/

Expand Down
24 changes: 12 additions & 12 deletions include/rppt_tensor_effects_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ RppStatus rppt_vignette_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t ds
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.png Sample Input
* \image html effects_augmentations_jitter_img150x150.png Sample Output
* \image html effects_augmentations_jitter_150x150.png Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HOST memory
Expand All @@ -491,7 +491,7 @@ RppStatus rppt_jitter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dst
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.png Sample Input
* \image html effects_augmentations_jitter_img150x150.png Sample Output
* \image html effects_augmentations_jitter_150x150.png Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
Expand Down Expand Up @@ -576,8 +576,8 @@ RppStatus rppt_erase_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP
* \details This function erases one or more user defined regions from an image, for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.jpg Sample Input
* \image html effects_augmentations_erase_img150x150.jpg Sample Output
* \image html img150x150.png Sample Input
* \image html effects_augmentations_erase_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
Expand All @@ -601,8 +601,8 @@ RppStatus rppt_erase_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPt
* \details The glitch augmentation adds a glitch effect for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.jpg Sample Input
* \image html effects_augmentations_glitch_img150x150.jpg Sample Output
* \image html img150x150.png Sample Input
* \image html effects_augmentations_glitch_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HOST memory
Expand All @@ -623,8 +623,8 @@ RppStatus rppt_glitch_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dst
* \details The glitch augmentation adds a glitch effect for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.jpg Sample Input
* \image html effects_augmentations_glitch_img150x150.jpg Sample Output
* \image html img150x150.png Sample Input
* \image html effects_augmentations_glitch_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
Expand All @@ -645,8 +645,8 @@ RppStatus rppt_glitch_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP
* \details The pixelate augmentation performs a pixelate transformation for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.jpg Sample Input
* \image html effects_augmentations_pixelate_img150x150.jpg Sample Output
* \image html img150x150.png Sample Input
* \image html effects_augmentations_pixelate_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HOST memory
Expand All @@ -667,8 +667,8 @@ RppStatus rppt_pixelate_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t d
* \details The pixelate augmentation performs a pixelate transformation for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.jpg Sample Input
* \image html effects_augmentations_pixelate_img150x150.jpg Sample Output
* \image html img150x150.png Sample Input
* \image html effects_augmentations_pixelate_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
Expand Down
4 changes: 2 additions & 2 deletions include/rppt_tensor_geometric_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -613,8 +613,8 @@ RppStatus rppt_remap_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP
* \details Performs a remap operation using user specified remap tables for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout. For each image, the output(x,y) = input(mapx(x, y), mapy(x, y)) for every (x,y) in the destination image.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.jpg Sample Input
* \image html geometric_augmentations_remap_img150x150.jpg Sample Output
* \image html img150x150.png Sample Input
* \image html geometric_augmentations_remap_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
Expand Down
6 changes: 0 additions & 6 deletions src/include/cpu/rpp_cpu_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5452,12 +5452,6 @@ inline void compute_bicubic_coefficient(Rpp32f weight, Rpp32f &coeff)
coeff = (x >= 2) ? 0 : ((x > 1) ? (x * x * (-0.5f * x + 2.5f) - 4.0f * x + 2.0f) : (x * x * (1.5f * x - 2.5f) + 1.0f));
}

// Normalized sinc: sin(pi * x) / (pi * x), switching to the series
// 1 - (pi * x)^2 * ONE_OVER_6 when the scaled argument is very close to zero.
inline Rpp32f sinc(Rpp32f x)
{
    x *= M_PI;
    const bool nearZero = std::abs(x) < 1e-5f;
    return nearZero ? (1.0f - x * x * ONE_OVER_6) : (std::sin(x) / x);
}

inline void compute_lanczos3_coefficient(Rpp32f weight, Rpp32f &coeff)
{
coeff = fabs(weight) >= 3 ? 0.0f : (sinc(weight) * sinc(weight * 0.333333f));
Expand Down
5 changes: 5 additions & 0 deletions src/include/hip/rpp_hip_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@ struct RPPTensorFunctionMetaData
(byte & 0x02 ? '1' : '0'), \
(byte & 0x01 ? '1' : '0')

// float4 floor
// Writes floorf() of each component (x, y, z, w) of src into dst via make_int4.
// Both arguments are parenthesized so that expression arguments expand with the intended
// precedence; note that src is evaluated four times, so it should be free of side effects.
// The trailing semicolon is retained so existing call sites keep compiling unchanged.

#define FLOOR4(src, dst) \
(dst) = make_int4(floorf((src).x), floorf((src).y), floorf((src).z), floorf((src).w));

/******************** HOST FUNCTIONS ********************/

inline int getplnpkdind(RppiChnFormat &format)
Expand Down
2 changes: 1 addition & 1 deletion src/modules/hip/handlehip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ struct HandleImpl
- 293 is the size required for storing reduction outputs for 600000 size sample
- 128 is the size required for storing cutOffDB values for batch size 128 */
hipMalloc(&(this->initHandle->mem.mgpu.scratchBufferHip.floatmem), sizeof(Rpp32f) * 76853888);
hipHostMalloc(&(this->initHandle->mem.mgpu.scratchBufferPinned.floatmem), sizeof(Rpp32f) * 8294400);
hipHostMalloc(&(this->initHandle->mem.mgpu.scratchBufferPinned.floatmem), sizeof(Rpp32f) * 8294400); // 3840 x 2160
}
};

Expand Down
1 change: 1 addition & 0 deletions src/modules/hip/hip_tensor_audio_augmentations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,6 @@ SOFTWARE.
#include "kernel/mel_filter_bank.hpp"
#include "kernel/pre_emphasis_filter.hpp"
#include "kernel/to_decibels.hpp"
#include "kernel/resample.hpp"

#endif // HIP_TENSOR_AUDIO_AUGMENTATIONS_HPP
Loading