Skip to content

Commit

Permalink
Merge pull request #324 from HazarathKumarM/hk/mel_filter_bank_hip
Browse files Browse the repository at this point in the history
Mel Filter Bank HIP - Address Review comments, Merge Develop changes and resolve conflicts
  • Loading branch information
r-abishek authored Aug 26, 2024
2 parents 3d8d123 + 05e8524 commit 9160062
Show file tree
Hide file tree
Showing 33 changed files with 736 additions and 321 deletions.
2 changes: 1 addition & 1 deletion docs/sphinx/requirements.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
rocm-docs-core[api_reference]==1.7.1
rocm-docs-core[api_reference]==1.7.2
2 changes: 1 addition & 1 deletion docs/sphinx/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ requests==2.32.3
# via
# pygithub
# sphinx
rocm-docs-core[api-reference]==1.7.1
rocm-docs-core[api-reference]==1.7.2
# via -r requirements.in
smmap==5.0.1
# via gitdb
Expand Down
44 changes: 42 additions & 2 deletions include/rppdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ SOFTWARE.
#define RPP_MAX_8U ( 255 )
/*! \brief RPP maximum dimensions in tensor \ingroup group_rppdefs \page subpage_rppt */
#define RPPT_MAX_DIMS ( 5 )
/*! \brief RPP maximum channels in audio tensor \ingroup group_rppdefs \page subpage_rppt */
#define RPPT_MAX_AUDIO_CHANNELS ( 16 )

#define CHECK_RETURN_STATUS(x) do { \
int retval = (x); \
Expand All @@ -62,6 +64,7 @@ SOFTWARE.
} while (0)

#ifdef HIP_COMPILE
#include <hip/hip_runtime.h>
#define RPP_HOST_DEVICE __host__ __device__
#else
#define RPP_HOST_DEVICE
Expand Down Expand Up @@ -708,7 +711,7 @@ typedef struct GenericFilter
*/
typedef struct RpptResamplingWindow
{
inline void input_range(Rpp32f x, Rpp32s *loc0, Rpp32s *loc1)
inline RPP_HOST_DEVICE void input_range(Rpp32f x, Rpp32s *loc0, Rpp32s *loc1)
{
Rpp32s xc = std::ceil(x);
*loc0 = xc - lobes;
Expand Down Expand Up @@ -742,7 +745,7 @@ typedef struct RpptResamplingWindow
Rpp32f scale = 1, center = 1;
Rpp32s lobes = 0, coeffs = 0;
Rpp32s lookupSize = 0;
std::vector<Rpp32f> lookup;
Rpp32f *lookup = nullptr;
__m128 pCenter, pScale;
} RpptResamplingWindow;

Expand Down Expand Up @@ -806,6 +809,43 @@ struct SlaneyMelScale : public BaseMelScale
public:
~SlaneyMelScale() {};
};
// Normalized sinc: evaluates sin(pi * x) / (pi * x).
// Close to zero (|pi * x| < 1e-5) the quotient is replaced by the series
// approximation 1 - (pi * x)^2 / 6 so the division by a near-zero value is avoided.
inline Rpp32f sinc(Rpp32f x)
{
    const Rpp32f piX = x * M_PI;
    if (std::abs(piX) < 1e-5f)
        return 1.f - (piX * piX * 0.16666667);
    return std::sin(piX) / piX;
}

// Raised-cosine (Hann) envelope: returns 0.5 * (1 + cos(pi * x)).
// Evaluates to 1 at x = 0 and to 0 at x = +/-1.
inline Rpp64f hann(Rpp64f x)
{
    const Rpp64f cosine = std::cos(x * M_PI);
    return 0.5 * (1 + cosine);
}

// Initialization function that fills a resampling window (RpptResamplingWindow).
// From the coeffs and lobes values it generates a LUT (look up table) of
// sinc(x) * hann(y) samples, which is further used in the Resample audio augmentation.
//
// \param window  window struct to fill; window.lookup is (re)assigned to a freshly allocated
//                buffer of (coeffs + 5) floats (pinned host memory when GPU_SUPPORT is defined,
//                malloc'ed memory otherwise). The buffer is not released by this function.
// \param coeffs  number of LUT coefficients to generate
// \param lobes   number of sinc lobes covered by the window
inline void windowed_sinc(RpptResamplingWindow &window, Rpp32s coeffs, Rpp32s lobes)
{
    Rpp32f scale = 2.0f * lobes / (coeffs - 1);
    Rpp32f scale_envelope = 2.0f / coeffs;
    window.coeffs = coeffs;
    window.lobes = lobes;
    window.lookupSize = coeffs + 5;
#ifdef GPU_SUPPORT
    CHECK_RETURN_STATUS(hipHostMalloc(&(window.lookup), window.lookupSize * sizeof(Rpp32f)));
#else
    window.lookup = static_cast<Rpp32f *>(malloc(window.lookupSize * sizeof(Rpp32f)));
#endif
    // Neither allocator zero-initializes the buffer, and the loop below only writes
    // entries [1, coeffs]. Clear the whole table so the padding entries (index 0 and the
    // four trailing slots) hold a defined value of 0 instead of indeterminate data.
    // NOTE(review): the padding is presumably read by interpolating look-ups near the
    // table edges - confirm against the resample kernels.
    for (Rpp32s i = 0; i < window.lookupSize; i++)
        window.lookup[i] = 0.0f;

    Rpp32s center = (coeffs - 1) * 0.5f;
    for (int i = 0; i < coeffs; i++) {
        Rpp32f x = (i - center) * scale;
        Rpp32f y = (i - center) * scale_envelope;
        Rpp32f w = sinc(x) * hann(y);
        window.lookup[i + 1] = w;    // +1 keeps slot 0 as leading padding
    }
    window.center = center + 1;      // shifted by the same leading padding slot
    window.scale = 1 / scale;
    window.pCenter = _mm_set1_ps(window.center);
    window.pScale = _mm_set1_ps(window.scale);
}

/******************** HOST memory typedefs ********************/

Expand Down
27 changes: 23 additions & 4 deletions include/rppt_tensor_audio_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ extern "C" {
\n Finds the starting index and length of non silent region in the audio buffer by comparing the
calculated short-term power with cutoff value passed
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2, offsetInBytes >= 0, dataType = F32)
* \param [in] srcLengthTensor source audio buffer length (1D tensor in HOST memory, of size batchSize)
* \param [out] detectedIndexTensor beginning index of non silent region (1D tensor in HOST memory, of size batchSize)
* \param [out] detectionLengthTensor length of non silent region (1D tensor in HOST memory, of size batchSize)
Expand All @@ -70,7 +70,7 @@ RppStatus rppt_non_silent_region_detection_host(RppPtr_t srcPtr, RpptDescPtr src
\n Finds the starting index and length of non silent region in the audio buffer by comparing the
calculated short-term power with cutoff value passed
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2, offsetInBytes >= 0, dataType = F32)
* \param [in] srcLengthTensor source audio buffer length (1D tensor in Pinned/HIP memory, of size batchSize)
* \param [out] detectedIndexTensor beginning index of non silent region (1D tensor in Pinned/HIP memory, of size batchSize)
* \param [out] detectionLengthTensor length of non silent region (1D tensor in Pinned/HIP memory, of size batchSize)
Expand Down Expand Up @@ -254,9 +254,9 @@ RppStatus rppt_mel_filter_bank_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppP
/*! \brief Resample augmentation on HOST backend
* \details Resample augmentation for audio data
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HOST memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
* \param [in] inRateTensor Input sampling rate (1D tensor in HOST memory, of size batchSize)
* \param [in] outRateTensor Output sampling rate (1D tensor in HOST memory, of size batchSize)
* \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2)
Expand All @@ -268,6 +268,25 @@ RppStatus rppt_mel_filter_bank_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppP
*/
RppStatus rppt_resample_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *inRateTensor, Rpp32f *outRateTensor, Rpp32s *srcDimsTensor, RpptResamplingWindow &window, rppHandle_t rppHandle);

#ifdef GPU_SUPPORT
/*! \brief Resample augmentation on HIP backend
* \details Resample augmentation for audio data
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HIP memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
* \param [in] inRateTensor Input sampling rate (1D tensor in Pinned memory, of size batchSize)
* \param [in] outRateTensor Output sampling rate (1D tensor in Pinned memory, of size batchSize)
* \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in Pinned memory, of size batchSize * 2)
* \param [in] window Resampling window (struct of type RpptResamplingWindow in HIP/Pinned memory)
* \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_resample_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *inRateTensor, Rpp32f *outRateTensor, Rpp32s *srcDimsTensor, RpptResamplingWindow &window, rppHandle_t rppHandle);
#endif // GPU_SUPPORT

/*! @}
*/

Expand Down
24 changes: 12 additions & 12 deletions include/rppt_tensor_effects_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ RppStatus rppt_vignette_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t ds
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.png Sample Input
* \image html effects_augmentations_jitter_img150x150.png Sample Output
* \image html effects_augmentations_jitter_150x150.png Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HOST memory
Expand All @@ -491,7 +491,7 @@ RppStatus rppt_jitter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dst
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.png Sample Input
* \image html effects_augmentations_jitter_img150x150.png Sample Output
* \image html effects_augmentations_jitter_150x150.png Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
Expand Down Expand Up @@ -576,8 +576,8 @@ RppStatus rppt_erase_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP
* \details This function erases one or more user defined regions from an image, for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.jpg Sample Input
* \image html effects_augmentations_erase_img150x150.jpg Sample Output
* \image html img150x150.png Sample Input
* \image html effects_augmentations_erase_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
Expand All @@ -601,8 +601,8 @@ RppStatus rppt_erase_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPt
* \details The glitch augmentation adds a glitch effect for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.jpg Sample Input
* \image html effects_augmentations_glitch_img150x150.jpg Sample Output
* \image html img150x150.png Sample Input
* \image html effects_augmentations_glitch_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HOST memory
Expand All @@ -623,8 +623,8 @@ RppStatus rppt_glitch_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dst
* \details The glitch augmentation adds a glitch effect for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.jpg Sample Input
* \image html effects_augmentations_glitch_img150x150.jpg Sample Output
* \image html img150x150.png Sample Input
* \image html effects_augmentations_glitch_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
Expand All @@ -645,8 +645,8 @@ RppStatus rppt_glitch_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP
* \details The pixelate augmentation performs a pixelate transformation for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.jpg Sample Input
* \image html effects_augmentations_pixelate_img150x150.jpg Sample Output
* \image html img150x150.png Sample Input
* \image html effects_augmentations_pixelate_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HOST memory
Expand All @@ -667,8 +667,8 @@ RppStatus rppt_pixelate_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t d
* \details The pixelate augmentation performs a pixelate transformation for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.jpg Sample Input
* \image html effects_augmentations_pixelate_img150x150.jpg Sample Output
* \image html img150x150.png Sample Input
* \image html effects_augmentations_pixelate_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
Expand Down
4 changes: 2 additions & 2 deletions include/rppt_tensor_geometric_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -613,8 +613,8 @@ RppStatus rppt_remap_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP
* \details Performs a remap operation using user specified remap tables for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout. For each image, the output(x,y) = input(mapx(x, y), mapy(x, y)) for every (x,y) in the destination image.<br>
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
* - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.jpg Sample Input
* \image html geometric_augmentations_remap_img150x150.jpg Sample Output
* \image html img150x150.png Sample Input
* \image html geometric_augmentations_remap_img150x150.png Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
Expand Down
6 changes: 0 additions & 6 deletions src/include/cpu/rpp_cpu_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5452,12 +5452,6 @@ inline void compute_bicubic_coefficient(Rpp32f weight, Rpp32f &coeff)
coeff = (x >= 2) ? 0 : ((x > 1) ? (x * x * (-0.5f * x + 2.5f) - 4.0f * x + 2.0f) : (x * x * (1.5f * x - 2.5f) + 1.0f));
}

// Normalized sinc: evaluates sin(pi * x) / (pi * x).
// For |pi * x| < 1e-5 the quotient is replaced by 1 - (pi * x)^2 * ONE_OVER_6
// so that no division by a near-zero value takes place.
inline Rpp32f sinc(Rpp32f x)
{
    const Rpp32f piX = x * M_PI;
    if (std::abs(piX) < 1e-5f)
        return 1.0f - piX * piX * ONE_OVER_6;
    return std::sin(piX) / piX;
}

inline void compute_lanczos3_coefficient(Rpp32f weight, Rpp32f &coeff)
{
coeff = fabs(weight) >= 3 ? 0.0f : (sinc(weight) * sinc(weight * 0.333333f));
Expand Down
5 changes: 5 additions & 0 deletions src/include/hip/rpp_hip_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@ struct RPPTensorFunctionMetaData
(byte & 0x02 ? '1' : '0'), \
(byte & 0x01 ? '1' : '0')

// float4 floor: stores the component-wise floorf() of `src` (x, y, z, w) into `dst` via make_int4().
// Both macro parameters are parenthesized so that expression arguments (e.g. `a + b`, `*ptr`)
// expand with the intended precedence.
// NOTE: the expansion already ends with ';' - kept as-is so existing call sites keep compiling.

#define FLOOR4(src, dst) \
    (dst) = make_int4(floorf((src).x), floorf((src).y), floorf((src).z), floorf((src).w));

/******************** HOST FUNCTIONS ********************/

inline int getplnpkdind(RppiChnFormat &format)
Expand Down
2 changes: 1 addition & 1 deletion src/modules/hip/handlehip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ struct HandleImpl
- 293 is the size required for storing reduction outputs for 600000 size sample
- 128 is the size required for storing cutOffDB values for batch size 128 */
hipMalloc(&(this->initHandle->mem.mgpu.scratchBufferHip.floatmem), sizeof(Rpp32f) * 76853888);
hipHostMalloc(&(this->initHandle->mem.mgpu.scratchBufferPinned.floatmem), sizeof(Rpp32f) * 8294400);
hipHostMalloc(&(this->initHandle->mem.mgpu.scratchBufferPinned.floatmem), sizeof(Rpp32f) * 8294400); // 3840 x 2160
}
};

Expand Down
1 change: 1 addition & 0 deletions src/modules/hip/hip_tensor_audio_augmentations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,6 @@ SOFTWARE.
#include "kernel/mel_filter_bank.hpp"
#include "kernel/pre_emphasis_filter.hpp"
#include "kernel/to_decibels.hpp"
#include "kernel/resample.hpp"

#endif // HIP_TENSOR_AUDIO_AUGMENTATIONS_HPP
Loading

0 comments on commit 9160062

Please sign in to comment.