Skip to content

Commit

Permalink
Merge pull request #306 from sampath1117/sr/resample_develop_merge
Browse files Browse the repository at this point in the history
Resample HIP develop merge
  • Loading branch information
r-abishek authored Aug 5, 2024
2 parents 6c3c214 + b1c87ea commit b59c519
Show file tree
Hide file tree
Showing 14 changed files with 623 additions and 79 deletions.
41 changes: 40 additions & 1 deletion include/rppdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ SOFTWARE.
} while (0)

#ifdef HIP_COMPILE
#include <hip/hip_runtime.h>
#define RPP_HOST_DEVICE __host__ __device__
#else
#define RPP_HOST_DEVICE
Expand Down Expand Up @@ -742,10 +743,48 @@ typedef struct RpptResamplingWindow
Rpp32f scale = 1, center = 1;
Rpp32s lobes = 0, coeffs = 0;
Rpp32s lookupSize = 0;
Rpp32f *lookup;
Rpp32f *lookup = nullptr;
__m128 pCenter, pScale;
} RpptResamplingWindow;

// Normalized sinc function: sin(pi * x) / (pi * x).
// For arguments whose scaled magnitude is below 1e-5 the quotient is replaced by the
// two-term Taylor expansion 1 - t^2 / 6 (with t = pi * x), which avoids the 0 / 0 form at x = 0.
inline Rpp32f sinc(Rpp32f x)
{
    // The product is formed with M_PI and then narrowed back to Rpp32f, as in "x *= M_PI"
    const Rpp32f scaled = static_cast<Rpp32f>(x * M_PI);
    if (std::abs(scaled) < 1e-5f)
        return 1.0f - scaled * scaled * (1.0f / 6);
    return std::sin(scaled) / scaled;
}

// Hann (raised cosine) envelope: 0.5 * (1 + cos(pi * x)).
// Evaluates to 1 at x = 0 and to 0 at x = +/-1; no clamping is applied outside that range.
inline Rpp64f hann(Rpp64f x)
{
    const Rpp64f cosine = std::cos(x * M_PI);
    return 0.5 * (1 + cosine);
}

// Initialization function used for filling the values in a resampling window (RpptResamplingWindow).
// Using the coeffs and lobes values this function generates a LUT (look up table) of
// Hann-windowed sinc samples, which is further used in the Resample audio augmentation.
//
// LUT layout (lookupSize = coeffs + 5 entries):
//   lookup[0]                        - zero
//   lookup[1 .. coeffs]              - sinc(x) * hann(y) samples
//   lookup[coeffs + 1 .. coeffs + 4] - zeros
// NOTE(review): the zero entries presumably exist so that interpolating / 4-lane vector reads
// next to the samples stay inside the buffer and contribute nothing - confirm against the kernels.
//
// Parameters:
//   window - window to fill; coeffs, lobes, lookupSize, lookup, center, scale, pCenter and pScale are written.
//   coeffs - number of windowed sinc samples to generate; must be >= 2.
//   lobes  - lobe count used to scale the sinc argument; must be >= 1.
//
// The LUT is allocated with hipHostMalloc when GPU_SUPPORT is defined and with malloc otherwise.
// This function never frees it; releasing the buffer (and any buffer previously stored in
// window.lookup) is left to the caller.
// If the parameters are degenerate or the host allocation fails, the function returns early and
// leaves window untouched.
inline void windowed_sinc(RpptResamplingWindow &window, Rpp32s coeffs, Rpp32s lobes)
{
    // coeffs < 2 would divide by zero in (coeffs - 1) or request a non-positive sample count;
    // lobes < 1 would give scale == 0 and therefore an infinite window.scale.
    if (coeffs < 2 || lobes < 1)
        return;

    const Rpp32s lookupSize = coeffs + 5;
    Rpp32f *lookup = nullptr;
#ifdef GPU_SUPPORT
    CHECK_RETURN_STATUS(hipHostMalloc(&lookup, lookupSize * sizeof(Rpp32f)));
#else
    lookup = static_cast<Rpp32f *>(malloc(lookupSize * sizeof(Rpp32f)));
#endif
    if (lookup == nullptr)
        return;

    // Neither allocator is asked to zero the memory, and only entries [1, coeffs] are written below,
    // so clear the whole table to give the leading / trailing entries a defined value of zero.
    for (Rpp32s i = 0; i < lookupSize; i++)
        lookup[i] = 0.0f;

    Rpp32f scale = 2.0f * lobes / (coeffs - 1);
    Rpp32f scale_envelope = 2.0f / coeffs;
    // NOTE(review): center is an integer, so (coeffs - 1) * 0.5f is truncated for even coeffs;
    // kept as in the original implementation.
    Rpp32s center = (coeffs - 1) * 0.5f;
    for (int i = 0; i < coeffs; i++) {
        Rpp32f x = (i - center) * scale;
        Rpp32f y = (i - center) * scale_envelope;
        Rpp32f w = sinc(x) * hann(y);
        lookup[i + 1] = w;    // samples start at index 1, after the leading zero
    }

    // Publish the fully initialized window only after every step has succeeded
    window.coeffs = coeffs;
    window.lobes = lobes;
    window.lookupSize = lookupSize;
    window.lookup = lookup;
    window.center = center + 1;    // + 1 accounts for the leading zero entry
    window.scale = 1 / scale;
    window.pCenter = _mm_set1_ps(window.center);
    window.pScale = _mm_set1_ps(window.scale);
}

/******************** HOST memory typedefs ********************/

/*! \brief RPP HOST 32-bit float memory
Expand Down
32 changes: 16 additions & 16 deletions include/rppt_tensor_arithmetic_operations.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ extern "C" {
* It multiplies each element of the source tensor by a corresponding element in the 'mulTensor',
* adds a corresponding element from the 'addTensor', and stores the result in the destination tensor.
* Support added for f32 -> f32 datatype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_fused_multiply_add_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_fused_multiply_add_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HOST memory
Expand All @@ -70,8 +70,8 @@ RppStatus rppt_fused_multiply_add_scalar_host(RppPtr_t srcPtr, RpptGenericDescPt
* It multiplies each element of the source tensor by a corresponding element in the 'mulTensor',
* adds a corresponding element from the 'addTensor', and stores the result in the destination tensor.
* Support added for f32 -> f32 datatype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_fused_multiply_add_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_fused_multiply_add_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HIP memory
Expand All @@ -92,8 +92,8 @@ RppStatus rppt_fused_multiply_add_scalar_gpu(RppPtr_t srcPtr, RpptGenericDescPtr
* \details This function performs the addition operation on a batch of 4D tensors.
* It adds a corresponding element from the 'addTensor' to source tensor, and stores the result in the destination tensor.
* Support added for f32 -> f32 datatype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_add_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_add_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HOST memory
Expand All @@ -113,8 +113,8 @@ RppStatus rppt_add_scalar_host(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDes
* \details This function performs the addition operation on a batch of 4D tensors.
* It adds a corresponding element from the 'addTensor' to source tensor, and stores the result in the destination tensor.
* Support added for f32 -> f32 datatype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_add_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_add_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HIP memory
Expand All @@ -134,8 +134,8 @@ RppStatus rppt_add_scalar_gpu(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDesc
* \details This function performs the subtraction operation on a batch of 4D tensors.
* It takes a corresponding element from 'subtractTensor' and subtracts it from source tensor. Result is stored in the destination tensor.
* Support added for f32 -> f32 datatype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_subtract_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_subtract_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HOST memory
Expand All @@ -155,8 +155,8 @@ RppStatus rppt_subtract_scalar_host(RppPtr_t srcPtr, RpptGenericDescPtr srcGener
* \details This function performs the subtraction operation on a batch of 4D tensors.
* It takes a corresponding element from 'subtractTensor' and subtracts it from source tensor. Result is stored in the destination tensor.
* Support added for f32 -> f32 datatype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_subtract_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_subtract_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HIP memory
Expand All @@ -176,8 +176,8 @@ RppStatus rppt_subtract_scalar_gpu(RppPtr_t srcPtr, RpptGenericDescPtr srcGeneri
* \details This function performs the multiplication operation on a batch of 4D tensors.
* It takes a corresponding element from 'multiplyTensor' and multiplies it with source tensor. Result is stored in the destination tensor.
* Support added for f32 -> f32 datatype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_multiply_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_multiply_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HOST memory
Expand All @@ -197,8 +197,8 @@ RppStatus rppt_multiply_scalar_host(RppPtr_t srcPtr, RpptGenericDescPtr srcGener
* \details This function performs the multiplication operation on a batch of 4D tensors.
* It takes a corresponding element from 'multiplyTensor' and multiplies it with source tensor. Result is stored in the destination tensor.
* Support added for f32 -> f32 datatype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_multiply_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_multiply_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HIP memory
Expand Down
46 changes: 41 additions & 5 deletions include/rppt_tensor_audio_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,11 @@ RppStatus rppt_non_silent_region_detection_gpu(RppPtr_t srcPtr, RpptDescPtr srcD
#endif // GPU_SUPPORT

/*! \brief To Decibels augmentation on HOST backend
* \details To Decibels augmentation for 1D audio buffer converts magnitude values to decibel values
* \details To Decibels augmentation for 1D/2D audio buffer converts magnitude values to decibel values
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel/2D audio tensor with 1 channel), offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HOST memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel/2D audio tensor with 1 channel), offsetInBytes >= 0, dataType = F32)
* \param [in] srcDims source tensor sizes for each element in batch (2D tensor in HOST memory, of size batchSize * 2)
* \param [in] cutOffDB minimum or cut-off ratio in dB
* \param [in] multiplier factor by which the logarithm is multiplied
Expand All @@ -103,6 +103,25 @@ RppStatus rppt_non_silent_region_detection_gpu(RppPtr_t srcPtr, RpptDescPtr srcD
*/
RppStatus rppt_to_decibels_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr srcDims, Rpp32f cutOffDB, Rpp32f multiplier, Rpp32f referenceMagnitude, rppHandle_t rppHandle);

#ifdef GPU_SUPPORT
/*! \brief To Decibels augmentation on HIP backend
* \details To Decibels augmentation for 1D/2D audio buffer converts magnitude values to decibel values
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel/2D audio tensor with 1 channel), offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HIP memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel/2D audio tensor with 1 channel), offsetInBytes >= 0, dataType = F32)
* \param [in] srcDims source tensor sizes for each element in batch (2D tensor in Pinned/HIP memory, of size batchSize * 2)
* \param [in] cutOffDB minimum or cut-off ratio in dB
* \param [in] multiplier factor by which the logarithm is multiplied
* \param [in] referenceMagnitude reference magnitude; if not provided, the maximum value of the input is used as the reference
* \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_to_decibels_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr srcDims, Rpp32f cutOffDB, Rpp32f multiplier, Rpp32f referenceMagnitude, rppHandle_t rppHandle);
#endif // GPU_SUPPORT

/*! \brief Pre Emphasis Filter augmentation on HOST backend
* \details Pre Emphasis Filter augmentation for audio data
* \param [in] srcPtr source tensor in HOST memory
Expand All @@ -122,9 +141,9 @@ RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr,
/*! \brief Down Mixing augmentation on HOST backend
* \details Down Mixing augmentation for audio data
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HOST memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2)
* \param [in] normalizeWeights bool flag to specify if normalization of weights is needed
* \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
Expand All @@ -134,6 +153,23 @@ RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr,
*/
RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDimsTensor, bool normalizeWeights, rppHandle_t rppHandle);

#ifdef GPU_SUPPORT
/*! \brief Down Mixing augmentation on HIP backend
* \details Down Mixing augmentation for audio data
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HIP memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HIP/Pinned memory, of size batchSize * 2)
* \param [in] normalizeWeights bool flag to specify if normalization of weights is needed
* \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_down_mixing_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDimsTensor, bool normalizeWeights, rppHandle_t rppHandle);
#endif // GPU_SUPPORT

/*! \brief Produces a spectrogram from a 1D audio buffer on HOST backend
* \details Spectrogram for 1D audio buffer
* \param [in] srcPtr source tensor in HOST memory
Expand Down
8 changes: 4 additions & 4 deletions include/rppt_tensor_effects_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -512,8 +512,8 @@ RppStatus rppt_jitter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP
/*! \brief Gaussian noise augmentation on HOST backend
* \details This function adds gaussian noise to a batch of 4D tensors.
* Support added for u8 -> u8, f32 -> f32 datatypes.
* \image html input150x150x4.gif Sample Input
* \image html effects_augmentations_gaussian_noise_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/effects_augmentations_gaussian_noise_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcGenericDescPtr source tensor descriptor
* \param [out] dstPtr destination tensor in HOST memory
Expand All @@ -534,8 +534,8 @@ RppStatus rppt_gaussian_noise_voxel_host(RppPtr_t srcPtr, RpptGenericDescPtr src
/*! \brief Gaussian noise augmentation on HIP backend
* \details This function adds gaussian noise to a batch of 4D tensors.
* Support added for u8 -> u8, f32 -> f32 datatypes.
* \image html input150x150x4.gif Sample Input
* \image html effects_augmentations_gaussian_noise_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/effects_augmentations_gaussian_noise_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcGenericDescPtr source tensor descriptor
* \param [out] dstPtr destination tensor in HIP memory
Expand Down
8 changes: 4 additions & 4 deletions include/rppt_tensor_geometric_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -541,8 +541,8 @@ RppStatus rppt_crop_and_patch_gpu(RppPtr_t srcPtr1, RppPtr_t srcPtr2, RpptDescPt
/*! \brief Flip voxel augmentation HOST
* \details The flip voxel augmentation performs a mask-controlled horizontal/vertical/depth flip on a generic 4D tensor.
<br> Support added for f32 -> f32 and u8 -> u8 datatypes.
* \image html input150x150x4.gif Sample Input
* \image html geometric_augmentations_flip_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/geometric_augmentations_flip_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcGenericDescPtr source tensor descriptor (Restrictions - numDims = 5, offsetInBytes >= 0, dataType = U8/F32, layout = NCDHW/NDHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HOST memory
Expand All @@ -564,8 +564,8 @@ RppStatus rppt_flip_voxel_host(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDes
/*! \brief Flip voxel augmentation GPU
* \details The flip voxel augmentation performs a mask-controlled horizontal/vertical/depth flip on a generic 4D tensor.
<br> Support added for f32 -> f32 and u8 -> u8 datatypes.
* \image html input150x150x4.gif Sample Input
* \image html geometric_augmentations_flip_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/geometric_augmentations_flip_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcGenericDescPtr source tensor descriptor (Restrictions - numDims = 5, offsetInBytes >= 0, dataType = U8/F32, layout = NCDHW/NDHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
Expand Down
6 changes: 0 additions & 6 deletions src/include/cpu/rpp_cpu_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5452,12 +5452,6 @@ inline void compute_bicubic_coefficient(Rpp32f weight, Rpp32f &coeff)
coeff = (x >= 2) ? 0 : ((x > 1) ? (x * x * (-0.5f * x + 2.5f) - 4.0f * x + 2.0f) : (x * x * (1.5f * x - 2.5f) + 1.0f));
}

// Normalized sinc function: sin(pi * x) / (pi * x).
// For arguments whose scaled magnitude is below 1e-5 the quotient is replaced by the
// two-term Taylor expansion 1 - t^2 * ONE_OVER_6 (with t = pi * x), avoiding the 0 / 0 form at x = 0.
inline Rpp32f sinc(Rpp32f x)
{
    // The product is formed with M_PI and then narrowed back to Rpp32f, as in "x *= M_PI"
    const Rpp32f scaled = static_cast<Rpp32f>(x * M_PI);
    if (std::abs(scaled) < 1e-5f)
        return 1.0f - scaled * scaled * ONE_OVER_6;
    return std::sin(scaled) / scaled;
}

inline void compute_lanczos3_coefficient(Rpp32f weight, Rpp32f &coeff)
{
coeff = fabs(weight) >= 3 ? 0.0f : (sinc(weight) * sinc(weight * 0.333333f));
Expand Down
2 changes: 2 additions & 0 deletions src/modules/hip/hip_tensor_audio_augmentations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ SOFTWARE.
#define HIP_TENSOR_AUDIO_AUGMENTATIONS_HPP

#include "kernel/non_silent_region_detection.hpp"
#include "kernel/down_mixing.hpp"
#include "kernel/to_decibels.hpp"
#include "kernel/resample.hpp"

#endif // HIP_TENSOR_AUDIO_AUGMENTATIONS_HPP
Loading

0 comments on commit b59c519

Please sign in to comment.