Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resample HIP develop merge #306

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion include/rppdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ SOFTWARE.
} while (0)

#ifdef HIP_COMPILE
#include <hip/hip_runtime.h>
#define RPP_HOST_DEVICE __host__ __device__
#else
#define RPP_HOST_DEVICE
Expand Down Expand Up @@ -742,10 +743,48 @@ typedef struct RpptResamplingWindow
Rpp32f scale = 1, center = 1;
Rpp32s lobes = 0, coeffs = 0;
Rpp32s lookupSize = 0;
Rpp32f *lookup;
Rpp32f *lookup = nullptr;
__m128 pCenter, pScale;
} RpptResamplingWindow;

inline Rpp32f sinc(Rpp32f x)
{
x *= M_PI;
return (std::abs(x) < 1e-5f) ? (1.0f - x * x * (1.0f / 6)) : std::sin(x) / x;
}

inline Rpp64f hann(Rpp64f x)
{
return 0.5 * (1 + std::cos(x * M_PI));
}

// initialization function used for filling the values in Resampling window (RpptResamplingWindow)
// using the coeffs and lobes value this function generates a LUT (look up table) which is further used in Resample audio augmentation
inline void windowed_sinc(RpptResamplingWindow &window, Rpp32s coeffs, Rpp32s lobes)
{
Rpp32f scale = 2.0f * lobes / (coeffs - 1);
Rpp32f scale_envelope = 2.0f / coeffs;
window.coeffs = coeffs;
window.lobes = lobes;
window.lookupSize = coeffs + 5;
#ifdef GPU_SUPPORT
CHECK_RETURN_STATUS(hipHostMalloc(&(window.lookup), window.lookupSize * sizeof(Rpp32f)));
#else
window.lookup = static_cast<Rpp32f *>(malloc(window.lookupSize * sizeof(Rpp32f)));
#endif
Rpp32s center = (coeffs - 1) * 0.5f;
for (int i = 0; i < coeffs; i++) {
Rpp32f x = (i - center) * scale;
Rpp32f y = (i - center) * scale_envelope;
Rpp32f w = sinc(x) * hann(y);
window.lookup[i + 1] = w;
}
window.center = center + 1;
window.scale = 1 / scale;
window.pCenter = _mm_set1_ps(window.center);
window.pScale = _mm_set1_ps(window.scale);
}

/******************** HOST memory typedefs ********************/

/*! \brief RPP HOST 32-bit float memory
Expand Down
32 changes: 16 additions & 16 deletions include/rppt_tensor_arithmetic_operations.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ extern "C" {
* It multiplies each element of the source tensor by a corresponding element in the 'mulTensor',
* adds a corresponding element from the 'addTensor', and stores the result in the destination tensor.
* Support added for f32 -> f32 dataype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_fused_multiply_add_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_fused_multiply_add_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HOST memory
Expand All @@ -70,8 +70,8 @@ RppStatus rppt_fused_multiply_add_scalar_host(RppPtr_t srcPtr, RpptGenericDescPt
* It multiplies each element of the source tensor by a corresponding element in the 'mulTensor',
* adds a corresponding element from the 'addTensor', and stores the result in the destination tensor.
* Support added for f32 -> f32 dataype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_fused_multiply_add_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_fused_multiply_add_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HIP memory
Expand All @@ -92,8 +92,8 @@ RppStatus rppt_fused_multiply_add_scalar_gpu(RppPtr_t srcPtr, RpptGenericDescPtr
* \details This function performs the addition operation on a batch of 4D tensors.
* It adds a corresponding element from the 'addTensor' to source tensor, and stores the result in the destination tensor.
* Support added for f32 -> f32 dataype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_add_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_add_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HOST memory
Expand All @@ -113,8 +113,8 @@ RppStatus rppt_add_scalar_host(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDes
* \details This function performs the addition operation on a batch of 4D tensors.
* It adds a corresponding element from the 'addTensor' to source tensor, and stores the result in the destination tensor.
* Support added for f32 -> f32 dataype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_add_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_add_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HIP memory
Expand All @@ -134,8 +134,8 @@ RppStatus rppt_add_scalar_gpu(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDesc
* \details This function performs the subtraction operation on a batch of 4D tensors.
* It takes a corresponding element from 'subtractTensor' and subtracts it from source tensor. Result is stored in the destination tensor.
* Support added for f32 -> f32 dataype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_subtract_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_subtract_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HOST memory
Expand All @@ -155,8 +155,8 @@ RppStatus rppt_subtract_scalar_host(RppPtr_t srcPtr, RpptGenericDescPtr srcGener
* \details This function performs the subtraction operation on a batch of 4D tensors.
* It takes a corresponding element from 'subtractTensor' and subtracts it from source tensor. Result is stored in the destination tensor.
* Support added for f32 -> f32 dataype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_subtract_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_subtract_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HIP memory
Expand All @@ -176,8 +176,8 @@ RppStatus rppt_subtract_scalar_gpu(RppPtr_t srcPtr, RpptGenericDescPtr srcGeneri
* \details This function performs the multiplication operation on a batch of 4D tensors.
* It takes a corresponding element from 'multiplyTensor' and multiplies it with source tensor. Result is stored in the destination tensor.
* Support added for f32 -> f32 dataype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_multiply_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_multiply_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HOST memory
Expand All @@ -197,8 +197,8 @@ RppStatus rppt_multiply_scalar_host(RppPtr_t srcPtr, RpptGenericDescPtr srcGener
* \details This function performs the multiplication operation on a batch of 4D tensors.
* It takes a corresponding element from 'multiplyTensor' and multiplies it with source tensor. Result is stored in the destination tensor.
* Support added for f32 -> f32 dataype.
* \image html input150x150x4.gif Sample Input
* \image html arithmetic_operations_multiply_scalar_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_multiply_scalar_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param[in] srcGenericDescPtr source tensor descriptor
* \param[out] dstPtr destination tensor in HIP memory
Expand Down
46 changes: 41 additions & 5 deletions include/rppt_tensor_audio_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,11 @@ RppStatus rppt_non_silent_region_detection_gpu(RppPtr_t srcPtr, RpptDescPtr srcD
#endif // GPU_SUPPORT

/*! \brief To Decibels augmentation on HOST backend
* \details To Decibels augmentation for 1D audio buffer converts magnitude values to decibel values
* \details To Decibels augmentation for 1D/2D audio buffer converts magnitude values to decibel values
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel/2D audio tensor with 1 channel), offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HOST memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel/2D audio tensor with 1 channel), offsetInBytes >= 0, dataType = F32)
* \param [in] srcDims source tensor sizes for each element in batch (2D tensor in HOST memory, of size batchSize * 2)
* \param [in] cutOffDB minimum or cut-off ratio in dB
* \param [in] multiplier factor by which the logarithm is multiplied
Expand All @@ -103,6 +103,25 @@ RppStatus rppt_non_silent_region_detection_gpu(RppPtr_t srcPtr, RpptDescPtr srcD
*/
RppStatus rppt_to_decibels_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr srcDims, Rpp32f cutOffDB, Rpp32f multiplier, Rpp32f referenceMagnitude, rppHandle_t rppHandle);

#ifdef GPU_SUPPORT
/*! \brief To Decibels augmentation on HIP backend
* \details To Decibels augmentation for 1D/2D audio buffer converts magnitude values to decibel values
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel/2D audio tensor with 1 channel), offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HIP memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel/2D audio tensor with 1 channel), offsetInBytes >= 0, dataType = F32)
* \param [in] srcDims source tensor sizes for each element in batch (2D tensor in Pinned/HIP memory, of size batchSize * 2)
* \param [in] cutOffDB minimum or cut-off ratio in dB
* \param [in] multiplier factor by which the logarithm is multiplied
* \param [in] referenceMagnitude Reference magnitude if not provided maximum value of input used as reference
* \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_to_decibels_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr srcDims, Rpp32f cutOffDB, Rpp32f multiplier, Rpp32f referenceMagnitude, rppHandle_t rppHandle);
#endif // GPU_SUPPORT

/*! \brief Pre Emphasis Filter augmentation on HOST backend
* \details Pre Emphasis Filter augmentation for audio data
* \param [in] srcPtr source tensor in HOST memory
Expand All @@ -122,9 +141,9 @@ RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr,
/*! \brief Down Mixing augmentation on HOST backend
* \details Down Mixing augmentation for audio data
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HOST memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2)
* \param [in] normalizeWeights bool flag to specify if normalization of weights is needed
* \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
Expand All @@ -134,6 +153,23 @@ RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr,
*/
RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDimsTensor, bool normalizeWeights, rppHandle_t rppHandle);

#ifdef GPU_SUPPORT
/*! \brief Down Mixing augmentation on HIP backend
* \details Down Mixing augmentation for audio data
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HIP memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HIP/Pinned memory, of size batchSize * 2)
* \param [in] normalizeWeights bool flag to specify if normalization of weights is needed
* \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_down_mixing_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDimsTensor, bool normalizeWeights, rppHandle_t rppHandle);
#endif // GPU_SUPPORT

/*! \brief Produces a spectrogram from a 1D audio buffer on HOST backend
* \details Spectrogram for 1D audio buffer
* \param [in] srcPtr source tensor in HOST memory
Expand Down
8 changes: 4 additions & 4 deletions include/rppt_tensor_effects_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -512,8 +512,8 @@ RppStatus rppt_jitter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP
/*! \brief Gaussian noise augmentation on HOST backend
* \details This function adds gaussian noise to a batch of 4D tensors.
* Support added for u8 -> u8, f32 -> f32 datatypes.
* \image html input150x150x4.gif Sample Input
* \image html effects_augmentations_gaussian_noise_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/effects_augmentations_gaussian_noise_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcGenericDescPtr source tensor descriptor
* \param [out] dstPtr destination tensor in HOST memory
Expand All @@ -534,8 +534,8 @@ RppStatus rppt_gaussian_noise_voxel_host(RppPtr_t srcPtr, RpptGenericDescPtr src
/*! \brief Gaussian noise augmentation on HIP backend
* \details This function adds gaussian noise to a batch of 4D tensors.
* Support added for u8 -> u8, f32 -> f32 datatypes.
* \image html input150x150x4.gif Sample Input
* \image html effects_augmentations_gaussian_noise_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/effects_augmentations_gaussian_noise_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcGenericDescPtr source tensor descriptor
* \param [out] dstPtr destination tensor in HIP memory
Expand Down
8 changes: 4 additions & 4 deletions include/rppt_tensor_geometric_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -541,8 +541,8 @@ RppStatus rppt_crop_and_patch_gpu(RppPtr_t srcPtr1, RppPtr_t srcPtr2, RpptDescPt
/*! \brief Flip voxel augmentation HOST
* \details The flip voxel augmentation performs a mask-controlled horizontal/vertical/depth flip on a generic 4D tensor.
<br> Support added for f32 -> f32 and u8 -> u8 dataypes.
* \image html input150x150x4.gif Sample Input
* \image html geometric_augmentations_flip_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/geometric_augmentations_flip_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcGenericDescPtr source tensor descriptor (Restrictions - numDims = 5, offsetInBytes >= 0, dataType = U8/F32, layout = NCDHW/NDHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HOST memory
Expand All @@ -564,8 +564,8 @@ RppStatus rppt_flip_voxel_host(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDes
/*! \brief Flip voxel augmentation GPU
* \details The flip voxel augmentation performs a mask-controlled horizontal/vertical/depth flip on a generic 4D tensor.
<br> Support added for f32 -> f32 and u8 -> u8 dataypes.
* \image html input150x150x4.gif Sample Input
* \image html geometric_augmentations_flip_150x150x4.gif Sample Output
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
* \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/geometric_augmentations_flip_150x150x4.gif Sample Output
* \param [in] srcPtr source tensor in HIP memory
* \param [in] srcGenericDescPtr source tensor descriptor (Restrictions - numDims = 5, offsetInBytes >= 0, dataType = U8/F32, layout = NCDHW/NDHWC, c = 1/3)
* \param [out] dstPtr destination tensor in HIP memory
Expand Down
6 changes: 0 additions & 6 deletions src/include/cpu/rpp_cpu_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5452,12 +5452,6 @@ inline void compute_bicubic_coefficient(Rpp32f weight, Rpp32f &coeff)
coeff = (x >= 2) ? 0 : ((x > 1) ? (x * x * (-0.5f * x + 2.5f) - 4.0f * x + 2.0f) : (x * x * (1.5f * x - 2.5f) + 1.0f));
}

inline Rpp32f sinc(Rpp32f x)
{
x *= M_PI;
return (std::abs(x) < 1e-5f) ? (1.0f - x * x * ONE_OVER_6) : std::sin(x) / x;
}

inline void compute_lanczos3_coefficient(Rpp32f weight, Rpp32f &coeff)
{
coeff = fabs(weight) >= 3 ? 0.0f : (sinc(weight) * sinc(weight * 0.333333f));
Expand Down
2 changes: 2 additions & 0 deletions src/modules/hip/hip_tensor_audio_augmentations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ SOFTWARE.
#define HIP_TENSOR_AUDIO_AUGMENTATIONS_HPP

#include "kernel/non_silent_region_detection.hpp"
#include "kernel/down_mixing.hpp"
#include "kernel/to_decibels.hpp"
#include "kernel/resample.hpp"

#endif // HIP_TENSOR_AUDIO_AUGMENTATIONS_HPP
Loading