Merge pull request #306 from sampath1117/sr/resample_develop_merge

Resample HIP develop merge
r-abishek · Aug 5, 2024 · b59c519 · b59c519
2 parents 6c3c214 + b1c87ea
commit b59c519
Show file tree

Hide file tree

Showing 14 changed files with 623 additions and 79 deletions.
diff --git a/include/rppdefs.h b/include/rppdefs.h
@@ -64,6 +64,7 @@ SOFTWARE.
 } while (0)
 
 #ifdef HIP_COMPILE
+#include <hip/hip_runtime.h>
 #define RPP_HOST_DEVICE __host__ __device__
 #else
 #define RPP_HOST_DEVICE
@@ -742,10 +743,48 @@ typedef struct RpptResamplingWindow
     Rpp32f scale = 1, center = 1;
     Rpp32s lobes = 0, coeffs = 0;
     Rpp32s lookupSize = 0;
-    Rpp32f *lookup;
+    Rpp32f *lookup = nullptr;
     __m128 pCenter, pScale;
 } RpptResamplingWindow;
 
+inline Rpp32f sinc(Rpp32f x)
+{
+    const Rpp32f piX = x * M_PI;    // normalized sinc: evaluates sin(pi * x) / (pi * x)
+    return (std::abs(piX) < 1e-5f) ? (1.0f - piX * piX * (1.0f / 6)) : (std::sin(piX) / piX);    // near zero use the series 1 - t^2 / 6 instead of dividing by ~0
+}
+
+inline Rpp64f hann(Rpp64f x)
+{
+    return (1 + std::cos(M_PI * x)) * 0.5;    // raised-cosine (Hann) weight: 1 at x = 0, falling to 0 at x = +/-1
+}
+
+// Initialization function for a resampling window (RpptResamplingWindow): allocates window.lookup (not released here) and fills it with a Hann-windowed sinc LUT (look up table)
+// of 'coeffs' taps covering sinc arguments in about [-lobes, lobes]; requires coeffs > 1 since the step divides by (coeffs - 1). The LUT is further used in Resample audio augmentation
+inline void windowed_sinc(RpptResamplingWindow &window, Rpp32s coeffs, Rpp32s lobes)
+{
+    Rpp32f scale = 2.0f * lobes / (coeffs - 1);    // sinc argument advance per LUT entry
+    Rpp32f scale_envelope = 2.0f / coeffs;         // Hann envelope argument advance per LUT entry
+    window.coeffs = coeffs;
+    window.lobes = lobes;
+    window.lookupSize = coeffs + 5;                // 1 leading pad entry + coeffs taps + 4 trailing pad entries
+#ifdef GPU_SUPPORT
+    CHECK_RETURN_STATUS(hipHostMalloc(&(window.lookup), window.lookupSize * sizeof(Rpp32f)));
+#else
+    window.lookup = static_cast<Rpp32f *>(malloc(window.lookupSize * sizeof(Rpp32f)));
+#endif
+    for (Rpp32s i = 0; i < window.lookupSize; i++) window.lookup[i] = 0.0f;    // allocation is not guaranteed zeroed: clear the pad entries the tap loop below never writes
+    Rpp32s center = (coeffs - 1) * 0.5f;
+    for (int i = 0; i < coeffs; i++) {
+        Rpp32f x = (i - center) * scale;
+        Rpp32f y = (i - center) * scale_envelope;
+        window.lookup[i + 1] = sinc(x) * hann(y);  // taps start at index 1, after the leading pad entry
+    }
+    window.center = center + 1;                    // shifted by one to account for the leading pad entry
+    window.scale = 1 / scale;
+    window.pCenter = _mm_set1_ps(window.center);
+    window.pScale = _mm_set1_ps(window.scale);
+}
+
 /******************** HOST memory typedefs ********************/
 
 /*! \brief RPP HOST 32-bit float memory

diff --git a/include/rppt_tensor_arithmetic_operations.h b/include/rppt_tensor_arithmetic_operations.h
@@ -47,8 +47,8 @@ extern "C" {
  *          It multiplies each element of the source tensor by a corresponding element in the 'mulTensor',
  *          adds a corresponding element from the 'addTensor', and stores the result in the destination tensor.
  *          Support added for f32 -> f32 datatype.
- * \image html input150x150x4.gif Sample Input
- * \image html arithmetic_operations_fused_multiply_add_scalar_150x150x4.gif Sample Output
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_fused_multiply_add_scalar_150x150x4.gif Sample Output
  * \param [in] srcPtr source tensor in HOST memory
  * \param[in] srcGenericDescPtr source tensor descriptor
  * \param[out] dstPtr destination tensor in HOST memory
@@ -70,8 +70,8 @@ RppStatus rppt_fused_multiply_add_scalar_host(RppPtr_t srcPtr, RpptGenericDescPt
  *          It multiplies each element of the source tensor by a corresponding element in the 'mulTensor',
  *          adds a corresponding element from the 'addTensor', and stores the result in the destination tensor.
  *          Support added for f32 -> f32 datatype.
- * \image html input150x150x4.gif Sample Input
- * \image html arithmetic_operations_fused_multiply_add_scalar_150x150x4.gif Sample Output
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_fused_multiply_add_scalar_150x150x4.gif Sample Output
  * \param [in] srcPtr source tensor in HIP memory
  * \param[in] srcGenericDescPtr source tensor descriptor
  * \param[out] dstPtr destination tensor in HIP memory
@@ -92,8 +92,8 @@ RppStatus rppt_fused_multiply_add_scalar_gpu(RppPtr_t srcPtr, RpptGenericDescPtr
  * \details This function performs the addition operation on a batch of 4D tensors.
  *          It adds a corresponding element from the 'addTensor' to source tensor, and stores the result in the destination tensor.
  *          Support added for f32 -> f32 datatype.
- * \image html input150x150x4.gif Sample Input
- * \image html arithmetic_operations_add_scalar_150x150x4.gif Sample Output
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_add_scalar_150x150x4.gif Sample Output
  * \param [in] srcPtr source tensor in HOST memory
  * \param[in] srcGenericDescPtr source tensor descriptor
  * \param[out] dstPtr destination tensor in HOST memory
@@ -113,8 +113,8 @@ RppStatus rppt_add_scalar_host(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDes
  * \details This function performs the addition operation on a batch of 4D tensors.
  *          It adds a corresponding element from the 'addTensor' to source tensor, and stores the result in the destination tensor.
  *          Support added for f32 -> f32 datatype.
- * \image html input150x150x4.gif Sample Input
- * \image html arithmetic_operations_add_scalar_150x150x4.gif Sample Output
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_add_scalar_150x150x4.gif Sample Output
  * \param [in] srcPtr source tensor in HIP memory
  * \param[in] srcGenericDescPtr source tensor descriptor
  * \param[out] dstPtr destination tensor in HIP memory
@@ -134,8 +134,8 @@ RppStatus rppt_add_scalar_gpu(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDesc
  * \details This function performs the subtraction operation on a batch of 4D tensors.
  *          It takes a corresponding element from 'subtractTensor' and subtracts it from source tensor. Result is stored in the destination tensor.
  *          Support added for f32 -> f32 datatype.
- * \image html input150x150x4.gif Sample Input
- * \image html arithmetic_operations_subtract_scalar_150x150x4.gif Sample Output
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_subtract_scalar_150x150x4.gif Sample Output
  * \param [in] srcPtr source tensor in HOST memory
  * \param[in] srcGenericDescPtr source tensor descriptor
  * \param[out] dstPtr destination tensor in HOST memory
@@ -155,8 +155,8 @@ RppStatus rppt_subtract_scalar_host(RppPtr_t srcPtr, RpptGenericDescPtr srcGener
  * \details This function performs the subtraction operation on a batch of 4D tensors.
  *          It takes a corresponding element from 'subtractTensor' and subtracts it from source tensor. Result is stored in the destination tensor.
  *          Support added for f32 -> f32 datatype.
- * \image html input150x150x4.gif Sample Input
- * \image html arithmetic_operations_subtract_scalar_150x150x4.gif Sample Output
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_subtract_scalar_150x150x4.gif Sample Output
  * \param [in] srcPtr source tensor in HIP memory
  * \param[in] srcGenericDescPtr source tensor descriptor
  * \param[out] dstPtr destination tensor in HIP memory
@@ -176,8 +176,8 @@ RppStatus rppt_subtract_scalar_gpu(RppPtr_t srcPtr, RpptGenericDescPtr srcGeneri
  * \details This function performs the multiplication operation on a batch of 4D tensors.
  *          It takes a corresponding element from 'multiplyTensor' and multiplies it with source tensor. Result is stored in the destination tensor.
  *          Support added for f32 -> f32 datatype.
- * \image html input150x150x4.gif Sample Input
- * \image html arithmetic_operations_multiply_scalar_150x150x4.gif Sample Output
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_multiply_scalar_150x150x4.gif Sample Output
  * \param [in] srcPtr source tensor in HOST memory
  * \param[in] srcGenericDescPtr source tensor descriptor
  * \param[out] dstPtr destination tensor in HOST memory
@@ -197,8 +197,8 @@ RppStatus rppt_multiply_scalar_host(RppPtr_t srcPtr, RpptGenericDescPtr srcGener
  * \details This function performs the multiplication operation on a batch of 4D tensors.
  *          It takes a corresponding element from 'multiplyTensor' and multiplies it with source tensor. Result is stored in the destination tensor.
  *          Support added for f32 -> f32 datatype.
- * \image html input150x150x4.gif Sample Input
- * \image html arithmetic_operations_multiply_scalar_150x150x4.gif Sample Output
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/arithmetic_operations_multiply_scalar_150x150x4.gif Sample Output
  * \param [in] srcPtr source tensor in HIP memory
  * \param[in] srcGenericDescPtr source tensor descriptor
  * \param[out] dstPtr destination tensor in HIP memory

diff --git a/include/rppt_tensor_audio_augmentations.h b/include/rppt_tensor_audio_augmentations.h
@@ -87,11 +87,11 @@ RppStatus rppt_non_silent_region_detection_gpu(RppPtr_t srcPtr, RpptDescPtr srcD
 #endif // GPU_SUPPORT
 
 /*! \brief To Decibels augmentation on HOST backend
- * \details To Decibels augmentation for 1D audio buffer converts magnitude values to decibel values
+ * \details To Decibels augmentation for 1D/2D audio buffer converts magnitude values to decibel values
  * \param [in] srcPtr source tensor in HOST memory
- * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel/2D audio tensor with 1 channel), offsetInBytes >= 0, dataType = F32)
  * \param [out] dstPtr destination tensor in HOST memory
- * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+ * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel/2D audio tensor with 1 channel), offsetInBytes >= 0, dataType = F32)
  * \param [in] srcDims source tensor sizes for each element in batch (2D tensor in HOST memory, of size batchSize * 2)
  * \param [in] cutOffDB  minimum or cut-off ratio in dB
  * \param [in] multiplier factor by which the logarithm is multiplied
@@ -103,6 +103,25 @@ RppStatus rppt_non_silent_region_detection_gpu(RppPtr_t srcPtr, RpptDescPtr srcD
  */
 RppStatus rppt_to_decibels_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr srcDims, Rpp32f cutOffDB, Rpp32f multiplier, Rpp32f referenceMagnitude, rppHandle_t rppHandle);
 
+#ifdef GPU_SUPPORT
+/*! \brief To Decibels augmentation on HIP backend
+ * \details To Decibels augmentation for 1D/2D audio buffer converts magnitude values to decibel values
+ * \param [in] srcPtr source tensor in HIP memory
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel/2D audio tensor with 1 channel), offsetInBytes >= 0, dataType = F32)
+ * \param [out] dstPtr destination tensor in HIP memory
+ * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel/2D audio tensor with 1 channel), offsetInBytes >= 0, dataType = F32)
+ * \param [in] srcDims source tensor sizes for each element in batch (2D tensor in Pinned/HIP memory, of size batchSize * 2)
+ * \param [in] cutOffDB  minimum or cut-off ratio in dB
+ * \param [in] multiplier factor by which the logarithm is multiplied
+ * \param [in] referenceMagnitude reference magnitude; if not provided, the maximum value of the input is used as the reference
+ * \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
+ * \return A <tt> \ref RppStatus</tt> enumeration.
+ * \retval RPP_SUCCESS Successful completion.
+ * \retval RPP_ERROR* Unsuccessful completion.
+ */
+RppStatus rppt_to_decibels_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr srcDims, Rpp32f cutOffDB, Rpp32f multiplier, Rpp32f referenceMagnitude, rppHandle_t rppHandle);
+#endif // GPU_SUPPORT
+
 /*! \brief Pre Emphasis Filter augmentation on HOST backend
  * \details Pre Emphasis Filter augmentation for audio data
  * \param [in] srcPtr source tensor in HOST memory
@@ -122,9 +141,9 @@ RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr,
 /*! \brief Down Mixing augmentation on HOST backend
 * \details Down Mixing augmentation for audio data
 * \param [in] srcPtr source tensor in HOST memory
-* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
 * \param [out] dstPtr destination tensor in HOST memory
-* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2, offsetInBytes >= 0, dataType = F32)
 * \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2)
 * \param [in] normalizeWeights bool flag to specify if normalization of weights is needed
 * \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
@@ -134,6 +153,23 @@ RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr,
 */
 RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDimsTensor, bool normalizeWeights, rppHandle_t rppHandle);
 
+#ifdef GPU_SUPPORT
+/*! \brief Down Mixing augmentation on HIP backend
+* \details Down Mixing augmentation for audio data
+* \param [in] srcPtr source tensor in HIP memory
+* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
+* \param [out] dstPtr destination tensor in HIP memory
+* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2, offsetInBytes >= 0, dataType = F32)
+* \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HIP/Pinned memory, of size batchSize * 2)
+* \param [in] normalizeWeights bool flag to specify if normalization of weights is needed
+* \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
+* \return A <tt> \ref RppStatus</tt> enumeration.
+* \retval RPP_SUCCESS Successful completion.
+* \retval RPP_ERROR* Unsuccessful completion.
+*/
+RppStatus rppt_down_mixing_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDimsTensor, bool normalizeWeights, rppHandle_t rppHandle);
+#endif // GPU_SUPPORT
+
 /*! \brief Produces a spectrogram from a 1D audio buffer on HOST backend
  * \details Spectrogram for 1D audio buffer
  * \param [in] srcPtr source tensor in HOST memory

diff --git a/include/rppt_tensor_effects_augmentations.h b/include/rppt_tensor_effects_augmentations.h
@@ -512,8 +512,8 @@ RppStatus rppt_jitter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP
 /*! \brief  Gaussian noise augmentation on HOST backend
  * \details This function adds gaussian noise to a batch of 4D tensors.
  *          Support added for u8 -> u8, f32 -> f32 datatypes.
- * \image html input150x150x4.gif Sample Input
- * \image html effects_augmentations_gaussian_noise_150x150x4.gif Sample Output
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/effects_augmentations_gaussian_noise_150x150x4.gif Sample Output
  * \param [in] srcPtr source tensor in HOST memory
  * \param [in] srcGenericDescPtr source tensor descriptor
  * \param [out] dstPtr destination tensor in HOST memory
@@ -534,8 +534,8 @@ RppStatus rppt_gaussian_noise_voxel_host(RppPtr_t srcPtr, RpptGenericDescPtr src
 /*! \brief  Gaussian noise augmentation on HIP backend
  * \details This function adds gaussian noise to a batch of 4D tensors.
  *          Support added for u8 -> u8, f32 -> f32 datatypes.
- * \image html input150x150x4.gif Sample Input
- * \image html effects_augmentations_gaussian_noise_150x150x4.gif Sample Output
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/effects_augmentations_gaussian_noise_150x150x4.gif Sample Output
  * \param [in] srcPtr source tensor in HIP memory
  * \param [in] srcGenericDescPtr source tensor descriptor
  * \param [out] dstPtr destination tensor in HIP memory

diff --git a/include/rppt_tensor_geometric_augmentations.h b/include/rppt_tensor_geometric_augmentations.h
@@ -541,8 +541,8 @@ RppStatus rppt_crop_and_patch_gpu(RppPtr_t srcPtr1, RppPtr_t srcPtr2, RpptDescPt
 /*! \brief Flip voxel augmentation HOST
  * \details The flip voxel augmentation performs a mask-controlled horizontal/vertical/depth flip on a generic 4D tensor.
             <br> Support added for f32 -> f32 and u8 -> u8 datatypes.
- * \image html input150x150x4.gif Sample Input
- * \image html geometric_augmentations_flip_150x150x4.gif Sample Output
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/geometric_augmentations_flip_150x150x4.gif Sample Output
  * \param [in] srcPtr source tensor in HOST memory
  * \param [in] srcGenericDescPtr source tensor descriptor (Restrictions - numDims = 5, offsetInBytes >= 0, dataType = U8/F32, layout = NCDHW/NDHWC, c = 1/3)
  * \param [out] dstPtr destination tensor in HOST memory
@@ -564,8 +564,8 @@ RppStatus rppt_flip_voxel_host(RppPtr_t srcPtr, RpptGenericDescPtr srcGenericDes
 /*! \brief Flip voxel augmentation GPU
  * \details The flip voxel augmentation performs a mask-controlled horizontal/vertical/depth flip on a generic 4D tensor.
             <br> Support added for f32 -> f32 and u8 -> u8 datatypes.
- * \image html input150x150x4.gif Sample Input
- * \image html geometric_augmentations_flip_150x150x4.gif Sample Output
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenInputs/input150x150x4.gif Sample Input
+ * \image html https://raw.githubusercontent.com/ROCm/rpp/develop/docs/data/doxygenOutputs/geometric_augmentations_flip_150x150x4.gif Sample Output
  * \param [in] srcPtr source tensor in HIP memory
  * \param [in] srcGenericDescPtr source tensor descriptor (Restrictions - numDims = 5, offsetInBytes >= 0, dataType = U8/F32, layout = NCDHW/NDHWC, c = 1/3)
  * \param [out] dstPtr destination tensor in HIP memory

diff --git a/src/include/cpu/rpp_cpu_common.hpp b/src/include/cpu/rpp_cpu_common.hpp
@@ -5452,12 +5452,6 @@ inline void compute_bicubic_coefficient(Rpp32f weight, Rpp32f &coeff)
     coeff = (x >= 2) ? 0 : ((x > 1) ? (x * x * (-0.5f * x + 2.5f) - 4.0f * x + 2.0f) : (x * x * (1.5f * x - 2.5f) + 1.0f));
 }
 
-inline Rpp32f sinc(Rpp32f x)
-{
-    x *= M_PI;
-    return (std::abs(x) < 1e-5f) ? (1.0f - x * x * ONE_OVER_6) : std::sin(x) / x;
-}
-
 inline void compute_lanczos3_coefficient(Rpp32f weight, Rpp32f &coeff)
 {
     coeff = fabs(weight) >= 3 ? 0.0f : (sinc(weight) * sinc(weight * 0.333333f));

diff --git a/src/modules/hip/hip_tensor_audio_augmentations.hpp b/src/modules/hip/hip_tensor_audio_augmentations.hpp
@@ -26,6 +26,8 @@ SOFTWARE.
 #define HIP_TENSOR_AUDIO_AUGMENTATIONS_HPP
 
 #include "kernel/non_silent_region_detection.hpp"
+#include "kernel/down_mixing.hpp"
+#include "kernel/to_decibels.hpp"
 #include "kernel/resample.hpp"
 
 #endif // HIP_TENSOR_AUDIO_AUGMENTATIONS_HPP