Skip to content

Commit

Permalink
Merge branch 'develop' into ar/opt_pixelate
Browse files Browse the repository at this point in the history
  • Loading branch information
kiritigowda authored Aug 5, 2024
2 parents cef5c17 + 5c3772a commit 2c44bb2
Show file tree
Hide file tree
Showing 7 changed files with 161 additions and 4 deletions.
21 changes: 19 additions & 2 deletions include/rppt_tensor_audio_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,9 @@ RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr,
/*! \brief Down Mixing augmentation on HOST backend
* \details Down Mixing augmentation for audio data
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HOST memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2, offsetInBytes >= 0, dataType = F32)
* \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2)
* \param [in] normalizeWeights bool flag to specify if normalization of weights is needed
* \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
Expand All @@ -153,6 +153,23 @@ RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr,
*/
RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDimsTensor, bool normalizeWeights, rppHandle_t rppHandle);

#ifdef GPU_SUPPORT
/*! \brief Down Mixing augmentation on HIP backend
 * \details Down Mixing augmentation for audio data, averaging all input channels into a single output channel per sample
 * \param [in] srcPtr source tensor in HIP memory
 * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
 * \param [out] dstPtr destination tensor in HIP memory
 * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2, offsetInBytes >= 0, dataType = F32)
 * \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HIP/Pinned memory, of size batchSize * 2, laid out as [length0, channels0, length1, channels1, ...])
 * \param [in] normalizeWeights bool flag to specify if normalization of weights is needed
 * \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
 * \return A <tt> \ref RppStatus</tt> enumeration.
 * \retval RPP_SUCCESS Successful completion.
 * \retval RPP_ERROR* Unsuccessful completion (e.g. unsupported data type or invalid source dimensions).
 */
RppStatus rppt_down_mixing_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDimsTensor, bool normalizeWeights, rppHandle_t rppHandle);
#endif // GPU_SUPPORT

/*! \brief Produces a spectrogram from a 1D audio buffer on HOST backend
* \details Spectrogram for 1D audio buffer
* \param [in] srcPtr source tensor in HOST memory
Expand Down
1 change: 1 addition & 0 deletions src/modules/hip/hip_tensor_audio_augmentations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ SOFTWARE.
#define HIP_TENSOR_AUDIO_AUGMENTATIONS_HPP

#include "kernel/non_silent_region_detection.hpp"
#include "kernel/down_mixing.hpp"
#include "kernel/to_decibels.hpp"

#endif // HIP_TENSOR_AUDIO_AUGMENTATIONS_HPP
72 changes: 72 additions & 0 deletions src/modules/hip/kernel/down_mixing.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#include <hip/hip_runtime.h>
#include "rpp_hip_common.hpp"

// Down-mixes multi-channel audio to a single channel by averaging all channels
// of each sample. One thread produces one output sample:
//   id_x -> sample index within the buffer, id_z -> batch index.
// srcDimsTensor holds per-batch-item (length, channels) pairs.
__global__ void down_mixing_hip_tensor(float *srcPtr,
                                       uint srcStride,
                                       float *dstPtr,
                                       uint dstStride,
                                       int2 *srcDimsTensor)

{
    int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
    int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
    int srcLength = srcDimsTensor[id_z].x;  // valid sample count for this batch item
    int channels = srcDimsTensor[id_z].y;   // channel count for this batch item

    // threads beyond this item's valid length have no work
    if (id_x >= srcLength)
        return;

    float outVal = 0.0f;
    // channels of one sample are contiguous (interleaved layout), so the
    // channel sum is a contiguous read starting at id_x * channels
    uint srcIdx = id_z * srcStride + id_x * channels;
    int i = 0;
    int alignedChannels = (channels / 8) * 8;

    // accumulate 8 channels at a time via vectorized loads up to alignedChannels
    if (alignedChannels)
    {
        d_float8 outVal_f8;
        outVal_f8.f4[0] = static_cast<float4>(0.0f);
        outVal_f8.f4[1] = outVal_f8.f4[0];
        for(; i < alignedChannels; i += 8, srcIdx += 8)
        {
            d_float8 src_f8;
            rpp_hip_load8_and_unpack_to_float8(srcPtr + srcIdx, &src_f8);
            rpp_hip_math_add8(&outVal_f8, &src_f8, &outVal_f8);
        }
        // horizontal reduction: fold 8 partial sums down to a scalar
        outVal_f8.f4[0] += outVal_f8.f4[1];
        outVal += (outVal_f8.f1[0] + outVal_f8.f1[1] + outVal_f8.f1[2] + outVal_f8.f1[3]);
    }
    // scalar tail loop for the remaining (channels % 8) channels
    for(; i < channels; i++, srcIdx++)
        outVal += srcPtr[srcIdx];
    outVal *= (1.f / channels);  // uniform 1/channels weighting (plain average)

    uint dstIdx = id_z * dstStride + id_x;
    dstPtr[dstIdx] = outVal;
}

// Launches the down-mixing kernel over the whole batch.
// Grid x spans the per-item sample range (sized to the dst nStride so every
// possible output sample index is covered), grid z spans the batch; y is unused.
RppStatus hip_exec_down_mixing_tensor(Rpp32f *srcPtr,
                                      RpptDescPtr srcDescPtr,
                                      Rpp32f *dstPtr,
                                      RpptDescPtr dstDescPtr,
                                      Rpp32s *srcDimsTensor,
                                      bool normalizeWeights,
                                      rpp::Handle& handle)
{
    dim3 blockDims(LOCAL_THREADS_X_1DIM, LOCAL_THREADS_Y_1DIM, LOCAL_THREADS_Z_1DIM);
    dim3 gridDims(ceil((Rpp32f)dstDescPtr->strides.nStride / LOCAL_THREADS_X_1DIM),
                  ceil((Rpp32f)1 / LOCAL_THREADS_Y_1DIM),
                  ceil((Rpp32f)dstDescPtr->n / LOCAL_THREADS_Z_1DIM));

    // NOTE(review): normalizeWeights is accepted for API parity but never
    // forwarded — the kernel always applies uniform 1/channels weighting.
    hipLaunchKernelGGL(down_mixing_hip_tensor,
                       gridDims,
                       blockDims,
                       0,
                       handle.GetStream(),
                       srcPtr,
                       srcDescPtr->strides.nStride,
                       dstPtr,
                       dstDescPtr->strides.nStride,
                       reinterpret_cast<int2 *>(srcDimsTensor));

    return RPP_SUCCESS;
}
36 changes: 36 additions & 0 deletions src/modules/rppt_tensor_audio_augmentations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,5 +362,41 @@ RppStatus rppt_to_decibels_gpu(RppPtr_t srcPtr,
#endif // backend
}

/******************** down_mixing ********************/

/******************** down_mixing ********************/

// Public GPU entry point for down-mixing: validates descriptors, then
// dispatches to the HIP implementation. OCL backend is not implemented.
RppStatus rppt_down_mixing_gpu(RppPtr_t srcPtr,
                               RpptDescPtr srcDescPtr,
                               RppPtr_t dstPtr,
                               RpptDescPtr dstDescPtr,
                               Rpp32s *srcDimsTensor,
                               bool normalizeWeights,
                               rppHandle_t rppHandle)
{
#ifdef HIP_COMPILE
    // exclude batchsize from input dims: valid audio tensors are
    // 1D (samples) or 2D (samples x channels)
    Rpp32u tensorDims = srcDescPtr->numDims - 1;
    if ((tensorDims != 1) && (tensorDims != 2))
        return RPP_ERROR_INVALID_SRC_DIMS;

    // only F32 -> F32 is supported
    if ((srcDescPtr->dataType != RpptDataType::F32) || (dstDescPtr->dataType != RpptDataType::F32))
        return RPP_ERROR_NOT_IMPLEMENTED;

    hip_exec_down_mixing_tensor(static_cast<Rpp32f*>(srcPtr),
                                srcDescPtr,
                                static_cast<Rpp32f*>(dstPtr),
                                dstDescPtr,
                                srcDimsTensor,
                                normalizeWeights,
                                rpp::deref(rppHandle));

    return RPP_SUCCESS;
#elif defined(OCL_COMPILE)
    return RPP_ERROR_NOT_IMPLEMENTED;
#endif // backend
}

#endif // GPU_SUPPORT
#endif // AUDIO_SUPPORT
28 changes: 28 additions & 0 deletions utilities/test_suite/HIP/Tensor_audio_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,10 @@ int main(int argc, char **argv)
set_audio_descriptor_dims_and_strides(srcDescPtr, batchSize, maxSrcHeight, maxSrcWidth, maxSrcChannels, offsetInBytes);
int maxDstChannels = maxSrcChannels;
if(testCase == 3)
{
srcDescPtr->numDims = 3;
maxDstChannels = 1;
}
set_audio_descriptor_dims_and_strides(dstDescPtr, batchSize, maxDstHeight, maxDstWidth, maxDstChannels, offsetInBytes);

// set buffer sizes for src/dst
Expand All @@ -131,6 +134,11 @@ int main(int argc, char **argv)
CHECK_RETURN_STATUS(hipHostMalloc(&srcDims, batchSize * sizeof(RpptImagePatch)));
CHECK_RETURN_STATUS(hipHostMalloc(&dstDims, batchSize * sizeof(RpptImagePatch)));

// allocate the buffer for srcDimsTensor
Rpp32s *srcDimsTensor;
if(testCase == 3)
CHECK_RETURN_STATUS(hipHostMalloc(&srcDimsTensor, batchSize * 2 * sizeof(Rpp32s)));

Rpp32s *detectedIndex = nullptr, *detectionLength = nullptr;
if(testCase == 0)
{
Expand Down Expand Up @@ -190,6 +198,24 @@ int main(int argc, char **argv)

break;
}
case 3:
{
testCaseName = "down_mixing";
bool normalizeWeights = false;

for (int i = 0, j = 0; i < batchSize; i++, j += 2)
{
srcDimsTensor[j] = srcLengthTensor[i];
srcDimsTensor[j + 1] = channelsTensor[i];
dstDims[i].height = srcLengthTensor[i];
dstDims[i].width = 1;
}

startWallTime = omp_get_wtime();
rppt_down_mixing_gpu(d_inputf32, srcDescPtr, d_outputf32, dstDescPtr, srcDimsTensor, normalizeWeights, handle);

break;
}
default:
{
missingFuncFlag = 1;
Expand Down Expand Up @@ -263,6 +289,8 @@ int main(int argc, char **argv)
CHECK_RETURN_STATUS(hipHostFree(channelsTensor));
CHECK_RETURN_STATUS(hipHostFree(srcDims));
CHECK_RETURN_STATUS(hipHostFree(dstDims));
if(testCase == 3)
CHECK_RETURN_STATUS(hipHostFree(srcDimsTensor));
if (detectedIndex != nullptr)
CHECK_RETURN_STATUS(hipHostFree(detectedIndex));
if (detectionLength != nullptr)
Expand Down
4 changes: 2 additions & 2 deletions utilities/test_suite/HIP/runAudioTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
outFolderPath = os.getcwd()
buildFolderPath = os.getcwd()
caseMin = 0
caseMax = 1
caseMax = 3


# Get a list of log files based on a flag for preserving output
Expand Down Expand Up @@ -224,7 +224,7 @@ def rpp_test_suite_parser_and_validator():
subprocess.call(["make", "-j16"], cwd=".") # nosec

# List of cases supported
supportedCaseList = ['0', '1']
supportedCaseList = ['0', '1', '3']
if qaMode and batchSize != 3:
print("QA tests can only run with a batch size of 3.")
exit(0)
Expand Down
3 changes: 3 additions & 0 deletions utilities/test_suite/HOST/Tensor_audio_host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,10 @@ int main(int argc, char **argv)
set_audio_descriptor_dims_and_strides(srcDescPtr, batchSize, maxSrcHeight, maxSrcWidth, maxSrcChannels, offsetInBytes);
int maxDstChannels = maxSrcChannels;
if(testCase == 3)
{
srcDescPtr->numDims = 3;
maxDstChannels = 1;
}
set_audio_descriptor_dims_and_strides(dstDescPtr, batchSize, maxDstHeight, maxDstWidth, maxDstChannels, offsetInBytes);

// create generic descriptor in case of slice
Expand Down

0 comments on commit 2c44bb2

Please sign in to comment.