Skip to content

Commit

Permalink
Merge pull request #271 from sampath1117/sr/audio_merge_fixes
Browse files Browse the repository at this point in the history
Merge fixes for audio augmentations
  • Loading branch information
r-abishek authored May 9, 2024
2 parents 1a3015c + 5470941 commit 982f18e
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 31 deletions.
9 changes: 0 additions & 9 deletions include/rppdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,15 +428,6 @@ typedef enum
REFLECT
} RpptAudioBorderType;

/*! \brief RPPT Spectrogram Layout enum
* \details Specifies the output layout of the spectrogram: frequency-major (FT) or time-major (TF).
* \ingroup group_rppdefs
*/
typedef enum
{
FT = 0, // Frequency major
TF, // Time major
} RpptSpectrogramLayout;

/*! \brief RPPT Mel Scale Formula
* \ingroup group_rppdefs
*/
Expand Down
9 changes: 4 additions & 5 deletions include/rppt_tensor_audio_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HOST memory
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT / NTF)
* \param [in] srcLengthTensor source audio buffer length (1D tensor in HOST memory, of size batchSize)
* \param [in] centerWindows indicates whether extracted windows should be padded so that the window function is centered at multiples of windowStep
* \param [in] reflectPadding indicates the padding policy when sampling outside the bounds of the signal
Expand All @@ -124,20 +124,19 @@ RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_
* \param [in] power exponent of the magnitude of the spectrum
* \param [in] windowLength window size in number of samples
* \param [in] windowStep step between the STFT windows in number of samples
* \param [in] layout specifies output layout of spectrogram
* \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_spectrogram_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcLengthTensor, bool centerWindows, bool reflectPadding, Rpp32f *windowFunction, Rpp32s nfft, Rpp32s power, Rpp32s windowLength, Rpp32s windowStep, RpptSpectrogramLayout layout, rppHandle_t rppHandle);
RppStatus rppt_spectrogram_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcLengthTensor, bool centerWindows, bool reflectPadding, Rpp32f *windowFunction, Rpp32s nfft, Rpp32s power, Rpp32s windowLength, Rpp32s windowStep, rppHandle_t rppHandle);

/*! \brief Mel filter bank augmentation HOST backend
* \details Mel filter bank augmentation for audio data
* \param[in] srcPtr source tensor in HOST memory
* \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT / NTF)
* \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT)
* \param[out] dstPtr destination tensor in HOST memory
* \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT / NTF)
* \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT)
* \param[in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2)
* \param[in] maxFreq maximum frequency; if not provided, maxFreq = sampleRate / 2
* \param[in] minFreq minimum frequency
Expand Down
2 changes: 1 addition & 1 deletion src/modules/cpu/kernel/pre_emphasis_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ RppStatus pre_emphasis_filter_host_tensor(Rpp32f *srcPtr,
dstPtrTemp[0] = srcPtrTemp[0] - coeff * border;

Rpp32s vectorIncrement = 8;
Rpp32s alignedLength = (bufferLength / 8) * 8;
Rpp32s alignedLength = (bufferLength / 8) * 8 - 8;
__m256 pCoeff = _mm256_set1_ps(coeff);

Rpp32s vectorLoopCount = 1;
Expand Down
3 changes: 1 addition & 2 deletions src/modules/cpu/kernel/spectrogram.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,10 @@ RppStatus spectrogram_host_tensor(Rpp32f *srcPtr,
Rpp32s power,
Rpp32s windowLength,
Rpp32s windowStep,
RpptSpectrogramLayout layout,
rpp::Handle& handle)
{
Rpp32s windowCenterOffset = 0;
bool vertical = (layout == RpptSpectrogramLayout::FT);
bool vertical = (dstDescPtr->layout == RpptLayout::NFT);
if (centerWindows) windowCenterOffset = windowLength / 2;
if (nfft == 0) nfft = windowLength;
const Rpp32s numBins = nfft / 2 + 1;
Expand Down
4 changes: 2 additions & 2 deletions src/modules/rppt_tensor_audio_augmentations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,10 @@ RppStatus rppt_spectrogram_host(RppPtr_t srcPtr,
Rpp32s power,
Rpp32s windowLength,
Rpp32s windowStep,
RpptSpectrogramLayout layout,
rppHandle_t rppHandle)
{
if ((dstDescPtr->layout != RpptLayout::NFT) && (dstDescPtr->layout != RpptLayout::NTF)) return RPP_ERROR_INVALID_DST_LAYOUT;

if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
{
spectrogram_host_tensor(static_cast<Rpp32f*>(srcPtr),
Expand All @@ -186,7 +187,6 @@ RppStatus rppt_spectrogram_host(RppPtr_t srcPtr,
power,
windowLength,
windowStep,
layout,
rpp::deref(rppHandle));

return RPP_SUCCESS;
Expand Down
22 changes: 10 additions & 12 deletions utilities/test_suite/HOST/Tensor_audio_host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,13 @@ int main(int argc, char **argv)
RpptImagePatch *srcDims = (RpptImagePatch *) calloc(batchSize, sizeof(RpptImagePatch));
RpptImagePatch *dstDims = (RpptImagePatch *) calloc(batchSize, sizeof(RpptImagePatch));

// buffers used for non silent region detection
Rpp32s detectedIndex[batchSize], detectionLength[batchSize];

// run case-wise RPP API and measure time
rppHandle_t handle;
rppCreateWithBatchSize(&handle, srcDescPtr->n, 3);

int noOfIterations = (int)audioNames.size() / batchSize;
double maxWallTime = 0, minWallTime = 500, avgWallTime = 0;
string testCaseName;
Expand All @@ -158,8 +162,6 @@ int main(int argc, char **argv)
case 0:
{
testCaseName = "non_silent_region_detection";
Rpp32s detectedIndex[batchSize];
Rpp32s detectionLength[batchSize];
Rpp32f cutOffDB = -60.0;
Rpp32s windowLength = 2048;
Rpp32f referencePower = 0.0f;
Expand All @@ -168,10 +170,6 @@ int main(int argc, char **argv)
startWallTime = omp_get_wtime();
rppt_non_silent_region_detection_host(inputf32, srcDescPtr, srcLengthTensor, detectedIndex, detectionLength, cutOffDB, windowLength, referencePower, resetInterval, handle);

// QA mode - verify outputs with golden outputs. Below code doesn’t run for performance tests
if (testType == 0)
verify_non_silent_region_detection(detectedIndex, detectionLength, testCaseName, batchSize, audioNames, dst);

break;
}
case 1:
Expand Down Expand Up @@ -238,15 +236,15 @@ int main(int argc, char **argv)
Rpp32s windowLength = 320;
Rpp32s windowStep = 160;
Rpp32s nfft = 512;
RpptSpectrogramLayout layout = RpptSpectrogramLayout::FT;
dstDescPtr->layout = RpptLayout::NFT;

int windowOffset = 0;
if(!centerWindows)
windowOffset = windowLength;

maxDstWidth = 0;
maxDstHeight = 0;
if(layout == RpptSpectrogramLayout::FT)
if(dstDescPtr->layout == RpptLayout::NFT)
{
for(int i = 0; i < noOfAudioFiles; i++)
{
Expand Down Expand Up @@ -274,7 +272,7 @@ int main(int argc, char **argv)
outputf32 = (Rpp32f *)realloc(outputf32, spectrogramBufferSize * sizeof(Rpp32f));

startWallTime = omp_get_wtime();
rppt_spectrogram_host(inputf32, srcDescPtr, outputf32, dstDescPtr, srcLengthTensor, centerWindows, reflectPadding, windowFn, nfft, power, windowLength, windowStep, layout, handle);
rppt_spectrogram_host(inputf32, srcDescPtr, outputf32, dstDescPtr, srcLengthTensor, centerWindows, reflectPadding, windowFn, nfft, power, windowLength, windowStep, handle);

break;
}
Expand Down Expand Up @@ -421,9 +419,9 @@ int main(int argc, char **argv)
// QA mode - verify outputs with golden outputs. Below code doesn’t run for performance tests
if (testType == 0)
{
/* Run only if testCase is not 0
For testCase 0 verify_non_silent_region_detection function is used for QA testing */
if (testCase != 0)
if (testCase == 0)
verify_non_silent_region_detection(detectedIndex, detectionLength, testCaseName, batchSize, audioNames, dst);
else
verify_output(outputf32, dstDescPtr, dstDims, testCaseName, dst, scriptPath);

/* Dump the outputs to csv files for debugging
Expand Down

0 comments on commit 982f18e

Please sign in to comment.