diff --git a/include/rppdefs.h b/include/rppdefs.h index 7d49c457d..3256444f3 100644 --- a/include/rppdefs.h +++ b/include/rppdefs.h @@ -428,15 +428,6 @@ typedef enum REFLECT } RpptAudioBorderType; -/*! \brief RPPT Spectrogram Layout enum - * \ingroup group_rppdefs - */ -typedef enum -{ - FT = 0, //Frequency Major - TF, //Time Major -} RpptSpectrogramLayout; - /*! \brief RPPT Mel Scale Formula * \ingroup group_rppdefs */ diff --git a/include/rppt_tensor_audio_augmentations.h b/include/rppt_tensor_audio_augmentations.h index 13259cd22..31f3e95ef 100644 --- a/include/rppt_tensor_audio_augmentations.h +++ b/include/rppt_tensor_audio_augmentations.h @@ -115,7 +115,7 @@ RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_ * \param [in] srcPtr source tensor in HOST memory * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32) * \param [out] dstPtr destination tensor in HOST memory - * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32) + * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT / NTF) * \param [in] srcLengthTensor source audio buffer length (1D tensor in HOST memory, of size batchSize) * \param [in] centerWindows indicates whether extracted windows should be padded so that the window function is centered at multiples of window_step * \param [in] reflectPadding indicates the padding policy when sampling outside the bounds of the signal @@ -124,20 +124,19 @@ RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_ * \param [in] power exponent of the magnitude of the spectrum * \param [in] windowLength window size in number of samples * \param [in] windowStep step between the STFT windows in number of samples - * \param [in] layout specifies output layout of spectrogram * \param [in] rppHandle RPP HOST handle created with \ref rppCreateWithBatchSize() * \return A \ref RppStatus enumeration. * \retval RPP_SUCCESS Successful completion. * \retval RPP_ERROR* Unsuccessful completion. */ -RppStatus rppt_spectrogram_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcLengthTensor, bool centerWindows, bool reflectPadding, Rpp32f *windowFunction, Rpp32s nfft, Rpp32s power, Rpp32s windowLength, Rpp32s windowStep, RpptSpectrogramLayout layout, rppHandle_t rppHandle); +RppStatus rppt_spectrogram_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcLengthTensor, bool centerWindows, bool reflectPadding, Rpp32f *windowFunction, Rpp32s nfft, Rpp32s power, Rpp32s windowLength, Rpp32s windowStep, rppHandle_t rppHandle); /*! \brief Mel filter bank augmentation HOST backend * \details Mel filter bank augmentation for audio data * \param[in] srcPtr source tensor in HOST memory - * \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT / NTF) + * \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT) * \param[out] dstPtr destination tensor in HOST memory - * \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT / NTF) + * \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT) * \param[in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2) * \param[in] maxFreq maximum frequency if not provided maxFreq = sampleRate / 2 * \param[in] minFreq minimum frequency diff --git a/src/modules/cpu/kernel/pre_emphasis_filter.hpp b/src/modules/cpu/kernel/pre_emphasis_filter.hpp index 1d25921ad..889cd2dec 100644 --- a/src/modules/cpu/kernel/pre_emphasis_filter.hpp +++ b/src/modules/cpu/kernel/pre_emphasis_filter.hpp @@ -50,7 +50,7 @@ RppStatus pre_emphasis_filter_host_tensor(Rpp32f *srcPtr, dstPtrTemp[0] = srcPtrTemp[0] - coeff * border; Rpp32s vectorIncrement = 8; - Rpp32s alignedLength = (bufferLength / 8) * 8; + Rpp32s alignedLength = (bufferLength / 8) * 8 - 8; __m256 pCoeff = _mm256_set1_ps(coeff); Rpp32s vectorLoopCount = 1; diff --git a/src/modules/cpu/kernel/spectrogram.hpp b/src/modules/cpu/kernel/spectrogram.hpp index e072b7633..2489d2180 100644 --- a/src/modules/cpu/kernel/spectrogram.hpp +++ b/src/modules/cpu/kernel/spectrogram.hpp @@ -80,11 +80,10 @@ RppStatus spectrogram_host_tensor(Rpp32f *srcPtr, Rpp32s power, Rpp32s windowLength, Rpp32s windowStep, - RpptSpectrogramLayout layout, rpp::Handle& handle) { Rpp32s windowCenterOffset = 0; - bool vertical = (layout == RpptSpectrogramLayout::FT); + bool vertical = (dstDescPtr->layout == RpptLayout::NFT); if (centerWindows) windowCenterOffset = windowLength / 2; if (nfft == 0) nfft = windowLength; const Rpp32s numBins = nfft / 2 + 1; diff --git a/src/modules/rppt_tensor_audio_augmentations.cpp b/src/modules/rppt_tensor_audio_augmentations.cpp index bafaf93fb..0267985e5 100644 --- a/src/modules/rppt_tensor_audio_augmentations.cpp +++ b/src/modules/rppt_tensor_audio_augmentations.cpp @@ -169,9 +169,10 @@ RppStatus rppt_spectrogram_host(RppPtr_t srcPtr, Rpp32s power, Rpp32s windowLength, Rpp32s windowStep, - RpptSpectrogramLayout layout, rppHandle_t rppHandle) { + if ((dstDescPtr->layout != RpptLayout::NFT) && (dstDescPtr->layout != RpptLayout::NTF)) return RPP_ERROR_INVALID_DST_LAYOUT; + if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { spectrogram_host_tensor(static_cast(srcPtr), @@ -186,7 +187,6 @@ RppStatus rppt_spectrogram_host(RppPtr_t srcPtr, power, windowLength, windowStep, - layout, rpp::deref(rppHandle)); return RPP_SUCCESS; diff --git a/utilities/test_suite/HOST/Tensor_audio_host.cpp b/utilities/test_suite/HOST/Tensor_audio_host.cpp index f982d3360..3ec2e0060 100644 --- a/utilities/test_suite/HOST/Tensor_audio_host.cpp +++ b/utilities/test_suite/HOST/Tensor_audio_host.cpp @@ -138,9 +138,13 @@ int main(int argc, char **argv) RpptImagePatch *srcDims = (RpptImagePatch *) calloc(batchSize, sizeof(RpptImagePatch)); RpptImagePatch *dstDims = (RpptImagePatch *) calloc(batchSize, sizeof(RpptImagePatch)); + // buffers used for non silent region detection + Rpp32s detectedIndex[batchSize], detectionLength[batchSize]; + // run case-wise RPP API and measure time rppHandle_t handle; rppCreateWithBatchSize(&handle, srcDescPtr->n, 3); + int noOfIterations = (int)audioNames.size() / batchSize; double maxWallTime = 0, minWallTime = 500, avgWallTime = 0; string testCaseName; @@ -158,8 +162,6 @@ int main(int argc, char **argv) case 0: { testCaseName = "non_silent_region_detection"; - Rpp32s detectedIndex[batchSize]; - Rpp32s detectionLength[batchSize]; Rpp32f cutOffDB = -60.0; Rpp32s windowLength = 2048; Rpp32f referencePower = 0.0f; @@ -168,10 +170,6 @@ int main(int argc, char **argv) startWallTime = omp_get_wtime(); rppt_non_silent_region_detection_host(inputf32, srcDescPtr, srcLengthTensor, detectedIndex, detectionLength, cutOffDB, windowLength, referencePower, resetInterval, handle); - // QA mode - verify outputs with golden outputs. Below code doesn’t run for performance tests - if (testType == 0) - verify_non_silent_region_detection(detectedIndex, detectionLength, testCaseName, batchSize, audioNames, dst); - break; } case 1: @@ -238,7 +236,7 @@ int main(int argc, char **argv) Rpp32s windowLength = 320; Rpp32s windowStep = 160; Rpp32s nfft = 512; - RpptSpectrogramLayout layout = RpptSpectrogramLayout::FT; + dstDescPtr->layout = RpptLayout::NFT; int windowOffset = 0; if(!centerWindows) @@ -246,7 +244,7 @@ int main(int argc, char **argv) maxDstWidth = 0; maxDstHeight = 0; - if(layout == RpptSpectrogramLayout::FT) + if(dstDescPtr->layout == RpptLayout::NFT) { for(int i = 0; i < noOfAudioFiles; i++) { @@ -274,7 +272,7 @@ int main(int argc, char **argv) outputf32 = (Rpp32f *)realloc(outputf32, spectrogramBufferSize * sizeof(Rpp32f)); startWallTime = omp_get_wtime(); - rppt_spectrogram_host(inputf32, srcDescPtr, outputf32, dstDescPtr, srcLengthTensor, centerWindows, reflectPadding, windowFn, nfft, power, windowLength, windowStep, layout, handle); + rppt_spectrogram_host(inputf32, srcDescPtr, outputf32, dstDescPtr, srcLengthTensor, centerWindows, reflectPadding, windowFn, nfft, power, windowLength, windowStep, handle); break; } @@ -421,9 +419,9 @@ int main(int argc, char **argv) // QA mode - verify outputs with golden outputs. Below code doesn’t run for performance tests if (testType == 0) { - /* Run only if testCase is not 0 - For testCase 0 verify_non_silent_region_detection function is used for QA testing */ - if (testCase != 0) + if (testCase == 0) + verify_non_silent_region_detection(detectedIndex, detectionLength, testCaseName, batchSize, audioNames, dst); + else verify_output(outputf32, dstDescPtr, dstDims, testCaseName, dst, scriptPath); /* Dump the outputs to csv files for debugging