diff --git a/include/rppdefs.h b/include/rppdefs.h
index 7d49c457d..3256444f3 100644
--- a/include/rppdefs.h
+++ b/include/rppdefs.h
@@ -428,15 +428,6 @@ typedef enum
REFLECT
} RpptAudioBorderType;
-/*! \brief RPPT Spectrogram Layout enum
- * \ingroup group_rppdefs
- */
-typedef enum
-{
- FT = 0, //Frequency Major
- TF, //Time Major
-} RpptSpectrogramLayout;
-
/*! \brief RPPT Mel Scale Formula
* \ingroup group_rppdefs
*/
diff --git a/include/rppt_tensor_audio_augmentations.h b/include/rppt_tensor_audio_augmentations.h
index 13259cd22..31f3e95ef 100644
--- a/include/rppt_tensor_audio_augmentations.h
+++ b/include/rppt_tensor_audio_augmentations.h
@@ -115,7 +115,7 @@ RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_
* \param [in] srcPtr source tensor in HOST memory
* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HOST memory
- * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+ * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT / NTF)
* \param [in] srcLengthTensor source audio buffer length (1D tensor in HOST memory, of size batchSize)
* \param [in] centerWindows indicates whether extracted windows should be padded so that the window function is centered at multiples of window_step
* \param [in] reflectPadding indicates the padding policy when sampling outside the bounds of the signal
@@ -124,20 +124,19 @@ RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_
* \param [in] power exponent of the magnitude of the spectrum
* \param [in] windowLength window size in number of samples
* \param [in] windowStep step between the STFT windows in number of samples
- * \param [in] layout specifies output layout of spectrogram
* \param [in] rppHandle RPP HOST handle created with \ref rppCreateWithBatchSize()
* \return A \ref RppStatus enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
-RppStatus rppt_spectrogram_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcLengthTensor, bool centerWindows, bool reflectPadding, Rpp32f *windowFunction, Rpp32s nfft, Rpp32s power, Rpp32s windowLength, Rpp32s windowStep, RpptSpectrogramLayout layout, rppHandle_t rppHandle);
+RppStatus rppt_spectrogram_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcLengthTensor, bool centerWindows, bool reflectPadding, Rpp32f *windowFunction, Rpp32s nfft, Rpp32s power, Rpp32s windowLength, Rpp32s windowStep, rppHandle_t rppHandle);
/*! \brief Mel filter bank augmentation HOST backend
* \details Mel filter bank augmentation for audio data
* \param[in] srcPtr source tensor in HOST memory
- * \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT / NTF)
+ * \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT)
* \param[out] dstPtr destination tensor in HOST memory
- * \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT / NTF)
+ * \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32, layout - NFT)
* \param[in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2)
* \param[in] maxFreq maximum frequency if not provided maxFreq = sampleRate / 2
* \param[in] minFreq minimum frequency
diff --git a/src/modules/cpu/kernel/pre_emphasis_filter.hpp b/src/modules/cpu/kernel/pre_emphasis_filter.hpp
index 1d25921ad..889cd2dec 100644
--- a/src/modules/cpu/kernel/pre_emphasis_filter.hpp
+++ b/src/modules/cpu/kernel/pre_emphasis_filter.hpp
@@ -50,7 +50,7 @@ RppStatus pre_emphasis_filter_host_tensor(Rpp32f *srcPtr,
dstPtrTemp[0] = srcPtrTemp[0] - coeff * border;
Rpp32s vectorIncrement = 8;
- Rpp32s alignedLength = (bufferLength / 8) * 8;
+ Rpp32s alignedLength = (bufferLength / 8) * 8 - 8;
__m256 pCoeff = _mm256_set1_ps(coeff);
Rpp32s vectorLoopCount = 1;
diff --git a/src/modules/cpu/kernel/spectrogram.hpp b/src/modules/cpu/kernel/spectrogram.hpp
index e072b7633..2489d2180 100644
--- a/src/modules/cpu/kernel/spectrogram.hpp
+++ b/src/modules/cpu/kernel/spectrogram.hpp
@@ -80,11 +80,10 @@ RppStatus spectrogram_host_tensor(Rpp32f *srcPtr,
Rpp32s power,
Rpp32s windowLength,
Rpp32s windowStep,
- RpptSpectrogramLayout layout,
rpp::Handle& handle)
{
Rpp32s windowCenterOffset = 0;
- bool vertical = (layout == RpptSpectrogramLayout::FT);
+ bool vertical = (dstDescPtr->layout == RpptLayout::NFT);
if (centerWindows) windowCenterOffset = windowLength / 2;
if (nfft == 0) nfft = windowLength;
const Rpp32s numBins = nfft / 2 + 1;
diff --git a/src/modules/rppt_tensor_audio_augmentations.cpp b/src/modules/rppt_tensor_audio_augmentations.cpp
index bafaf93fb..0267985e5 100644
--- a/src/modules/rppt_tensor_audio_augmentations.cpp
+++ b/src/modules/rppt_tensor_audio_augmentations.cpp
@@ -169,9 +169,10 @@ RppStatus rppt_spectrogram_host(RppPtr_t srcPtr,
Rpp32s power,
Rpp32s windowLength,
Rpp32s windowStep,
- RpptSpectrogramLayout layout,
rppHandle_t rppHandle)
{
+ if ((dstDescPtr->layout != RpptLayout::NFT) && (dstDescPtr->layout != RpptLayout::NTF)) return RPP_ERROR_INVALID_DST_LAYOUT;
+
if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
{
spectrogram_host_tensor(static_cast(srcPtr),
@@ -186,7 +187,6 @@ RppStatus rppt_spectrogram_host(RppPtr_t srcPtr,
power,
windowLength,
windowStep,
- layout,
rpp::deref(rppHandle));
return RPP_SUCCESS;
diff --git a/utilities/test_suite/HOST/Tensor_audio_host.cpp b/utilities/test_suite/HOST/Tensor_audio_host.cpp
index f982d3360..3ec2e0060 100644
--- a/utilities/test_suite/HOST/Tensor_audio_host.cpp
+++ b/utilities/test_suite/HOST/Tensor_audio_host.cpp
@@ -138,9 +138,13 @@ int main(int argc, char **argv)
RpptImagePatch *srcDims = (RpptImagePatch *) calloc(batchSize, sizeof(RpptImagePatch));
RpptImagePatch *dstDims = (RpptImagePatch *) calloc(batchSize, sizeof(RpptImagePatch));
+ // buffers used for non-silent region detection
+ Rpp32s detectedIndex[batchSize], detectionLength[batchSize];
+
// run case-wise RPP API and measure time
rppHandle_t handle;
rppCreateWithBatchSize(&handle, srcDescPtr->n, 3);
+
int noOfIterations = (int)audioNames.size() / batchSize;
double maxWallTime = 0, minWallTime = 500, avgWallTime = 0;
string testCaseName;
@@ -158,8 +162,6 @@ int main(int argc, char **argv)
case 0:
{
testCaseName = "non_silent_region_detection";
- Rpp32s detectedIndex[batchSize];
- Rpp32s detectionLength[batchSize];
Rpp32f cutOffDB = -60.0;
Rpp32s windowLength = 2048;
Rpp32f referencePower = 0.0f;
@@ -168,10 +170,6 @@ int main(int argc, char **argv)
startWallTime = omp_get_wtime();
rppt_non_silent_region_detection_host(inputf32, srcDescPtr, srcLengthTensor, detectedIndex, detectionLength, cutOffDB, windowLength, referencePower, resetInterval, handle);
- // QA mode - verify outputs with golden outputs. Below code doesn’t run for performance tests
- if (testType == 0)
- verify_non_silent_region_detection(detectedIndex, detectionLength, testCaseName, batchSize, audioNames, dst);
-
break;
}
case 1:
@@ -238,7 +236,7 @@ int main(int argc, char **argv)
Rpp32s windowLength = 320;
Rpp32s windowStep = 160;
Rpp32s nfft = 512;
- RpptSpectrogramLayout layout = RpptSpectrogramLayout::FT;
+ dstDescPtr->layout = RpptLayout::NFT;
int windowOffset = 0;
if(!centerWindows)
@@ -246,7 +244,7 @@ int main(int argc, char **argv)
maxDstWidth = 0;
maxDstHeight = 0;
- if(layout == RpptSpectrogramLayout::FT)
+ if(dstDescPtr->layout == RpptLayout::NFT)
{
for(int i = 0; i < noOfAudioFiles; i++)
{
@@ -274,7 +272,7 @@ int main(int argc, char **argv)
outputf32 = (Rpp32f *)realloc(outputf32, spectrogramBufferSize * sizeof(Rpp32f));
startWallTime = omp_get_wtime();
- rppt_spectrogram_host(inputf32, srcDescPtr, outputf32, dstDescPtr, srcLengthTensor, centerWindows, reflectPadding, windowFn, nfft, power, windowLength, windowStep, layout, handle);
+ rppt_spectrogram_host(inputf32, srcDescPtr, outputf32, dstDescPtr, srcLengthTensor, centerWindows, reflectPadding, windowFn, nfft, power, windowLength, windowStep, handle);
break;
}
@@ -421,9 +419,9 @@ int main(int argc, char **argv)
// QA mode - verify outputs with golden outputs. Below code doesn’t run for performance tests
if (testType == 0)
{
- /* Run only if testCase is not 0
- For testCase 0 verify_non_silent_region_detection function is used for QA testing */
- if (testCase != 0)
+ if (testCase == 0)
+ verify_non_silent_region_detection(detectedIndex, detectionLength, testCaseName, batchSize, audioNames, dst);
+ else
verify_output(outputf32, dstDescPtr, dstDims, testCaseName, dst, scriptPath);
/* Dump the outputs to csv files for debugging