diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in
index 72eac3392..a4621a9ab 100644
--- a/docs/sphinx/requirements.in
+++ b/docs/sphinx/requirements.in
@@ -1 +1 @@
-rocm-docs-core[api_reference]==1.7.1
+rocm-docs-core[api_reference]==1.7.2
diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt
index 6de43cd87..236b5c21d 100644
--- a/docs/sphinx/requirements.txt
+++ b/docs/sphinx/requirements.txt
@@ -92,7 +92,7 @@ requests==2.32.3
 # via
 #   pygithub
 #   sphinx
-rocm-docs-core[api-reference]==1.7.1
+rocm-docs-core[api-reference]==1.7.2
 # via -r requirements.in
 smmap==5.0.1
 # via gitdb
diff --git a/include/rppdefs.h b/include/rppdefs.h
index caf8f4487..2c0430ebc 100644
--- a/include/rppdefs.h
+++ b/include/rppdefs.h
@@ -52,6 +52,8 @@ SOFTWARE.
 #define RPP_MAX_8U ( 255 )
 /*! \brief RPP maximum dimensions in tensor \ingroup group_rppdefs \page subpage_rppt */
 #define RPPT_MAX_DIMS ( 5 )
+/*! \brief RPP maximum channels in audio tensor \ingroup group_rppdefs \page subpage_rppt */
+#define RPPT_MAX_AUDIO_CHANNELS ( 16 )
 
 #define CHECK_RETURN_STATUS(x) do { \
     int retval = (x); \
@@ -62,6 +64,7 @@ SOFTWARE.
 } while (0)
 #ifdef HIP_COMPILE
+#include <hip/hip_runtime.h>
 #define RPP_HOST_DEVICE __host__ __device__
 #else
 #define RPP_HOST_DEVICE
@@ -708,7 +711,7 @@ typedef struct GenericFilter
 */
 typedef struct RpptResamplingWindow
 {
-    inline void input_range(Rpp32f x, Rpp32s *loc0, Rpp32s *loc1)
+    inline RPP_HOST_DEVICE void input_range(Rpp32f x, Rpp32s *loc0, Rpp32s *loc1)
     {
         Rpp32s xc = std::ceil(x);
         *loc0 = xc - lobes;
@@ -742,7 +745,7 @@ typedef struct RpptResamplingWindow
     Rpp32f scale = 1, center = 1;
     Rpp32s lobes = 0, coeffs = 0;
     Rpp32s lookupSize = 0;
-    std::vector<Rpp32f> lookup;
+    Rpp32f *lookup = nullptr;
     __m128 pCenter, pScale;
 } RpptResamplingWindow;
@@ -806,6 +809,43 @@ struct SlaneyMelScale : public BaseMelScale
 public:
     ~SlaneyMelScale() {};
 };
+inline Rpp32f sinc(Rpp32f x)
+{
+    x *= M_PI;
+    return (std::abs(x) < 1e-5f) ? (1.f - (x * x * 0.16666667)) : std::sin(x) / x;
+}
+
+inline Rpp64f hann(Rpp64f x)
+{
+    return 0.5 * (1 + std::cos(x * M_PI));
+}
+
+// Initialization function that fills the resampling window (RpptResamplingWindow).
+// From the given coeffs and lobes values it generates a LUT (lookup table), which is then used by the Resample audio augmentation.
+inline void windowed_sinc(RpptResamplingWindow &window, Rpp32s coeffs, Rpp32s lobes)
+{
+    Rpp32f scale = 2.0f * lobes / (coeffs - 1);
+    Rpp32f scale_envelope = 2.0f / coeffs;
+    window.coeffs = coeffs;
+    window.lobes = lobes;
+    window.lookupSize = coeffs + 5;
+#ifdef GPU_SUPPORT
+    CHECK_RETURN_STATUS(hipHostMalloc(&(window.lookup), window.lookupSize * sizeof(Rpp32f)));
+#else
+    window.lookup = static_cast<Rpp32f *>(malloc(window.lookupSize * sizeof(Rpp32f)));
+#endif
+    Rpp32s center = (coeffs - 1) * 0.5f;
+    for (int i = 0; i < coeffs; i++) {
+        Rpp32f x = (i - center) * scale;
+        Rpp32f y = (i - center) * scale_envelope;
+        Rpp32f w = sinc(x) * hann(y);
+        window.lookup[i + 1] = w;
+    }
+    window.center = center + 1;
+    window.scale = 1 / scale;
+    window.pCenter = _mm_set1_ps(window.center);
+    window.pScale = _mm_set1_ps(window.scale);
+}
 
 /******************** HOST memory typedefs ********************/
diff --git a/include/rppt_tensor_audio_augmentations.h b/include/rppt_tensor_audio_augmentations.h
index 52b8cea38..0401fec14 100644
--- a/include/rppt_tensor_audio_augmentations.h
+++ b/include/rppt_tensor_audio_augmentations.h
@@ -49,7 +49,7 @@ extern "C" {
 \n Finds the starting index and length of non silent region in the audio buffer by comparing the calculated short-term power with cutoff value passed
 * \param [in] srcPtr source tensor in HOST memory
- * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2, offsetInBytes >= 0, dataType = F32)
 * \param [in] srcLengthTensor source audio buffer length (1D tensor in HOST memory, of size batchSize)
 * \param [out] detectedIndexTensor beginning index of non silent region (1D tensor in HOST memory, of size batchSize)
 * \param [out] detectionLengthTensor length of non silent region (1D tensor in HOST memory, of size batchSize)
@@ -70,7 +70,7 @@ RppStatus rppt_non_silent_region_detection_host(RppPtr_t srcPtr, RpptDescPtr src
 \n Finds the starting index and length of non silent region in the audio buffer by comparing the calculated short-term power with cutoff value passed
 * \param [in] srcPtr source tensor in HIP memory
- * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2, offsetInBytes >= 0, dataType = F32)
 * \param [in] srcLengthTensor source audio buffer length (1D tensor in Pinned/HIP memory, of size batchSize)
 * \param [out] detectedIndexTensor beginning index of non silent region (1D tensor in Pinned/HIP memory, of size batchSize)
 * \param [out] detectionLengthTensor length of non silent region (1D tensor in Pinned/HIP memory, of size batchSize)
@@ -254,9 +254,9 @@ RppStatus rppt_mel_filter_bank_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppP
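For orientation, the following host-side sketch (editorial, not part of the patch) shows how the windowed_sinc() initializer added in rppdefs.h above and the resample entry points documented below fit together. The sampling rates, the quality-to-lobes mapping and the pinned allocations mirror test case 6 of the HIP audio test suite later in this diff; batchSize, srcLengthTensor, channelsTensor, the tensor descriptors, the device buffers d_inputf32/d_outputf32 and the RPP handle are assumed to be set up as in that test.

// Per-sample rates and dims (pinned host memory, which is what the GPU API expects)
Rpp32f *inRateTensor, *outRateTensor;
Rpp32s *srcDimsTensor;
CHECK_RETURN_STATUS(hipHostMalloc(&inRateTensor, batchSize * sizeof(Rpp32f)));
CHECK_RETURN_STATUS(hipHostMalloc(&outRateTensor, batchSize * sizeof(Rpp32f)));
CHECK_RETURN_STATUS(hipHostMalloc(&srcDimsTensor, batchSize * 2 * sizeof(Rpp32s)));
for (int i = 0, j = 0; i < batchSize; i++, j += 2)
{
    inRateTensor[i] = 16000.0f;               // source sampling rate
    outRateTensor[i] = 16000.0f * 1.15f;      // target sampling rate (18400 Hz)
    srcDimsTensor[j] = srcLengthTensor[i];    // samples per channel
    srcDimsTensor[j + 1] = channelsTensor[i]; // interleaved channel count
}

// Build the windowed-sinc lookup table once and reuse it for every call;
// quality 50 gives lobes = 16 and a 1025-coefficient kernel, as in the test suite.
Rpp32f quality = 50.0f;
Rpp32s lobes = std::round(0.007 * quality * quality - 0.09 * quality + 3);
Rpp32s coeffs = lobes * 64 + 1;
RpptResamplingWindow *window;
CHECK_RETURN_STATUS(hipHostMalloc(&window, sizeof(RpptResamplingWindow)));   // pinned; windowed_sinc() fills the members
windowed_sinc(*window, coeffs, lobes);

// HIP backend call; rppt_resample_host takes the same argument list with host pointers.
rppt_resample_gpu(d_inputf32, srcDescPtr, d_outputf32, dstDescPtr,
                  inRateTensor, outRateTensor, srcDimsTensor, *window, handle);

Cleanup follows the same pattern as the test suite: hipHostFree(window->lookup) and then hipHostFree(window) once all calls have completed.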
/*! \brief Resample augmentation on HOST backend
* \details Resample augmentation for audio data
* \param [in] srcPtr source tensor in HOST memory
-* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
* \param [out] dstPtr destination tensor in HOST memory
-* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
* \param [in] inRate Input sampling rate (1D tensor in HOST memory, of size batchSize)
* \param [in] outRate Output sampling rate (1D tensor in HOST memory, of size batchSize)
* \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2)
@@ -268,6 +268,25 @@ RppStatus rppt_mel_filter_bank_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppP
 */
RppStatus rppt_resample_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *inRateTensor, Rpp32f *outRateTensor, Rpp32s *srcDimsTensor, RpptResamplingWindow &window, rppHandle_t rppHandle);
+#ifdef GPU_SUPPORT
+/*! \brief Resample augmentation on HIP backend
+* \details Resample augmentation for audio data
+* \param [in] srcPtr source tensor in HIP memory
+* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
+* \param [out] dstPtr destination tensor in HIP memory
+* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2 or 3 (for single-channel or multi-channel audio tensor), offsetInBytes >= 0, dataType = F32)
+* \param [in] inRate Input sampling rate (1D tensor in Pinned memory, of size batchSize)
+* \param [in] outRate Output sampling rate (1D tensor in Pinned memory, of size batchSize)
+* \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in Pinned memory, of size batchSize * 2)
+* \param [in] window Resampling window (struct of type RpptResamplingWindow in HIP/Pinned memory)
+* \param [in] rppHandle RPP HIP handle created with \ref rppCreateWithStreamAndBatchSize()
+* \return A \ref RppStatus enumeration.
+* \retval RPP_SUCCESS Successful completion.
+* \retval RPP_ERROR* Unsuccessful completion.
+*/
+RppStatus rppt_resample_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *inRateTensor, Rpp32f *outRateTensor, Rpp32s *srcDimsTensor, RpptResamplingWindow &window, rppHandle_t rppHandle);
+#endif // GPU_SUPPORT
+
/*! @} */
diff --git a/include/rppt_tensor_effects_augmentations.h b/include/rppt_tensor_effects_augmentations.h
index bd046c535..eaf5d84a5 100644
--- a/include/rppt_tensor_effects_augmentations.h
+++ b/include/rppt_tensor_effects_augmentations.h
@@ -470,7 +470,7 @@ RppStatus rppt_vignette_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t ds
 * - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
 * - dstPtr depth ranges - Will be same depth as srcPtr.
* \image html img150x150.png Sample Input - * \image html effects_augmentations_jitter_img150x150.png Sample Output + * \image html effects_augmentations_jitter_150x150.png Sample Output * \param [in] srcPtr source tensor in HOST memory * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3) * \param [out] dstPtr destination tensor in HOST memory @@ -491,7 +491,7 @@ RppStatus rppt_jitter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dst * - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127). * - dstPtr depth ranges - Will be same depth as srcPtr. * \image html img150x150.png Sample Input - * \image html effects_augmentations_jitter_img150x150.png Sample Output + * \image html effects_augmentations_jitter_150x150.png Sample Output * \param [in] srcPtr source tensor in HIP memory * \param un[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3) * \param [out] dstPtr destination tensor in HIP memory @@ -576,8 +576,8 @@ RppStatus rppt_erase_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP * \details This function erases one or more user defined regions from an image, for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.
* srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127). * dstPtr depth ranges - Will be same depth as srcPtr. - * \image html img150x150.jpg Sample Input - * \image html effects_augmentations_erase_img150x150.jpg Sample Output + * \image html img150x150.png Sample Input + * \image html effects_augmentations_erase_img150x150.png Sample Output * \param [in] srcPtr source tensor in HIP memory * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3) * \param [out] dstPtr destination tensor in HIP memory @@ -601,8 +601,8 @@ RppStatus rppt_erase_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPt * \details The glitch augmentation adds a glitch effect for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127). * - dstPtr depth ranges - Will be same depth as srcPtr. - * \image html img150x150.jpg Sample Input - * \image html effects_augmentations_glitch_img150x150.jpg Sample Output + * \image html img150x150.png Sample Input + * \image html effects_augmentations_glitch_img150x150.png Sample Output * \param [in] srcPtr source tensor in HOST memory * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3) * \param [out] dstPtr destination tensor in HOST memory @@ -623,8 +623,8 @@ RppStatus rppt_glitch_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dst * \details The glitch augmentation adds a glitch effect for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127). * - dstPtr depth ranges - Will be same depth as srcPtr. - * \image html img150x150.jpg Sample Input - * \image html effects_augmentations_glitch_img150x150.jpg Sample Output + * \image html img150x150.png Sample Input + * \image html effects_augmentations_glitch_img150x150.png Sample Output * \param [in] srcPtr source tensor in HIP memory * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3) * \param [out] dstPtr destination tensor in HIP memory @@ -645,8 +645,8 @@ RppStatus rppt_glitch_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP * \details The pixelate augmentation performs a pixelate transformation for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127). * - dstPtr depth ranges - Will be same depth as srcPtr. - * \image html img150x150.jpg Sample Input - * \image html effects_augmentations_pixelate_img150x150.jpg Sample Output + * \image html img150x150.png Sample Input + * \image html effects_augmentations_pixelate_img150x150.png Sample Output * \param [in] srcPtr source tensor in HOST memory * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3) * \param [out] dstPtr destination tensor in HOST memory @@ -667,8 +667,8 @@ RppStatus rppt_pixelate_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t d * \details The pixelate augmentation performs a pixelate transformation for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127). * - dstPtr depth ranges - Will be same depth as srcPtr. - * \image html img150x150.jpg Sample Input - * \image html effects_augmentations_pixelate_img150x150.jpg Sample Output + * \image html img150x150.png Sample Input + * \image html effects_augmentations_pixelate_img150x150.png Sample Output * \param [in] srcPtr source tensor in HIP memory * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3) * \param [out] dstPtr destination tensor in HIP memory diff --git a/include/rppt_tensor_geometric_augmentations.h b/include/rppt_tensor_geometric_augmentations.h index 28dd516e6..986c36a03 100644 --- a/include/rppt_tensor_geometric_augmentations.h +++ b/include/rppt_tensor_geometric_augmentations.h @@ -613,8 +613,8 @@ RppStatus rppt_remap_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP * \details Performs a remap operation using user specified remap tables for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout. For each image, the output(x,y) = input(mapx(x, y), mapy(x, y)) for every (x,y) in the destination image.
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127). * - dstPtr depth ranges - Will be same depth as srcPtr. - * \image html img150x150.jpg Sample Input - * \image html geometric_augmentations_remap_img150x150.jpg Sample Output + * \image html img150x150.png Sample Input + * \image html geometric_augmentations_remap_img150x150.png Sample Output * \param [in] srcPtr source tensor in HIP memory * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3) * \param [out] dstPtr destination tensor in HIP memory diff --git a/src/include/cpu/rpp_cpu_common.hpp b/src/include/cpu/rpp_cpu_common.hpp index 973d728c6..899aee456 100644 --- a/src/include/cpu/rpp_cpu_common.hpp +++ b/src/include/cpu/rpp_cpu_common.hpp @@ -5452,12 +5452,6 @@ inline void compute_bicubic_coefficient(Rpp32f weight, Rpp32f &coeff) coeff = (x >= 2) ? 0 : ((x > 1) ? (x * x * (-0.5f * x + 2.5f) - 4.0f * x + 2.0f) : (x * x * (1.5f * x - 2.5f) + 1.0f)); } -inline Rpp32f sinc(Rpp32f x) -{ - x *= M_PI; - return (std::abs(x) < 1e-5f) ? (1.0f - x * x * ONE_OVER_6) : std::sin(x) / x; -} - inline void compute_lanczos3_coefficient(Rpp32f weight, Rpp32f &coeff) { coeff = fabs(weight) >= 3 ? 0.0f : (sinc(weight) * sinc(weight * 0.333333f)); diff --git a/src/include/hip/rpp_hip_common.hpp b/src/include/hip/rpp_hip_common.hpp index 721800c80..d83506b9f 100644 --- a/src/include/hip/rpp_hip_common.hpp +++ b/src/include/hip/rpp_hip_common.hpp @@ -156,6 +156,11 @@ struct RPPTensorFunctionMetaData (byte & 0x02 ? '1' : '0'), \ (byte & 0x01 ? '1' : '0') +// float4 floor + +#define FLOOR4(src, dst) \ +dst = make_int4(floorf(src.x), floorf(src.y), floorf(src.z), floorf(src.w)); + /******************** HOST FUNCTIONS ********************/ inline int getplnpkdind(RppiChnFormat &format) diff --git a/src/modules/hip/handlehip.cpp b/src/modules/hip/handlehip.cpp index 883c78316..9fb6a992f 100644 --- a/src/modules/hip/handlehip.cpp +++ b/src/modules/hip/handlehip.cpp @@ -245,7 +245,7 @@ struct HandleImpl - 293 is the size required for storing reduction outputs for 600000 size sample - 128 is the size required for storing cutOffDB values for batch size 128 */ hipMalloc(&(this->initHandle->mem.mgpu.scratchBufferHip.floatmem), sizeof(Rpp32f) * 76853888); - hipHostMalloc(&(this->initHandle->mem.mgpu.scratchBufferPinned.floatmem), sizeof(Rpp32f) * 8294400); + hipHostMalloc(&(this->initHandle->mem.mgpu.scratchBufferPinned.floatmem), sizeof(Rpp32f) * 8294400); // 3840 x 2160 } }; diff --git a/src/modules/hip/hip_tensor_audio_augmentations.hpp b/src/modules/hip/hip_tensor_audio_augmentations.hpp index cbf057dc6..f97212d96 100644 --- a/src/modules/hip/hip_tensor_audio_augmentations.hpp +++ b/src/modules/hip/hip_tensor_audio_augmentations.hpp @@ -30,5 +30,6 @@ SOFTWARE. 
#include "kernel/mel_filter_bank.hpp" #include "kernel/pre_emphasis_filter.hpp" #include "kernel/to_decibels.hpp" +#include "kernel/resample.hpp" #endif // HIP_TENSOR_AUDIO_AUGMENTATIONS_HPP diff --git a/src/modules/hip/kernel/resample.hpp b/src/modules/hip/kernel/resample.hpp new file mode 100644 index 000000000..fcdf5064b --- /dev/null +++ b/src/modules/hip/kernel/resample.hpp @@ -0,0 +1,299 @@ +#include +#include "rpp_hip_common.hpp" + +// -------------------- Set 0 - resample kernel device helpers -------------------- + +__device__ __forceinline__ float resample_hip_compute(float &x, float &scale, float ¢er, float *lookup, int &lookupSize) +{ + float locRaw = x * scale + center; + int locFloor = std::floor(locRaw); + float weight = locRaw - locFloor; + locFloor = std::max(std::min(locFloor, lookupSize - 2), 0); + float current = lookup[locFloor]; + float next = lookup[locFloor + 1]; + return current + weight * (next - current); +} + +__device__ __forceinline__ void resample_hip_compute(float4 *src_f4, float4 *dst_f4, const float4 *scale_f4, const float4 *center_f4, float *lookup) +{ + float4 locRaw_f4 = (*src_f4) * (*scale_f4) + (*center_f4); + int4 locFloor_i4; + FLOOR4(locRaw_f4, locFloor_i4); + float4 weight_f4 = make_float4(locRaw_f4.x - locFloor_i4.x, locRaw_f4.y - locFloor_i4.y, locRaw_f4.z - locFloor_i4.z, locRaw_f4.w - locFloor_i4.w); + float4 current_f4 = make_float4(lookup[locFloor_i4.x], lookup[locFloor_i4.y], lookup[locFloor_i4.z], lookup[locFloor_i4.w]); + float4 next_f4 = make_float4(lookup[locFloor_i4.x + 1], lookup[locFloor_i4.y + 1], lookup[locFloor_i4.z + 1], lookup[locFloor_i4.w + 1]); + *dst_f4 = current_f4 + weight_f4 * (next_f4 - current_f4); +} + +// -------------------- Set 1 - resample kernel host helpers -------------------- + +inline void compute_output_dims(Rpp32f *inRateTensor, + Rpp32f *outRateTensor, + Rpp32s *srcLengthTensor, + Rpp32s *dstLengthTensor, + Rpp32u batchSize) +{ + for (Rpp32s i = 0, j = 0; i < batchSize; i++, j += 2) + { + dstLengthTensor[j] = std::ceil(srcLengthTensor[j] * outRateTensor[i] / inRateTensor[i]); + dstLengthTensor[j + 1] = srcLengthTensor[j + 1]; + } +} + +// -------------------- Set 2 - resample kernels -------------------- + +__global__ void resample_single_channel_hip_tensor(float *srcPtr, + float *dstPtr, + uint2 strides, + int2 *srcDimsTensor, + int2 *dstDimsTensor, + float *inRateTensor, + float *outRateTensor, + RpptResamplingWindow *window) +{ + int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + int srcLength = srcDimsTensor[id_z].x; + int dstLength = dstDimsTensor[id_z].x; + int outBlock = id_x * hipBlockDim_x; + int blockEnd = std::min(outBlock + static_cast(hipBlockDim_x), dstLength); + + if (dstLength != srcLength) + { + double scale = static_cast(inRateTensor[id_z]) / outRateTensor[id_z]; + extern __shared__ float lookup_smem[]; + + // copy all values from window lookup table to shared memory lookup table + for (int k = hipThreadIdx_x; k < window->lookupSize; k += hipBlockDim_x) + lookup_smem[k] = window->lookup[k]; + __syncthreads(); + + if (outBlock >= dstLength) + return; + + // extract the window scale, center and lookup size values from window + float windowScale = window->scale; + float windowCenter = window->center; + int lookupSize = window->lookupSize; + float4 windowScale_f4 = static_cast(windowScale); + float4 windowCenter_f4 = static_cast(windowCenter); + float4 increment_f4 = static_cast(8.0f); + d_float8 locInit_f8; + 
+
+// -------------------- Set 1 - resample kernel host helpers --------------------
+
+inline void compute_output_dims(Rpp32f *inRateTensor,
+                                Rpp32f *outRateTensor,
+                                Rpp32s *srcLengthTensor,
+                                Rpp32s *dstLengthTensor,
+                                Rpp32u batchSize)
+{
+    for (Rpp32s i = 0, j = 0; i < batchSize; i++, j += 2)
+    {
+        dstLengthTensor[j] = std::ceil(srcLengthTensor[j] * outRateTensor[i] / inRateTensor[i]);
+        dstLengthTensor[j + 1] = srcLengthTensor[j + 1];
+    }
+}
+
+// -------------------- Set 2 - resample kernels --------------------
+
+__global__ void resample_single_channel_hip_tensor(float *srcPtr,
+                                                   float *dstPtr,
+                                                   uint2 strides,
+                                                   int2 *srcDimsTensor,
+                                                   int2 *dstDimsTensor,
+                                                   float *inRateTensor,
+                                                   float *outRateTensor,
+                                                   RpptResamplingWindow *window)
+{
+    int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+    int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
+
+    int srcLength = srcDimsTensor[id_z].x;
+    int dstLength = dstDimsTensor[id_z].x;
+    int outBlock = id_x * hipBlockDim_x;
+    int blockEnd = std::min(outBlock + static_cast<int>(hipBlockDim_x), dstLength);
+
+    if (dstLength != srcLength)
+    {
+        double scale = static_cast<double>(inRateTensor[id_z]) / outRateTensor[id_z];
+        extern __shared__ float lookup_smem[];
+
+        // copy all values from window lookup table to shared memory lookup table
+        for (int k = hipThreadIdx_x; k < window->lookupSize; k += hipBlockDim_x)
+            lookup_smem[k] = window->lookup[k];
+        __syncthreads();
+
+        if (outBlock >= dstLength)
+            return;
+
+        // extract the window scale, center and lookup size values from window
+        float windowScale = window->scale;
+        float windowCenter = window->center;
+        int lookupSize = window->lookupSize;
+        float4 windowScale_f4 = static_cast<float4>(windowScale);
+        float4 windowCenter_f4 = static_cast<float4>(windowCenter);
+        float4 increment_f4 = static_cast<float4>(8.0f);
+        d_float8 locInit_f8;
+        locInit_f8.f4[0] = make_float4(0, 1, 2, 3);
+        locInit_f8.f4[1] = make_float4(4, 5, 6, 7);
+
+        // compute block wise values required for processing
+        double inBlockRaw = outBlock * scale;
+        int inBlockRounded = static_cast<int>(inBlockRaw);
+        float inPos = inBlockRaw - inBlockRounded;
+        float fscale = scale;
+        uint dstIdx = id_z * strides.y + outBlock;
+        float *inBlockPtr = srcPtr + id_z * strides.x + inBlockRounded;
+
+        // process block size (256) elements in a single thread
+        for (int outPos = outBlock; outPos < blockEnd; outPos++, inPos += fscale, dstIdx++)
+        {
+            int loc0, loc1;
+            window->input_range(inPos, &loc0, &loc1);
+
+            // check if computed loc0, loc1 values are beyond the input dimensions and update accordingly
+            if (loc0 + inBlockRounded < 0)
+                loc0 = -inBlockRounded;
+            if (loc1 + inBlockRounded > srcLength)
+                loc1 = srcLength - inBlockRounded;
+            int locInWindow = loc0;
+            float locBegin = locInWindow - inPos;
+            float accum = 0.0f;
+
+            d_float8 locInWindow_f8, accum_f8;
+            locInWindow_f8.f4[0] = static_cast<float4>(locBegin) + locInit_f8.f4[0];
+            locInWindow_f8.f4[1] = static_cast<float4>(locBegin) + locInit_f8.f4[1];
+            accum_f8.f4[0] = static_cast<float4>(0.0f);
+            accum_f8.f4[1] = static_cast<float4>(0.0f);
+            for (; locInWindow + 7 < loc1; locInWindow += 8)
+            {
+                d_float8 weights_f8;
+                resample_hip_compute(&locInWindow_f8.f4[0], &weights_f8.f4[0], &windowScale_f4, &windowCenter_f4, lookup_smem);
+                resample_hip_compute(&locInWindow_f8.f4[1], &weights_f8.f4[1], &windowScale_f4, &windowCenter_f4, lookup_smem);
+
+                d_float8 src_f8;
+                rpp_hip_load8_and_unpack_to_float8(inBlockPtr + locInWindow, &src_f8);
+                accum_f8.f4[0] += src_f8.f4[0] * weights_f8.f4[0];
+                accum_f8.f4[1] += src_f8.f4[1] * weights_f8.f4[1];
+
+                locInWindow_f8.f4[0] += increment_f4;
+                locInWindow_f8.f4[1] += increment_f4;
+            }
+            accum_f8.f4[0] += accum_f8.f4[1];
+            accum += (accum_f8.f1[0] + accum_f8.f1[1] + accum_f8.f1[2] + accum_f8.f1[3]);    // reduce the float4 accumulator lanes to a single float
+
+            float x = locInWindow - inPos;
+            for (; locInWindow < loc1; locInWindow++, x++)
+            {
+                float w = resample_hip_compute(x, windowScale, windowCenter, lookup_smem, lookupSize);
+                accum += inBlockPtr[locInWindow] * w;
+            }
+
+            // Final store to dst
+            dstPtr[dstIdx] = accum;
+        }
+    }
+    // copy input to output if dstLength is same as srcLength
+    else
+    {
+        if (outBlock >= dstLength)
+            return;
+
+        uint srcIdx = id_z * strides.x + outBlock;
+        uint dstIdx = id_z * strides.y + outBlock;
+        for (int outPos = outBlock; outPos < blockEnd; outPos++, dstIdx++, srcIdx++)
+            dstPtr[dstIdx] = srcPtr[srcIdx];
+    }
+}
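// Editorial note: the multi-channel kernel below keeps one running accumulator per channel in
// registers, sized with RPPT_MAX_AUDIO_CHANNELS (16) from rppdefs.h, so it assumes the
// interleaved channel count does not exceed that bound; the single-channel kernel above instead
// vectorizes the accumulation over eight consecutive samples per iteration.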
+
+__global__ void resample_multi_channel_hip_tensor(float *srcPtr,
+                                                  float *dstPtr,
+                                                  uint2 strides,
+                                                  int2 *srcDimsTensor,
+                                                  int2 *dstDimsTensor,
+                                                  float *inRateTensor,
+                                                  float *outRateTensor,
+                                                  RpptResamplingWindow *window)
+{
+    int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+    int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
+
+    int srcLength = srcDimsTensor[id_z].x;
+    int numChannels = srcDimsTensor[id_z].y;
+    int dstLength = dstDimsTensor[id_z].x;
+    int outBlock = id_x * hipBlockDim_x;
+    int blockEnd = std::min(outBlock + static_cast<int>(hipBlockDim_x), dstLength);
+
+    if (dstLength != srcLength)
+    {
+        double scale = static_cast<double>(inRateTensor[id_z]) / outRateTensor[id_z];
+        extern __shared__ float lookup_smem[];
+
+        // copy all values from window lookup table to shared memory lookup table
+        for (int k = hipThreadIdx_x; k < window->lookupSize; k += hipBlockDim_x)
+            lookup_smem[k] = window->lookup[k];
+        __syncthreads();
+
+        if (outBlock >= dstLength)
+            return;
+
+        // extract the window scale, center and lookup size values from window
+        float windowScale = window->scale;
+        float windowCenter = window->center;
+        int lookupSize = window->lookupSize;
+
+        // compute block wise values required for processing
+        double inBlockRaw = outBlock * scale;
+        int inBlockRounded = static_cast<int>(inBlockRaw);
+        float inPos = inBlockRaw - inBlockRounded;
+        float fscale = scale;
+        uint dstIdx = id_z * strides.y + outBlock * numChannels;
+        float *inBlockPtr = srcPtr + id_z * strides.x + (inBlockRounded * numChannels);
+
+        // process block size * channels (256 * channels) elements in a single thread
+        for (int outPos = outBlock; outPos < blockEnd; outPos++, inPos += fscale, dstIdx += numChannels)
+        {
+            int loc0, loc1;
+            window->input_range(inPos, &loc0, &loc1);
+
+            // check if computed loc0, loc1 values are beyond the input dimensions and update accordingly
+            if (loc0 + inBlockRounded < 0)
+                loc0 = -inBlockRounded;
+            if (loc1 + inBlockRounded > srcLength)
+                loc1 = srcLength - inBlockRounded;
+            float locInWindow = loc0 - inPos;
+            int2 offsetLocs_i2 = make_int2(loc0, loc1) * static_cast<int2>(numChannels);    // offset loc0, loc1 values for the multi-channel case
+
+            float accum[RPPT_MAX_AUDIO_CHANNELS] = {0.0f};
+            for (int offsetLoc = offsetLocs_i2.x; offsetLoc < offsetLocs_i2.y; offsetLoc += numChannels, locInWindow++)
+            {
+                float w = resample_hip_compute(locInWindow, windowScale, windowCenter, lookup_smem, lookupSize);
+                for (int c = 0; c < numChannels; c++)
+                    accum[c] += inBlockPtr[offsetLoc + c] * w;
+            }
+
+            // Final store to dst
+            for (int c = 0; c < numChannels; c++)
+                dstPtr[dstIdx + c] = accum[c];
+        }
+    }
+    else
+    {
+        if (outBlock >= dstLength)
+            return;
+
+        uint srcIdx = id_z * strides.x + outBlock * numChannels;
+        uint dstIdx = id_z * strides.y + outBlock * numChannels;
+        for (int outPos = outBlock; outPos < blockEnd; outPos++, dstIdx += numChannels, srcIdx += numChannels)
+        {
+            for (int c = 0; c < numChannels; c++)
+                dstPtr[dstIdx + c] = srcPtr[srcIdx + c];
+        }
+    }
+}
+
+// -------------------- Set 3 - resample kernels executor --------------------
+
+RppStatus hip_exec_resample_tensor(Rpp32f *srcPtr,
+                                   RpptDescPtr srcDescPtr,
+                                   Rpp32f *dstPtr,
+                                   RpptDescPtr dstDescPtr,
+                                   Rpp32f *inRateTensor,
+                                   Rpp32f *outRateTensor,
+                                   Rpp32s *srcDimsTensor,
+                                   RpptResamplingWindow &window,
+                                   rpp::Handle& handle)
+{
+    Rpp32s globalThreads_x = dstDescPtr->strides.hStride;
+    Rpp32s globalThreads_y = 1;
+    Rpp32s globalThreads_z = dstDescPtr->n;
+    Rpp32u tensorDims = srcDescPtr->numDims - 1;    // exclude batchsize from input dims
+    RppSize_t sharedMemorySizeInBytes = (window.lookupSize * sizeof(Rpp32f));    // shared memory size needed for resample kernel
+
+    // compute the output dims from the input and output sampling rates
+    Rpp32s *dstDimsTensor = reinterpret_cast<Rpp32s *>(handle.GetInitHandle()->mem.mgpu.scratchBufferPinned.floatmem);
+    compute_output_dims(inRateTensor, outRateTensor, srcDimsTensor, dstDimsTensor, dstDescPtr->n);
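    // Illustrative example: with inRate = 16000 Hz and outRate = 18400 Hz (the 1.15x case used by
    // the test suite), a 160000-sample input yields ceil(160000 * 18400 / 16000) = 184000 output
    // samples per channel; whenever the computed dstLength equals srcLength, the kernels above
    // simply copy the input through unchanged.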
+
+    // For 1D audio tensors (channels = 1)
+    if (tensorDims == 1)
+    {
+        hipLaunchKernelGGL(resample_single_channel_hip_tensor,
+                           dim3(ceil((Rpp32f)globalThreads_x/LOCAL_THREADS_X_1DIM), ceil((Rpp32f)globalThreads_y/LOCAL_THREADS_Y_1DIM), ceil((Rpp32f)globalThreads_z/LOCAL_THREADS_Z_1DIM)),
+                           dim3(LOCAL_THREADS_X_1DIM, LOCAL_THREADS_Y_1DIM, LOCAL_THREADS_Z_1DIM),
+                           sharedMemorySizeInBytes,
+                           handle.GetStream(),
+                           srcPtr,
+                           dstPtr,
+                           make_uint2(srcDescPtr->strides.nStride, dstDescPtr->strides.nStride),
+                           reinterpret_cast<int2 *>(srcDimsTensor),
+                           reinterpret_cast<int2 *>(dstDimsTensor),
+                           inRateTensor,
+                           outRateTensor,
+                           &window);
+    }
+    // For 2D audio tensors (channels > 1)
+    else if (tensorDims == 2)
+    {
+        hipLaunchKernelGGL(resample_multi_channel_hip_tensor,
+                           dim3(ceil((Rpp32f)globalThreads_x/LOCAL_THREADS_X_1DIM), ceil((Rpp32f)globalThreads_y/LOCAL_THREADS_Y_1DIM), ceil((Rpp32f)globalThreads_z/LOCAL_THREADS_Z_1DIM)),
+                           dim3(LOCAL_THREADS_X_1DIM, LOCAL_THREADS_Y_1DIM, LOCAL_THREADS_Z_1DIM),
+                           sharedMemorySizeInBytes,
+                           handle.GetStream(),
+                           srcPtr,
+                           dstPtr,
+                           make_uint2(srcDescPtr->strides.nStride, dstDescPtr->strides.nStride),
+                           reinterpret_cast<int2 *>(srcDimsTensor),
+                           reinterpret_cast<int2 *>(dstDimsTensor),
+                           inRateTensor,
+                           outRateTensor,
+                           &window);
+    }
+
+    return RPP_SUCCESS;
+}
diff --git a/src/modules/rppt_tensor_audio_augmentations.cpp b/src/modules/rppt_tensor_audio_augmentations.cpp
index 673f8d6de..536f537a3 100644
--- a/src/modules/rppt_tensor_audio_augmentations.cpp
+++ b/src/modules/rppt_tensor_audio_augmentations.cpp
@@ -298,9 +298,12 @@ RppStatus rppt_non_silent_region_detection_gpu(RppPtr_t srcPtr,
                                                rppHandle_t rppHandle)
 {
 #ifdef HIP_COMPILE
+    Rpp32u tensorDims = srcDescPtr->numDims - 1;    // exclude batchsize from input dims
+    if (tensorDims != 1)
+        return RPP_ERROR_INVALID_SRC_DIMS;
+
     if (srcDescPtr->dataType == RpptDataType::F32)
     {
-
         return hip_exec_non_silent_region_detection_tensor(static_cast<Rpp32f *>(srcPtr),
                                                            srcDescPtr,
                                                            srcLengthTensor,
@@ -480,7 +483,47 @@ RppStatus rppt_mel_filter_bank_gpu(RppPtr_t srcPtr,
         return RPP_ERROR_NOT_IMPLEMENTED;
     }
- #elif defined(OCL_COMPILE)
+#elif defined(OCL_COMPILE)
+    return RPP_ERROR_NOT_IMPLEMENTED;
+#endif // backend
+}
+
+/******************** resample ********************/
+
+RppStatus rppt_resample_gpu(RppPtr_t srcPtr,
+                            RpptDescPtr srcDescPtr,
+                            RppPtr_t dstPtr,
+                            RpptDescPtr dstDescPtr,
+                            Rpp32f *inRateTensor,
+                            Rpp32f *outRateTensor,
+                            Rpp32s *srcDimsTensor,
+                            RpptResamplingWindow &window,
+                            rppHandle_t rppHandle)
+{
+#ifdef HIP_COMPILE
+    Rpp32u tensorDims = srcDescPtr->numDims - 1;    // exclude batchsize from input dims
+    if (tensorDims != 1 && tensorDims != 2)
+        return RPP_ERROR_INVALID_SRC_DIMS;
+
+    if (srcDescPtr->dataType == RpptDataType::F32)
+    {
+        hip_exec_resample_tensor(static_cast<Rpp32f *>(srcPtr),
+                                 srcDescPtr,
+                                 static_cast<Rpp32f *>(dstPtr),
+                                 dstDescPtr,
+                                 inRateTensor,
+                                 outRateTensor,
+                                 srcDimsTensor,
+                                 window,
+                                 rpp::deref(rppHandle));
+        return RPP_SUCCESS;
+    }
+    else
+    {
+        return RPP_ERROR_NOT_IMPLEMENTED;
+    }
+
+#elif defined(OCL_COMPILE)
     return RPP_ERROR_NOT_IMPLEMENTED;
 #endif // backend
 }
diff --git a/src/modules/rppt_tensor_effects_augmentations.cpp b/src/modules/rppt_tensor_effects_augmentations.cpp
index 789767735..d17f6dd93 100644
--- a/src/modules/rppt_tensor_effects_augmentations.cpp
+++ b/src/modules/rppt_tensor_effects_augmentations.cpp
@@ -2181,7 +2181,7 @@ RppStatus rppt_pixelate_gpu(RppPtr_t srcPtr,
     interDesc = *srcDescPtr;
     RpptDescPtr interDescPtr = &interDesc;
-    RpptImagePatchPtr internalDstImgSizes = reinterpret_cast<RpptImagePatchPtr>(rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem);
+    RpptImagePatchPtr internalDstImgSizes = reinterpret_cast<RpptImagePatchPtr>(rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.scratchBufferPinned.floatmem);
     RpptROI *internalRoiTensorPtrSrc = reinterpret_cast<RpptROI *>(internalDstImgSizes + dstDescPtr->n);
     for (int i = 0; i < srcDescPtr->n; i++)
diff --git a/utilities/test_suite/HIP/Tensor_audio_hip.cpp b/utilities/test_suite/HIP/Tensor_audio_hip.cpp
index 9d42680fe..6076d9238 100644
--- a/utilities/test_suite/HIP/Tensor_audio_hip.cpp
+++ 
b/utilities/test_suite/HIP/Tensor_audio_hip.cpp @@ -30,8 +30,8 @@ int main(int argc, char **argv) const int MIN_ARG_COUNT = 7; if (argc < MIN_ARG_COUNT) { - printf("\nImproper Usage! Needs all arguments!\n"); - printf("\nUsage: ./Tensor_audio_hip \n"); + cout << "\nImproper Usage! Needs all arguments!\n"; + cout << "\nUsage: ./Tensor_audio_hip \n"; return -1; } @@ -55,7 +55,7 @@ int main(int argc, char **argv) if (funcName.empty()) { if (testType == 0) - printf("\ncase %d is not supported\n", testCase); + cout << "\ncase " << testCase << " is not supported\n"; return -1; } @@ -111,7 +111,6 @@ int main(int argc, char **argv) maxDstChannels = 1; } set_audio_descriptor_dims_and_strides(dstDescPtr, batchSize, maxDstHeight, maxDstWidth, maxDstChannels, offsetInBytes); - // set buffer sizes for src/dst if(testCase == 7) { @@ -124,9 +123,14 @@ int main(int argc, char **argv) oBufferSize = (Rpp64u)dstDescPtr->h * (Rpp64u)dstDescPtr->w * (Rpp64u)dstDescPtr->c * (Rpp64u)dstDescPtr->n; } + // compute maximum possible buffer size of resample + unsigned long long resampleMaxBufferSize = dstDescPtr->n * dstDescPtr->strides.nStride * 1.15; + if (testCase == 6) + oBufferSize = resampleMaxBufferSize; + // allocate hip buffers for input & output - Rpp32f *inputf32 = (Rpp32f *)calloc(iBufferSize, sizeof(Rpp32f)); - Rpp32f *outputf32 = (Rpp32f *)calloc(oBufferSize, sizeof(Rpp32f)); + Rpp32f *inputf32 = static_cast(calloc(iBufferSize, sizeof(Rpp32f))); + Rpp32f *outputf32 = static_cast(calloc(oBufferSize, sizeof(Rpp32f))); void *d_inputf32, *d_outputf32; CHECK_RETURN_STATUS(hipMalloc(&d_inputf32, iBufferSize * sizeof(Rpp32f))); @@ -142,6 +146,10 @@ int main(int argc, char **argv) CHECK_RETURN_STATUS(hipHostMalloc(&srcDims, batchSize * sizeof(RpptImagePatch))); CHECK_RETURN_STATUS(hipHostMalloc(&dstDims, batchSize * sizeof(RpptImagePatch))); + // allocate the buffer for srcDimsTensor + Rpp32s *srcDimsTensor; + CHECK_RETURN_STATUS(hipHostMalloc(&srcDimsTensor, batchSize * 2 * sizeof(Rpp32s))); + Rpp32s *detectedIndex = nullptr, *detectionLength = nullptr; if(testCase == 0) { @@ -149,10 +157,16 @@ int main(int argc, char **argv) CHECK_RETURN_STATUS(hipHostMalloc(&detectionLength, batchSize * sizeof(Rpp32f))); } - // allocate the buffer for srcDimsTensor - Rpp32s *srcDimsTensor; - CHECK_RETURN_STATUS(hipHostMalloc(&srcDimsTensor, batchSize * 2 * sizeof(Rpp32s))); - Rpp32f *coeff; + // declare pointer of type RpptResamplingWindow used for resample augmentation + Rpp32f *inRateTensor = nullptr, *outRateTensor = nullptr; + RpptResamplingWindow *window = nullptr; + if (testCase == 6) + { + CHECK_RETURN_STATUS(hipHostMalloc(&inRateTensor, batchSize * sizeof(Rpp32f))); + CHECK_RETURN_STATUS(hipHostMalloc(&outRateTensor, batchSize * sizeof(Rpp32f))); + } + + Rpp32f *coeff = nullptr; if(testCase == 2) CHECK_RETURN_STATUS(hipHostMalloc(&coeff, batchSize * sizeof(Rpp32f))); @@ -165,7 +179,7 @@ int main(int argc, char **argv) int noOfIterations = static_cast(audioNames.size()) / batchSize; double maxWallTime = 0, minWallTime = 500, avgWallTime = 0; string testCaseName; - printf("\nRunning %s %d times (each time with a batch size of %d images) and computing mean statistics...", func.c_str(), numRuns, batchSize); + cout << "\nRunning " << func << " " << numRuns << " times (each time with a batch size of " << batchSize << " images) and computing mean statistics..."; for (int iterCount = 0; iterCount < noOfIterations; iterCount++) { // read and decode audio and fill the audio dim values @@ -242,6 +256,47 @@ int main(int argc, 
char **argv) break; } + case 6: + { + testCaseName = "resample"; + + maxDstWidth = 0; + for(int i = 0, j = 0; i < batchSize; i++, j += 2) + { + inRateTensor[i] = 16000; + outRateTensor[i] = 16000 * 1.15f; + Rpp32f scaleRatio = outRateTensor[i] / inRateTensor[i]; + srcDimsTensor[j] = srcLengthTensor[i]; + srcDimsTensor[j + 1] = channelsTensor[i]; + dstDims[i].width = static_cast(std::ceil(scaleRatio * srcLengthTensor[i])); + dstDims[i].height = 1; + maxDstWidth = std::max(maxDstWidth, static_cast(dstDims[i].width)); + } + Rpp32f quality = 50.0f; + Rpp32s lobes = std::round(0.007 * quality * quality - 0.09 * quality + 3); + Rpp32s lookupSize = lobes * 64 + 1; + if (window == nullptr) + { + CHECK_RETURN_STATUS(hipHostMalloc(&window, sizeof(RpptResamplingWindow))); + windowed_sinc(*window, lookupSize, lobes); + } + + dstDescPtr->w = maxDstWidth; + dstDescPtr->strides.nStride = dstDescPtr->c * dstDescPtr->w * dstDescPtr->h; + + // check if the required output buffer size is greater than predefined resampleMaxBufferSize + if (dstDescPtr->n * dstDescPtr->strides.nStride > resampleMaxBufferSize) + { + std::cout << "\nError! Requested resample output size is greater than predefined max size for resample in test suite." + "\nPlease modify resampleMaxBufferSize value in test suite as per your requirements for running resample kernel" << std::endl; + exit(0); + } + + startWallTime = omp_get_wtime(); + rppt_resample_gpu(d_inputf32, srcDescPtr, d_outputf32, dstDescPtr, inRateTensor, outRateTensor, srcDimsTensor, *window, handle); + + break; + } case 7: { testCaseName = "mel_filter_bank"; @@ -279,7 +334,7 @@ int main(int argc, char **argv) endWallTime = omp_get_wtime(); if (missingFuncFlag == 1) { - printf("\nThe functionality %s doesn't yet exist in RPP\n", func.c_str()); + cout << "\nThe functionality " << func << " doesn't yet exist in RPP\n"; return -1; } @@ -339,7 +394,7 @@ int main(int argc, char **argv) CHECK_RETURN_STATUS(hipFree(d_outputf32)); CHECK_RETURN_STATUS(hipHostFree(srcLengthTensor)); CHECK_RETURN_STATUS(hipHostFree(channelsTensor)); - if(testCase == 2) + if(coeff != nullptr) CHECK_RETURN_STATUS(hipHostFree(coeff)); CHECK_RETURN_STATUS(hipHostFree(srcDims)); CHECK_RETURN_STATUS(hipHostFree(dstDims)); @@ -348,5 +403,16 @@ int main(int argc, char **argv) CHECK_RETURN_STATUS(hipHostFree(detectedIndex)); if (detectionLength != nullptr) CHECK_RETURN_STATUS(hipHostFree(detectionLength)); + if (window != nullptr) + { + if (window->lookup != nullptr) + CHECK_RETURN_STATUS(hipHostFree(window->lookup)); + CHECK_RETURN_STATUS(hipHostFree(window)); + } + if (inRateTensor != nullptr) + CHECK_RETURN_STATUS(hipHostFree(inRateTensor)); + if (outRateTensor != nullptr) + CHECK_RETURN_STATUS(hipHostFree(outRateTensor)); + return 0; } diff --git a/utilities/test_suite/HIP/Tensor_hip.cpp b/utilities/test_suite/HIP/Tensor_hip.cpp index 685c21203..bfdc008f4 100644 --- a/utilities/test_suite/HIP/Tensor_hip.cpp +++ b/utilities/test_suite/HIP/Tensor_hip.cpp @@ -80,26 +80,26 @@ int main(int argc, char **argv) if (verbosity == 1) { - printf("\nInputs for this test case are:"); - printf("\nsrc1 = %s", argv[1]); - printf("\nsrc2 = %s", argv[2]); + cout << "\nInputs for this test case are:"; + cout << "\nsrc1 = " << argv[1]; + cout << "\nsrc2 = " << argv[2]; if (testType == 0) - printf("\ndst = %s", argv[3]); - printf("\nu8 / f16 / f32 / u8->f16 / u8->f32 / i8 / u8->i8 (0/1/2/3/4/5/6) = %s", argv[4]); - printf("\noutputFormatToggle (pkd->pkd = 0 / pkd->pln = 1) = %s", argv[5]); - printf("\ncase number (0:91) = %s", 
argv[6]); - printf("\nnumber of times to run = %s", argv[8]); - printf("\ntest type - (0 = unit tests / 1 = performance tests) = %s", argv[9]); - printf("\nlayout type - (0 = PKD3/ 1 = PLN3/ 2 = PLN1) = %s", argv[10]); - printf("\nqa mode - 0/1 = %s", argv[12]); - printf("\ndecoder type - (0 = TurboJPEG / 1 = OpenCV) = %s", argv[13]); - printf("\nbatch size = %s", argv[14]); + cout << "\ndst = " << argv[3]; + cout << "\nu8 / f16 / f32 / u8->f16 / u8->f32 / i8 / u8->i8 (0/1/2/3/4/5/6) = " << argv[4]; + cout << "\noutputFormatToggle (pkd->pkd = 0 / pkd->pln = 1) = " << argv[5]; + cout << "\ncase number (0:91) = " << argv[6]; + cout << "\nnumber of times to run = " << argv[8]; + cout << "\ntest type - (0 = unit tests / 1 = performance tests) = " << argv[9]; + cout << "\nlayout type - (0 = PKD3/ 1 = PLN3/ 2 = PLN1) = " << argv[10]; + cout << "\nqa mode - 0/1 = " << argv[12]; + cout << "\ndecoder type - (0 = TurboJPEG / 1 = OpenCV) = " << argv[13]; + cout << "\nbatch size = " << argv[14]; } if (argc < MIN_ARG_COUNT) { - printf("\nImproper Usage! Needs all arguments!\n"); - printf("\nUsage: f16 = 3 / u8->f32 = 4 / i8 = 5 / u8->i8 = 6> pkd = 0 / pkd->pln = 1)> 0> < qa mode (0/1)> 1> >\n"); + cout << "\nImproper Usage! Needs all arguments!\n"; + cout << "\nUsage: f16 = 3 / u8->f32 = 4 / i8 = 5 / u8->i8 = 6> pkd = 0 / pkd->pln = 1)> 0> 1> >\n"; return -1; } @@ -107,24 +107,24 @@ int main(int argc, char **argv) { if(testCase == 36 || testCase == 31 || testCase == 35 || testCase == 45 || testCase == 86) { - printf("\ncase %d does not exist for PLN1 layout\n", testCase); + cout << "\ncase " << testCase << " does not exist for PLN1 layout\n"; return -1; } else if (outputFormatToggle != 0) { - printf("\nPLN1 cases don't have outputFormatToggle! Please input outputFormatToggle = 0\n"); + cout << "\nPLN1 cases don't have outputFormatToggle! Please input outputFormatToggle = 0\n"; return -1; } } if(pln1OutTypeCase && outputFormatToggle != 0) { - printf("\ntest case %d don't have outputFormatToggle! Please input outputFormatToggle = 0\n", testCase); + cout << "\ntest case " << testCase << " don't have outputFormatToggle! Please input outputFormatToggle = 0\n"; return -1; } else if (reductionTypeCase && outputFormatToggle != 0) { - printf("\nReduction Kernels don't have outputFormatToggle! Please input outputFormatToggle = 0\n"); + cout << "\nReduction Kernels don't have outputFormatToggle! 
Please input outputFormatToggle = 0\n"; return -1; } else if(batchSize > MAX_BATCH_SIZE) @@ -143,7 +143,7 @@ int main(int argc, char **argv) if (funcName.empty()) { if (testType == 0) - printf("\ncase %d is not supported\n", testCase); + cout << "\ncase " << testCase << " is not supported\n"; return -1; } @@ -420,7 +420,7 @@ int main(int argc, char **argv) CHECK_RETURN_STATUS(hipHostMalloc(&d_interDstPtr, srcDescPtr->strides.nStride * srcDescPtr->n * sizeof(Rpp32f))); // case-wise RPP API and measure time script for Unit and Performance test - printf("\nRunning %s %d times (each time with a batch size of %d images) and computing mean statistics...", func.c_str(), numRuns, batchSize); + cout << "\nRunning " << func << " " << numRuns << " times (each time with a batch size of " << batchSize << " images) and computing mean statistics..."; for(int iterCount = 0; iterCount < noOfIterations; iterCount++) { vector::const_iterator imagesPathStart = imageNamesPath.begin() + (iterCount * batchSize); @@ -1462,7 +1462,7 @@ int main(int argc, char **argv) wallTime = endWallTime - startWallTime; if (missingFuncFlag == 1) { - printf("\nThe functionality %s doesn't yet exist in RPP\n", func.c_str()); + cout << "\nThe functionality " << func << " doesn't yet exist in RPP\n"; return -1; } @@ -1474,15 +1474,18 @@ int main(int argc, char **argv) if (testType == 0) { - cout << "\n\nGPU Backend Wall Time: " << wallTime <<" ms/batch"<< endl; + cout <<"\n\n"; + if(noOfIterations > 1) + cout <<"Execution Timings for Iteration "<< iterCount+1 <<":"<c == 3) - printf("\nReduction result (Batch of 3 channel images produces 4 results per image in batch): "); + cout << "\nReduction result (Batch of 3 channel images produces 4 results per image in batch): "; else if(srcDescPtr->c == 1) { - printf("\nReduction result (Batch of 1 channel images produces 1 result per image in batch): "); + cout << "\nReduction result (Batch of 1 channel images produces 1 result per image in batch): "; reductionFuncResultArrLength = srcDescPtr->n; } @@ -1511,7 +1514,7 @@ int main(int argc, char **argv) else print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); } - printf("\n"); + cout << "\n"; /*Compare the output of the function with golden outputs only if 1.QA Flag is set diff --git a/utilities/test_suite/HIP/Tensor_misc_hip.cpp b/utilities/test_suite/HIP/Tensor_misc_hip.cpp index cb0d53b34..b4fa560e8 100644 --- a/utilities/test_suite/HIP/Tensor_misc_hip.cpp +++ b/utilities/test_suite/HIP/Tensor_misc_hip.cpp @@ -30,8 +30,8 @@ int main(int argc, char **argv) const int MIN_ARG_COUNT = 9; if (argc < MIN_ARG_COUNT) { - printf("\nImproper Usage! Needs all arguments!\n"); - printf("\nUsage: ./Tensor_misc_hip