From e356d50093ce2f2b34637258dfac64feb04e086b Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Thu, 30 Mar 2023 23:54:17 -0700 Subject: [PATCH 01/15] added GetNumThreads() function to handle in RPP --- src/include/common/rpp/handle.hpp | 2 ++ src/modules/handlehost.cpp | 6 ++++++ src/modules/hip/handlehip.cpp | 6 ++++++ 3 files changed, 14 insertions(+) diff --git a/src/include/common/rpp/handle.hpp b/src/include/common/rpp/handle.hpp index 1940b494f..e2777ede4 100644 --- a/src/include/common/rpp/handle.hpp +++ b/src/include/common/rpp/handle.hpp @@ -72,6 +72,7 @@ struct Handle : rppHandle InitHandle* GetInitHandle() const; size_t GetBatchSize() const; + Rpp32u GetNumThreads() const; void SetBatchSize(size_t bSize) const; void rpp_destroy_object_host(); std::unique_ptr impl; @@ -88,6 +89,7 @@ struct Handle : rppHandle ~Handle(); InitHandle* GetInitHandle() const; size_t GetBatchSize() const; + Rpp32u GetNumThreads() const; void SetBatchSize(size_t bSize) const; void rpp_destroy_object_host(); diff --git a/src/modules/handlehost.cpp b/src/modules/handlehost.cpp index 5bb0fb5e8..9c563b9c4 100644 --- a/src/modules/handlehost.cpp +++ b/src/modules/handlehost.cpp @@ -40,6 +40,7 @@ namespace rpp { struct HandleImpl { size_t nBatchSize = 1; + Rpp32u numThreads = 1; InitHandle* initHandle = nullptr; void PreInitializeBufferCPU() @@ -81,6 +82,11 @@ size_t Handle::GetBatchSize() const return this->impl->nBatchSize; } +Rpp32u Handle::GetNumThreads() const +{ + return this->impl->numThreads; +} + void Handle::SetBatchSize(size_t bSize) const { this->impl->nBatchSize = bSize; diff --git a/src/modules/hip/handlehip.cpp b/src/modules/hip/handlehip.cpp index 795ebff54..61a429de5 100644 --- a/src/modules/hip/handlehip.cpp +++ b/src/modules/hip/handlehip.cpp @@ -129,6 +129,7 @@ struct HandleImpl bool enable_profiling = false; float profiling_result = 0.0; size_t nBatchSize = 1; + Rpp32u numThreads = 1; InitHandle* initHandle = nullptr; HandleImpl() : ctx(get_ctx()) {} @@ -385,6 +386,11 @@ size_t Handle::GetBatchSize() const return this->impl->nBatchSize; } +Rpp32u Handle::GetNumThreads() const +{ + return this->impl->numThreads; +} + void Handle::SetBatchSize(size_t bSize) const { this->impl->nBatchSize = bSize; From a42fb8b207af0697abef68e5dcc3537c55ffcf4f Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Thu, 30 Mar 2023 23:54:47 -0700 Subject: [PATCH 02/15] added numthreads changes for all tensor host augmentations and batchpd crop and cmn augmentations made changes to set numthreads as batchsize if numthreads value is 0 --- src/modules/cpu/host_fused_functions.hpp | 32 ++--- src/modules/cpu/kernel/blend.hpp | 20 ++-- src/modules/cpu/kernel/brightness.hpp | 20 ++-- src/modules/cpu/kernel/color_cast.hpp | 20 ++-- src/modules/cpu/kernel/color_jitter.hpp | 20 ++-- src/modules/cpu/kernel/color_to_greyscale.hpp | 20 ++-- src/modules/cpu/kernel/color_twist.hpp | 20 ++-- src/modules/cpu/kernel/contrast.hpp | 20 ++-- src/modules/cpu/kernel/copy.hpp | 36 +++--- src/modules/cpu/kernel/crop.hpp | 20 ++-- .../cpu/kernel/crop_mirror_normalize.hpp | 30 +++-- src/modules/cpu/kernel/exposure.hpp | 20 ++-- src/modules/cpu/kernel/flip.hpp | 20 ++-- src/modules/cpu/kernel/gamma_correction.hpp | 20 ++-- src/modules/cpu/kernel/gridmask.hpp | 20 ++-- src/modules/cpu/kernel/noise_gaussian.hpp | 20 ++-- .../cpu/kernel/noise_salt_and_pepper.hpp | 20 ++-- src/modules/cpu/kernel/noise_shot.hpp | 20 ++-- src/modules/cpu/kernel/resize.hpp | 45 ++++--- src/modules/cpu/kernel/resize_crop_mirror.hpp | 20 ++-- 
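A minimal standalone sketch of the accessor pattern the first patch introduces, assuming a heavily simplified HandleImpl (the real RPP handle also owns streams, profiling state and pre-initialized buffers); only the numThreads plumbing behind Handle::GetNumThreads() is shown:

    // Simplified sketch of the GetNumThreads() accessor added in PATCH 01/15.
    // Real RPP Handle/HandleImpl carry many more members; only the thread-count
    // plumbing is reproduced here.
    #include <cstddef>
    #include <cstdint>
    #include <memory>

    using Rpp32u = uint32_t;

    struct HandleImpl
    {
        size_t nBatchSize = 1;
        Rpp32u numThreads = 1;   // defaults to 1, as in handlehost.cpp / handlehip.cpp
    };

    struct Handle
    {
        std::unique_ptr<HandleImpl> impl = std::make_unique<HandleImpl>();

        size_t GetBatchSize() const  { return impl->nBatchSize; }
        Rpp32u GetNumThreads() const { return impl->numThreads; }   // new accessor
        void   SetBatchSize(size_t bSize) { impl->nBatchSize = bSize; }
    };

numThreads defaults to 1 in both the host and HIP handle implementations, matching the hunks above.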
.../cpu/kernel/resize_mirror_normalize.hpp | 30 +++-- src/modules/cpu/kernel/spatter.hpp | 20 ++-- src/modules/cpu/kernel/swap_channels.hpp | 20 ++-- src/modules/handlehost.cpp | 4 +- src/modules/hip/handlehip.cpp | 4 +- src/modules/rppi_fused_functions.cpp | 42 ++++--- .../rppt_tensor_color_augmentations.cpp | 96 ++++++++++----- .../rppt_tensor_data_exchange_operations.cpp | 36 ++++-- .../rppt_tensor_effects_augmentations.cpp | 60 ++++++---- .../rppt_tensor_geometric_augmentations.cpp | 112 ++++++++++++------ 30 files changed, 551 insertions(+), 336 deletions(-) diff --git a/src/modules/cpu/host_fused_functions.hpp b/src/modules/cpu/host_fused_functions.hpp index 54357d967..3a3bd3b08 100644 --- a/src/modules/cpu/host_fused_functions.hpp +++ b/src/modules/cpu/host_fused_functions.hpp @@ -2919,12 +2919,12 @@ RppStatus crop_mirror_normalize_host_batch(T* srcPtr, RppiSize *batch_srcSize, R Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -3553,12 +3553,12 @@ RppStatus crop_mirror_normalize_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_s Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -3833,12 +3833,12 @@ RppStatus crop_mirror_normalize_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_s Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4159,12 +4159,12 @@ RppStatus crop_mirror_normalize_u8_f_host_batch(T* srcPtr, RppiSize *batch_srcSi Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4564,7 +4564,7 @@ RppStatus crop_mirror_normalize_u8_i8_host_batch(Rpp8u* srcPtr, RppiSize *batch_ Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag, Rpp32u 
outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { Rpp32u srcBufferSize = nbatchSize * batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * channel; Rpp32u dstBufferSize = nbatchSize * batch_dstSizeMax[0].height * batch_dstSizeMax[0].width * channel; @@ -4586,7 +4586,7 @@ RppStatus crop_mirror_normalize_u8_i8_host_batch(Rpp8u* srcPtr, RppiSize *batch_ crop_mirror_normalize_f32_host_batch(srcPtrf32, batch_srcSize, batch_srcSizeMax, dstPtrf32, batch_dstSize, batch_dstSizeMax, batch_crop_pos_x, batch_crop_pos_y, batch_mean, batch_stdDev, batch_mirrorFlag, outputFormatToggle, - nbatchSize, chnFormat, channel); + nbatchSize, chnFormat, channel, numThreads); Rpp8s *dstPtrTemp; Rpp32f *dstPtrf32Temp; @@ -4612,12 +4612,12 @@ template RppStatus crop_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_crop_pos_x, Rpp32u *batch_crop_pos_y, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4770,12 +4770,12 @@ template RppStatus crop_host_u_f_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_crop_pos_x, Rpp32u *batch_crop_pos_y, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4954,12 +4954,12 @@ template RppStatus crop_host_u_i_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_crop_pos_x, Rpp32u *batch_crop_pos_y, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; diff --git a/src/modules/cpu/kernel/blend.hpp b/src/modules/cpu/kernel/blend.hpp index 156ad9763..a2111bc11 100644 --- a/src/modules/cpu/kernel/blend.hpp +++ b/src/modules/cpu/kernel/blend.hpp @@ -32,12 +32,13 @@ RppStatus blend_u8_u8_host_tensor(Rpp8u *srcPtr1, Rpp32f *alphaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for 
num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -276,12 +277,13 @@ RppStatus blend_f32_f32_host_tensor(Rpp32f *srcPtr1, Rpp32f *alphaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -499,12 +501,13 @@ RppStatus blend_f16_f16_host_tensor(Rpp16f *srcPtr1, Rpp32f *alphaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -768,12 +771,13 @@ RppStatus blend_i8_i8_host_tensor(Rpp8s *srcPtr1, Rpp32f *alphaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/brightness.hpp b/src/modules/cpu/kernel/brightness.hpp index 44c7ff07e..cc0bf06e0 100644 --- a/src/modules/cpu/kernel/brightness.hpp +++ b/src/modules/cpu/kernel/brightness.hpp @@ -32,12 +32,13 @@ RppStatus brightness_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32f *betaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -244,12 +245,13 @@ RppStatus brightness_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32f *betaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -462,12 +464,13 @@ RppStatus brightness_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32f *betaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -716,12 +719,13 @@ RppStatus brightness_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32f *betaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams 
layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/color_cast.hpp b/src/modules/cpu/kernel/color_cast.hpp index 601720689..72c2b75dc 100644 --- a/src/modules/cpu/kernel/color_cast.hpp +++ b/src/modules/cpu/kernel/color_cast.hpp @@ -32,12 +32,13 @@ RppStatus color_cast_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32f *alphaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -285,12 +286,13 @@ RppStatus color_cast_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32f *alphaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -538,12 +540,13 @@ RppStatus color_cast_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32f *alphaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -847,12 +850,13 @@ RppStatus color_cast_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32f *alphaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/color_jitter.hpp b/src/modules/cpu/kernel/color_jitter.hpp index 8dd432961..928a031c4 100644 --- a/src/modules/cpu/kernel/color_jitter.hpp +++ b/src/modules/cpu/kernel/color_jitter.hpp @@ -34,12 +34,13 @@ RppStatus color_jitter_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32f *saturationTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -292,12 +293,13 @@ RppStatus color_jitter_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32f *saturationTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + 
RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -550,12 +552,13 @@ RppStatus color_jitter_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32f *saturationTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -864,12 +867,13 @@ RppStatus color_jitter_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32f *saturationTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/color_to_greyscale.hpp b/src/modules/cpu/kernel/color_to_greyscale.hpp index 3f5b9f445..9b0e6c21d 100644 --- a/src/modules/cpu/kernel/color_to_greyscale.hpp +++ b/src/modules/cpu/kernel/color_to_greyscale.hpp @@ -29,10 +29,11 @@ RppStatus color_to_greyscale_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp8u *dstPtr, RpptDescPtr dstDescPtr, Rpp32f *channelWeights, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp8u *srcPtrImage, *dstPtrImage; @@ -160,10 +161,11 @@ RppStatus color_to_greyscale_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32f *dstPtr, RpptDescPtr dstDescPtr, Rpp32f *channelWeights, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp32f *srcPtrImage, *dstPtrImage; @@ -295,10 +297,11 @@ RppStatus color_to_greyscale_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp16f *dstPtr, RpptDescPtr dstDescPtr, Rpp32f *channelWeights, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp16f *srcPtrImage, *dstPtrImage; @@ -445,10 +448,11 @@ RppStatus color_to_greyscale_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp8s *dstPtr, RpptDescPtr dstDescPtr, Rpp32f *channelWeights, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp8s *srcPtrImage, *dstPtrImage; diff --git a/src/modules/cpu/kernel/color_twist.hpp b/src/modules/cpu/kernel/color_twist.hpp 
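Every CPU tensor kernel in this patch follows the same mechanical change visible in the blend, brightness, color_cast and color_to_greyscale hunks above and in the kernels that follow: a trailing Rpp32u numThreads parameter is appended to the signature, and the per-image OpenMP loop is capped by that value instead of by dstDescPtr->n. A reduced sketch of the loop skeleton, with a hypothetical kernel name and the per-image SIMD work elided:

    // Reduced sketch of the OpenMP pattern used by the tensor host kernels after
    // this patch. The per-image work is elided; real kernels compute ROIs and
    // process rows with vectorized code inside the loop body.
    #include <omp.h>

    using Rpp32u = unsigned int;

    void example_host_tensor_kernel(int batchSize, Rpp32u numThreads)
    {
        omp_set_dynamic(0);                           // keep the requested team size
    #pragma omp parallel for num_threads(numThreads)  // was: num_threads(dstDescPtr->n)
        for (int batchCount = 0; batchCount < batchSize; batchCount++)
        {
            // ... per-image augmentation work for image `batchCount` ...
        }
    }

Pinning the team size with omp_set_dynamic(0) plus num_threads(...) keeps the thread count deterministic; previously it was always tied to the batch size.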
index 409e86e36..423cd2c9d 100644 --- a/src/modules/cpu/kernel/color_twist.hpp +++ b/src/modules/cpu/kernel/color_twist.hpp @@ -34,12 +34,13 @@ RppStatus color_twist_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32f *saturationTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -357,12 +358,13 @@ RppStatus color_twist_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32f *saturationTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -660,12 +662,13 @@ RppStatus color_twist_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32f *saturationTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1003,12 +1006,13 @@ RppStatus color_twist_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32f *saturationTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/contrast.hpp b/src/modules/cpu/kernel/contrast.hpp index 5855584bc..711065c65 100644 --- a/src/modules/cpu/kernel/contrast.hpp +++ b/src/modules/cpu/kernel/contrast.hpp @@ -32,12 +32,13 @@ RppStatus contrast_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32f *contrastCenterTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -217,12 +218,13 @@ RppStatus contrast_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32f *contrastCenterTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -403,12 +405,13 @@ RppStatus contrast_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32f *contrastCenterTensor, 
RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -621,12 +624,13 @@ RppStatus contrast_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32f *contrastCenterTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/copy.hpp b/src/modules/cpu/kernel/copy.hpp index 4b94cf168..678285c8d 100644 --- a/src/modules/cpu/kernel/copy.hpp +++ b/src/modules/cpu/kernel/copy.hpp @@ -28,13 +28,14 @@ RppStatus copy_u8_u8_host_tensor(Rpp8u *srcPtr, RpptDescPtr srcDescPtr, Rpp8u *dstPtr, RpptDescPtr dstDescPtr, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW) if ((srcDescPtr->c == 1) || (srcDescPtr->layout == dstDescPtr->layout)) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp8u *srcPtrImage, *dstPtrImage; @@ -48,7 +49,7 @@ RppStatus copy_u8_u8_host_tensor(Rpp8u *srcPtr, else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp8u *srcPtrImage, *dstPtrImage; @@ -103,7 +104,7 @@ RppStatus copy_u8_u8_host_tensor(Rpp8u *srcPtr, else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp8u *srcPtrImage, *dstPtrImage; @@ -161,13 +162,14 @@ RppStatus copy_f32_f32_host_tensor(Rpp32f *srcPtr, RpptDescPtr srcDescPtr, Rpp32f *dstPtr, RpptDescPtr dstDescPtr, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW) if ((srcDescPtr->c == 1) || (srcDescPtr->layout == dstDescPtr->layout)) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp32f *srcPtrImage, *dstPtrImage; @@ -181,7 +183,7 @@ RppStatus copy_f32_f32_host_tensor(Rpp32f *srcPtr, else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp32f *srcPtrImage, 
*dstPtrImage; @@ -237,7 +239,7 @@ RppStatus copy_f32_f32_host_tensor(Rpp32f *srcPtr, else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp32f *srcPtrImage, *dstPtrImage; @@ -296,13 +298,14 @@ RppStatus copy_f16_f16_host_tensor(Rpp16f *srcPtr, RpptDescPtr srcDescPtr, Rpp16f *dstPtr, RpptDescPtr dstDescPtr, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW) if ((srcDescPtr->c == 1) || (srcDescPtr->layout == dstDescPtr->layout)) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp16f *srcPtrImage, *dstPtrImage; @@ -316,7 +319,7 @@ RppStatus copy_f16_f16_host_tensor(Rpp16f *srcPtr, else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp16f *srcPtrImage, *dstPtrImage; @@ -388,7 +391,7 @@ RppStatus copy_f16_f16_host_tensor(Rpp16f *srcPtr, else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp16f *srcPtrImage, *dstPtrImage; @@ -463,13 +466,14 @@ RppStatus copy_i8_i8_host_tensor(Rpp8s *srcPtr, RpptDescPtr srcDescPtr, Rpp8s *dstPtr, RpptDescPtr dstDescPtr, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW) if ((srcDescPtr->c == 1) || (srcDescPtr->layout == dstDescPtr->layout)) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp8s *srcPtrImage, *dstPtrImage; @@ -483,7 +487,7 @@ RppStatus copy_i8_i8_host_tensor(Rpp8s *srcPtr, else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp8s *srcPtrImage, *dstPtrImage; @@ -538,7 +542,7 @@ RppStatus copy_i8_i8_host_tensor(Rpp8s *srcPtr, else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp8s *srcPtrImage, *dstPtrImage; diff --git a/src/modules/cpu/kernel/crop.hpp b/src/modules/cpu/kernel/crop.hpp index bed2b1131..8d95b7de7 100644 --- a/src/modules/cpu/kernel/crop.hpp +++ b/src/modules/cpu/kernel/crop.hpp @@ -30,12 +30,13 @@ RppStatus crop_u8_u8_host_tensor(Rpp8u *srcPtr, 
RpptDescPtr dstDescPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -179,12 +180,13 @@ RppStatus crop_f32_f32_host_tensor(Rpp32f *srcPtr, RpptDescPtr dstDescPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -330,12 +332,13 @@ RppStatus crop_f16_f16_host_tensor(Rpp16f *srcPtr, RpptDescPtr dstDescPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -513,12 +516,13 @@ RppStatus crop_i8_i8_host_tensor(Rpp8s *srcPtr, RpptDescPtr dstDescPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/crop_mirror_normalize.hpp b/src/modules/cpu/kernel/crop_mirror_normalize.hpp index f0d70579b..fe72bd046 100644 --- a/src/modules/cpu/kernel/crop_mirror_normalize.hpp +++ b/src/modules/cpu/kernel/crop_mirror_normalize.hpp @@ -33,12 +33,13 @@ RppStatus crop_mirror_normalize_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -451,12 +452,13 @@ RppStatus crop_mirror_normalize_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -869,12 +871,13 @@ RppStatus crop_mirror_normalize_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, 
(Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1334,12 +1337,13 @@ RppStatus crop_mirror_normalize_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1752,12 +1756,13 @@ RppStatus crop_mirror_normalize_u8_f32_host_tensor(Rpp8u *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -2198,12 +2203,13 @@ RppStatus crop_mirror_normalize_u8_f16_host_tensor(Rpp8u *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/exposure.hpp b/src/modules/cpu/kernel/exposure.hpp index 40dd072a9..b2d7e8554 100644 --- a/src/modules/cpu/kernel/exposure.hpp +++ b/src/modules/cpu/kernel/exposure.hpp @@ -31,12 +31,13 @@ RppStatus exposure_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32f *exposureFactorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -215,12 +216,13 @@ RppStatus exposure_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32f *exposureFactorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -399,12 +401,13 @@ RppStatus exposure_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32f *exposureFactorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < 
dstDescPtr->n; batchCount++) { RpptROI roi; @@ -617,12 +620,13 @@ RppStatus exposure_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32f *exposureFactorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/flip.hpp b/src/modules/cpu/kernel/flip.hpp index 7463fc84b..aee3e7aee 100644 --- a/src/modules/cpu/kernel/flip.hpp +++ b/src/modules/cpu/kernel/flip.hpp @@ -32,12 +32,13 @@ RppStatus flip_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32u *verticalTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -308,12 +309,13 @@ RppStatus flip_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32u *verticalTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -585,12 +587,13 @@ RppStatus flip_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32u *verticalTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -902,12 +905,13 @@ RppStatus flip_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32u *verticalTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/gamma_correction.hpp b/src/modules/cpu/kernel/gamma_correction.hpp index b6993332b..7a112405a 100644 --- a/src/modules/cpu/kernel/gamma_correction.hpp +++ b/src/modules/cpu/kernel/gamma_correction.hpp @@ -31,12 +31,13 @@ RppStatus gamma_correction_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32f *gammaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ 
-179,12 +180,13 @@ RppStatus gamma_correction_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32f *gammaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -327,12 +329,13 @@ RppStatus gamma_correction_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32f *gammaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -475,12 +478,13 @@ RppStatus gamma_correction_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32f *gammaTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/gridmask.hpp b/src/modules/cpu/kernel/gridmask.hpp index 745b38243..4e925ef67 100644 --- a/src/modules/cpu/kernel/gridmask.hpp +++ b/src/modules/cpu/kernel/gridmask.hpp @@ -34,12 +34,13 @@ RppStatus gridmask_u8_u8_host_tensor(Rpp8u *srcPtr, RpptUintVector2D translateVector, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -438,12 +439,13 @@ RppStatus gridmask_f32_f32_host_tensor(Rpp32f *srcPtr, RpptUintVector2D translateVector, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -824,12 +826,13 @@ RppStatus gridmask_f16_f16_host_tensor(Rpp16f *srcPtr, RpptUintVector2D translateVector, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1263,12 +1266,13 @@ RppStatus gridmask_i8_i8_host_tensor(Rpp8s *srcPtr, RpptUintVector2D translateVector, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams 
layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/noise_gaussian.hpp b/src/modules/cpu/kernel/noise_gaussian.hpp index d7cfe4d56..9bb9147bb 100644 --- a/src/modules/cpu/kernel/noise_gaussian.hpp +++ b/src/modules/cpu/kernel/noise_gaussian.hpp @@ -45,12 +45,13 @@ RppStatus gaussian_noise_u8_u8_host_tensor(Rpp8u *srcPtr, RpptXorwowStateBoxMuller *xorwowInitialStatePtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -376,12 +377,13 @@ RppStatus gaussian_noise_f32_f32_host_tensor(Rpp32f *srcPtr, RpptXorwowStateBoxMuller *xorwowInitialStatePtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -696,12 +698,13 @@ RppStatus gaussian_noise_f16_f16_host_tensor(Rpp16f *srcPtr, RpptXorwowStateBoxMuller *xorwowInitialStatePtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1061,12 +1064,13 @@ RppStatus gaussian_noise_i8_i8_host_tensor(Rpp8s *srcPtr, RpptXorwowStateBoxMuller *xorwowInitialStatePtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/noise_salt_and_pepper.hpp b/src/modules/cpu/kernel/noise_salt_and_pepper.hpp index 6c57125e2..7cdb4775c 100644 --- a/src/modules/cpu/kernel/noise_salt_and_pepper.hpp +++ b/src/modules/cpu/kernel/noise_salt_and_pepper.hpp @@ -51,12 +51,13 @@ RppStatus salt_and_pepper_noise_u8_u8_host_tensor(Rpp8u *srcPtr, RpptXorwowState *xorwowInitialStatePtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -422,12 +423,13 @@ 
RppStatus salt_and_pepper_noise_f32_f32_host_tensor(Rpp32f *srcPtr, RpptXorwowState *xorwowInitialStatePtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -800,12 +802,13 @@ RppStatus salt_and_pepper_noise_f16_f16_host_tensor(Rpp16f *srcPtr, RpptXorwowState *xorwowInitialStatePtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1223,12 +1226,13 @@ RppStatus salt_and_pepper_noise_i8_i8_host_tensor(Rpp8s *srcPtr, RpptXorwowState *xorwowInitialStatePtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/noise_shot.hpp b/src/modules/cpu/kernel/noise_shot.hpp index e87344c4e..b630759f8 100644 --- a/src/modules/cpu/kernel/noise_shot.hpp +++ b/src/modules/cpu/kernel/noise_shot.hpp @@ -44,12 +44,13 @@ RppStatus shot_noise_u8_u8_host_tensor(Rpp8u *srcPtr, RpptXorwowState *xorwowInitialStatePtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -423,12 +424,13 @@ RppStatus shot_noise_f32_f32_host_tensor(Rpp32f *srcPtr, RpptXorwowState *xorwowInitialStatePtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -811,12 +813,13 @@ RppStatus shot_noise_f16_f16_host_tensor(Rpp16f *srcPtr, RpptXorwowState *xorwowInitialStatePtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1244,12 +1247,13 @@ RppStatus shot_noise_i8_i8_host_tensor(Rpp8s *srcPtr, RpptXorwowState *xorwowInitialStatePtr, RpptROIPtr 
roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/resize.hpp b/src/modules/cpu/kernel/resize.hpp index 35a2fe72a..9b66d63cb 100644 --- a/src/modules/cpu/kernel/resize.hpp +++ b/src/modules/cpu/kernel/resize.hpp @@ -34,12 +34,13 @@ RppStatus resize_nn_u8_u8_host_tensor(Rpp8u *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -253,12 +254,13 @@ RppStatus resize_nn_f32_f32_host_tensor(Rpp32f *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -464,12 +466,13 @@ RppStatus resize_nn_i8_i8_host_tensor(Rpp8s *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -683,12 +686,13 @@ RppStatus resize_nn_f16_f16_host_tensor(Rpp16f *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -791,12 +795,13 @@ RppStatus resize_bilinear_u8_u8_host_tensor(Rpp8u *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1030,12 +1035,13 @@ RppStatus resize_bilinear_f32_f32_host_tensor(Rpp32f *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, 
(Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1271,12 +1277,13 @@ RppStatus resize_bilinear_f16_f16_host_tensor(Rpp16f *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1513,12 +1520,13 @@ RppStatus resize_bilinear_i8_i8_host_tensor(Rpp8s *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1757,12 +1765,13 @@ RppStatus resize_separable_host_tensor(T *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams srcLayoutParams, - RpptInterpolationType interpolationType) + RpptInterpolationType interpolationType, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/resize_crop_mirror.hpp b/src/modules/cpu/kernel/resize_crop_mirror.hpp index a02c1129b..07a357533 100644 --- a/src/modules/cpu/kernel/resize_crop_mirror.hpp +++ b/src/modules/cpu/kernel/resize_crop_mirror.hpp @@ -32,12 +32,13 @@ RppStatus resize_crop_mirror_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -281,12 +282,13 @@ RppStatus resize_crop_mirror_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -535,12 +537,13 @@ RppStatus resize_crop_mirror_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for 
num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -789,12 +792,13 @@ RppStatus resize_crop_mirror_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/resize_mirror_normalize.hpp b/src/modules/cpu/kernel/resize_mirror_normalize.hpp index 783648be3..613435013 100644 --- a/src/modules/cpu/kernel/resize_mirror_normalize.hpp +++ b/src/modules/cpu/kernel/resize_mirror_normalize.hpp @@ -34,12 +34,13 @@ RppStatus resize_mirror_normalize_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -314,12 +315,13 @@ RppStatus resize_mirror_normalize_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -598,12 +600,13 @@ RppStatus resize_mirror_normalize_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -882,12 +885,13 @@ RppStatus resize_mirror_normalize_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1167,11 +1171,12 @@ RppStatus resize_mirror_normalize_u8_f32_host_tensor(Rpp8u *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1448,11 +1453,12 @@ 
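Every tensor kernel touched by this patch follows the same parallelization pattern: omp_set_dynamic(0) to pin the team size, then a parallel-for over the batch with num_threads(numThreads). A minimal standalone sketch of that pattern, with illustrative names only (process_batch is not an RPP function), assuming the same 0-means-batch-size fallback the handle applies:

    #include <omp.h>
    #include <cstdio>

    void process_batch(int batchSize, unsigned int numThreads)
    {
        if (numThreads == 0)          // fallback: one thread per batch item
            numThreads = batchSize;
        omp_set_dynamic(0);           // keep the requested team size fixed
    #pragma omp parallel for num_threads(numThreads)
        for (int batchCount = 0; batchCount < batchSize; batchCount++)
        {
            // each image in the batch is processed independently in its own iteration
            printf("image %d handled by thread %d\n", batchCount, omp_get_thread_num());
        }
    }
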
RppStatus resize_mirror_normalize_u8_f16_host_tensor(Rpp8u *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/spatter.hpp b/src/modules/cpu/kernel/spatter.hpp index c78c1d386..621d0a18d 100644 --- a/src/modules/cpu/kernel/spatter.hpp +++ b/src/modules/cpu/kernel/spatter.hpp @@ -33,12 +33,13 @@ RppStatus spatter_u8_u8_host_tensor(Rpp8u *srcPtr, RpptRGB spatterColor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -385,12 +386,13 @@ RppStatus spatter_f32_f32_host_tensor(Rpp32f *srcPtr, RpptRGB spatterColor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -745,12 +747,13 @@ RppStatus spatter_f16_f16_host_tensor(Rpp16f *srcPtr, RpptRGB spatterColor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1136,12 +1139,13 @@ RppStatus spatter_i8_i8_host_tensor(Rpp8s *srcPtr, RpptRGB spatterColor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/swap_channels.hpp b/src/modules/cpu/kernel/swap_channels.hpp index 68c153028..4c62361d7 100644 --- a/src/modules/cpu/kernel/swap_channels.hpp +++ b/src/modules/cpu/kernel/swap_channels.hpp @@ -28,10 +28,11 @@ RppStatus swap_channels_u8_u8_host_tensor(Rpp8u *srcPtr, RpptDescPtr srcDescPtr, Rpp8u *dstPtr, RpptDescPtr dstDescPtr, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp8u *srcPtrImage, *dstPtrImage; @@ -224,10 +225,11 @@ RppStatus swap_channels_f32_f32_host_tensor(Rpp32f *srcPtr, RpptDescPtr srcDescPtr, Rpp32f *dstPtr, 
RpptDescPtr dstDescPtr, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp32f *srcPtrImage, *dstPtrImage; @@ -420,10 +422,11 @@ RppStatus swap_channels_f16_f16_host_tensor(Rpp16f *srcPtr, RpptDescPtr srcDescPtr, Rpp16f *dstPtr, RpptDescPtr dstDescPtr, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp16f *srcPtrImage, *dstPtrImage; @@ -656,10 +659,11 @@ RppStatus swap_channels_i8_i8_host_tensor(Rpp8s *srcPtr, RpptDescPtr srcDescPtr, Rpp8s *dstPtr, RpptDescPtr dstDescPtr, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp8s *srcPtrImage, *dstPtrImage; diff --git a/src/modules/handlehost.cpp b/src/modules/handlehost.cpp index 9c563b9c4..bf065496d 100644 --- a/src/modules/handlehost.cpp +++ b/src/modules/handlehost.cpp @@ -40,12 +40,14 @@ namespace rpp { struct HandleImpl { size_t nBatchSize = 1; - Rpp32u numThreads = 1; + Rpp32u numThreads = 0; InitHandle* initHandle = nullptr; void PreInitializeBufferCPU() { this->initHandle = new InitHandle(); + if(this->numThreads == 0) + this->numThreads = this->nBatchSize; this->initHandle->nbatchSize = this->nBatchSize; this->initHandle->mem.mcpu.maxSrcSize = (RppiSize *)malloc(sizeof(RppiSize) * this->nBatchSize); diff --git a/src/modules/hip/handlehip.cpp b/src/modules/hip/handlehip.cpp index 61a429de5..65d78fef9 100644 --- a/src/modules/hip/handlehip.cpp +++ b/src/modules/hip/handlehip.cpp @@ -129,7 +129,7 @@ struct HandleImpl bool enable_profiling = false; float profiling_result = 0.0; size_t nBatchSize = 1; - Rpp32u numThreads = 1; + Rpp32u numThreads = 0; InitHandle* initHandle = nullptr; HandleImpl() : ctx(get_ctx()) {} @@ -172,6 +172,8 @@ struct HandleImpl void PreInitializeBufferCPU() { this->initHandle = new InitHandle(); + if(this->numThreads == 0) + this->numThreads = this->nBatchSize; this->initHandle->nbatchSize = this->nBatchSize; this->initHandle->mem.mcpu.srcSize = (RppiSize *)malloc(sizeof(RppiSize) * this->nBatchSize); diff --git a/src/modules/rppi_fused_functions.cpp b/src/modules/rppi_fused_functions.cpp index a7b6b9170..e22c94cb8 100644 --- a/src/modules/rppi_fused_functions.cpp +++ b/src/modules/rppi_fused_functions.cpp @@ -203,7 +203,8 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensorOutType == RPPTensorDataType::FP16) { @@ -218,7 +219,8 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensorOutType == RPPTensorDataType::FP32) { @@ -233,7 +235,8 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + 
rpp::deref(rppHandle).GetNumThreads()); } else if (tensorOutType == RPPTensorDataType::I8) { @@ -248,7 +251,8 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (tensorInType == RPPTensorDataType::FP16) @@ -264,7 +268,8 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensorInType == RPPTensorDataType::FP32) { @@ -279,7 +284,8 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensorInType == RPPTensorDataType::I8) { @@ -294,7 +300,8 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -448,7 +455,8 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensorOutType == RPPTensorDataType::FP16) { @@ -466,7 +474,8 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensorOutType == RPPTensorDataType::FP32) { @@ -484,7 +493,8 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensorOutType == RPPTensorDataType::I8) { @@ -502,7 +512,8 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (tensorInType == RPPTensorDataType::FP16) @@ -521,7 +532,8 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensorInType == RPPTensorDataType::FP32) { @@ -539,7 +551,8 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensorInType == RPPTensorDataType::I8) { @@ -557,7 +570,8 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; diff --git a/src/modules/rppt_tensor_color_augmentations.cpp b/src/modules/rppt_tensor_color_augmentations.cpp index a9be73918..767e39dc1 100644 --- a/src/modules/rppt_tensor_color_augmentations.cpp +++ b/src/modules/rppt_tensor_color_augmentations.cpp @@ -54,7 +54,8 @@ RppStatus rppt_brightness_host(RppPtr_t srcPtr, betaTensor, roiTensorPtrSrc, 
roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -66,7 +67,8 @@ RppStatus rppt_brightness_host(RppPtr_t srcPtr, betaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -78,7 +80,8 @@ RppStatus rppt_brightness_host(RppPtr_t srcPtr, betaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -90,7 +93,8 @@ RppStatus rppt_brightness_host(RppPtr_t srcPtr, betaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -118,7 +122,8 @@ RppStatus rppt_gamma_correction_host(RppPtr_t srcPtr, gammaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -129,7 +134,8 @@ RppStatus rppt_gamma_correction_host(RppPtr_t srcPtr, gammaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -140,7 +146,8 @@ RppStatus rppt_gamma_correction_host(RppPtr_t srcPtr, gammaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -151,7 +158,8 @@ RppStatus rppt_gamma_correction_host(RppPtr_t srcPtr, gammaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -181,7 +189,8 @@ RppStatus rppt_blend_host(RppPtr_t srcPtr1, alphaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -193,7 +202,8 @@ RppStatus rppt_blend_host(RppPtr_t srcPtr1, alphaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -205,7 +215,8 @@ RppStatus rppt_blend_host(RppPtr_t srcPtr1, alphaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -217,7 +228,8 @@ RppStatus rppt_blend_host(RppPtr_t srcPtr1, alphaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -251,7 +263,8 @@ RppStatus rppt_color_twist_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -265,7 +278,8 @@ RppStatus rppt_color_twist_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + 
rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -279,7 +293,8 @@ RppStatus rppt_color_twist_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -293,7 +308,8 @@ RppStatus rppt_color_twist_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -327,7 +343,8 @@ RppStatus rppt_color_jitter_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -341,7 +358,8 @@ RppStatus rppt_color_jitter_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -355,7 +373,8 @@ RppStatus rppt_color_jitter_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -369,7 +388,8 @@ RppStatus rppt_color_jitter_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -404,7 +424,8 @@ RppStatus rppt_color_cast_host(RppPtr_t srcPtr, alphaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -416,7 +437,8 @@ RppStatus rppt_color_cast_host(RppPtr_t srcPtr, alphaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -428,7 +450,8 @@ RppStatus rppt_color_cast_host(RppPtr_t srcPtr, alphaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -440,7 +463,8 @@ RppStatus rppt_color_cast_host(RppPtr_t srcPtr, alphaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -468,7 +492,8 @@ RppStatus rppt_exposure_host(RppPtr_t srcPtr, exposureFactorTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -479,7 +504,8 @@ RppStatus rppt_exposure_host(RppPtr_t srcPtr, exposureFactorTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -490,7 +516,8 @@ RppStatus rppt_exposure_host(RppPtr_t srcPtr, exposureFactorTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + 
rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -501,7 +528,8 @@ RppStatus rppt_exposure_host(RppPtr_t srcPtr, exposureFactorTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -531,7 +559,8 @@ RppStatus rppt_contrast_host(RppPtr_t srcPtr, contrastCenterTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -543,7 +572,8 @@ RppStatus rppt_contrast_host(RppPtr_t srcPtr, contrastCenterTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -555,7 +585,8 @@ RppStatus rppt_contrast_host(RppPtr_t srcPtr, contrastCenterTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -567,7 +598,8 @@ RppStatus rppt_contrast_host(RppPtr_t srcPtr, contrastCenterTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; diff --git a/src/modules/rppt_tensor_data_exchange_operations.cpp b/src/modules/rppt_tensor_data_exchange_operations.cpp index b592baa30..274305289 100644 --- a/src/modules/rppt_tensor_data_exchange_operations.cpp +++ b/src/modules/rppt_tensor_data_exchange_operations.cpp @@ -46,7 +46,8 @@ RppStatus rppt_copy_host(RppPtr_t srcPtr, srcDescPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -54,7 +55,8 @@ RppStatus rppt_copy_host(RppPtr_t srcPtr, srcDescPtr, (Rpp16f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -62,7 +64,8 @@ RppStatus rppt_copy_host(RppPtr_t srcPtr, srcDescPtr, (Rpp32f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -70,7 +73,8 @@ RppStatus rppt_copy_host(RppPtr_t srcPtr, srcDescPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -92,7 +96,8 @@ RppStatus rppt_swap_channels_host(RppPtr_t srcPtr, srcDescPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -100,7 +105,8 @@ RppStatus rppt_swap_channels_host(RppPtr_t srcPtr, srcDescPtr, (Rpp16f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == 
RpptDataType::F32)) { @@ -108,7 +114,8 @@ RppStatus rppt_swap_channels_host(RppPtr_t srcPtr, srcDescPtr, (Rpp32f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -116,7 +123,8 @@ RppStatus rppt_swap_channels_host(RppPtr_t srcPtr, srcDescPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -161,7 +169,8 @@ RppStatus rppt_color_to_greyscale_host(RppPtr_t srcPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, channelWeights, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -170,7 +179,8 @@ RppStatus rppt_color_to_greyscale_host(RppPtr_t srcPtr, (Rpp16f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, channelWeights, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -179,7 +189,8 @@ RppStatus rppt_color_to_greyscale_host(RppPtr_t srcPtr, (Rpp32f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, channelWeights, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -188,7 +199,8 @@ RppStatus rppt_color_to_greyscale_host(RppPtr_t srcPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, channelWeights, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; diff --git a/src/modules/rppt_tensor_effects_augmentations.cpp b/src/modules/rppt_tensor_effects_augmentations.cpp index fac4ce1cf..31ca1efe1 100644 --- a/src/modules/rppt_tensor_effects_augmentations.cpp +++ b/src/modules/rppt_tensor_effects_augmentations.cpp @@ -59,7 +59,8 @@ RppStatus rppt_gridmask_host(RppPtr_t srcPtr, translateVector, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -73,7 +74,8 @@ RppStatus rppt_gridmask_host(RppPtr_t srcPtr, translateVector, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -87,7 +89,8 @@ RppStatus rppt_gridmask_host(RppPtr_t srcPtr, translateVector, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -101,7 +104,8 @@ RppStatus rppt_gridmask_host(RppPtr_t srcPtr, translateVector, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -141,7 +145,8 @@ RppStatus rppt_spatter_host(RppPtr_t srcPtr, spatterColor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -152,7 +157,8 @@ RppStatus rppt_spatter_host(RppPtr_t srcPtr, 
spatterColor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -163,7 +169,8 @@ RppStatus rppt_spatter_host(RppPtr_t srcPtr, spatterColor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -174,7 +181,8 @@ RppStatus rppt_spatter_host(RppPtr_t srcPtr, spatterColor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -216,7 +224,8 @@ RppStatus rppt_salt_and_pepper_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -231,7 +240,8 @@ RppStatus rppt_salt_and_pepper_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -246,7 +256,8 @@ RppStatus rppt_salt_and_pepper_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -261,7 +272,8 @@ RppStatus rppt_salt_and_pepper_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -297,7 +309,8 @@ RppStatus rppt_shot_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -309,7 +322,8 @@ RppStatus rppt_shot_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -321,7 +335,8 @@ RppStatus rppt_shot_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -333,7 +348,8 @@ RppStatus rppt_shot_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -367,7 +383,8 @@ RppStatus rppt_gaussian_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -380,7 +397,8 @@ RppStatus rppt_gaussian_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -393,7 +411,8 @@ 
RppStatus rppt_gaussian_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -406,7 +425,8 @@ RppStatus rppt_gaussian_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; diff --git a/src/modules/rppt_tensor_geometric_augmentations.cpp b/src/modules/rppt_tensor_geometric_augmentations.cpp index 418b8de57..afad9170f 100644 --- a/src/modules/rppt_tensor_geometric_augmentations.cpp +++ b/src/modules/rppt_tensor_geometric_augmentations.cpp @@ -54,7 +54,8 @@ RppStatus rppt_crop_host(RppPtr_t srcPtr, dstDescPtr, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -64,7 +65,8 @@ RppStatus rppt_crop_host(RppPtr_t srcPtr, dstDescPtr, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -74,7 +76,8 @@ RppStatus rppt_crop_host(RppPtr_t srcPtr, dstDescPtr, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -84,7 +87,8 @@ RppStatus rppt_crop_host(RppPtr_t srcPtr, dstDescPtr, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -116,7 +120,8 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -129,7 +134,8 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -142,7 +148,8 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -155,7 +162,8 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -168,7 +176,8 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -181,7 +190,8 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -326,7 +336,8 @@ RppStatus rppt_flip_host(RppPtr_t 
srcPtr, verticalTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -338,7 +349,8 @@ RppStatus rppt_flip_host(RppPtr_t srcPtr, verticalTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -350,7 +362,8 @@ RppStatus rppt_flip_host(RppPtr_t srcPtr, verticalTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -362,7 +375,8 @@ RppStatus rppt_flip_host(RppPtr_t srcPtr, verticalTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -393,7 +407,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, dstImgSizes, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -404,7 +419,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, dstImgSizes, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -415,7 +431,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, dstImgSizes, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -426,7 +443,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, dstImgSizes, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } } else if(interpolationType == RpptInterpolationType::BILINEAR) @@ -440,7 +458,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, dstImgSizes, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -451,7 +470,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, dstImgSizes, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -462,7 +482,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, dstImgSizes, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -473,7 +494,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, dstImgSizes, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } } else @@ -500,7 +522,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - interpolationType); + interpolationType, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -514,7 +537,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiTensorPtrSrc, 
roiType, srcLayoutParams, - interpolationType); + interpolationType, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -528,7 +552,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - interpolationType); + interpolationType, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -542,7 +567,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - interpolationType); + interpolationType, + rpp::deref(rppHandle).GetNumThreads()); } } @@ -581,7 +607,8 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -595,7 +622,8 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -609,7 +637,8 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -623,7 +652,8 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -637,7 +667,8 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -651,7 +682,8 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -683,7 +715,8 @@ RppStatus rppt_resize_crop_mirror_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -695,7 +728,8 @@ RppStatus rppt_resize_crop_mirror_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -707,7 +741,8 @@ RppStatus rppt_resize_crop_mirror_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + rpp::deref(rppHandle).GetNumThreads()); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -719,7 +754,8 @@ RppStatus rppt_resize_crop_mirror_host(RppPtr_t srcPtr, mirrorTensor, roiTensorPtrSrc, roiType, - srcLayoutParams); + srcLayoutParams, + 
rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -741,7 +777,7 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr, return RPP_ERROR_NOT_IMPLEMENTED; RppLayoutParams layoutParams = get_layout_params(srcDescPtr->layout, srcDescPtr->c); - + // Compute affine transformation matrix from rotate angle Rpp32f *affineTensor = rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem; for(int idx = 0; idx < srcDescPtr->n; idx++) @@ -1413,7 +1449,7 @@ RppStatus rppt_rotate_gpu(RppPtr_t srcPtr, #ifdef HIP_COMPILE if ((interpolationType != RpptInterpolationType::BILINEAR) && (interpolationType != RpptInterpolationType::NEAREST_NEIGHBOR)) return RPP_ERROR_NOT_IMPLEMENTED; - + // Compute affine transformation matrix from rotate angle Rpp32f *affineTensor = rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem; for(int idx = 0; idx < srcDescPtr->n; idx++) From 2e0598c80f3f689328c7a8b5e2b8d69b13de055e Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Thu, 30 Mar 2023 23:55:50 -0700 Subject: [PATCH 03/15] added numThreads as a parameter to rppCreateWithBatchSize() function --- Readme.md | 3 ++- include/rpp.h | 3 ++- src/include/common/rpp/handle.hpp | 4 ++-- src/modules/handle_api.cpp | 4 ++-- src/modules/handlehost.cpp | 5 ++++- src/modules/hip/handlehip.cpp | 7 +++++-- utilities/rpp-performancetests/HIP_NEW/Single_host.cpp | 5 +++-- .../rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp | 3 ++- .../rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp | 3 ++- .../rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp | 3 ++- utilities/rpp-performancetests/HOST_NEW/Single_host.cpp | 5 +++-- .../rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp | 3 ++- .../rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp | 3 ++- .../rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp | 3 ++- utilities/rpp-performancetests/OCL_NEW/Single_host.cpp | 5 +++-- utilities/rpp-unittests/HIP_NEW/Single_host.cpp | 5 +++-- utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp | 3 ++- utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp | 3 ++- utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp | 3 ++- utilities/rpp-unittests/HOST_NEW/Single_host.cpp | 5 +++-- utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp | 3 ++- utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp | 3 ++- utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp | 3 ++- utilities/rpp-unittests/OCL_NEW/Single_host.cpp | 5 +++-- 24 files changed, 59 insertions(+), 33 deletions(-) diff --git a/Readme.md b/Readme.md index 0b18c7218..54ccf42c2 100644 --- a/Readme.md +++ b/Readme.md @@ -221,7 +221,8 @@ $ sudo make install // Create handle rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); // Call the RPP API for the specific variant required (pkd3/pln3/pln1) rppi_brightness_u8_pkd3_batchPD_host(input, srcSize, maxSize, output, alpha, beta, noOfImages, handle); diff --git a/include/rpp.h b/include/rpp.h index 9346cd869..666131107 100644 --- a/include/rpp.h +++ b/include/rpp.h @@ -101,8 +101,9 @@ extern "C" SHARED_PUBLIC rppStatus_t rppCreate(rppHandle_t* handle); // Function to create a rpp handle for a batch. To be called in the beginning to initialize the rpp environment // *param[in] handle A pointer to rpp handle of type rppHandle_t // *param[in] nBatchSize Batch size +// *param[in] numThreads number of threads to be used for OpenMP pragma // *returns a rppStatus_t enumeration. 
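The parameter documentation above covers the new numThreads argument; a short caller-side sketch of the extended API may help. Variable names and values are illustrative, while rppCreateWithBatchSize and rppDestroy are the entry points shown in this patch:

    rppHandle_t handle;
    Rpp32u noOfImages = 32;   // batch size
    Rpp32u numThreads = 8;    // cap the OpenMP team at 8 threads; passing 0 falls back to the batch size
    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
    // ... call any *_host batchPD or tensor API with this handle; its batch loop now uses at most 8 threads ...
    rppDestroy(handle);
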
-extern "C" SHARED_PUBLIC rppStatus_t rppCreateWithBatchSize(rppHandle_t* handle, size_t nBatchSize); +extern "C" SHARED_PUBLIC rppStatus_t rppCreateWithBatchSize(rppHandle_t* handle, size_t nBatchSize, Rpp32u numThreads); /******************** rppDestroy ********************/ diff --git a/src/include/common/rpp/handle.hpp b/src/include/common/rpp/handle.hpp index e2777ede4..5ac3bab09 100644 --- a/src/include/common/rpp/handle.hpp +++ b/src/include/common/rpp/handle.hpp @@ -66,7 +66,7 @@ using rocblas_handle_ptr = RPP_MANAGE_PTR(rocblas_handle, rocblas_destroy_handle struct Handle : rppHandle { Handle(); - Handle(size_t nBatchSize); + Handle(size_t nBatchSize, Rpp32u numThreads); Handle(Handle&&) noexcept; ~Handle(); @@ -84,7 +84,7 @@ struct Handle : rppHandle { // Host handle related Handle(); - Handle(size_t nBatchSize); + Handle(size_t nBatchSize, Rpp32u numThreads); Handle(Handle&&) noexcept; ~Handle(); InitHandle* GetInitHandle() const; diff --git a/src/modules/handle_api.cpp b/src/modules/handle_api.cpp index 2e4680601..17b62a4d9 100644 --- a/src/modules/handle_api.cpp +++ b/src/modules/handle_api.cpp @@ -58,9 +58,9 @@ extern "C" rppStatus_t rppCreate(rppHandle_t* handle) return rpp::try_([&] { rpp::deref(handle) = new rpp::Handle(); }); } -extern "C" rppStatus_t rppCreateWithBatchSize(rppHandle_t* handle, size_t nBatchSize) +extern "C" rppStatus_t rppCreateWithBatchSize(rppHandle_t* handle, size_t nBatchSize, Rpp32u numThreads) { - return rpp::try_([&] { rpp::deref(handle) = new rpp::Handle(nBatchSize); }); + return rpp::try_([&] { rpp::deref(handle) = new rpp::Handle(nBatchSize, numThreads); }); } extern "C" rppStatus_t rppDestroy(rppHandle_t handle) diff --git a/src/modules/handlehost.cpp b/src/modules/handlehost.cpp index bf065496d..053388c3f 100644 --- a/src/modules/handlehost.cpp +++ b/src/modules/handlehost.cpp @@ -57,9 +57,12 @@ struct HandleImpl } }; -Handle::Handle(size_t batchSize) : impl(new HandleImpl()) +Handle::Handle(size_t batchSize, Rpp32u numThreads) : impl(new HandleImpl()) { impl->nBatchSize = batchSize; + if(numThreads == 0) + numThreads = batchSize; + impl->numThreads = numThreads; impl->PreInitializeBufferCPU(); } diff --git a/src/modules/hip/handlehip.cpp b/src/modules/hip/handlehip.cpp index 65d78fef9..aa724d7a1 100644 --- a/src/modules/hip/handlehip.cpp +++ b/src/modules/hip/handlehip.cpp @@ -172,7 +172,7 @@ struct HandleImpl void PreInitializeBufferCPU() { this->initHandle = new InitHandle(); - if(this->numThreads == 0) + if(this->numThreads == 0) this->numThreads = this->nBatchSize; this->initHandle->nbatchSize = this->nBatchSize; @@ -280,9 +280,12 @@ Handle::Handle(rppAcceleratorQueue_t stream) : impl(new HandleImpl()) RPP_LOG_I(*this); } -Handle::Handle(size_t batchSize) : impl(new HandleImpl()) +Handle::Handle(size_t batchSize, Rpp32u numThreads) : impl(new HandleImpl()) { impl->nBatchSize = batchSize; + if(numThreads == 0) + numThreads = batchSize; + impl->numThreads = numThreads; this->SetAllocator(nullptr, nullptr, nullptr); impl->PreInitializeBufferCPU(); } diff --git a/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp b/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp index 1329d043a..2ec955479 100644 --- a/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp +++ b/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp @@ -70,7 +70,7 @@ int main(int argc, char **argv) int i = 0, j = 0; int minHeight = 30000, minWidth = 30000, maxHeight = 0, maxWidth = 0; - + unsigned long long ioBufferSize = 0; static int noOfImages = 128; @@ -176,7 
+176,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; double cpu_time_used; diff --git a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp index a886c7c1c..88820af63 100644 --- a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp @@ -645,7 +645,8 @@ int main(int argc, char **argv) } rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; double start_omp, end_omp; double max_time_used = 0, min_time_used = 500, avg_time_used = 0; diff --git a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp index 5876a7c39..1c3c889b2 100644 --- a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp @@ -646,7 +646,8 @@ int main(int argc, char **argv) } rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; double start_omp, end_omp; double max_time_used = 0, min_time_used = 500, avg_time_used = 0; diff --git a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp index 8b10a76c1..7c00bdc20 100644 --- a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp @@ -748,7 +748,8 @@ int main(int argc, char **argv) } rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; double start_omp, end_omp; double max_time_used = 0, min_time_used = 500, avg_time_used = 0; diff --git a/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp b/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp index 1329d043a..2ec955479 100644 --- a/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp @@ -70,7 +70,7 @@ int main(int argc, char **argv) int i = 0, j = 0; int minHeight = 30000, minWidth = 30000, maxHeight = 0, maxWidth = 0; - + unsigned long long ioBufferSize = 0; static int noOfImages = 128; @@ -176,7 +176,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; double cpu_time_used; diff --git a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp index 82803cc85..a1160c544 100644 --- a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp @@ -595,7 +595,8 @@ int main(int argc, char **argv) // Run case-wise RPP API and measure time rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); double max_time_used = 0, min_time_used = 500, avg_time_used = 0; diff --git 
a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp index cb11db44f..4e6a2305b 100644 --- a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp @@ -586,7 +586,8 @@ int main(int argc, char **argv) // Run case-wise RPP API and measure time rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); double max_time_used = 0, min_time_used = 500, avg_time_used = 0; diff --git a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp index 9cc4cfb9a..0d4c16aab 100644 --- a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp @@ -672,7 +672,8 @@ int main(int argc, char **argv) // Run case-wise RPP API and measure time rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); double max_time_used = 0, min_time_used = 500, avg_time_used = 0; diff --git a/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp b/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp index 1329d043a..2ec955479 100644 --- a/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp +++ b/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp @@ -70,7 +70,7 @@ int main(int argc, char **argv) int i = 0, j = 0; int minHeight = 30000, minWidth = 30000, maxHeight = 0, maxWidth = 0; - + unsigned long long ioBufferSize = 0; static int noOfImages = 128; @@ -176,7 +176,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; double cpu_time_used; diff --git a/utilities/rpp-unittests/HIP_NEW/Single_host.cpp b/utilities/rpp-unittests/HIP_NEW/Single_host.cpp index 1329d043a..2ec955479 100644 --- a/utilities/rpp-unittests/HIP_NEW/Single_host.cpp +++ b/utilities/rpp-unittests/HIP_NEW/Single_host.cpp @@ -70,7 +70,7 @@ int main(int argc, char **argv) int i = 0, j = 0; int minHeight = 30000, minWidth = 30000, maxHeight = 0, maxWidth = 0; - + unsigned long long ioBufferSize = 0; static int noOfImages = 128; @@ -176,7 +176,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; double cpu_time_used; diff --git a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp index 8b57a81a0..1689ef5e4 100644 --- a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp +++ b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp @@ -647,7 +647,8 @@ int main(int argc, char **argv) } rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; double start_omp, end_omp; double cpu_time_used, omp_time_used; diff --git a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp index 64ba48248..373d3f773 100644 --- a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp +++ b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp @@ -648,7 
+648,8 @@ int main(int argc, char **argv) } rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; double start_omp, end_omp; double cpu_time_used, omp_time_used; diff --git a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp index 1269fa409..a98c6daf0 100644 --- a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp +++ b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp @@ -751,7 +751,8 @@ int main(int argc, char **argv) } rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; double start_omp, end_omp; double cpu_time_used, omp_time_used; diff --git a/utilities/rpp-unittests/HOST_NEW/Single_host.cpp b/utilities/rpp-unittests/HOST_NEW/Single_host.cpp index 1329d043a..2ec955479 100644 --- a/utilities/rpp-unittests/HOST_NEW/Single_host.cpp +++ b/utilities/rpp-unittests/HOST_NEW/Single_host.cpp @@ -70,7 +70,7 @@ int main(int argc, char **argv) int i = 0, j = 0; int minHeight = 30000, minWidth = 30000, maxHeight = 0, maxWidth = 0; - + unsigned long long ioBufferSize = 0; static int noOfImages = 128; @@ -176,7 +176,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; double cpu_time_used; diff --git a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp index f7025fb56..62512b0ea 100644 --- a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp +++ b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp @@ -609,7 +609,8 @@ int main(int argc, char **argv) // Run case-wise RPP API and measure time rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; double start_omp, end_omp; double cpu_time_used, omp_time_used; diff --git a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp index 0b1c6ca05..b1917ba41 100644 --- a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp +++ b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp @@ -599,7 +599,8 @@ int main(int argc, char **argv) // Run case-wise RPP API and measure time rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; double start_omp, end_omp; double cpu_time_used, omp_time_used; diff --git a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp index 5cfdbd69f..296fb945f 100644 --- a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp +++ b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp @@ -685,7 +685,8 @@ int main(int argc, char **argv) // Run case-wise RPP API and measure time rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; double start_omp, end_omp; double cpu_time_used, omp_time_used; diff --git a/utilities/rpp-unittests/OCL_NEW/Single_host.cpp b/utilities/rpp-unittests/OCL_NEW/Single_host.cpp index 1329d043a..2ec955479 100644 --- 
a/utilities/rpp-unittests/OCL_NEW/Single_host.cpp +++ b/utilities/rpp-unittests/OCL_NEW/Single_host.cpp @@ -70,7 +70,7 @@ int main(int argc, char **argv) int i = 0, j = 0; int minHeight = 30000, minWidth = 30000, maxHeight = 0, maxWidth = 0; - + unsigned long long ioBufferSize = 0; static int noOfImages = 128; @@ -176,7 +176,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; double cpu_time_used; From e1c5cbdc9488947a8fe9ae8f567fc88a9ac405d2 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Mon, 20 Mar 2023 10:07:07 -0700 Subject: [PATCH 04/15] added numthreads to batchpd kernels --- .../cpu/host_advanced_augmentations.hpp | 74 +++++----- .../cpu/host_arithmetic_operations.hpp | 54 +++---- .../cpu/host_color_model_conversions.hpp | 56 ++++---- src/modules/cpu/host_computer_vision.hpp | 52 +++---- src/modules/cpu/host_filter_operations.hpp | 42 +++--- src/modules/cpu/host_fused_functions.hpp | 68 ++++----- src/modules/cpu/host_geometry_transforms.hpp | 60 ++++---- src/modules/cpu/host_image_augmentations.hpp | 86 +++++------ src/modules/cpu/host_logical_operations.hpp | 24 ++-- .../cpu/host_morphological_transforms.hpp | 12 +- .../cpu/host_statistical_operations.hpp | 62 ++++---- src/modules/rppi_advanced_augmentations.cpp | 78 ++++++---- src/modules/rppi_arithmetic_operations.cpp | 81 +++++++---- src/modules/rppi_color_model_conversions.cpp | 69 ++++++--- src/modules/rppi_computer_vision.cpp | 114 ++++++++++----- src/modules/rppi_filter_operations.cpp | 63 +++++--- src/modules/rppi_fused_functions.cpp | 27 ++-- src/modules/rppi_geometry_transforms.cpp | 102 ++++++++----- src/modules/rppi_image_augmentations.cpp | 135 ++++++++++++------ src/modules/rppi_logical_operations.cpp | 36 +++-- src/modules/rppi_morphological_operations.cpp | 18 ++- src/modules/rppi_statistical_operations.cpp | 54 ++++--- .../rpp-unittests/HOST_NEW/testAllScript.sh | 8 +- 23 files changed, 817 insertions(+), 558 deletions(-) diff --git a/src/modules/cpu/host_advanced_augmentations.hpp b/src/modules/cpu/host_advanced_augmentations.hpp index 4cf21203a..2c11dae0d 100644 --- a/src/modules/cpu/host_advanced_augmentations.hpp +++ b/src/modules/cpu/host_advanced_augmentations.hpp @@ -34,12 +34,12 @@ RppStatus water_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s Rpp32f *batch_freq_x, Rpp32f *batch_freq_y, Rpp32f *batch_phase_x, Rpp32f *batch_phase_y, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -169,7 +169,7 @@ RppStatus water_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -289,12 +289,12 @@ template RppStatus non_linear_blend_host_batch(T* srcPtr1, T* srcPtr2, RppiSize 
*batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_std_dev, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -430,7 +430,7 @@ RppStatus non_linear_blend_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_sr else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -578,12 +578,12 @@ RppStatus non_linear_blend_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_sr RppStatus non_linear_blend_f32_host_batch(Rpp32f* srcPtr1, Rpp32f* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32f* dstPtr, Rpp32f *batch_std_dev, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -701,7 +701,7 @@ RppStatus non_linear_blend_f32_host_batch(Rpp32f* srcPtr1, Rpp32f* srcPtr2, Rppi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -830,12 +830,12 @@ RppStatus non_linear_blend_f32_host_batch(Rpp32f* srcPtr1, Rpp32f* srcPtr2, Rppi RppStatus non_linear_blend_f16_host_batch(Rpp16f* srcPtr1, Rpp16f* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp16f* dstPtr, Rpp32f *batch_std_dev, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -966,7 +966,7 @@ RppStatus non_linear_blend_f16_host_batch(Rpp16f* srcPtr1, Rpp16f* srcPtr2, Rppi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1108,7 +1108,7 @@ RppStatus non_linear_blend_f16_host_batch(Rpp16f* srcPtr1, Rpp16f* srcPtr2, Rppi RppStatus non_linear_blend_i8_host_batch(Rpp8s* srcPtr1, Rpp8s* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp8s* dstPtr, Rpp32f 
*batch_std_dev, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { Rpp64u bufferLength = batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * channel * nbatchSize; @@ -1134,7 +1134,7 @@ RppStatus non_linear_blend_i8_host_batch(Rpp8s* srcPtr1, Rpp8s* srcPtr2, RppiSiz srcPtr2_8uTemp++; } - non_linear_blend_host_batch(srcPtr1_8u, srcPtr2_8u, batch_srcSize, batch_srcSizeMax, dstPtr_8u, batch_std_dev, outputFormatToggle, nbatchSize, chnFormat, channel); + non_linear_blend_host_batch(srcPtr1_8u, srcPtr2_8u, batch_srcSize, batch_srcSizeMax, dstPtr_8u, batch_std_dev, outputFormatToggle, nbatchSize, chnFormat, channel, numThreads); Rpp8s *dstPtrTemp; dstPtrTemp = dstPtr; @@ -1162,12 +1162,12 @@ template RppStatus color_cast_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp8u *batch_r, Rpp8u *batch_g, Rpp8u *batch_b, Rpp32f *batch_alpha, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1269,7 +1269,7 @@ RppStatus color_cast_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *ba else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1383,12 +1383,12 @@ RppStatus color_cast_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *ba RppStatus color_cast_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32f* dstPtr, Rpp8u *batch_r, Rpp8u *batch_g, Rpp8u *batch_b, Rpp32f *batch_alpha, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1476,7 +1476,7 @@ RppStatus color_cast_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcSize, Rpp else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1576,12 +1576,12 @@ RppStatus color_cast_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcSize, Rpp RppStatus color_cast_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp16f* dstPtr, Rpp8u *batch_r, Rpp8u *batch_g, Rpp8u *batch_b, Rpp32f *batch_alpha, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { 
omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1681,7 +1681,7 @@ RppStatus color_cast_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_srcSize, Rpp else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1793,7 +1793,7 @@ RppStatus color_cast_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_srcSize, Rpp RppStatus color_cast_i8_host_batch(Rpp8s* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp8s* dstPtr, Rpp8u *batch_r, Rpp8u *batch_g, Rpp8u *batch_b, Rpp32f *batch_alpha, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { Rpp64u bufferLength = batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * channel * nbatchSize; @@ -1813,7 +1813,7 @@ RppStatus color_cast_i8_host_batch(Rpp8s* srcPtr, RppiSize *batch_srcSize, RppiS srcPtr_8uTemp++; } - color_cast_host_batch(srcPtr_8u, batch_srcSize, batch_srcSizeMax, dstPtr_8u, batch_r, batch_g, batch_b, batch_alpha, outputFormatToggle, nbatchSize, chnFormat, channel); + color_cast_host_batch(srcPtr_8u, batch_srcSize, batch_srcSizeMax, dstPtr_8u, batch_r, batch_g, batch_b, batch_alpha, outputFormatToggle, nbatchSize, chnFormat, channel, numThreads); Rpp8s *dstPtrTemp; dstPtrTemp = dstPtr; @@ -1840,12 +1840,12 @@ template RppStatus erase_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_anchor_box_info, T *batch_colors, Rpp32u *batch_box_offset, Rpp32u *batch_num_of_boxes, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1922,7 +1922,7 @@ RppStatus erase_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2006,12 +2006,12 @@ RppStatus crop_and_patch_host_batch(T* srcPtr1, RppiSize *batch_srcSize1, RppiSi Rpp32u *batch_src1x1, Rpp32u *batch_src1y1, Rpp32u *batch_src1x2, Rpp32u *batch_src1y2, Rpp32u *batch_src2x1, Rpp32u *batch_src2y1, Rpp32u *batch_src2x2, Rpp32u *batch_src2y2, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { 
Rpp32u dstImageDimMax = batch_srcSizeMax1[batchCount].height * batch_srcSizeMax1[batchCount].width; @@ -2135,7 +2135,7 @@ RppStatus crop_and_patch_host_batch(T* srcPtr1, RppiSize *batch_srcSize1, RppiSi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u dstImageDimMax = batch_srcSizeMax1[batchCount].height * batch_srcSizeMax1[batchCount].width; @@ -2258,14 +2258,14 @@ template RppStatus lut_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, T *batch_lutPtr, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { Rpp32u lutSize = 256; if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2337,7 +2337,7 @@ RppStatus lut_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_src else if(chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2433,12 +2433,12 @@ RppStatus glitch_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_ Rpp32u *batch_x_offset_g, Rpp32u *batch_y_offset_g, Rpp32u *batch_x_offset_b, Rpp32u *batch_y_offset_b, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2539,7 +2539,7 @@ RppStatus glitch_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_ else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; diff --git a/src/modules/cpu/host_arithmetic_operations.hpp b/src/modules/cpu/host_arithmetic_operations.hpp index 2a5e6494d..e15ed7ce8 100644 --- a/src/modules/cpu/host_arithmetic_operations.hpp +++ b/src/modules/cpu/host_arithmetic_operations.hpp @@ -30,12 +30,12 @@ THE SOFTWARE. 
template RppStatus absolute_difference_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -119,7 +119,7 @@ RppStatus absolute_difference_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -235,12 +235,12 @@ template RppStatus accumulate_weighted_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32f *batch_alpha, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -315,7 +315,7 @@ RppStatus accumulate_weighted_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -420,12 +420,12 @@ RppStatus accumulate_weighted_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, template RppStatus accumulate_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -498,7 +498,7 @@ RppStatus accumulate_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -599,12 +599,12 @@ RppStatus accumulate_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, template RppStatus add_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == 
RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -687,7 +687,7 @@ RppStatus add_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -800,12 +800,12 @@ RppStatus add_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus subtract_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -888,7 +888,7 @@ RppStatus subtract_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, R else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -985,12 +985,12 @@ RppStatus subtract_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus magnitude_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1077,7 +1077,7 @@ RppStatus magnitude_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1177,12 +1177,12 @@ RppStatus magnitude_host(T* srcPtr1, T* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus multiply_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u 
imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1265,7 +1265,7 @@ RppStatus multiply_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, R else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1362,13 +1362,13 @@ RppStatus multiply_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus phase_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { Rpp32f multiplier = 255 / 1.570796; if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1452,7 +1452,7 @@ RppStatus phase_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, Rppi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1568,12 +1568,12 @@ RppStatus phase_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus accumulate_squared_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1641,7 +1641,7 @@ RppStatus accumulate_squared_host_batch(T* srcPtr, RppiSize *batch_srcSize, Rppi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; diff --git a/src/modules/cpu/host_color_model_conversions.hpp b/src/modules/cpu/host_color_model_conversions.hpp index 289c6916c..bbeb190b6 100644 --- a/src/modules/cpu/host_color_model_conversions.hpp +++ b/src/modules/cpu/host_color_model_conversions.hpp @@ -31,12 +31,12 @@ template RppStatus channel_extract_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_extractChannelNumber, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if (chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u 
imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -66,7 +66,7 @@ RppStatus channel_extract_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -146,12 +146,12 @@ RppStatus channel_extract_host(T* srcPtr, RppiSize srcSize, T* dstPtr, template RppStatus channel_combine_host_batch(T* srcPtr1, T* srcPtr2, T* srcPtr3, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if (chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -205,7 +205,7 @@ RppStatus channel_combine_host_batch(T* srcPtr1, T* srcPtr2, T* srcPtr3, RppiSiz else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -303,7 +303,7 @@ RppStatus channel_combine_host(T* srcPtr1, T* srcPtr2, T* srcPtr3, RppiSize srcS // if(chnFormat == RPPI_CHN_PLANAR) // { // omp_set_dynamic(0); -// #pragma omp parallel for num_threads(nbatchSize) +// #pragma omp parallel for num_threads(numThreads) // for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) // { // Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -384,7 +384,7 @@ RppStatus channel_combine_host(T* srcPtr1, T* srcPtr2, T* srcPtr3, RppiSize srcS // else if (chnFormat == RPPI_CHN_PACKED) // { // omp_set_dynamic(0); -// #pragma omp parallel for num_threads(nbatchSize) +// #pragma omp parallel for num_threads(numThreads) // for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) // { // Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -466,14 +466,14 @@ template RppStatus look_up_table_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, T *batch_lutPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { Rpp32u lutSize = 256; if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -514,7 +514,7 @@ RppStatus look_up_table_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize else if(chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * 
batch_srcSizeMax[batchCount].width; @@ -604,12 +604,12 @@ template RppStatus color_temperature_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32s *batch_adjustmentValue, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if (channel == 1) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -704,7 +704,7 @@ RppStatus color_temperature_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiS if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -861,7 +861,7 @@ RppStatus color_temperature_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiS else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1098,12 +1098,12 @@ template RppStatus vignette_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_stdDev, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1267,7 +1267,7 @@ RppStatus vignette_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batc else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2387,12 +2387,12 @@ template RppStatus hueRGB_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_hueShift, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2470,7 +2470,7 @@ RppStatus hueRGB_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_ else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height 
* batch_srcSizeMax[batchCount].width; @@ -3300,12 +3300,12 @@ template RppStatus saturationRGB_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_saturationFactor, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -3371,7 +3371,7 @@ RppStatus saturationRGB_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -3499,10 +3499,10 @@ RppStatus tensor_look_up_table_host(T* srcPtr, T* dstPtr, T* lutPtr, template RppStatus color_convert_rgb_to_hsv_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiColorConvertMode convertMode, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u loc = 0; @@ -3529,10 +3529,10 @@ RppStatus color_convert_rgb_to_hsv_host_batch(T* srcPtr, RppiSize *batch_srcSize template RppStatus color_convert_hsv_to_rgb_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiColorConvertMode convertMode, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u loc = 0; diff --git a/src/modules/cpu/host_computer_vision.hpp b/src/modules/cpu/host_computer_vision.hpp index e8cdd31d5..1f2404f07 100644 --- a/src/modules/cpu/host_computer_vision.hpp +++ b/src/modules/cpu/host_computer_vision.hpp @@ -30,12 +30,12 @@ THE SOFTWARE. 
template RppStatus data_object_copy_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -67,7 +67,7 @@ RppStatus data_object_copy_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -110,12 +110,12 @@ RppStatus data_object_copy_host(T* srcPtr, RppiSize srcSize, T* dstPtr, template RppStatus local_binary_pattern_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -255,7 +255,7 @@ RppStatus local_binary_pattern_host_batch(T* srcPtr, RppiSize *batch_srcSize, Rp else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -467,7 +467,7 @@ template RppStatus convert_bit_depth_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, Rpp32u conversionType, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { T *srcPtrTemp; U *dstPtrTemp; @@ -561,12 +561,12 @@ template RppStatus remap_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_rowRemapTable, Rpp32u *batch_colRemapTable, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -621,7 +621,7 @@ RppStatus remap_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDim = batch_srcSize[batchCount].height * batch_srcSize[batchCount].width; @@ -732,10 +732,10 @@ template RppStatus gaussian_image_pyramid_host_batch(T* srcPtr, RppiSize 
*batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_stdDev, Rpp32u *batch_kernelSize, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32f stdDev = batch_stdDev[batchCount]; @@ -838,10 +838,10 @@ template RppStatus canny_edge_detector_host_batch(T* batch_srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* batch_dstPtr, T *batch_maxThreshold, T *batch_minThreshold, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { T maxThreshold = batch_maxThreshold[batchCount]; @@ -1363,10 +1363,10 @@ template RppStatus laplacian_image_pyramid_host_batch(T* batch_srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* batch_dstPtr, Rpp32f *batch_stdDev, Rpp32u *batch_kernelSize, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32f stdDev = batch_stdDev[batchCount]; @@ -1501,10 +1501,10 @@ RppStatus harris_corner_detector_host_batch(T* batch_srcPtr, RppiSize *batch_src Rpp32u *batch_kernelSize, Rpp32f *batch_kValue, Rpp32f *batch_threshold, Rpp32u *batch_nonmaxKernelSize, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u gaussianKernelSize = batch_gaussianKernelSize[batchCount]; @@ -2057,10 +2057,10 @@ RppStatus reconstruction_laplacian_image_pyramid_host_batch(T* batch_srcPtr1, Rp T* batch_dstPtr, Rpp32f *batch_stdDev, Rpp32u *batch_kernelSize, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32f stdDev = batch_stdDev[batchCount]; @@ -2170,10 +2170,10 @@ RppStatus hough_lines_host_batch(T* batch_srcPtr, RppiSize *batch_srcSize, RppiS Rpp32f *batch_rho, Rpp32f *batch_theta, Rpp32u *batch_threshold, Rpp32u *batch_lineLength, Rpp32u *batch_lineGap, Rpp32u *batch_linesMax, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32f rho = batch_rho[batchCount]; @@ -2704,10 +2704,10 @@ RppStatus fast_corner_detector_host_batch(T* batch_srcPtr, RppiSize *batch_srcSi Rpp32u *batch_numOfPixels, T *batch_threshold, Rpp32u *batch_nonmaxKernelSize, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat 
chnFormat, Rpp32u channel, Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u numOfPixels = batch_numOfPixels[batchCount]; @@ -3370,10 +3370,10 @@ template RppStatus hog_host_batch(T* batch_srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* batch_binsTensor, Rpp32u *batch_binsTensorLength, RppiSize *batch_kernelSize, RppiSize *batch_windowSize, Rpp32u *batch_windowStride, Rpp32u *batch_numOfBins, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u binsTensorLength = batch_binsTensorLength[batchCount]; diff --git a/src/modules/cpu/host_filter_operations.hpp b/src/modules/cpu/host_filter_operations.hpp index bee8808cb..114910191 100644 --- a/src/modules/cpu/host_filter_operations.hpp +++ b/src/modules/cpu/host_filter_operations.hpp @@ -31,12 +31,12 @@ template RppStatus box_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -131,7 +131,7 @@ RppStatus box_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *ba else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -264,12 +264,12 @@ template RppStatus median_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -409,7 +409,7 @@ RppStatus median_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -619,12 +619,12 @@ template RppStatus gaussian_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_stdDev, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) 
+ RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -721,7 +721,7 @@ RppStatus gaussian_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -856,12 +856,12 @@ template RppStatus nonlinear_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1001,7 +1001,7 @@ RppStatus nonlinear_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1211,12 +1211,12 @@ template RppStatus non_max_suppression_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1356,7 +1356,7 @@ RppStatus non_max_suppression_host_batch(T* srcPtr, RppiSize *batch_srcSize, Rpp else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1572,12 +1572,12 @@ template RppStatus sobel_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_sobelType, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ 
-1715,7 +1715,7 @@ RppStatus sobel_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize * else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1916,12 +1916,12 @@ template RppStatus custom_convolution_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_kernel, RppiSize *batch_rppiKernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2030,7 +2030,7 @@ RppStatus custom_convolution_host_batch(T* srcPtr, RppiSize *batch_srcSize, Rppi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; diff --git a/src/modules/cpu/host_fused_functions.hpp b/src/modules/cpu/host_fused_functions.hpp index 3a3bd3b08..aaa256331 100644 --- a/src/modules/cpu/host_fused_functions.hpp +++ b/src/modules/cpu/host_fused_functions.hpp @@ -33,12 +33,12 @@ RppStatus color_twist_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b Rpp32f *batch_alpha, Rpp32f *batch_beta, Rpp32f *batch_hueShift, Rpp32f *batch_saturationFactor, RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -79,7 +79,7 @@ RppStatus color_twist_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b else if(chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -787,12 +787,12 @@ RppStatus color_twist_f32_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz Rpp32f *batch_alpha, Rpp32f *batch_beta, Rpp32f *batch_hueShift, Rpp32f *batch_saturationFactor, RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -833,7 +833,7 @@ 
RppStatus color_twist_f32_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz else if(chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1590,12 +1590,12 @@ RppStatus color_twist_f16_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz Rpp32f *batch_alpha, Rpp32f *batch_beta, Rpp32f *batch_hueShift, Rpp32f *batch_saturationFactor, RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1636,7 +1636,7 @@ RppStatus color_twist_f16_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz else if(chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2429,12 +2429,12 @@ RppStatus color_twist_i8_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize Rpp32f *batch_alpha, Rpp32f *batch_beta, Rpp32f *batch_hueShift, Rpp32f *batch_saturationFactor, RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2507,7 +2507,7 @@ RppStatus color_twist_i8_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize else if(chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2586,12 +2586,12 @@ RppStatus crop_mirror_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b Rpp32u *batch_crop_pos_x, Rpp32u *batch_crop_pos_y, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2689,7 +2689,7 @@ RppStatus crop_mirror_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) 
for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -3102,7 +3102,7 @@ RppStatus crop_mirror_normalize_host_batch(T* srcPtr, RppiSize *batch_srcSize, R else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -3694,7 +3694,7 @@ RppStatus crop_mirror_normalize_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_s else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -3997,7 +3997,7 @@ RppStatus crop_mirror_normalize_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_s else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4362,7 +4362,7 @@ RppStatus crop_mirror_normalize_u8_f_host_batch(T* srcPtr, RppiSize *batch_srcSi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4692,7 +4692,7 @@ RppStatus crop_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_sr else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4864,7 +4864,7 @@ RppStatus crop_host_u_f_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batc else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -5046,7 +5046,7 @@ RppStatus crop_host_u_i_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batc else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -5136,12 +5136,12 @@ template RppStatus resize_crop_mirror_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_x1, Rpp32u *batch_x2, Rpp32u *batch_y1, Rpp32u *batch_y2, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + 
RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u x1 = batch_x1[batchCount]; @@ -5249,7 +5249,7 @@ RppStatus resize_crop_mirror_host_batch(T* srcPtr, RppiSize *batch_srcSize, Rppi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u x1 = batch_x1[batchCount]; @@ -5358,12 +5358,12 @@ RppStatus resize_crop_mirror_host_batch(T* srcPtr, RppiSize *batch_srcSize, Rppi RppStatus resize_crop_mirror_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32f* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_x1, Rpp32u *batch_x2, Rpp32u *batch_y1, Rpp32u *batch_y2, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u x1 = batch_x1[batchCount]; @@ -5471,7 +5471,7 @@ RppStatus resize_crop_mirror_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcS else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u x1 = batch_x1[batchCount]; @@ -5581,12 +5581,12 @@ RppStatus resize_crop_mirror_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcS RppStatus resize_crop_mirror_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp16f* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_x1, Rpp32u *batch_x2, Rpp32u *batch_y1, Rpp32u *batch_y2, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u x1 = batch_x1[batchCount]; @@ -5696,7 +5696,7 @@ RppStatus resize_crop_mirror_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_srcS else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u x1 = batch_x1[batchCount]; @@ -5807,13 +5807,13 @@ template RppStatus resize_mirror_normalize_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { T *dstPtrCopy = (T*) calloc(channel * batch_dstSizeMax[0].height * batch_dstSizeMax[0].width * 
nbatchSize, sizeof(T)); omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -6046,7 +6046,7 @@ RppStatus resize_mirror_normalize_host_batch(T* srcPtr, RppiSize *batch_srcSize, { T *dstPtrCopy = (T*) calloc(channel * batch_dstSizeMax[0].height * batch_dstSizeMax[0].width * nbatchSize, sizeof(T)); omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; diff --git a/src/modules/cpu/host_geometry_transforms.hpp b/src/modules/cpu/host_geometry_transforms.hpp index 7c2c1e473..f04d4f17e 100644 --- a/src/modules/cpu/host_geometry_transforms.hpp +++ b/src/modules/cpu/host_geometry_transforms.hpp @@ -31,12 +31,12 @@ template RppStatus flip_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_flipAxis, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -234,7 +234,7 @@ RppStatus flip_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_sr else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -710,12 +710,12 @@ RppStatus fisheye_base_host(T* srcPtrTemp, RppiSize srcSize, T* dstPtrTemp, template RppStatus fisheye_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -919,7 +919,7 @@ RppStatus fisheye_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1539,12 +1539,12 @@ template RppStatus lens_correction_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_strength, Rpp32f *batch_zoom, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == 
RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1754,7 +1754,7 @@ RppStatus lens_correction_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2251,12 +2251,12 @@ template RppStatus scale_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32f *batch_percentage, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32f x1 = roiPoints[batchCount].x; @@ -2359,7 +2359,7 @@ RppStatus scale_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32f x1 = roiPoints[batchCount].x; @@ -2587,12 +2587,12 @@ template RppStatus rotate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32f *batch_angleDeg, RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32f x1 = roiPoints[batchCount].x; @@ -2739,7 +2739,7 @@ RppStatus rotate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_ else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32f x1 = roiPoints[batchCount].x; @@ -3014,12 +3014,12 @@ RppStatus rotate_host(T* srcPtr, RppiSize srcSize, T* dstPtr, RppiSize dstSize, template RppStatus resize_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u x1 = roiPoints[batchCount].x; @@ -3101,7 +3101,7 @@ RppStatus resize_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_ else if (chnFormat == 
RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u x1 = roiPoints[batchCount].x; @@ -3186,12 +3186,12 @@ RppStatus resize_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_ template RppStatus resize_u8_i8_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u x1 = roiPoints[batchCount].x; @@ -3280,7 +3280,7 @@ RppStatus resize_u8_i8_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize * else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u x1 = roiPoints[batchCount].x; @@ -3384,12 +3384,12 @@ template RppStatus resize_crop_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_x1, Rpp32u *batch_x2, Rpp32u *batch_y1, Rpp32u *batch_y2, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u x1 = batch_x1[batchCount]; @@ -3452,7 +3452,7 @@ RppStatus resize_crop_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u x1 = batch_x1[batchCount]; @@ -3532,12 +3532,12 @@ template RppStatus warp_affine_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, RppiROI *roiPoints, Rpp32f *batch_affine, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32f x1 = roiPoints[batchCount].x; @@ -3672,7 +3672,7 @@ RppStatus warp_affine_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32f x1 = roiPoints[batchCount].x; @@ -3926,13 +3926,13 @@ template RppStatus warp_perspective_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize 
*batch_dstSizeMax, RppiROI *roiPoints, Rpp32f *batch_perspective, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { //Rpp32f perspective[9] = {0.707, 0.707, 0, -0.707, 0.707, 0, 0.001, 0.001, 1}; if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32f x1 = roiPoints[batchCount].x; @@ -4049,7 +4049,7 @@ RppStatus warp_perspective_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32f x1 = roiPoints[batchCount].x; diff --git a/src/modules/cpu/host_image_augmentations.hpp b/src/modules/cpu/host_image_augmentations.hpp index 98af4b133..9d1fc373d 100644 --- a/src/modules/cpu/host_image_augmentations.hpp +++ b/src/modules/cpu/host_image_augmentations.hpp @@ -35,12 +35,12 @@ template RppStatus brightness_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_alpha, Rpp32f *batch_beta, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -152,7 +152,7 @@ RppStatus brightness_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *ba else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -324,12 +324,12 @@ template RppStatus contrast_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_new_min, Rpp32u *batch_new_max, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -454,7 +454,7 @@ RppStatus contrast_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batc else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -736,12 +736,12 @@ template RppStatus blend_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_alpha, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u 
channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -873,7 +873,7 @@ RppStatus blend_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, Rppi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1079,12 +1079,12 @@ template RppStatus gamma_correction_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_gamma, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1167,7 +1167,7 @@ RppStatus gamma_correction_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1287,12 +1287,12 @@ template RppStatus exposure_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_exposureFactor, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1401,7 +1401,7 @@ RppStatus exposure_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batc else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1568,12 +1568,12 @@ template RppStatus blur_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1854,7 +1854,7 @@ RppStatus blur_host_batch(T* 
srcPtr, RppiSize *batch_srcSize, RppiSize *batch_sr else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2734,12 +2734,12 @@ RppStatus blur_host(T* srcPtr, RppiSize srcSize, T* dstPtr, template RppStatus histogram_balance_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if (chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2857,7 +2857,7 @@ RppStatus histogram_balance_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiS else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -3021,10 +3021,10 @@ template RppStatus random_crop_letterbox_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_x1, Rpp32u *batch_x2, Rpp32u *batch_y1, Rpp32u *batch_y2, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u x1 = batch_x1[batchCount]; @@ -3180,12 +3180,12 @@ RppStatus pixelate_base_pkd_host(T* srcPtrTemp, Rpp32u elementsInRow, T* dstPtrT template RppStatus pixelate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -3378,7 +3378,7 @@ RppStatus pixelate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batc else if(chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -3964,12 +3964,12 @@ template RppStatus fog_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_fogValue, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp 
parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4025,7 +4025,7 @@ RppStatus fog_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_src else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4181,7 +4181,7 @@ template RppStatus noise_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_noiseProbability, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { @@ -4190,7 +4190,7 @@ RppStatus noise_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s dstPtrBufferROI = (T*) calloc(channel * batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * nbatchSize, sizeof(T)); omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4299,7 +4299,7 @@ RppStatus noise_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s dstPtrBufferROI = (T*) calloc(channel * batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * nbatchSize, sizeof(T)); omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4490,12 +4490,12 @@ template RppStatus snow_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_strength, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4562,7 +4562,7 @@ RppStatus snow_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_sr else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4759,10 +4759,10 @@ RppStatus rain_host(T* srcPtr, RppiSize srcSize,T* dstPtr, template RppStatus rain_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_rainPercentage, Rpp32u *batch_rainWidth, Rpp32u *batch_rainHeight, Rpp32f *batch_transparency, - Rpp32u nbatchSize, RppiChnFormat chnFormat, Rpp32u channel) + Rpp32u nbatchSize, RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp 
parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32f rainPercentage = batch_rainPercentage[batchCount]; @@ -4799,12 +4799,12 @@ RppStatus random_shadow_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize Rpp32u *batch_x1, Rpp32u *batch_y1, Rpp32u *batch_x2, Rpp32u *batch_y2, Rpp32u *batch_numberOfShadows, Rpp32u *batch_maxSizeX, Rpp32u *batch_maxSizeY, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4875,7 +4875,7 @@ RppStatus random_shadow_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -5032,12 +5032,12 @@ template RppStatus jitter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -5133,7 +5133,7 @@ RppStatus jitter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_ else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; diff --git a/src/modules/cpu/host_logical_operations.hpp b/src/modules/cpu/host_logical_operations.hpp index b54c0d46f..c89f4f2b3 100644 --- a/src/modules/cpu/host_logical_operations.hpp +++ b/src/modules/cpu/host_logical_operations.hpp @@ -30,12 +30,12 @@ THE SOFTWARE. 
template RppStatus bitwise_AND_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -117,7 +117,7 @@ RppStatus bitwise_AND_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -213,12 +213,12 @@ RppStatus bitwise_AND_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus bitwise_NOT_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -294,7 +294,7 @@ RppStatus bitwise_NOT_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -398,12 +398,12 @@ RppStatus bitwise_NOT_host(T* srcPtr, RppiSize srcSize, T* dstPtr, template RppStatus exclusive_OR_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -486,7 +486,7 @@ RppStatus exclusive_OR_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSiz else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -582,12 +582,12 @@ RppStatus exclusive_OR_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus inclusive_OR_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + 
RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -670,7 +670,7 @@ RppStatus inclusive_OR_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSiz else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; diff --git a/src/modules/cpu/host_morphological_transforms.hpp b/src/modules/cpu/host_morphological_transforms.hpp index 42c51081a..eec19a01a 100644 --- a/src/modules/cpu/host_morphological_transforms.hpp +++ b/src/modules/cpu/host_morphological_transforms.hpp @@ -31,12 +31,12 @@ template RppStatus erode_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -204,7 +204,7 @@ RppStatus erode_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -440,12 +440,12 @@ template RppStatus dilate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -588,7 +588,7 @@ RppStatus dilate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_ else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; diff --git a/src/modules/cpu/host_statistical_operations.hpp b/src/modules/cpu/host_statistical_operations.hpp index 494b28163..3494dc97c 100644 --- a/src/modules/cpu/host_statistical_operations.hpp +++ b/src/modules/cpu/host_statistical_operations.hpp @@ -31,12 +31,12 @@ THE SOFTWARE. 
template RppStatus min_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -115,7 +115,7 @@ RppStatus min_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -208,12 +208,12 @@ RppStatus min_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus max_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -292,7 +292,7 @@ RppStatus max_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -386,12 +386,12 @@ template RppStatus thresholding_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, T *batch_min, T *batch_max, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -478,7 +478,7 @@ RppStatus thresholding_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize * else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -582,12 +582,12 @@ template RppStatus histogram_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32u *outputHistogram, Rpp32u bins, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if (chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for 
num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -641,7 +641,7 @@ RppStatus histogram_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *bat else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDim = batch_srcSize[batchCount].height * batch_srcSize[batchCount].width; @@ -709,12 +709,12 @@ RppStatus histogram_host(T* srcPtr, RppiSize srcSize, Rpp32u* outputHistogram, R template RppStatus histogram_equalization_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if (chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -832,7 +832,7 @@ RppStatus histogram_equalization_host_batch(T* srcPtr, RppiSize *batch_srcSize, else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -996,12 +996,12 @@ template RppStatus min_max_loc_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp8u *batch_min, Rpp8u *batch_max, Rpp32u *batch_minLoc, Rpp32u *batch_maxLoc, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1056,7 +1056,7 @@ RppStatus min_max_loc_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp8u *min = batch_min + batchCount; @@ -1112,7 +1112,7 @@ RppStatus min_max_loc_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b template RppStatus min_max_loc_host(T* srcPtr, RppiSize srcSize, Rpp8u* min, Rpp8u* max, Rpp32u* minLoc, Rpp32u* maxLoc, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { *min = 255; *max = 0; @@ -1147,12 +1147,12 @@ template RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32f *batch_mean, Rpp32f *batch_stddev, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for 
num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1216,7 +1216,7 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDim = batch_srcSize[batchCount].height * batch_srcSize[batchCount].width; @@ -1284,12 +1284,12 @@ template RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32f *batch_mean, Rpp32f *batch_stddev, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1326,7 +1326,7 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int i = 0; i < batch_srcSize[batchCount].height; i++) { T *srcPtrTemp; @@ -1365,7 +1365,7 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int i = 0; i < batch_srcSize[batchCount].height; i++) { T *srcPtrTemp; @@ -1402,7 +1402,7 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1437,7 +1437,7 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int i = 0; i < batch_srcSize[batchCount].height; i++) { T *srcPtrTemp; @@ -1473,7 +1473,7 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int i = 0; i < batch_srcSize[batchCount].height; i++) { T *srcPtrTemp; @@ -1547,10 +1547,10 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b template RppStatus integral_host_batch(T* batch_srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* batch_dstPtr, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u loc = 0; diff --git a/src/modules/rppi_advanced_augmentations.cpp b/src/modules/rppi_advanced_augmentations.cpp index b99dd5371..48cc10d81 100644 --- 
a/src/modules/rppi_advanced_augmentations.cpp +++ b/src/modules/rppi_advanced_augmentations.cpp @@ -71,7 +71,8 @@ RppStatus water_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -91,7 +92,8 @@ RppStatus water_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -111,7 +113,8 @@ RppStatus water_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -131,7 +134,8 @@ RppStatus water_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } @@ -230,7 +234,8 @@ RppStatus non_linear_blend_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -246,7 +251,8 @@ RppStatus non_linear_blend_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -262,7 +268,8 @@ RppStatus non_linear_blend_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -278,7 +285,8 @@ RppStatus non_linear_blend_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } @@ -381,7 +389,8 @@ RppStatus color_cast_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -399,7 +408,8 @@ RppStatus color_cast_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -417,7 +427,8 @@ RppStatus color_cast_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -435,7 +446,8 @@ RppStatus color_cast_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } @@ -518,7 +530,8 @@ RppStatus erase_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + 
rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -536,7 +549,8 @@ RppStatus erase_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -554,7 +568,8 @@ RppStatus erase_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -572,7 +587,8 @@ RppStatus erase_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } @@ -687,7 +703,8 @@ RppStatus crop_and_patch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -712,7 +729,8 @@ RppStatus crop_and_patch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -737,7 +755,8 @@ RppStatus crop_and_patch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -762,7 +781,8 @@ RppStatus crop_and_patch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } @@ -859,7 +879,8 @@ RppStatus lut_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -874,7 +895,8 @@ RppStatus lut_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } @@ -951,7 +973,8 @@ RppStatus glitch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -971,7 +994,8 @@ RppStatus glitch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -991,7 +1015,8 @@ RppStatus glitch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -1011,7 +1036,8 @@ RppStatus glitch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + 
rpp::deref(rppHandle).GetNumThreads()); } } diff --git a/src/modules/rppi_arithmetic_operations.cpp b/src/modules/rppi_arithmetic_operations.cpp index 5daf8be86..eab212563 100644 --- a/src/modules/rppi_arithmetic_operations.cpp +++ b/src/modules/rppi_arithmetic_operations.cpp @@ -60,7 +60,8 @@ rppi_add_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -90,7 +91,8 @@ rppi_add_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -120,7 +122,8 @@ rppi_add_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -152,7 +155,8 @@ rppi_subtract_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -182,7 +186,8 @@ rppi_subtract_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -212,7 +217,8 @@ rppi_subtract_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -244,7 +250,8 @@ rppi_multiply_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -274,7 +281,8 @@ rppi_multiply_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -304,7 +312,8 @@ rppi_multiply_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -336,7 +345,8 @@ rppi_absolute_difference_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -366,7 +376,8 @@ rppi_absolute_difference_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -396,7 +407,8 @@ rppi_absolute_difference_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -428,7 +440,8 @@ rppi_phase_u8_pln1_batchPD_host(RppPtr_t srcPtr1, 
rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -458,7 +471,8 @@ rppi_phase_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -488,7 +502,8 @@ rppi_phase_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -520,7 +535,8 @@ rppi_magnitude_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -550,7 +566,8 @@ rppi_magnitude_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -580,7 +597,8 @@ rppi_magnitude_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -610,7 +628,8 @@ rppi_accumulate_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -638,7 +657,8 @@ rppi_accumulate_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -666,7 +686,8 @@ rppi_accumulate_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -698,7 +719,8 @@ rppi_accumulate_weighted_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -728,7 +750,8 @@ rppi_accumulate_weighted_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -758,7 +781,8 @@ rppi_accumulate_weighted_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -786,7 +810,8 @@ rppi_accumulate_squared_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -812,7 +837,8 @@ rppi_accumulate_squared_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, 
rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -838,7 +864,8 @@ rppi_accumulate_squared_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } diff --git a/src/modules/rppi_color_model_conversions.cpp b/src/modules/rppi_color_model_conversions.cpp index efd9244f9..fc47c272c 100644 --- a/src/modules/rppi_color_model_conversions.cpp +++ b/src/modules/rppi_color_model_conversions.cpp @@ -60,7 +60,8 @@ rppi_hueRGB_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -90,7 +91,8 @@ rppi_hueRGB_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -122,7 +124,8 @@ rppi_saturationRGB_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -152,7 +155,8 @@ rppi_saturationRGB_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -179,7 +183,8 @@ rppi_color_convert_u8_pln3_batchPS_host(RppPtr_t srcPtr, convert_mode, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); } else if(convert_mode == RppiColorConvertMode::HSV_RGB) { @@ -190,7 +195,8 @@ rppi_color_convert_u8_pln3_batchPS_host(RppPtr_t srcPtr, convert_mode, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -216,7 +222,8 @@ rppi_color_convert_u8_pkd3_batchPS_host(RppPtr_t srcPtr, convert_mode, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); } else if(convert_mode == RppiColorConvertMode::HSV_RGB) { @@ -227,7 +234,8 @@ rppi_color_convert_u8_pkd3_batchPS_host(RppPtr_t srcPtr, convert_mode, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -260,7 +268,8 @@ rppi_color_temperature_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -290,7 +299,8 @@ rppi_color_temperature_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -320,7 +330,8 @@ rppi_color_temperature_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -352,7 +363,8 @@ rppi_vignette_u8_pln1_batchPD_host(RppPtr_t srcPtr, 
rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -382,7 +394,8 @@ rppi_vignette_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -412,7 +425,8 @@ rppi_vignette_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -437,7 +451,8 @@ rppi_channel_extract_u8_pln1_batchPD_host(RppPtr_t srcPtr, extractChannelNumber, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -460,7 +475,8 @@ rppi_channel_extract_u8_pln3_batchPD_host(RppPtr_t srcPtr, extractChannelNumber, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -483,7 +499,8 @@ rppi_channel_extract_u8_pkd3_batchPD_host(RppPtr_t srcPtr, extractChannelNumber, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -510,7 +527,8 @@ rppi_channel_combine_u8_pln1_batchPD_host(RppPtr_t srcPtr1, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -535,7 +553,8 @@ rppi_channel_combine_u8_pln3_batchPD_host(RppPtr_t srcPtr1, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -560,7 +579,8 @@ rppi_channel_combine_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -592,7 +612,8 @@ rppi_look_up_table_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -622,7 +643,8 @@ rppi_look_up_table_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -652,7 +674,8 @@ rppi_look_up_table_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } diff --git a/src/modules/rppi_computer_vision.cpp b/src/modules/rppi_computer_vision.cpp index 512e5ebc2..52147dfc4 100644 --- a/src/modules/rppi_computer_vision.cpp +++ b/src/modules/rppi_computer_vision.cpp @@ -58,7 +58,8 @@ rppi_local_binary_pattern_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -86,7 +87,8 @@ rppi_local_binary_pattern_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), 
RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -114,7 +116,8 @@ rppi_local_binary_pattern_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -137,7 +140,8 @@ rppi_data_object_copy_u8_pln1_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -158,7 +162,8 @@ rppi_data_object_copy_u8_pln3_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -179,7 +184,8 @@ rppi_data_object_copy_u8_pkd3_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -206,7 +212,8 @@ rppi_gaussian_image_pyramid_u8_pln1_batchPD_host(RppPtr_t srcPtr, kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -231,7 +238,8 @@ rppi_gaussian_image_pyramid_u8_pln3_batchPD_host(RppPtr_t srcPtr, kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -256,7 +264,8 @@ rppi_gaussian_image_pyramid_u8_pkd3_batchPD_host(RppPtr_t srcPtr, kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -283,7 +292,8 @@ rppi_laplacian_image_pyramid_u8_pln1_batchPD_host(RppPtr_t srcPtr, kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -308,7 +318,8 @@ rppi_laplacian_image_pyramid_u8_pln3_batchPD_host(RppPtr_t srcPtr, kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -333,7 +344,8 @@ rppi_laplacian_image_pyramid_u8_pkd3_batchPD_host(RppPtr_t srcPtr, kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -360,7 +372,8 @@ rppi_canny_edge_detector_u8_pln1_batchPD_host(RppPtr_t srcPtr, maxThreshold, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -385,7 +398,8 @@ rppi_canny_edge_detector_u8_pln3_batchPD_host(RppPtr_t srcPtr, maxThreshold, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -410,7 +424,8 @@ rppi_canny_edge_detector_u8_pkd3_batchPD_host(RppPtr_t srcPtr, maxThreshold, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -445,7 +460,8 @@ rppi_harris_corner_detector_u8_pln1_batchPD_host(RppPtr_t srcPtr, nonmaxKernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -478,7 +494,8 @@ rppi_harris_corner_detector_u8_pln3_batchPD_host(RppPtr_t srcPtr, nonmaxKernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + 
rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -511,7 +528,8 @@ rppi_harris_corner_detector_u8_pkd3_batchPD_host(RppPtr_t srcPtr, nonmaxKernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -587,7 +605,8 @@ rppi_fast_corner_detector_u8_pln1_batchPD_host(RppPtr_t srcPtr, nonmaxKernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -614,7 +633,8 @@ rppi_fast_corner_detector_u8_pln3_batchPD_host(RppPtr_t srcPtr, nonmaxKernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -641,7 +661,8 @@ rppi_fast_corner_detector_u8_pkd3_batchPD_host(RppPtr_t srcPtr, nonmaxKernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -675,7 +696,8 @@ rppi_reconstruction_laplacian_image_pyramid_u8_pln1_batchPD_host(RppPtr_t srcPtr kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -707,7 +729,8 @@ rppi_reconstruction_laplacian_image_pyramid_u8_pln3_batchPD_host(RppPtr_t srcPtr kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -739,7 +762,8 @@ rppi_reconstruction_laplacian_image_pyramid_u8_pkd3_batchPD_host(RppPtr_t srcPtr kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -832,7 +856,8 @@ rppi_hough_lines_u8_pln1_batchPD_host(RppPtr_t srcPtr, linesMax, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -865,7 +890,8 @@ rppi_hog_u8_pln1_batchPD_host(RppPtr_t srcPtr, numOfBins, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -892,7 +918,8 @@ rppi_remap_u8_pln1_batchPD_host(RppPtr_t srcPtr, colRemapTable, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -917,7 +944,8 @@ rppi_remap_u8_pln3_batchPD_host(RppPtr_t srcPtr, colRemapTable, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -942,7 +970,8 @@ rppi_remap_u8_pkd3_batchPD_host(RppPtr_t srcPtr, colRemapTable, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1047,7 +1076,8 @@ rppi_convert_bit_depth_u8s8_pln1_batchPD_host(RppPtr_t srcPtr, 1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1069,7 +1099,8 @@ rppi_convert_bit_depth_u8u16_pln1_batchPD_host(RppPtr_t srcPtr, 2, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1091,7 +1122,8 @@ rppi_convert_bit_depth_u8s16_pln1_batchPD_host(RppPtr_t srcPtr, 3, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1113,7 +1145,8 @@ rppi_convert_bit_depth_u8s8_pln3_batchPD_host(RppPtr_t srcPtr, 1, 
rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1135,7 +1168,8 @@ rppi_convert_bit_depth_u8u16_pln3_batchPD_host(RppPtr_t srcPtr, 2, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1157,7 +1191,8 @@ rppi_convert_bit_depth_u8s16_pln3_batchPD_host(RppPtr_t srcPtr, 3, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1179,7 +1214,8 @@ rppi_convert_bit_depth_u8s8_pkd3_batchPD_host(RppPtr_t srcPtr, 1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1201,7 +1237,8 @@ rppi_convert_bit_depth_u8u16_pkd3_batchPD_host(RppPtr_t srcPtr, 2, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1223,7 +1260,8 @@ rppi_convert_bit_depth_u8s16_pkd3_batchPD_host(RppPtr_t srcPtr, 3, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } diff --git a/src/modules/rppi_filter_operations.cpp b/src/modules/rppi_filter_operations.cpp index 277d54fa6..d6a37d0f0 100644 --- a/src/modules/rppi_filter_operations.cpp +++ b/src/modules/rppi_filter_operations.cpp @@ -60,7 +60,8 @@ rppi_box_filter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -90,7 +91,8 @@ rppi_box_filter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -120,7 +122,8 @@ rppi_box_filter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -152,7 +155,8 @@ rppi_sobel_filter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -182,7 +186,8 @@ rppi_sobel_filter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -212,7 +217,8 @@ rppi_sobel_filter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -244,7 +250,8 @@ rppi_median_filter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -274,7 +281,8 @@ rppi_median_filter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } 
@@ -304,7 +312,8 @@ rppi_median_filter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -336,7 +345,8 @@ rppi_non_max_suppression_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -366,7 +376,8 @@ rppi_non_max_suppression_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -396,7 +407,8 @@ rppi_non_max_suppression_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -430,7 +442,8 @@ rppi_gaussian_filter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -462,7 +475,8 @@ rppi_gaussian_filter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -494,7 +508,8 @@ rppi_gaussian_filter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -526,7 +541,8 @@ rppi_nonlinear_filter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -556,7 +572,8 @@ rppi_nonlinear_filter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -586,7 +603,8 @@ rppi_nonlinear_filter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -620,7 +638,8 @@ rppi_custom_convolution_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -652,7 +671,8 @@ rppi_custom_convolution_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -684,7 +704,8 @@ rppi_custom_convolution_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } diff --git 
a/src/modules/rppi_fused_functions.cpp b/src/modules/rppi_fused_functions.cpp index e22c94cb8..be115985b 100644 --- a/src/modules/rppi_fused_functions.cpp +++ b/src/modules/rppi_fused_functions.cpp @@ -72,7 +72,8 @@ RppStatus color_twist_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensor_type == RPPTensorDataType::FP16) { @@ -88,7 +89,8 @@ RppStatus color_twist_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensor_type == RPPTensorDataType::FP32) { @@ -104,7 +106,8 @@ RppStatus color_twist_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensor_type == RPPTensorDataType::I8) { @@ -120,7 +123,8 @@ RppStatus color_twist_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -722,7 +726,8 @@ RppStatus resize_crop_mirror_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensor_type == RPPTensorDataType::FP16) { @@ -740,7 +745,8 @@ RppStatus resize_crop_mirror_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensor_type == RPPTensorDataType::FP32) { @@ -758,7 +764,8 @@ RppStatus resize_crop_mirror_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensor_type == RPPTensorDataType::I8) { @@ -776,7 +783,8 @@ RppStatus resize_crop_mirror_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -878,7 +886,8 @@ RppStatus resize_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; diff --git a/src/modules/rppi_geometry_transforms.cpp b/src/modules/rppi_geometry_transforms.cpp index 92f9e5591..5dd641a86 100644 --- a/src/modules/rppi_geometry_transforms.cpp +++ b/src/modules/rppi_geometry_transforms.cpp @@ -60,7 +60,8 @@ rppi_flip_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -90,7 +91,8 @@ rppi_flip_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -120,7 +122,8 @@ rppi_flip_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, 
rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -164,7 +167,8 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensorOutType == RPPTensorDataType::FP16) { @@ -178,7 +182,8 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensorOutType == RPPTensorDataType::FP32) { @@ -192,7 +197,8 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensorOutType == RPPTensorDataType::I8) { @@ -206,7 +212,8 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (tensorInType == RPPTensorDataType::FP16) @@ -221,7 +228,8 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensorInType == RPPTensorDataType::FP32) { @@ -235,7 +243,8 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensorInType == RPPTensorDataType::I8) { @@ -249,7 +258,8 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -398,7 +408,8 @@ RppStatus resize_crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensor_type == RPPTensorDataType::FP16) { @@ -415,7 +426,8 @@ RppStatus resize_crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensor_type == RPPTensorDataType::FP32) { @@ -432,7 +444,8 @@ RppStatus resize_crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensor_type == RPPTensorDataType::I8) { @@ -449,7 +462,8 @@ RppStatus resize_crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -554,7 +568,8 @@ RppStatus rotate_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensor_type == RPPTensorDataType::FP16) { @@ -569,7 +584,8 @@ RppStatus rotate_host_helper(RppiChnFormat chn_format, outputFormatToggle, 
rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensor_type == RPPTensorDataType::FP32) { @@ -584,7 +600,8 @@ RppStatus rotate_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } else if (tensor_type == RPPTensorDataType::I8) { @@ -599,7 +616,8 @@ RppStatus rotate_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } return RPP_SUCCESS; @@ -707,7 +725,8 @@ RppStatus warp_affine_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -725,7 +744,8 @@ RppStatus warp_affine_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -743,7 +763,8 @@ RppStatus warp_affine_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -761,7 +782,8 @@ RppStatus warp_affine_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle).GetNumThreads()); } } @@ -854,7 +876,8 @@ rppi_fisheye_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -882,7 +905,8 @@ rppi_fisheye_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -910,7 +934,8 @@ rppi_fisheye_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -944,7 +969,8 @@ rppi_lens_correction_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -976,7 +1002,8 @@ rppi_lens_correction_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1008,7 +1035,8 @@ rppi_lens_correction_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1045,7 +1073,8 @@ rppi_scale_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, 
rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1080,7 +1109,8 @@ rppi_scale_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1115,7 +1145,8 @@ rppi_scale_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1152,7 +1183,8 @@ rppi_warp_perspective_u8_pln1_batchPD_host(RppPtr_t srcPtr, perspectiveMatrix, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1187,7 +1219,8 @@ rppi_warp_perspective_u8_pln3_batchPD_host(RppPtr_t srcPtr, perspectiveMatrix, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1222,7 +1255,8 @@ rppi_warp_perspective_u8_pkd3_batchPD_host(RppPtr_t srcPtr, perspectiveMatrix, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } diff --git a/src/modules/rppi_image_augmentations.cpp b/src/modules/rppi_image_augmentations.cpp index 33150abc3..4ed3a65c5 100644 --- a/src/modules/rppi_image_augmentations.cpp +++ b/src/modules/rppi_image_augmentations.cpp @@ -62,7 +62,8 @@ rppi_brightness_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -94,7 +95,8 @@ rppi_brightness_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -126,7 +128,8 @@ rppi_brightness_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -158,7 +161,8 @@ rppi_gamma_correction_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -188,7 +192,8 @@ rppi_gamma_correction_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -218,7 +223,8 @@ rppi_gamma_correction_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -252,7 +258,8 @@ rppi_blend_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -284,7 +291,8 @@ rppi_blend_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, 
rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -316,7 +324,8 @@ rppi_blend_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -348,7 +357,8 @@ rppi_blur_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -378,7 +388,8 @@ rppi_blur_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -408,7 +419,8 @@ rppi_blur_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -442,7 +454,8 @@ rppi_contrast_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -474,7 +487,8 @@ rppi_contrast_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -506,7 +520,8 @@ rppi_contrast_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -536,7 +551,8 @@ rppi_pixelate_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -564,7 +580,8 @@ rppi_pixelate_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -592,7 +609,8 @@ rppi_pixelate_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -624,7 +642,8 @@ rppi_jitter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -654,7 +673,8 @@ rppi_jitter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -684,7 +704,8 @@ rppi_jitter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -716,7 +737,8 @@ 
rppi_snow_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -746,7 +768,8 @@ rppi_snow_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -776,7 +799,8 @@ rppi_snow_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -808,7 +832,8 @@ rppi_noise_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -838,7 +863,8 @@ rppi_noise_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -868,7 +894,8 @@ rppi_noise_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -905,7 +932,8 @@ rppi_random_shadow_u8_pln1_batchPD_host(RppPtr_t srcPtr, maxSizeY, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -940,7 +968,8 @@ rppi_random_shadow_u8_pln3_batchPD_host(RppPtr_t srcPtr, maxSizeY, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -975,7 +1004,8 @@ rppi_random_shadow_u8_pkd3_batchPD_host(RppPtr_t srcPtr, maxSizeY, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1000,7 +1030,8 @@ rppi_fog_u8_pln1_batchPD_host(RppPtr_t srcPtr, fogValue, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1023,7 +1054,8 @@ rppi_fog_u8_pln3_batchPD_host(RppPtr_t srcPtr, fogValue, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1046,7 +1078,8 @@ rppi_fog_u8_pkd3_batchPD_host(RppPtr_t srcPtr, fogValue, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1077,7 +1110,8 @@ rppi_rain_u8_pln1_batchPD_host(RppPtr_t srcPtr, transperancy, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1106,7 +1140,8 @@ rppi_rain_u8_pln3_batchPD_host(RppPtr_t srcPtr, transperancy, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1135,7 +1170,8 @@ rppi_rain_u8_pkd3_batchPD_host(RppPtr_t srcPtr, transperancy, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1178,7 +1214,8 @@ rppi_random_crop_letterbox_u8_pln1_batchPD_host(RppPtr_t srcPtr, 
rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1219,7 +1256,8 @@ rppi_random_crop_letterbox_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1260,7 +1298,8 @@ rppi_random_crop_letterbox_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1292,7 +1331,8 @@ rppi_exposure_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1322,7 +1362,8 @@ rppi_exposure_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1352,7 +1393,8 @@ rppi_exposure_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1375,7 +1417,8 @@ rppi_histogram_balance_u8_pln1_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1396,7 +1439,8 @@ rppi_histogram_balance_u8_pln3_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -1417,7 +1461,8 @@ rppi_histogram_balance_u8_pkd3_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } diff --git a/src/modules/rppi_logical_operations.cpp b/src/modules/rppi_logical_operations.cpp index f321763b6..d5f0e1204 100644 --- a/src/modules/rppi_logical_operations.cpp +++ b/src/modules/rppi_logical_operations.cpp @@ -60,7 +60,8 @@ rppi_bitwise_AND_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -90,7 +91,8 @@ rppi_bitwise_AND_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -120,7 +122,8 @@ rppi_bitwise_AND_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -150,7 +153,8 @@ rppi_bitwise_NOT_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -178,7 +182,8 @@ 
rppi_bitwise_NOT_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -206,7 +211,8 @@ rppi_bitwise_NOT_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -238,7 +244,8 @@ rppi_exclusive_OR_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -268,7 +275,8 @@ rppi_exclusive_OR_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -298,7 +306,8 @@ rppi_exclusive_OR_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -330,7 +339,8 @@ rppi_inclusive_OR_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -360,7 +370,8 @@ rppi_inclusive_OR_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -390,7 +401,8 @@ rppi_inclusive_OR_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } diff --git a/src/modules/rppi_morphological_operations.cpp b/src/modules/rppi_morphological_operations.cpp index 86272fa9d..7ab7cad1b 100644 --- a/src/modules/rppi_morphological_operations.cpp +++ b/src/modules/rppi_morphological_operations.cpp @@ -60,7 +60,8 @@ rppi_erode_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -90,7 +91,8 @@ rppi_erode_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -120,7 +122,8 @@ rppi_erode_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -152,7 +155,8 @@ rppi_dilate_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -182,7 +186,8 @@ rppi_dilate_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), 
RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -212,7 +217,8 @@ rppi_dilate_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } diff --git a/src/modules/rppi_statistical_operations.cpp b/src/modules/rppi_statistical_operations.cpp index 111042173..de4be2328 100644 --- a/src/modules/rppi_statistical_operations.cpp +++ b/src/modules/rppi_statistical_operations.cpp @@ -62,7 +62,8 @@ rppi_thresholding_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -94,7 +95,8 @@ rppi_thresholding_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -126,7 +128,8 @@ rppi_thresholding_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -158,7 +161,8 @@ rppi_min_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -188,7 +192,8 @@ rppi_min_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -218,7 +223,8 @@ rppi_min_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -250,7 +256,8 @@ rppi_max_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -280,7 +287,8 @@ rppi_max_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -310,7 +318,8 @@ rppi_max_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -333,7 +342,8 @@ rppi_min_max_loc_u8_pln1_host(RppPtr_t srcPtr, minLoc, maxLoc, RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -354,7 +364,8 @@ rppi_min_max_loc_u8_pln3_host(RppPtr_t srcPtr, minLoc, maxLoc, RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -375,7 +386,8 @@ rppi_min_max_loc_u8_pkd3_host(RppPtr_t srcPtr, minLoc, maxLoc, RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -398,7 +410,8 @@ rppi_integral_u8_pln1_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), 
rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -419,7 +432,8 @@ rppi_integral_u8_pln3_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -440,7 +454,8 @@ rppi_integral_u8_pkd3_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -463,7 +478,8 @@ rppi_histogram_equalization_u8_pln1_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -484,7 +500,8 @@ rppi_histogram_equalization_u8_pln3_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } @@ -505,7 +522,8 @@ rppi_histogram_equalization_u8_pkd3_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle).GetNumThreads()); return RPP_SUCCESS; } diff --git a/utilities/rpp-unittests/HOST_NEW/testAllScript.sh b/utilities/rpp-unittests/HOST_NEW/testAllScript.sh index a11a9c5c9..6bb5ffd45 100755 --- a/utilities/rpp-unittests/HOST_NEW/testAllScript.sh +++ b/utilities/rpp-unittests/HOST_NEW/testAllScript.sh @@ -170,7 +170,7 @@ do echo "--------------------------------" printf "Running a New Functionality...\n" echo "--------------------------------" - for ((bitDepth=0;bitDepth<7;bitDepth++)) + for ((bitDepth=0;bitDepth<1;bitDepth++)) do printf "\n\n\nRunning New Bit Depth...\n-------------------------\n\n" for ((outputFormatToggle=0;outputFormatToggle<2;outputFormatToggle++)) @@ -246,7 +246,7 @@ do echo "--------------------------------" printf "Running a New Functionality...\n" echo "--------------------------------" - for ((bitDepth=0;bitDepth<7;bitDepth++)) + for ((bitDepth=0;bitDepth<1;bitDepth++)) do printf "\n\n\nRunning New Bit Depth...\n-------------------------\n\n" for ((outputFormatToggle=0;outputFormatToggle<1;outputFormatToggle++)) @@ -322,7 +322,7 @@ do echo "--------------------------------" printf "Running a New Functionality...\n" echo "--------------------------------" - for ((bitDepth=0;bitDepth<7;bitDepth++)) + for ((bitDepth=0;bitDepth<1;bitDepth++)) do printf "\n\n\nRunning New Bit Depth...\n-------------------------\n\n" for ((outputFormatToggle=0;outputFormatToggle<2;outputFormatToggle++)) @@ -397,7 +397,7 @@ then echo "--------------------------------" | tee -a "$DST_FOLDER/uniqueFunctionalities_host_log.txt" printf "Running a New Functionality...\n" | tee -a "$DST_FOLDER/uniqueFunctionalities_host_log.txt" echo "--------------------------------" | tee -a "$DST_FOLDER/uniqueFunctionalities_host_log.txt" - for ((bitDepth=0;bitDepth<7;bitDepth++)) + for ((bitDepth=0;bitDepth<1;bitDepth++)) do printf "\n\n\nRunning New Bit Depth...\n-------------------------\n\n" | tee -a "$DST_FOLDER/uniqueFunctionalities_host_log.txt" echo "./uniqueFunctionalities_host $bitDepth $case" | tee -a "$DST_FOLDER/uniqueFunctionalities_host_log.txt" From d26f1f3c38c40a093186440a3261e9e149c3305c Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Mon, 20 Mar 2023 11:10:27 -0700 Subject: [PATCH 05/15] passed rpp handle as a parameter to all host kernels --- 
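This patch changes the contract between the rppi_* entry points and the CPU kernels: instead of threading a raw Rpp32u numThreads argument through every host batch kernel, as the earlier patches in this series do, each kernel now receives the rpp::Handle& itself and reads the thread count once at the top with GetNumThreads(). A minimal sketch of the before/after shape of one such kernel follows; example_host_batch and its shortened parameter list are illustrative only, not an actual RPP function.

// Earlier patches in the series: thread count passed as a trailing integer.
template <typename T>
RppStatus example_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                             Rpp32u nbatchSize, RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads);

// This patch: the handle is passed instead, and the kernel queries it once
// before entering its OpenMP region.
template <typename T>
RppStatus example_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                             Rpp32u nbatchSize, RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
{
    Rpp32u numThreads = handle.GetNumThreads();
    omp_set_dynamic(0);
#pragma omp parallel for num_threads(numThreads)
    for (int batchCount = 0; batchCount < nbatchSize; batchCount++)
    {
        // per-image work for image batchCount ...
    }
    return RPP_SUCCESS;
}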
.../cpu/host_advanced_augmentations.hpp | 44 ++++--- .../cpu/host_arithmetic_operations.hpp | 27 ++-- .../cpu/host_color_model_conversions.hpp | 38 ++++-- src/modules/cpu/host_computer_vision.hpp | 35 +++-- src/modules/cpu/host_filter_operations.hpp | 21 ++- src/modules/cpu/host_fused_functions.hpp | 69 ++++++---- src/modules/cpu/host_geometry_transforms.hpp | 30 +++-- src/modules/cpu/host_image_augmentations.hpp | 53 +++++--- src/modules/cpu/host_logical_operations.hpp | 12 +- .../cpu/host_morphological_transforms.hpp | 6 +- .../cpu/host_statistical_operations.hpp | 41 ++++-- src/modules/cpu/kernel/blend.hpp | 12 +- src/modules/cpu/kernel/brightness.hpp | 12 +- src/modules/cpu/kernel/color_cast.hpp | 12 +- src/modules/cpu/kernel/color_jitter.hpp | 12 +- src/modules/cpu/kernel/color_to_greyscale.hpp | 12 +- src/modules/cpu/kernel/color_twist.hpp | 12 +- src/modules/cpu/kernel/contrast.hpp | 12 +- src/modules/cpu/kernel/copy.hpp | 44 ++++--- src/modules/cpu/kernel/crop.hpp | 12 +- .../cpu/kernel/crop_mirror_normalize.hpp | 18 ++- src/modules/cpu/kernel/exposure.hpp | 12 +- src/modules/cpu/kernel/flip.hpp | 12 +- src/modules/cpu/kernel/gamma_correction.hpp | 12 +- src/modules/cpu/kernel/gridmask.hpp | 12 +- src/modules/cpu/kernel/noise_gaussian.hpp | 12 +- .../cpu/kernel/noise_salt_and_pepper.hpp | 12 +- src/modules/cpu/kernel/noise_shot.hpp | 12 +- src/modules/cpu/kernel/non_linear_blend.hpp | 24 ++-- src/modules/cpu/kernel/resize.hpp | 45 ++++--- src/modules/cpu/kernel/resize_crop_mirror.hpp | 20 +-- .../cpu/kernel/resize_mirror_normalize.hpp | 30 +++-- src/modules/cpu/kernel/spatter.hpp | 12 +- src/modules/cpu/kernel/swap_channels.hpp | 12 +- src/modules/cpu/kernel/warp_affine.hpp | 64 ++++++---- src/modules/rppi_advanced_augmentations.cpp | 52 ++++---- src/modules/rppi_arithmetic_operations.cpp | 54 ++++---- src/modules/rppi_color_model_conversions.cpp | 46 +++---- src/modules/rppi_computer_vision.cpp | 76 +++++------ src/modules/rppi_filter_operations.cpp | 42 +++--- src/modules/rppi_fused_functions.cpp | 46 +++---- src/modules/rppi_geometry_transforms.cpp | 68 +++++----- src/modules/rppi_image_augmentations.cpp | 90 ++++++------- src/modules/rppi_logical_operations.cpp | 24 ++-- src/modules/rppi_morphological_operations.cpp | 12 +- src/modules/rppi_statistical_operations.cpp | 36 +++--- .../rppt_tensor_color_augmentations.cpp | 64 +++++----- .../rppt_tensor_data_exchange_operations.cpp | 24 ++-- .../rppt_tensor_effects_augmentations.cpp | 52 ++++---- .../rppt_tensor_geometric_augmentations.cpp | 120 ++++++++++-------- 50 files changed, 949 insertions(+), 682 deletions(-) diff --git a/src/modules/cpu/host_advanced_augmentations.hpp b/src/modules/cpu/host_advanced_augmentations.hpp index 2c11dae0d..37d62173b 100644 --- a/src/modules/cpu/host_advanced_augmentations.hpp +++ b/src/modules/cpu/host_advanced_augmentations.hpp @@ -34,8 +34,9 @@ RppStatus water_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s Rpp32f *batch_freq_x, Rpp32f *batch_freq_y, Rpp32f *batch_phase_x, Rpp32f *batch_phase_y, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -289,8 +290,9 @@ template RppStatus non_linear_blend_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_std_dev, Rpp32u outputFormatToggle, Rpp32u 
nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -578,8 +580,9 @@ RppStatus non_linear_blend_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_sr RppStatus non_linear_blend_f32_host_batch(Rpp32f* srcPtr1, Rpp32f* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32f* dstPtr, Rpp32f *batch_std_dev, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -830,8 +833,9 @@ RppStatus non_linear_blend_f32_host_batch(Rpp32f* srcPtr1, Rpp32f* srcPtr2, Rppi RppStatus non_linear_blend_f16_host_batch(Rpp16f* srcPtr1, Rpp16f* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp16f* dstPtr, Rpp32f *batch_std_dev, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1108,7 +1112,7 @@ RppStatus non_linear_blend_f16_host_batch(Rpp16f* srcPtr1, Rpp16f* srcPtr2, Rppi RppStatus non_linear_blend_i8_host_batch(Rpp8s* srcPtr1, Rpp8s* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp8s* dstPtr, Rpp32f *batch_std_dev, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { Rpp64u bufferLength = batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * channel * nbatchSize; @@ -1134,7 +1138,7 @@ RppStatus non_linear_blend_i8_host_batch(Rpp8s* srcPtr1, Rpp8s* srcPtr2, RppiSiz srcPtr2_8uTemp++; } - non_linear_blend_host_batch(srcPtr1_8u, srcPtr2_8u, batch_srcSize, batch_srcSizeMax, dstPtr_8u, batch_std_dev, outputFormatToggle, nbatchSize, chnFormat, channel, numThreads); + non_linear_blend_host_batch(srcPtr1_8u, srcPtr2_8u, batch_srcSize, batch_srcSizeMax, dstPtr_8u, batch_std_dev, outputFormatToggle, nbatchSize, chnFormat, channel, handle); Rpp8s *dstPtrTemp; dstPtrTemp = dstPtr; @@ -1162,8 +1166,9 @@ template RppStatus color_cast_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp8u *batch_r, Rpp8u *batch_g, Rpp8u *batch_b, Rpp32f *batch_alpha, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1383,8 +1388,9 @@ RppStatus color_cast_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *ba RppStatus color_cast_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32f* dstPtr, Rpp8u *batch_r, Rpp8u *batch_g, Rpp8u *batch_b, Rpp32f *batch_alpha, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1576,8 +1582,9 @@ RppStatus color_cast_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcSize, Rpp RppStatus 
color_cast_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp16f* dstPtr, Rpp8u *batch_r, Rpp8u *batch_g, Rpp8u *batch_b, Rpp32f *batch_alpha, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1793,7 +1800,7 @@ RppStatus color_cast_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_srcSize, Rpp RppStatus color_cast_i8_host_batch(Rpp8s* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp8s* dstPtr, Rpp8u *batch_r, Rpp8u *batch_g, Rpp8u *batch_b, Rpp32f *batch_alpha, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { Rpp64u bufferLength = batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * channel * nbatchSize; @@ -1813,7 +1820,7 @@ RppStatus color_cast_i8_host_batch(Rpp8s* srcPtr, RppiSize *batch_srcSize, RppiS srcPtr_8uTemp++; } - color_cast_host_batch(srcPtr_8u, batch_srcSize, batch_srcSizeMax, dstPtr_8u, batch_r, batch_g, batch_b, batch_alpha, outputFormatToggle, nbatchSize, chnFormat, channel, numThreads); + color_cast_host_batch(srcPtr_8u, batch_srcSize, batch_srcSizeMax, dstPtr_8u, batch_r, batch_g, batch_b, batch_alpha, outputFormatToggle, nbatchSize, chnFormat, channel, handle); Rpp8s *dstPtrTemp; dstPtrTemp = dstPtr; @@ -1840,8 +1847,9 @@ template RppStatus erase_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_anchor_box_info, T *batch_colors, Rpp32u *batch_box_offset, Rpp32u *batch_num_of_boxes, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -2006,8 +2014,9 @@ RppStatus crop_and_patch_host_batch(T* srcPtr1, RppiSize *batch_srcSize1, RppiSi Rpp32u *batch_src1x1, Rpp32u *batch_src1y1, Rpp32u *batch_src1x2, Rpp32u *batch_src1y2, Rpp32u *batch_src2x1, Rpp32u *batch_src2y1, Rpp32u *batch_src2x2, Rpp32u *batch_src2y2, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -2258,10 +2267,11 @@ template RppStatus lut_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, T *batch_lutPtr, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { Rpp32u lutSize = 256; + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -2336,7 +2346,8 @@ RppStatus lut_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_src } else if(chnFormat == RPPI_CHN_PACKED) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -2433,8 +2444,9 @@ RppStatus glitch_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_ Rpp32u *batch_x_offset_g, Rpp32u 
*batch_y_offset_g, Rpp32u *batch_x_offset_b, Rpp32u *batch_y_offset_b, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); diff --git a/src/modules/cpu/host_arithmetic_operations.hpp b/src/modules/cpu/host_arithmetic_operations.hpp index e15ed7ce8..ca5dc5b23 100644 --- a/src/modules/cpu/host_arithmetic_operations.hpp +++ b/src/modules/cpu/host_arithmetic_operations.hpp @@ -30,8 +30,9 @@ THE SOFTWARE. template RppStatus absolute_difference_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -235,8 +236,9 @@ template RppStatus accumulate_weighted_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32f *batch_alpha, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -420,8 +422,9 @@ RppStatus accumulate_weighted_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, template RppStatus accumulate_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -599,8 +602,9 @@ RppStatus accumulate_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, template RppStatus add_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -800,8 +804,9 @@ RppStatus add_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus subtract_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -985,8 +990,9 @@ RppStatus subtract_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus magnitude_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1177,8 +1183,9 @@ RppStatus magnitude_host(T* srcPtr1, T* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus multiply_host_batch(T* 
srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1362,9 +1369,10 @@ RppStatus multiply_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus phase_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { Rpp32f multiplier = 255 / 1.570796; + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1568,8 +1576,9 @@ RppStatus phase_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus accumulate_squared_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); diff --git a/src/modules/cpu/host_color_model_conversions.hpp b/src/modules/cpu/host_color_model_conversions.hpp index bbeb190b6..eada8570a 100644 --- a/src/modules/cpu/host_color_model_conversions.hpp +++ b/src/modules/cpu/host_color_model_conversions.hpp @@ -31,8 +31,9 @@ template RppStatus channel_extract_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_extractChannelNumber, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if (chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -146,8 +147,9 @@ RppStatus channel_extract_host(T* srcPtr, RppiSize srcSize, T* dstPtr, template RppStatus channel_combine_host_batch(T* srcPtr1, T* srcPtr2, T* srcPtr3, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if (chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -466,10 +468,11 @@ template RppStatus look_up_table_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, T *batch_lutPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { Rpp32u lutSize = 256; + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -513,7 +516,8 @@ RppStatus look_up_table_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize } else if(chnFormat == RPPI_CHN_PACKED) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -604,11 +608,12 @@ template RppStatus color_temperature_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32s *batch_adjustmentValue, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat 
chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { if (channel == 1) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -703,7 +708,8 @@ RppStatus color_temperature_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiS { if(chnFormat == RPPI_CHN_PLANAR) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -860,7 +866,8 @@ RppStatus color_temperature_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiS } else if (chnFormat == RPPI_CHN_PACKED) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -1098,8 +1105,9 @@ template RppStatus vignette_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_stdDev, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -2387,8 +2395,9 @@ template RppStatus hueRGB_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_hueShift, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -3300,8 +3309,9 @@ template RppStatus saturationRGB_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_saturationFactor, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -3499,8 +3509,9 @@ RppStatus tensor_look_up_table_host(T* srcPtr, T* dstPtr, T* lutPtr, template RppStatus color_convert_rgb_to_hsv_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiColorConvertMode convertMode, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) @@ -3529,8 +3540,9 @@ RppStatus color_convert_rgb_to_hsv_host_batch(T* srcPtr, RppiSize *batch_srcSize template RppStatus color_convert_hsv_to_rgb_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiColorConvertMode convertMode, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) diff --git a/src/modules/cpu/host_computer_vision.hpp 
b/src/modules/cpu/host_computer_vision.hpp index 1f2404f07..927335e66 100644 --- a/src/modules/cpu/host_computer_vision.hpp +++ b/src/modules/cpu/host_computer_vision.hpp @@ -30,8 +30,9 @@ THE SOFTWARE. template RppStatus data_object_copy_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -110,8 +111,9 @@ RppStatus data_object_copy_host(T* srcPtr, RppiSize srcSize, T* dstPtr, template RppStatus local_binary_pattern_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -467,7 +469,7 @@ template RppStatus convert_bit_depth_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, Rpp32u conversionType, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { T *srcPtrTemp; U *dstPtrTemp; @@ -561,8 +563,9 @@ template RppStatus remap_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_rowRemapTable, Rpp32u *batch_colRemapTable, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -732,8 +735,9 @@ template RppStatus gaussian_image_pyramid_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_stdDev, Rpp32u *batch_kernelSize, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) @@ -838,8 +842,9 @@ template RppStatus canny_edge_detector_host_batch(T* batch_srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* batch_dstPtr, T *batch_maxThreshold, T *batch_minThreshold, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) @@ -1363,8 +1368,9 @@ template RppStatus laplacian_image_pyramid_host_batch(T* batch_srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* batch_dstPtr, Rpp32f *batch_stdDev, Rpp32u *batch_kernelSize, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) @@ -1501,8 +1507,9 @@ RppStatus harris_corner_detector_host_batch(T* batch_srcPtr, RppiSize *batch_src Rpp32u 
*batch_kernelSize, Rpp32f *batch_kValue, Rpp32f *batch_threshold, Rpp32u *batch_nonmaxKernelSize, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) @@ -2057,8 +2064,9 @@ RppStatus reconstruction_laplacian_image_pyramid_host_batch(T* batch_srcPtr1, Rp T* batch_dstPtr, Rpp32f *batch_stdDev, Rpp32u *batch_kernelSize, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) @@ -2170,8 +2178,9 @@ RppStatus hough_lines_host_batch(T* batch_srcPtr, RppiSize *batch_srcSize, RppiS Rpp32f *batch_rho, Rpp32f *batch_theta, Rpp32u *batch_threshold, Rpp32u *batch_lineLength, Rpp32u *batch_lineGap, Rpp32u *batch_linesMax, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) @@ -2704,8 +2713,9 @@ RppStatus fast_corner_detector_host_batch(T* batch_srcPtr, RppiSize *batch_srcSi Rpp32u *batch_numOfPixels, T *batch_threshold, Rpp32u *batch_nonmaxKernelSize, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) @@ -3370,8 +3380,9 @@ template RppStatus hog_host_batch(T* batch_srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* batch_binsTensor, Rpp32u *batch_binsTensorLength, RppiSize *batch_kernelSize, RppiSize *batch_windowSize, Rpp32u *batch_windowStride, Rpp32u *batch_numOfBins, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) diff --git a/src/modules/cpu/host_filter_operations.hpp b/src/modules/cpu/host_filter_operations.hpp index 114910191..af03ce41b 100644 --- a/src/modules/cpu/host_filter_operations.hpp +++ b/src/modules/cpu/host_filter_operations.hpp @@ -31,8 +31,9 @@ template RppStatus box_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -264,8 +265,9 @@ template RppStatus median_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat 
chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -619,8 +621,9 @@ template RppStatus gaussian_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_stdDev, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -856,8 +859,9 @@ template RppStatus nonlinear_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1211,8 +1215,9 @@ template RppStatus non_max_suppression_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1572,8 +1577,9 @@ template RppStatus sobel_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_sobelType, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1916,8 +1922,9 @@ template RppStatus custom_convolution_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_kernel, RppiSize *batch_rppiKernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); diff --git a/src/modules/cpu/host_fused_functions.hpp b/src/modules/cpu/host_fused_functions.hpp index aaa256331..fd7feb3b2 100644 --- a/src/modules/cpu/host_fused_functions.hpp +++ b/src/modules/cpu/host_fused_functions.hpp @@ -33,8 +33,9 @@ RppStatus color_twist_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b Rpp32f *batch_alpha, Rpp32f *batch_beta, Rpp32f *batch_hueShift, Rpp32f *batch_saturationFactor, RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -78,7 +79,8 @@ RppStatus color_twist_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b } else if(chnFormat == RPPI_CHN_PACKED) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -787,8 +789,9 @@ RppStatus color_twist_f32_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz 
Rpp32f *batch_alpha, Rpp32f *batch_beta, Rpp32f *batch_hueShift, Rpp32f *batch_saturationFactor, RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -832,7 +835,8 @@ RppStatus color_twist_f32_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz } else if(chnFormat == RPPI_CHN_PACKED) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -1590,8 +1594,9 @@ RppStatus color_twist_f16_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz Rpp32f *batch_alpha, Rpp32f *batch_beta, Rpp32f *batch_hueShift, Rpp32f *batch_saturationFactor, RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1635,7 +1640,8 @@ RppStatus color_twist_f16_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz } else if(chnFormat == RPPI_CHN_PACKED) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -2429,8 +2435,9 @@ RppStatus color_twist_i8_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize Rpp32f *batch_alpha, Rpp32f *batch_beta, Rpp32f *batch_hueShift, Rpp32f *batch_saturationFactor, RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -2506,7 +2513,8 @@ RppStatus color_twist_i8_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize } else if(chnFormat == RPPI_CHN_PACKED) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -2586,8 +2594,9 @@ RppStatus crop_mirror_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b Rpp32u *batch_crop_pos_x, Rpp32u *batch_crop_pos_y, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -2919,8 +2928,9 @@ RppStatus crop_mirror_normalize_host_batch(T* srcPtr, RppiSize *batch_srcSize, R Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -3553,8 +3563,9 @@ RppStatus crop_mirror_normalize_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_s Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - 
RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -3833,8 +3844,9 @@ RppStatus crop_mirror_normalize_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_s Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -4159,8 +4171,9 @@ RppStatus crop_mirror_normalize_u8_f_host_batch(T* srcPtr, RppiSize *batch_srcSi Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -4564,7 +4577,7 @@ RppStatus crop_mirror_normalize_u8_i8_host_batch(Rpp8u* srcPtr, RppiSize *batch_ Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { Rpp32u srcBufferSize = nbatchSize * batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * channel; Rpp32u dstBufferSize = nbatchSize * batch_dstSizeMax[0].height * batch_dstSizeMax[0].width * channel; @@ -4586,7 +4599,7 @@ RppStatus crop_mirror_normalize_u8_i8_host_batch(Rpp8u* srcPtr, RppiSize *batch_ crop_mirror_normalize_f32_host_batch(srcPtrf32, batch_srcSize, batch_srcSizeMax, dstPtrf32, batch_dstSize, batch_dstSizeMax, batch_crop_pos_x, batch_crop_pos_y, batch_mean, batch_stdDev, batch_mirrorFlag, outputFormatToggle, - nbatchSize, chnFormat, channel, numThreads); + nbatchSize, chnFormat, channel, handle); Rpp8s *dstPtrTemp; Rpp32f *dstPtrf32Temp; @@ -4612,8 +4625,9 @@ template RppStatus crop_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_crop_pos_x, Rpp32u *batch_crop_pos_y, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -4770,8 +4784,9 @@ template RppStatus crop_host_u_f_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_crop_pos_x, Rpp32u *batch_crop_pos_y, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -4954,8 +4969,9 @@ template RppStatus crop_host_u_i_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_crop_pos_x, Rpp32u *batch_crop_pos_y, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, 
rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -5136,8 +5152,9 @@ template RppStatus resize_crop_mirror_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_x1, Rpp32u *batch_x2, Rpp32u *batch_y1, Rpp32u *batch_y2, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -5358,8 +5375,9 @@ RppStatus resize_crop_mirror_host_batch(T* srcPtr, RppiSize *batch_srcSize, Rppi RppStatus resize_crop_mirror_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32f* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_x1, Rpp32u *batch_x2, Rpp32u *batch_y1, Rpp32u *batch_y2, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -5581,8 +5599,9 @@ RppStatus resize_crop_mirror_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcS RppStatus resize_crop_mirror_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp16f* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_x1, Rpp32u *batch_x2, Rpp32u *batch_y1, Rpp32u *batch_y2, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -5807,12 +5826,13 @@ template RppStatus resize_mirror_normalize_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { if(chnFormat == RPPI_CHN_PLANAR) { T *dstPtrCopy = (T*) calloc(channel * batch_dstSizeMax[0].height * batch_dstSizeMax[0].width * nbatchSize, sizeof(T)); - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -6045,7 +6065,8 @@ RppStatus resize_mirror_normalize_host_batch(T* srcPtr, RppiSize *batch_srcSize, else if (chnFormat == RPPI_CHN_PACKED) { T *dstPtrCopy = (T*) calloc(channel * batch_dstSizeMax[0].height * batch_dstSizeMax[0].width * nbatchSize, sizeof(T)); - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { diff --git a/src/modules/cpu/host_geometry_transforms.hpp b/src/modules/cpu/host_geometry_transforms.hpp index f04d4f17e..21c4d55ed 100644 --- a/src/modules/cpu/host_geometry_transforms.hpp +++ b/src/modules/cpu/host_geometry_transforms.hpp @@ -31,8 +31,9 @@ template 
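Every parallel branch in these kernels repeats the same two-line OpenMP idiom: omp_set_dynamic(0) followed by #pragma omp parallel for num_threads(numThreads). Disabling dynamic team sizing matters because, with it enabled, the runtime may give the parallel region fewer threads than requested; turning it off makes the handle's thread count authoritative, and the per-image batch loop is then divided across exactly that many threads. A standalone sketch of the idiom, with illustrative function and parameter names:

#include <omp.h>

// Skeleton of the batch-loop structure used throughout these kernels: disable
// dynamic adjustment of the team size, then split the per-image loop across
// the requested number of threads.
void process_batch(int nbatchSize, unsigned int numThreads)
{
    omp_set_dynamic(0);
#pragma omp parallel for num_threads(numThreads)
    for (int batchCount = 0; batchCount < nbatchSize; batchCount++)
    {
        // process image batchCount ...
    }
}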
RppStatus flip_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_flipAxis, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -710,8 +711,9 @@ RppStatus fisheye_base_host(T* srcPtrTemp, RppiSize srcSize, T* dstPtrTemp, template RppStatus fisheye_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1539,8 +1541,9 @@ template RppStatus lens_correction_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_strength, Rpp32f *batch_zoom, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -2251,8 +2254,9 @@ template RppStatus scale_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32f *batch_percentage, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -2587,8 +2591,9 @@ template RppStatus rotate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32f *batch_angleDeg, RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -3014,8 +3019,9 @@ RppStatus rotate_host(T* srcPtr, RppiSize srcSize, T* dstPtr, RppiSize dstSize, template RppStatus resize_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -3186,8 +3192,9 @@ RppStatus resize_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_ template RppStatus resize_u8_i8_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -3384,8 +3391,9 @@ template RppStatus resize_crop_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize 
*batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_x1, Rpp32u *batch_x2, Rpp32u *batch_y1, Rpp32u *batch_y2, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -3532,8 +3540,9 @@ template RppStatus warp_affine_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, RppiROI *roiPoints, Rpp32f *batch_affine, Rpp32u outputFormatToggle, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -3926,9 +3935,10 @@ template RppStatus warp_perspective_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, RppiROI *roiPoints, Rpp32f *batch_perspective, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { //Rpp32f perspective[9] = {0.707, 0.707, 0, -0.707, 0.707, 0, 0.001, 0.001, 1}; + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); diff --git a/src/modules/cpu/host_image_augmentations.hpp b/src/modules/cpu/host_image_augmentations.hpp index 9d1fc373d..9c0e0b3b9 100644 --- a/src/modules/cpu/host_image_augmentations.hpp +++ b/src/modules/cpu/host_image_augmentations.hpp @@ -35,8 +35,9 @@ template RppStatus brightness_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_alpha, Rpp32f *batch_beta, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -324,8 +325,9 @@ template RppStatus contrast_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_new_min, Rpp32u *batch_new_max, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -736,8 +738,9 @@ template RppStatus blend_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_alpha, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1079,8 +1082,9 @@ template RppStatus gamma_correction_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_gamma, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1287,8 +1291,9 @@ template RppStatus 
exposure_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_exposureFactor, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1568,8 +1573,9 @@ template RppStatus blur_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -2734,8 +2740,9 @@ RppStatus blur_host(T* srcPtr, RppiSize srcSize, T* dstPtr, template RppStatus histogram_balance_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if (chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -3021,8 +3028,9 @@ template RppStatus random_crop_letterbox_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_x1, Rpp32u *batch_x2, Rpp32u *batch_y1, Rpp32u *batch_y2, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) @@ -3180,8 +3188,9 @@ RppStatus pixelate_base_pkd_host(T* srcPtrTemp, Rpp32u elementsInRow, T* dstPtrT template RppStatus pixelate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -3377,7 +3386,8 @@ RppStatus pixelate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batc } else if(chnFormat == RPPI_CHN_PACKED) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -3964,8 +3974,9 @@ template RppStatus fog_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_fogValue, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -4181,7 +4192,7 @@ template RppStatus noise_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_noiseProbability, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { if(chnFormat == RPPI_CHN_PLANAR) { @@ -4189,7 +4200,8 @@ RppStatus noise_host_batch(T* 
srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s srcPtrBufferROI = (T*) calloc(channel * batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * nbatchSize, sizeof(T)); dstPtrBufferROI = (T*) calloc(channel * batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * nbatchSize, sizeof(T)); - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -4298,7 +4310,8 @@ RppStatus noise_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s srcPtrBufferROI = (T*) calloc(channel * batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * nbatchSize, sizeof(T)); dstPtrBufferROI = (T*) calloc(channel * batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * nbatchSize, sizeof(T)); - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -4490,8 +4503,9 @@ template RppStatus snow_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_strength, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -4759,8 +4773,9 @@ RppStatus rain_host(T* srcPtr, RppiSize srcSize,T* dstPtr, template RppStatus rain_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_rainPercentage, Rpp32u *batch_rainWidth, Rpp32u *batch_rainHeight, Rpp32f *batch_transparency, - Rpp32u nbatchSize, RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + Rpp32u nbatchSize, RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) @@ -4799,8 +4814,9 @@ RppStatus random_shadow_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize Rpp32u *batch_x1, Rpp32u *batch_y1, Rpp32u *batch_x2, Rpp32u *batch_y2, Rpp32u *batch_numberOfShadows, Rpp32u *batch_maxSizeX, Rpp32u *batch_maxSizeY, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -5032,8 +5048,9 @@ template RppStatus jitter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); diff --git a/src/modules/cpu/host_logical_operations.hpp b/src/modules/cpu/host_logical_operations.hpp index c89f4f2b3..7b81c1af3 100644 --- a/src/modules/cpu/host_logical_operations.hpp +++ b/src/modules/cpu/host_logical_operations.hpp @@ -30,8 +30,9 @@ THE SOFTWARE. 
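The signed-int8 variants above (non_linear_blend_i8_host_batch, color_cast_i8_host_batch, crop_mirror_normalize_u8_i8_host_batch) do not duplicate the per-pixel logic: they convert the Rpp8s data into a temporary unsigned or float buffer, call the corresponding u8 or f32 kernel, and convert the result back. With this patch they simply forward the handle argument to that inner call, so it runs with the same thread count as the outer one. A rough sketch of the delegation using plain standard types; the +128 re-bias, the helper names, and the function-pointer plumbing are assumptions for illustration, not the exact RPP code.

#include <cstdint>
#include <cstddef>

// Hypothetical i8 wrapper: re-bias the signed input to unsigned, run the u8
// kernel (in RPP, called with the same rpp::Handle&), then re-bias the output.
void example_i8_host_batch(const std::int8_t* src, std::int8_t* dst, std::size_t n,
                           std::uint8_t* tmpIn, std::uint8_t* tmpOut,
                           void (*u8Kernel)(const std::uint8_t*, std::uint8_t*, std::size_t))
{
    for (std::size_t i = 0; i < n; i++)
        tmpIn[i] = static_cast<std::uint8_t>(src[i] + 128);    // assumed signed-to-unsigned re-bias

    u8Kernel(tmpIn, tmpOut, n);                                // delegate to the unsigned kernel

    for (std::size_t i = 0; i < n; i++)
        dst[i] = static_cast<std::int8_t>(tmpOut[i] - 128);    // back to the signed range
}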
template RppStatus bitwise_AND_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -213,8 +214,9 @@ RppStatus bitwise_AND_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus bitwise_NOT_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -398,8 +400,9 @@ RppStatus bitwise_NOT_host(T* srcPtr, RppiSize srcSize, T* dstPtr, template RppStatus exclusive_OR_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -582,8 +585,9 @@ RppStatus exclusive_OR_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus inclusive_OR_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); diff --git a/src/modules/cpu/host_morphological_transforms.hpp b/src/modules/cpu/host_morphological_transforms.hpp index eec19a01a..3d3c22009 100644 --- a/src/modules/cpu/host_morphological_transforms.hpp +++ b/src/modules/cpu/host_morphological_transforms.hpp @@ -31,8 +31,9 @@ template RppStatus erode_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -440,8 +441,9 @@ template RppStatus dilate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); diff --git a/src/modules/cpu/host_statistical_operations.hpp b/src/modules/cpu/host_statistical_operations.hpp index 3494dc97c..ac3d3e889 100644 --- a/src/modules/cpu/host_statistical_operations.hpp +++ b/src/modules/cpu/host_statistical_operations.hpp @@ -31,8 +31,9 @@ THE SOFTWARE. 
template RppStatus min_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -208,8 +209,9 @@ RppStatus min_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr, template RppStatus max_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -386,8 +388,9 @@ template RppStatus thresholding_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, T *batch_min, T *batch_max, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -582,8 +585,9 @@ template RppStatus histogram_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32u *outputHistogram, Rpp32u bins, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if (chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -709,8 +713,9 @@ RppStatus histogram_host(T* srcPtr, RppiSize srcSize, Rpp32u* outputHistogram, R template RppStatus histogram_equalization_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if (chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -996,8 +1001,9 @@ template RppStatus min_max_loc_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp8u *batch_min, Rpp8u *batch_max, Rpp32u *batch_minLoc, Rpp32u *batch_maxLoc, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1112,7 +1118,7 @@ RppStatus min_max_loc_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b template RppStatus min_max_loc_host(T* srcPtr, RppiSize srcSize, Rpp8u* min, Rpp8u* max, Rpp32u* minLoc, Rpp32u* maxLoc, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { *min = 255; *max = 0; @@ -1147,8 +1153,9 @@ template RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32f *batch_mean, Rpp32f *batch_stddev, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1284,8 +1291,9 @@ template RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize 
*batch_srcSizeMax, Rpp32f *batch_mean, Rpp32f *batch_stddev, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); @@ -1325,7 +1333,8 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b srcPtrChannel = srcPtrImage + (c * imageDimMax); - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int i = 0; i < batch_srcSize[batchCount].height; i++) { @@ -1364,7 +1373,8 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b srcPtrChannel = srcPtrImage + (c * imageDimMax); - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int i = 0; i < batch_srcSize[batchCount].height; i++) { @@ -1436,7 +1446,8 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b Rpp32u elementsInRowMax = channel * batch_srcSizeMax[batchCount].width; - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int i = 0; i < batch_srcSize[batchCount].height; i++) { @@ -1472,7 +1483,8 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b *mean = *mean / (channel * imageDim); - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int i = 0; i < batch_srcSize[batchCount].height; i++) { @@ -1547,8 +1559,9 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b template RppStatus integral_host_batch(T* batch_srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* batch_dstPtr, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel, Rpp32u numThreads) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) diff --git a/src/modules/cpu/kernel/blend.hpp b/src/modules/cpu/kernel/blend.hpp index a2111bc11..8b6e8e8c4 100644 --- a/src/modules/cpu/kernel/blend.hpp +++ b/src/modules/cpu/kernel/blend.hpp @@ -33,9 +33,10 @@ RppStatus blend_u8_u8_host_tensor(Rpp8u *srcPtr1, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -278,9 +279,10 @@ RppStatus blend_f32_f32_host_tensor(Rpp32f *srcPtr1, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -502,9 +504,10 @@ RppStatus blend_f16_f16_host_tensor(Rpp16f *srcPtr1, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u 
numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -772,9 +775,10 @@ RppStatus blend_i8_i8_host_tensor(Rpp8s *srcPtr1, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/brightness.hpp b/src/modules/cpu/kernel/brightness.hpp index cc0bf06e0..fe66d197c 100644 --- a/src/modules/cpu/kernel/brightness.hpp +++ b/src/modules/cpu/kernel/brightness.hpp @@ -33,9 +33,10 @@ RppStatus brightness_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -246,9 +247,10 @@ RppStatus brightness_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -465,9 +467,10 @@ RppStatus brightness_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -720,9 +723,10 @@ RppStatus brightness_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/color_cast.hpp b/src/modules/cpu/kernel/color_cast.hpp index 72c2b75dc..437084950 100644 --- a/src/modules/cpu/kernel/color_cast.hpp +++ b/src/modules/cpu/kernel/color_cast.hpp @@ -33,9 +33,10 @@ RppStatus color_cast_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -287,9 +288,10 @@ RppStatus color_cast_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -541,9 +543,10 @@ RppStatus color_cast_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u 
numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -851,9 +854,10 @@ RppStatus color_cast_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/color_jitter.hpp b/src/modules/cpu/kernel/color_jitter.hpp index 928a031c4..e864222d8 100644 --- a/src/modules/cpu/kernel/color_jitter.hpp +++ b/src/modules/cpu/kernel/color_jitter.hpp @@ -35,9 +35,10 @@ RppStatus color_jitter_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -294,9 +295,10 @@ RppStatus color_jitter_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -553,9 +555,10 @@ RppStatus color_jitter_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -868,9 +871,10 @@ RppStatus color_jitter_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/color_to_greyscale.hpp b/src/modules/cpu/kernel/color_to_greyscale.hpp index 9b0e6c21d..7f058830d 100644 --- a/src/modules/cpu/kernel/color_to_greyscale.hpp +++ b/src/modules/cpu/kernel/color_to_greyscale.hpp @@ -30,8 +30,9 @@ RppStatus color_to_greyscale_u8_u8_host_tensor(Rpp8u *srcPtr, RpptDescPtr dstDescPtr, Rpp32f *channelWeights, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -162,8 +163,9 @@ RppStatus color_to_greyscale_f32_f32_host_tensor(Rpp32f *srcPtr, RpptDescPtr dstDescPtr, Rpp32f *channelWeights, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -298,8 +300,9 @@ RppStatus color_to_greyscale_f16_f16_host_tensor(Rpp16f *srcPtr, RpptDescPtr dstDescPtr, Rpp32f *channelWeights, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { + Rpp32u numThreads = 
handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -449,8 +452,9 @@ RppStatus color_to_greyscale_i8_i8_host_tensor(Rpp8s *srcPtr, RpptDescPtr dstDescPtr, Rpp32f *channelWeights, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) diff --git a/src/modules/cpu/kernel/color_twist.hpp b/src/modules/cpu/kernel/color_twist.hpp index 423cd2c9d..b1f3d406e 100644 --- a/src/modules/cpu/kernel/color_twist.hpp +++ b/src/modules/cpu/kernel/color_twist.hpp @@ -35,9 +35,10 @@ RppStatus color_twist_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -359,9 +360,10 @@ RppStatus color_twist_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -663,9 +665,10 @@ RppStatus color_twist_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -1007,9 +1010,10 @@ RppStatus color_twist_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/contrast.hpp b/src/modules/cpu/kernel/contrast.hpp index 711065c65..d3de3b0b4 100644 --- a/src/modules/cpu/kernel/contrast.hpp +++ b/src/modules/cpu/kernel/contrast.hpp @@ -33,9 +33,10 @@ RppStatus contrast_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -219,9 +220,10 @@ RppStatus contrast_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -406,9 +408,10 @@ RppStatus contrast_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, 
(Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -625,9 +628,10 @@ RppStatus contrast_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/copy.hpp b/src/modules/cpu/kernel/copy.hpp index 678285c8d..499dd3fb4 100644 --- a/src/modules/cpu/kernel/copy.hpp +++ b/src/modules/cpu/kernel/copy.hpp @@ -29,12 +29,13 @@ RppStatus copy_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp8u *dstPtr, RpptDescPtr dstDescPtr, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW) if ((srcDescPtr->c == 1) || (srcDescPtr->layout == dstDescPtr->layout)) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -48,7 +49,8 @@ RppStatus copy_u8_u8_host_tensor(Rpp8u *srcPtr, // Copy with fused output-layout toggle (NHWC -> NCHW) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -103,7 +105,8 @@ RppStatus copy_u8_u8_host_tensor(Rpp8u *srcPtr, // Copy with fused output-layout toggle (NCHW -> NHWC) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -163,12 +166,13 @@ RppStatus copy_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32f *dstPtr, RpptDescPtr dstDescPtr, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW) if ((srcDescPtr->c == 1) || (srcDescPtr->layout == dstDescPtr->layout)) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -182,7 +186,8 @@ RppStatus copy_f32_f32_host_tensor(Rpp32f *srcPtr, // Copy with fused output-layout toggle (NHWC -> NCHW) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -238,7 +243,8 @@ RppStatus copy_f32_f32_host_tensor(Rpp32f *srcPtr, // Copy with fused output-layout toggle (NCHW -> NHWC) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for 
num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -299,12 +305,13 @@ RppStatus copy_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp16f *dstPtr, RpptDescPtr dstDescPtr, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW) if ((srcDescPtr->c == 1) || (srcDescPtr->layout == dstDescPtr->layout)) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -318,7 +325,8 @@ RppStatus copy_f16_f16_host_tensor(Rpp16f *srcPtr, // Copy with fused output-layout toggle (NHWC -> NCHW) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -390,7 +398,8 @@ RppStatus copy_f16_f16_host_tensor(Rpp16f *srcPtr, // Copy with fused output-layout toggle (NCHW -> NHWC) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -467,12 +476,13 @@ RppStatus copy_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp8s *dstPtr, RpptDescPtr dstDescPtr, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW) if ((srcDescPtr->c == 1) || (srcDescPtr->layout == dstDescPtr->layout)) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -486,7 +496,8 @@ RppStatus copy_i8_i8_host_tensor(Rpp8s *srcPtr, // Copy with fused output-layout toggle (NHWC -> NCHW) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -541,7 +552,8 @@ RppStatus copy_i8_i8_host_tensor(Rpp8s *srcPtr, // Copy with fused output-layout toggle (NCHW -> NHWC) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) { - omp_set_dynamic(0); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { diff --git a/src/modules/cpu/kernel/crop.hpp b/src/modules/cpu/kernel/crop.hpp index 8d95b7de7..d88f85ad9 100644 --- a/src/modules/cpu/kernel/crop.hpp +++ b/src/modules/cpu/kernel/crop.hpp @@ -31,9 +31,10 @@ RppStatus crop_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for 
num_threads(numThreads) @@ -181,9 +182,10 @@ RppStatus crop_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -333,9 +335,10 @@ RppStatus crop_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -517,9 +520,10 @@ RppStatus crop_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/crop_mirror_normalize.hpp b/src/modules/cpu/kernel/crop_mirror_normalize.hpp index fe72bd046..f84c8ce72 100644 --- a/src/modules/cpu/kernel/crop_mirror_normalize.hpp +++ b/src/modules/cpu/kernel/crop_mirror_normalize.hpp @@ -34,9 +34,10 @@ RppStatus crop_mirror_normalize_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -453,9 +454,10 @@ RppStatus crop_mirror_normalize_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -872,9 +874,10 @@ RppStatus crop_mirror_normalize_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -1338,9 +1341,10 @@ RppStatus crop_mirror_normalize_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -1757,9 +1761,10 @@ RppStatus crop_mirror_normalize_u8_f32_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -2204,9 +2209,10 @@ RppStatus crop_mirror_normalize_u8_f16_host_tensor(Rpp8u *srcPtr, RpptROIPtr 
roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/exposure.hpp b/src/modules/cpu/kernel/exposure.hpp index b2d7e8554..80b0fa53a 100644 --- a/src/modules/cpu/kernel/exposure.hpp +++ b/src/modules/cpu/kernel/exposure.hpp @@ -32,9 +32,10 @@ RppStatus exposure_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -217,9 +218,10 @@ RppStatus exposure_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -402,9 +404,10 @@ RppStatus exposure_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -621,9 +624,10 @@ RppStatus exposure_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/flip.hpp b/src/modules/cpu/kernel/flip.hpp index aee3e7aee..b8ed63835 100644 --- a/src/modules/cpu/kernel/flip.hpp +++ b/src/modules/cpu/kernel/flip.hpp @@ -33,9 +33,10 @@ RppStatus flip_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -310,9 +311,10 @@ RppStatus flip_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -588,9 +590,10 @@ RppStatus flip_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -906,9 +909,10 @@ RppStatus flip_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams 
layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/gamma_correction.hpp b/src/modules/cpu/kernel/gamma_correction.hpp index 7a112405a..e32a1207a 100644 --- a/src/modules/cpu/kernel/gamma_correction.hpp +++ b/src/modules/cpu/kernel/gamma_correction.hpp @@ -32,9 +32,10 @@ RppStatus gamma_correction_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -181,9 +182,10 @@ RppStatus gamma_correction_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -330,9 +332,10 @@ RppStatus gamma_correction_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -479,9 +482,10 @@ RppStatus gamma_correction_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/gridmask.hpp b/src/modules/cpu/kernel/gridmask.hpp index 4e925ef67..54030317d 100644 --- a/src/modules/cpu/kernel/gridmask.hpp +++ b/src/modules/cpu/kernel/gridmask.hpp @@ -35,9 +35,10 @@ RppStatus gridmask_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -440,9 +441,10 @@ RppStatus gridmask_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -827,9 +829,10 @@ RppStatus gridmask_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -1267,9 +1270,10 @@ RppStatus gridmask_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, 
RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/noise_gaussian.hpp b/src/modules/cpu/kernel/noise_gaussian.hpp index 9bb9147bb..125839e3c 100644 --- a/src/modules/cpu/kernel/noise_gaussian.hpp +++ b/src/modules/cpu/kernel/noise_gaussian.hpp @@ -46,9 +46,10 @@ RppStatus gaussian_noise_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -378,9 +379,10 @@ RppStatus gaussian_noise_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -699,9 +701,10 @@ RppStatus gaussian_noise_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -1065,9 +1068,10 @@ RppStatus gaussian_noise_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/noise_salt_and_pepper.hpp b/src/modules/cpu/kernel/noise_salt_and_pepper.hpp index 7cdb4775c..c949927fd 100644 --- a/src/modules/cpu/kernel/noise_salt_and_pepper.hpp +++ b/src/modules/cpu/kernel/noise_salt_and_pepper.hpp @@ -52,9 +52,10 @@ RppStatus salt_and_pepper_noise_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -424,9 +425,10 @@ RppStatus salt_and_pepper_noise_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -803,9 +805,10 @@ RppStatus salt_and_pepper_noise_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for 
num_threads(numThreads) @@ -1227,9 +1230,10 @@ RppStatus salt_and_pepper_noise_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/noise_shot.hpp b/src/modules/cpu/kernel/noise_shot.hpp index b630759f8..b65511c7f 100644 --- a/src/modules/cpu/kernel/noise_shot.hpp +++ b/src/modules/cpu/kernel/noise_shot.hpp @@ -45,9 +45,10 @@ RppStatus shot_noise_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -425,9 +426,10 @@ RppStatus shot_noise_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -814,9 +816,10 @@ RppStatus shot_noise_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -1248,9 +1251,10 @@ RppStatus shot_noise_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/non_linear_blend.hpp b/src/modules/cpu/kernel/non_linear_blend.hpp index 7bf86e948..db2180afd 100644 --- a/src/modules/cpu/kernel/non_linear_blend.hpp +++ b/src/modules/cpu/kernel/non_linear_blend.hpp @@ -80,12 +80,14 @@ RppStatus non_linear_blend_u8_u8_host_tensor(Rpp8u *srcPtr1, Rpp32f *stdDevTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle &handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -446,12 +448,14 @@ RppStatus non_linear_blend_f32_f32_host_tensor(Rpp32f *srcPtr1, Rpp32f *stdDevTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle &handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; 
batchCount++) { RpptROI roi; @@ -812,12 +816,14 @@ RppStatus non_linear_blend_i8_i8_host_tensor(Rpp8s *srcPtr1, Rpp32f *stdDevTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle &handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1178,12 +1184,14 @@ RppStatus non_linear_blend_f16_f16_host_tensor(Rpp16f *srcPtr1, Rpp32f *stdDevTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle &handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/resize.hpp b/src/modules/cpu/kernel/resize.hpp index 9b66d63cb..4949b84d3 100644 --- a/src/modules/cpu/kernel/resize.hpp +++ b/src/modules/cpu/kernel/resize.hpp @@ -35,11 +35,12 @@ RppStatus resize_nn_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams srcLayoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -255,11 +256,12 @@ RppStatus resize_nn_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams srcLayoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -467,11 +469,12 @@ RppStatus resize_nn_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams srcLayoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -687,11 +690,12 @@ RppStatus resize_nn_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams srcLayoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -796,11 +800,12 @@ RppStatus resize_bilinear_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams srcLayoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, 
(Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -1036,11 +1041,12 @@ RppStatus resize_bilinear_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams srcLayoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -1278,11 +1284,12 @@ RppStatus resize_bilinear_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams srcLayoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -1521,11 +1528,12 @@ RppStatus resize_bilinear_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams srcLayoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -1766,11 +1774,12 @@ RppStatus resize_separable_host_tensor(T *srcPtr, RpptRoiType roiType, RppLayoutParams srcLayoutParams, RpptInterpolationType interpolationType, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { diff --git a/src/modules/cpu/kernel/resize_crop_mirror.hpp b/src/modules/cpu/kernel/resize_crop_mirror.hpp index 07a357533..0b075f56d 100644 --- a/src/modules/cpu/kernel/resize_crop_mirror.hpp +++ b/src/modules/cpu/kernel/resize_crop_mirror.hpp @@ -33,11 +33,12 @@ RppStatus resize_crop_mirror_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -283,11 +284,12 @@ RppStatus resize_crop_mirror_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -538,11 +540,12 @@ RppStatus resize_crop_mirror_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr 
roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -793,11 +796,12 @@ RppStatus resize_crop_mirror_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { diff --git a/src/modules/cpu/kernel/resize_mirror_normalize.hpp b/src/modules/cpu/kernel/resize_mirror_normalize.hpp index 613435013..2894d5c74 100644 --- a/src/modules/cpu/kernel/resize_mirror_normalize.hpp +++ b/src/modules/cpu/kernel/resize_mirror_normalize.hpp @@ -35,11 +35,12 @@ RppStatus resize_mirror_normalize_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -316,11 +317,12 @@ RppStatus resize_mirror_normalize_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -601,11 +603,12 @@ RppStatus resize_mirror_normalize_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -886,11 +889,12 @@ RppStatus resize_mirror_normalize_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -1172,10 +1176,11 @@ RppStatus resize_mirror_normalize_u8_f32_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for 
num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -1454,10 +1459,11 @@ RppStatus resize_mirror_normalize_u8_f16_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); +Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { diff --git a/src/modules/cpu/kernel/spatter.hpp b/src/modules/cpu/kernel/spatter.hpp index 621d0a18d..d3b3305d4 100644 --- a/src/modules/cpu/kernel/spatter.hpp +++ b/src/modules/cpu/kernel/spatter.hpp @@ -34,9 +34,10 @@ RppStatus spatter_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -387,9 +388,10 @@ RppStatus spatter_f32_f32_host_tensor(Rpp32f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -748,9 +750,10 @@ RppStatus spatter_f16_f16_host_tensor(Rpp16f *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) @@ -1140,9 +1143,10 @@ RppStatus spatter_i8_i8_host_tensor(Rpp8s *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) diff --git a/src/modules/cpu/kernel/swap_channels.hpp b/src/modules/cpu/kernel/swap_channels.hpp index 4c62361d7..a590cbbb2 100644 --- a/src/modules/cpu/kernel/swap_channels.hpp +++ b/src/modules/cpu/kernel/swap_channels.hpp @@ -29,8 +29,9 @@ RppStatus swap_channels_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp8u *dstPtr, RpptDescPtr dstDescPtr, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -226,8 +227,9 @@ RppStatus swap_channels_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32f *dstPtr, RpptDescPtr dstDescPtr, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -423,8 +425,9 @@ RppStatus swap_channels_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp16f *dstPtr, RpptDescPtr dstDescPtr, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { + Rpp32u numThreads = 
handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -660,8 +663,9 @@ RppStatus swap_channels_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp8s *dstPtr, RpptDescPtr dstDescPtr, RppLayoutParams layoutParams, - Rpp32u numThreads) + rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) diff --git a/src/modules/cpu/kernel/warp_affine.hpp b/src/modules/cpu/kernel/warp_affine.hpp index 45a9e840c..4cf5649dd 100644 --- a/src/modules/cpu/kernel/warp_affine.hpp +++ b/src/modules/cpu/kernel/warp_affine.hpp @@ -61,12 +61,14 @@ RppStatus warp_affine_nn_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; @@ -285,12 +287,14 @@ RppStatus warp_affine_nn_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; @@ -509,12 +513,14 @@ RppStatus warp_affine_nn_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; @@ -733,12 +739,14 @@ RppStatus warp_affine_nn_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; @@ -874,12 +882,14 @@ RppStatus warp_affine_bilinear_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); 
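// The hunks in these kernel headers all apply one pattern: take rpp::Handle& in place of a
// raw Rpp32u thread-count argument, read the value once through handle.GetNumThreads(), and
// hand it to the OpenMP batch loop (which previously used num_threads(dstDescPtr->n) in
// warp_affine). A minimal sketch of that pattern; the kernel name and empty loop body are
// illustrative only and not part of this patch:
RppStatus example_host_tensor(Rpp8u *srcPtr, RpptDescPtr srcDescPtr,
                              Rpp8u *dstPtr, RpptDescPtr dstDescPtr,
                              rpp::Handle& handle)
{
    Rpp32u numThreads = handle.GetNumThreads();   // thread count now comes from the handle
    omp_set_dynamic(0);                           // keep the requested team size fixed
#pragma omp parallel for num_threads(numThreads)
    for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
    {
        // per-image work for image batchCount
    }
    return RPP_SUCCESS;
}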
-omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; @@ -1138,12 +1148,14 @@ RppStatus warp_affine_bilinear_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; @@ -1402,12 +1414,14 @@ RppStatus warp_affine_bilinear_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; @@ -1671,12 +1685,14 @@ RppStatus warp_affine_bilinear_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; diff --git a/src/modules/rppi_advanced_augmentations.cpp b/src/modules/rppi_advanced_augmentations.cpp index 48cc10d81..e2ed1b0f4 100644 --- a/src/modules/rppi_advanced_augmentations.cpp +++ b/src/modules/rppi_advanced_augmentations.cpp @@ -72,7 +72,7 @@ RppStatus water_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -93,7 +93,7 @@ RppStatus water_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -114,7 +114,7 @@ RppStatus water_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -135,7 +135,7 @@ RppStatus water_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } @@ -235,7 +235,7 @@ RppStatus non_linear_blend_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if 
(in_tensor_type == RPPTensorDataType::FP16) @@ -252,7 +252,7 @@ RppStatus non_linear_blend_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -269,7 +269,7 @@ RppStatus non_linear_blend_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -286,7 +286,7 @@ RppStatus non_linear_blend_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } @@ -390,7 +390,7 @@ RppStatus color_cast_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -409,7 +409,7 @@ RppStatus color_cast_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -428,7 +428,7 @@ RppStatus color_cast_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -447,7 +447,7 @@ RppStatus color_cast_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } @@ -531,7 +531,7 @@ RppStatus erase_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -550,7 +550,7 @@ RppStatus erase_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -569,7 +569,7 @@ RppStatus erase_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -588,7 +588,7 @@ RppStatus erase_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } @@ -704,7 +704,7 @@ RppStatus crop_and_patch_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -730,7 +730,7 @@ RppStatus crop_and_patch_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -756,7 +756,7 @@ RppStatus crop_and_patch_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - 
rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -782,7 +782,7 @@ RppStatus crop_and_patch_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } @@ -880,7 +880,7 @@ RppStatus lut_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -896,7 +896,7 @@ RppStatus lut_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } @@ -974,7 +974,7 @@ RppStatus glitch_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -995,7 +995,7 @@ RppStatus glitch_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -1016,7 +1016,7 @@ RppStatus glitch_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -1037,7 +1037,7 @@ RppStatus glitch_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } diff --git a/src/modules/rppi_arithmetic_operations.cpp b/src/modules/rppi_arithmetic_operations.cpp index eab212563..b0c1c10d4 100644 --- a/src/modules/rppi_arithmetic_operations.cpp +++ b/src/modules/rppi_arithmetic_operations.cpp @@ -61,7 +61,7 @@ rppi_add_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -92,7 +92,7 @@ rppi_add_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -123,7 +123,7 @@ rppi_add_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -156,7 +156,7 @@ rppi_subtract_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -187,7 +187,7 @@ rppi_subtract_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -218,7 +218,7 @@ rppi_subtract_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -251,7 +251,7 @@ rppi_multiply_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + 
rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -282,7 +282,7 @@ rppi_multiply_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -313,7 +313,7 @@ rppi_multiply_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -346,7 +346,7 @@ rppi_absolute_difference_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -377,7 +377,7 @@ rppi_absolute_difference_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -408,7 +408,7 @@ rppi_absolute_difference_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -441,7 +441,7 @@ rppi_phase_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -472,7 +472,7 @@ rppi_phase_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -503,7 +503,7 @@ rppi_phase_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -536,7 +536,7 @@ rppi_magnitude_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -567,7 +567,7 @@ rppi_magnitude_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -598,7 +598,7 @@ rppi_magnitude_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -629,7 +629,7 @@ rppi_accumulate_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -658,7 +658,7 @@ rppi_accumulate_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -687,7 +687,7 @@ rppi_accumulate_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -720,7 +720,7 @@ rppi_accumulate_weighted_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -751,7 +751,7 @@ rppi_accumulate_weighted_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - 
rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -782,7 +782,7 @@ rppi_accumulate_weighted_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -811,7 +811,7 @@ rppi_accumulate_squared_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -838,7 +838,7 @@ rppi_accumulate_squared_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -865,7 +865,7 @@ rppi_accumulate_squared_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_color_model_conversions.cpp b/src/modules/rppi_color_model_conversions.cpp index fc47c272c..d847b7f68 100644 --- a/src/modules/rppi_color_model_conversions.cpp +++ b/src/modules/rppi_color_model_conversions.cpp @@ -61,7 +61,7 @@ rppi_hueRGB_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -92,7 +92,7 @@ rppi_hueRGB_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -125,7 +125,7 @@ rppi_saturationRGB_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -156,7 +156,7 @@ rppi_saturationRGB_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -184,7 +184,7 @@ rppi_color_convert_u8_pln3_batchPS_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if(convert_mode == RppiColorConvertMode::HSV_RGB) { @@ -196,7 +196,7 @@ rppi_color_convert_u8_pln3_batchPS_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -223,7 +223,7 @@ rppi_color_convert_u8_pkd3_batchPS_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if(convert_mode == RppiColorConvertMode::HSV_RGB) { @@ -235,7 +235,7 @@ rppi_color_convert_u8_pkd3_batchPS_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -269,7 +269,7 @@ rppi_color_temperature_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -300,7 +300,7 @@ rppi_color_temperature_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return 
RPP_SUCCESS; } @@ -331,7 +331,7 @@ rppi_color_temperature_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -364,7 +364,7 @@ rppi_vignette_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -395,7 +395,7 @@ rppi_vignette_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -426,7 +426,7 @@ rppi_vignette_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -452,7 +452,7 @@ rppi_channel_extract_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -476,7 +476,7 @@ rppi_channel_extract_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -500,7 +500,7 @@ rppi_channel_extract_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -528,7 +528,7 @@ rppi_channel_combine_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -554,7 +554,7 @@ rppi_channel_combine_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -580,7 +580,7 @@ rppi_channel_combine_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -613,7 +613,7 @@ rppi_look_up_table_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -644,7 +644,7 @@ rppi_look_up_table_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -675,7 +675,7 @@ rppi_look_up_table_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_computer_vision.cpp b/src/modules/rppi_computer_vision.cpp index 52147dfc4..1bd097ed9 100644 --- a/src/modules/rppi_computer_vision.cpp +++ b/src/modules/rppi_computer_vision.cpp @@ -59,7 +59,7 @@ rppi_local_binary_pattern_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -88,7 +88,7 @@ rppi_local_binary_pattern_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - 
rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -117,7 +117,7 @@ rppi_local_binary_pattern_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -141,7 +141,7 @@ rppi_data_object_copy_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -163,7 +163,7 @@ rppi_data_object_copy_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -185,7 +185,7 @@ rppi_data_object_copy_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -213,7 +213,7 @@ rppi_gaussian_image_pyramid_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -239,7 +239,7 @@ rppi_gaussian_image_pyramid_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -265,7 +265,7 @@ rppi_gaussian_image_pyramid_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -293,7 +293,7 @@ rppi_laplacian_image_pyramid_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -319,7 +319,7 @@ rppi_laplacian_image_pyramid_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -345,7 +345,7 @@ rppi_laplacian_image_pyramid_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -373,7 +373,7 @@ rppi_canny_edge_detector_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -399,7 +399,7 @@ rppi_canny_edge_detector_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -425,7 +425,7 @@ rppi_canny_edge_detector_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -461,7 +461,7 @@ rppi_harris_corner_detector_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -495,7 +495,7 @@ rppi_harris_corner_detector_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); 
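// The rppi_* batchPD host wrappers in these files all make the same substitution: the
// trailing rpp::deref(rppHandle).GetNumThreads() argument becomes the handle itself, so the
// batch kernel can read GetBatchSize() and GetNumThreads() from a single object. A minimal
// sketch of the resulting call shape; example_u8_pln1_batchPD_host and example_host_batch
// are illustrative names, not functions introduced by this patch:
RppStatus example_u8_pln1_batchPD_host(RppPtr_t srcPtr, RppiSize *srcSize, RppiSize *srcSizeMax,
                                       RppPtr_t dstPtr, Rpp32u nbatchSize, rppHandle_t rppHandle)
{
    example_host_batch<Rpp8u>(static_cast<Rpp8u*>(srcPtr), srcSize, srcSizeMax,
                              static_cast<Rpp8u*>(dstPtr),
                              rpp::deref(rppHandle).GetBatchSize(),
                              RPPI_CHN_PLANAR, 1,
                              rpp::deref(rppHandle));   // handle replaces GetNumThreads()
    return RPP_SUCCESS;
}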
return RPP_SUCCESS; } @@ -529,7 +529,7 @@ rppi_harris_corner_detector_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -606,7 +606,7 @@ rppi_fast_corner_detector_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -634,7 +634,7 @@ rppi_fast_corner_detector_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -662,7 +662,7 @@ rppi_fast_corner_detector_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -697,7 +697,7 @@ rppi_reconstruction_laplacian_image_pyramid_u8_pln1_batchPD_host(RppPtr_t srcPtr rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -730,7 +730,7 @@ rppi_reconstruction_laplacian_image_pyramid_u8_pln3_batchPD_host(RppPtr_t srcPtr rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -763,7 +763,7 @@ rppi_reconstruction_laplacian_image_pyramid_u8_pkd3_batchPD_host(RppPtr_t srcPtr rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -857,7 +857,7 @@ rppi_hough_lines_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -891,7 +891,7 @@ rppi_hog_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -919,7 +919,7 @@ rppi_remap_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -945,7 +945,7 @@ rppi_remap_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -971,7 +971,7 @@ rppi_remap_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1077,7 +1077,7 @@ rppi_convert_bit_depth_u8s8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1100,7 +1100,7 @@ rppi_convert_bit_depth_u8u16_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1123,7 +1123,7 @@ rppi_convert_bit_depth_u8s16_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1146,7 +1146,7 @@ 
rppi_convert_bit_depth_u8s8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1169,7 +1169,7 @@ rppi_convert_bit_depth_u8u16_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1192,7 +1192,7 @@ rppi_convert_bit_depth_u8s16_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1215,7 +1215,7 @@ rppi_convert_bit_depth_u8s8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1238,7 +1238,7 @@ rppi_convert_bit_depth_u8u16_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1261,7 +1261,7 @@ rppi_convert_bit_depth_u8s16_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_filter_operations.cpp b/src/modules/rppi_filter_operations.cpp index d6a37d0f0..7636be81b 100644 --- a/src/modules/rppi_filter_operations.cpp +++ b/src/modules/rppi_filter_operations.cpp @@ -61,7 +61,7 @@ rppi_box_filter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -92,7 +92,7 @@ rppi_box_filter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -123,7 +123,7 @@ rppi_box_filter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -156,7 +156,7 @@ rppi_sobel_filter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -187,7 +187,7 @@ rppi_sobel_filter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -218,7 +218,7 @@ rppi_sobel_filter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -251,7 +251,7 @@ rppi_median_filter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -282,7 +282,7 @@ rppi_median_filter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -313,7 +313,7 @@ rppi_median_filter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - 
rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -346,7 +346,7 @@ rppi_non_max_suppression_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -377,7 +377,7 @@ rppi_non_max_suppression_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -408,7 +408,7 @@ rppi_non_max_suppression_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -443,7 +443,7 @@ rppi_gaussian_filter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -476,7 +476,7 @@ rppi_gaussian_filter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -509,7 +509,7 @@ rppi_gaussian_filter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -542,7 +542,7 @@ rppi_nonlinear_filter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -573,7 +573,7 @@ rppi_nonlinear_filter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -604,7 +604,7 @@ rppi_nonlinear_filter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -639,7 +639,7 @@ rppi_custom_convolution_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -672,7 +672,7 @@ rppi_custom_convolution_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -705,7 +705,7 @@ rppi_custom_convolution_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_fused_functions.cpp b/src/modules/rppi_fused_functions.cpp index be115985b..7b11282cd 100644 --- a/src/modules/rppi_fused_functions.cpp +++ b/src/modules/rppi_fused_functions.cpp @@ -73,7 +73,7 @@ RppStatus color_twist_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP16) { @@ -90,7 +90,7 @@ RppStatus color_twist_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensor_type == 
RPPTensorDataType::FP32) { @@ -107,7 +107,7 @@ RppStatus color_twist_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::I8) { @@ -124,7 +124,7 @@ RppStatus color_twist_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -208,7 +208,7 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::FP16) { @@ -224,7 +224,7 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::FP32) { @@ -240,7 +240,7 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::I8) { @@ -256,7 +256,7 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (tensorInType == RPPTensorDataType::FP16) @@ -273,7 +273,7 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensorInType == RPPTensorDataType::FP32) { @@ -289,7 +289,7 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensorInType == RPPTensorDataType::I8) { @@ -305,7 +305,7 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -460,7 +460,7 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::FP16) { @@ -479,7 +479,7 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::FP32) { @@ -498,7 +498,7 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::I8) { @@ -517,7 +517,7 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (tensorInType == RPPTensorDataType::FP16) @@ -537,7 +537,7 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, 
rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensorInType == RPPTensorDataType::FP32) { @@ -556,7 +556,7 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensorInType == RPPTensorDataType::I8) { @@ -575,7 +575,7 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -727,7 +727,7 @@ RppStatus resize_crop_mirror_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP16) { @@ -746,7 +746,7 @@ RppStatus resize_crop_mirror_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP32) { @@ -765,7 +765,7 @@ RppStatus resize_crop_mirror_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::I8) { @@ -784,7 +784,7 @@ RppStatus resize_crop_mirror_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -887,7 +887,7 @@ RppStatus resize_mirror_normalize_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; diff --git a/src/modules/rppi_geometry_transforms.cpp b/src/modules/rppi_geometry_transforms.cpp index 5dd641a86..cd7b02ca8 100644 --- a/src/modules/rppi_geometry_transforms.cpp +++ b/src/modules/rppi_geometry_transforms.cpp @@ -61,7 +61,7 @@ rppi_flip_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -92,7 +92,7 @@ rppi_flip_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -123,7 +123,7 @@ rppi_flip_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -168,7 +168,7 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::FP16) { @@ -183,7 +183,7 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::FP32) { @@ -198,7 +198,7 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, 
num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::I8) { @@ -213,7 +213,7 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (tensorInType == RPPTensorDataType::FP16) @@ -229,7 +229,7 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensorInType == RPPTensorDataType::FP32) { @@ -244,7 +244,7 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensorInType == RPPTensorDataType::I8) { @@ -259,7 +259,7 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -409,7 +409,7 @@ RppStatus resize_crop_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP16) { @@ -427,7 +427,7 @@ RppStatus resize_crop_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP32) { @@ -445,7 +445,7 @@ RppStatus resize_crop_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::I8) { @@ -463,7 +463,7 @@ RppStatus resize_crop_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -569,7 +569,7 @@ RppStatus rotate_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP16) { @@ -585,7 +585,7 @@ RppStatus rotate_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP32) { @@ -601,7 +601,7 @@ RppStatus rotate_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::I8) { @@ -617,7 +617,7 @@ RppStatus rotate_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -726,7 +726,7 @@ RppStatus warp_affine_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -745,7 +745,7 @@ RppStatus 
warp_affine_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -764,7 +764,7 @@ RppStatus warp_affine_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -783,7 +783,7 @@ RppStatus warp_affine_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } @@ -877,7 +877,7 @@ rppi_fisheye_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -906,7 +906,7 @@ rppi_fisheye_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -935,7 +935,7 @@ rppi_fisheye_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -970,7 +970,7 @@ rppi_lens_correction_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1003,7 +1003,7 @@ rppi_lens_correction_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1036,7 +1036,7 @@ rppi_lens_correction_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1074,7 +1074,7 @@ rppi_scale_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1110,7 +1110,7 @@ rppi_scale_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1146,7 +1146,7 @@ rppi_scale_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1184,7 +1184,7 @@ rppi_warp_perspective_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1220,7 +1220,7 @@ rppi_warp_perspective_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1256,7 +1256,7 @@ rppi_warp_perspective_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_image_augmentations.cpp b/src/modules/rppi_image_augmentations.cpp index 4ed3a65c5..8b4d82a7c 100644 --- 
a/src/modules/rppi_image_augmentations.cpp +++ b/src/modules/rppi_image_augmentations.cpp @@ -63,7 +63,7 @@ rppi_brightness_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -96,7 +96,7 @@ rppi_brightness_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -129,7 +129,7 @@ rppi_brightness_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -162,7 +162,7 @@ rppi_gamma_correction_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -193,7 +193,7 @@ rppi_gamma_correction_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -224,7 +224,7 @@ rppi_gamma_correction_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -259,7 +259,7 @@ rppi_blend_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -292,7 +292,7 @@ rppi_blend_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -325,7 +325,7 @@ rppi_blend_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -358,7 +358,7 @@ rppi_blur_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -389,7 +389,7 @@ rppi_blur_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -420,7 +420,7 @@ rppi_blur_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -455,7 +455,7 @@ rppi_contrast_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -488,7 +488,7 @@ rppi_contrast_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -521,7 +521,7 @@ rppi_contrast_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -552,7 +552,7 @@ rppi_pixelate_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - 
rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -581,7 +581,7 @@ rppi_pixelate_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -610,7 +610,7 @@ rppi_pixelate_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -643,7 +643,7 @@ rppi_jitter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -674,7 +674,7 @@ rppi_jitter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -705,7 +705,7 @@ rppi_jitter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -738,7 +738,7 @@ rppi_snow_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -769,7 +769,7 @@ rppi_snow_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -800,7 +800,7 @@ rppi_snow_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -833,7 +833,7 @@ rppi_noise_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -864,7 +864,7 @@ rppi_noise_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -895,7 +895,7 @@ rppi_noise_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -933,7 +933,7 @@ rppi_random_shadow_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -969,7 +969,7 @@ rppi_random_shadow_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1005,7 +1005,7 @@ rppi_random_shadow_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1031,7 +1031,7 @@ rppi_fog_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1055,7 +1055,7 @@ rppi_fog_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + 
rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1079,7 +1079,7 @@ rppi_fog_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1111,7 +1111,7 @@ rppi_rain_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1141,7 +1141,7 @@ rppi_rain_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1171,7 +1171,7 @@ rppi_rain_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1215,7 +1215,7 @@ rppi_random_crop_letterbox_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1257,7 +1257,7 @@ rppi_random_crop_letterbox_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1299,7 +1299,7 @@ rppi_random_crop_letterbox_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1332,7 +1332,7 @@ rppi_exposure_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1363,7 +1363,7 @@ rppi_exposure_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1394,7 +1394,7 @@ rppi_exposure_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1418,7 +1418,7 @@ rppi_histogram_balance_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1440,7 +1440,7 @@ rppi_histogram_balance_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1462,7 +1462,7 @@ rppi_histogram_balance_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_logical_operations.cpp b/src/modules/rppi_logical_operations.cpp index d5f0e1204..b64bf4009 100644 --- a/src/modules/rppi_logical_operations.cpp +++ b/src/modules/rppi_logical_operations.cpp @@ -61,7 +61,7 @@ rppi_bitwise_AND_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -92,7 +92,7 @@ rppi_bitwise_AND_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 
3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -123,7 +123,7 @@ rppi_bitwise_AND_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -154,7 +154,7 @@ rppi_bitwise_NOT_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -183,7 +183,7 @@ rppi_bitwise_NOT_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -212,7 +212,7 @@ rppi_bitwise_NOT_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -245,7 +245,7 @@ rppi_exclusive_OR_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -276,7 +276,7 @@ rppi_exclusive_OR_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -307,7 +307,7 @@ rppi_exclusive_OR_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -340,7 +340,7 @@ rppi_inclusive_OR_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -371,7 +371,7 @@ rppi_inclusive_OR_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -402,7 +402,7 @@ rppi_inclusive_OR_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_morphological_operations.cpp b/src/modules/rppi_morphological_operations.cpp index 7ab7cad1b..7c46303bb 100644 --- a/src/modules/rppi_morphological_operations.cpp +++ b/src/modules/rppi_morphological_operations.cpp @@ -61,7 +61,7 @@ rppi_erode_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -92,7 +92,7 @@ rppi_erode_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -123,7 +123,7 @@ rppi_erode_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -156,7 +156,7 @@ rppi_dilate_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -187,7 +187,7 @@ rppi_dilate_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), 
RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -218,7 +218,7 @@ rppi_dilate_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_statistical_operations.cpp b/src/modules/rppi_statistical_operations.cpp index de4be2328..70d297e97 100644 --- a/src/modules/rppi_statistical_operations.cpp +++ b/src/modules/rppi_statistical_operations.cpp @@ -63,7 +63,7 @@ rppi_thresholding_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -96,7 +96,7 @@ rppi_thresholding_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -129,7 +129,7 @@ rppi_thresholding_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -162,7 +162,7 @@ rppi_min_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -193,7 +193,7 @@ rppi_min_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -224,7 +224,7 @@ rppi_min_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -257,7 +257,7 @@ rppi_max_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -288,7 +288,7 @@ rppi_max_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -319,7 +319,7 @@ rppi_max_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -343,7 +343,7 @@ rppi_min_max_loc_u8_pln1_host(RppPtr_t srcPtr, maxLoc, RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -365,7 +365,7 @@ rppi_min_max_loc_u8_pln3_host(RppPtr_t srcPtr, maxLoc, RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -387,7 +387,7 @@ rppi_min_max_loc_u8_pkd3_host(RppPtr_t srcPtr, maxLoc, RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -411,7 +411,7 @@ rppi_integral_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -433,7 +433,7 @@ rppi_integral_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -455,7 +455,7 @@ 
rppi_integral_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -479,7 +479,7 @@ rppi_histogram_equalization_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 1, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -501,7 +501,7 @@ rppi_histogram_equalization_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -523,7 +523,7 @@ rppi_histogram_equalization_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, 3, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppt_tensor_color_augmentations.cpp b/src/modules/rppt_tensor_color_augmentations.cpp index 767e39dc1..ca188b301 100644 --- a/src/modules/rppt_tensor_color_augmentations.cpp +++ b/src/modules/rppt_tensor_color_augmentations.cpp @@ -55,7 +55,7 @@ RppStatus rppt_brightness_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -68,7 +68,7 @@ RppStatus rppt_brightness_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -81,7 +81,7 @@ RppStatus rppt_brightness_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -94,7 +94,7 @@ RppStatus rppt_brightness_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -123,7 +123,7 @@ RppStatus rppt_gamma_correction_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -135,7 +135,7 @@ RppStatus rppt_gamma_correction_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -147,7 +147,7 @@ RppStatus rppt_gamma_correction_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -159,7 +159,7 @@ RppStatus rppt_gamma_correction_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -190,7 +190,7 @@ RppStatus rppt_blend_host(RppPtr_t srcPtr1, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -203,7 +203,7 @@ 
RppStatus rppt_blend_host(RppPtr_t srcPtr1, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -216,7 +216,7 @@ RppStatus rppt_blend_host(RppPtr_t srcPtr1, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -229,7 +229,7 @@ RppStatus rppt_blend_host(RppPtr_t srcPtr1, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -264,7 +264,7 @@ RppStatus rppt_color_twist_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -279,7 +279,7 @@ RppStatus rppt_color_twist_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -294,7 +294,7 @@ RppStatus rppt_color_twist_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -309,7 +309,7 @@ RppStatus rppt_color_twist_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -344,7 +344,7 @@ RppStatus rppt_color_jitter_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -359,7 +359,7 @@ RppStatus rppt_color_jitter_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -374,7 +374,7 @@ RppStatus rppt_color_jitter_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -389,7 +389,7 @@ RppStatus rppt_color_jitter_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -425,7 +425,7 @@ RppStatus rppt_color_cast_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -438,7 +438,7 @@ RppStatus rppt_color_cast_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -451,7 +451,7 @@ RppStatus rppt_color_cast_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); 
} else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -464,7 +464,7 @@ RppStatus rppt_color_cast_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -493,7 +493,7 @@ RppStatus rppt_exposure_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -505,7 +505,7 @@ RppStatus rppt_exposure_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -517,7 +517,7 @@ RppStatus rppt_exposure_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -529,7 +529,7 @@ RppStatus rppt_exposure_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -560,7 +560,7 @@ RppStatus rppt_contrast_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -573,7 +573,7 @@ RppStatus rppt_contrast_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -586,7 +586,7 @@ RppStatus rppt_contrast_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -599,7 +599,7 @@ RppStatus rppt_contrast_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; diff --git a/src/modules/rppt_tensor_data_exchange_operations.cpp b/src/modules/rppt_tensor_data_exchange_operations.cpp index 274305289..fc2a44e23 100644 --- a/src/modules/rppt_tensor_data_exchange_operations.cpp +++ b/src/modules/rppt_tensor_data_exchange_operations.cpp @@ -47,7 +47,7 @@ RppStatus rppt_copy_host(RppPtr_t srcPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -56,7 +56,7 @@ RppStatus rppt_copy_host(RppPtr_t srcPtr, (Rpp16f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -65,7 +65,7 @@ RppStatus rppt_copy_host(RppPtr_t srcPtr, (Rpp32f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && 
(dstDescPtr->dataType == RpptDataType::I8)) { @@ -74,7 +74,7 @@ RppStatus rppt_copy_host(RppPtr_t srcPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -97,7 +97,7 @@ RppStatus rppt_swap_channels_host(RppPtr_t srcPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -106,7 +106,7 @@ RppStatus rppt_swap_channels_host(RppPtr_t srcPtr, (Rpp16f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -115,7 +115,7 @@ RppStatus rppt_swap_channels_host(RppPtr_t srcPtr, (Rpp32f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -124,7 +124,7 @@ RppStatus rppt_swap_channels_host(RppPtr_t srcPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -170,7 +170,7 @@ RppStatus rppt_color_to_greyscale_host(RppPtr_t srcPtr, dstDescPtr, channelWeights, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -180,7 +180,7 @@ RppStatus rppt_color_to_greyscale_host(RppPtr_t srcPtr, dstDescPtr, channelWeights, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -190,7 +190,7 @@ RppStatus rppt_color_to_greyscale_host(RppPtr_t srcPtr, dstDescPtr, channelWeights, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -200,7 +200,7 @@ RppStatus rppt_color_to_greyscale_host(RppPtr_t srcPtr, dstDescPtr, channelWeights, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; diff --git a/src/modules/rppt_tensor_effects_augmentations.cpp b/src/modules/rppt_tensor_effects_augmentations.cpp index 31ca1efe1..bb5a22cc4 100644 --- a/src/modules/rppt_tensor_effects_augmentations.cpp +++ b/src/modules/rppt_tensor_effects_augmentations.cpp @@ -60,7 +60,7 @@ RppStatus rppt_gridmask_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -75,7 +75,7 @@ RppStatus rppt_gridmask_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -90,7 +90,7 @@ RppStatus rppt_gridmask_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } 
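As a side note on the pattern these call-site edits converge on: each rppt_* host entry point now hands the whole rpp::Handle down to its CPU kernel, and the kernel reads the thread count itself before opening its OpenMP region. A minimal kernel-side sketch follows; the function name and body are illustrative only (the real kernels appear in later hunks of this series), and it assumes the usual RPP internal headers plus <omp.h>:

    // Illustrative sketch only -- not part of the patch. It mirrors the kernel
    // pattern used throughout this series: read the thread count from the handle
    // once, pin the OpenMP team size, then parallelize over the batch dimension.
    RppStatus example_u8_u8_host_tensor(Rpp8u *srcPtr, RpptDescPtr srcDescPtr,
                                        Rpp8u *dstPtr, RpptDescPtr dstDescPtr,
                                        RppLayoutParams layoutParams,
                                        rpp::Handle& handle)
    {
        Rpp32u numThreads = handle.GetNumThreads();  // value fixed at handle creation
        omp_set_dynamic(0);                          // stop the runtime from shrinking the team
    #pragma omp parallel for num_threads(numThreads)
        for (int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
        {
            // per-image work for image 'batchCount' goes here
        }
        return RPP_SUCCESS;
    }
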
else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -105,7 +105,7 @@ RppStatus rppt_gridmask_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -146,7 +146,7 @@ RppStatus rppt_spatter_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -158,7 +158,7 @@ RppStatus rppt_spatter_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -170,7 +170,7 @@ RppStatus rppt_spatter_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -182,7 +182,7 @@ RppStatus rppt_spatter_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -225,7 +225,7 @@ RppStatus rppt_salt_and_pepper_noise_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -241,7 +241,7 @@ RppStatus rppt_salt_and_pepper_noise_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -257,7 +257,7 @@ RppStatus rppt_salt_and_pepper_noise_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -273,7 +273,7 @@ RppStatus rppt_salt_and_pepper_noise_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -310,7 +310,7 @@ RppStatus rppt_shot_noise_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -323,7 +323,7 @@ RppStatus rppt_shot_noise_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -336,7 +336,7 @@ RppStatus rppt_shot_noise_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -349,7 +349,7 @@ RppStatus rppt_shot_noise_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -384,7 +384,7 @@ RppStatus rppt_gaussian_noise_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, 
layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -398,7 +398,7 @@ RppStatus rppt_gaussian_noise_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -412,7 +412,7 @@ RppStatus rppt_gaussian_noise_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -426,7 +426,7 @@ RppStatus rppt_gaussian_noise_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -459,7 +459,8 @@ RppStatus rppt_non_linear_blend_host(RppPtr_t srcPtr1, stdDevTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -471,7 +472,8 @@ RppStatus rppt_non_linear_blend_host(RppPtr_t srcPtr1, stdDevTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -483,7 +485,8 @@ RppStatus rppt_non_linear_blend_host(RppPtr_t srcPtr1, stdDevTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -495,7 +498,8 @@ RppStatus rppt_non_linear_blend_host(RppPtr_t srcPtr1, stdDevTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; diff --git a/src/modules/rppt_tensor_geometric_augmentations.cpp b/src/modules/rppt_tensor_geometric_augmentations.cpp index afad9170f..80c365895 100644 --- a/src/modules/rppt_tensor_geometric_augmentations.cpp +++ b/src/modules/rppt_tensor_geometric_augmentations.cpp @@ -55,7 +55,7 @@ RppStatus rppt_crop_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -66,7 +66,7 @@ RppStatus rppt_crop_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -77,7 +77,7 @@ RppStatus rppt_crop_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -88,7 +88,7 @@ RppStatus rppt_crop_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -121,7 +121,7 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) 
{ @@ -135,7 +135,7 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -149,7 +149,7 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -163,7 +163,7 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -177,7 +177,7 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -191,7 +191,7 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -225,7 +225,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -236,7 +237,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -247,7 +249,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -258,7 +261,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } } else if(interpolationType == RpptInterpolationType::BILINEAR) @@ -272,7 +276,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -283,7 +288,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -294,7 +300,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -305,7 +312,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } } @@ -337,7 +345,7 @@ RppStatus rppt_flip_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - 
rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -350,7 +358,7 @@ RppStatus rppt_flip_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -363,7 +371,7 @@ RppStatus rppt_flip_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -376,7 +384,7 @@ RppStatus rppt_flip_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -408,7 +416,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -420,7 +428,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -432,7 +440,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -444,7 +452,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else if(interpolationType == RpptInterpolationType::BILINEAR) @@ -459,7 +467,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -471,7 +479,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -483,7 +491,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -495,7 +503,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } else @@ -523,7 +531,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiType, srcLayoutParams, interpolationType, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -538,7 +546,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiType, srcLayoutParams, interpolationType, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == 
RpptDataType::I8)) { @@ -553,7 +561,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiType, srcLayoutParams, interpolationType, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -568,7 +576,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, roiType, srcLayoutParams, interpolationType, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } } @@ -608,7 +616,7 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -623,7 +631,7 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -638,7 +646,7 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -653,7 +661,7 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -668,7 +676,7 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -683,7 +691,7 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -716,7 +724,7 @@ RppStatus rppt_resize_crop_mirror_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -729,7 +737,7 @@ RppStatus rppt_resize_crop_mirror_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -742,7 +750,7 @@ RppStatus rppt_resize_crop_mirror_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -755,7 +763,7 @@ RppStatus rppt_resize_crop_mirror_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, srcLayoutParams, - rpp::deref(rppHandle).GetNumThreads()); + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -799,7 +807,8 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -810,7 +819,8 @@ 
RppStatus rppt_rotate_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -821,7 +831,8 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -832,7 +843,8 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } } else if(interpolationType == RpptInterpolationType::BILINEAR) @@ -846,7 +858,8 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -857,7 +870,8 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -868,7 +882,8 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -879,7 +894,8 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr, affineTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } } From c17d79d6ac1375928b73dc63cd310fec8889ef34 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Fri, 31 Mar 2023 00:29:46 -0700 Subject: [PATCH 06/15] Added numThreads parameter to handle creation in new test suite --- utilities/rpp-unittests/HOST_NEW/testAllScript.sh | 8 ++++---- utilities/test_suite/HOST/Tensor_host.cpp | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/utilities/rpp-unittests/HOST_NEW/testAllScript.sh b/utilities/rpp-unittests/HOST_NEW/testAllScript.sh index 6bb5ffd45..a11a9c5c9 100755 --- a/utilities/rpp-unittests/HOST_NEW/testAllScript.sh +++ b/utilities/rpp-unittests/HOST_NEW/testAllScript.sh @@ -170,7 +170,7 @@ do echo "--------------------------------" printf "Running a New Functionality...\n" echo "--------------------------------" - for ((bitDepth=0;bitDepth<1;bitDepth++)) + for ((bitDepth=0;bitDepth<7;bitDepth++)) do printf "\n\n\nRunning New Bit Depth...\n-------------------------\n\n" for ((outputFormatToggle=0;outputFormatToggle<2;outputFormatToggle++)) @@ -246,7 +246,7 @@ do echo "--------------------------------" printf "Running a New Functionality...\n" echo "--------------------------------" - for ((bitDepth=0;bitDepth<1;bitDepth++)) + for ((bitDepth=0;bitDepth<7;bitDepth++)) do printf "\n\n\nRunning New Bit Depth...\n-------------------------\n\n" for ((outputFormatToggle=0;outputFormatToggle<1;outputFormatToggle++)) @@ -322,7 +322,7 @@ do echo "--------------------------------" printf "Running a New Functionality...\n" echo "--------------------------------" - for ((bitDepth=0;bitDepth<1;bitDepth++)) + for ((bitDepth=0;bitDepth<7;bitDepth++)) do printf "\n\n\nRunning New Bit Depth...\n-------------------------\n\n" for ((outputFormatToggle=0;outputFormatToggle<2;outputFormatToggle++)) @@ -397,7 +397,7 @@ then echo "--------------------------------" | tee -a 
"$DST_FOLDER/uniqueFunctionalities_host_log.txt" printf "Running a New Functionality...\n" | tee -a "$DST_FOLDER/uniqueFunctionalities_host_log.txt" echo "--------------------------------" | tee -a "$DST_FOLDER/uniqueFunctionalities_host_log.txt" - for ((bitDepth=0;bitDepth<1;bitDepth++)) + for ((bitDepth=0;bitDepth<7;bitDepth++)) do printf "\n\n\nRunning New Bit Depth...\n-------------------------\n\n" | tee -a "$DST_FOLDER/uniqueFunctionalities_host_log.txt" echo "./uniqueFunctionalities_host $bitDepth $case" | tee -a "$DST_FOLDER/uniqueFunctionalities_host_log.txt" diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp index 253425f3e..06453d67a 100644 --- a/utilities/test_suite/HOST/Tensor_host.cpp +++ b/utilities/test_suite/HOST/Tensor_host.cpp @@ -380,7 +380,8 @@ int main(int argc, char **argv) // Run case-wise RPP API and measure time rppHandle_t handle; - rppCreateWithBatchSize(&handle, noOfImages); + Rpp32u numThreads = 0; + rppCreateWithBatchSize(&handle, noOfImages, numThreads); double maxWallTime = 0, minWallTime = 500, avgWallTime = 0; double cpuTime, wallTime; From db6d14f400f99be14e4050bf5c75025c35a1031c Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Fri, 31 Mar 2023 01:10:08 -0700 Subject: [PATCH 07/15] code cleanup --- .../cpu/host_color_model_conversions.hpp | 10 ++--- .../cpu/host_statistical_operations.hpp | 2 +- src/modules/cpu/kernel/copy.hpp | 42 +++++++++---------- src/modules/cpu/kernel/resize.hpp | 18 ++++---- src/modules/cpu/kernel/resize_crop_mirror.hpp | 8 ++-- .../cpu/kernel/resize_mirror_normalize.hpp | 14 ++++--- src/modules/rppi_statistical_operations.cpp | 9 ++-- 7 files changed, 48 insertions(+), 55 deletions(-) diff --git a/src/modules/cpu/host_color_model_conversions.hpp b/src/modules/cpu/host_color_model_conversions.hpp index eada8570a..1135b6be4 100644 --- a/src/modules/cpu/host_color_model_conversions.hpp +++ b/src/modules/cpu/host_color_model_conversions.hpp @@ -610,10 +610,10 @@ RppStatus color_temperature_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiS RppiROI *roiPoints, Rpp32u nbatchSize, RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if (channel == 1) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -708,8 +708,7 @@ RppStatus color_temperature_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiS { if(chnFormat == RPPI_CHN_PLANAR) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -866,8 +865,7 @@ RppStatus color_temperature_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiS } else if (chnFormat == RPPI_CHN_PACKED) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { diff --git a/src/modules/cpu/host_statistical_operations.hpp b/src/modules/cpu/host_statistical_operations.hpp index ac3d3e889..c3dc0a851 100644 --- a/src/modules/cpu/host_statistical_operations.hpp +++ b/src/modules/cpu/host_statistical_operations.hpp @@ -1118,7 +1118,7 @@ RppStatus min_max_loc_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b template RppStatus min_max_loc_host(T* srcPtr, 
RppiSize srcSize, Rpp8u* min, Rpp8u* max, Rpp32u* minLoc, Rpp32u* maxLoc, - RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) + RppiChnFormat chnFormat, Rpp32u channel) { *min = 255; *max = 0; diff --git a/src/modules/cpu/kernel/copy.hpp b/src/modules/cpu/kernel/copy.hpp index 499dd3fb4..e9f4655df 100644 --- a/src/modules/cpu/kernel/copy.hpp +++ b/src/modules/cpu/kernel/copy.hpp @@ -31,11 +31,12 @@ RppStatus copy_u8_u8_host_tensor(Rpp8u *srcPtr, RppLayoutParams layoutParams, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); + // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW) if ((srcDescPtr->c == 1) || (srcDescPtr->layout == dstDescPtr->layout)) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -49,8 +50,7 @@ RppStatus copy_u8_u8_host_tensor(Rpp8u *srcPtr, // Copy with fused output-layout toggle (NHWC -> NCHW) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -105,8 +105,7 @@ RppStatus copy_u8_u8_host_tensor(Rpp8u *srcPtr, // Copy with fused output-layout toggle (NCHW -> NHWC) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -168,11 +167,12 @@ RppStatus copy_f32_f32_host_tensor(Rpp32f *srcPtr, RppLayoutParams layoutParams, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); + // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW) if ((srcDescPtr->c == 1) || (srcDescPtr->layout == dstDescPtr->layout)) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -186,8 +186,7 @@ RppStatus copy_f32_f32_host_tensor(Rpp32f *srcPtr, // Copy with fused output-layout toggle (NHWC -> NCHW) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -243,8 +242,7 @@ RppStatus copy_f32_f32_host_tensor(Rpp32f *srcPtr, // Copy with fused output-layout toggle (NCHW -> NHWC) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -307,11 +305,12 @@ RppStatus copy_f16_f16_host_tensor(Rpp16f *srcPtr, RppLayoutParams layoutParams, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); + // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW) if ((srcDescPtr->c == 1) || (srcDescPtr->layout == 
dstDescPtr->layout)) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -325,8 +324,7 @@ RppStatus copy_f16_f16_host_tensor(Rpp16f *srcPtr, // Copy with fused output-layout toggle (NHWC -> NCHW) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -398,8 +396,7 @@ RppStatus copy_f16_f16_host_tensor(Rpp16f *srcPtr, // Copy with fused output-layout toggle (NCHW -> NHWC) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -478,10 +475,11 @@ RppStatus copy_i8_i8_host_tensor(Rpp8s *srcPtr, RppLayoutParams layoutParams, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); + // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW) if ((srcDescPtr->c == 1) || (srcDescPtr->layout == dstDescPtr->layout)) { - Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -496,8 +494,7 @@ RppStatus copy_i8_i8_host_tensor(Rpp8s *srcPtr, // Copy with fused output-layout toggle (NHWC -> NCHW) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { @@ -552,8 +549,7 @@ RppStatus copy_i8_i8_host_tensor(Rpp8s *srcPtr, // Copy with fused output-layout toggle (NCHW -> NHWC) else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { diff --git a/src/modules/cpu/kernel/resize.hpp b/src/modules/cpu/kernel/resize.hpp index 4949b84d3..582905c86 100644 --- a/src/modules/cpu/kernel/resize.hpp +++ b/src/modules/cpu/kernel/resize.hpp @@ -38,8 +38,8 @@ RppStatus resize_nn_u8_u8_host_tensor(Rpp8u *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -259,8 +259,8 @@ RppStatus resize_nn_f32_f32_host_tensor(Rpp32f *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -472,8 +472,8 @@ 
RppStatus resize_nn_i8_i8_host_tensor(Rpp8s *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -693,8 +693,8 @@ RppStatus resize_nn_f16_f16_host_tensor(Rpp16f *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -803,8 +803,8 @@ RppStatus resize_bilinear_u8_u8_host_tensor(Rpp8u *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -1044,8 +1044,8 @@ RppStatus resize_bilinear_f32_f32_host_tensor(Rpp32f *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -1287,8 +1287,8 @@ RppStatus resize_bilinear_f16_f16_host_tensor(Rpp16f *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -1531,8 +1531,8 @@ RppStatus resize_bilinear_i8_i8_host_tensor(Rpp8s *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -1777,8 +1777,8 @@ RppStatus resize_separable_host_tensor(T *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) diff --git a/src/modules/cpu/kernel/resize_crop_mirror.hpp b/src/modules/cpu/kernel/resize_crop_mirror.hpp index 0b075f56d..6cf60a118 100644 --- a/src/modules/cpu/kernel/resize_crop_mirror.hpp +++ b/src/modules/cpu/kernel/resize_crop_mirror.hpp @@ -36,8 +36,8 @@ RppStatus resize_crop_mirror_u8_u8_host_tensor(Rpp8u *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -287,8 +287,8 @@ RppStatus resize_crop_mirror_f32_f32_host_tensor(Rpp32f *srcPtr, rpp::Handle& handle) { 
RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -543,8 +543,8 @@ RppStatus resize_crop_mirror_f16_f16_host_tensor(Rpp16f *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -799,8 +799,8 @@ RppStatus resize_crop_mirror_i8_i8_host_tensor(Rpp8s *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) diff --git a/src/modules/cpu/kernel/resize_mirror_normalize.hpp b/src/modules/cpu/kernel/resize_mirror_normalize.hpp index 2894d5c74..8008d1d1b 100644 --- a/src/modules/cpu/kernel/resize_mirror_normalize.hpp +++ b/src/modules/cpu/kernel/resize_mirror_normalize.hpp @@ -38,8 +38,8 @@ RppStatus resize_mirror_normalize_u8_u8_host_tensor(Rpp8u *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -320,8 +320,8 @@ RppStatus resize_mirror_normalize_f32_f32_host_tensor(Rpp32f *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -606,8 +606,8 @@ RppStatus resize_mirror_normalize_f16_f16_host_tensor(Rpp16f *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -892,8 +892,8 @@ RppStatus resize_mirror_normalize_i8_i8_host_tensor(Rpp8s *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -1179,7 +1179,8 @@ RppStatus resize_mirror_normalize_u8_f32_host_tensor(Rpp8u *srcPtr, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -Rpp32u numThreads = handle.GetNumThreads(); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) @@ -1462,7 +1463,8 @@ RppStatus resize_mirror_normalize_u8_f16_host_tensor(Rpp8u *srcPtr, rpp::Handle& 
handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -Rpp32u numThreads = handle.GetNumThreads(); + Rpp32u numThreads = handle.GetNumThreads(); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) diff --git a/src/modules/rppi_statistical_operations.cpp b/src/modules/rppi_statistical_operations.cpp index 70d297e97..4706a6b97 100644 --- a/src/modules/rppi_statistical_operations.cpp +++ b/src/modules/rppi_statistical_operations.cpp @@ -342,8 +342,7 @@ rppi_min_max_loc_u8_pln1_host(RppPtr_t srcPtr, minLoc, maxLoc, RPPI_CHN_PLANAR, - 1, - rpp::deref(rppHandle)); + 1); return RPP_SUCCESS; } @@ -364,8 +363,7 @@ rppi_min_max_loc_u8_pln3_host(RppPtr_t srcPtr, minLoc, maxLoc, RPPI_CHN_PLANAR, - 3, - rpp::deref(rppHandle)); + 3); return RPP_SUCCESS; } @@ -386,8 +384,7 @@ rppi_min_max_loc_u8_pkd3_host(RppPtr_t srcPtr, minLoc, maxLoc, RPPI_CHN_PACKED, - 3, - rpp::deref(rppHandle)); + 3); return RPP_SUCCESS; } From fce0408f59cc0da6f1fcbf285240297eeb7af812 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Fri, 31 Mar 2023 01:21:13 -0700 Subject: [PATCH 08/15] added rpp handle parameter info in the tensor header files --- include/rppt_tensor_color_augmentations.h | 8 ++++++++ include/rppt_tensor_data_exchange_operations.h | 3 +++ include/rppt_tensor_effects_augmentations.h | 6 ++++++ include/rppt_tensor_filter_augmentations.h | 1 + include/rppt_tensor_geometric_augmentations.h | 8 ++++++++ include/rppt_tensor_morphological_operations.h | 2 ++ 6 files changed, 28 insertions(+) diff --git a/include/rppt_tensor_color_augmentations.h b/include/rppt_tensor_color_augmentations.h index c1f0a038a..b362d16b4 100644 --- a/include/rppt_tensor_color_augmentations.h +++ b/include/rppt_tensor_color_augmentations.h @@ -40,6 +40,7 @@ extern "C" { // *param[in] betaTensor beta values for brightness calculation (1D tensor of size batchSize with 0 <= beta <= 255 for each image in batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -59,6 +60,7 @@ RppStatus rppt_brightness_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t // *param[in] gammaTensor gamma values for gamma correction calculation (1D tensor of size batchSize with gamma >= 0 for each image in batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. 
// *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -79,6 +81,7 @@ RppStatus rppt_gamma_correction_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, Rpp // *param[in] alphaTensor alpha values for alpha-blending (1D tensor of size batchSize with the transparency factor transparency factor 0 <= alpha <= 1 for each image in batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -101,6 +104,7 @@ RppStatus rppt_blend_gpu(RppPtr_t srcPtr1, RppPtr_t srcPtr2, RpptDescPtr srcDesc // *param[in] saturationTensor saturation modification parameter for color_jitter calculation (1D tensor of size batchSize with saturationTensor[i] >= 0 for each image in batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -123,6 +127,7 @@ RppStatus rppt_color_twist_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t // *param[in] saturationTensor saturation modification parameter for color_jitter calculation (1D tensor of size batchSize with saturationTensor[i] >= 0 for each image in batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -141,6 +146,7 @@ RppStatus rppt_color_jitter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr // *param[in] alphaTensor alpha values for color casting calculation (1D tensor of size sizeof(Rpp32f) * batchSize with alphaTensor[i] >= 0 for each image in batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. 
// *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -161,6 +167,7 @@ RppStatus rppt_color_cast_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t // *param[in] exposureFactorTensor tensor containing an Rpp32f exposure factor for each image in the batch (exposureFactorTensor[n] >= 0) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -182,6 +189,7 @@ RppStatus rppt_exposure_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t ds // *param[in] contrastCenterTensor contrast center values for contrast calculation (1D tensor of size batchSize) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error diff --git a/include/rppt_tensor_data_exchange_operations.h b/include/rppt_tensor_data_exchange_operations.h index aa0b8aa31..ceff38ff0 100644 --- a/include/rppt_tensor_data_exchange_operations.h +++ b/include/rppt_tensor_data_exchange_operations.h @@ -36,6 +36,7 @@ extern "C" { // *param[in] srcDescPtr source tensor descriptor // *param[out] dstPtr destination tensor memory // *param[in] dstDescPtr destination tensor descriptor +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -53,6 +54,7 @@ RppStatus rppt_copy_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr // *param[in] srcDescPtr source tensor descriptor // *param[out] dstPtr destination tensor memory // *param[in] dstDescPtr destination tensor descriptor +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -71,6 +73,7 @@ RppStatus rppt_swap_channels_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr // *param[out] dstPtr destination tensor memory // *param[in] dstDescPtr destination tensor descriptor // *param[in] srcSubpixelLayout A RpptSubpixelLayout type enum to specify source subpixel layout (RGBtype or BGRtype) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. 
// *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error diff --git a/include/rppt_tensor_effects_augmentations.h b/include/rppt_tensor_effects_augmentations.h index 1740bc88c..3104d644f 100644 --- a/include/rppt_tensor_effects_augmentations.h +++ b/include/rppt_tensor_effects_augmentations.h @@ -42,6 +42,7 @@ extern "C" { // *param[in] translateVector translateVector for gridmask calculation = grid X and Y translation lengths in pixels (a single RpptUintVector2D x,y value pair that applies to all images in the batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -62,6 +63,7 @@ RppStatus rppt_gridmask_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t ds // *param[in] spatterColor RGB values to use for the spatter augmentation (A single set of 3 Rpp8u values as RpptRGB that applies to all images in the batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -86,6 +88,7 @@ RppStatus rppt_spatter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dst // *param[in] seed A user-defined seed value (single Rpp32u value) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -107,6 +110,7 @@ RppStatus rppt_salt_and_pepper_noise_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr // *param[in] seed A user-defined seed value (single Rpp32u value) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. 
// *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -129,6 +133,7 @@ RppStatus rppt_shot_noise_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t // *param[in] seed A user-defined seed value (single Rpp32u value) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -149,6 +154,7 @@ RppStatus rppt_gaussian_noise_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPt // *param[in] stdDevTensor standard deviation values to quantify non-linearity in the blend (1D tensor of size batchSize with stdDevTensor[n] > 0 for each image in batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error diff --git a/include/rppt_tensor_filter_augmentations.h b/include/rppt_tensor_filter_augmentations.h index 231627a71..47e49001f 100644 --- a/include/rppt_tensor_filter_augmentations.h +++ b/include/rppt_tensor_filter_augmentations.h @@ -39,6 +39,7 @@ extern "C" { // *param[in] kernelSize kernel size for box_filter (a single Rpp32u odd number with kernelSize = 3/5/7/9 that applies to all images in the batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error diff --git a/include/rppt_tensor_geometric_augmentations.h b/include/rppt_tensor_geometric_augmentations.h index 76bfebf66..11df31121 100644 --- a/include/rppt_tensor_geometric_augmentations.h +++ b/include/rppt_tensor_geometric_augmentations.h @@ -38,6 +38,7 @@ extern "C" { // *param[in] dstDescPtr destination tensor descriptor // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. 
// *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -61,6 +62,7 @@ RppStatus rppt_crop_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr // *param[in] mirrorTensor mirror flag value to set mirroring on/off for each image in the batch (mirrorTensor[n] = 0/1) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -83,6 +85,7 @@ RppStatus rppt_crop_mirror_normalize_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr // *param[in] interpolationType Interpolation type used (RpptInterpolationType::XYWH or RpptRoiType::LTRB) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -104,6 +107,7 @@ RppStatus rppt_warp_affine_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t // *param[in] verticalTensor vertical flag value to set vertical flip on/off for each image in the batch (verticalTensor[n] = 0/1) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -125,6 +129,7 @@ RppStatus rppt_flip_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr // *param[in] interpolationType resize interpolation type // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : successful completion // *retval RPP_ERROR : Error @@ -149,6 +154,7 @@ RppStatus rppt_resize_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP // *param[in] mirrorTensor mirror flag value to set mirroring on/off for each image in the batch (mirrorTensor[n] = 0/1) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. 
// *retval RPP_SUCCESS : successful completion // *retval RPP_ERROR : Error @@ -171,6 +177,7 @@ RppStatus rppt_resize_mirror_normalize_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescP // *param[in] mirrorTensor mirror flag value to set mirroring on/off for each image in the batch (mirrorTensor[n] = 0/1) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : successful completion // *retval RPP_ERROR : Error @@ -192,6 +199,7 @@ RppStatus rppt_resize_crop_mirror_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, R // *param[in] interpolationType Interpolation type used (RpptInterpolationType::XYWH or RpptRoiType::LTRB) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error diff --git a/include/rppt_tensor_morphological_operations.h b/include/rppt_tensor_morphological_operations.h index bb647b687..749cb6c9d 100644 --- a/include/rppt_tensor_morphological_operations.h +++ b/include/rppt_tensor_morphological_operations.h @@ -39,6 +39,7 @@ extern "C" { // *param[in] kernelSize kernel size for erode (a single Rpp32u odd number with kernelSize = 3/5/7/9 that applies to all images in the batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -58,6 +59,7 @@ RppStatus rppt_erode_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPt // *param[in] kernelSize kernel size for dilate (a single Rpp32u odd number with kernelSize = 3/5/7/9 that applies to all images in the batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) +// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. 
// *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error From e6a0cbc8328e1117bddab3149a93e1835096fbc0 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Fri, 31 Mar 2023 07:47:41 -0700 Subject: [PATCH 09/15] fixed alignment issues --- .../cpu/host_advanced_augmentations.hpp | 4 +--- .../cpu/host_color_model_conversions.hpp | 4 +--- src/modules/cpu/host_fused_functions.hpp | 19 +++++++------------ src/modules/cpu/host_image_augmentations.hpp | 10 ++++------ .../cpu/host_statistical_operations.hpp | 16 ++++------------ src/modules/hip/handlehip.cpp | 2 +- src/modules/rppi_fused_functions.cpp | 8 ++++---- .../rppt_tensor_geometric_augmentations.cpp | 12 ++++++------ 8 files changed, 28 insertions(+), 47 deletions(-) diff --git a/src/modules/cpu/host_advanced_augmentations.hpp b/src/modules/cpu/host_advanced_augmentations.hpp index 37d62173b..44f5c5355 100644 --- a/src/modules/cpu/host_advanced_augmentations.hpp +++ b/src/modules/cpu/host_advanced_augmentations.hpp @@ -2270,7 +2270,6 @@ RppStatus lut_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_src RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { Rpp32u lutSize = 256; - Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { @@ -2346,8 +2345,7 @@ RppStatus lut_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_src } else if(chnFormat == RPPI_CHN_PACKED) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { diff --git a/src/modules/cpu/host_color_model_conversions.hpp b/src/modules/cpu/host_color_model_conversions.hpp index 1135b6be4..e67238968 100644 --- a/src/modules/cpu/host_color_model_conversions.hpp +++ b/src/modules/cpu/host_color_model_conversions.hpp @@ -471,7 +471,6 @@ RppStatus look_up_table_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { Rpp32u lutSize = 256; - Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { @@ -516,8 +515,7 @@ RppStatus look_up_table_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize } else if(chnFormat == RPPI_CHN_PACKED) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { diff --git a/src/modules/cpu/host_fused_functions.hpp b/src/modules/cpu/host_fused_functions.hpp index fd7feb3b2..9329109ff 100644 --- a/src/modules/cpu/host_fused_functions.hpp +++ b/src/modules/cpu/host_fused_functions.hpp @@ -79,8 +79,7 @@ RppStatus color_twist_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b } else if(chnFormat == RPPI_CHN_PACKED) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -835,8 +834,7 @@ RppStatus color_twist_f32_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz } else if(chnFormat == RPPI_CHN_PACKED) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -1640,8 +1638,7 @@ RppStatus color_twist_f16_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz } else if(chnFormat == RPPI_CHN_PACKED) { - 
Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -2513,8 +2510,7 @@ RppStatus color_twist_i8_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize } else if(chnFormat == RPPI_CHN_PACKED) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -5828,11 +5824,11 @@ RppStatus resize_mirror_normalize_host_batch(T* srcPtr, RppiSize *batch_srcSize, Rpp32u outputFormatToggle, Rpp32u nbatchSize, RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { T *dstPtrCopy = (T*) calloc(channel * batch_dstSizeMax[0].height * batch_dstSizeMax[0].width * nbatchSize, sizeof(T)); - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -6065,8 +6061,7 @@ RppStatus resize_mirror_normalize_host_batch(T* srcPtr, RppiSize *batch_srcSize, else if (chnFormat == RPPI_CHN_PACKED) { T *dstPtrCopy = (T*) calloc(channel * batch_dstSizeMax[0].height * batch_dstSizeMax[0].width * nbatchSize, sizeof(T)); - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { diff --git a/src/modules/cpu/host_image_augmentations.hpp b/src/modules/cpu/host_image_augmentations.hpp index 9c0e0b3b9..de7ae4968 100644 --- a/src/modules/cpu/host_image_augmentations.hpp +++ b/src/modules/cpu/host_image_augmentations.hpp @@ -3386,8 +3386,7 @@ RppStatus pixelate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batc } else if(chnFormat == RPPI_CHN_PACKED) { - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -4194,14 +4193,14 @@ RppStatus noise_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s RppiROI *roiPoints, Rpp32u nbatchSize, RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { T *srcPtrBufferROI, *dstPtrBufferROI; srcPtrBufferROI = (T*) calloc(channel * batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * nbatchSize, sizeof(T)); dstPtrBufferROI = (T*) calloc(channel * batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * nbatchSize, sizeof(T)); - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { @@ -4310,8 +4309,7 @@ RppStatus noise_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s srcPtrBufferROI = (T*) calloc(channel * batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * nbatchSize, sizeof(T)); dstPtrBufferROI = (T*) calloc(channel * batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * nbatchSize, sizeof(T)); - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) 
{ diff --git a/src/modules/cpu/host_statistical_operations.hpp b/src/modules/cpu/host_statistical_operations.hpp index c3dc0a851..8539ff932 100644 --- a/src/modules/cpu/host_statistical_operations.hpp +++ b/src/modules/cpu/host_statistical_operations.hpp @@ -1332,9 +1332,7 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b T *srcPtrChannel; srcPtrChannel = srcPtrImage + (c * imageDimMax); - - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int i = 0; i < batch_srcSize[batchCount].height; i++) { @@ -1372,9 +1370,7 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b T *srcPtrChannel; srcPtrChannel = srcPtrImage + (c * imageDimMax); - - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int i = 0; i < batch_srcSize[batchCount].height; i++) { @@ -1445,9 +1441,7 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b Rpp32u elementsInRow = channel * batch_srcSize[batchCount].width; Rpp32u elementsInRowMax = channel * batch_srcSizeMax[batchCount].width; - - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int i = 0; i < batch_srcSize[batchCount].height; i++) { @@ -1482,9 +1476,7 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b *mean = *mean / (channel * imageDim); - - Rpp32u numThreads = handle.GetNumThreads(); - omp_set_dynamic(0); + omp_set_dynamic(0); #pragma omp parallel for num_threads(numThreads) for(int i = 0; i < batch_srcSize[batchCount].height; i++) { diff --git a/src/modules/hip/handlehip.cpp b/src/modules/hip/handlehip.cpp index aa724d7a1..273bf3f98 100644 --- a/src/modules/hip/handlehip.cpp +++ b/src/modules/hip/handlehip.cpp @@ -172,7 +172,7 @@ struct HandleImpl void PreInitializeBufferCPU() { this->initHandle = new InitHandle(); - if(this->numThreads == 0) + if(this->numThreads == 0) this->numThreads = this->nBatchSize; this->initHandle->nbatchSize = this->nBatchSize; diff --git a/src/modules/rppi_fused_functions.cpp b/src/modules/rppi_fused_functions.cpp index 7b11282cd..db3bb2524 100644 --- a/src/modules/rppi_fused_functions.cpp +++ b/src/modules/rppi_fused_functions.cpp @@ -73,7 +73,7 @@ RppStatus color_twist_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle)); + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP16) { @@ -90,7 +90,7 @@ RppStatus color_twist_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle)); + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP32) { @@ -107,7 +107,7 @@ RppStatus color_twist_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle)); + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::I8) { @@ -124,7 +124,7 @@ RppStatus color_twist_host_helper(RppiChnFormat chn_format, rpp::deref(rppHandle).GetBatchSize(), chn_format, num_of_channels, - rpp::deref(rppHandle)); + rpp::deref(rppHandle)); } return RPP_SUCCESS; diff --git a/src/modules/rppt_tensor_geometric_augmentations.cpp b/src/modules/rppt_tensor_geometric_augmentations.cpp index 80c365895..6a3e6eab7 100644 --- 
a/src/modules/rppt_tensor_geometric_augmentations.cpp +++ b/src/modules/rppt_tensor_geometric_augmentations.cpp @@ -808,7 +808,7 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle)); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -844,7 +844,7 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle)); + rpp::deref(rppHandle)); } } else if(interpolationType == RpptInterpolationType::BILINEAR) @@ -859,7 +859,7 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle)); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -871,7 +871,7 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle)); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -883,7 +883,7 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle)); + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -895,7 +895,7 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr, roiTensorPtrSrc, roiType, layoutParams, - rpp::deref(rppHandle)); + rpp::deref(rppHandle)); } } From bb6bb37aad42076795bff24055737dfefb0b31eb Mon Sep 17 00:00:00 2001 From: Abishek Date: Fri, 31 Mar 2023 11:13:27 -0700 Subject: [PATCH 10/15] Fix handle documentation --- include/rppt_tensor_color_augmentations.h | 16 ++++++++-------- include/rppt_tensor_data_exchange_operations.h | 6 +++--- include/rppt_tensor_effects_augmentations.h | 12 ++++++------ include/rppt_tensor_filter_augmentations.h | 2 +- include/rppt_tensor_geometric_augmentations.h | 16 ++++++++-------- include/rppt_tensor_morphological_operations.h | 4 ++-- 6 files changed, 28 insertions(+), 28 deletions(-) diff --git a/include/rppt_tensor_color_augmentations.h b/include/rppt_tensor_color_augmentations.h index b362d16b4..6b1b79026 100644 --- a/include/rppt_tensor_color_augmentations.h +++ b/include/rppt_tensor_color_augmentations.h @@ -40,7 +40,7 @@ extern "C" { // *param[in] betaTensor beta values for brightness calculation (1D tensor of size batchSize with 0 <= beta <= 255 for each image in batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. 
// *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -60,7 +60,7 @@ RppStatus rppt_brightness_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t // *param[in] gammaTensor gamma values for gamma correction calculation (1D tensor of size batchSize with gamma >= 0 for each image in batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -81,7 +81,7 @@ RppStatus rppt_gamma_correction_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, Rpp // *param[in] alphaTensor alpha values for alpha-blending (1D tensor of size batchSize with the transparency factor transparency factor 0 <= alpha <= 1 for each image in batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -104,7 +104,7 @@ RppStatus rppt_blend_gpu(RppPtr_t srcPtr1, RppPtr_t srcPtr2, RpptDescPtr srcDesc // *param[in] saturationTensor saturation modification parameter for color_jitter calculation (1D tensor of size batchSize with saturationTensor[i] >= 0 for each image in batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -127,7 +127,7 @@ RppStatus rppt_color_twist_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t // *param[in] saturationTensor saturation modification parameter for color_jitter calculation (1D tensor of size batchSize with saturationTensor[i] >= 0 for each image in batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. 
// *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -146,7 +146,7 @@ RppStatus rppt_color_jitter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr // *param[in] alphaTensor alpha values for color casting calculation (1D tensor of size sizeof(Rpp32f) * batchSize with alphaTensor[i] >= 0 for each image in batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -167,7 +167,7 @@ RppStatus rppt_color_cast_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t // *param[in] exposureFactorTensor tensor containing an Rpp32f exposure factor for each image in the batch (exposureFactorTensor[n] >= 0) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -189,7 +189,7 @@ RppStatus rppt_exposure_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t ds // *param[in] contrastCenterTensor contrast center values for contrast calculation (1D tensor of size batchSize) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error diff --git a/include/rppt_tensor_data_exchange_operations.h b/include/rppt_tensor_data_exchange_operations.h index ceff38ff0..81ddfa640 100644 --- a/include/rppt_tensor_data_exchange_operations.h +++ b/include/rppt_tensor_data_exchange_operations.h @@ -36,7 +36,7 @@ extern "C" { // *param[in] srcDescPtr source tensor descriptor // *param[out] dstPtr destination tensor memory // *param[in] dstDescPtr destination tensor descriptor -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. 
// *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -54,7 +54,7 @@ RppStatus rppt_copy_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr // *param[in] srcDescPtr source tensor descriptor // *param[out] dstPtr destination tensor memory // *param[in] dstDescPtr destination tensor descriptor -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -73,7 +73,7 @@ RppStatus rppt_swap_channels_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr // *param[out] dstPtr destination tensor memory // *param[in] dstDescPtr destination tensor descriptor // *param[in] srcSubpixelLayout A RpptSubpixelLayout type enum to specify source subpixel layout (RGBtype or BGRtype) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error diff --git a/include/rppt_tensor_effects_augmentations.h b/include/rppt_tensor_effects_augmentations.h index 3104d644f..4663a3beb 100644 --- a/include/rppt_tensor_effects_augmentations.h +++ b/include/rppt_tensor_effects_augmentations.h @@ -42,7 +42,7 @@ extern "C" { // *param[in] translateVector translateVector for gridmask calculation = grid X and Y translation lengths in pixels (a single RpptUintVector2D x,y value pair that applies to all images in the batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -63,7 +63,7 @@ RppStatus rppt_gridmask_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t ds // *param[in] spatterColor RGB values to use for the spatter augmentation (A single set of 3 Rpp8u values as RpptRGB that applies to all images in the batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. 
// *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -88,7 +88,7 @@ RppStatus rppt_spatter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dst // *param[in] seed A user-defined seed value (single Rpp32u value) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -110,7 +110,7 @@ RppStatus rppt_salt_and_pepper_noise_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr // *param[in] seed A user-defined seed value (single Rpp32u value) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -133,7 +133,7 @@ RppStatus rppt_shot_noise_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t // *param[in] seed A user-defined seed value (single Rpp32u value) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -154,7 +154,7 @@ RppStatus rppt_gaussian_noise_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPt // *param[in] stdDevTensor standard deviation values to quantify non-linearity in the blend (1D tensor of size batchSize with stdDevTensor[n] > 0 for each image in batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. 
// *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error diff --git a/include/rppt_tensor_filter_augmentations.h b/include/rppt_tensor_filter_augmentations.h index 47e49001f..727ac53b4 100644 --- a/include/rppt_tensor_filter_augmentations.h +++ b/include/rppt_tensor_filter_augmentations.h @@ -39,7 +39,7 @@ extern "C" { // *param[in] kernelSize kernel size for box_filter (a single Rpp32u odd number with kernelSize = 3/5/7/9 that applies to all images in the batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error diff --git a/include/rppt_tensor_geometric_augmentations.h b/include/rppt_tensor_geometric_augmentations.h index 11df31121..3b967c431 100644 --- a/include/rppt_tensor_geometric_augmentations.h +++ b/include/rppt_tensor_geometric_augmentations.h @@ -38,7 +38,7 @@ extern "C" { // *param[in] dstDescPtr destination tensor descriptor // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -62,7 +62,7 @@ RppStatus rppt_crop_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr // *param[in] mirrorTensor mirror flag value to set mirroring on/off for each image in the batch (mirrorTensor[n] = 0/1) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -85,7 +85,7 @@ RppStatus rppt_crop_mirror_normalize_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr // *param[in] interpolationType Interpolation type used (RpptInterpolationType::XYWH or RpptRoiType::LTRB) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. 
// *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -107,7 +107,7 @@ RppStatus rppt_warp_affine_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t // *param[in] verticalTensor vertical flag value to set vertical flip on/off for each image in the batch (verticalTensor[n] = 0/1) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -129,7 +129,7 @@ RppStatus rppt_flip_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr // *param[in] interpolationType resize interpolation type // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : successful completion // *retval RPP_ERROR : Error @@ -154,7 +154,7 @@ RppStatus rppt_resize_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP // *param[in] mirrorTensor mirror flag value to set mirroring on/off for each image in the batch (mirrorTensor[n] = 0/1) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : successful completion // *retval RPP_ERROR : Error @@ -177,7 +177,7 @@ RppStatus rppt_resize_mirror_normalize_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescP // *param[in] mirrorTensor mirror flag value to set mirroring on/off for each image in the batch (mirrorTensor[n] = 0/1) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. 
// *retval RPP_SUCCESS : successful completion // *retval RPP_ERROR : Error @@ -199,7 +199,7 @@ RppStatus rppt_resize_crop_mirror_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, R // *param[in] interpolationType Interpolation type used (RpptInterpolationType::XYWH or RpptRoiType::LTRB) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error diff --git a/include/rppt_tensor_morphological_operations.h b/include/rppt_tensor_morphological_operations.h index 749cb6c9d..7bc8f17b9 100644 --- a/include/rppt_tensor_morphological_operations.h +++ b/include/rppt_tensor_morphological_operations.h @@ -39,7 +39,7 @@ extern "C" { // *param[in] kernelSize kernel size for erode (a single Rpp32u odd number with kernelSize = 3/5/7/9 that applies to all images in the batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. // *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error @@ -59,7 +59,7 @@ RppStatus rppt_erode_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPt // *param[in] kernelSize kernel size for dilate (a single Rpp32u odd number with kernelSize = 3/5/7/9 that applies to all images in the batch) // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) -// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants +// *param[in] rppHandle HIP-handle for "_gpu" variants and Host-handle for "_host" variants // *returns a RppStatus enumeration. 
// *retval RPP_SUCCESS : succesful completion // *retval RPP_ERROR : Error From 6fb593981ab9c8fc305f165d841ab1900315e7f3 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Fri, 31 Mar 2023 19:30:59 -0700 Subject: [PATCH 11/15] added default value for numThreads added description for numThreads in readme and test suite files --- Readme.md | 4 +++- include/rpp.h | 2 +- src/include/common/rpp/handle.hpp | 2 +- utilities/rpp-performancetests/HIP_NEW/Single_host.cpp | 2 ++ utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp | 2 ++ utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp | 2 ++ utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp | 2 ++ utilities/rpp-performancetests/HOST_NEW/Single_host.cpp | 2 ++ utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp | 2 ++ utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp | 2 ++ utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp | 2 ++ utilities/rpp-performancetests/OCL_NEW/Single_host.cpp | 2 ++ utilities/rpp-unittests/HIP_NEW/Single_host.cpp | 2 ++ utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp | 2 ++ utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp | 2 ++ utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp | 2 ++ utilities/rpp-unittests/HOST_NEW/Single_host.cpp | 2 ++ utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp | 2 ++ utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp | 2 ++ utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp | 2 ++ utilities/rpp-unittests/OCL_NEW/Single_host.cpp | 2 ++ utilities/test_suite/HOST/Tensor_host.cpp | 2 ++ 22 files changed, 43 insertions(+), 3 deletions(-) diff --git a/Readme.md b/Readme.md index 54ccf42c2..bae6743db 100644 --- a/Readme.md +++ b/Readme.md @@ -139,7 +139,7 @@ AMD ROCm Performance Primitives (**RPP**) library is a comprehensive high-perfor sudo make install ``` -## Build & Install RPP +## Build & Install RPP The ROCm Performance Primitives (RPP) library has support for three backends: HIP, OpenCL, and CPU: @@ -221,6 +221,8 @@ $ sudo make install // Create handle rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/include/rpp.h b/include/rpp.h index 666131107..e4bb6fe7d 100644 --- a/include/rpp.h +++ b/include/rpp.h @@ -103,7 +103,7 @@ extern "C" SHARED_PUBLIC rppStatus_t rppCreate(rppHandle_t* handle); // *param[in] nBatchSize Batch size // *param[in] numThreads number of threads to be used for OpenMP pragma // *returns a rppStatus_t enumeration.
-extern "C" SHARED_PUBLIC rppStatus_t rppCreateWithBatchSize(rppHandle_t* handle, size_t nBatchSize, Rpp32u numThreads); +extern "C" SHARED_PUBLIC rppStatus_t rppCreateWithBatchSize(rppHandle_t* handle, size_t nBatchSize, Rpp32u numThreads = 0); /******************** rppDestroy ********************/ diff --git a/src/include/common/rpp/handle.hpp b/src/include/common/rpp/handle.hpp index 5ac3bab09..f49f3848e 100644 --- a/src/include/common/rpp/handle.hpp +++ b/src/include/common/rpp/handle.hpp @@ -66,7 +66,7 @@ using rocblas_handle_ptr = RPP_MANAGE_PTR(rocblas_handle, rocblas_destroy_handle struct Handle : rppHandle { Handle(); - Handle(size_t nBatchSize, Rpp32u numThreads); + Handle(size_t nBatchSize, Rpp32u numThreads = 0); Handle(Handle&&) noexcept; ~Handle(); diff --git a/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp b/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp index 2ec955479..c0550f6f1 100644 --- a/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp +++ b/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp @@ -176,6 +176,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp index 88820af63..0d9222e04 100644 --- a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp @@ -645,6 +645,8 @@ int main(int argc, char **argv) } rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp index 1c3c889b2..65dfee771 100644 --- a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp @@ -646,6 +646,8 @@ int main(int argc, char **argv) } rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp index 7c00bdc20..14a94bcec 100644 --- a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp @@ -748,6 +748,8 @@ int main(int argc, char **argv) } rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. 
if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp b/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp index 2ec955479..c0550f6f1 100644 --- a/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp @@ -176,6 +176,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp index a1160c544..7a8a3e217 100644 --- a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp @@ -595,6 +595,8 @@ int main(int argc, char **argv) // Run case-wise RPP API and measure time rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp index 4e6a2305b..e2eea3981 100644 --- a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp @@ -586,6 +586,8 @@ int main(int argc, char **argv) // Run case-wise RPP API and measure time rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp index 0d4c16aab..91746f9f9 100644 --- a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp @@ -672,6 +672,8 @@ int main(int argc, char **argv) // Run case-wise RPP API and measure time rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp b/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp index 2ec955479..c0550f6f1 100644 --- a/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp +++ b/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp @@ -176,6 +176,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-unittests/HIP_NEW/Single_host.cpp b/utilities/rpp-unittests/HIP_NEW/Single_host.cpp index 2ec955479..c0550f6f1 100644 --- a/utilities/rpp-unittests/HIP_NEW/Single_host.cpp +++ b/utilities/rpp-unittests/HIP_NEW/Single_host.cpp @@ -176,6 +176,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. 
if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp index 1689ef5e4..721fba4ee 100644 --- a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp +++ b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp @@ -647,6 +647,8 @@ int main(int argc, char **argv) } rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp index 373d3f773..cd614ef19 100644 --- a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp +++ b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp @@ -648,6 +648,8 @@ int main(int argc, char **argv) } rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp index a98c6daf0..8c82981b9 100644 --- a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp +++ b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp @@ -751,6 +751,8 @@ int main(int argc, char **argv) } rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-unittests/HOST_NEW/Single_host.cpp b/utilities/rpp-unittests/HOST_NEW/Single_host.cpp index 2ec955479..c0550f6f1 100644 --- a/utilities/rpp-unittests/HOST_NEW/Single_host.cpp +++ b/utilities/rpp-unittests/HOST_NEW/Single_host.cpp @@ -176,6 +176,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp index 62512b0ea..66df4717d 100644 --- a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp +++ b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp @@ -609,6 +609,8 @@ int main(int argc, char **argv) // Run case-wise RPP API and measure time rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp index b1917ba41..6668d9040 100644 --- a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp +++ b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp @@ -599,6 +599,8 @@ int main(int argc, char **argv) // Run case-wise RPP API and measure time rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. 
if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp index 296fb945f..1563ef2e6 100644 --- a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp +++ b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp @@ -685,6 +685,8 @@ int main(int argc, char **argv) // Run case-wise RPP API and measure time rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-unittests/OCL_NEW/Single_host.cpp b/utilities/rpp-unittests/OCL_NEW/Single_host.cpp index 2ec955479..c0550f6f1 100644 --- a/utilities/rpp-unittests/OCL_NEW/Single_host.cpp +++ b/utilities/rpp-unittests/OCL_NEW/Single_host.cpp @@ -176,6 +176,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp index 06453d67a..54f2496ce 100644 --- a/utilities/test_suite/HOST/Tensor_host.cpp +++ b/utilities/test_suite/HOST/Tensor_host.cpp @@ -380,6 +380,8 @@ int main(int argc, char **argv) // Run case-wise RPP API and measure time rppHandle_t handle; + + // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); From 34cc8dcdca14c183e88e10f536f1f0bcfa7c5d99 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Mon, 3 Apr 2023 00:49:47 -0700 Subject: [PATCH 12/15] added max limit for numThreads --- src/modules/handlehost.cpp | 8 +++++--- src/modules/hip/handlehip.cpp | 7 +++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/modules/handlehost.cpp b/src/modules/handlehost.cpp index 053388c3f..d8c078645 100644 --- a/src/modules/handlehost.cpp +++ b/src/modules/handlehost.cpp @@ -29,6 +29,7 @@ #include #endif +#include #include "config.h" #include "rpp/logger.hpp" #include "rpp/handle.hpp" @@ -46,9 +47,6 @@ struct HandleImpl void PreInitializeBufferCPU() { this->initHandle = new InitHandle(); - if(this->numThreads == 0) - this->numThreads = this->nBatchSize; - this->initHandle->nbatchSize = this->nBatchSize; this->initHandle->mem.mcpu.maxSrcSize = (RppiSize *)malloc(sizeof(RppiSize) * this->nBatchSize); this->initHandle->mem.mcpu.maxDstSize = (RppiSize *)malloc(sizeof(RppiSize) * this->nBatchSize); @@ -60,6 +58,7 @@ struct HandleImpl Handle::Handle(size_t batchSize, Rpp32u numThreads) : impl(new HandleImpl()) { impl->nBatchSize = batchSize; + numThreads = std::min(numThreads, std::thread::hardware_concurrency()); if(numThreads == 0) numThreads = batchSize; impl->numThreads = numThreads; @@ -69,6 +68,9 @@ Handle::Handle(size_t batchSize, Rpp32u numThreads) : impl(new HandleImpl()) Handle::Handle() : impl(new HandleImpl()) { impl->PreInitializeBufferCPU(); + impl->numThreads = std::min(impl->numThreads, std::thread::hardware_concurrency()); + if(impl->numThreads == 0) + impl->numThreads = impl->nBatchSize; RPP_LOG_I(*this); } diff --git a/src/modules/hip/handlehip.cpp 
b/src/modules/hip/handlehip.cpp index 273bf3f98..f61e2d30f 100644 --- a/src/modules/hip/handlehip.cpp +++ b/src/modules/hip/handlehip.cpp @@ -29,6 +29,7 @@ #include #endif +#include #include "config.h" #include "rpp/device_name.hpp" #include "rpp/errors.hpp" @@ -172,8 +173,6 @@ struct HandleImpl void PreInitializeBufferCPU() { this->initHandle = new InitHandle(); - if(this->numThreads == 0) - this->numThreads = this->nBatchSize; this->initHandle->nbatchSize = this->nBatchSize; this->initHandle->mem.mcpu.srcSize = (RppiSize *)malloc(sizeof(RppiSize) * this->nBatchSize); @@ -283,6 +282,7 @@ Handle::Handle(rppAcceleratorQueue_t stream) : impl(new HandleImpl()) Handle::Handle(size_t batchSize, Rpp32u numThreads) : impl(new HandleImpl()) { impl->nBatchSize = batchSize; + numThreads = std::min(numThreads, std::thread::hardware_concurrency()); if(numThreads == 0) numThreads = batchSize; impl->numThreads = numThreads; @@ -303,6 +303,9 @@ Handle::Handle() : impl(new HandleImpl()) #endif this->SetAllocator(nullptr, nullptr, nullptr); impl->PreInitializeBuffer(); + impl->numThreads = std::min(impl->numThreads, std::thread::hardware_concurrency()); + if(impl->numThreads == 0) + impl->numThreads = impl->nBatchSize; RPP_LOG_I(*this); } From 6b69ec1a63b223c77e299f264b53de06ca7d55d6 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Tue, 4 Apr 2023 09:39:33 +0000 Subject: [PATCH 13/15] updated comment for numThreads added in test suite --- Readme.md | 3 ++- utilities/rpp-performancetests/HIP_NEW/Single_host.cpp | 3 ++- utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp | 3 ++- utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp | 3 ++- utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp | 3 ++- utilities/rpp-performancetests/HOST_NEW/Single_host.cpp | 3 ++- utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp | 3 ++- utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp | 3 ++- utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp | 3 ++- utilities/rpp-performancetests/OCL_NEW/Single_host.cpp | 3 ++- utilities/rpp-unittests/HIP_NEW/Single_host.cpp | 3 ++- utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp | 3 ++- utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp | 3 ++- utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp | 3 ++- utilities/rpp-unittests/HOST_NEW/Single_host.cpp | 3 ++- utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp | 3 ++- utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp | 3 ++- utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp | 3 ++- utilities/rpp-unittests/OCL_NEW/Single_host.cpp | 3 ++- utilities/test_suite/HOST/Tensor_host.cpp | 3 ++- 20 files changed, 40 insertions(+), 20 deletions(-) diff --git a/Readme.md b/Readme.md index bae6743db..de0ecfe65 100644 --- a/Readme.md +++ b/Readme.md @@ -222,7 +222,8 @@ $ sudo make install // Create handle rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. 
+ // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp b/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp index c0550f6f1..7a88bd8b1 100644 --- a/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp +++ b/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp @@ -177,7 +177,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. + // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp index 0d9222e04..02277bf39 100644 --- a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp @@ -646,7 +646,8 @@ int main(int argc, char **argv) rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. + // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp index 65dfee771..20858dc8d 100644 --- a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp @@ -647,7 +647,8 @@ int main(int argc, char **argv) rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. + // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp index 14a94bcec..c4046385c 100644 --- a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp @@ -749,7 +749,8 @@ int main(int argc, char **argv) rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. 
+ // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp b/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp index c0550f6f1..7a88bd8b1 100644 --- a/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp @@ -177,7 +177,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. + // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp index 7a8a3e217..58b36bd90 100644 --- a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp @@ -596,7 +596,8 @@ int main(int argc, char **argv) rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. + // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp index e2eea3981..55ca0d719 100644 --- a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp @@ -587,7 +587,8 @@ int main(int argc, char **argv) rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. + // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp index 91746f9f9..0905c2c60 100644 --- a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp +++ b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp @@ -673,7 +673,8 @@ int main(int argc, char **argv) rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. 
+ // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp b/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp index c0550f6f1..7a88bd8b1 100644 --- a/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp +++ b/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp @@ -177,7 +177,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. + // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-unittests/HIP_NEW/Single_host.cpp b/utilities/rpp-unittests/HIP_NEW/Single_host.cpp index c0550f6f1..7a88bd8b1 100644 --- a/utilities/rpp-unittests/HIP_NEW/Single_host.cpp +++ b/utilities/rpp-unittests/HIP_NEW/Single_host.cpp @@ -177,7 +177,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. + // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp index 721fba4ee..767f6f211 100644 --- a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp +++ b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp @@ -648,7 +648,8 @@ int main(int argc, char **argv) rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. + // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp index cd614ef19..9503b12cb 100644 --- a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp +++ b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp @@ -649,7 +649,8 @@ int main(int argc, char **argv) rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. 
+ // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp index 8c82981b9..63e2e3352 100644 --- a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp +++ b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp @@ -752,7 +752,8 @@ int main(int argc, char **argv) rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. + // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-unittests/HOST_NEW/Single_host.cpp b/utilities/rpp-unittests/HOST_NEW/Single_host.cpp index c0550f6f1..7a88bd8b1 100644 --- a/utilities/rpp-unittests/HOST_NEW/Single_host.cpp +++ b/utilities/rpp-unittests/HOST_NEW/Single_host.cpp @@ -177,7 +177,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. + // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp index 66df4717d..40ed47a29 100644 --- a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp +++ b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp @@ -610,7 +610,8 @@ int main(int argc, char **argv) rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. + // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp index 6668d9040..81fc02522 100644 --- a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp +++ b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp @@ -600,7 +600,8 @@ int main(int argc, char **argv) rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. 
+ // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp index 1563ef2e6..4595f2e84 100644 --- a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp +++ b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp @@ -686,7 +686,8 @@ int main(int argc, char **argv) rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. + // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); clock_t start, end; diff --git a/utilities/rpp-unittests/OCL_NEW/Single_host.cpp b/utilities/rpp-unittests/OCL_NEW/Single_host.cpp index c0550f6f1..7a88bd8b1 100644 --- a/utilities/rpp-unittests/OCL_NEW/Single_host.cpp +++ b/utilities/rpp-unittests/OCL_NEW/Single_host.cpp @@ -177,7 +177,8 @@ int main(int argc, char **argv) closedir(dr2); rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. + // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp index 54f2496ce..fc1cbafca 100644 --- a/utilities/test_suite/HOST/Tensor_host.cpp +++ b/utilities/test_suite/HOST/Tensor_host.cpp @@ -381,7 +381,8 @@ int main(int argc, char **argv) // Run case-wise RPP API and measure time rppHandle_t handle; - // Number of threads to be used for OpenMP pragma. if numThreads value passed is 0, it will be reset to batch size + // Set the number of threads to be used by OpenMP pragma for RPP batch processing on host. 
+ // If numThreads value passed is 0, number of OpenMP threads used by RPP will be set to batch size Rpp32u numThreads = 0; rppCreateWithBatchSize(&handle, noOfImages, numThreads); From bd66615b1ac7b6a335348659a41ce747cfb2165f Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Mon, 17 Apr 2023 11:26:32 +0000 Subject: [PATCH 14/15] added default value for numThreads for the internal hip handle creation call --- src/include/common/rpp/handle.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/common/rpp/handle.hpp b/src/include/common/rpp/handle.hpp index f49f3848e..7dc29e49b 100644 --- a/src/include/common/rpp/handle.hpp +++ b/src/include/common/rpp/handle.hpp @@ -84,7 +84,7 @@ struct Handle : rppHandle { // Host handle related Handle(); - Handle(size_t nBatchSize, Rpp32u numThreads); + Handle(size_t nBatchSize, Rpp32u numThreads = 0); Handle(Handle&&) noexcept; ~Handle(); InitHandle* GetInitHandle() const; From e02f0b417ae335469c5a432699f0b4f5577c5960 Mon Sep 17 00:00:00 2001 From: Kiriti Gowda Date: Tue, 2 May 2023 11:10:13 -0700 Subject: [PATCH 15/15] CMakeList - Version Update RPP Version update for backward compatibility failure --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d5cbc2bc7..ae50d00ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ set(CMAKE_CXX_STANDARD 14) project(rpp CXX) # RPP Version -set(VERSION "1.0.0") +set(VERSION "1.1.0") set(CMAKE_INSTALL_LIBDIR "lib" CACHE STRING "Library install directory") include(GNUInstallDirs)
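For readers of the series, the net effect of the host handle changes above is a simple thread-count resolution: the value passed to `rppCreateWithBatchSize()` is clamped to the hardware concurrency, and a value of 0 falls back to the batch size. The sketch below mirrors that logic as a standalone helper; `resolveNumThreads` is an illustrative name only and is not part of the RPP API.

```cpp
#include <algorithm>
#include <thread>

#include "rpp.h"    // for Rpp32u

// Illustrative helper (not part of RPP) mirroring the Handle constructor logic in this series:
// clamp the requested OpenMP thread count to the available hardware threads, and fall back to
// the batch size when the caller passes 0 (or when hardware_concurrency() itself reports 0).
static Rpp32u resolveNumThreads(Rpp32u numThreads, size_t batchSize)
{
    numThreads = std::min<Rpp32u>(numThreads, std::thread::hardware_concurrency());
    if (numThreads == 0)
        numThreads = static_cast<Rpp32u>(batchSize);
    return numThreads;
}
```

The clamping happens inside the handle itself, so callers only pass the raw request and read the resolved value back through `GetNumThreads()`.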
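Because `rppCreateWithBatchSize()` and the host `Handle` constructor now declare `numThreads` with a default of 0, C++ callers that include `rpp.h` can keep the older two-argument call while new code can pin the thread count explicitly. A minimal usage sketch, assuming the usual `rppDestroyHost()` teardown call (not shown in the hunks above); the batch size of 8 and thread count of 4 are arbitrary example values:

```cpp
#include "rpp.h"

int main()
{
    size_t noOfImages = 8;    // example batch size
    rppHandle_t handle;

    // Pre-existing call sites: numThreads defaults to 0, so RPP uses one OpenMP thread per image.
    rppCreateWithBatchSize(&handle, noOfImages);
    rppDestroyHost(handle);

    // New call sites: request a specific OpenMP thread count (clamped to hardware concurrency).
    rppCreateWithBatchSize(&handle, noOfImages, 4);
    rppDestroyHost(handle);

    return 0;
}
```

Since `rppCreateWithBatchSize()` is declared `extern "C"`, the default argument only helps C++ translation units; C callers must still pass `numThreads` explicitly, which is consistent with the version bump in PATCH 15/15 noting a backward-compatibility break.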