diff --git a/Readme.md b/Readme.md
index 0b18c7218..54ccf42c2 100644
--- a/Readme.md
+++ b/Readme.md
@@ -221,7 +221,8 @@ $ sudo make install
     // Create handle
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
 
     // Call the RPP API for the specific variant required (pkd3/pln3/pln1)
     rppi_brightness_u8_pkd3_batchPD_host(input, srcSize, maxSize, output, alpha, beta, noOfImages, handle);
diff --git a/include/rpp.h b/include/rpp.h
index 9346cd869..666131107 100644
--- a/include/rpp.h
+++ b/include/rpp.h
@@ -101,8 +101,9 @@ extern "C" SHARED_PUBLIC rppStatus_t rppCreate(rppHandle_t* handle);
 // Function to create a rpp handle for a batch. To be called in the beginning to initialize the rpp environment
 // *param[in] handle A pointer to rpp handle of type rppHandle_t
 // *param[in] nBatchSize Batch size
+// *param[in] numThreads number of threads to be used for OpenMP pragma
 // *returns a rppStatus_t enumeration.
-extern "C" SHARED_PUBLIC rppStatus_t rppCreateWithBatchSize(rppHandle_t* handle, size_t nBatchSize);
+extern "C" SHARED_PUBLIC rppStatus_t rppCreateWithBatchSize(rppHandle_t* handle, size_t nBatchSize, Rpp32u numThreads);
 
 /******************** rppDestroy ********************/
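(For orientation, a minimal host-side sketch of the updated setup. `noOfImages`, the brightness call, and the `numThreads = 0` default come from the Readme hunk above; the `rppDestroyHost` teardown is an assumption about the matching cleanup call, not shown in this diff.)

```cpp
#include <rpp.h>

// Hypothetical driver mirroring the Readme snippet. numThreads feeds the
// num_threads() clause of the OpenMP loops in the host batch kernels; the
// Readme passes 0, which presumably leaves the choice to the library.
void runBrightnessBatch(RppPtr_t input, RppiSize *srcSize, RppiSize maxSize,
                        RppPtr_t output, Rpp32f *alpha, Rpp32f *beta,
                        Rpp32u noOfImages)
{
    rppHandle_t handle;
    Rpp32u numThreads = 8;  // e.g. cap host kernels at 8 OpenMP threads
    rppCreateWithBatchSize(&handle, noOfImages, numThreads);

    rppi_brightness_u8_pkd3_batchPD_host(input, srcSize, maxSize, output,
                                         alpha, beta, noOfImages, handle);

    rppDestroyHost(handle);  // assumption: host handles are released via rppDestroyHost
}
```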
diff --git a/include/rppt_tensor_color_augmentations.h b/include/rppt_tensor_color_augmentations.h
index c1f0a038a..b362d16b4 100644
--- a/include/rppt_tensor_color_augmentations.h
+++ b/include/rppt_tensor_color_augmentations.h
@@ -40,6 +40,7 @@ extern "C" {
 // *param[in] betaTensor beta values for brightness calculation (1D tensor of size batchSize with 0 <= beta <= 255 for each image in batch)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -59,6 +60,7 @@ RppStatus rppt_brightness_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t
 // *param[in] gammaTensor gamma values for gamma correction calculation (1D tensor of size batchSize with gamma >= 0 for each image in batch)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -79,6 +81,7 @@ RppStatus rppt_gamma_correction_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, Rpp
 // *param[in] alphaTensor alpha values for alpha-blending (1D tensor of size batchSize with the transparency factor transparency factor 0 <= alpha <= 1 for each image in batch)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -101,6 +104,7 @@ RppStatus rppt_blend_gpu(RppPtr_t srcPtr1, RppPtr_t srcPtr2, RpptDescPtr srcDesc
 // *param[in] saturationTensor saturation modification parameter for color_jitter calculation (1D tensor of size batchSize with saturationTensor[i] >= 0 for each image in batch)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -123,6 +127,7 @@ RppStatus rppt_color_twist_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t
 // *param[in] saturationTensor saturation modification parameter for color_jitter calculation (1D tensor of size batchSize with saturationTensor[i] >= 0 for each image in batch)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -141,6 +146,7 @@ RppStatus rppt_color_jitter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr
 // *param[in] alphaTensor alpha values for color casting calculation (1D tensor of size sizeof(Rpp32f) * batchSize with alphaTensor[i] >= 0 for each image in batch)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -161,6 +167,7 @@ RppStatus rppt_color_cast_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t
 // *param[in] exposureFactorTensor tensor containing an Rpp32f exposure factor for each image in the batch (exposureFactorTensor[n] >= 0)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -182,6 +189,7 @@ RppStatus rppt_exposure_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t ds
 // *param[in] contrastCenterTensor contrast center values for contrast calculation (1D tensor of size batchSize)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
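(The tensor-API hunks above all append the same trailing `rppHandle` documentation. A hedged sketch of one call follows; the exact `rppt_brightness_host` signature is inferred from the parameter list documented in this header, and all buffer setup is assumed to happen elsewhere.)

```cpp
#include <rpp.h>

// Illustrative call into the brightness tensor API with the documented
// trailing handle. srcPtr/dstPtr, the descriptors, the alpha/beta tensors,
// and the ROI buffer are assumed to be allocated and filled by the caller.
RppStatus callBrightness(RppPtr_t srcPtr, RpptDescPtr srcDescPtr,
                         RppPtr_t dstPtr, RpptDescPtr dstDescPtr,
                         Rpp32f *alphaTensor, Rpp32f *betaTensor,
                         RpptROIPtr roiTensorPtrSrc, rppHandle_t rppHandle)
{
    // "_host" variants take the host handle created by rppCreateWithBatchSize;
    // "_gpu" variants take a HIP handle instead, per the docs above.
    return rppt_brightness_host(srcPtr, srcDescPtr, dstPtr, dstDescPtr,
                                alphaTensor, betaTensor,
                                roiTensorPtrSrc, RpptRoiType::XYWH, rppHandle);
}
```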
diff --git a/include/rppt_tensor_data_exchange_operations.h b/include/rppt_tensor_data_exchange_operations.h
index aa0b8aa31..ceff38ff0 100644
--- a/include/rppt_tensor_data_exchange_operations.h
+++ b/include/rppt_tensor_data_exchange_operations.h
@@ -36,6 +36,7 @@ extern "C" {
 // *param[in] srcDescPtr source tensor descriptor
 // *param[out] dstPtr destination tensor memory
 // *param[in] dstDescPtr destination tensor descriptor
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -53,6 +54,7 @@ RppStatus rppt_copy_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr
 // *param[in] srcDescPtr source tensor descriptor
 // *param[out] dstPtr destination tensor memory
 // *param[in] dstDescPtr destination tensor descriptor
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -71,6 +73,7 @@ RppStatus rppt_swap_channels_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr
 // *param[out] dstPtr destination tensor memory
 // *param[in] dstDescPtr destination tensor descriptor
 // *param[in] srcSubpixelLayout A RpptSubpixelLayout type enum to specify source subpixel layout (RGBtype or BGRtype)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
diff --git a/include/rppt_tensor_effects_augmentations.h b/include/rppt_tensor_effects_augmentations.h
index 1740bc88c..3104d644f 100644
--- a/include/rppt_tensor_effects_augmentations.h
+++ b/include/rppt_tensor_effects_augmentations.h
@@ -42,6 +42,7 @@ extern "C" {
 // *param[in] translateVector translateVector for gridmask calculation = grid X and Y translation lengths in pixels (a single RpptUintVector2D x,y value pair that applies to all images in the batch)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -62,6 +63,7 @@ RppStatus rppt_gridmask_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t ds
 // *param[in] spatterColor RGB values to use for the spatter augmentation (A single set of 3 Rpp8u values as RpptRGB that applies to all images in the batch)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -86,6 +88,7 @@ RppStatus rppt_spatter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dst
 // *param[in] seed A user-defined seed value (single Rpp32u value)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -107,6 +110,7 @@ RppStatus rppt_salt_and_pepper_noise_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr
 // *param[in] seed A user-defined seed value (single Rpp32u value)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -129,6 +133,7 @@ RppStatus rppt_shot_noise_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t
 // *param[in] seed A user-defined seed value (single Rpp32u value)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -149,6 +154,7 @@ RppStatus rppt_gaussian_noise_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPt
 // *param[in] stdDevTensor standard deviation values to quantify non-linearity in the blend (1D tensor of size batchSize with stdDevTensor[n] > 0 for each image in batch)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
diff --git a/include/rppt_tensor_filter_augmentations.h b/include/rppt_tensor_filter_augmentations.h
index 231627a71..47e49001f 100644
--- a/include/rppt_tensor_filter_augmentations.h
+++ b/include/rppt_tensor_filter_augmentations.h
@@ -39,6 +39,7 @@ extern "C" {
 // *param[in] kernelSize kernel size for box_filter (a single Rpp32u odd number with kernelSize = 3/5/7/9 that applies to all images in the batch)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
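(Every ROI-taking function above repeats the same two layouts. A sketch of filling the per-image ROI buffer; the `RpptROI` member names, `xywhROI`/`ltrbROI`, are assumptions based on the XYWH(xy.x, xy.y, roiWidth, roiHeight)/LTRB(lt.x, lt.y, rb.x, rb.y) field lists repeated in these docs.)

```cpp
#include <rpp.h>

// Fill a batchSize-length ROI buffer covering each full image, in the XYWH
// layout; pass RpptRoiType::XYWH as roiType alongside it. The member names
// are assumed from the field lists in the parameter docs above.
void fillFullImageRois(RpptROI *roiTensorPtrSrc, Rpp32u batchSize,
                       Rpp32s width, Rpp32s height)
{
    for (Rpp32u i = 0; i < batchSize; i++)
    {
        roiTensorPtrSrc[i].xywhROI.xy.x = 0;
        roiTensorPtrSrc[i].xywhROI.xy.y = 0;
        roiTensorPtrSrc[i].xywhROI.roiWidth = width;
        roiTensorPtrSrc[i].xywhROI.roiHeight = height;
    }
    // The same buffer could instead carry LTRB data (lt.x, lt.y, rb.x, rb.y),
    // with roiType passed as RpptRoiType::LTRB.
}
```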
diff --git a/include/rppt_tensor_geometric_augmentations.h b/include/rppt_tensor_geometric_augmentations.h
index 76bfebf66..11df31121 100644
--- a/include/rppt_tensor_geometric_augmentations.h
+++ b/include/rppt_tensor_geometric_augmentations.h
@@ -38,6 +38,7 @@ extern "C" {
 // *param[in] dstDescPtr destination tensor descriptor
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -61,6 +62,7 @@ RppStatus rppt_crop_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr
 // *param[in] mirrorTensor mirror flag value to set mirroring on/off for each image in the batch (mirrorTensor[n] = 0/1)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -83,6 +85,7 @@ RppStatus rppt_crop_mirror_normalize_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr
 // *param[in] interpolationType Interpolation type used (RpptInterpolationType::XYWH or RpptRoiType::LTRB)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -104,6 +107,7 @@ RppStatus rppt_warp_affine_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t
 // *param[in] verticalTensor vertical flag value to set vertical flip on/off for each image in the batch (verticalTensor[n] = 0/1)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -125,6 +129,7 @@ RppStatus rppt_flip_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr
 // *param[in] interpolationType resize interpolation type
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : successful completion
 // *retval RPP_ERROR : Error
@@ -149,6 +154,7 @@ RppStatus rppt_resize_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP
 // *param[in] mirrorTensor mirror flag value to set mirroring on/off for each image in the batch (mirrorTensor[n] = 0/1)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : successful completion
 // *retval RPP_ERROR : Error
@@ -171,6 +177,7 @@ RppStatus rppt_resize_mirror_normalize_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescP
 // *param[in] mirrorTensor mirror flag value to set mirroring on/off for each image in the batch (mirrorTensor[n] = 0/1)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : successful completion
 // *retval RPP_ERROR : Error
@@ -192,6 +199,7 @@ RppStatus rppt_resize_crop_mirror_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, R
 // *param[in] interpolationType Interpolation type used (RpptInterpolationType::XYWH or RpptRoiType::LTRB)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
diff --git a/include/rppt_tensor_morphological_operations.h b/include/rppt_tensor_morphological_operations.h
index bb647b687..749cb6c9d 100644
--- a/include/rppt_tensor_morphological_operations.h
+++ b/include/rppt_tensor_morphological_operations.h
@@ -39,6 +39,7 @@ extern "C" {
 // *param[in] kernelSize kernel size for erode (a single Rpp32u odd number with kernelSize = 3/5/7/9 that applies to all images in the batch)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
@@ -58,6 +59,7 @@ RppStatus rppt_erode_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPt
 // *param[in] kernelSize kernel size for dilate (a single Rpp32u odd number with kernelSize = 3/5/7/9 that applies to all images in the batch)
 // *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
 // *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+// *param[in] rppHandle HOST-handle/HIP-handle for "_gpu" variants and Host-handle for "_host" variants
 // *returns a RppStatus enumeration.
 // *retval RPP_SUCCESS : succesful completion
 // *retval RPP_ERROR : Error
diff --git a/src/include/common/rpp/handle.hpp b/src/include/common/rpp/handle.hpp
index 1940b494f..5ac3bab09 100644
--- a/src/include/common/rpp/handle.hpp
+++ b/src/include/common/rpp/handle.hpp
@@ -66,12 +66,13 @@ using rocblas_handle_ptr = RPP_MANAGE_PTR(rocblas_handle, rocblas_destroy_handle);
 struct Handle : rppHandle
 {
     Handle();
-    Handle(size_t nBatchSize);
+    Handle(size_t nBatchSize, Rpp32u numThreads);
     Handle(Handle&&) noexcept;
     ~Handle();
     InitHandle* GetInitHandle() const;
     size_t GetBatchSize() const;
+    Rpp32u GetNumThreads() const;
     void SetBatchSize(size_t bSize) const;
     void rpp_destroy_object_host();
     std::unique_ptr<HandleImpl> impl;
@@ -83,11 +84,12 @@ struct Handle : rppHandle
 {
     // Host handle related
     Handle();
-    Handle(size_t nBatchSize);
+    Handle(size_t nBatchSize, Rpp32u numThreads);
     Handle(Handle&&) noexcept;
     ~Handle();
     InitHandle* GetInitHandle() const;
     size_t GetBatchSize() const;
+    Rpp32u GetNumThreads() const;
     void SetBatchSize(size_t bSize) const;
     void rpp_destroy_object_host();
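(The .hpp hunks that follow apply one mechanical change per function. Distilled below, with a stub standing in for the per-image work; `rpp::Handle` and `GetNumThreads()` are exactly as declared in the handle.hpp hunk above, while the include path is an assumption from this diff's file layout.)

```cpp
#include <omp.h>
#include "rpp/handle.hpp"  // declares rpp::Handle (path as in this diff)

// Before: num_threads(nbatchSize) spawned one OpenMP thread per image, so a
// batch of 128 requested 128 threads regardless of core count. After: the
// count comes from the handle via the new GetNumThreads() accessor.
void processBatch(rpp::Handle& handle, int nbatchSize)
{
    Rpp32u numThreads = handle.GetNumThreads();
    omp_set_dynamic(0);  // disable dynamic adjustment so the count is honored
#pragma omp parallel for num_threads(numThreads)
    for (int batchCount = 0; batchCount < nbatchSize; batchCount++)
    {
        // per-image work for image batchCount goes here
    }
}
```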
diff --git a/src/modules/cpu/host_advanced_augmentations.hpp b/src/modules/cpu/host_advanced_augmentations.hpp
index 4cf21203a..44f5c5355 100644
--- a/src/modules/cpu/host_advanced_augmentations.hpp
+++ b/src/modules/cpu/host_advanced_augmentations.hpp
@@ -34,12 +34,13 @@ RppStatus water_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s
                            Rpp32f *batch_freq_x, Rpp32f *batch_freq_y, Rpp32f *batch_phase_x, Rpp32f *batch_phase_y,
                            Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                           RppiChnFormat chnFormat, Rpp32u channel)
+                           RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -169,7 +170,7 @@ RppStatus water_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -289,12 +290,13 @@ template <typename T>
 RppStatus non_linear_blend_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                       Rpp32f *batch_std_dev, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                      RppiChnFormat chnFormat, Rpp32u channel)
+                                      RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -430,7 +432,7 @@ RppStatus non_linear_blend_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_sr
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -578,12 +580,13 @@ RppStatus non_linear_blend_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_sr
 RppStatus non_linear_blend_f32_host_batch(Rpp32f* srcPtr1, Rpp32f* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32f* dstPtr,
                                           Rpp32f *batch_std_dev, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                          RppiChnFormat chnFormat, Rpp32u channel)
+                                          RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -701,7 +704,7 @@ RppStatus non_linear_blend_f32_host_batch(Rpp32f* srcPtr1, Rpp32f* srcPtr2, Rppi
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -830,12 +833,13 @@ RppStatus non_linear_blend_f32_host_batch(Rpp32f* srcPtr1, Rpp32f* srcPtr2, Rppi
 RppStatus non_linear_blend_f16_host_batch(Rpp16f* srcPtr1, Rpp16f* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp16f* dstPtr,
                                           Rpp32f *batch_std_dev, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                          RppiChnFormat chnFormat, Rpp32u channel)
+                                          RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -966,7 +970,7 @@ RppStatus non_linear_blend_f16_host_batch(Rpp16f* srcPtr1, Rpp16f* srcPtr2, Rppi
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1108,7 +1112,7 @@ RppStatus non_linear_blend_f16_host_batch(Rpp16f* srcPtr1, Rpp16f* srcPtr2, Rppi
 RppStatus non_linear_blend_i8_host_batch(Rpp8s* srcPtr1, Rpp8s* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp8s* dstPtr,
                                          Rpp32f *batch_std_dev, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                         RppiChnFormat chnFormat, Rpp32u channel)
+                                         RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
     Rpp64u bufferLength = batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * channel * nbatchSize;
@@ -1134,7 +1138,7 @@ RppStatus non_linear_blend_i8_host_batch(Rpp8s* srcPtr1, Rpp8s* srcPtr2, RppiSiz
         srcPtr2_8uTemp++;
     }
 
-    non_linear_blend_host_batch(srcPtr1_8u, srcPtr2_8u, batch_srcSize, batch_srcSizeMax, dstPtr_8u, batch_std_dev, outputFormatToggle, nbatchSize, chnFormat, channel);
+    non_linear_blend_host_batch(srcPtr1_8u, srcPtr2_8u, batch_srcSize, batch_srcSizeMax, dstPtr_8u, batch_std_dev, outputFormatToggle, nbatchSize, chnFormat, channel, handle);
 
     Rpp8s *dstPtrTemp;
     dstPtrTemp = dstPtr;
@@ -1162,12 +1166,13 @@ template <typename T>
 RppStatus color_cast_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                 Rpp8u *batch_r, Rpp8u *batch_g, Rpp8u *batch_b, Rpp32f *batch_alpha,
                                 Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                RppiChnFormat chnFormat, Rpp32u channel)
+                                RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1269,7 +1274,7 @@ RppStatus color_cast_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *ba
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1383,12 +1388,13 @@ RppStatus color_cast_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *ba
 RppStatus color_cast_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32f* dstPtr,
                                     Rpp8u *batch_r, Rpp8u *batch_g, Rpp8u *batch_b, Rpp32f *batch_alpha,
                                     Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                    RppiChnFormat chnFormat, Rpp32u channel)
+                                    RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1476,7 +1482,7 @@ RppStatus color_cast_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcSize, Rpp
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1576,12 +1582,13 @@ RppStatus color_cast_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcSize, Rpp
 RppStatus color_cast_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp16f* dstPtr,
                                     Rpp8u *batch_r, Rpp8u *batch_g, Rpp8u *batch_b, Rpp32f *batch_alpha,
                                     Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                    RppiChnFormat chnFormat, Rpp32u channel)
+                                    RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1681,7 +1688,7 @@ RppStatus color_cast_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_srcSize, Rpp
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1793,7 +1800,7 @@ RppStatus color_cast_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_srcSize, Rpp
 RppStatus color_cast_i8_host_batch(Rpp8s* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp8s* dstPtr,
                                    Rpp8u *batch_r, Rpp8u *batch_g, Rpp8u *batch_b, Rpp32f *batch_alpha,
                                    Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                   RppiChnFormat chnFormat, Rpp32u channel)
+                                   RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
     Rpp64u bufferLength = batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * channel * nbatchSize;
@@ -1813,7 +1820,7 @@ RppStatus color_cast_i8_host_batch(Rpp8s* srcPtr, RppiSize *batch_srcSize, RppiS
         srcPtr_8uTemp++;
     }
 
-    color_cast_host_batch(srcPtr_8u, batch_srcSize, batch_srcSizeMax, dstPtr_8u, batch_r, batch_g, batch_b, batch_alpha, outputFormatToggle, nbatchSize, chnFormat, channel);
+    color_cast_host_batch(srcPtr_8u, batch_srcSize, batch_srcSizeMax, dstPtr_8u, batch_r, batch_g, batch_b, batch_alpha, outputFormatToggle, nbatchSize, chnFormat, channel, handle);
 
     Rpp8s *dstPtrTemp;
     dstPtrTemp = dstPtr;
@@ -1840,12 +1847,13 @@ template <typename T>
 RppStatus erase_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                            Rpp32u *batch_anchor_box_info, T *batch_colors, Rpp32u *batch_box_offset, Rpp32u *batch_num_of_boxes,
                            Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                           RppiChnFormat chnFormat, Rpp32u channel)
+                           RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1922,7 +1930,7 @@ RppStatus erase_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -2006,12 +2014,13 @@ RppStatus crop_and_patch_host_batch(T* srcPtr1, RppiSize *batch_srcSize1, RppiSi
                                     Rpp32u *batch_src1x1, Rpp32u *batch_src1y1, Rpp32u *batch_src1x2, Rpp32u *batch_src1y2,
                                     Rpp32u *batch_src2x1, Rpp32u *batch_src2y1, Rpp32u *batch_src2x2, Rpp32u *batch_src2y2,
                                     Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                    RppiChnFormat chnFormat, Rpp32u channel)
+                                    RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u dstImageDimMax = batch_srcSizeMax1[batchCount].height * batch_srcSizeMax1[batchCount].width;
@@ -2135,7 +2144,7 @@ RppStatus crop_and_patch_host_batch(T* srcPtr1, RppiSize *batch_srcSize1, RppiSi
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u dstImageDimMax = batch_srcSizeMax1[batchCount].height * batch_srcSizeMax1[batchCount].width;
@@ -2258,14 +2267,14 @@ template <typename T>
 RppStatus lut_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                          T *batch_lutPtr, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                         RppiChnFormat chnFormat, Rpp32u channel)
+                         RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
     Rpp32u lutSize = 256;
-
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -2337,7 +2346,7 @@ RppStatus lut_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_src
     else if(chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -2433,12 +2442,13 @@ RppStatus glitch_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_
                             Rpp32u *batch_x_offset_g, Rpp32u *batch_y_offset_g, Rpp32u *batch_x_offset_b, Rpp32u *batch_y_offset_b,
                             Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                            RppiChnFormat chnFormat, Rpp32u channel)
+                            RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -2539,7 +2549,7 @@ RppStatus glitch_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_
     else if (chnFormat == RPPI_CHN_PACKED)
    {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
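(Why this matters, sketched: binding the thread count to the batch size oversubscribes the machine whenever nbatchSize exceeds the core count. How a zero `numThreads` is resolved is not shown in this diff; the fallback below is only a plausible guess at such a policy, not the library's confirmed behavior.)

```cpp
#include <omp.h>
#include <algorithm>

// Hypothetical resolution of the numThreads argument from rppCreateWithBatchSize.
// The real policy lives in the Handle constructor, which this diff does not show.
Rpp32u resolveNumThreads(Rpp32u requested, Rpp32u nBatchSize)
{
    if (requested == 0)  // 0 = "let the library decide" (assumption)
        return std::min<Rpp32u>(nBatchSize, (Rpp32u)omp_get_max_threads());
    return requested;
}
```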
diff --git a/src/modules/cpu/host_arithmetic_operations.hpp b/src/modules/cpu/host_arithmetic_operations.hpp
index 2a5e6494d..ca5dc5b23 100644
--- a/src/modules/cpu/host_arithmetic_operations.hpp
+++ b/src/modules/cpu/host_arithmetic_operations.hpp
@@ -30,12 +30,13 @@ THE SOFTWARE.
 template <typename T>
 RppStatus absolute_difference_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                          RppiROI *roiPoints, Rpp32u nbatchSize,
-                                         RppiChnFormat chnFormat, Rpp32u channel)
+                                         RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -119,7 +120,7 @@ RppStatus absolute_difference_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -235,12 +236,13 @@ template <typename T>
 RppStatus accumulate_weighted_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax,
                                          Rpp32f *batch_alpha, RppiROI *roiPoints, Rpp32u nbatchSize,
-                                         RppiChnFormat chnFormat, Rpp32u channel)
+                                         RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -315,7 +317,7 @@ RppStatus accumulate_weighted_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -420,12 +422,13 @@ RppStatus accumulate_weighted_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize,
 template <typename T>
 RppStatus accumulate_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax,
                                 RppiROI *roiPoints, Rpp32u nbatchSize,
-                                RppiChnFormat chnFormat, Rpp32u channel)
+                                RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -498,7 +501,7 @@ RppStatus accumulate_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize,
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -599,12 +602,13 @@ RppStatus accumulate_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize,
 template <typename T>
 RppStatus add_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                          RppiROI *roiPoints, Rpp32u nbatchSize,
-                         RppiChnFormat chnFormat, Rpp32u channel)
+                         RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -687,7 +691,7 @@ RppStatus add_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSi
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -800,12 +804,13 @@ RppStatus add_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr,
 template <typename T>
 RppStatus subtract_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                               RppiROI *roiPoints, Rpp32u nbatchSize,
-                              RppiChnFormat chnFormat, Rpp32u channel)
+                              RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -888,7 +893,7 @@ RppStatus subtract_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, R
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -985,12 +990,13 @@ RppStatus subtract_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr,
 template <typename T>
 RppStatus magnitude_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                RppiROI *roiPoints, Rpp32u nbatchSize,
-                               RppiChnFormat chnFormat, Rpp32u channel)
+                               RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1077,7 +1083,7 @@ RppStatus magnitude_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize,
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1177,12 +1183,13 @@ RppStatus magnitude_host(T* srcPtr1, T* srcPtr2, RppiSize srcSize, T* dstPtr,
 template <typename T>
 RppStatus multiply_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                               RppiROI *roiPoints, Rpp32u nbatchSize,
-                              RppiChnFormat chnFormat, Rpp32u channel)
+                              RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1265,7 +1272,7 @@ RppStatus multiply_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, R
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1362,13 +1369,14 @@ RppStatus multiply_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr,
 template <typename T>
 RppStatus phase_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                            RppiROI *roiPoints, Rpp32u nbatchSize,
-                           RppiChnFormat chnFormat, Rpp32u channel)
+                           RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
     Rpp32f multiplier = 255 / 1.570796;
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1452,7 +1460,7 @@ RppStatus phase_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, Rppi
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1568,12 +1576,13 @@ RppStatus phase_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr,
 template <typename T>
 RppStatus accumulate_squared_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax,
                                         RppiROI *roiPoints, Rpp32u nbatchSize,
-                                        RppiChnFormat chnFormat, Rpp32u channel)
+                                        RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1641,7 +1650,7 @@ RppStatus accumulate_squared_host_batch(T* srcPtr, RppiSize *batch_srcSize, Rppi
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
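(All of these templated kernels now end with an `rpp::Handle&` parameter, so their callers must forward the handle. A sketch of what an updated batchPD wrapper might look like; `rpp::deref` (rppHandle_t to rpp::Handle&) and the wrapper body are assumptions for illustration, not the repo's exact code.)

```cpp
// Hypothetical wrapper forwarding the public handle into add_host_batch,
// whose new trailing parameter is shown in the hunks above.
RppStatus rppi_add_u8_pln1_batchPD_host_sketch(RppPtr_t srcPtr1, RppPtr_t srcPtr2,
                                               RppiSize *srcSize, RppiSize *srcSizeMax,
                                               RppPtr_t dstPtr, RppiROI *roiPoints,
                                               Rpp32u nbatchSize, rppHandle_t rppHandle)
{
    add_host_batch<Rpp8u>(static_cast<Rpp8u*>(srcPtr1), static_cast<Rpp8u*>(srcPtr2),
                          srcSize, srcSizeMax, static_cast<Rpp8u*>(dstPtr),
                          roiPoints, nbatchSize,
                          RPPI_CHN_PLANAR, 1, rpp::deref(rppHandle));
    return RPP_SUCCESS;
}
```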
diff --git a/src/modules/cpu/host_color_model_conversions.hpp b/src/modules/cpu/host_color_model_conversions.hpp
index 289c6916c..e67238968 100644
--- a/src/modules/cpu/host_color_model_conversions.hpp
+++ b/src/modules/cpu/host_color_model_conversions.hpp
@@ -31,12 +31,13 @@ template <typename T>
 RppStatus channel_extract_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                      Rpp32u *batch_extractChannelNumber, Rpp32u nbatchSize,
-                                     RppiChnFormat chnFormat, Rpp32u channel)
+                                     RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if (chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -66,7 +67,7 @@ RppStatus channel_extract_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -146,12 +147,13 @@ RppStatus channel_extract_host(T* srcPtr, RppiSize srcSize, T* dstPtr,
 template <typename T>
 RppStatus channel_combine_host_batch(T* srcPtr1, T* srcPtr2, T* srcPtr3, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                      Rpp32u nbatchSize,
-                                     RppiChnFormat chnFormat, Rpp32u channel)
+                                     RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if (chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -205,7 +207,7 @@ RppStatus channel_combine_host_batch(T* srcPtr1, T* srcPtr2, T* srcPtr3, RppiSiz
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -303,7 +305,7 @@ RppStatus channel_combine_host(T* srcPtr1, T* srcPtr2, T* srcPtr3, RppiSize srcS
 // if(chnFormat == RPPI_CHN_PLANAR)
 // {
 //     omp_set_dynamic(0);
-// #pragma omp parallel for num_threads(nbatchSize)
+// #pragma omp parallel for num_threads(numThreads)
 //     for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
 //     {
 //         Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -384,7 +386,7 @@ RppStatus channel_combine_host(T* srcPtr1, T* srcPtr2, T* srcPtr3, RppiSize srcS
 // else if (chnFormat == RPPI_CHN_PACKED)
 // {
 //     omp_set_dynamic(0);
-// #pragma omp parallel for num_threads(nbatchSize)
+// #pragma omp parallel for num_threads(numThreads)
 //     for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
 //     {
 //         Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -466,14 +468,14 @@ template <typename T>
 RppStatus look_up_table_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                    T *batch_lutPtr, RppiROI *roiPoints, Rpp32u nbatchSize,
-                                   RppiChnFormat chnFormat, Rpp32u channel)
+                                   RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
     Rpp32u lutSize = 256;
-
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -514,7 +516,7 @@ RppStatus look_up_table_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize
     else if(chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -604,12 +606,13 @@ template <typename T>
 RppStatus color_temperature_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                        Rpp32s *batch_adjustmentValue, RppiROI *roiPoints, Rpp32u nbatchSize,
-                                       RppiChnFormat chnFormat, Rpp32u channel)
+                                       RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if (channel == 1)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -704,7 +707,7 @@ RppStatus color_temperature_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiS
         if(chnFormat == RPPI_CHN_PLANAR)
         {
             omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
             for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
             {
                 Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -861,7 +864,7 @@ RppStatus color_temperature_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiS
         else if (chnFormat == RPPI_CHN_PACKED)
         {
             omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
             for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
             {
                 Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1098,12 +1101,13 @@ template <typename T>
 RppStatus vignette_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                               Rpp32f *batch_stdDev, RppiROI *roiPoints, Rpp32u nbatchSize,
-                              RppiChnFormat chnFormat, Rpp32u channel)
+                              RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1267,7 +1271,7 @@ RppStatus vignette_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batc
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -2387,12 +2391,13 @@ template <typename T>
 RppStatus hueRGB_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                             Rpp32f *batch_hueShift, RppiROI *roiPoints, Rpp32u nbatchSize,
-                            RppiChnFormat chnFormat, Rpp32u channel)
+                            RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -2470,7 +2475,7 @@ RppStatus hueRGB_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -3300,12 +3305,13 @@ template <typename T>
 RppStatus saturationRGB_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                    Rpp32f *batch_saturationFactor, RppiROI *roiPoints, Rpp32u nbatchSize,
-                                   RppiChnFormat chnFormat, Rpp32u channel)
+                                   RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -3371,7 +3377,7 @@ RppStatus saturationRGB_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -3499,10 +3505,11 @@ RppStatus tensor_look_up_table_host(T* srcPtr, T* dstPtr, T* lutPtr,
 template <typename T, typename U>
 RppStatus color_convert_rgb_to_hsv_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr,
                                               RppiColorConvertMode convertMode, Rpp32u nbatchSize,
-                                              RppiChnFormat chnFormat, Rpp32u channel)
+                                              RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
     {
         Rpp32u loc = 0;
@@ -3529,10 +3536,11 @@ RppStatus color_convert_rgb_to_hsv_host_batch(T* srcPtr, RppiSize *batch_srcSize
 template <typename T, typename U>
 RppStatus color_convert_hsv_to_rgb_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr,
                                               RppiColorConvertMode convertMode, Rpp32u nbatchSize,
-                                              RppiChnFormat chnFormat, Rpp32u channel)
+                                              RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
     {
         Rpp32u loc = 0;
diff --git a/src/modules/cpu/host_computer_vision.hpp b/src/modules/cpu/host_computer_vision.hpp
index e8cdd31d5..927335e66 100644
--- a/src/modules/cpu/host_computer_vision.hpp
+++ b/src/modules/cpu/host_computer_vision.hpp
@@ -30,12 +30,13 @@ THE SOFTWARE.
 template <typename T>
 RppStatus data_object_copy_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                       Rpp32u nbatchSize,
-                                      RppiChnFormat chnFormat, Rpp32u channel)
+                                      RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -67,7 +68,7 @@ RppStatus data_object_copy_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSi
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -110,12 +111,13 @@ RppStatus data_object_copy_host(T* srcPtr, RppiSize srcSize, T* dstPtr,
 template <typename T>
 RppStatus local_binary_pattern_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                           RppiROI *roiPoints, Rpp32u nbatchSize,
-                                          RppiChnFormat chnFormat, Rpp32u channel)
+                                          RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -255,7 +257,7 @@ RppStatus local_binary_pattern_host_batch(T* srcPtr, RppiSize *batch_srcSize, Rp
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -467,7 +469,7 @@ template <typename T, typename U>
 RppStatus convert_bit_depth_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr,
                                        Rpp32u conversionType, Rpp32u nbatchSize,
-                                       RppiChnFormat chnFormat, Rpp32u channel)
+                                       RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
     T *srcPtrTemp;
     U *dstPtrTemp;
@@ -561,12 +563,13 @@ template <typename T>
 RppStatus remap_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                            Rpp32u *batch_rowRemapTable, Rpp32u *batch_colRemapTable, Rpp32u nbatchSize,
-                           RppiChnFormat chnFormat, Rpp32u channel)
+                           RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -621,7 +624,7 @@ RppStatus remap_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDim = batch_srcSize[batchCount].height * batch_srcSize[batchCount].width;
@@ -732,10 +735,11 @@ template <typename T>
 RppStatus gaussian_image_pyramid_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                             Rpp32f *batch_stdDev, Rpp32u *batch_kernelSize, Rpp32u nbatchSize,
-                                            RppiChnFormat chnFormat, Rpp32u channel)
+                                            RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
     {
         Rpp32f stdDev = batch_stdDev[batchCount];
@@ -838,10 +842,11 @@ template <typename T>
 RppStatus canny_edge_detector_host_batch(T* batch_srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* batch_dstPtr,
                                          T *batch_maxThreshold, T *batch_minThreshold, Rpp32u nbatchSize,
-                                         RppiChnFormat chnFormat, Rpp32u channel)
+                                         RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
     {
         T maxThreshold = batch_maxThreshold[batchCount];
@@ -1363,10 +1368,11 @@ template <typename T>
 RppStatus laplacian_image_pyramid_host_batch(T* batch_srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* batch_dstPtr,
                                              Rpp32f *batch_stdDev, Rpp32u *batch_kernelSize, Rpp32u nbatchSize,
-                                             RppiChnFormat chnFormat, Rpp32u channel)
+                                             RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
     {
         Rpp32f stdDev = batch_stdDev[batchCount];
@@ -1501,10 +1507,11 @@ RppStatus harris_corner_detector_host_batch(T* batch_srcPtr, RppiSize *batch_src
                                             Rpp32u *batch_kernelSize, Rpp32f *batch_kValue, Rpp32f *batch_threshold,
                                             Rpp32u *batch_nonmaxKernelSize, Rpp32u nbatchSize,
-                                            RppiChnFormat chnFormat, Rpp32u channel)
+                                            RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
     {
         Rpp32u gaussianKernelSize = batch_gaussianKernelSize[batchCount];
@@ -2057,10 +2064,11 @@ RppStatus reconstruction_laplacian_image_pyramid_host_batch(T* batch_srcPtr1, Rp
                                                             T* batch_dstPtr, Rpp32f *batch_stdDev, Rpp32u *batch_kernelSize,
                                                             Rpp32u nbatchSize,
-                                                            RppiChnFormat chnFormat, Rpp32u channel)
+                                                            RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
     {
         Rpp32f stdDev = batch_stdDev[batchCount];
@@ -2170,10 +2178,11 @@ RppStatus hough_lines_host_batch(T* batch_srcPtr, RppiSize *batch_srcSize, RppiS
                                  Rpp32f *batch_rho, Rpp32f *batch_theta, Rpp32u *batch_threshold,
                                  Rpp32u *batch_lineLength, Rpp32u *batch_lineGap, Rpp32u *batch_linesMax, Rpp32u nbatchSize,
-                                 RppiChnFormat chnFormat, Rpp32u channel)
+                                 RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
     {
         Rpp32f rho = batch_rho[batchCount];
@@ -2704,10 +2713,11 @@ RppStatus fast_corner_detector_host_batch(T* batch_srcPtr, RppiSize *batch_srcSi
                                           Rpp32u *batch_numOfPixels, T *batch_threshold,
                                           Rpp32u *batch_nonmaxKernelSize, Rpp32u nbatchSize,
-                                          RppiChnFormat chnFormat, Rpp32u channel)
+                                          RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
     {
         Rpp32u numOfPixels = batch_numOfPixels[batchCount];
@@ -3370,10 +3380,11 @@ template <typename T, typename U>
 RppStatus hog_host_batch(T* batch_srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* batch_binsTensor, Rpp32u *batch_binsTensorLength,
                          RppiSize *batch_kernelSize, RppiSize *batch_windowSize, Rpp32u *batch_windowStride, Rpp32u *batch_numOfBins, Rpp32u nbatchSize,
-                         RppiChnFormat chnFormat, Rpp32u channel)
+                         RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
     {
         Rpp32u binsTensorLength = batch_binsTensorLength[batchCount];
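The GetNumThreads() accessor these hunks rely on lives in the handle implementation, not in this patch. A plausible sketch of its contract, under the assumption that a zero value means "no explicit request, fall back to the OpenMP default team size"; the class shape and member names here are hypothetical:

    #include <omp.h>

    typedef unsigned int Rpp32u;

    class HandleSketch
    {
        Rpp32u m_numThreads;
    public:
        explicit HandleSketch(Rpp32u numThreads) : m_numThreads(numThreads) {}
        Rpp32u GetNumThreads() const
        {
            // Assumption: 0 = unset; defer to OpenMP's view of available threads.
            return (m_numThreads != 0) ? m_numThreads : (Rpp32u)omp_get_max_threads();
        }
    };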
diff --git a/src/modules/cpu/host_filter_operations.hpp b/src/modules/cpu/host_filter_operations.hpp
index bee8808cb..af03ce41b 100644
--- a/src/modules/cpu/host_filter_operations.hpp
+++ b/src/modules/cpu/host_filter_operations.hpp
@@ -31,12 +31,13 @@ template <typename T>
 RppStatus box_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                 Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize,
-                                RppiChnFormat chnFormat, Rpp32u channel)
+                                RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -131,7 +132,7 @@ RppStatus box_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *ba
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -264,12 +265,13 @@ template <typename T>
 RppStatus median_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                    Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize,
-                                   RppiChnFormat chnFormat, Rpp32u channel)
+                                   RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -409,7 +411,7 @@ RppStatus median_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -619,12 +621,13 @@ template <typename T>
 RppStatus gaussian_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                      Rpp32f *batch_stdDev, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize,
-                                     RppiChnFormat chnFormat, Rpp32u channel)
+                                     RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -721,7 +724,7 @@ RppStatus gaussian_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -856,12 +859,13 @@ template <typename T>
 RppStatus nonlinear_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                       Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize,
-                                      RppiChnFormat chnFormat, Rpp32u channel)
+                                      RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1001,7 +1005,7 @@ RppStatus nonlinear_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSi
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1211,12 +1215,13 @@ template <typename T>
 RppStatus non_max_suppression_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                          Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize,
-                                         RppiChnFormat chnFormat, Rpp32u channel)
+                                         RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1356,7 +1361,7 @@ RppStatus non_max_suppression_host_batch(T* srcPtr, RppiSize *batch_srcSize, Rpp
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1572,12 +1577,13 @@ template <typename T>
 RppStatus sobel_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                   Rpp32u *batch_sobelType, RppiROI *roiPoints, Rpp32u nbatchSize,
-                                  RppiChnFormat chnFormat, Rpp32u channel)
+                                  RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1715,7 +1721,7 @@ RppStatus sobel_filter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1916,12 +1922,13 @@ template <typename T>
 RppStatus custom_convolution_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                         Rpp32f *batch_kernel, RppiSize *batch_rppiKernelSize, RppiROI *roiPoints, Rpp32u nbatchSize,
-                                        RppiChnFormat chnFormat, Rpp32u channel)
+                                        RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -2030,7 +2037,7 @@ RppStatus custom_convolution_host_batch(T* srcPtr, RppiSize *batch_srcSize, Rppi
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
        {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
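Each loop pairs omp_set_dynamic(0) with the num_threads clause: with dynamic adjustment disabled, the runtime must deliver exactly the requested team size (up to the implementation's thread limit), so the count the handle asks for is the count the filter loops actually get. A quick standalone check of that semantics, outside the patch:

    #include <omp.h>
    #include <cstdio>

    int main()
    {
        omp_set_dynamic(0);          // team size must match the num_threads request
    #pragma omp parallel num_threads(4)
        {
    #pragma omp single
            printf("team size = %d\n", omp_get_num_threads());  // 4, unless capped by OMP_THREAD_LIMIT
        }
        return 0;
    }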
diff --git a/src/modules/cpu/host_fused_functions.hpp b/src/modules/cpu/host_fused_functions.hpp
index 54357d967..9329109ff 100644
--- a/src/modules/cpu/host_fused_functions.hpp
+++ b/src/modules/cpu/host_fused_functions.hpp
@@ -33,12 +33,13 @@ RppStatus color_twist_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b
                                  Rpp32f *batch_alpha, Rpp32f *batch_beta, Rpp32f *batch_hueShift, Rpp32f *batch_saturationFactor,
                                  RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                 RppiChnFormat chnFormat, Rpp32u channel)
+                                 RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -79,7 +80,7 @@ RppStatus color_twist_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b
     else if(chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -787,12 +788,13 @@ RppStatus color_twist_f32_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz
                                      Rpp32f *batch_alpha, Rpp32f *batch_beta, Rpp32f *batch_hueShift, Rpp32f *batch_saturationFactor,
                                      RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                     RppiChnFormat chnFormat, Rpp32u channel)
+                                     RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -833,7 +835,7 @@ RppStatus color_twist_f32_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz
     else if(chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1590,12 +1592,13 @@ RppStatus color_twist_f16_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz
                                      Rpp32f *batch_alpha, Rpp32f *batch_beta, Rpp32f *batch_hueShift, Rpp32f *batch_saturationFactor,
                                      RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                     RppiChnFormat chnFormat, Rpp32u channel)
+                                     RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1636,7 +1639,7 @@ RppStatus color_twist_f16_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz
     else if(chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -2429,12 +2432,13 @@ RppStatus color_twist_i8_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize
                                     Rpp32f *batch_alpha, Rpp32f *batch_beta, Rpp32f *batch_hueShift, Rpp32f *batch_saturationFactor,
                                     RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                    RppiChnFormat chnFormat, Rpp32u channel)
+                                    RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -2507,7 +2511,7 @@ RppStatus color_twist_i8_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize
     else if(chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp64u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -2586,12 +2590,13 @@ RppStatus crop_mirror_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b
                                  Rpp32u *batch_crop_pos_x, Rpp32u *batch_crop_pos_y, Rpp32u *batch_mirrorFlag,
                                  Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                 RppiChnFormat chnFormat, Rpp32u channel)
+                                 RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -2689,7 +2694,7 @@ RppStatus crop_mirror_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -2919,12 +2924,13 @@ RppStatus crop_mirror_normalize_host_batch(T* srcPtr, RppiSize *batch_srcSize, R
                                            Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag,
                                            Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                           RppiChnFormat chnFormat, Rpp32u channel)
+                                           RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -3102,7 +3108,7 @@ RppStatus crop_mirror_normalize_host_batch(T* srcPtr, RppiSize *batch_srcSize, R
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -3553,12 +3559,13 @@ RppStatus crop_mirror_normalize_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_s
                                                Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag,
                                                Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                               RppiChnFormat chnFormat, Rpp32u channel)
+                                               RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -3694,7 +3701,7 @@ RppStatus crop_mirror_normalize_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_s
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -3833,12 +3840,13 @@ RppStatus crop_mirror_normalize_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_s
                                                Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag,
                                                Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                               RppiChnFormat chnFormat, Rpp32u channel)
+                                               RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -3997,7 +4005,7 @@ RppStatus crop_mirror_normalize_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_s
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -4159,12 +4167,13 @@ RppStatus crop_mirror_normalize_u8_f_host_batch(T* srcPtr, RppiSize *batch_srcSi
                                                 Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag,
                                                 Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                                RppiChnFormat chnFormat, Rpp32u channel)
+                                                RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -4362,7 +4371,7 @@ RppStatus crop_mirror_normalize_u8_f_host_batch(T* srcPtr, RppiSize *batch_srcSi
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -4564,7 +4573,7 @@ RppStatus crop_mirror_normalize_u8_i8_host_batch(Rpp8u* srcPtr, RppiSize *batch_
                                                  Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag,
                                                  Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                                 RppiChnFormat chnFormat, Rpp32u channel)
+                                                 RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
     Rpp32u srcBufferSize = nbatchSize * batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * channel;
     Rpp32u dstBufferSize = nbatchSize * batch_dstSizeMax[0].height * batch_dstSizeMax[0].width * channel;
@@ -4586,7 +4595,7 @@ RppStatus crop_mirror_normalize_u8_i8_host_batch(Rpp8u* srcPtr, RppiSize *batch_
     crop_mirror_normalize_f32_host_batch(srcPtrf32, batch_srcSize, batch_srcSizeMax, dstPtrf32, batch_dstSize, batch_dstSizeMax,
                                          batch_crop_pos_x, batch_crop_pos_y, batch_mean, batch_stdDev, batch_mirrorFlag, outputFormatToggle,
-                                         nbatchSize, chnFormat, channel);
+                                         nbatchSize, chnFormat, channel, handle);
     Rpp8s *dstPtrTemp;
     Rpp32f *dstPtrf32Temp;
@@ -4612,12 +4621,13 @@ template <typename T, typename U>
 RppStatus crop_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax,
                           Rpp32u *batch_crop_pos_x, Rpp32u *batch_crop_pos_y, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                          RppiChnFormat chnFormat, Rpp32u channel)
+                          RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -4692,7 +4702,7 @@ RppStatus crop_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_sr
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -4770,12 +4780,13 @@ template <typename T, typename U>
 RppStatus crop_host_u_f_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax,
                               Rpp32u *batch_crop_pos_x, Rpp32u *batch_crop_pos_y, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                              RppiChnFormat chnFormat, Rpp32u channel)
+                              RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -4864,7 +4875,7 @@ RppStatus crop_host_u_f_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batc
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -4954,12 +4965,13 @@ template <typename T, typename U>
 RppStatus crop_host_u_i_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax,
                               Rpp32u *batch_crop_pos_x, Rpp32u *batch_crop_pos_y, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                              RppiChnFormat chnFormat, Rpp32u channel)
+                              RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -5046,7 +5058,7 @@ RppStatus crop_host_u_i_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batc
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -5136,12 +5148,13 @@ template <typename T>
 RppStatus resize_crop_mirror_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax,
                                         Rpp32u *batch_x1, Rpp32u *batch_x2, Rpp32u *batch_y1, Rpp32u *batch_y2,
                                         Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                        RppiChnFormat chnFormat, Rpp32u channel)
+                                        RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u x1 = batch_x1[batchCount];
@@ -5249,7 +5262,7 @@ RppStatus resize_crop_mirror_host_batch(T* srcPtr, RppiSize *batch_srcSize, Rppi
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u x1 = batch_x1[batchCount];
@@ -5358,12 +5371,13 @@ RppStatus resize_crop_mirror_host_batch(T* srcPtr, RppiSize *batch_srcSize, Rppi
 RppStatus resize_crop_mirror_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp32f* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax,
                                             Rpp32u *batch_x1, Rpp32u *batch_x2, Rpp32u *batch_y1, Rpp32u *batch_y2, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                            RppiChnFormat chnFormat, Rpp32u channel)
+                                            RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u x1 = batch_x1[batchCount];
@@ -5471,7 +5485,7 @@ RppStatus resize_crop_mirror_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcS
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u x1 = batch_x1[batchCount];
@@ -5581,12 +5595,13 @@ RppStatus resize_crop_mirror_f32_host_batch(Rpp32f* srcPtr, RppiSize *batch_srcS
 RppStatus resize_crop_mirror_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, Rpp16f* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax,
                                             Rpp32u *batch_x1, Rpp32u *batch_x2, Rpp32u *batch_y1, Rpp32u *batch_y2, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                            RppiChnFormat chnFormat, Rpp32u channel)
+                                            RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u x1 = batch_x1[batchCount];
@@ -5696,7 +5711,7 @@ RppStatus resize_crop_mirror_f16_host_batch(Rpp16f* srcPtr, RppiSize *batch_srcS
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u x1 = batch_x1[batchCount];
@@ -5807,13 +5822,14 @@ template <typename T>
 RppStatus resize_mirror_normalize_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax,
                                              Rpp32f *batch_mean, Rpp32f *batch_stdDev, Rpp32u *batch_mirrorFlag, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                             RppiChnFormat chnFormat, Rpp32u channel)
+                                             RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         T *dstPtrCopy = (T*) calloc(channel * batch_dstSizeMax[0].height * batch_dstSizeMax[0].width * nbatchSize, sizeof(T));
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -6046,7 +6062,7 @@ RppStatus resize_mirror_normalize_host_batch(T* srcPtr, RppiSize *batch_srcSize,
     {
         T *dstPtrCopy = (T*) calloc(channel * batch_dstSizeMax[0].height * batch_dstSizeMax[0].width * nbatchSize, sizeof(T));
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u srcImageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
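When numThreads is smaller than nbatchSize, the parallel for simply distributes the nbatchSize iterations across the fixed team; with the (implementation-typical) static schedule each thread processes a contiguous run of images. A small demonstration, outside the patch:

    #include <omp.h>
    #include <cstdio>

    int main()
    {
        const int nbatchSize = 8;
        omp_set_dynamic(0);
    #pragma omp parallel for num_threads(2)
        for(int batchCount = 0; batchCount < nbatchSize; batchCount++)
        {
            // With 2 threads and 8 images, a static schedule typically gives
            // thread 0 images 0..3 and thread 1 images 4..7.
            printf("image %d -> thread %d\n", batchCount, omp_get_thread_num());
        }
        return 0;
    }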
diff --git a/src/modules/cpu/host_geometry_transforms.hpp b/src/modules/cpu/host_geometry_transforms.hpp
index 7c2c1e473..21c4d55ed 100644
--- a/src/modules/cpu/host_geometry_transforms.hpp
+++ b/src/modules/cpu/host_geometry_transforms.hpp
@@ -31,12 +31,13 @@ template <typename T>
 RppStatus flip_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                           Rpp32u *batch_flipAxis, RppiROI *roiPoints, Rpp32u nbatchSize,
-                          RppiChnFormat chnFormat, Rpp32u channel)
+                          RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -234,7 +235,7 @@ RppStatus flip_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_sr
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -710,12 +711,13 @@ RppStatus fisheye_base_host(T* srcPtrTemp, RppiSize srcSize, T* dstPtrTemp,
 template <typename T>
 RppStatus fisheye_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                              RppiROI *roiPoints, Rpp32u nbatchSize,
-                             RppiChnFormat chnFormat, Rpp32u channel)
+                             RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -919,7 +921,7 @@ RppStatus fisheye_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1539,12 +1541,13 @@ template <typename T>
 RppStatus lens_correction_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                      Rpp32f *batch_strength, Rpp32f *batch_zoom, RppiROI *roiPoints, Rpp32u nbatchSize,
-                                     RppiChnFormat chnFormat, Rpp32u channel)
+                                     RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1754,7 +1757,7 @@ RppStatus lens_correction_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSiz
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
        {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -2251,12 +2254,13 @@ template <typename T>
 RppStatus scale_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax,
                            Rpp32f *batch_percentage, RppiROI *roiPoints, Rpp32u nbatchSize,
-                           RppiChnFormat chnFormat, Rpp32u channel)
+                           RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32f x1 = roiPoints[batchCount].x;
@@ -2359,7 +2363,7 @@ RppStatus scale_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32f x1 = roiPoints[batchCount].x;
@@ -2587,12 +2591,13 @@ template <typename T>
 RppStatus rotate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax,
                             Rpp32f *batch_angleDeg, RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                            RppiChnFormat chnFormat, Rpp32u channel)
+                            RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32f x1 = roiPoints[batchCount].x;
@@ -2739,7 +2744,7 @@ RppStatus rotate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32f x1 = roiPoints[batchCount].x;
@@ -3014,12 +3019,13 @@ RppStatus rotate_host(T* srcPtr, RppiSize srcSize, T* dstPtr, RppiSize dstSize,
 template <typename T, typename U>
 RppStatus resize_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax,
                             RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                            RppiChnFormat chnFormat, Rpp32u channel)
+                            RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u x1 = roiPoints[batchCount].x;
@@ -3101,7 +3107,7 @@ RppStatus resize_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u x1 = roiPoints[batchCount].x;
@@ -3186,12 +3192,13 @@ RppStatus resize_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_
 template <typename T, typename U>
 RppStatus resize_u8_i8_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax,
                                   RppiROI *roiPoints, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                  RppiChnFormat chnFormat, Rpp32u channel)
+                                  RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u x1 = roiPoints[batchCount].x;
@@ -3280,7 +3287,7 @@ RppStatus resize_u8_i8_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u x1 = roiPoints[batchCount].x;
@@ -3384,12 +3391,13 @@ template <typename T>
 RppStatus resize_crop_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax,
                                  Rpp32u *batch_x1, Rpp32u *batch_x2, Rpp32u *batch_y1, Rpp32u *batch_y2, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                 RppiChnFormat chnFormat, Rpp32u channel)
+                                 RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u x1 = batch_x1[batchCount];
@@ -3452,7 +3460,7 @@ RppStatus resize_crop_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u x1 = batch_x1[batchCount];
@@ -3532,12 +3540,13 @@ template <typename T>
 RppStatus warp_affine_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax,
                                  RppiROI *roiPoints, Rpp32f *batch_affine, Rpp32u outputFormatToggle, Rpp32u nbatchSize,
-                                 RppiChnFormat chnFormat, Rpp32u channel)
+                                 RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32f x1 = roiPoints[batchCount].x;
@@ -3672,7 +3681,7 @@ RppStatus warp_affine_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32f x1 = roiPoints[batchCount].x;
@@ -3926,13 +3935,14 @@ template <typename T>
 RppStatus warp_perspective_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax,
                                       RppiROI *roiPoints, Rpp32f *batch_perspective, Rpp32u nbatchSize,
-                                      RppiChnFormat chnFormat, Rpp32u channel)
+                                      RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
     //Rpp32f perspective[9] = {0.707, 0.707, 0, -0.707, 0.707, 0, 0.001, 0.001, 1};
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32f x1 = roiPoints[batchCount].x;
@@ -4049,7 +4059,7 @@ RppStatus warp_perspective_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSi
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32f x1 = roiPoints[batchCount].x;
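The per-iteration bookkeeping visible in these hunks (imageDimMax, srcImageDimMax, and the per-image parameter loads) is what makes the outer loop safe to parallelise: every iteration derives its own disjoint slice of the batch buffer and touches nothing shared. An illustrative sketch of that addressing, assuming a uniform per-image maximum size across the batch; none of these helper names are from the patch:

    #include <omp.h>
    #include <cstddef>

    typedef unsigned int Rpp32u;
    typedef unsigned char Rpp8u;
    struct RppiSize { Rpp32u width, height; };  // sketch of the RPP size struct

    void per_image_slices(Rpp8u* srcPtr, Rpp8u* dstPtr, RppiSize* batch_srcSizeMax,
                          Rpp32u nbatchSize, Rpp32u channel, Rpp32u numThreads)
    {
        omp_set_dynamic(0);
    #pragma omp parallel for num_threads(numThreads)
        for(int batchCount = 0; batchCount < (int)nbatchSize; batchCount++)
        {
            Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
            Rpp8u* srcPtrImage = srcPtr + (size_t)batchCount * imageDimMax * channel;  // disjoint read slice
            Rpp8u* dstPtrImage = dstPtr + (size_t)batchCount * imageDimMax * channel;  // disjoint write slice
            (void)srcPtrImage; (void)dstPtrImage;  // per-image work would go here
        }
    }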
+#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -454,7 +456,7 @@ RppStatus contrast_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batc else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -736,12 +738,13 @@ template RppStatus blend_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_alpha, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -873,7 +876,7 @@ RppStatus blend_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, Rppi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1079,12 +1082,13 @@ template RppStatus gamma_correction_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_gamma, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1167,7 +1171,7 @@ RppStatus gamma_correction_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSi else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1287,12 +1291,13 @@ template RppStatus exposure_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_exposureFactor, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1401,7 +1406,7 @@ RppStatus 
exposure_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batc else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1568,12 +1573,13 @@ template RppStatus blur_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -1854,7 +1860,7 @@ RppStatus blur_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_sr else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2734,12 +2740,13 @@ RppStatus blur_host(T* srcPtr, RppiSize srcSize, T* dstPtr, template RppStatus histogram_balance_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if (chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -2857,7 +2864,7 @@ RppStatus histogram_balance_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiS else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -3021,10 +3028,11 @@ template RppStatus random_crop_letterbox_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiSize *batch_dstSize, RppiSize *batch_dstSizeMax, Rpp32u *batch_x1, Rpp32u *batch_x2, Rpp32u *batch_y1, Rpp32u *batch_y2, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u x1 = batch_x1[batchCount]; @@ -3180,12 +3188,13 @@ RppStatus pixelate_base_pkd_host(T* srcPtrTemp, Rpp32u elementsInRow, T* dstPtrT template RppStatus pixelate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, RppiROI *roiPoints, Rpp32u 
nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -3378,7 +3387,7 @@ RppStatus pixelate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batc else if(chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -3964,12 +3973,13 @@ template RppStatus fog_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_fogValue, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4025,7 +4035,7 @@ RppStatus fog_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_src else if (chnFormat == RPPI_CHN_PACKED) { omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4181,8 +4191,9 @@ template RppStatus noise_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, Rpp32f *batch_noiseProbability, RppiROI *roiPoints, Rpp32u nbatchSize, - RppiChnFormat chnFormat, Rpp32u channel) + RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); if(chnFormat == RPPI_CHN_PLANAR) { T *srcPtrBufferROI, *dstPtrBufferROI; @@ -4190,7 +4201,7 @@ RppStatus noise_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s dstPtrBufferROI = (T*) calloc(channel * batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * nbatchSize, sizeof(T)); omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4299,7 +4310,7 @@ RppStatus noise_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s dstPtrBufferROI = (T*) calloc(channel * batch_srcSizeMax[0].height * batch_srcSizeMax[0].width * nbatchSize, sizeof(T)); omp_set_dynamic(0); -#pragma omp parallel for num_threads(nbatchSize) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < nbatchSize; batchCount ++) { Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width; @@ -4490,12 +4501,13 @@ template RppStatus snow_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr, 
                           Rpp32f *batch_strength, RppiROI *roiPoints, Rpp32u nbatchSize,
-                          RppiChnFormat chnFormat, Rpp32u channel)
+                          RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -4562,7 +4574,7 @@ RppStatus snow_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_sr
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -4759,10 +4771,11 @@ RppStatus rain_host(T* srcPtr, RppiSize srcSize,T* dstPtr,
 template
 RppStatus rain_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                           Rpp32f *batch_rainPercentage, Rpp32u *batch_rainWidth, Rpp32u *batch_rainHeight, Rpp32f *batch_transparency,
-                          Rpp32u nbatchSize, RppiChnFormat chnFormat, Rpp32u channel)
+                          Rpp32u nbatchSize, RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
     {
         Rpp32f rainPercentage = batch_rainPercentage[batchCount];
@@ -4799,12 +4812,13 @@ RppStatus random_shadow_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize
                                    Rpp32u *batch_x1, Rpp32u *batch_y1, Rpp32u *batch_x2, Rpp32u *batch_y2,
                                    Rpp32u *batch_numberOfShadows, Rpp32u *batch_maxSizeX, Rpp32u *batch_maxSizeY, Rpp32u nbatchSize,
-                                   RppiChnFormat chnFormat, Rpp32u channel)
+                                   RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -4875,7 +4889,7 @@ RppStatus random_shadow_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -5032,12 +5046,13 @@ template
 RppStatus jitter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                             Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize,
-                            RppiChnFormat chnFormat, Rpp32u channel)
+                            RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
    {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -5133,7 +5148,7 @@ RppStatus jitter_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
diff --git a/src/modules/cpu/host_logical_operations.hpp b/src/modules/cpu/host_logical_operations.hpp
index b54c0d46f..7b81c1af3 100644
--- a/src/modules/cpu/host_logical_operations.hpp
+++ b/src/modules/cpu/host_logical_operations.hpp
@@ -30,12 +30,13 @@ THE SOFTWARE.
 template
 RppStatus bitwise_AND_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                  RppiROI *roiPoints, Rpp32u nbatchSize,
-                                 RppiChnFormat chnFormat, Rpp32u channel)
+                                 RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -117,7 +118,7 @@ RppStatus bitwise_AND_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -213,12 +214,13 @@ RppStatus bitwise_AND_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr,
 template
 RppStatus bitwise_NOT_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                  RppiROI *roiPoints, Rpp32u nbatchSize,
-                                 RppiChnFormat chnFormat, Rpp32u channel)
+                                 RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -294,7 +296,7 @@ RppStatus bitwise_NOT_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -398,12 +400,13 @@ RppStatus bitwise_NOT_host(T* srcPtr, RppiSize srcSize, T* dstPtr,
 template
 RppStatus exclusive_OR_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                   RppiROI *roiPoints, Rpp32u nbatchSize,
-                                  RppiChnFormat chnFormat, Rpp32u channel)
+                                  RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -486,7 +489,7 @@ RppStatus exclusive_OR_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSiz
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -582,12 +585,13 @@ RppStatus exclusive_OR_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr,
 template
 RppStatus inclusive_OR_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                   RppiROI *roiPoints, Rpp32u nbatchSize,
-                                  RppiChnFormat chnFormat, Rpp32u channel)
+                                  RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -670,7 +674,7 @@ RppStatus inclusive_OR_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSiz
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
diff --git a/src/modules/cpu/host_morphological_transforms.hpp b/src/modules/cpu/host_morphological_transforms.hpp
index 42c51081a..3d3c22009 100644
--- a/src/modules/cpu/host_morphological_transforms.hpp
+++ b/src/modules/cpu/host_morphological_transforms.hpp
@@ -31,12 +31,13 @@ template
 RppStatus erode_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                            Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize,
-                           RppiChnFormat chnFormat, Rpp32u channel)
+                           RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -204,7 +205,7 @@ RppStatus erode_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_s
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -440,12 +441,13 @@ template
 RppStatus dilate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                             Rpp32u *batch_kernelSize, RppiROI *roiPoints, Rpp32u nbatchSize,
-                            RppiChnFormat chnFormat, Rpp32u channel)
+                            RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -588,7 +590,7 @@ RppStatus dilate_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
diff --git a/src/modules/cpu/host_statistical_operations.hpp b/src/modules/cpu/host_statistical_operations.hpp
index 494b28163..8539ff932 100644
--- a/src/modules/cpu/host_statistical_operations.hpp
+++ b/src/modules/cpu/host_statistical_operations.hpp
@@ -31,12 +31,13 @@ THE SOFTWARE.
 template
 RppStatus min_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                          RppiROI *roiPoints, Rpp32u nbatchSize,
-                         RppiChnFormat chnFormat, Rpp32u channel)
+                         RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -115,7 +116,7 @@ RppStatus min_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSi
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -208,12 +209,13 @@ RppStatus min_host(T* srcPtr1, U* srcPtr2, RppiSize srcSize, T* dstPtr,
 template
 RppStatus max_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                          RppiROI *roiPoints, Rpp32u nbatchSize,
-                         RppiChnFormat chnFormat, Rpp32u channel)
+                         RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -292,7 +294,7 @@ RppStatus max_host_batch(T* srcPtr1, T* srcPtr2, RppiSize *batch_srcSize, RppiSi
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -386,12 +388,13 @@ template
 RppStatus thresholding_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                   T *batch_min, T *batch_max, RppiROI *roiPoints, Rpp32u nbatchSize,
-                                  RppiChnFormat chnFormat, Rpp32u channel)
+                                  RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -478,7 +481,7 @@ RppStatus thresholding_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -582,12 +585,13 @@ template
 RppStatus histogram_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax,
                                Rpp32u *outputHistogram, Rpp32u bins, Rpp32u nbatchSize,
-                               RppiChnFormat chnFormat, Rpp32u channel)
+                               RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if (chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -641,7 +645,7 @@ RppStatus histogram_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *bat
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDim = batch_srcSize[batchCount].height * batch_srcSize[batchCount].width;
@@ -709,12 +713,13 @@ RppStatus histogram_host(T* srcPtr, RppiSize srcSize, Rpp32u* outputHistogram, R
 template
 RppStatus histogram_equalization_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, T* dstPtr,
                                             Rpp32u nbatchSize,
-                                            RppiChnFormat chnFormat, Rpp32u channel)
+                                            RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if (chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -832,7 +837,7 @@ RppStatus histogram_equalization_host_batch(T* srcPtr, RppiSize *batch_srcSize,
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -996,12 +1001,13 @@ template
 RppStatus min_max_loc_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax,
                                  Rpp8u *batch_min, Rpp8u *batch_max, Rpp32u *batch_minLoc, Rpp32u *batch_maxLoc, Rpp32u nbatchSize,
-                                 RppiChnFormat chnFormat, Rpp32u channel)
+                                 RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1056,7 +1062,7 @@ RppStatus min_max_loc_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp8u *min = batch_min + batchCount;
@@ -1147,12 +1153,13 @@ template
 RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax,
                                  Rpp32f *batch_mean, Rpp32f *batch_stddev, Rpp32u nbatchSize,
-                                 RppiChnFormat chnFormat, Rpp32u channel)
+                                 RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1216,7 +1223,7 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDim = batch_srcSize[batchCount].height * batch_srcSize[batchCount].width;
@@ -1284,12 +1291,13 @@ template
 RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax,
                                  Rpp32f *batch_mean, Rpp32f *batch_stddev, RppiROI *roiPoints, Rpp32u nbatchSize,
-                                 RppiChnFormat chnFormat, Rpp32u channel)
+                                 RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     if(chnFormat == RPPI_CHN_PLANAR)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1324,9 +1332,8 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b
             T *srcPtrChannel;
             srcPtrChannel = srcPtrImage + (c * imageDimMax);
 
-            omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
             for(int i = 0; i < batch_srcSize[batchCount].height; i++)
             {
                 T *srcPtrTemp;
@@ -1363,9 +1370,8 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b
             T *srcPtrChannel;
             srcPtrChannel = srcPtrImage + (c * imageDimMax);
 
-            omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
             for(int i = 0; i < batch_srcSize[batchCount].height; i++)
             {
                 T *srcPtrTemp;
@@ -1402,7 +1408,7 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b
     else if (chnFormat == RPPI_CHN_PACKED)
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
         {
             Rpp32u imageDimMax = batch_srcSizeMax[batchCount].height * batch_srcSizeMax[batchCount].width;
@@ -1435,9 +1441,8 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b
             Rpp32u elementsInRow = channel * batch_srcSize[batchCount].width;
             Rpp32u elementsInRowMax = channel * batch_srcSizeMax[batchCount].width;
-
-            omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+            omp_set_dynamic(0);
+#pragma omp parallel for num_threads(numThreads)
             for(int i = 0; i < batch_srcSize[batchCount].height; i++)
             {
                 T *srcPtrTemp;
@@ -1471,9 +1476,8 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b
             *mean = *mean / (channel * imageDim);
-
-            omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+            omp_set_dynamic(0);
+#pragma omp parallel for num_threads(numThreads)
             for(int i = 0; i < batch_srcSize[batchCount].height; i++)
             {
                 T *srcPtrTemp;
@@ -1547,10 +1551,11 @@ RppStatus mean_stddev_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *b
 template
 RppStatus integral_host_batch(T* batch_srcPtr, RppiSize *batch_srcSize, RppiSize *batch_srcSizeMax, U* batch_dstPtr,
                               Rpp32u nbatchSize,
-                              RppiChnFormat chnFormat, Rpp32u channel)
+                              RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(nbatchSize)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < nbatchSize; batchCount ++)
     {
         Rpp32u loc = 0;
diff --git a/src/modules/cpu/kernel/blend.hpp b/src/modules/cpu/kernel/blend.hpp
index 156ad9763..8b6e8e8c4 100644
--- a/src/modules/cpu/kernel/blend.hpp
+++ b/src/modules/cpu/kernel/blend.hpp
@@ -32,12 +32,14 @@ RppStatus blend_u8_u8_host_tensor(Rpp8u *srcPtr1,
                                   Rpp32f *alphaTensor,
                                   RpptROIPtr roiTensorPtrSrc,
                                   RpptRoiType roiType,
-                                  RppLayoutParams layoutParams)
+                                  RppLayoutParams layoutParams,
+                                  rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -276,12 +278,14 @@ RppStatus blend_f32_f32_host_tensor(Rpp32f *srcPtr1,
                                     Rpp32f *alphaTensor,
                                     RpptROIPtr roiTensorPtrSrc,
                                     RpptRoiType roiType,
-                                    RppLayoutParams layoutParams)
+                                    RppLayoutParams layoutParams,
+                                    rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -499,12 +503,14 @@ RppStatus blend_f16_f16_host_tensor(Rpp16f *srcPtr1,
                                     Rpp32f *alphaTensor,
                                     RpptROIPtr roiTensorPtrSrc,
                                     RpptRoiType roiType,
-                                    RppLayoutParams layoutParams)
+                                    RppLayoutParams layoutParams,
+                                    rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -768,12 +774,14 @@ RppStatus blend_i8_i8_host_tensor(Rpp8s *srcPtr1,
                                   Rpp32f *alphaTensor,
                                   RpptROIPtr roiTensorPtrSrc,
                                   RpptRoiType roiType,
-                                  RppLayoutParams layoutParams)
+                                  RppLayoutParams layoutParams,
+                                  rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
diff --git a/src/modules/cpu/kernel/brightness.hpp b/src/modules/cpu/kernel/brightness.hpp
index 44c7ff07e..fe66d197c 100644
--- a/src/modules/cpu/kernel/brightness.hpp
+++ b/src/modules/cpu/kernel/brightness.hpp
@@ -32,12 +32,14 @@ RppStatus brightness_u8_u8_host_tensor(Rpp8u *srcPtr,
                                        Rpp32f *betaTensor,
                                        RpptROIPtr roiTensorPtrSrc,
                                        RpptRoiType roiType,
-                                       RppLayoutParams layoutParams)
+                                       RppLayoutParams layoutParams,
+                                       rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -244,12 +246,14 @@ RppStatus brightness_f32_f32_host_tensor(Rpp32f *srcPtr,
                                          Rpp32f *betaTensor,
                                          RpptROIPtr roiTensorPtrSrc,
                                          RpptRoiType roiType,
-                                         RppLayoutParams layoutParams)
+                                         RppLayoutParams layoutParams,
+                                         rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -462,12 +466,14 @@ RppStatus brightness_f16_f16_host_tensor(Rpp16f *srcPtr,
                                          Rpp32f *betaTensor,
                                          RpptROIPtr roiTensorPtrSrc,
                                          RpptRoiType roiType,
-                                         RppLayoutParams layoutParams)
+                                         RppLayoutParams layoutParams,
+                                         rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -716,12 +722,14 @@ RppStatus brightness_i8_i8_host_tensor(Rpp8s *srcPtr,
                                        Rpp32f *betaTensor,
                                        RpptROIPtr roiTensorPtrSrc,
                                        RpptRoiType roiType,
-                                       RppLayoutParams layoutParams)
+                                       RppLayoutParams layoutParams,
+                                       rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
diff --git a/src/modules/cpu/kernel/color_cast.hpp b/src/modules/cpu/kernel/color_cast.hpp
index 601720689..437084950 100644
--- a/src/modules/cpu/kernel/color_cast.hpp
+++ b/src/modules/cpu/kernel/color_cast.hpp
@@ -32,12 +32,14 @@ RppStatus color_cast_u8_u8_host_tensor(Rpp8u *srcPtr,
                                        Rpp32f *alphaTensor,
                                        RpptROIPtr roiTensorPtrSrc,
                                        RpptRoiType roiType,
-                                       RppLayoutParams layoutParams)
+                                       RppLayoutParams layoutParams,
+                                       rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -285,12 +287,14 @@ RppStatus color_cast_f32_f32_host_tensor(Rpp32f *srcPtr,
                                          Rpp32f *alphaTensor,
                                          RpptROIPtr roiTensorPtrSrc,
                                          RpptRoiType roiType,
-                                         RppLayoutParams layoutParams)
+                                         RppLayoutParams layoutParams,
+                                         rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -538,12 +542,14 @@ RppStatus color_cast_f16_f16_host_tensor(Rpp16f *srcPtr,
                                          Rpp32f *alphaTensor,
                                          RpptROIPtr roiTensorPtrSrc,
                                          RpptRoiType roiType,
-                                         RppLayoutParams layoutParams)
+                                         RppLayoutParams layoutParams,
+                                         rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -847,12 +853,14 @@ RppStatus color_cast_i8_i8_host_tensor(Rpp8s *srcPtr,
                                        Rpp32f *alphaTensor,
                                        RpptROIPtr roiTensorPtrSrc,
                                        RpptRoiType roiType,
-                                       RppLayoutParams layoutParams)
+                                       RppLayoutParams layoutParams,
+                                       rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
diff --git a/src/modules/cpu/kernel/color_jitter.hpp b/src/modules/cpu/kernel/color_jitter.hpp
index 8dd432961..e864222d8 100644
--- a/src/modules/cpu/kernel/color_jitter.hpp
+++ b/src/modules/cpu/kernel/color_jitter.hpp
@@ -34,12 +34,14 @@ RppStatus color_jitter_u8_u8_host_tensor(Rpp8u *srcPtr,
                                          Rpp32f *saturationTensor,
                                          RpptROIPtr roiTensorPtrSrc,
                                          RpptRoiType roiType,
-                                         RppLayoutParams layoutParams)
+                                         RppLayoutParams layoutParams,
+                                         rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -292,12 +294,14 @@ RppStatus color_jitter_f32_f32_host_tensor(Rpp32f *srcPtr,
                                            Rpp32f *saturationTensor,
                                            RpptROIPtr roiTensorPtrSrc,
                                            RpptRoiType roiType,
-                                           RppLayoutParams layoutParams)
+                                           RppLayoutParams layoutParams,
+                                           rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -550,12 +554,14 @@ RppStatus color_jitter_f16_f16_host_tensor(Rpp16f *srcPtr,
                                            Rpp32f *saturationTensor,
                                            RpptROIPtr roiTensorPtrSrc,
                                            RpptRoiType roiType,
-                                           RppLayoutParams layoutParams)
+                                           RppLayoutParams layoutParams,
+                                           rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -864,12 +870,14 @@ RppStatus color_jitter_i8_i8_host_tensor(Rpp8s *srcPtr,
                                          Rpp32f *saturationTensor,
                                          RpptROIPtr roiTensorPtrSrc,
                                          RpptRoiType roiType,
-                                         RppLayoutParams layoutParams)
+                                         RppLayoutParams layoutParams,
+                                         rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
diff --git a/src/modules/cpu/kernel/color_to_greyscale.hpp b/src/modules/cpu/kernel/color_to_greyscale.hpp
index 3f5b9f445..7f058830d 100644
--- a/src/modules/cpu/kernel/color_to_greyscale.hpp
+++ b/src/modules/cpu/kernel/color_to_greyscale.hpp
@@ -29,10 +29,12 @@ RppStatus color_to_greyscale_u8_u8_host_tensor(Rpp8u *srcPtr,
                                                Rpp8u *dstPtr,
                                                RpptDescPtr dstDescPtr,
                                                Rpp32f *channelWeights,
-                                               RppLayoutParams layoutParams)
+                                               RppLayoutParams layoutParams,
+                                               rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         Rpp8u *srcPtrImage, *dstPtrImage;
@@ -160,10 +162,12 @@ RppStatus color_to_greyscale_f32_f32_host_tensor(Rpp32f *srcPtr,
                                                  Rpp32f *dstPtr,
                                                  RpptDescPtr dstDescPtr,
                                                  Rpp32f *channelWeights,
-                                                 RppLayoutParams layoutParams)
+                                                 RppLayoutParams layoutParams,
+                                                 rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         Rpp32f *srcPtrImage, *dstPtrImage;
@@ -295,10 +299,12 @@ RppStatus color_to_greyscale_f16_f16_host_tensor(Rpp16f *srcPtr,
                                                  Rpp16f *dstPtr,
                                                  RpptDescPtr dstDescPtr,
                                                  Rpp32f *channelWeights,
-                                                 RppLayoutParams layoutParams)
+                                                 RppLayoutParams layoutParams,
+                                                 rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         Rpp16f *srcPtrImage, *dstPtrImage;
@@ -445,10 +451,12 @@ RppStatus color_to_greyscale_i8_i8_host_tensor(Rpp8s *srcPtr,
                                                Rpp8s *dstPtr,
                                                RpptDescPtr dstDescPtr,
                                                Rpp32f *channelWeights,
-                                               RppLayoutParams layoutParams)
+                                               RppLayoutParams layoutParams,
+                                               rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         Rpp8s *srcPtrImage, *dstPtrImage;
diff --git a/src/modules/cpu/kernel/color_twist.hpp b/src/modules/cpu/kernel/color_twist.hpp
index 409e86e36..b1f3d406e 100644
--- a/src/modules/cpu/kernel/color_twist.hpp
+++ b/src/modules/cpu/kernel/color_twist.hpp
@@ -34,12 +34,14 @@ RppStatus color_twist_u8_u8_host_tensor(Rpp8u *srcPtr,
                                         Rpp32f *saturationTensor,
                                         RpptROIPtr roiTensorPtrSrc,
                                         RpptRoiType roiType,
-                                        RppLayoutParams layoutParams)
+                                        RppLayoutParams layoutParams,
+                                        rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -357,12 +359,14 @@ RppStatus color_twist_f32_f32_host_tensor(Rpp32f *srcPtr,
                                           Rpp32f *saturationTensor,
                                           RpptROIPtr roiTensorPtrSrc,
                                           RpptRoiType roiType,
-                                          RppLayoutParams layoutParams)
+                                          RppLayoutParams layoutParams,
+                                          rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -660,12 +664,14 @@ RppStatus color_twist_f16_f16_host_tensor(Rpp16f *srcPtr,
                                           Rpp32f *saturationTensor,
                                           RpptROIPtr roiTensorPtrSrc,
                                           RpptRoiType roiType,
-                                          RppLayoutParams layoutParams)
+                                          RppLayoutParams layoutParams,
+                                          rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -1003,12 +1009,14 @@ RppStatus color_twist_i8_i8_host_tensor(Rpp8s *srcPtr,
                                         Rpp32f *saturationTensor,
                                         RpptROIPtr roiTensorPtrSrc,
                                         RpptRoiType roiType,
-                                        RppLayoutParams layoutParams)
+                                        RppLayoutParams layoutParams,
+                                        rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
diff --git a/src/modules/cpu/kernel/contrast.hpp b/src/modules/cpu/kernel/contrast.hpp
index 5855584bc..d3de3b0b4 100644
--- a/src/modules/cpu/kernel/contrast.hpp
+++ b/src/modules/cpu/kernel/contrast.hpp
@@ -32,12 +32,14 @@ RppStatus contrast_u8_u8_host_tensor(Rpp8u *srcPtr,
                                      Rpp32f *contrastCenterTensor,
                                      RpptROIPtr roiTensorPtrSrc,
                                      RpptRoiType roiType,
-                                     RppLayoutParams layoutParams)
+                                     RppLayoutParams layoutParams,
+                                     rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -217,12 +219,14 @@ RppStatus contrast_f32_f32_host_tensor(Rpp32f *srcPtr,
                                        Rpp32f *contrastCenterTensor,
                                        RpptROIPtr roiTensorPtrSrc,
                                        RpptRoiType roiType,
-                                       RppLayoutParams layoutParams)
+                                       RppLayoutParams layoutParams,
+                                       rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -403,12 +407,14 @@ RppStatus contrast_f16_f16_host_tensor(Rpp16f *srcPtr,
                                        Rpp32f *contrastCenterTensor,
                                        RpptROIPtr roiTensorPtrSrc,
                                        RpptRoiType roiType,
-                                       RppLayoutParams layoutParams)
+                                       RppLayoutParams layoutParams,
+                                       rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -621,12 +627,14 @@ RppStatus contrast_i8_i8_host_tensor(Rpp8s *srcPtr,
                                      Rpp32f *contrastCenterTensor,
                                      RpptROIPtr roiTensorPtrSrc,
                                      RpptRoiType roiType,
-                                     RppLayoutParams layoutParams)
+                                     RppLayoutParams layoutParams,
+                                     rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
diff --git a/src/modules/cpu/kernel/copy.hpp b/src/modules/cpu/kernel/copy.hpp
index 4b94cf168..e9f4655df 100644
--- a/src/modules/cpu/kernel/copy.hpp
+++ b/src/modules/cpu/kernel/copy.hpp
@@ -28,13 +28,16 @@ RppStatus copy_u8_u8_host_tensor(Rpp8u *srcPtr,
                                  RpptDescPtr srcDescPtr,
                                  Rpp8u *dstPtr,
                                  RpptDescPtr dstDescPtr,
-                                 RppLayoutParams layoutParams)
+                                 RppLayoutParams layoutParams,
+                                 rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW)
     if ((srcDescPtr->c == 1) || (srcDescPtr->layout == dstDescPtr->layout))
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
         {
             Rpp8u *srcPtrImage, *dstPtrImage;
@@ -48,7 +51,7 @@ RppStatus copy_u8_u8_host_tensor(Rpp8u *srcPtr,
     else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
         {
             Rpp8u *srcPtrImage, *dstPtrImage;
@@ -103,7 +106,7 @@ RppStatus copy_u8_u8_host_tensor(Rpp8u *srcPtr,
     else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
        {
             Rpp8u *srcPtrImage, *dstPtrImage;
@@ -161,13 +164,16 @@ RppStatus copy_f32_f32_host_tensor(Rpp32f *srcPtr,
                                    RpptDescPtr srcDescPtr,
                                    Rpp32f *dstPtr,
                                    RpptDescPtr dstDescPtr,
-                                   RppLayoutParams layoutParams)
+                                   RppLayoutParams layoutParams,
+                                   rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW)
     if ((srcDescPtr->c == 1) || (srcDescPtr->layout == dstDescPtr->layout))
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
         {
             Rpp32f *srcPtrImage, *dstPtrImage;
@@ -181,7 +187,7 @@ RppStatus copy_f32_f32_host_tensor(Rpp32f *srcPtr,
     else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
         {
             Rpp32f *srcPtrImage, *dstPtrImage;
@@ -237,7 +243,7 @@ RppStatus copy_f32_f32_host_tensor(Rpp32f *srcPtr,
     else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
         {
             Rpp32f *srcPtrImage, *dstPtrImage;
@@ -296,13 +302,16 @@ RppStatus copy_f16_f16_host_tensor(Rpp16f *srcPtr,
                                    RpptDescPtr srcDescPtr,
                                    Rpp16f *dstPtr,
                                    RpptDescPtr dstDescPtr,
-                                   RppLayoutParams layoutParams)
+                                   RppLayoutParams layoutParams,
+                                   rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW)
     if ((srcDescPtr->c == 1) || (srcDescPtr->layout == dstDescPtr->layout))
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
         {
             Rpp16f *srcPtrImage, *dstPtrImage;
@@ -316,7 +325,7 @@ RppStatus copy_f16_f16_host_tensor(Rpp16f *srcPtr,
     else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
         {
             Rpp16f *srcPtrImage, *dstPtrImage;
@@ -388,7 +397,7 @@ RppStatus copy_f16_f16_host_tensor(Rpp16f *srcPtr,
     else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
         {
             Rpp16f *srcPtrImage, *dstPtrImage;
@@ -463,13 +472,16 @@ RppStatus copy_i8_i8_host_tensor(Rpp8s *srcPtr,
                                  RpptDescPtr srcDescPtr,
                                  Rpp8s *dstPtr,
                                  RpptDescPtr dstDescPtr,
-                                 RppLayoutParams layoutParams)
+                                 RppLayoutParams layoutParams,
+                                 rpp::Handle& handle)
 {
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    // Copy without fused output-layout toggle (NHWC -> NHWC or NCHW -> NCHW)
     if ((srcDescPtr->c == 1) || (srcDescPtr->layout == dstDescPtr->layout))
     {
-        omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+        omp_set_dynamic(0);
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
         {
             Rpp8s *srcPtrImage, *dstPtrImage;
@@ -483,7 +495,7 @@ RppStatus copy_i8_i8_host_tensor(Rpp8s *srcPtr,
     else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
     {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
         {
             Rpp8s *srcPtrImage, *dstPtrImage;
@@ -538,7 +550,7 @@ RppStatus copy_i8_i8_host_tensor(Rpp8s *srcPtr,
     else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
    {
         omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
         for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
         {
             Rpp8s *srcPtrImage, *dstPtrImage;
diff --git a/src/modules/cpu/kernel/crop.hpp b/src/modules/cpu/kernel/crop.hpp
index bed2b1131..d88f85ad9 100644
--- a/src/modules/cpu/kernel/crop.hpp
+++ b/src/modules/cpu/kernel/crop.hpp
@@ -30,12 +30,14 @@ RppStatus crop_u8_u8_host_tensor(Rpp8u *srcPtr,
                                  RpptDescPtr dstDescPtr,
                                  RpptROIPtr roiTensorPtrSrc,
                                  RpptRoiType roiType,
-                                 RppLayoutParams layoutParams)
+                                 RppLayoutParams layoutParams,
+                                 rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -179,12 +181,14 @@ RppStatus crop_f32_f32_host_tensor(Rpp32f *srcPtr,
                                    RpptDescPtr dstDescPtr,
                                    RpptROIPtr roiTensorPtrSrc,
                                    RpptRoiType roiType,
-                                   RppLayoutParams layoutParams)
+                                   RppLayoutParams layoutParams,
+                                   rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -330,12 +334,14 @@ RppStatus crop_f16_f16_host_tensor(Rpp16f *srcPtr,
                                    RpptDescPtr dstDescPtr,
                                    RpptROIPtr roiTensorPtrSrc,
                                    RpptRoiType roiType,
-                                   RppLayoutParams layoutParams)
+                                   RppLayoutParams layoutParams,
+                                   rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -513,12 +519,14 @@ RppStatus crop_i8_i8_host_tensor(Rpp8s *srcPtr,
                                  RpptDescPtr dstDescPtr,
                                  RpptROIPtr roiTensorPtrSrc,
                                  RpptRoiType roiType,
-                                 RppLayoutParams layoutParams)
+                                 RppLayoutParams layoutParams,
+                                 rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
diff --git a/src/modules/cpu/kernel/crop_mirror_normalize.hpp b/src/modules/cpu/kernel/crop_mirror_normalize.hpp
index f0d70579b..f84c8ce72 100644
--- a/src/modules/cpu/kernel/crop_mirror_normalize.hpp
+++ b/src/modules/cpu/kernel/crop_mirror_normalize.hpp
@@ -33,12 +33,14 @@ RppStatus crop_mirror_normalize_u8_u8_host_tensor(Rpp8u *srcPtr,
                                                   Rpp32u *mirrorTensor,
                                                   RpptROIPtr roiTensorPtrSrc,
                                                   RpptRoiType roiType,
-                                                  RppLayoutParams layoutParams)
+                                                  RppLayoutParams layoutParams,
+                                                  rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -451,12 +453,14 @@ RppStatus crop_mirror_normalize_f32_f32_host_tensor(Rpp32f *srcPtr,
                                                     Rpp32u *mirrorTensor,
                                                     RpptROIPtr roiTensorPtrSrc,
                                                     RpptRoiType roiType,
-                                                    RppLayoutParams layoutParams)
+                                                    RppLayoutParams layoutParams,
+                                                    rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -869,12 +873,14 @@ RppStatus crop_mirror_normalize_f16_f16_host_tensor(Rpp16f *srcPtr,
                                                     Rpp32u *mirrorTensor,
                                                     RpptROIPtr roiTensorPtrSrc,
                                                     RpptRoiType roiType,
-                                                    RppLayoutParams layoutParams)
+                                                    RppLayoutParams layoutParams,
+                                                    rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -1334,12 +1340,14 @@ RppStatus crop_mirror_normalize_i8_i8_host_tensor(Rpp8s *srcPtr,
                                                   Rpp32u *mirrorTensor,
                                                   RpptROIPtr roiTensorPtrSrc,
                                                   RpptRoiType roiType,
-                                                  RppLayoutParams layoutParams)
+                                                  RppLayoutParams layoutParams,
+                                                  rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -1752,12 +1760,14 @@ RppStatus crop_mirror_normalize_u8_f32_host_tensor(Rpp8u *srcPtr,
                                                    Rpp32u *mirrorTensor,
                                                    RpptROIPtr roiTensorPtrSrc,
                                                    RpptRoiType roiType,
-                                                   RppLayoutParams layoutParams)
+                                                   RppLayoutParams layoutParams,
+                                                   rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -2198,12 +2208,14 @@ RppStatus crop_mirror_normalize_u8_f16_host_tensor(Rpp8u *srcPtr,
                                                    Rpp32u *mirrorTensor,
                                                    RpptROIPtr roiTensorPtrSrc,
                                                    RpptRoiType roiType,
-                                                   RppLayoutParams layoutParams)
+                                                   RppLayoutParams layoutParams,
+                                                   rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
diff --git a/src/modules/cpu/kernel/exposure.hpp b/src/modules/cpu/kernel/exposure.hpp
index 40dd072a9..80b0fa53a 100644
--- a/src/modules/cpu/kernel/exposure.hpp
+++ b/src/modules/cpu/kernel/exposure.hpp
@@ -31,12 +31,14 @@ RppStatus exposure_u8_u8_host_tensor(Rpp8u *srcPtr,
                                      Rpp32f *exposureFactorTensor,
                                      RpptROIPtr roiTensorPtrSrc,
                                      RpptRoiType roiType,
-                                     RppLayoutParams layoutParams)
+                                     RppLayoutParams layoutParams,
+                                     rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -215,12 +217,14 @@ RppStatus exposure_f32_f32_host_tensor(Rpp32f *srcPtr,
                                        Rpp32f *exposureFactorTensor,
                                        RpptROIPtr roiTensorPtrSrc,
                                        RpptRoiType roiType,
-                                       RppLayoutParams layoutParams)
+                                       RppLayoutParams layoutParams,
+                                       rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -399,12 +403,14 @@ RppStatus exposure_f16_f16_host_tensor(Rpp16f *srcPtr,
                                        Rpp32f *exposureFactorTensor,
                                        RpptROIPtr roiTensorPtrSrc,
                                        RpptRoiType roiType,
-                                       RppLayoutParams layoutParams)
+                                       RppLayoutParams layoutParams,
+                                       rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -617,12 +623,14 @@ RppStatus exposure_i8_i8_host_tensor(Rpp8s *srcPtr,
                                      Rpp32f *exposureFactorTensor,
                                      RpptROIPtr roiTensorPtrSrc,
                                      RpptRoiType roiType,
-                                     RppLayoutParams layoutParams)
+                                     RppLayoutParams layoutParams,
+                                     rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
diff --git a/src/modules/cpu/kernel/flip.hpp b/src/modules/cpu/kernel/flip.hpp
index 7463fc84b..b8ed63835 100644
--- a/src/modules/cpu/kernel/flip.hpp
+++ b/src/modules/cpu/kernel/flip.hpp
@@ -32,12 +32,14 @@ RppStatus flip_u8_u8_host_tensor(Rpp8u *srcPtr,
                                  Rpp32u *verticalTensor,
                                  RpptROIPtr roiTensorPtrSrc,
                                  RpptRoiType roiType,
-                                 RppLayoutParams layoutParams)
+                                 RppLayoutParams layoutParams,
+                                 rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -308,12 +310,14 @@ RppStatus flip_f32_f32_host_tensor(Rpp32f *srcPtr,
                                    Rpp32u *verticalTensor,
                                    RpptROIPtr roiTensorPtrSrc,
                                    RpptRoiType roiType,
-                                   RppLayoutParams layoutParams)
+                                   RppLayoutParams layoutParams,
+                                   rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -585,12 +589,14 @@ RppStatus flip_f16_f16_host_tensor(Rpp16f *srcPtr,
                                    Rpp32u *verticalTensor,
                                    RpptROIPtr roiTensorPtrSrc,
                                    RpptRoiType roiType,
-                                   RppLayoutParams layoutParams)
+                                   RppLayoutParams layoutParams,
+                                   rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -902,12 +908,14 @@ RppStatus flip_i8_i8_host_tensor(Rpp8s *srcPtr,
                                  Rpp32u *verticalTensor,
                                  RpptROIPtr roiTensorPtrSrc,
                                  RpptRoiType roiType,
-                                 RppLayoutParams layoutParams)
+                                 RppLayoutParams layoutParams,
+                                 rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
diff --git a/src/modules/cpu/kernel/gamma_correction.hpp b/src/modules/cpu/kernel/gamma_correction.hpp
index b6993332b..e32a1207a 100644
--- a/src/modules/cpu/kernel/gamma_correction.hpp
+++ b/src/modules/cpu/kernel/gamma_correction.hpp
@@ -31,12 +31,14 @@ RppStatus gamma_correction_u8_u8_host_tensor(Rpp8u *srcPtr,
                                              Rpp32f *gammaTensor,
                                              RpptROIPtr roiTensorPtrSrc,
                                              RpptRoiType roiType,
-                                             RppLayoutParams layoutParams)
+                                             RppLayoutParams layoutParams,
+                                             rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -179,12 +181,14 @@ RppStatus gamma_correction_f32_f32_host_tensor(Rpp32f *srcPtr,
                                                Rpp32f *gammaTensor,
                                                RpptROIPtr roiTensorPtrSrc,
                                                RpptRoiType roiType,
-                                               RppLayoutParams layoutParams)
+                                               RppLayoutParams layoutParams,
+                                               rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -327,12 +331,14 @@ RppStatus gamma_correction_f16_f16_host_tensor(Rpp16f *srcPtr,
                                                Rpp32f *gammaTensor,
                                                RpptROIPtr roiTensorPtrSrc,
                                                RpptRoiType roiType,
-                                               RppLayoutParams layoutParams)
+                                               RppLayoutParams layoutParams,
+                                               rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -475,12 +481,14 @@ RppStatus gamma_correction_i8_i8_host_tensor(Rpp8s *srcPtr,
                                              Rpp32f *gammaTensor,
                                              RpptROIPtr roiTensorPtrSrc,
                                              RpptRoiType roiType,
-                                             RppLayoutParams layoutParams)
+                                             RppLayoutParams layoutParams,
+                                             rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
diff --git a/src/modules/cpu/kernel/gridmask.hpp b/src/modules/cpu/kernel/gridmask.hpp
index 745b38243..54030317d 100644
--- a/src/modules/cpu/kernel/gridmask.hpp
+++ b/src/modules/cpu/kernel/gridmask.hpp
@@ -34,12 +34,14 @@ RppStatus gridmask_u8_u8_host_tensor(Rpp8u *srcPtr,
                                      RpptUintVector2D translateVector,
                                      RpptROIPtr roiTensorPtrSrc,
                                      RpptRoiType roiType,
-                                     RppLayoutParams layoutParams)
+                                     RppLayoutParams layoutParams,
+                                     rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -438,12 +440,14 @@ RppStatus gridmask_f32_f32_host_tensor(Rpp32f *srcPtr,
                                        RpptUintVector2D translateVector,
                                        RpptROIPtr roiTensorPtrSrc,
                                        RpptRoiType roiType,
-                                       RppLayoutParams layoutParams)
+                                       RppLayoutParams layoutParams,
+                                       rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -824,12 +828,14 @@ RppStatus gridmask_f16_f16_host_tensor(Rpp16f *srcPtr,
                                        RpptUintVector2D translateVector,
                                        RpptROIPtr roiTensorPtrSrc,
                                        RpptRoiType roiType,
-                                       RppLayoutParams layoutParams)
+                                       RppLayoutParams layoutParams,
+                                       rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -1263,12 +1269,14 @@ RppStatus gridmask_i8_i8_host_tensor(Rpp8s *srcPtr,
                                      RpptUintVector2D translateVector,
                                      RpptROIPtr roiTensorPtrSrc,
                                      RpptRoiType roiType,
-                                     RppLayoutParams layoutParams)
+                                     RppLayoutParams layoutParams,
+                                     rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
diff --git a/src/modules/cpu/kernel/noise_gaussian.hpp b/src/modules/cpu/kernel/noise_gaussian.hpp
index d7cfe4d56..125839e3c 100644
--- a/src/modules/cpu/kernel/noise_gaussian.hpp
+++ b/src/modules/cpu/kernel/noise_gaussian.hpp
@@ -45,12 +45,14 @@ RppStatus gaussian_noise_u8_u8_host_tensor(Rpp8u *srcPtr,
                                            RpptXorwowStateBoxMuller *xorwowInitialStatePtr,
                                            RpptROIPtr roiTensorPtrSrc,
                                            RpptRoiType roiType,
-                                           RppLayoutParams layoutParams)
+                                           RppLayoutParams layoutParams,
+                                           rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -376,12 +378,14 @@ RppStatus gaussian_noise_f32_f32_host_tensor(Rpp32f *srcPtr,
                                              RpptXorwowStateBoxMuller *xorwowInitialStatePtr,
                                              RpptROIPtr roiTensorPtrSrc,
                                              RpptRoiType roiType,
-                                             RppLayoutParams layoutParams)
+                                             RppLayoutParams layoutParams,
+                                             rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -696,12 +700,14 @@ RppStatus gaussian_noise_f16_f16_host_tensor(Rpp16f *srcPtr,
                                              RpptXorwowStateBoxMuller *xorwowInitialStatePtr,
                                              RpptROIPtr roiTensorPtrSrc,
                                              RpptRoiType roiType,
-                                             RppLayoutParams layoutParams)
+                                             RppLayoutParams layoutParams,
+                                             rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -1061,12 +1067,14 @@ RppStatus gaussian_noise_i8_i8_host_tensor(Rpp8s *srcPtr,
                                            RpptXorwowStateBoxMuller *xorwowInitialStatePtr,
                                            RpptROIPtr roiTensorPtrSrc,
                                            RpptRoiType roiType,
-                                           RppLayoutParams layoutParams)
+                                           RppLayoutParams layoutParams,
+                                           rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
diff --git a/src/modules/cpu/kernel/noise_salt_and_pepper.hpp b/src/modules/cpu/kernel/noise_salt_and_pepper.hpp
index 6c57125e2..c949927fd 100644
--- a/src/modules/cpu/kernel/noise_salt_and_pepper.hpp
+++ b/src/modules/cpu/kernel/noise_salt_and_pepper.hpp
@@ -51,12 +51,14 @@ RppStatus salt_and_pepper_noise_u8_u8_host_tensor(Rpp8u *srcPtr,
                                                   RpptXorwowState *xorwowInitialStatePtr,
                                                   RpptROIPtr roiTensorPtrSrc,
                                                   RpptRoiType roiType,
-                                                  RppLayoutParams layoutParams)
+                                                  RppLayoutParams layoutParams,
+                                                  rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -422,12 +424,14 @@ RppStatus salt_and_pepper_noise_f32_f32_host_tensor(Rpp32f *srcPtr,
                                                     RpptXorwowState *xorwowInitialStatePtr,
                                                     RpptROIPtr roiTensorPtrSrc,
                                                     RpptRoiType roiType,
-                                                    RppLayoutParams layoutParams)
+                                                    RppLayoutParams layoutParams,
+                                                    rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -800,12 +804,14 @@ RppStatus salt_and_pepper_noise_f16_f16_host_tensor(Rpp16f *srcPtr,
                                                     RpptXorwowState *xorwowInitialStatePtr,
                                                     RpptROIPtr roiTensorPtrSrc,
                                                     RpptRoiType roiType,
-                                                    RppLayoutParams layoutParams)
+                                                    RppLayoutParams layoutParams,
+                                                    rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
@@ -1223,12 +1229,14 @@ RppStatus salt_and_pepper_noise_i8_i8_host_tensor(Rpp8s *srcPtr,
                                                   RpptXorwowState *xorwowInitialStatePtr,
                                                   RpptROIPtr roiTensorPtrSrc,
                                                   RpptRoiType roiType,
-                                                  RppLayoutParams layoutParams)
+                                                  RppLayoutParams layoutParams,
+                                                  rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
     omp_set_dynamic(0);
-#pragma omp parallel for num_threads(dstDescPtr->n)
+#pragma omp parallel for num_threads(numThreads)
     for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
     {
         RpptROI roi;
diff --git a/src/modules/cpu/kernel/noise_shot.hpp b/src/modules/cpu/kernel/noise_shot.hpp
index e87344c4e..b65511c7f 100644
--- a/src/modules/cpu/kernel/noise_shot.hpp
+++ b/src/modules/cpu/kernel/noise_shot.hpp
@@ -44,12 +44,14 @@ RppStatus shot_noise_u8_u8_host_tensor(Rpp8u *srcPtr,
                                        RpptXorwowState *xorwowInitialStatePtr,
                                        RpptROIPtr roiTensorPtrSrc,
                                        RpptRoiType roiType,
-                                       RppLayoutParams layoutParams)
+                                       RppLayoutParams layoutParams,
+                                       rpp::Handle& handle)
 {
     RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads =
handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -423,12 +425,14 @@ RppStatus shot_noise_f32_f32_host_tensor(Rpp32f *srcPtr, RpptXorwowState *xorwowInitialStatePtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -811,12 +815,14 @@ RppStatus shot_noise_f16_f16_host_tensor(Rpp16f *srcPtr, RpptXorwowState *xorwowInitialStatePtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1244,12 +1250,14 @@ RppStatus shot_noise_i8_i8_host_tensor(Rpp8s *srcPtr, RpptXorwowState *xorwowInitialStatePtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/non_linear_blend.hpp b/src/modules/cpu/kernel/non_linear_blend.hpp index 7bf86e948..db2180afd 100644 --- a/src/modules/cpu/kernel/non_linear_blend.hpp +++ b/src/modules/cpu/kernel/non_linear_blend.hpp @@ -80,12 +80,14 @@ RppStatus non_linear_blend_u8_u8_host_tensor(Rpp8u *srcPtr1, Rpp32f *stdDevTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle &handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -446,12 +448,14 @@ RppStatus non_linear_blend_f32_f32_host_tensor(Rpp32f *srcPtr1, Rpp32f *stdDevTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle &handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -812,12 +816,14 @@ RppStatus non_linear_blend_i8_i8_host_tensor(Rpp8s *srcPtr1, Rpp32f *stdDevTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams 
layoutParams, + rpp::Handle &handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1178,12 +1184,14 @@ RppStatus non_linear_blend_f16_f16_host_tensor(Rpp16f *srcPtr1, Rpp32f *stdDevTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle &handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/resize.hpp b/src/modules/cpu/kernel/resize.hpp index 35a2fe72a..582905c86 100644 --- a/src/modules/cpu/kernel/resize.hpp +++ b/src/modules/cpu/kernel/resize.hpp @@ -34,12 +34,14 @@ RppStatus resize_nn_u8_u8_host_tensor(Rpp8u *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -253,12 +255,14 @@ RppStatus resize_nn_f32_f32_host_tensor(Rpp32f *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -464,12 +468,14 @@ RppStatus resize_nn_i8_i8_host_tensor(Rpp8s *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -683,12 +689,14 @@ RppStatus resize_nn_f16_f16_host_tensor(Rpp16f *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -791,12 +799,14 @@ 
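The hunks above and below apply one recurring change to every CPU kernel in this patch: the OpenMP team size, previously hard-wired to the batch size via num_threads(dstDescPtr->n), is now read from the handle with GetNumThreads(). A minimal, self-contained sketch of that threading pattern, using illustrative names (processBatch and processImage are not RPP APIs):

    #include <omp.h>

    void processImage(unsigned char *img);       // hypothetical per-image worker

    void processBatch(unsigned char **images, int batchSize, unsigned int numThreads)
    {
        if (numThreads == 0)
            numThreads = (unsigned int)batchSize; // 0 falls back to one thread per image
        omp_set_dynamic(0);                       // keep the team size at exactly numThreads
        #pragma omp parallel for num_threads(numThreads)
        for (int i = 0; i < batchSize; i++)
            processImage(images[i]);              // iterations are independent across images
    }

Decoupling the team size from the batch size matters once batches exceed the core count: a 128-image batch no longer requests 128 threads, while a count of 0 preserves the old one-thread-per-image behavior.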
RppStatus resize_bilinear_u8_u8_host_tensor(Rpp8u *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1030,12 +1040,14 @@ RppStatus resize_bilinear_f32_f32_host_tensor(Rpp32f *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1271,12 +1283,14 @@ RppStatus resize_bilinear_f16_f16_host_tensor(Rpp16f *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1513,12 +1527,14 @@ RppStatus resize_bilinear_i8_i8_host_tensor(Rpp8s *srcPtr, RpptImagePatchPtr dstImgSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1757,12 +1773,14 @@ RppStatus resize_separable_host_tensor(T *srcPtr, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, RppLayoutParams srcLayoutParams, - RpptInterpolationType interpolationType) + RpptInterpolationType interpolationType, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/resize_crop_mirror.hpp b/src/modules/cpu/kernel/resize_crop_mirror.hpp index a02c1129b..6cf60a118 100644 --- a/src/modules/cpu/kernel/resize_crop_mirror.hpp +++ b/src/modules/cpu/kernel/resize_crop_mirror.hpp @@ -32,12 +32,14 @@ RppStatus resize_crop_mirror_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, 
(Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -281,12 +283,14 @@ RppStatus resize_crop_mirror_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -535,12 +539,14 @@ RppStatus resize_crop_mirror_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -789,12 +795,14 @@ RppStatus resize_crop_mirror_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/resize_mirror_normalize.hpp b/src/modules/cpu/kernel/resize_mirror_normalize.hpp index 783648be3..8008d1d1b 100644 --- a/src/modules/cpu/kernel/resize_mirror_normalize.hpp +++ b/src/modules/cpu/kernel/resize_mirror_normalize.hpp @@ -34,12 +34,14 @@ RppStatus resize_mirror_normalize_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -314,12 +316,14 @@ RppStatus resize_mirror_normalize_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -598,12 
+602,14 @@ RppStatus resize_mirror_normalize_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -882,12 +888,14 @@ RppStatus resize_mirror_normalize_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1167,11 +1175,14 @@ RppStatus resize_mirror_normalize_u8_f32_host_tensor(Rpp8u *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + Rpp32u numThreads = handle.GetNumThreads(); + + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1448,11 +1459,14 @@ RppStatus resize_mirror_normalize_u8_f16_host_tensor(Rpp8u *srcPtr, Rpp32u *mirrorTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + Rpp32u numThreads = handle.GetNumThreads(); + + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/spatter.hpp b/src/modules/cpu/kernel/spatter.hpp index c78c1d386..d3b3305d4 100644 --- a/src/modules/cpu/kernel/spatter.hpp +++ b/src/modules/cpu/kernel/spatter.hpp @@ -33,12 +33,14 @@ RppStatus spatter_u8_u8_host_tensor(Rpp8u *srcPtr, RpptRGB spatterColor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -385,12 +387,14 @@ RppStatus spatter_f32_f32_host_tensor(Rpp32f *srcPtr, RpptRGB spatterColor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for 
num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -745,12 +749,14 @@ RppStatus spatter_f16_f16_host_tensor(Rpp16f *srcPtr, RpptRGB spatterColor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; @@ -1136,12 +1142,14 @@ RppStatus spatter_i8_i8_host_tensor(Rpp8s *srcPtr, RpptRGB spatterColor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi; diff --git a/src/modules/cpu/kernel/swap_channels.hpp b/src/modules/cpu/kernel/swap_channels.hpp index 68c153028..a590cbbb2 100644 --- a/src/modules/cpu/kernel/swap_channels.hpp +++ b/src/modules/cpu/kernel/swap_channels.hpp @@ -28,10 +28,12 @@ RppStatus swap_channels_u8_u8_host_tensor(Rpp8u *srcPtr, RpptDescPtr srcDescPtr, Rpp8u *dstPtr, RpptDescPtr dstDescPtr, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp8u *srcPtrImage, *dstPtrImage; @@ -224,10 +226,12 @@ RppStatus swap_channels_f32_f32_host_tensor(Rpp32f *srcPtr, RpptDescPtr srcDescPtr, Rpp32f *dstPtr, RpptDescPtr dstDescPtr, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp32f *srcPtrImage, *dstPtrImage; @@ -420,10 +424,12 @@ RppStatus swap_channels_f16_f16_host_tensor(Rpp16f *srcPtr, RpptDescPtr srcDescPtr, Rpp16f *dstPtr, RpptDescPtr dstDescPtr, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp16f *srcPtrImage, *dstPtrImage; @@ -656,10 +662,12 @@ RppStatus swap_channels_i8_i8_host_tensor(Rpp8s *srcPtr, RpptDescPtr srcDescPtr, Rpp8s *dstPtr, RpptDescPtr dstDescPtr, - RppLayoutParams layoutParams) + RppLayoutParams layoutParams, + rpp::Handle& handle) { + Rpp32u numThreads = handle.GetNumThreads(); omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { Rpp8s *srcPtrImage, *dstPtrImage; diff --git 
a/src/modules/cpu/kernel/warp_affine.hpp b/src/modules/cpu/kernel/warp_affine.hpp index 45a9e840c..4cf5649dd 100644 --- a/src/modules/cpu/kernel/warp_affine.hpp +++ b/src/modules/cpu/kernel/warp_affine.hpp @@ -61,12 +61,14 @@ RppStatus warp_affine_nn_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; @@ -285,12 +287,14 @@ RppStatus warp_affine_nn_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; @@ -509,12 +513,14 @@ RppStatus warp_affine_nn_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; @@ -733,12 +739,14 @@ RppStatus warp_affine_nn_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; @@ -874,12 +882,14 @@ RppStatus warp_affine_bilinear_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; @@ -1138,12 +1148,14 @@ RppStatus warp_affine_bilinear_f32_f32_host_tensor(Rpp32f *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, 
(Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; @@ -1402,12 +1414,14 @@ RppStatus warp_affine_bilinear_i8_i8_host_tensor(Rpp8s *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; @@ -1671,12 +1685,14 @@ RppStatus warp_affine_bilinear_f16_f16_host_tensor(Rpp16f *srcPtr, Rpp32f *affineTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams srcLayoutParams) + RppLayoutParams srcLayoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); -omp_set_dynamic(0); -#pragma omp parallel for num_threads(dstDescPtr->n) + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) { RpptROI roi, roiLTRB; diff --git a/src/modules/handle_api.cpp b/src/modules/handle_api.cpp index 2e4680601..17b62a4d9 100644 --- a/src/modules/handle_api.cpp +++ b/src/modules/handle_api.cpp @@ -58,9 +58,9 @@ extern "C" rppStatus_t rppCreate(rppHandle_t* handle) return rpp::try_([&] { rpp::deref(handle) = new rpp::Handle(); }); } -extern "C" rppStatus_t rppCreateWithBatchSize(rppHandle_t* handle, size_t nBatchSize) +extern "C" rppStatus_t rppCreateWithBatchSize(rppHandle_t* handle, size_t nBatchSize, Rpp32u numThreads) { - return rpp::try_([&] { rpp::deref(handle) = new rpp::Handle(nBatchSize); }); + return rpp::try_([&] { rpp::deref(handle) = new rpp::Handle(nBatchSize, numThreads); }); } extern "C" rppStatus_t rppDestroy(rppHandle_t handle) diff --git a/src/modules/handlehost.cpp b/src/modules/handlehost.cpp index 5bb0fb5e8..053388c3f 100644 --- a/src/modules/handlehost.cpp +++ b/src/modules/handlehost.cpp @@ -40,11 +40,14 @@ namespace rpp { struct HandleImpl { size_t nBatchSize = 1; + Rpp32u numThreads = 0; InitHandle* initHandle = nullptr; void PreInitializeBufferCPU() { this->initHandle = new InitHandle(); + if(this->numThreads == 0) + this->numThreads = this->nBatchSize; this->initHandle->nbatchSize = this->nBatchSize; this->initHandle->mem.mcpu.maxSrcSize = (RppiSize *)malloc(sizeof(RppiSize) * this->nBatchSize); @@ -54,9 +57,12 @@ struct HandleImpl } }; -Handle::Handle(size_t batchSize) : impl(new HandleImpl()) +Handle::Handle(size_t batchSize, Rpp32u numThreads) : impl(new HandleImpl()) { impl->nBatchSize = batchSize; + if(numThreads == 0) + numThreads = batchSize; + impl->numThreads = numThreads; impl->PreInitializeBufferCPU(); } @@ -81,6 +87,11 @@ size_t Handle::GetBatchSize() const return this->impl->nBatchSize; } +Rpp32u Handle::GetNumThreads() const +{ + return this->impl->numThreads; +} + void Handle::SetBatchSize(size_t bSize) const { this->impl->nBatchSize = bSize; diff --git a/src/modules/hip/handlehip.cpp b/src/modules/hip/handlehip.cpp index 795ebff54..273bf3f98 100644 --- 
a/src/modules/hip/handlehip.cpp +++ b/src/modules/hip/handlehip.cpp @@ -129,6 +129,7 @@ struct HandleImpl bool enable_profiling = false; float profiling_result = 0.0; size_t nBatchSize = 1; + Rpp32u numThreads = 0; InitHandle* initHandle = nullptr; HandleImpl() : ctx(get_ctx()) {} @@ -171,6 +172,8 @@ struct HandleImpl void PreInitializeBufferCPU() { this->initHandle = new InitHandle(); + if(this->numThreads == 0) + this->numThreads = this->nBatchSize; this->initHandle->nbatchSize = this->nBatchSize; this->initHandle->mem.mcpu.srcSize = (RppiSize *)malloc(sizeof(RppiSize) * this->nBatchSize); @@ -277,9 +280,12 @@ Handle::Handle(rppAcceleratorQueue_t stream) : impl(new HandleImpl()) RPP_LOG_I(*this); } -Handle::Handle(size_t batchSize) : impl(new HandleImpl()) +Handle::Handle(size_t batchSize, Rpp32u numThreads) : impl(new HandleImpl()) { impl->nBatchSize = batchSize; + if(numThreads == 0) + numThreads = batchSize; + impl->numThreads = numThreads; this->SetAllocator(nullptr, nullptr, nullptr); impl->PreInitializeBufferCPU(); } @@ -385,6 +391,11 @@ size_t Handle::GetBatchSize() const return this->impl->nBatchSize; } +Rpp32u Handle::GetNumThreads() const +{ + return this->impl->numThreads; +} + void Handle::SetBatchSize(size_t bSize) const { this->impl->nBatchSize = bSize; diff --git a/src/modules/rppi_advanced_augmentations.cpp b/src/modules/rppi_advanced_augmentations.cpp index b99dd5371..e2ed1b0f4 100644 --- a/src/modules/rppi_advanced_augmentations.cpp +++ b/src/modules/rppi_advanced_augmentations.cpp @@ -71,7 +71,8 @@ RppStatus water_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -91,7 +92,8 @@ RppStatus water_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -111,7 +113,8 @@ RppStatus water_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -131,7 +134,8 @@ RppStatus water_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } @@ -230,7 +234,8 @@ RppStatus non_linear_blend_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -246,7 +251,8 @@ RppStatus non_linear_blend_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -262,7 +268,8 @@ RppStatus non_linear_blend_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -278,7 +285,8 @@ RppStatus non_linear_blend_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + 
rpp::deref(rppHandle)); } } @@ -381,7 +389,8 @@ RppStatus color_cast_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -399,7 +408,8 @@ RppStatus color_cast_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -417,7 +427,8 @@ RppStatus color_cast_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -435,7 +446,8 @@ RppStatus color_cast_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } @@ -518,7 +530,8 @@ RppStatus erase_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -536,7 +549,8 @@ RppStatus erase_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -554,7 +568,8 @@ RppStatus erase_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -572,7 +587,8 @@ RppStatus erase_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } @@ -687,7 +703,8 @@ RppStatus crop_and_patch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -712,7 +729,8 @@ RppStatus crop_and_patch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -737,7 +755,8 @@ RppStatus crop_and_patch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -762,7 +781,8 @@ RppStatus crop_and_patch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } @@ -859,7 +879,8 @@ RppStatus lut_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -874,7 +895,8 @@ RppStatus lut_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + 
rpp::deref(rppHandle)); } } @@ -951,7 +973,8 @@ RppStatus glitch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -971,7 +994,8 @@ RppStatus glitch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -991,7 +1015,8 @@ RppStatus glitch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -1011,7 +1036,8 @@ RppStatus glitch_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } diff --git a/src/modules/rppi_arithmetic_operations.cpp b/src/modules/rppi_arithmetic_operations.cpp index 5daf8be86..b0c1c10d4 100644 --- a/src/modules/rppi_arithmetic_operations.cpp +++ b/src/modules/rppi_arithmetic_operations.cpp @@ -60,7 +60,8 @@ rppi_add_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -90,7 +91,8 @@ rppi_add_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -120,7 +122,8 @@ rppi_add_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -152,7 +155,8 @@ rppi_subtract_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -182,7 +186,8 @@ rppi_subtract_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -212,7 +217,8 @@ rppi_subtract_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -244,7 +250,8 @@ rppi_multiply_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -274,7 +281,8 @@ rppi_multiply_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -304,7 +312,8 @@ rppi_multiply_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -336,7 +345,8 @@ rppi_absolute_difference_u8_pln1_batchPD_host(RppPtr_t srcPtr1, 
rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -366,7 +376,8 @@ rppi_absolute_difference_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -396,7 +407,8 @@ rppi_absolute_difference_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -428,7 +440,8 @@ rppi_phase_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -458,7 +471,8 @@ rppi_phase_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -488,7 +502,8 @@ rppi_phase_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -520,7 +535,8 @@ rppi_magnitude_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -550,7 +566,8 @@ rppi_magnitude_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -580,7 +597,8 @@ rppi_magnitude_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -610,7 +628,8 @@ rppi_accumulate_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -638,7 +657,8 @@ rppi_accumulate_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -666,7 +686,8 @@ rppi_accumulate_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -698,7 +719,8 @@ rppi_accumulate_weighted_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -728,7 +750,8 @@ rppi_accumulate_weighted_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -758,7 +781,8 @@ rppi_accumulate_weighted_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, 
rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -786,7 +810,8 @@ rppi_accumulate_squared_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -812,7 +837,8 @@ rppi_accumulate_squared_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -838,7 +864,8 @@ rppi_accumulate_squared_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_color_model_conversions.cpp b/src/modules/rppi_color_model_conversions.cpp index efd9244f9..d847b7f68 100644 --- a/src/modules/rppi_color_model_conversions.cpp +++ b/src/modules/rppi_color_model_conversions.cpp @@ -60,7 +60,8 @@ rppi_hueRGB_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -90,7 +91,8 @@ rppi_hueRGB_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -122,7 +124,8 @@ rppi_saturationRGB_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -152,7 +155,8 @@ rppi_saturationRGB_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -179,7 +183,8 @@ rppi_color_convert_u8_pln3_batchPS_host(RppPtr_t srcPtr, convert_mode, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); } else if(convert_mode == RppiColorConvertMode::HSV_RGB) { @@ -190,7 +195,8 @@ rppi_color_convert_u8_pln3_batchPS_host(RppPtr_t srcPtr, convert_mode, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -216,7 +222,8 @@ rppi_color_convert_u8_pkd3_batchPS_host(RppPtr_t srcPtr, convert_mode, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); } else if(convert_mode == RppiColorConvertMode::HSV_RGB) { @@ -227,7 +234,8 @@ rppi_color_convert_u8_pkd3_batchPS_host(RppPtr_t srcPtr, convert_mode, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -260,7 +268,8 @@ rppi_color_temperature_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -290,7 +299,8 @@ rppi_color_temperature_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return 
RPP_SUCCESS; } @@ -320,7 +330,8 @@ rppi_color_temperature_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -352,7 +363,8 @@ rppi_vignette_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -382,7 +394,8 @@ rppi_vignette_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -412,7 +425,8 @@ rppi_vignette_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -437,7 +451,8 @@ rppi_channel_extract_u8_pln1_batchPD_host(RppPtr_t srcPtr, extractChannelNumber, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -460,7 +475,8 @@ rppi_channel_extract_u8_pln3_batchPD_host(RppPtr_t srcPtr, extractChannelNumber, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -483,7 +499,8 @@ rppi_channel_extract_u8_pkd3_batchPD_host(RppPtr_t srcPtr, extractChannelNumber, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -510,7 +527,8 @@ rppi_channel_combine_u8_pln1_batchPD_host(RppPtr_t srcPtr1, static_cast<Rpp8u*>(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -535,7 +553,8 @@ rppi_channel_combine_u8_pln3_batchPD_host(RppPtr_t srcPtr1, static_cast<Rpp8u*>(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -560,7 +579,8 @@ rppi_channel_combine_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, static_cast<Rpp8u*>(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -592,7 +612,8 @@ rppi_look_up_table_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -622,7 +643,8 @@ rppi_look_up_table_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -652,7 +674,8 @@ rppi_look_up_table_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_computer_vision.cpp b/src/modules/rppi_computer_vision.cpp index 512e5ebc2..1bd097ed9 100644 --- a/src/modules/rppi_computer_vision.cpp +++ b/src/modules/rppi_computer_vision.cpp @@ -58,7 +58,8 @@ rppi_local_binary_pattern_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -86,7 +87,8 @@
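Stepping back from the per-file plumbing: both handle back ends earlier in this patch (handlehost.cpp and handlehip.cpp) encode the same fallback, where a thread count of 0 at construction resolves to the batch size and GetNumThreads() returns the stored value. A self-contained sketch of just that rule (HandleSketch is an illustrative stand-in for rpp::Handle, not the real class):

    #include <cstddef>

    struct HandleSketch                      // illustrative stand-in for rpp::Handle
    {
        std::size_t nBatchSize;
        unsigned int numThreads;

        HandleSketch(std::size_t batchSize, unsigned int threads)
            : nBatchSize(batchSize),
              numThreads(threads == 0 ? static_cast<unsigned int>(batchSize) : threads) {}

        unsigned int GetNumThreads() const { return numThreads; }
    };

Passing 0 through the three-argument rppCreateWithBatchSize therefore keeps the library's previous one-thread-per-image behavior for existing pipelines.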
rppi_local_binary_pattern_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -114,7 +116,8 @@ rppi_local_binary_pattern_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -137,7 +140,8 @@ rppi_data_object_copy_u8_pln1_batchPD_host(RppPtr_t srcPtr, static_cast<Rpp8u*>(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -158,7 +162,8 @@ rppi_data_object_copy_u8_pln3_batchPD_host(RppPtr_t srcPtr, static_cast<Rpp8u*>(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -179,7 +184,8 @@ rppi_data_object_copy_u8_pkd3_batchPD_host(RppPtr_t srcPtr, static_cast<Rpp8u*>(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -206,7 +212,8 @@ rppi_gaussian_image_pyramid_u8_pln1_batchPD_host(RppPtr_t srcPtr, kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -231,7 +238,8 @@ rppi_gaussian_image_pyramid_u8_pln3_batchPD_host(RppPtr_t srcPtr, kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -256,7 +264,8 @@ rppi_gaussian_image_pyramid_u8_pkd3_batchPD_host(RppPtr_t srcPtr, kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -283,7 +292,8 @@ rppi_laplacian_image_pyramid_u8_pln1_batchPD_host(RppPtr_t srcPtr, kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -308,7 +318,8 @@ rppi_laplacian_image_pyramid_u8_pln3_batchPD_host(RppPtr_t srcPtr, kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -333,7 +344,8 @@ rppi_laplacian_image_pyramid_u8_pkd3_batchPD_host(RppPtr_t srcPtr, kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -360,7 +372,8 @@ rppi_canny_edge_detector_u8_pln1_batchPD_host(RppPtr_t srcPtr, maxThreshold, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -385,7 +398,8 @@ rppi_canny_edge_detector_u8_pln3_batchPD_host(RppPtr_t srcPtr, maxThreshold, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -410,7 +424,8 @@ rppi_canny_edge_detector_u8_pkd3_batchPD_host(RppPtr_t srcPtr, maxThreshold, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -445,7 +460,8 @@ rppi_harris_corner_detector_u8_pln1_batchPD_host(RppPtr_t srcPtr, nonmaxKernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -478,7 +494,8 @@ rppi_harris_corner_detector_u8_pln3_batchPD_host(RppPtr_t srcPtr, nonmaxKernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -511,7 +528,8 @@
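The rppi_*.cpp wrapper changes in this patch are all the same mechanical edit: each batchPD host entry point gains rpp::deref(rppHandle) as a trailing argument so the kernel underneath can query the thread count. A representative sketch under assumed names (the wrapper and example_host_batch are hypothetical, and rpp::deref/rpp::Handle come from the library's internal handle headers; the argument order mirrors the hunks):

    #include <rpp.h>   // public RPP types; rpp::deref/rpp::Handle additionally need the internal handle headers

    // hypothetical kernel-side batch function; the real per-op equivalents take many more arguments
    RppStatus example_host_batch(Rpp8u *srcPtr, Rpp8u *dstPtr, Rpp32u nbatchSize,
                                 RppiChnFormat chnFormat, Rpp32u channel, rpp::Handle &handle);

    RppStatus rppi_example_u8_pln1_batchPD_host(RppPtr_t srcPtr,
                                                RppPtr_t dstPtr,
                                                rppHandle_t rppHandle)
    {
        example_host_batch(static_cast<Rpp8u*>(srcPtr),
                           static_cast<Rpp8u*>(dstPtr),
                           rpp::deref(rppHandle).GetBatchSize(),
                           RPPI_CHN_PLANAR,
                           1,
                           rpp::deref(rppHandle));   // the new trailing handle argument
        return RPP_SUCCESS;
    }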
rppi_harris_corner_detector_u8_pkd3_batchPD_host(RppPtr_t srcPtr, nonmaxKernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -587,7 +605,8 @@ rppi_fast_corner_detector_u8_pln1_batchPD_host(RppPtr_t srcPtr, nonmaxKernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -614,7 +633,8 @@ rppi_fast_corner_detector_u8_pln3_batchPD_host(RppPtr_t srcPtr, nonmaxKernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -641,7 +661,8 @@ rppi_fast_corner_detector_u8_pkd3_batchPD_host(RppPtr_t srcPtr, nonmaxKernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -675,7 +696,8 @@ rppi_reconstruction_laplacian_image_pyramid_u8_pln1_batchPD_host(RppPtr_t srcPtr kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -707,7 +729,8 @@ rppi_reconstruction_laplacian_image_pyramid_u8_pln3_batchPD_host(RppPtr_t srcPtr kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -739,7 +762,8 @@ rppi_reconstruction_laplacian_image_pyramid_u8_pkd3_batchPD_host(RppPtr_t srcPtr kernelSize, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -832,7 +856,8 @@ rppi_hough_lines_u8_pln1_batchPD_host(RppPtr_t srcPtr, linesMax, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -865,7 +890,8 @@ rppi_hog_u8_pln1_batchPD_host(RppPtr_t srcPtr, numOfBins, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -892,7 +918,8 @@ rppi_remap_u8_pln1_batchPD_host(RppPtr_t srcPtr, colRemapTable, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -917,7 +944,8 @@ rppi_remap_u8_pln3_batchPD_host(RppPtr_t srcPtr, colRemapTable, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -942,7 +970,8 @@ rppi_remap_u8_pkd3_batchPD_host(RppPtr_t srcPtr, colRemapTable, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1047,7 +1076,8 @@ rppi_convert_bit_depth_u8s8_pln1_batchPD_host(RppPtr_t srcPtr, 1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1069,7 +1099,8 @@ rppi_convert_bit_depth_u8u16_pln1_batchPD_host(RppPtr_t srcPtr, 2, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1091,7 +1122,8 @@ rppi_convert_bit_depth_u8s16_pln1_batchPD_host(RppPtr_t srcPtr, 3, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1113,7 +1145,8 @@ rppi_convert_bit_depth_u8s8_pln3_batchPD_host(RppPtr_t srcPtr, 1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1135,7 +1168,8 @@ rppi_convert_bit_depth_u8u16_pln3_batchPD_host(RppPtr_t srcPtr, 2, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1157,7 +1191,8 @@ 
rppi_convert_bit_depth_u8s16_pln3_batchPD_host(RppPtr_t srcPtr, 3, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1179,7 +1214,8 @@ rppi_convert_bit_depth_u8s8_pkd3_batchPD_host(RppPtr_t srcPtr, 1, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1201,7 +1237,8 @@ rppi_convert_bit_depth_u8u16_pkd3_batchPD_host(RppPtr_t srcPtr, 2, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1223,7 +1260,8 @@ rppi_convert_bit_depth_u8s16_pkd3_batchPD_host(RppPtr_t srcPtr, 3, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_filter_operations.cpp b/src/modules/rppi_filter_operations.cpp index 277d54fa6..7636be81b 100644 --- a/src/modules/rppi_filter_operations.cpp +++ b/src/modules/rppi_filter_operations.cpp @@ -60,7 +60,8 @@ rppi_box_filter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -90,7 +91,8 @@ rppi_box_filter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -120,7 +122,8 @@ rppi_box_filter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -152,7 +155,8 @@ rppi_sobel_filter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -182,7 +186,8 @@ rppi_sobel_filter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -212,7 +217,8 @@ rppi_sobel_filter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -244,7 +250,8 @@ rppi_median_filter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -274,7 +281,8 @@ rppi_median_filter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -304,7 +312,8 @@ rppi_median_filter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -336,7 +345,8 @@ rppi_non_max_suppression_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -366,7 +376,8 @@ 
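// Presumably the reason the handle now reaches every host kernel: it carries a
// per-handle thread count that batch loops can apply to their OpenMP teams. A
// compilable sketch under that assumption; GetNumThreads() and the
// zero-means-default policy are assumed names and behavior, not taken from this patch.
#include <omp.h>
#include <cmath>
#include <vector>

struct Handle {
    unsigned numThreads = 0;                                    // set at handle creation
    unsigned GetNumThreads() const { return numThreads; }      // assumed accessor
};

void gamma_batch_host(std::vector<float>& pixels, float gamma, Handle& handle)
{
    // Honor the handle's thread count; fall back to the OpenMP default when unset.
    int nt = handle.GetNumThreads() ? static_cast<int>(handle.GetNumThreads())
                                    : omp_get_max_threads();
#pragma omp parallel for num_threads(nt)
    for (long i = 0; i < static_cast<long>(pixels.size()); i++)
        pixels[i] = std::pow(pixels[i], gamma);
}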
rppi_non_max_suppression_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -396,7 +407,8 @@ rppi_non_max_suppression_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -430,7 +442,8 @@ rppi_gaussian_filter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -462,7 +475,8 @@ rppi_gaussian_filter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -494,7 +508,8 @@ rppi_gaussian_filter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -526,7 +541,8 @@ rppi_nonlinear_filter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -556,7 +572,8 @@ rppi_nonlinear_filter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -586,7 +603,8 @@ rppi_nonlinear_filter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -620,7 +638,8 @@ rppi_custom_convolution_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -652,7 +671,8 @@ rppi_custom_convolution_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -684,7 +704,8 @@ rppi_custom_convolution_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_fused_functions.cpp b/src/modules/rppi_fused_functions.cpp index a7b6b9170..db3bb2524 100644 --- a/src/modules/rppi_fused_functions.cpp +++ b/src/modules/rppi_fused_functions.cpp @@ -72,7 +72,8 @@ RppStatus color_twist_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP16) { @@ -88,7 +89,8 @@ RppStatus color_twist_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP32) { @@ -104,7 +106,8 @@ RppStatus 
color_twist_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::I8) { @@ -120,7 +123,8 @@ RppStatus color_twist_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -203,7 +207,8 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::FP16) { @@ -218,7 +223,8 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::FP32) { @@ -233,7 +239,8 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::I8) { @@ -248,7 +255,8 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (tensorInType == RPPTensorDataType::FP16) @@ -264,7 +272,8 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensorInType == RPPTensorDataType::FP32) { @@ -279,7 +288,8 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensorInType == RPPTensorDataType::I8) { @@ -294,7 +304,8 @@ RppStatus crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -448,7 +459,8 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::FP16) { @@ -466,7 +478,8 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::FP32) { @@ -484,7 +497,8 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::I8) { @@ -502,7 +516,8 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (tensorInType == RPPTensorDataType::FP16) @@ -521,7 +536,8 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - 
num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensorInType == RPPTensorDataType::FP32) { @@ -539,7 +555,8 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensorInType == RPPTensorDataType::I8) { @@ -557,7 +574,8 @@ RppStatus crop_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -708,7 +726,8 @@ RppStatus resize_crop_mirror_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP16) { @@ -726,7 +745,8 @@ RppStatus resize_crop_mirror_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP32) { @@ -744,7 +764,8 @@ RppStatus resize_crop_mirror_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::I8) { @@ -762,7 +783,8 @@ RppStatus resize_crop_mirror_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -864,7 +886,8 @@ RppStatus resize_mirror_normalize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } return RPP_SUCCESS; diff --git a/src/modules/rppi_geometry_transforms.cpp b/src/modules/rppi_geometry_transforms.cpp index 92f9e5591..cd7b02ca8 100644 --- a/src/modules/rppi_geometry_transforms.cpp +++ b/src/modules/rppi_geometry_transforms.cpp @@ -60,7 +60,8 @@ rppi_flip_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -90,7 +91,8 @@ rppi_flip_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -120,7 +122,8 @@ rppi_flip_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -164,7 +167,8 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::FP16) { @@ -178,7 +182,8 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::FP32) { @@ -192,7 +197,8 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, outputFormatToggle, 
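// The *_host_helper functions in this file branch on the tensor data type
// (U8 / FP16 / FP32 / I8) and forward the same trailing handle in every branch. A
// compressed sketch of that dispatch shape with stand-in types; the real helpers
// also carry ROI, format-toggle, and normalization arguments omitted here.
#include <cstdint>

enum class TensorDataType { U8, FP32 };
struct Handle { };

template <typename T>
void crop_kernel_batchPD(const T* src, T* dst, Handle& handle)
{
    (void)src; (void)dst; (void)handle;   // kernel body elided in this sketch
}

void crop_host_helper(TensorDataType t, const void* src, void* dst, Handle& handle)
{
    if (t == TensorDataType::U8)
        crop_kernel_batchPD(static_cast<const uint8_t*>(src),
                            static_cast<uint8_t*>(dst), handle);
    else if (t == TensorDataType::FP32)
        crop_kernel_batchPD(static_cast<const float*>(src),
                            static_cast<float*>(dst), handle);
}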
rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensorOutType == RPPTensorDataType::I8) { @@ -206,7 +212,8 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (tensorInType == RPPTensorDataType::FP16) @@ -221,7 +228,8 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensorInType == RPPTensorDataType::FP32) { @@ -235,7 +243,8 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensorInType == RPPTensorDataType::I8) { @@ -249,7 +258,8 @@ RppStatus resize_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -398,7 +408,8 @@ RppStatus resize_crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP16) { @@ -415,7 +426,8 @@ RppStatus resize_crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP32) { @@ -432,7 +444,8 @@ RppStatus resize_crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::I8) { @@ -449,7 +462,8 @@ RppStatus resize_crop_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -554,7 +568,8 @@ RppStatus rotate_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP16) { @@ -569,7 +584,8 @@ RppStatus rotate_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::FP32) { @@ -584,7 +600,8 @@ RppStatus rotate_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } else if (tensor_type == RPPTensorDataType::I8) { @@ -599,7 +616,8 @@ RppStatus rotate_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -707,7 +725,8 @@ RppStatus warp_affine_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP16) @@ -725,7 +744,8 @@ RppStatus 
warp_affine_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::FP32) @@ -743,7 +763,8 @@ RppStatus warp_affine_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } else if (in_tensor_type == RPPTensorDataType::I8) @@ -761,7 +782,8 @@ RppStatus warp_affine_host_helper(RppiChnFormat chn_format, outputFormatToggle, rpp::deref(rppHandle).GetBatchSize(), chn_format, - num_of_channels); + num_of_channels, + rpp::deref(rppHandle)); } } @@ -854,7 +876,8 @@ rppi_fisheye_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -882,7 +905,8 @@ rppi_fisheye_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -910,7 +934,8 @@ rppi_fisheye_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -944,7 +969,8 @@ rppi_lens_correction_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -976,7 +1002,8 @@ rppi_lens_correction_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1008,7 +1035,8 @@ rppi_lens_correction_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1045,7 +1073,8 @@ rppi_scale_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1080,7 +1109,8 @@ rppi_scale_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1115,7 +1145,8 @@ rppi_scale_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1152,7 +1183,8 @@ rppi_warp_perspective_u8_pln1_batchPD_host(RppPtr_t srcPtr, perspectiveMatrix, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1187,7 +1219,8 @@ rppi_warp_perspective_u8_pln3_batchPD_host(RppPtr_t srcPtr, perspectiveMatrix, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1222,7 +1255,8 @@ rppi_warp_perspective_u8_pkd3_batchPD_host(RppPtr_t srcPtr, perspectiveMatrix, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + 
rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_image_augmentations.cpp b/src/modules/rppi_image_augmentations.cpp index 33150abc3..8b4d82a7c 100644 --- a/src/modules/rppi_image_augmentations.cpp +++ b/src/modules/rppi_image_augmentations.cpp @@ -62,7 +62,8 @@ rppi_brightness_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -94,7 +95,8 @@ rppi_brightness_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -126,7 +128,8 @@ rppi_brightness_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -158,7 +161,8 @@ rppi_gamma_correction_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -188,7 +192,8 @@ rppi_gamma_correction_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -218,7 +223,8 @@ rppi_gamma_correction_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -252,7 +258,8 @@ rppi_blend_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -284,7 +291,8 @@ rppi_blend_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -316,7 +324,8 @@ rppi_blend_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -348,7 +357,8 @@ rppi_blur_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -378,7 +388,8 @@ rppi_blur_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -408,7 +419,8 @@ rppi_blur_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -442,7 +454,8 @@ rppi_contrast_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -474,7 +487,8 @@ rppi_contrast_u8_pln3_batchPD_host(RppPtr_t srcPtr, 
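// Alongside the new trailing argument, these wrappers keep reading their batch
// metadata through the handle's init block, as in
// rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints. A structural sketch of
// that access chain; any member layout beyond the names visible in the patch is
// hypothetical.
struct RppiPoint { int x, y; };
struct RppiROI { RppiPoint xy; int roiWidth, roiHeight; };

struct InitHandle {
    struct { struct { RppiROI* roiPoints; } mcpu; } mem;   // host-side (mcpu) metadata
};

struct Handle {
    InitHandle init;
    InitHandle* GetInitHandle() { return &init; }
};

RppiROI* host_roi_for_batch(Handle& handle)
{
    // The same chain the wrappers walk before invoking a host kernel.
    return handle.GetInitHandle()->mem.mcpu.roiPoints;
}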
rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -506,7 +520,8 @@ rppi_contrast_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -536,7 +551,8 @@ rppi_pixelate_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -564,7 +580,8 @@ rppi_pixelate_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -592,7 +609,8 @@ rppi_pixelate_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -624,7 +642,8 @@ rppi_jitter_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -654,7 +673,8 @@ rppi_jitter_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -684,7 +704,8 @@ rppi_jitter_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -716,7 +737,8 @@ rppi_snow_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -746,7 +768,8 @@ rppi_snow_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -776,7 +799,8 @@ rppi_snow_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -808,7 +832,8 @@ rppi_noise_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -838,7 +863,8 @@ rppi_noise_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -868,7 +894,8 @@ rppi_noise_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -905,7 +932,8 @@ rppi_random_shadow_u8_pln1_batchPD_host(RppPtr_t srcPtr, maxSizeY, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -940,7 +968,8 @@ 
rppi_random_shadow_u8_pln3_batchPD_host(RppPtr_t srcPtr, maxSizeY, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -975,7 +1004,8 @@ rppi_random_shadow_u8_pkd3_batchPD_host(RppPtr_t srcPtr, maxSizeY, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1000,7 +1030,8 @@ rppi_fog_u8_pln1_batchPD_host(RppPtr_t srcPtr, fogValue, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1023,7 +1054,8 @@ rppi_fog_u8_pln3_batchPD_host(RppPtr_t srcPtr, fogValue, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1046,7 +1078,8 @@ rppi_fog_u8_pkd3_batchPD_host(RppPtr_t srcPtr, fogValue, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1077,7 +1110,8 @@ rppi_rain_u8_pln1_batchPD_host(RppPtr_t srcPtr, transperancy, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1106,7 +1140,8 @@ rppi_rain_u8_pln3_batchPD_host(RppPtr_t srcPtr, transperancy, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1135,7 +1170,8 @@ rppi_rain_u8_pkd3_batchPD_host(RppPtr_t srcPtr, transperancy, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1178,7 +1214,8 @@ rppi_random_crop_letterbox_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1219,7 +1256,8 @@ rppi_random_crop_letterbox_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1260,7 +1298,8 @@ rppi_random_crop_letterbox_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1292,7 +1331,8 @@ rppi_exposure_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1322,7 +1362,8 @@ rppi_exposure_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1352,7 +1393,8 @@ rppi_exposure_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1375,7 +1417,8 @@ rppi_histogram_balance_u8_pln1_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1396,7 +1439,8 @@ rppi_histogram_balance_u8_pln3_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -1417,7 +1461,8 @@ 
rppi_histogram_balance_u8_pkd3_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_logical_operations.cpp b/src/modules/rppi_logical_operations.cpp index f321763b6..b64bf4009 100644 --- a/src/modules/rppi_logical_operations.cpp +++ b/src/modules/rppi_logical_operations.cpp @@ -60,7 +60,8 @@ rppi_bitwise_AND_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -90,7 +91,8 @@ rppi_bitwise_AND_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -120,7 +122,8 @@ rppi_bitwise_AND_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -150,7 +153,8 @@ rppi_bitwise_NOT_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -178,7 +182,8 @@ rppi_bitwise_NOT_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -206,7 +211,8 @@ rppi_bitwise_NOT_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -238,7 +244,8 @@ rppi_exclusive_OR_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -268,7 +275,8 @@ rppi_exclusive_OR_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -298,7 +306,8 @@ rppi_exclusive_OR_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -330,7 +339,8 @@ rppi_inclusive_OR_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -360,7 +370,8 @@ rppi_inclusive_OR_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -390,7 +401,8 @@ rppi_inclusive_OR_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_morphological_operations.cpp b/src/modules/rppi_morphological_operations.cpp index 86272fa9d..7c46303bb 100644 --- 
a/src/modules/rppi_morphological_operations.cpp +++ b/src/modules/rppi_morphological_operations.cpp @@ -60,7 +60,8 @@ rppi_erode_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -90,7 +91,8 @@ rppi_erode_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -120,7 +122,8 @@ rppi_erode_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -152,7 +155,8 @@ rppi_dilate_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -182,7 +186,8 @@ rppi_dilate_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -212,7 +217,8 @@ rppi_dilate_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppi_statistical_operations.cpp b/src/modules/rppi_statistical_operations.cpp index 111042173..4706a6b97 100644 --- a/src/modules/rppi_statistical_operations.cpp +++ b/src/modules/rppi_statistical_operations.cpp @@ -62,7 +62,8 @@ rppi_thresholding_u8_pln1_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -94,7 +95,8 @@ rppi_thresholding_u8_pln3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -126,7 +128,8 @@ rppi_thresholding_u8_pkd3_batchPD_host(RppPtr_t srcPtr, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -158,7 +161,8 @@ rppi_min_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -188,7 +192,8 @@ rppi_min_u8_pln3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -218,7 +223,8 @@ rppi_min_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -250,7 +256,8 @@ rppi_max_u8_pln1_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -280,7 +287,8 @@ rppi_max_u8_pln3_batchPD_host(RppPtr_t 
srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -310,7 +318,8 @@ rppi_max_u8_pkd3_batchPD_host(RppPtr_t srcPtr1, rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.roiPoints, rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -398,7 +407,8 @@ rppi_integral_u8_pln1_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -419,7 +429,8 @@ rppi_integral_u8_pln3_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -440,7 +451,8 @@ rppi_integral_u8_pkd3_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -463,7 +475,8 @@ rppi_histogram_equalization_u8_pln1_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 1); + 1, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -484,7 +497,8 @@ rppi_histogram_equalization_u8_pln3_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PLANAR, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } @@ -505,7 +519,8 @@ rppi_histogram_equalization_u8_pkd3_batchPD_host(RppPtr_t srcPtr, static_cast(dstPtr), rpp::deref(rppHandle).GetBatchSize(), RPPI_CHN_PACKED, - 3); + 3, + rpp::deref(rppHandle)); return RPP_SUCCESS; } diff --git a/src/modules/rppt_tensor_color_augmentations.cpp b/src/modules/rppt_tensor_color_augmentations.cpp index a9be73918..ca188b301 100644 --- a/src/modules/rppt_tensor_color_augmentations.cpp +++ b/src/modules/rppt_tensor_color_augmentations.cpp @@ -54,7 +54,8 @@ RppStatus rppt_brightness_host(RppPtr_t srcPtr, betaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -66,7 +67,8 @@ RppStatus rppt_brightness_host(RppPtr_t srcPtr, betaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -78,7 +80,8 @@ RppStatus rppt_brightness_host(RppPtr_t srcPtr, betaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -90,7 +93,8 @@ RppStatus rppt_brightness_host(RppPtr_t srcPtr, betaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -118,7 +122,8 @@ RppStatus rppt_gamma_correction_host(RppPtr_t srcPtr, gammaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -129,7 +134,8 @@ RppStatus rppt_gamma_correction_host(RppPtr_t srcPtr, gammaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -140,7 +146,8 @@ RppStatus 
rppt_gamma_correction_host(RppPtr_t srcPtr, gammaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -151,7 +158,8 @@ RppStatus rppt_gamma_correction_host(RppPtr_t srcPtr, gammaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -181,7 +189,8 @@ RppStatus rppt_blend_host(RppPtr_t srcPtr1, alphaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -193,7 +202,8 @@ RppStatus rppt_blend_host(RppPtr_t srcPtr1, alphaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -205,7 +215,8 @@ RppStatus rppt_blend_host(RppPtr_t srcPtr1, alphaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -217,7 +228,8 @@ RppStatus rppt_blend_host(RppPtr_t srcPtr1, alphaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -251,7 +263,8 @@ RppStatus rppt_color_twist_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -265,7 +278,8 @@ RppStatus rppt_color_twist_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -279,7 +293,8 @@ RppStatus rppt_color_twist_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -293,7 +308,8 @@ RppStatus rppt_color_twist_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -327,7 +343,8 @@ RppStatus rppt_color_jitter_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -341,7 +358,8 @@ RppStatus rppt_color_jitter_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -355,7 +373,8 @@ RppStatus rppt_color_jitter_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -369,7 +388,8 @@ RppStatus rppt_color_jitter_host(RppPtr_t srcPtr, saturationTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -404,7 +424,8 @@ RppStatus rppt_color_cast_host(RppPtr_t srcPtr, alphaTensor, 
roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -416,7 +437,8 @@ RppStatus rppt_color_cast_host(RppPtr_t srcPtr, alphaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -428,7 +450,8 @@ RppStatus rppt_color_cast_host(RppPtr_t srcPtr, alphaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -440,7 +463,8 @@ RppStatus rppt_color_cast_host(RppPtr_t srcPtr, alphaTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -468,7 +492,8 @@ RppStatus rppt_exposure_host(RppPtr_t srcPtr, exposureFactorTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -479,7 +504,8 @@ RppStatus rppt_exposure_host(RppPtr_t srcPtr, exposureFactorTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -490,7 +516,8 @@ RppStatus rppt_exposure_host(RppPtr_t srcPtr, exposureFactorTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -501,7 +528,8 @@ RppStatus rppt_exposure_host(RppPtr_t srcPtr, exposureFactorTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -531,7 +559,8 @@ RppStatus rppt_contrast_host(RppPtr_t srcPtr, contrastCenterTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -543,7 +572,8 @@ RppStatus rppt_contrast_host(RppPtr_t srcPtr, contrastCenterTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -555,7 +585,8 @@ RppStatus rppt_contrast_host(RppPtr_t srcPtr, contrastCenterTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -567,7 +598,8 @@ RppStatus rppt_contrast_host(RppPtr_t srcPtr, contrastCenterTensor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; diff --git a/src/modules/rppt_tensor_data_exchange_operations.cpp b/src/modules/rppt_tensor_data_exchange_operations.cpp index b592baa30..fc2a44e23 100644 --- a/src/modules/rppt_tensor_data_exchange_operations.cpp +++ b/src/modules/rppt_tensor_data_exchange_operations.cpp @@ -46,7 +46,8 @@ RppStatus rppt_copy_host(RppPtr_t srcPtr, srcDescPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -54,7 
+55,8 @@ RppStatus rppt_copy_host(RppPtr_t srcPtr, srcDescPtr, (Rpp16f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -62,7 +64,8 @@ RppStatus rppt_copy_host(RppPtr_t srcPtr, srcDescPtr, (Rpp32f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -70,7 +73,8 @@ RppStatus rppt_copy_host(RppPtr_t srcPtr, srcDescPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -92,7 +96,8 @@ RppStatus rppt_swap_channels_host(RppPtr_t srcPtr, srcDescPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -100,7 +105,8 @@ RppStatus rppt_swap_channels_host(RppPtr_t srcPtr, srcDescPtr, (Rpp16f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -108,7 +114,8 @@ RppStatus rppt_swap_channels_host(RppPtr_t srcPtr, srcDescPtr, (Rpp32f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -116,7 +123,8 @@ RppStatus rppt_swap_channels_host(RppPtr_t srcPtr, srcDescPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -161,7 +169,8 @@ RppStatus rppt_color_to_greyscale_host(RppPtr_t srcPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, channelWeights, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -170,7 +179,8 @@ RppStatus rppt_color_to_greyscale_host(RppPtr_t srcPtr, (Rpp16f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, channelWeights, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -179,7 +189,8 @@ RppStatus rppt_color_to_greyscale_host(RppPtr_t srcPtr, (Rpp32f*) (static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, channelWeights, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -188,7 +199,8 @@ RppStatus rppt_color_to_greyscale_host(RppPtr_t srcPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, channelWeights, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; diff --git a/src/modules/rppt_tensor_effects_augmentations.cpp b/src/modules/rppt_tensor_effects_augmentations.cpp index fac4ce1cf..bb5a22cc4 100644 --- a/src/modules/rppt_tensor_effects_augmentations.cpp +++ b/src/modules/rppt_tensor_effects_augmentations.cpp @@ -59,7 +59,8 @@ RppStatus rppt_gridmask_host(RppPtr_t srcPtr, translateVector, 
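// The rppt_*_host entry points above select a typed kernel from the descriptor's
// dataType and apply offsetInBytes before the cast, in the style of
// (Rpp32f*)(static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes). A minimal
// sketch of that pointer arithmetic with stand-in descriptor and kernel; the enum
// values and kernel body are illustrative.
#include <cstddef>
#include <cstdint>
#include <cstring>

using Rpp8u = uint8_t;
using Rpp32f = float;

enum class DataType { U8, F32 };
struct Desc { DataType dataType; size_t offsetInBytes; size_t numElems; };

template <typename T>
void copy_kernel(const T* src, T* dst, size_t n) { std::memcpy(dst, src, n * sizeof(T)); }

void copy_host(const void* srcPtr, const Desc* srcDesc, void* dstPtr, const Desc* dstDesc)
{
    // Byte offsets are applied on Rpp8u* first, then the pointer is reinterpreted.
    const Rpp8u* s = static_cast<const Rpp8u*>(srcPtr) + srcDesc->offsetInBytes;
    Rpp8u* d = static_cast<Rpp8u*>(dstPtr) + dstDesc->offsetInBytes;
    if (srcDesc->dataType == DataType::U8 && dstDesc->dataType == DataType::U8)
        copy_kernel(s, d, srcDesc->numElems);
    else if (srcDesc->dataType == DataType::F32 && dstDesc->dataType == DataType::F32)
        copy_kernel(reinterpret_cast<const Rpp32f*>(s),
                    reinterpret_cast<Rpp32f*>(d), srcDesc->numElems);
}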
roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -73,7 +74,8 @@ RppStatus rppt_gridmask_host(RppPtr_t srcPtr, translateVector, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -87,7 +89,8 @@ RppStatus rppt_gridmask_host(RppPtr_t srcPtr, translateVector, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -101,7 +104,8 @@ RppStatus rppt_gridmask_host(RppPtr_t srcPtr, translateVector, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -141,7 +145,8 @@ RppStatus rppt_spatter_host(RppPtr_t srcPtr, spatterColor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -152,7 +157,8 @@ RppStatus rppt_spatter_host(RppPtr_t srcPtr, spatterColor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -163,7 +169,8 @@ RppStatus rppt_spatter_host(RppPtr_t srcPtr, spatterColor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -174,7 +181,8 @@ RppStatus rppt_spatter_host(RppPtr_t srcPtr, spatterColor, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -216,7 +224,8 @@ RppStatus rppt_salt_and_pepper_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -231,7 +240,8 @@ RppStatus rppt_salt_and_pepper_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -246,7 +256,8 @@ RppStatus rppt_salt_and_pepper_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { @@ -261,7 +272,8 @@ RppStatus rppt_salt_and_pepper_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; @@ -297,7 +309,8 @@ RppStatus rppt_shot_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { @@ -309,7 +322,8 @@ RppStatus rppt_shot_noise_host(RppPtr_t srcPtr, xorwowInitialState, roiTensorPtrSrc, roiType, - layoutParams); + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { @@ -321,7 +335,8 
diff --git a/src/modules/rppt_tensor_geometric_augmentations.cpp b/src/modules/rppt_tensor_geometric_augmentations.cpp
index 418b8de57..6a3e6eab7 100644
--- a/src/modules/rppt_tensor_geometric_augmentations.cpp
+++ b/src/modules/rppt_tensor_geometric_augmentations.cpp
@@ -54,7 +54,8 @@ RppStatus rppt_crop_host(RppPtr_t srcPtr,
             dstDescPtr,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
     {
@@ -64,7 +65,8 @@ RppStatus rppt_crop_host(RppPtr_t srcPtr,
             dstDescPtr,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
     {
@@ -74,7 +76,8 @@ RppStatus rppt_crop_host(RppPtr_t srcPtr,
             dstDescPtr,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
     {
@@ -84,7 +87,8 @@ RppStatus rppt_crop_host(RppPtr_t srcPtr,
             dstDescPtr,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
     }
 
     return RPP_SUCCESS;
@@ -116,7 +120,8 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
     {
@@ -129,7 +134,8 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
     {
@@ -142,7 +148,8 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
     {
@@ -155,7 +162,8 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::F32))
     {
@@ -168,7 +176,8 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::F16))
     {
@@ -181,7 +190,8 @@ RppStatus rppt_crop_mirror_normalize_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
     }
 
     return RPP_SUCCESS;
@@ -215,7 +225,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
        {
@@ -226,7 +237,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
        {
@@ -237,7 +249,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
        {
@@ -248,7 +261,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
     }
     else if(interpolationType == RpptInterpolationType::BILINEAR)
@@ -262,7 +276,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
        {
@@ -273,7 +288,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
        {
@@ -284,7 +300,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
        {
@@ -295,7 +312,8 @@ RppStatus rppt_warp_affine_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
     }
@@ -326,7 +344,8 @@ RppStatus rppt_flip_host(RppPtr_t srcPtr,
             verticalTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
     {
@@ -338,7 +357,8 @@ RppStatus rppt_flip_host(RppPtr_t srcPtr,
             verticalTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
     {
@@ -350,7 +370,8 @@ RppStatus rppt_flip_host(RppPtr_t srcPtr,
             verticalTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
     {
@@ -362,7 +383,8 @@ RppStatus rppt_flip_host(RppPtr_t srcPtr,
             verticalTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
     }
 
     return RPP_SUCCESS;
@@ -393,7 +415,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
             dstImgSizes,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
        {
@@ -404,7 +427,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
             dstImgSizes,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
        {
@@ -415,7 +439,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
             dstImgSizes,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
        {
@@ -426,7 +451,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
             dstImgSizes,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
         }
     }
     else if(interpolationType == RpptInterpolationType::BILINEAR)
@@ -440,7 +466,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
             dstImgSizes,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
        {
@@ -451,7 +478,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
             dstImgSizes,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
        {
@@ -462,7 +490,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
             dstImgSizes,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
        {
@@ -473,7 +502,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
             dstImgSizes,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
         }
     }
     else
@@ -500,7 +530,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
             roiTensorPtrSrc,
             roiType,
             srcLayoutParams,
-            interpolationType);
+            interpolationType,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
        {
@@ -514,7 +545,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
             roiTensorPtrSrc,
             roiType,
             srcLayoutParams,
-            interpolationType);
+            interpolationType,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
        {
@@ -528,7 +560,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
             roiTensorPtrSrc,
             roiType,
             srcLayoutParams,
-            interpolationType);
+            interpolationType,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
        {
@@ -542,7 +575,8 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
             roiTensorPtrSrc,
             roiType,
             srcLayoutParams,
-            interpolationType);
+            interpolationType,
+            rpp::deref(rppHandle));
         }
     }
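The rppt_resize_host hunks above show a three-way dispatch: NEAREST_NEIGHBOR and BILINEAR each get a specialized kernel per data type, while every other mode falls through to a generic kernel that also receives interpolationType, which is why rpp::deref(rppHandle) is appended after interpolationType only on that path. A condensed, runnable sketch of that control-flow shape; the enum and function names are placeholders, not RPP's real kernels.

    #include <cstdio>

    enum class InterpolationType { NEAREST_NEIGHBOR, BILINEAR, BICUBIC, LANCZOS };

    void resize_nn()       { std::puts("specialized nearest-neighbor kernel, one per data type"); }
    void resize_bilinear() { std::puts("specialized bilinear kernel, one per data type"); }
    void resize_generic(InterpolationType t)
    {
        std::printf("generic kernel, interpolationType = %d passed through\n", static_cast<int>(t));
    }

    void resize_dispatch(InterpolationType t)
    {
        if (t == InterpolationType::NEAREST_NEIGHBOR)
            resize_nn();        // fast path; handle appended as the last argument
        else if (t == InterpolationType::BILINEAR)
            resize_bilinear();  // fast path; handle appended as the last argument
        else
            resize_generic(t);  // only this path forwards interpolationType itself
    }

    int main()
    {
        resize_dispatch(InterpolationType::BICUBIC); // falls through to the generic kernel
        return 0;
    }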
@@ -581,7 +615,8 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
     {
@@ -595,7 +630,8 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
     {
@@ -609,7 +645,8 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
     {
@@ -623,7 +660,8 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::F32))
     {
@@ -637,7 +675,8 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::F16))
     {
@@ -651,7 +690,8 @@ RppStatus rppt_resize_mirror_normalize_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
     }
 
     return RPP_SUCCESS;
@@ -683,7 +723,8 @@ RppStatus rppt_resize_crop_mirror_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
     {
@@ -695,7 +736,8 @@ RppStatus rppt_resize_crop_mirror_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
     {
@@ -707,7 +749,8 @@ RppStatus rppt_resize_crop_mirror_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
     }
     else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
     {
@@ -719,7 +762,8 @@ RppStatus rppt_resize_crop_mirror_host(RppPtr_t srcPtr,
             mirrorTensor,
             roiTensorPtrSrc,
             roiType,
-            srcLayoutParams);
+            srcLayoutParams,
+            rpp::deref(rppHandle));
     }
 
     return RPP_SUCCESS;
@@ -741,7 +785,7 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr,
         return RPP_ERROR_NOT_IMPLEMENTED;
 
     RppLayoutParams layoutParams = get_layout_params(srcDescPtr->layout, srcDescPtr->c);
-
+    // Compute affine transformation matrix from rotate angle
     Rpp32f *affineTensor = rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem;
     for(int idx = 0; idx < srcDescPtr->n; idx++)
@@ -763,7 +807,8 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
        {
@@ -774,7 +819,8 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
        else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
        {
@@ -785,7 +831,8 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
        {
@@ -796,7 +843,8 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
     }
     else if(interpolationType == RpptInterpolationType::BILINEAR)
@@ -810,7 +858,8 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
        {
@@ -821,7 +870,8 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
        {
@@ -832,7 +882,8 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
         else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
        {
@@ -843,7 +894,8 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr,
             affineTensor,
             roiTensorPtrSrc,
             roiType,
-            layoutParams);
+            layoutParams,
+            rpp::deref(rppHandle));
         }
     }
@@ -1413,7 +1465,7 @@ RppStatus rppt_rotate_gpu(RppPtr_t srcPtr,
 #ifdef HIP_COMPILE
     if ((interpolationType != RpptInterpolationType::BILINEAR) && (interpolationType != RpptInterpolationType::NEAREST_NEIGHBOR))
         return RPP_ERROR_NOT_IMPLEMENTED;
-
+    // Compute affine transformation matrix from rotate angle
     Rpp32f *affineTensor = rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem;
     for(int idx = 0; idx < srcDescPtr->n; idx++)
diff --git a/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp b/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp
index 1329d043a..2ec955479 100644
--- a/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp
+++ b/utilities/rpp-performancetests/HIP_NEW/Single_host.cpp
@@ -70,7 +70,7 @@ int main(int argc, char **argv)
     int i = 0, j = 0;
     int minHeight = 30000, minWidth = 30000, maxHeight = 0, maxWidth = 0;
-    
+
     unsigned long long ioBufferSize = 0;
     static int noOfImages = 128;
@@ -176,7 +176,8 @@ int main(int argc, char **argv)
     closedir(dr2);
 
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
 
     clock_t start, end;
     double cpu_time_used;
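The rotate hunks above also add a comment marking where rppt_rotate_host (and its HIP counterpart) convert each image's rotation angle into a per-image affine matrix in tempFloatmem before reusing the warp-affine kernels. The patch does not show the matrix math itself; the sketch below uses the conventional 2x3 rotation matrix and degree-to-radian conversion, which is an assumption about the layout, not code from this diff.

    #include <cmath>
    #include <cstdio>

    // Fill a 2x3 row-major affine matrix [a b tx; c d ty] for a rotation
    // by angleDeg degrees about the origin.
    void rotation_to_affine(float angleDeg, float *affine)
    {
        const float kPi = 3.14159265358979f;
        float rad = angleDeg * kPi / 180.0f;
        affine[0] =  std::cos(rad); affine[1] = std::sin(rad); affine[2] = 0.0f;
        affine[3] = -std::sin(rad); affine[4] = std::cos(rad); affine[5] = 0.0f;
    }

    int main()
    {
        float affine[6]; // one 2x3 matrix per image in the batch
        rotation_to_affine(90.0f, affine);
        std::printf("%.2f %.2f %.2f / %.2f %.2f %.2f\n",
                    affine[0], affine[1], affine[2], affine[3], affine[4], affine[5]);
        return 0;
    }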
diff --git a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp
index a886c7c1c..88820af63 100644
--- a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp
+++ b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pkd3.cpp
@@ -645,7 +645,8 @@ int main(int argc, char **argv)
     }
 
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
     clock_t start, end;
     double start_omp, end_omp;
     double max_time_used = 0, min_time_used = 500, avg_time_used = 0;
diff --git a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp
index 5876a7c39..1c3c889b2 100644
--- a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp
+++ b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln1.cpp
@@ -646,7 +646,8 @@ int main(int argc, char **argv)
     }
 
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
     clock_t start, end;
     double start_omp, end_omp;
     double max_time_used = 0, min_time_used = 500, avg_time_used = 0;
diff --git a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp
index 8b10a76c1..7c00bdc20 100644
--- a/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp
+++ b/utilities/rpp-performancetests/HOST_NEW/BatchPD_host_pln3.cpp
@@ -748,7 +748,8 @@ int main(int argc, char **argv)
     }
 
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
     clock_t start, end;
     double start_omp, end_omp;
     double max_time_used = 0, min_time_used = 500, avg_time_used = 0;
diff --git a/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp b/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp
index 1329d043a..2ec955479 100644
--- a/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp
+++ b/utilities/rpp-performancetests/HOST_NEW/Single_host.cpp
@@ -70,7 +70,7 @@ int main(int argc, char **argv)
     int i = 0, j = 0;
     int minHeight = 30000, minWidth = 30000, maxHeight = 0, maxWidth = 0;
-    
+
     unsigned long long ioBufferSize = 0;
     static int noOfImages = 128;
@@ -176,7 +176,8 @@ int main(int argc, char **argv)
     closedir(dr2);
 
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
 
     clock_t start, end;
     double cpu_time_used;
diff --git a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp
index 82803cc85..a1160c544 100644
--- a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp
+++ b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pkd3.cpp
@@ -595,7 +595,8 @@ int main(int argc, char **argv)
 
     // Run case-wise RPP API and measure time
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
 
     double max_time_used = 0, min_time_used = 500, avg_time_used = 0;
diff --git a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp
index cb11db44f..4e6a2305b 100644
--- a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp
+++ b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln1.cpp
@@ -586,7 +586,8 @@ int main(int argc, char **argv)
 
     // Run case-wise RPP API and measure time
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
 
     double max_time_used = 0, min_time_used = 500, avg_time_used = 0;
diff --git a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp
index 9cc4cfb9a..0d4c16aab 100644
--- a/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp
+++ b/utilities/rpp-performancetests/HOST_NEW/Tensor_host_pln3.cpp
@@ -672,7 +672,8 @@ int main(int argc, char **argv)
 
     // Run case-wise RPP API and measure time
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
 
     double max_time_used = 0, min_time_used = 500, avg_time_used = 0;
diff --git a/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp b/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp
index 1329d043a..2ec955479 100644
--- a/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp
+++ b/utilities/rpp-performancetests/OCL_NEW/Single_host.cpp
@@ -70,7 +70,7 @@ int main(int argc, char **argv)
     int i = 0, j = 0;
     int minHeight = 30000, minWidth = 30000, maxHeight = 0, maxWidth = 0;
-    
+
     unsigned long long ioBufferSize = 0;
     static int noOfImages = 128;
@@ -176,7 +176,8 @@ int main(int argc, char **argv)
     closedir(dr2);
 
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
 
     clock_t start, end;
     double cpu_time_used;
diff --git a/utilities/rpp-unittests/HIP_NEW/Single_host.cpp b/utilities/rpp-unittests/HIP_NEW/Single_host.cpp
index 1329d043a..2ec955479 100644
--- a/utilities/rpp-unittests/HIP_NEW/Single_host.cpp
+++ b/utilities/rpp-unittests/HIP_NEW/Single_host.cpp
@@ -70,7 +70,7 @@ int main(int argc, char **argv)
     int i = 0, j = 0;
     int minHeight = 30000, minWidth = 30000, maxHeight = 0, maxWidth = 0;
-    
+
     unsigned long long ioBufferSize = 0;
     static int noOfImages = 128;
@@ -176,7 +176,8 @@ int main(int argc, char **argv)
     closedir(dr2);
 
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
 
     clock_t start, end;
     double cpu_time_used;
diff --git a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp
index 8b57a81a0..1689ef5e4 100644
--- a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp
+++ b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pkd3.cpp
@@ -647,7 +647,8 @@ int main(int argc, char **argv)
     }
 
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
     clock_t start, end;
     double start_omp, end_omp;
     double cpu_time_used, omp_time_used;
diff --git a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp
index 64ba48248..373d3f773 100644
--- a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp
+++ b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln1.cpp
@@ -648,7 +648,8 @@ int main(int argc, char **argv)
     }
 
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
     clock_t start, end;
     double start_omp, end_omp;
     double cpu_time_used, omp_time_used;
diff --git a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp
index 1269fa409..a98c6daf0 100644
--- a/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp
+++ b/utilities/rpp-unittests/HOST_NEW/BatchPD_host_pln3.cpp
@@ -751,7 +751,8 @@ int main(int argc, char **argv)
     }
 
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
     clock_t start, end;
     double start_omp, end_omp;
     double cpu_time_used, omp_time_used;
diff --git a/utilities/rpp-unittests/HOST_NEW/Single_host.cpp b/utilities/rpp-unittests/HOST_NEW/Single_host.cpp
index 1329d043a..2ec955479 100644
--- a/utilities/rpp-unittests/HOST_NEW/Single_host.cpp
+++ b/utilities/rpp-unittests/HOST_NEW/Single_host.cpp
@@ -70,7 +70,7 @@ int main(int argc, char **argv)
     int i = 0, j = 0;
     int minHeight = 30000, minWidth = 30000, maxHeight = 0, maxWidth = 0;
-    
+
     unsigned long long ioBufferSize = 0;
     static int noOfImages = 128;
@@ -176,7 +176,8 @@ int main(int argc, char **argv)
     closedir(dr2);
 
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
 
     clock_t start, end;
     double cpu_time_used;
diff --git a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp
index f7025fb56..62512b0ea 100644
--- a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp
+++ b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pkd3.cpp
@@ -609,7 +609,8 @@ int main(int argc, char **argv)
 
     // Run case-wise RPP API and measure time
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
     clock_t start, end;
     double start_omp, end_omp;
     double cpu_time_used, omp_time_used;
diff --git a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp
index 0b1c6ca05..b1917ba41 100644
--- a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp
+++ b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln1.cpp
@@ -599,7 +599,8 @@ int main(int argc, char **argv)
 
     // Run case-wise RPP API and measure time
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
     clock_t start, end;
     double start_omp, end_omp;
     double cpu_time_used, omp_time_used;
diff --git a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp
index 5cfdbd69f..296fb945f 100644
--- a/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp
+++ b/utilities/rpp-unittests/HOST_NEW/Tensor_host_pln3.cpp
@@ -685,7 +685,8 @@ int main(int argc, char **argv)
 
     // Run case-wise RPP API and measure time
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
     clock_t start, end;
     double start_omp, end_omp;
     double cpu_time_used, omp_time_used;
diff --git a/utilities/rpp-unittests/OCL_NEW/Single_host.cpp b/utilities/rpp-unittests/OCL_NEW/Single_host.cpp
index 1329d043a..2ec955479 100644
--- a/utilities/rpp-unittests/OCL_NEW/Single_host.cpp
+++ b/utilities/rpp-unittests/OCL_NEW/Single_host.cpp
@@ -70,7 +70,7 @@ int main(int argc, char **argv)
     int i = 0, j = 0;
     int minHeight = 30000, minWidth = 30000, maxHeight = 0, maxWidth = 0;
-    
+
     unsigned long long ioBufferSize = 0;
     static int noOfImages = 128;
@@ -176,7 +176,8 @@ int main(int argc, char **argv)
     closedir(dr2);
 
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
 
     clock_t start, end;
     double cpu_time_used;
diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp
index 253425f3e..06453d67a 100644
--- a/utilities/test_suite/HOST/Tensor_host.cpp
+++ b/utilities/test_suite/HOST/Tensor_host.cpp
@@ -380,7 +380,8 @@ int main(int argc, char **argv)
 
     // Run case-wise RPP API and measure time
     rppHandle_t handle;
-    rppCreateWithBatchSize(&handle, noOfImages);
+    Rpp32u numThreads = 0;
+    rppCreateWithBatchSize(&handle, noOfImages, numThreads);
 
     double maxWallTime = 0, minWallTime = 500, avgWallTime = 0;
     double cpuTime, wallTime;
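Every harness above passes numThreads = 0. The patch itself does not spell out what 0 means, but a natural reading is "no explicit request", leaving the team size to OpenMP's own default; the sketch below shows that assumed fallback, and the fallback rule itself is an assumption, not something stated in this diff.

    #include <omp.h>
    #include <cstdio>

    int main()
    {
        unsigned int numThreads = 0; // what the updated test harnesses pass

        // Assumed semantics: 0 defers to OpenMP's default team size.
        unsigned int effective = (numThreads == 0)
                                     ? static_cast<unsigned int>(omp_get_max_threads())
                                     : numThreads;

        std::printf("effective OpenMP thread count: %u\n", effective);
        return 0;
    }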