Skip to content

Commit

Permalink
Merge pull request ROCm#4 from MCW-Dev/rpp-rali-3
Browse files Browse the repository at this point in the history
Rpp rali 3
  • Loading branch information
Lokesh Bonta authored May 27, 2020
2 parents cd301fa + 8a52bf1 commit 4bbf888
Show file tree
Hide file tree
Showing 27 changed files with 20,250 additions and 13,294 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
.vscode
build/
build*
sample_test
build.sh
temp/
Gtest
opencv_test
opencv_test
cmake-3.16.0-rc2-Linux-x86_64/
stb-test/
hip_build/
50 changes: 50 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Travis CI configuration: the actual configure/build steps run inside a
# ROCm Docker container; only the OpenCL backend is currently enabled.
language: cpp
sudo: required
dist: xenial
os:
- linux
compiler:
- g++

# Docker is required because every step in `script` is a `docker exec`.
services:
- docker

branches:
- master

# Host setup: build tools, the ROCm apt repository (key + source list), then rocm-dkms.
install:
- sudo apt update
- sudo apt --yes install cmake git wget unzip libnuma-dev
- wget -qO - http://repo.radeon.com/rocm/apt/debian/rocm.gpg.key | sudo apt-key add -
- sudo sh -c 'echo deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main > /etc/apt/sources.list.d/rocm.list'
- sudo apt update
- sudo apt --yes install rocm-dkms

# Pull the ROCm image, start it detached as "rocm_image", and copy the
# checkout (expected at ../rpp) into the container at /rpp.
before_script:
- docker pull kiritigowda/ubuntu-18.04:latest-rocm
- docker run -it -d --name rocm_image --network host kiritigowda/ubuntu-18.04:latest-rocm bash
- docker cp ../rpp rocm_image:/rpp

# Each backend gets its own out-of-source build directory inside the container.
# Lines starting with "##-" are disabled steps kept for later re-enabling.
script:
# OpenCL Backend
- docker exec rocm_image cmake -H/rpp -B/_buildOCL -DCMAKE_BUILD_TYPE=Release -DBACKEND=OCL
- docker exec rocm_image cmake --build /_buildOCL
# HIP Backend - Turn OFF HIP - (TBD - Turn ON all tests)
##- docker exec rocm_image cmake -H/rpp -B/_buildHIP -DCMAKE_BUILD_TYPE=Release -DBACKEND=HIP
##- docker exec rocm_image cmake --build /_buildHIP
# HIP Backend - Static
##- docker exec rocm_image cmake -H/rpp -B/_buildHIPStatic -DCMAKE_BUILD_TYPE=Release -DBACKEND=HIP -DCOMPILE=STATIC
##- docker exec rocm_image cmake --build /_buildHIPStatic
# HIP Backend - HSACOO
##- docker exec rocm_image cmake -H/rpp -B/_buildHIPHSACOO -DCMAKE_BUILD_TYPE=Release -DBACKEND=HIP -DCOMPILE=HSACOO
##- docker exec rocm_image cmake --build /_buildHIPHSACOO
# HIP Backend - HIPRTC
##- docker exec rocm_image cmake -H/rpp -B/_buildHIPRTC -DCMAKE_BUILD_TYPE=Release -DBACKEND=HIP -DCOMPILE=HIPRTC
##- docker exec rocm_image cmake --build /_buildHIPRTC

# No post-success steps are defined.
after_success:

# E-mail notification recipients.
notifications:
email:
- kiriti.nageshgowda@amd.com
- pavel.tcherniaev@amd.com
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

cmake_minimum_required(VERSION 3.5)
project(amd_rpp)
set(VERSION "0.3")
set(VERSION "0.4")

# ROCm Path
set(ROCM_PATH /opt/rocm CACHE PATH "ROCm installation path")
Expand Down
1 change: 0 additions & 1 deletion Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,4 +242,3 @@ data alpha = scalar:FLOAT32,1.0 #contrast control
data beta = scalar:INT32,30 #brightness control
node org.rpp.Brightness luma output alpha beta
```
### Modes to test
173 changes: 136 additions & 37 deletions include/rppi_fused_functions.h

Large diffs are not rendered by default.

102 changes: 102 additions & 0 deletions include/rppi_geometry_transforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -1351,6 +1351,19 @@ RppStatus
RppStatus
rppi_resize_u8_pkd3_batchPD_ROID_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,RppiROI *roiPoints ,Rpp32u nbatchSize ,rppHandle_t rppHandle );

// ----------------------------------------
// Host resize functions declaration - f16 / f32 variants (batchPD)
// One declaration per layout suffix (pln1, pln3, pkd3) for each of f16 and f32.
// Buffers are passed through the generic RppPtr_t; the element type is
// presumably the one named in the function (f16 / f32) - confirm in the implementation.
// ----------------------------------------
RppStatus
rppi_resize_f16_pln1_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_f16_pln3_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_f16_pkd3_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_f32_pln1_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_f32_pln3_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_f32_pkd3_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32u nbatchSize ,rppHandle_t rppHandle );


// ----------------------------------------
// GPU rotate functions declaration
Expand Down Expand Up @@ -1621,6 +1634,43 @@ RppStatus
RppStatus
rppi_rotate_u8_pkd3_batchPD_ROID_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32f *angleDeg ,RppiROI *roiPoints ,Rpp32u nbatchSize ,rppHandle_t rppHandle );// ----------------------------------------

// ----------------------------------------
// Host rotate functions declaration - f16 / f32 variants (batchPD)
// Same parameter list as the resize variants plus a Rpp32f *angleDeg argument
// (presumably one angle in degrees per batch entry - confirm in the implementation).
// ----------------------------------------
RppStatus
rppi_rotate_f16_pln1_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32f *angleDeg ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_rotate_f16_pln3_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32f *angleDeg ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_rotate_f16_pkd3_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32f *angleDeg ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_rotate_f32_pln1_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32f *angleDeg ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_rotate_f32_pln3_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32f *angleDeg ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_rotate_f32_pkd3_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32f *angleDeg ,Rpp32u nbatchSize ,rppHandle_t rppHandle );

// ----------------------------------------
// GPU rotate functions declaration - f16 / f32 variants (batchPD)
// Signatures mirror the *_host rotate declarations with the same names.
// ----------------------------------------
RppStatus
rppi_rotate_f16_pln3_batchPD_gpu(RppPtr_t srcPtr ,RppiSize *srcSize,
RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,
RppiSize maxDstSize ,Rpp32f *angleDeg ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_rotate_f16_pln1_batchPD_gpu(RppPtr_t srcPtr ,RppiSize *srcSize,
RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,
RppiSize maxDstSize ,Rpp32f *angleDeg ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_rotate_f16_pkd3_batchPD_gpu(RppPtr_t srcPtr ,RppiSize *srcSize,
RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,
RppiSize maxDstSize ,Rpp32f *angleDeg ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_rotate_f32_pkd3_batchPD_gpu(RppPtr_t srcPtr ,RppiSize *srcSize,
RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,
RppiSize maxDstSize ,Rpp32f *angleDeg ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_rotate_f32_pln1_batchPD_gpu(RppPtr_t srcPtr ,RppiSize *srcSize,
RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,
RppiSize maxDstSize ,Rpp32f *angleDeg ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_rotate_f32_pln3_batchPD_gpu(RppPtr_t srcPtr ,RppiSize *srcSize,
RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,
RppiSize maxDstSize ,Rpp32f *angleDeg ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
// ----------------------------------------
// GPU resize_crop functions declaration
// ----------------------------------------
Expand Down Expand Up @@ -1895,6 +1945,20 @@ RppStatus
rppi_resize_crop_u8_pkd3_batchDD_ROID_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,Rpp32u *xRoiBegin ,Rpp32u *xRoiEnd ,Rpp32u *yRoiBegin ,Rpp32u *yRoiEnd ,RppiROI *roiPoints ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_crop_u8_pkd3_batchPD_ROID_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32u *xRoiBegin ,Rpp32u *xRoiEnd ,Rpp32u *yRoiBegin ,Rpp32u *yRoiEnd ,RppiROI *roiPoints ,Rpp32u nbatchSize ,rppHandle_t rppHandle );

// ----------------------------------------
// Host resize_crop functions declaration - f16 / f32 variants (batchPD)
// The crop window is supplied through the xRoiBegin / xRoiEnd / yRoiBegin / yRoiEnd
// arrays (presumably one entry per batch image - confirm in the implementation).
// ----------------------------------------
RppStatus
rppi_resize_crop_f16_pln1_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32u *xRoiBegin ,Rpp32u *xRoiEnd ,Rpp32u *yRoiBegin ,Rpp32u *yRoiEnd ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_crop_f16_pln3_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32u *xRoiBegin ,Rpp32u *xRoiEnd ,Rpp32u *yRoiBegin ,Rpp32u *yRoiEnd ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_crop_f16_pkd3_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32u *xRoiBegin ,Rpp32u *xRoiEnd ,Rpp32u *yRoiBegin ,Rpp32u *yRoiEnd ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_crop_f32_pln1_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32u *xRoiBegin ,Rpp32u *xRoiEnd ,Rpp32u *yRoiBegin ,Rpp32u *yRoiEnd ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_crop_f32_pln3_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32u *xRoiBegin ,Rpp32u *xRoiEnd ,Rpp32u *yRoiBegin ,Rpp32u *yRoiEnd ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_crop_f32_pkd3_batchPD_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32u *xRoiBegin ,Rpp32u *xRoiEnd ,Rpp32u *yRoiBegin ,Rpp32u *yRoiEnd ,Rpp32u nbatchSize ,rppHandle_t rppHandle );

// ----------------------------------------
// GPU warp_affine functions declaration
// ----------------------------------------
Expand Down Expand Up @@ -2452,6 +2516,44 @@ RppStatus
RppStatus
rppi_warp_perspective_u8_pkd3_batchPD_ROID_host(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,RppiSize maxDstSize ,Rpp32f *perspectiveMatrix ,RppiROI *roiPoints ,Rpp32u nbatchSize ,rppHandle_t rppHandle );

// ----------------------------------------
// GPU resize and resize_crop functions declaration - f32 / f16 variants (batchPD)
// Float based calls: the declarations from here to the end of the extern block
// are the *_gpu counterparts of the f16 / f32 *_host resize and resize_crop declarations.
// ----------------------------------------
RppStatus
rppi_resize_f32_pln1_batchPD_gpu(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,
RppiSize maxDstSize ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_f16_pln1_batchPD_gpu(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,
RppiSize maxDstSize ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_f32_pln3_batchPD_gpu(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,
RppiSize maxDstSize ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_f16_pln3_batchPD_gpu(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,
RppiSize maxDstSize ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_f32_pkd3_batchPD_gpu(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,
RppiSize maxDstSize ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_f16_pkd3_batchPD_gpu(RppPtr_t srcPtr ,RppiSize *srcSize ,RppiSize maxSrcSize ,RppPtr_t dstPtr ,RppiSize *dstSize ,
RppiSize maxDstSize ,Rpp32u nbatchSize ,rppHandle_t rppHandle );
RppStatus
rppi_resize_crop_f32_pkd3_batchPD_gpu(RppPtr_t srcPtr, RppiSize *srcSize, RppiSize maxSrcSize, RppPtr_t dstPtr, RppiSize *dstSize, RppiSize maxDstSize, Rpp32u *xRoiBegin, Rpp32u *xRoiEnd,
Rpp32u *yRoiBegin, Rpp32u *yRoiEnd, Rpp32u nbatchSize, rppHandle_t rppHandle);
RppStatus
rppi_resize_crop_f16_pkd3_batchPD_gpu(RppPtr_t srcPtr, RppiSize *srcSize, RppiSize maxSrcSize, RppPtr_t dstPtr, RppiSize *dstSize, RppiSize maxDstSize, Rpp32u *xRoiBegin, Rpp32u *xRoiEnd,
Rpp32u *yRoiBegin, Rpp32u *yRoiEnd, Rpp32u nbatchSize, rppHandle_t rppHandle);
RppStatus
rppi_resize_crop_f32_pln3_batchPD_gpu(RppPtr_t srcPtr, RppiSize *srcSize, RppiSize maxSrcSize, RppPtr_t dstPtr, RppiSize *dstSize, RppiSize maxDstSize, Rpp32u *xRoiBegin, Rpp32u *xRoiEnd,
Rpp32u *yRoiBegin, Rpp32u *yRoiEnd, Rpp32u nbatchSize, rppHandle_t rppHandle);
RppStatus
rppi_resize_crop_f16_pln3_batchPD_gpu(RppPtr_t srcPtr, RppiSize *srcSize, RppiSize maxSrcSize, RppPtr_t dstPtr, RppiSize *dstSize, RppiSize maxDstSize, Rpp32u *xRoiBegin, Rpp32u *xRoiEnd,
Rpp32u *yRoiBegin, Rpp32u *yRoiEnd, Rpp32u nbatchSize, rppHandle_t rppHandle);
RppStatus
rppi_resize_crop_f32_pln1_batchPD_gpu(RppPtr_t srcPtr, RppiSize *srcSize, RppiSize maxSrcSize, RppPtr_t dstPtr, RppiSize *dstSize, RppiSize maxDstSize, Rpp32u *xRoiBegin, Rpp32u *xRoiEnd,
Rpp32u *yRoiBegin, Rpp32u *yRoiEnd, Rpp32u nbatchSize, rppHandle_t rppHandle);
RppStatus
rppi_resize_crop_f16_pln1_batchPD_gpu(RppPtr_t srcPtr, RppiSize *srcSize, RppiSize maxSrcSize, RppPtr_t dstPtr, RppiSize *dstSize, RppiSize maxDstSize, Rpp32u *xRoiBegin, Rpp32u *xRoiEnd,
Rpp32u *yRoiBegin, Rpp32u *yRoiEnd, Rpp32u nbatchSize, rppHandle_t rppHandle);

#ifdef __cplusplus
}
#endif
Expand Down
31 changes: 31 additions & 0 deletions src/include/cl/rpp_cl_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,41 @@
#include <math.h>
#include <algorithm>
#include <rppdefs.h>
#include <half.hpp>
// Rpp16f is the project's alias for half_float::half from <half.hpp>.
// The using-declaration also keeps the unqualified name `half` visible to
// code that includes this header.
using half_float::half;
typedef half Rpp16f;

#define __CL_ENABLE_EXCEPTIONS
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS

// Tag identifying a tensor element type. make_data_type() pairs each
// enumerator with a concrete Rpp* type as noted below.
enum class RPPTensorDataType
{
U8 = 0, // paired with Rpp8u
FP32, // paired with Rpp32f
FP16, // paired with Rpp16f
};

// Maps a RPPTensorDataType tag onto the matching element type
// (U8 -> Rpp8u, FP32 -> Rpp32f, FP16 -> Rpp16f).
//
// NOTE: every typedef below is scoped to its own case block, so nothing is
// visible to the caller once the function returns; the function returns void
// and has no observable effect. It only records the tag-to-type pairing.
inline void make_data_type(RPPTensorDataType data_type)
{
    switch (data_type)
    {
        case RPPTensorDataType::U8:
        {
            typedef Rpp8u data_type_t;
            break;
        }
        case RPPTensorDataType::FP32:
        {
            typedef Rpp32f data_type_t;
            break;
        }
        case RPPTensorDataType::FP16:
        {
            typedef Rpp16f data_type_t;
            break;
        }
        default:
            // Any other value: nothing to do, same as the fall-through of an if/else chain.
            break;
    }
}


inline RppStatus generate_gaussian_kernel_gpu(Rpp32f stdDev, Rpp32f* kernel, Rpp32u kernelSize)
{
Rpp32f s, sum = 0.0, multiplier;
Expand Down
96 changes: 71 additions & 25 deletions src/include/cpu/rpp_cpu_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@

#include <math.h>
#include <algorithm>
#include <typeinfo>
#include <cstring>
#include <rppdefs.h>
#include <omp.h>
#include <half.hpp>
using half_float::half;
typedef half Rpp16f;
#include "rpp_cpu_simd.hpp"

#define PI 3.14159265
Expand Down Expand Up @@ -633,41 +637,83 @@ inline RppStatus resize_kernel_host(T* srcPtr, RppiSize srcSize, T* dstPtr, Rppi
srcPtrTemp = srcPtr;
dstPtrTemp = dstPtr;

for (int c = 0; c < channel; c++)
if (typeid(Rpp16f) == typeid(T))
{
for (int i = 0; i < dstSize.height; i++)
for (int c = 0; c < channel; c++)
{
srcLocationRow = ((Rpp32f) i) / hRatio;
srcLocationRowFloor = (Rpp32s) RPPFLOOR(srcLocationRow);
Rpp32f weightedHeight = srcLocationRow - srcLocationRowFloor;
if (srcLocationRowFloor > (srcSize.height - 2))
for (int i = 0; i < dstSize.height; i++)
{
srcLocationRowFloor = srcSize.height - 2;
}
srcLocationRow = ((Rpp32f) i) / hRatio;
srcLocationRowFloor = (Rpp32s) RPPFLOOR(srcLocationRow);
Rpp32f weightedHeight = srcLocationRow - srcLocationRowFloor;
if (srcLocationRowFloor > (srcSize.height - 2))
{
srcLocationRowFloor = srcSize.height - 2;
}

srcPtrTopRow = srcPtrTemp + srcLocationRowFloor * srcSize.width;
srcPtrBottomRow = srcPtrTopRow + srcSize.width;
#pragma omp simd
for (int j = 0; j < dstSize.width; j++)
{
srcLocationColumn = ((Rpp32f) j) / wRatio;
srcLocationColumnFloor = (Rpp32s) RPPFLOOR(srcLocationColumn);
Rpp32f weightedWidth = srcLocationColumn - srcLocationColumnFloor;
srcPtrTopRow = srcPtrTemp + srcLocationRowFloor * srcSize.width;
srcPtrBottomRow = srcPtrTopRow + srcSize.width;

if (srcLocationColumnFloor > (srcSize.width - 2))
for (int j = 0; j < dstSize.width; j++)
{
srcLocationColumn = ((Rpp32f) j) / wRatio;
srcLocationColumnFloor = (Rpp32s) RPPFLOOR(srcLocationColumn);
Rpp32f weightedWidth = srcLocationColumn - srcLocationColumnFloor;

if (srcLocationColumnFloor > (srcSize.width - 2))
{
srcLocationColumnFloor = srcSize.width - 2;
}
pixel = ((*(srcPtrTopRow + srcLocationColumnFloor)) * (1 - weightedHeight) * (1 - weightedWidth))
+ ((*(srcPtrTopRow + srcLocationColumnFloor + 1)) * (1 - weightedHeight) * (weightedWidth))
+ ((*(srcPtrBottomRow + srcLocationColumnFloor)) * (weightedHeight) * (1 - weightedWidth))
+ ((*(srcPtrBottomRow + srcLocationColumnFloor + 1)) * (weightedHeight) * (weightedWidth));

*dstPtrTemp = (T) pixel;
dstPtrTemp ++;
}
}
srcPtrTemp += srcSize.height * srcSize.width;
}
}
else
{
for (int c = 0; c < channel; c++)
{
for (int i = 0; i < dstSize.height; i++)
{
srcLocationRow = ((Rpp32f) i) / hRatio;
srcLocationRowFloor = (Rpp32s) RPPFLOOR(srcLocationRow);
Rpp32f weightedHeight = srcLocationRow - srcLocationRowFloor;
if (srcLocationRowFloor > (srcSize.height - 2))
{
srcLocationColumnFloor = srcSize.width - 2;
srcLocationRowFloor = srcSize.height - 2;
}
pixel = ((*(srcPtrTopRow + srcLocationColumnFloor)) * (1 - weightedHeight) * (1 - weightedWidth))
+ ((*(srcPtrTopRow + srcLocationColumnFloor + 1)) * (1 - weightedHeight) * (weightedWidth))
+ ((*(srcPtrBottomRow + srcLocationColumnFloor)) * (weightedHeight) * (1 - weightedWidth))
+ ((*(srcPtrBottomRow + srcLocationColumnFloor + 1)) * (weightedHeight) * (weightedWidth));

*dstPtrTemp = (T) pixel;
dstPtrTemp ++;
srcPtrTopRow = srcPtrTemp + srcLocationRowFloor * srcSize.width;
srcPtrBottomRow = srcPtrTopRow + srcSize.width;
#pragma omp simd
for (int j = 0; j < dstSize.width; j++)
{
srcLocationColumn = ((Rpp32f) j) / wRatio;
srcLocationColumnFloor = (Rpp32s) RPPFLOOR(srcLocationColumn);
Rpp32f weightedWidth = srcLocationColumn - srcLocationColumnFloor;

if (srcLocationColumnFloor > (srcSize.width - 2))
{
srcLocationColumnFloor = srcSize.width - 2;
}
pixel = ((*(srcPtrTopRow + srcLocationColumnFloor)) * (1 - weightedHeight) * (1 - weightedWidth))
+ ((*(srcPtrTopRow + srcLocationColumnFloor + 1)) * (1 - weightedHeight) * (weightedWidth))
+ ((*(srcPtrBottomRow + srcLocationColumnFloor)) * (weightedHeight) * (1 - weightedWidth))
+ ((*(srcPtrBottomRow + srcLocationColumnFloor + 1)) * (weightedHeight) * (weightedWidth));

*dstPtrTemp = (T) pixel;
dstPtrTemp ++;
}
}
srcPtrTemp += srcSize.height * srcSize.width;
}
srcPtrTemp += srcSize.height * srcSize.width;
}
}
else if (chnFormat == RPPI_CHN_PACKED)
Expand Down
Loading

0 comments on commit 4bbf888

Please sign in to comment.