Skip to content

Commit

Permalink
RPP external API cleanup (#68)
Browse files Browse the repository at this point in the history
* initial commit

* cleaned up all the external
API

* minor change

* formatted some changes after review

* files format tabs to spaces

* Fix arithmetic_operations

* Fix parentheses formatting

* Fix image_augmentations

* Fix logical_operations

* Fix parentheses formatting

* Fix morphological_operations

* Fix filter_operations

* Fix statistical_operations

* Fix host statistical_operations

* Fix parentheses formatting

* Fix computer_vision and move tmm out from arithmetic

* Fix color_model_conversions

* Change help to reflect batch processing

* Fix fused_functions

* Add parameter documentation for fused

* Fix advanced_augmentations

* Fix geometric_transforms

* Fix tensor_transpose grouping mismatch

* Remove support functions and utilities

* Fix codacy paramIndex issue

* Codacy fixes

Co-authored-by: shobana-mcw <shobana@multicorewareinc.com>
  • Loading branch information
r-abishek and shobana-mcw authored Aug 12, 2021
1 parent 1e77550 commit 45ebba3
Show file tree
Hide file tree
Showing 31 changed files with 19,701 additions and 215,181 deletions.
3 changes: 1 addition & 2 deletions include/rppi.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@ extern "C" {
#include "rppi_filter_operations.h"
#include "rppi_geometry_transforms.h"
#include "rppi_logical_operations.h"
#include "rppi_support_functions.h"
#include "rppi_statistical_operations.h"
#include "rppi_morphological_transforms.h"
#include "rppi_computer_vision.h"
#include "rppi_fused_functions.h"
#include "rppi_advanced_augmentations.h"


#ifdef __cplusplus
}
#endif

#endif /* RPPI_H */
995 changes: 285 additions & 710 deletions include/rppi_advanced_augmentations.h

Large diffs are not rendered by default.

2,808 changes: 247 additions & 2,561 deletions include/rppi_arithmetic_operations.h

Large diffs are not rendered by default.

1,834 changes: 189 additions & 1,645 deletions include/rppi_color_model_conversions.h

Large diffs are not rendered by default.

1,713 changes: 343 additions & 1,370 deletions include/rppi_computer_vision.h

Large diffs are not rendered by default.

2,109 changes: 156 additions & 1,953 deletions include/rppi_filter_operations.h

Large diffs are not rendered by default.

897 changes: 235 additions & 662 deletions include/rppi_fused_functions.h

Large diffs are not rendered by default.

2,996 changes: 308 additions & 2,688 deletions include/rppi_geometry_transforms.h

Large diffs are not rendered by default.

4,036 changes: 345 additions & 3,691 deletions include/rppi_image_augmentations.h

Large diffs are not rendered by default.

1,159 changes: 87 additions & 1,072 deletions include/rppi_logical_operations.h

Large diffs are not rendered by default.

595 changes: 44 additions & 551 deletions include/rppi_morphological_transforms.h

Large diffs are not rendered by default.

1,310 changes: 168 additions & 1,142 deletions include/rppi_statistical_operations.h

Large diffs are not rendered by default.

27 changes: 0 additions & 27 deletions include/rppi_support_functions.h

This file was deleted.

53 changes: 0 additions & 53 deletions src/modules/cl/cl_arithmetic_operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -603,56 +603,3 @@ tensor_matrix_multiply_cl(cl_mem srcPtr1, cl_mem srcPtr2, Rpp32u *tensorDimensio

return RPP_SUCCESS;
}

/******************** tensor_transpose ********************/

RppStatus
tensor_transpose_cl(cl_mem srcPtr, cl_mem dstPtr, Rpp32u* in_dims, Rpp32u *perm, RPPTensorDataType data_type, rpp::Handle& handle)
{
    // Transposes a 4D tensor on the GPU: element [i0,i1,i2,i3] of the output
    // is element [perm[0]->i0, ...] of the input. Output dims are the input
    // dims re-ordered by the permutation.
    unsigned int out_dims[4];
    out_dims[0] = in_dims[perm[0]];
    out_dims[1] = in_dims[perm[1]];
    out_dims[2] = in_dims[perm[2]];
    out_dims[3] = in_dims[perm[3]];

    // Row-major strides for the input and output layouts.
    unsigned int in_strides[4], out_strides[4];
    in_strides[0] = in_dims[1] * in_dims[2] * in_dims[3];
    in_strides[1] = in_dims[2] * in_dims[3];
    in_strides[2] = in_dims[3];
    in_strides[3] = 1;
    out_strides[0] = out_dims[1] * out_dims[2] * out_dims[3];
    out_strides[1] = out_dims[2] * out_dims[3];
    out_strides[2] = out_dims[3];
    out_strides[3] = 1;

    // Stage the small metadata arrays in device memory. Blocking writes
    // (CL_TRUE), so the host arrays need not outlive this function.
    cl_mem d_perm, d_out_strides, d_in_strides, d_out_dims;
    cl_context theContext;
    clGetCommandQueueInfo(handle.GetStream(), CL_QUEUE_CONTEXT, sizeof(cl_context), &theContext, NULL);
    d_perm = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    d_in_strides = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    d_out_strides = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    d_out_dims = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_perm, CL_TRUE, 0, sizeof(unsigned int) * 4, perm, 0, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_in_strides, CL_TRUE, 0, sizeof(unsigned int) * 4, in_strides, 0, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_out_strides, CL_TRUE, 0, sizeof(unsigned int) * 4, out_strides, 0, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_out_dims, CL_TRUE, 0, sizeof(unsigned int) * 4, out_dims, 0, NULL, NULL);

    std::vector<size_t> vld{16, 16, 1};
    std::vector<size_t> vgd{out_dims[0], out_dims[1], out_dims[2] * out_dims[3]};

    // Select the kernel variant matching the tensor's data type
    // (U8 is the default name; the branches are mutually exclusive).
    std::string kernel_name = "tensor_transpose";
    if (data_type == RPPTensorDataType::FP32)
        kernel_name = "tensor_transpose_fp32";
    else if (data_type == RPPTensorDataType::FP16)
        kernel_name = "tensor_transpose_fp16";
    else if (data_type == RPPTensorDataType::I8)
        kernel_name = "tensor_transpose_int8";

    handle.AddKernel("", "", "tensor.cl", kernel_name, vld, vgd, "")(srcPtr,
                                                                     dstPtr,
                                                                     d_out_dims,
                                                                     d_perm,
                                                                     d_out_strides,
                                                                     d_in_strides);

    // Release the temporary device buffers (the original leaked them).
    // Safe after enqueue: the runtime keeps mem objects alive until the
    // commands that reference them complete. Matches remap_cl_batch's cleanup.
    clReleaseMemObject(d_perm);
    clReleaseMemObject(d_in_strides);
    clReleaseMemObject(d_out_strides);
    clReleaseMemObject(d_out_dims);

    return RPP_SUCCESS;
}
55 changes: 54 additions & 1 deletion src/modules/cl/cl_computer_vision.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1941,4 +1941,57 @@ remap_cl_batch(cl_mem srcPtr, cl_mem dstPtr, Rpp32u *rowRemapTable, Rpp32u *colR
clReleaseMemObject(rowRemapTableGPU);
clReleaseMemObject(colRemapTableGPU);
return RPP_SUCCESS;
}
}

/******************** tensor_transpose ********************/

RppStatus
tensor_transpose_cl(cl_mem srcPtr, cl_mem dstPtr, Rpp32u* in_dims, Rpp32u *perm, RPPTensorDataType data_type, rpp::Handle& handle)
{
    // Transposes a 4D tensor on the GPU: element [i0,i1,i2,i3] of the output
    // is element [perm[0]->i0, ...] of the input. Output dims are the input
    // dims re-ordered by the permutation.
    unsigned int out_dims[4];
    out_dims[0] = in_dims[perm[0]];
    out_dims[1] = in_dims[perm[1]];
    out_dims[2] = in_dims[perm[2]];
    out_dims[3] = in_dims[perm[3]];

    // Row-major strides for the input and output layouts.
    unsigned int in_strides[4], out_strides[4];
    in_strides[0] = in_dims[1] * in_dims[2] * in_dims[3];
    in_strides[1] = in_dims[2] * in_dims[3];
    in_strides[2] = in_dims[3];
    in_strides[3] = 1;
    out_strides[0] = out_dims[1] * out_dims[2] * out_dims[3];
    out_strides[1] = out_dims[2] * out_dims[3];
    out_strides[2] = out_dims[3];
    out_strides[3] = 1;

    // Stage the small metadata arrays in device memory. Blocking writes
    // (CL_TRUE), so the host arrays need not outlive this function.
    cl_mem d_perm, d_out_strides, d_in_strides, d_out_dims;
    cl_context theContext;
    clGetCommandQueueInfo(handle.GetStream(), CL_QUEUE_CONTEXT, sizeof(cl_context), &theContext, NULL);
    d_perm = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    d_in_strides = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    d_out_strides = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    d_out_dims = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_perm, CL_TRUE, 0, sizeof(unsigned int) * 4, perm, 0, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_in_strides, CL_TRUE, 0, sizeof(unsigned int) * 4, in_strides, 0, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_out_strides, CL_TRUE, 0, sizeof(unsigned int) * 4, out_strides, 0, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_out_dims, CL_TRUE, 0, sizeof(unsigned int) * 4, out_dims, 0, NULL, NULL);

    std::vector<size_t> vld{16, 16, 1};
    std::vector<size_t> vgd{out_dims[0], out_dims[1], out_dims[2] * out_dims[3]};

    // Select the kernel variant matching the tensor's data type
    // (U8 is the default name; the branches are mutually exclusive).
    std::string kernel_name = "tensor_transpose";
    if (data_type == RPPTensorDataType::FP32)
        kernel_name = "tensor_transpose_fp32";
    else if (data_type == RPPTensorDataType::FP16)
        kernel_name = "tensor_transpose_fp16";
    else if (data_type == RPPTensorDataType::I8)
        kernel_name = "tensor_transpose_int8";

    handle.AddKernel("", "", "tensor.cl", kernel_name, vld, vgd, "")(srcPtr,
                                                                     dstPtr,
                                                                     d_out_dims,
                                                                     d_perm,
                                                                     d_out_strides,
                                                                     d_in_strides);

    // Release the temporary device buffers (the original leaked them).
    // Safe after enqueue: the runtime keeps mem objects alive until the
    // commands that reference them complete. Matches remap_cl_batch's cleanup.
    clReleaseMemObject(d_perm);
    clReleaseMemObject(d_in_strides);
    clReleaseMemObject(d_out_strides);
    clReleaseMemObject(d_out_dims);

    return RPP_SUCCESS;
}
35 changes: 0 additions & 35 deletions src/modules/cpu/host_advanced_augmentations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2403,41 +2403,6 @@ RppStatus lut_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_src
return RPP_SUCCESS;
}

template <typename T>
RppStatus transpose_host(T* srcPtr, T* dstPtr, Rpp32u *perm, Rpp32u *shape)
{
    // Generic 4D transpose on the host: dstPtr is written densely in the
    // permuted axis order, each element read from the row-major srcPtr.

    // Row-major strides of the source tensor.
    Rpp32u srcStrides[4] = {
        shape[1] * shape[2] * shape[3],
        shape[2] * shape[3],
        shape[3],
        1
    };

    // Pre-permute strides and extents so the inner loops stay simple.
    Rpp32u permStrides[4] = {srcStrides[perm[0]], srcStrides[perm[1]], srcStrides[perm[2]], srcStrides[perm[3]]};
    Rpp32u permShape[4] = {shape[perm[0]], shape[perm[1]], shape[perm[2]], shape[perm[3]]};

    T *dstWalk = dstPtr;
    for (Rpp32u i = 0; i < permShape[0]; i++)
    {
        for (Rpp32u j = 0; j < permShape[1]; j++)
        {
            for (Rpp32u k = 0; k < permShape[2]; k++)
            {
                for (Rpp32u l = 0; l < permShape[3]; l++)
                {
                    *dstWalk++ = srcPtr[(i * permStrides[0]) + (j * permStrides[1]) + (k * permStrides[2]) + (l * permStrides[3])];
                }
            }
        }
    }
    return RPP_SUCCESS;
}

/**************** glitch ***************/

template <typename T>
Expand Down
Loading

0 comments on commit 45ebba3

Please sign in to comment.