Skip to content

Commit

Permalink
RPP external API cleanup (#68)
Browse files Browse the repository at this point in the history
* initial commit

* cleaned up all the external
API

* minor change

* formatted some changes after review

* files format tabs to spaces

* Fix arithmetic_operations

* Fix parentheses formatting

* Fix image_augmentations

* Fix logical_operations

* Fix parentheses formatting

* Fix morphological_operations

* Fix filter_operations

* Fix statistical_operations

* Fix host statistical_operations

* Fix parentheses formatting

* Fix computer_vision and move tmm out from arithmetic

* Fix color_model_conversions

* Change help to reflect batch processing

* Fix fused_functions

* Add parameter documentation for fused

* Fix advanced_augmentations

* Fix geometric_transforms

* Fix tensor_transpose grouping mismatch

* Remove support functions and utilities

* Fix codacy paramIndex issue

* Codacy fixes

Co-authored-by: shobana-mcw <shobana@multicorewareinc.com>
  • Loading branch information
r-abishek and shobana-mcw authored Aug 12, 2021
1 parent 1e77550 commit 45ebba3
Show file tree
Hide file tree
Showing 31 changed files with 19,701 additions and 215,181 deletions.
3 changes: 1 addition & 2 deletions include/rppi.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@ extern "C" {
#include "rppi_filter_operations.h"
#include "rppi_geometry_transforms.h"
#include "rppi_logical_operations.h"
#include "rppi_support_functions.h"
#include "rppi_statistical_operations.h"
#include "rppi_morphological_transforms.h"
#include "rppi_computer_vision.h"
#include "rppi_fused_functions.h"
#include "rppi_advanced_augmentations.h"


#ifdef __cplusplus
}
#endif

#endif /* RPPI_H */
995 changes: 285 additions & 710 deletions include/rppi_advanced_augmentations.h

Large diffs are not rendered by default.

2,808 changes: 247 additions & 2,561 deletions include/rppi_arithmetic_operations.h

Large diffs are not rendered by default.

1,834 changes: 189 additions & 1,645 deletions include/rppi_color_model_conversions.h

Large diffs are not rendered by default.

1,713 changes: 343 additions & 1,370 deletions include/rppi_computer_vision.h

Large diffs are not rendered by default.

2,109 changes: 156 additions & 1,953 deletions include/rppi_filter_operations.h

Large diffs are not rendered by default.

897 changes: 235 additions & 662 deletions include/rppi_fused_functions.h

Large diffs are not rendered by default.

2,996 changes: 308 additions & 2,688 deletions include/rppi_geometry_transforms.h

Large diffs are not rendered by default.

4,036 changes: 345 additions & 3,691 deletions include/rppi_image_augmentations.h

Large diffs are not rendered by default.

1,159 changes: 87 additions & 1,072 deletions include/rppi_logical_operations.h

Large diffs are not rendered by default.

595 changes: 44 additions & 551 deletions include/rppi_morphological_transforms.h

Large diffs are not rendered by default.

1,310 changes: 168 additions & 1,142 deletions include/rppi_statistical_operations.h

Large diffs are not rendered by default.

27 changes: 0 additions & 27 deletions include/rppi_support_functions.h

This file was deleted.

53 changes: 0 additions & 53 deletions src/modules/cl/cl_arithmetic_operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -603,56 +603,3 @@ tensor_matrix_multiply_cl(cl_mem srcPtr1, cl_mem srcPtr2, Rpp32u *tensorDimensio

return RPP_SUCCESS;
}

/******************** tensor_transpose ********************/

RppStatus
tensor_transpose_cl(cl_mem srcPtr, cl_mem dstPtr, Rpp32u* in_dims, Rpp32u *perm, RPPTensorDataType data_type, rpp::Handle& handle)
{
    // Transposes a 4D tensor on the GPU: element [i0,i1,i2,i3] of the output
    // is element [perm[0]->i0, ...] of the input. Output dims are the input
    // dims re-ordered by the permutation.
    unsigned int out_dims[4];
    out_dims[0] = in_dims[perm[0]];
    out_dims[1] = in_dims[perm[1]];
    out_dims[2] = in_dims[perm[2]];
    out_dims[3] = in_dims[perm[3]];

    // Row-major strides for the input and output layouts.
    unsigned int in_strides[4], out_strides[4];
    in_strides[0] = in_dims[1] * in_dims[2] * in_dims[3];
    in_strides[1] = in_dims[2] * in_dims[3];
    in_strides[2] = in_dims[3];
    in_strides[3] = 1;
    out_strides[0] = out_dims[1] * out_dims[2] * out_dims[3];
    out_strides[1] = out_dims[2] * out_dims[3];
    out_strides[2] = out_dims[3];
    out_strides[3] = 1;

    // Stage the small metadata arrays in device memory. Blocking writes
    // (CL_TRUE), so the host arrays need not outlive this function.
    cl_mem d_perm, d_out_strides, d_in_strides, d_out_dims;
    cl_context theContext;
    clGetCommandQueueInfo(handle.GetStream(), CL_QUEUE_CONTEXT, sizeof(cl_context), &theContext, NULL);
    d_perm = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    d_in_strides = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    d_out_strides = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    d_out_dims = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_perm, CL_TRUE, 0, sizeof(unsigned int) * 4, perm, 0, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_in_strides, CL_TRUE, 0, sizeof(unsigned int) * 4, in_strides, 0, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_out_strides, CL_TRUE, 0, sizeof(unsigned int) * 4, out_strides, 0, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_out_dims, CL_TRUE, 0, sizeof(unsigned int) * 4, out_dims, 0, NULL, NULL);

    std::vector<size_t> vld{16, 16, 1};
    std::vector<size_t> vgd{out_dims[0], out_dims[1], out_dims[2] * out_dims[3]};

    // Select the kernel variant matching the tensor's data type
    // (U8 is the default name; the branches are mutually exclusive).
    std::string kernel_name = "tensor_transpose";
    if (data_type == RPPTensorDataType::FP32)
        kernel_name = "tensor_transpose_fp32";
    else if (data_type == RPPTensorDataType::FP16)
        kernel_name = "tensor_transpose_fp16";
    else if (data_type == RPPTensorDataType::I8)
        kernel_name = "tensor_transpose_int8";

    handle.AddKernel("", "", "tensor.cl", kernel_name, vld, vgd, "")(srcPtr,
                                                                     dstPtr,
                                                                     d_out_dims,
                                                                     d_perm,
                                                                     d_out_strides,
                                                                     d_in_strides);

    // Release the temporary device buffers (the original leaked them).
    // Safe after enqueue: the runtime keeps mem objects alive until the
    // commands that reference them complete. Matches remap_cl_batch's cleanup.
    clReleaseMemObject(d_perm);
    clReleaseMemObject(d_in_strides);
    clReleaseMemObject(d_out_strides);
    clReleaseMemObject(d_out_dims);

    return RPP_SUCCESS;
}
55 changes: 54 additions & 1 deletion src/modules/cl/cl_computer_vision.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1941,4 +1941,57 @@ remap_cl_batch(cl_mem srcPtr, cl_mem dstPtr, Rpp32u *rowRemapTable, Rpp32u *colR
clReleaseMemObject(rowRemapTableGPU);
clReleaseMemObject(colRemapTableGPU);
return RPP_SUCCESS;
}
}

/******************** tensor_transpose ********************/

RppStatus
tensor_transpose_cl(cl_mem srcPtr, cl_mem dstPtr, Rpp32u* in_dims, Rpp32u *perm, RPPTensorDataType data_type, rpp::Handle& handle)
{
    // Transposes a 4D tensor on the GPU: element [i0,i1,i2,i3] of the output
    // is element [perm[0]->i0, ...] of the input. Output dims are the input
    // dims re-ordered by the permutation.
    unsigned int out_dims[4];
    out_dims[0] = in_dims[perm[0]];
    out_dims[1] = in_dims[perm[1]];
    out_dims[2] = in_dims[perm[2]];
    out_dims[3] = in_dims[perm[3]];

    // Row-major strides for the input and output layouts.
    unsigned int in_strides[4], out_strides[4];
    in_strides[0] = in_dims[1] * in_dims[2] * in_dims[3];
    in_strides[1] = in_dims[2] * in_dims[3];
    in_strides[2] = in_dims[3];
    in_strides[3] = 1;
    out_strides[0] = out_dims[1] * out_dims[2] * out_dims[3];
    out_strides[1] = out_dims[2] * out_dims[3];
    out_strides[2] = out_dims[3];
    out_strides[3] = 1;

    // Stage the small metadata arrays in device memory. Blocking writes
    // (CL_TRUE), so the host arrays need not outlive this function.
    cl_mem d_perm, d_out_strides, d_in_strides, d_out_dims;
    cl_context theContext;
    clGetCommandQueueInfo(handle.GetStream(), CL_QUEUE_CONTEXT, sizeof(cl_context), &theContext, NULL);
    d_perm = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    d_in_strides = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    d_out_strides = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    d_out_dims = clCreateBuffer(theContext, CL_MEM_READ_ONLY, sizeof(unsigned int) * 4, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_perm, CL_TRUE, 0, sizeof(unsigned int) * 4, perm, 0, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_in_strides, CL_TRUE, 0, sizeof(unsigned int) * 4, in_strides, 0, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_out_strides, CL_TRUE, 0, sizeof(unsigned int) * 4, out_strides, 0, NULL, NULL);
    clEnqueueWriteBuffer(handle.GetStream(), d_out_dims, CL_TRUE, 0, sizeof(unsigned int) * 4, out_dims, 0, NULL, NULL);

    std::vector<size_t> vld{16, 16, 1};
    std::vector<size_t> vgd{out_dims[0], out_dims[1], out_dims[2] * out_dims[3]};

    // Select the kernel variant matching the tensor's data type
    // (U8 is the default name; the branches are mutually exclusive).
    std::string kernel_name = "tensor_transpose";
    if (data_type == RPPTensorDataType::FP32)
        kernel_name = "tensor_transpose_fp32";
    else if (data_type == RPPTensorDataType::FP16)
        kernel_name = "tensor_transpose_fp16";
    else if (data_type == RPPTensorDataType::I8)
        kernel_name = "tensor_transpose_int8";

    handle.AddKernel("", "", "tensor.cl", kernel_name, vld, vgd, "")(srcPtr,
                                                                     dstPtr,
                                                                     d_out_dims,
                                                                     d_perm,
                                                                     d_out_strides,
                                                                     d_in_strides);

    // Release the temporary device buffers (the original leaked them).
    // Safe after enqueue: the runtime keeps mem objects alive until the
    // commands that reference them complete. Matches remap_cl_batch's cleanup.
    clReleaseMemObject(d_perm);
    clReleaseMemObject(d_in_strides);
    clReleaseMemObject(d_out_strides);
    clReleaseMemObject(d_out_dims);

    return RPP_SUCCESS;
}
35 changes: 0 additions & 35 deletions src/modules/cpu/host_advanced_augmentations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2403,41 +2403,6 @@ RppStatus lut_host_batch(T* srcPtr, RppiSize *batch_srcSize, RppiSize *batch_src
return RPP_SUCCESS;
}

template <typename T>
RppStatus transpose_host(T* srcPtr, T* dstPtr, Rpp32u *perm, Rpp32u *shape)
{
    // Generic 4D transpose on the host: dstPtr is written densely in the
    // permuted axis order, each element read from the row-major srcPtr.

    // Row-major strides of the source tensor.
    Rpp32u srcStrides[4] = {
        shape[1] * shape[2] * shape[3],
        shape[2] * shape[3],
        shape[3],
        1
    };

    // Pre-permute strides and extents so the inner loops stay simple.
    Rpp32u permStrides[4] = {srcStrides[perm[0]], srcStrides[perm[1]], srcStrides[perm[2]], srcStrides[perm[3]]};
    Rpp32u permShape[4] = {shape[perm[0]], shape[perm[1]], shape[perm[2]], shape[perm[3]]};

    T *dstWalk = dstPtr;
    for (Rpp32u i = 0; i < permShape[0]; i++)
    {
        for (Rpp32u j = 0; j < permShape[1]; j++)
        {
            for (Rpp32u k = 0; k < permShape[2]; k++)
            {
                for (Rpp32u l = 0; l < permShape[3]; l++)
                {
                    *dstWalk++ = srcPtr[(i * permStrides[0]) + (j * permStrides[1]) + (k * permStrides[2]) + (l * permStrides[3])];
                }
            }
        }
    }
    return RPP_SUCCESS;
}

/**************** glitch ***************/

template <typename T>
Expand Down
Loading

0 comments on commit 45ebba3

Please sign in to comment.