Commit: cherry-pick #1353 and #1509

cherry-pick #1353 and #1509 and resolve the build issues
lvhan028 authored Dec 13, 2022
2 parents e9912d9 + 5b96f20 commit 78901a2
Showing 130 changed files with 2,975 additions and 3,949 deletions.
1 change: 1 addition & 0 deletions csrc/mmdeploy/CMakeLists.txt
@@ -13,6 +13,7 @@ if (MMDEPLOY_BUILD_SDK)
     add_subdirectory(device)
     add_subdirectory(graph)
     add_subdirectory(model)
+    add_subdirectory(operation)
     add_subdirectory(preprocess)
     add_subdirectory(net)
     add_subdirectory(codebase)
3 changes: 2 additions & 1 deletion csrc/mmdeploy/codebase/mmaction/CMakeLists.txt
@@ -7,7 +7,8 @@ mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
 add_subdirectory(cpu)
 add_subdirectory(cuda)
 target_link_libraries(${PROJECT_NAME} PRIVATE
-    mmdeploy::transform
+    mmdeploy_operation
+    mmdeploy_transform
     mmdeploy_opencv_utils)
 
 add_library(mmdeploy::mmaction ALIAS ${PROJECT_NAME})
82 changes: 10 additions & 72 deletions csrc/mmdeploy/codebase/mmaction/cpu/format_shape_impl.cpp
@@ -5,79 +5,18 @@
 
 using namespace std;
 
-namespace mmdeploy {
-namespace cpu {
+namespace mmdeploy::mmaction::cpu {
 
-class FormatShapeImpl : public ::mmdeploy::FormatShapeImpl {
+class FormatShapeImpl : public FormatShapeOp {
  public:
-  explicit FormatShapeImpl(const Value& args) : ::mmdeploy::FormatShapeImpl(args) {}
+  explicit FormatShapeImpl(std::string input_format) : FormatShapeOp(std::move(input_format)) {}
 
  protected:
-  Result<Tensor> Format(const std::vector<Tensor>& tensors, int clip_len, int num_clips) {
-    int N = tensors.size();
-    int H = tensors[0].shape(1);
-    int W = tensors[0].shape(2);
-    int C = tensors[0].shape(3);
+  Device host_{0, 0};
 
-    std::vector<Tensor> host_tensors;
-    host_tensors.reserve(N);
-    for (int i = 0; i < N; i++) {
-      OUTCOME_TRY(auto src_tensor, MakeAvailableOnDevice(tensors[i], kHost, stream_));
-      host_tensors.push_back(std::move(src_tensor));
-    }
-    OUTCOME_TRY(stream_.Wait());
-
-    TensorDesc desc = {kHost, DataType::kFLOAT, {N, H, W, C}};
-    Tensor imgs(desc);
-    int offset = 0;
-    int n_item = H * W * C;
-    int copy_size = n_item * sizeof(float);
-    for (int i = 0; i < N; i++) {
-      auto src_buffer = host_tensors[i].buffer();
-      auto dst_buffer = imgs.buffer();
-      OUTCOME_TRY(stream_.Copy(src_buffer, dst_buffer, copy_size, 0, offset));
-      offset += copy_size;
-    }
-    OUTCOME_TRY(stream_.Wait());
-
-    Tensor dst;
-    if (arg_.input_format == "NCHW") {
-      OUTCOME_TRY(dst, FormatNCHW(imgs, clip_len, num_clips));
-    }
-    if (arg_.input_format == "NCTHW") {
-      OUTCOME_TRY(dst, FormatNCTHW(imgs, clip_len, num_clips));
-    }
-    TensorShape expand_dim = dst.shape();
-    expand_dim.insert(expand_dim.begin(), 1);
-    dst.Reshape(expand_dim);
-
-    return dst;
-  }
+  const Device& GetDevice() { return host_; }
 
-  Result<Tensor> FormatNCHW(Tensor& src, int clip_len, int num_clips) {
-    int N = src.shape(0);
-    int H = src.shape(1);
-    int W = src.shape(2);
-    int C = src.shape(3);
-    return Transpose(src, {N, H, W, C}, {0, 3, 1, 2});
-  };
-
-  Result<Tensor> FormatNCTHW(Tensor& src, int clip_len, int num_clips) {
-    int N = src.shape(0);
-    int H = src.shape(1);
-    int W = src.shape(2);
-    int C = src.shape(3);
-    int L = clip_len;
-    if (N % L != 0) {
-      return Status(eInvalidArgument);
-    }
-    int M = N / L;
-    src.Reshape({M, L, H, W, C});
-
-    return Transpose(src, {M, L, H, W, C}, {0, 4, 1, 2, 3});
-  };
-
-  Result<Tensor> Transpose(Tensor& src, const std::vector<int>& src_dims,
+  Result<Tensor> Transpose(Tensor& src, const TensorShape& src_dims,
                            const std::vector<int>& permutation) {
     Tensor dst(src.desc());
     TensorShape shape(src.shape().size());
@@ -119,11 +58,10 @@ class FormatShapeImpl : public ::mmdeploy::FormatShapeImpl {
     } while (i >= 0);
     return dst;
   }
-
-  constexpr static Device kHost{0, 0};
 };
 
-MMDEPLOY_REGISTER_TRANSFORM_IMPL(::mmdeploy::FormatShapeImpl, (cpu, 0), FormatShapeImpl);
+MMDEPLOY_REGISTER_FACTORY_FUNC(FormatShapeOp, (cpu, 0), [](std::string input_format) {
+  return std::make_unique<FormatShapeImpl>(std::move(input_format));
+});
 
-}  // namespace cpu
-}  // namespace mmdeploy
+}  // namespace mmdeploy::mmaction::cpu
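Editor's note: the Transpose() helper kept in this file is a stride-based gather — walk the destination tensor linearly, decompose each linear index into coordinates, and read the source through permuted strides. The following standalone sketch reproduces that logic under assumed names (TransposeSketch is illustrative, not an mmdeploy API; the real code does the same walk over mmdeploy Tensor buffers):

// Illustrative sketch only; assumes row-major contiguous float data.
#include <cstddef>
#include <vector>

// Permute an n-dimensional tensor: destination axis i takes source axis
// perm[i]; e.g. perm = {0, 3, 1, 2} turns NHWC into NCHW.
std::vector<float> TransposeSketch(const std::vector<float>& src,
                                   const std::vector<int>& src_dims,
                                   const std::vector<int>& perm) {
  const int ndim = static_cast<int>(src_dims.size());

  // Destination shape under the permutation.
  std::vector<int> dst_dims(ndim);
  for (int i = 0; i < ndim; ++i) dst_dims[i] = src_dims[perm[i]];

  // Row-major strides for a given shape.
  auto strides = [ndim](const std::vector<int>& dims) {
    std::vector<size_t> s(ndim, 1);
    for (int i = ndim - 2; i >= 0; --i) s[i] = s[i + 1] * dims[i + 1];
    return s;
  };
  const auto src_strides = strides(src_dims);
  const auto dst_strides = strides(dst_dims);

  size_t total = 1;
  for (int d : src_dims) total *= static_cast<size_t>(d);

  std::vector<float> dst(total);
  for (size_t u = 0; u < total; ++u) {
    // Decompose destination index u into coordinates and accumulate the
    // matching source offset through the permutation.
    size_t remaining = u, v = 0;
    for (int i = 0; i < ndim; ++i) {
      const size_t coord = remaining / dst_strides[i];
      remaining %= dst_strides[i];
      v += coord * src_strides[perm[i]];
    }
    dst[u] = src[v];
  }
  return dst;
}

The `} while (i >= 0);` context line above suggests the surviving implementation drives the same mapping with a coordinate-carry loop instead of per-element division, but the arithmetic is equivalent.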
4 changes: 2 additions & 2 deletions csrc/mmdeploy/codebase/mmaction/cuda/CMakeLists.txt
@@ -11,8 +11,8 @@ if (NOT (MMDEPLOY_SHARED_LIBS OR MSVC))
     target_compile_options(${PROJECT_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-fvisibility=hidden>)
 endif ()
 target_include_directories(${PROJECT_NAME} PRIVATE
-    ${CUDA_INCLUDE_DIRS})
+    ${CUDA_INCLUDE_DIRS})
 target_link_libraries(${PROJECT_NAME} PRIVATE
-    mmdeploy::core)
+    mmdeploy::core)
 target_link_libraries(mmdeploy_mmaction PRIVATE ${PROJECT_NAME})
 mmdeploy_export(${PROJECT_NAME})
84 changes: 15 additions & 69 deletions csrc/mmdeploy/codebase/mmaction/cuda/format_shape_impl.cpp
@@ -6,75 +6,20 @@
 
 using namespace std;
 
-namespace mmdeploy {
-namespace cuda {
+namespace mmdeploy::mmaction::cuda {
 
 template <typename T>
 void Transpose(const T* src, const int* src_strides, T* dst, const int* dst_strides, int ndim,
                int total, cudaStream_t stream);
 
-class FormatShapeImpl : public ::mmdeploy::FormatShapeImpl {
+class FormatShapeImpl : public FormatShapeOp {
  public:
-  explicit FormatShapeImpl(const Value& args) : ::mmdeploy::FormatShapeImpl(args) {}
+  explicit FormatShapeImpl(std::string input_format) : FormatShapeOp(std::move(input_format)) {}
 
  protected:
-  Result<Tensor> Format(const std::vector<Tensor>& tensors, int clip_len, int num_clips) {
-    int N = tensors.size();
-    int H = tensors[0].shape(1);
-    int W = tensors[0].shape(2);
-    int C = tensors[0].shape(3);
+  const Device& GetDevice() { return device(); }
 
-    auto t0 = std::chrono::high_resolution_clock::now();
-    TensorDesc desc = {device_, DataType::kFLOAT, {N, H, W, C}};
-    Tensor imgs(desc);
-    int offset = 0;
-    int n_item = H * W * C;
-    int copy_size = n_item * sizeof(float);
-    for (int i = 0; i < N; i++) {
-      auto src_buffer = tensors[i].buffer();
-      auto dst_buffer = imgs.buffer();
-      OUTCOME_TRY(stream_.Copy(src_buffer, dst_buffer, copy_size, 0, offset));
-      offset += copy_size;
-    }
-
-    Tensor dst;
-    if (arg_.input_format == "NCHW") {
-      OUTCOME_TRY(dst, FormatNCHW(imgs, clip_len, num_clips));
-    }
-    if (arg_.input_format == "NCTHW") {
-      OUTCOME_TRY(dst, FormatNCTHW(imgs, clip_len, num_clips));
-    }
-    TensorShape expand_dim = dst.shape();
-    expand_dim.insert(expand_dim.begin(), 1);
-    dst.Reshape(expand_dim);
-
-    return dst;
-  }
-
-  Result<Tensor> FormatNCHW(Tensor& src, int clip_len, int num_clips) {
-    int N = src.shape(0);
-    int H = src.shape(1);
-    int W = src.shape(2);
-    int C = src.shape(3);
-    return Transpose(src, {N, H, W, C}, {0, 3, 1, 2});
-  };
-
-  Result<Tensor> FormatNCTHW(Tensor& src, int clip_len, int num_clips) {
-    int N = src.shape(0);
-    int H = src.shape(1);
-    int W = src.shape(2);
-    int C = src.shape(3);
-    int L = clip_len;
-    if (N % L != 0) {
-      return Status(eInvalidArgument);
-    }
-    int M = N / L;
-    src.Reshape({M, L, H, W, C});
-
-    return Transpose(src, {M, L, H, W, C}, {0, 4, 1, 2, 3});
-  };
-
-  Result<Tensor> Transpose(Tensor& src, const std::vector<int>& src_dims,
+  Result<Tensor> Transpose(Tensor& src, const TensorShape& src_dims,
                            const std::vector<int>& permutation) {
     Tensor dst(src.desc());
     TensorShape shape(src.shape().size());
@@ -83,7 +28,7 @@ class FormatShapeImpl : public ::mmdeploy::FormatShapeImpl {
     }
     dst.Reshape(shape);
 
-    int ndim = src_dims.size();
+    auto ndim = src_dims.size();
     std::vector<int> dst_dims(ndim);
     for (int i = 0; i < ndim; i++) {
       dst_dims[i] = src_dims[permutation[i]];
@@ -104,17 +49,18 @@ class FormatShapeImpl : public ::mmdeploy::FormatShapeImpl {
 
     Buffer _src_strides(Device("cuda"), sizeof(int) * ndim);
     Buffer _dst_strides(Device("cuda"), sizeof(int) * ndim);
-    OUTCOME_TRY(stream_.Copy(src_strides.data(), _src_strides));
-    OUTCOME_TRY(stream_.Copy(dst_strides.data(), _dst_strides));
+    OUTCOME_TRY(stream().Copy(src_strides.data(), _src_strides));
+    OUTCOME_TRY(stream().Copy(dst_strides.data(), _dst_strides));
 
-    ::mmdeploy::cuda::Transpose(src.data<float>(), GetNative<int*>(_src_strides), dst.data<float>(),
-                                GetNative<int*>(_dst_strides), ndim, src.size(),
-                                (cudaStream_t)stream_.GetNative());
+    ::mmdeploy::mmaction::cuda::Transpose(src.data<float>(), GetNative<int*>(_src_strides),
+                                          dst.data<float>(), GetNative<int*>(_dst_strides), ndim,
+                                          src.size(), (cudaStream_t)stream().GetNative());
     return dst;
  }
 };
 
-MMDEPLOY_REGISTER_TRANSFORM_IMPL(::mmdeploy::FormatShapeImpl, (cuda, 0), FormatShapeImpl);
+MMDEPLOY_REGISTER_FACTORY_FUNC(FormatShapeOp, (cuda, 0), [](std::string input_format) {
+  return std::make_unique<FormatShapeImpl>(std::move(input_format));
+});
 
-}  // namespace cuda
-}  // namespace mmdeploy
+}  // namespace mmdeploy::mmaction::cuda
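Editor's note: both backend files delete their duplicated FormatNCHW/FormatNCTHW helpers; the formatting logic presumably moves into the shared FormatShapeOp base in the new operation directory (added in csrc/mmdeploy/CMakeLists.txt above, but not shown in this excerpt). For reference, the deleted NCTHW path amounts to this shape bookkeeping, sketched with hypothetical names:

// Illustrative sketch only, mirroring the deleted FormatNCTHW arithmetic.
#include <stdexcept>
#include <vector>

// N stacked NHWC frames, with N = num_clips * clip_len, become one
// {M, C, L, H, W} block: reshape {N, H, W, C} -> {M, L, H, W, C}, then apply
// the permutation {0, 4, 1, 2, 3}. The deleted code finally prepended a
// batch axis of 1.
std::vector<int> NcthwShape(int n, int h, int w, int c, int clip_len) {
  if (n % clip_len != 0) {
    throw std::invalid_argument("N must be divisible by clip_len");
  }
  const int m = n / clip_len;  // number of clips
  return {m, c, clip_len, h, w};
}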
10 changes: 5 additions & 5 deletions csrc/mmdeploy/codebase/mmaction/cuda/transpose.cu
@@ -3,14 +3,15 @@
 #include <stdint.h>
 #include <stdio.h>
 
-namespace mmdeploy {
-namespace cuda {
+namespace mmdeploy::mmaction::cuda {
 
 template <typename T>
 __global__ void transpose(const T* src, const int* src_strides, T* dst, const int* dst_strides,
                           int ndim, int total) {
   int u = blockIdx.x * blockDim.x + threadIdx.x;
-  if (u >= total) return;
+  if (u >= total) {
+    return;
+  }
 
   int remaining = u;
   int v = 0;
@@ -34,5 +35,4 @@ void Transpose(const T* src, const int* src_strides, T* dst, const int* dst_stri
 template void Transpose<float>(const float* src, const int* src_strides, float* dst,
                                const int* dst_strides, int ndim, int total, cudaStream_t stream);
 
-}  // namespace cuda
-}  // namespace mmdeploy
+}  // namespace mmdeploy::mmaction::cuda
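Editor's note: the host-side Transpose wrapper at the end of this file is collapsed in the diff (the @@ -34,5 +35,4 hunk shows only its explicit float instantiation). A typical launch configuration for the kernel above would look like the sketch below; the 256-thread block size and the helper name are assumptions, not taken from this commit:

// Sketch of a plausible launch wrapper; assumes the transpose<T> kernel
// declared in this file. The block size of 256 is an assumed choice.
template <typename T>
void TransposeLaunchSketch(const T* src, const int* src_strides, T* dst,
                           const int* dst_strides, int ndim, int total,
                           cudaStream_t stream) {
  const int threads = 256;
  const int blocks = (total + threads - 1) / threads;  // ceil(total / threads)
  transpose<T><<<blocks, threads, 0, stream>>>(src, src_strides, dst,
                                               dst_strides, ndim, total);
}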