Commit: cherry-pick #1353 and #1509

cherry-pick #1353 and #1509 and resolve the build issues
lvhan028 authored Dec 13, 2022
2 parents e9912d9 + 5b96f20 commit 78901a2
Showing 130 changed files with 2,975 additions and 3,949 deletions.
1 change: 1 addition & 0 deletions csrc/mmdeploy/CMakeLists.txt
@@ -13,6 +13,7 @@ if (MMDEPLOY_BUILD_SDK)
     add_subdirectory(device)
     add_subdirectory(graph)
     add_subdirectory(model)
+    add_subdirectory(operation)
     add_subdirectory(preprocess)
     add_subdirectory(net)
     add_subdirectory(codebase)
3 changes: 2 additions & 1 deletion csrc/mmdeploy/codebase/mmaction/CMakeLists.txt
@@ -7,7 +7,8 @@ mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
 add_subdirectory(cpu)
 add_subdirectory(cuda)
 target_link_libraries(${PROJECT_NAME} PRIVATE
-    mmdeploy::transform
+    mmdeploy_operation
+    mmdeploy_transform
     mmdeploy_opencv_utils)
 
 add_library(mmdeploy::mmaction ALIAS ${PROJECT_NAME})
82 changes: 10 additions & 72 deletions csrc/mmdeploy/codebase/mmaction/cpu/format_shape_impl.cpp
@@ -5,79 +5,18 @@
 
 using namespace std;
 
-namespace mmdeploy {
-namespace cpu {
+namespace mmdeploy::mmaction::cpu {
 
-class FormatShapeImpl : public ::mmdeploy::FormatShapeImpl {
+class FormatShapeImpl : public FormatShapeOp {
  public:
-  explicit FormatShapeImpl(const Value& args) : ::mmdeploy::FormatShapeImpl(args) {}
+  explicit FormatShapeImpl(std::string input_format) : FormatShapeOp(std::move(input_format)) {}
 
  protected:
-  Result<Tensor> Format(const std::vector<Tensor>& tensors, int clip_len, int num_clips) {
-    int N = tensors.size();
-    int H = tensors[0].shape(1);
-    int W = tensors[0].shape(2);
-    int C = tensors[0].shape(3);
+  Device host_{0, 0};
 
-    std::vector<Tensor> host_tensors;
-    host_tensors.reserve(N);
-    for (int i = 0; i < N; i++) {
-      OUTCOME_TRY(auto src_tensor, MakeAvailableOnDevice(tensors[i], kHost, stream_));
-      host_tensors.push_back(std::move(src_tensor));
-    }
-    OUTCOME_TRY(stream_.Wait());
-
-    TensorDesc desc = {kHost, DataType::kFLOAT, {N, H, W, C}};
-    Tensor imgs(desc);
-    int offset = 0;
-    int n_item = H * W * C;
-    int copy_size = n_item * sizeof(float);
-    for (int i = 0; i < N; i++) {
-      auto src_buffer = host_tensors[i].buffer();
-      auto dst_buffer = imgs.buffer();
-      OUTCOME_TRY(stream_.Copy(src_buffer, dst_buffer, copy_size, 0, offset));
-      offset += copy_size;
-    }
-    OUTCOME_TRY(stream_.Wait());
-
-    Tensor dst;
-    if (arg_.input_format == "NCHW") {
-      OUTCOME_TRY(dst, FormatNCHW(imgs, clip_len, num_clips));
-    }
-    if (arg_.input_format == "NCTHW") {
-      OUTCOME_TRY(dst, FormatNCTHW(imgs, clip_len, num_clips));
-    }
-    TensorShape expand_dim = dst.shape();
-    expand_dim.insert(expand_dim.begin(), 1);
-    dst.Reshape(expand_dim);
-
-    return dst;
-  }
+  const Device& GetDevice() { return host_; }
 
-  Result<Tensor> FormatNCHW(Tensor& src, int clip_len, int num_clips) {
-    int N = src.shape(0);
-    int H = src.shape(1);
-    int W = src.shape(2);
-    int C = src.shape(3);
-    return Transpose(src, {N, H, W, C}, {0, 3, 1, 2});
-  };
-
-  Result<Tensor> FormatNCTHW(Tensor& src, int clip_len, int num_clips) {
-    int N = src.shape(0);
-    int H = src.shape(1);
-    int W = src.shape(2);
-    int C = src.shape(3);
-    int L = clip_len;
-    if (N % L != 0) {
-      return Status(eInvalidArgument);
-    }
-    int M = N / L;
-    src.Reshape({M, L, H, W, C});
-
-    return Transpose(src, {M, L, H, W, C}, {0, 4, 1, 2, 3});
-  };
-
-  Result<Tensor> Transpose(Tensor& src, const std::vector<int>& src_dims,
+  Result<Tensor> Transpose(Tensor& src, const TensorShape& src_dims,
                            const std::vector<int>& permutation) {
     Tensor dst(src.desc());
     TensorShape shape(src.shape().size());
@@ -119,11 +58,10 @@ class FormatShapeImpl : public ::mmdeploy::FormatShapeImpl {
     } while (i >= 0);
     return dst;
   }
-
-  constexpr static Device kHost{0, 0};
 };
 
-MMDEPLOY_REGISTER_TRANSFORM_IMPL(::mmdeploy::FormatShapeImpl, (cpu, 0), FormatShapeImpl);
+MMDEPLOY_REGISTER_FACTORY_FUNC(FormatShapeOp, (cpu, 0), [](std::string input_format) {
+  return std::make_unique<FormatShapeImpl>(std::move(input_format));
+});
 
-}  // namespace cpu
-}  // namespace mmdeploy
+}  // namespace mmdeploy::mmaction::cpu
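Editor's note: the Transpose() helper kept in this file is a stride-based gather — walk the destination tensor linearly, decompose each linear index into coordinates, and read the source through permuted strides. The following standalone sketch reproduces that logic under assumed names (TransposeSketch is illustrative, not an mmdeploy API; the real code does the same walk over mmdeploy Tensor buffers):

// Illustrative sketch only; assumes row-major contiguous float data.
#include <cstddef>
#include <vector>

// Permute an n-dimensional tensor: destination axis i takes source axis
// perm[i]; e.g. perm = {0, 3, 1, 2} turns NHWC into NCHW.
std::vector<float> TransposeSketch(const std::vector<float>& src,
                                   const std::vector<int>& src_dims,
                                   const std::vector<int>& perm) {
  const int ndim = static_cast<int>(src_dims.size());

  // Destination shape under the permutation.
  std::vector<int> dst_dims(ndim);
  for (int i = 0; i < ndim; ++i) dst_dims[i] = src_dims[perm[i]];

  // Row-major strides for a given shape.
  auto strides = [ndim](const std::vector<int>& dims) {
    std::vector<size_t> s(ndim, 1);
    for (int i = ndim - 2; i >= 0; --i) s[i] = s[i + 1] * dims[i + 1];
    return s;
  };
  const auto src_strides = strides(src_dims);
  const auto dst_strides = strides(dst_dims);

  size_t total = 1;
  for (int d : src_dims) total *= static_cast<size_t>(d);

  std::vector<float> dst(total);
  for (size_t u = 0; u < total; ++u) {
    // Decompose destination index u into coordinates and accumulate the
    // matching source offset through the permutation.
    size_t remaining = u, v = 0;
    for (int i = 0; i < ndim; ++i) {
      const size_t coord = remaining / dst_strides[i];
      remaining %= dst_strides[i];
      v += coord * src_strides[perm[i]];
    }
    dst[u] = src[v];
  }
  return dst;
}

The `} while (i >= 0);` context line above suggests the surviving implementation drives the same mapping with a coordinate-carry loop instead of per-element division, but the arithmetic is equivalent.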
4 changes: 2 additions & 2 deletions csrc/mmdeploy/codebase/mmaction/cuda/CMakeLists.txt
@@ -11,8 +11,8 @@ if (NOT (MMDEPLOY_SHARED_LIBS OR MSVC))
     target_compile_options(${PROJECT_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-fvisibility=hidden>)
 endif ()
 target_include_directories(${PROJECT_NAME} PRIVATE
-    ${CUDA_INCLUDE_DIRS})
+    ${CUDA_INCLUDE_DIRS})
 target_link_libraries(${PROJECT_NAME} PRIVATE
-    mmdeploy::core)
+    mmdeploy::core)
 target_link_libraries(mmdeploy_mmaction PRIVATE ${PROJECT_NAME})
 mmdeploy_export(${PROJECT_NAME})
84 changes: 15 additions & 69 deletions csrc/mmdeploy/codebase/mmaction/cuda/format_shape_impl.cpp
@@ -6,75 +6,20 @@
 
 using namespace std;
 
-namespace mmdeploy {
-namespace cuda {
+namespace mmdeploy::mmaction::cuda {
 
 template <typename T>
 void Transpose(const T* src, const int* src_strides, T* dst, const int* dst_strides, int ndim,
                int total, cudaStream_t stream);
 
-class FormatShapeImpl : public ::mmdeploy::FormatShapeImpl {
+class FormatShapeImpl : public FormatShapeOp {
  public:
-  explicit FormatShapeImpl(const Value& args) : ::mmdeploy::FormatShapeImpl(args) {}
+  explicit FormatShapeImpl(std::string input_format) : FormatShapeOp(std::move(input_format)) {}
 
  protected:
-  Result<Tensor> Format(const std::vector<Tensor>& tensors, int clip_len, int num_clips) {
-    int N = tensors.size();
-    int H = tensors[0].shape(1);
-    int W = tensors[0].shape(2);
-    int C = tensors[0].shape(3);
+  const Device& GetDevice() { return device(); }
 
-    auto t0 = std::chrono::high_resolution_clock::now();
-    TensorDesc desc = {device_, DataType::kFLOAT, {N, H, W, C}};
-    Tensor imgs(desc);
-    int offset = 0;
-    int n_item = H * W * C;
-    int copy_size = n_item * sizeof(float);
-    for (int i = 0; i < N; i++) {
-      auto src_buffer = tensors[i].buffer();
-      auto dst_buffer = imgs.buffer();
-      OUTCOME_TRY(stream_.Copy(src_buffer, dst_buffer, copy_size, 0, offset));
-      offset += copy_size;
-    }
-
-    Tensor dst;
-    if (arg_.input_format == "NCHW") {
-      OUTCOME_TRY(dst, FormatNCHW(imgs, clip_len, num_clips));
-    }
-    if (arg_.input_format == "NCTHW") {
-      OUTCOME_TRY(dst, FormatNCTHW(imgs, clip_len, num_clips));
-    }
-    TensorShape expand_dim = dst.shape();
-    expand_dim.insert(expand_dim.begin(), 1);
-    dst.Reshape(expand_dim);
-
-    return dst;
-  }
-
-  Result<Tensor> FormatNCHW(Tensor& src, int clip_len, int num_clips) {
-    int N = src.shape(0);
-    int H = src.shape(1);
-    int W = src.shape(2);
-    int C = src.shape(3);
-    return Transpose(src, {N, H, W, C}, {0, 3, 1, 2});
-  };
-
-  Result<Tensor> FormatNCTHW(Tensor& src, int clip_len, int num_clips) {
-    int N = src.shape(0);
-    int H = src.shape(1);
-    int W = src.shape(2);
-    int C = src.shape(3);
-    int L = clip_len;
-    if (N % L != 0) {
-      return Status(eInvalidArgument);
-    }
-    int M = N / L;
-    src.Reshape({M, L, H, W, C});
-
-    return Transpose(src, {M, L, H, W, C}, {0, 4, 1, 2, 3});
-  };
-
-  Result<Tensor> Transpose(Tensor& src, const std::vector<int>& src_dims,
+  Result<Tensor> Transpose(Tensor& src, const TensorShape& src_dims,
                            const std::vector<int>& permutation) {
     Tensor dst(src.desc());
     TensorShape shape(src.shape().size());
@@ -83,7 +28,7 @@ class FormatShapeImpl : public ::mmdeploy::FormatShapeImpl {
     }
     dst.Reshape(shape);
 
-    int ndim = src_dims.size();
+    auto ndim = src_dims.size();
     std::vector<int> dst_dims(ndim);
     for (int i = 0; i < ndim; i++) {
       dst_dims[i] = src_dims[permutation[i]];
@@ -104,17 +49,18 @@ class FormatShapeImpl : public ::mmdeploy::FormatShapeImpl {
 
     Buffer _src_strides(Device("cuda"), sizeof(int) * ndim);
     Buffer _dst_strides(Device("cuda"), sizeof(int) * ndim);
-    OUTCOME_TRY(stream_.Copy(src_strides.data(), _src_strides));
-    OUTCOME_TRY(stream_.Copy(dst_strides.data(), _dst_strides));
+    OUTCOME_TRY(stream().Copy(src_strides.data(), _src_strides));
+    OUTCOME_TRY(stream().Copy(dst_strides.data(), _dst_strides));
 
-    ::mmdeploy::cuda::Transpose(src.data<float>(), GetNative<int*>(_src_strides), dst.data<float>(),
-                                GetNative<int*>(_dst_strides), ndim, src.size(),
-                                (cudaStream_t)stream_.GetNative());
+    ::mmdeploy::mmaction::cuda::Transpose(src.data<float>(), GetNative<int*>(_src_strides),
+                                          dst.data<float>(), GetNative<int*>(_dst_strides), ndim,
+                                          src.size(), (cudaStream_t)stream().GetNative());
     return dst;
  }
 };
 
-MMDEPLOY_REGISTER_TRANSFORM_IMPL(::mmdeploy::FormatShapeImpl, (cuda, 0), FormatShapeImpl);
+MMDEPLOY_REGISTER_FACTORY_FUNC(FormatShapeOp, (cuda, 0), [](std::string input_format) {
+  return std::make_unique<FormatShapeImpl>(std::move(input_format));
+});
 
-}  // namespace cuda
-}  // namespace mmdeploy
+}  // namespace mmdeploy::mmaction::cuda
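Editor's note: both backend files delete their duplicated FormatNCHW/FormatNCTHW helpers; the formatting logic presumably moves into the shared FormatShapeOp base in the new operation directory (added in csrc/mmdeploy/CMakeLists.txt above, but not shown in this excerpt). For reference, the deleted NCTHW path amounts to this shape bookkeeping, sketched with hypothetical names:

// Illustrative sketch only, mirroring the deleted FormatNCTHW arithmetic.
#include <stdexcept>
#include <vector>

// N stacked NHWC frames, with N = num_clips * clip_len, become one
// {M, C, L, H, W} block: reshape {N, H, W, C} -> {M, L, H, W, C}, then apply
// the permutation {0, 4, 1, 2, 3}. The deleted code finally prepended a
// batch axis of 1.
std::vector<int> NcthwShape(int n, int h, int w, int c, int clip_len) {
  if (n % clip_len != 0) {
    throw std::invalid_argument("N must be divisible by clip_len");
  }
  const int m = n / clip_len;  // number of clips
  return {m, c, clip_len, h, w};
}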
10 changes: 5 additions & 5 deletions csrc/mmdeploy/codebase/mmaction/cuda/transpose.cu
@@ -3,14 +3,15 @@
 #include <stdint.h>
 #include <stdio.h>
 
-namespace mmdeploy {
-namespace cuda {
+namespace mmdeploy::mmaction::cuda {
 
 template <typename T>
 __global__ void transpose(const T* src, const int* src_strides, T* dst, const int* dst_strides,
                           int ndim, int total) {
   int u = blockIdx.x * blockDim.x + threadIdx.x;
-  if (u >= total) return;
+  if (u >= total) {
+    return;
+  }
 
   int remaining = u;
   int v = 0;
@@ -34,5 +35,4 @@ void Transpose(const T* src, const int* src_strides, T* dst, const int* dst_stri
 template void Transpose<float>(const float* src, const int* src_strides, float* dst,
                                const int* dst_strides, int ndim, int total, cudaStream_t stream);
 
-}  // namespace cuda
-}  // namespace mmdeploy
+}  // namespace mmdeploy::mmaction::cuda
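Editor's note: the host-side Transpose wrapper at the end of this file is collapsed in the diff (the @@ -34,5 +35,4 hunk shows only its explicit float instantiation). A typical launch configuration for the kernel above would look like the sketch below; the 256-thread block size and the helper name are assumptions, not taken from this commit:

// Sketch of a plausible launch wrapper; assumes the transpose<T> kernel
// declared in this file. The block size of 256 is an assumed choice.
template <typename T>
void TransposeLaunchSketch(const T* src, const int* src_strides, T* dst,
                           const int* dst_strides, int ndim, int total,
                           cudaStream_t stream) {
  const int threads = 256;
  const int blocks = (total + threads - 1) / threads;  // ceil(total / threads)
  transpose<T><<<blocks, threads, 0, stream>>>(src, src_strides, dst,
                                               dst_strides, ndim, total);
}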