From 3d3aed8a2131d61248aacc2ce633a6de0f207439 Mon Sep 17 00:00:00 2001 From: nihuini Date: Wed, 2 Nov 2022 19:27:44 +0800 Subject: [PATCH 01/10] implement fold and unfold --- src/CMakeLists.txt | 2 + src/layer/fold.cpp | 132 ++++++++++++++++++++++++++++++++++++++++ src/layer/fold.h | 48 +++++++++++++++ src/layer/unfold.cpp | 141 +++++++++++++++++++++++++++++++++++++++++++ src/layer/unfold.h | 50 +++++++++++++++ 5 files changed, 373 insertions(+) create mode 100644 src/layer/fold.cpp create mode 100644 src/layer/fold.h create mode 100644 src/layer/unfold.cpp create mode 100644 src/layer/unfold.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ab41ea9dc1e..58d826a4d8f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -157,6 +157,8 @@ ncnn_add_layer(DeconvolutionDepthWise3D) ncnn_add_layer(Einsum) ncnn_add_layer(DeformableConv2D) ncnn_add_layer(GLU) +ncnn_add_layer(Fold) +ncnn_add_layer(Unfold) if(NCNN_VULKAN) ncnn_add_shader(${CMAKE_CURRENT_SOURCE_DIR}/convert_ycbcr.comp) diff --git a/src/layer/fold.cpp b/src/layer/fold.cpp new file mode 100644 index 00000000000..608be635e40 --- /dev/null +++ b/src/layer/fold.cpp @@ -0,0 +1,132 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "fold.h" + +#include + +namespace ncnn { + +Fold::Fold() +{ + one_blob_only = true; +} + +int Fold::load_param(const ParamDict& pd) +{ + kernel_w = pd.get(1, 0); + kernel_h = pd.get(11, kernel_w); + dilation_w = pd.get(2, 1); + dilation_h = pd.get(12, dilation_w); + stride_w = pd.get(3, 1); + stride_h = pd.get(13, stride_w); + pad_left = pd.get(4, 0); + pad_right = pd.get(15, pad_left); + pad_top = pd.get(14, pad_left); + pad_bottom = pd.get(16, pad_top); + output_w = pd.get(20, 0); + output_h = pd.get(21, output_w); + + return 0; +} + +int Fold::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const +{ + const int size = bottom_blob.w; + const int max_channels = bottom_blob.h; + size_t elemsize = bottom_blob.elemsize; + + const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; + const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; + + const int outw = output_w + pad_left + pad_right; + const int outh = output_h + pad_top + pad_bottom; + + const int inw = (outw - kernel_extent_w) / stride_w + 1; + const int inh = (outh - kernel_extent_h) / stride_h + 1; + + // assert inw * inh == size + + const int maxk = kernel_w * kernel_h; + const int channels = max_channels / maxk; + + Mat top_blob_bordered; + if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + { + top_blob_bordered.create(outw, outh, channels, elemsize, opt.workspace_allocator); + } + else + { + top_blob_bordered = top_blob; + top_blob_bordered.create(outw, outh, channels, elemsize, opt.blob_allocator); + } + if (top_blob_bordered.empty()) + return -100; + + // col2im + #pragma omp parallel for num_threads(opt.num_threads) + for (int p = 0; p < channels; p++) + { + float* ptr = top_blob_bordered.channel(p); + + for (int i = 0; i < outh; i++) + { + for (int j = 0; j < outw; j++) + { + float sum = 0.f; + + const int sx_start = (j < kernel_extent_w) ? 0 : (j - kernel_extent_w) / stride_w + 1; + const int sx_end = std::min(j / stride_w + 1, inw); + + const int sy_start = (i < kernel_extent_h) ? 0 : (i - kernel_extent_h) / stride_h + 1; + const int sy_end = std::min(i / stride_h + 1, inh); + + for (int sy = sy_start; sy < sy_end; sy += 1) + { + for (int sx = sx_start; sx < sx_end; sx += 1) + { + int h_k = (i - sy * stride_h); + int w_k = (j - sx * stride_w); + + if (h_k % dilation_h == 0 && w_k % dilation_w == 0) + { + h_k /= dilation_h; + w_k /= dilation_w; + + sum += bottom_blob.row(p * maxk + h_k * kernel_w + w_k)[sy * inw + sx]; + } + } + } + + ptr[0] = sum; + ptr += 1; + } + } + } + + if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + { + copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + if (top_blob.empty()) + return -100; + } + else + { + top_blob = top_blob_bordered; + } + + return 0; +} + +} // namespace ncnn diff --git a/src/layer/fold.h b/src/layer/fold.h new file mode 100644 index 00000000000..e6eccad556a --- /dev/null +++ b/src/layer/fold.h @@ -0,0 +1,48 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef LAYER_FOLD_H +#define LAYER_FOLD_H + +#include "layer.h" + +namespace ncnn { + +class Fold : public Layer +{ +public: + Fold(); + + virtual int load_param(const ParamDict& pd); + + virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; + +public: + int kernel_w; + int kernel_h; + int dilation_w; + int dilation_h; + int stride_w; + int stride_h; + int pad_left; // -233=SAME_UPPER -234=SAME_LOWER + int pad_right; + int pad_top; + int pad_bottom; + int output_w; + int output_h; +}; + +} // namespace ncnn + +#endif // LAYER_FOLD_H diff --git a/src/layer/unfold.cpp b/src/layer/unfold.cpp new file mode 100644 index 00000000000..5f26d71c4d1 --- /dev/null +++ b/src/layer/unfold.cpp @@ -0,0 +1,141 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "unfold.h" + +namespace ncnn { + +Unfold::Unfold() +{ + one_blob_only = true; +} + +int Unfold::load_param(const ParamDict& pd) +{ + kernel_w = pd.get(1, 0); + kernel_h = pd.get(11, kernel_w); + dilation_w = pd.get(2, 1); + dilation_h = pd.get(12, dilation_w); + stride_w = pd.get(3, 1); + stride_h = pd.get(13, stride_w); + pad_left = pd.get(4, 0); + pad_right = pd.get(15, pad_left); + pad_top = pd.get(14, pad_left); + pad_bottom = pd.get(16, pad_top); + pad_value = pd.get(18, 0.f); + + return 0; +} + +int Unfold::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const +{ + Mat bottom_blob_bordered; + make_padding(bottom_blob, bottom_blob_bordered, opt); + if (bottom_blob_bordered.empty()) + return -100; + + const int w = bottom_blob_bordered.w; + const int h = bottom_blob_bordered.h; + const int channels = bottom_blob_bordered.c; + const size_t elemsize = bottom_blob_bordered.elemsize; + + const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; + const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; + + const int outw = (w - kernel_extent_w) / stride_w + 1; + const int outh = (h - kernel_extent_h) / stride_h + 1; + + const int size = outw * outh; + const int maxk = kernel_w * kernel_h; + + top_blob.create(size, maxk * channels, elemsize, opt.blob_allocator); + if (top_blob.empty()) + return -100; + + // im2col + const int gap = w * stride_h - outw * stride_w; + + #pragma omp parallel for num_threads(opt.num_threads) + for (int p = 0; p < channels; p++) + { + const Mat img = bottom_blob_bordered.channel(p); + float* ptr = top_blob.row(p * maxk); + + for (int u = 0; u < kernel_h; u++) + { + for (int v = 0; v < kernel_w; v++) + { + const float* sptr = bottom_blob_bordered.row(dilation_h * u) + dilation_w * v; + + for (int i = 0; i < outh; i++) + { + for (int j = 0; j < outw; j++) + { + ptr[0] = sptr[0]; + + sptr += stride_w; + ptr += 1; + } + + sptr += gap; + } + } + } + } + + return 0; +} + +void Unfold::make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const +{ + int w = bottom_blob.w; + int h = bottom_blob.h; + + const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; + const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; + + bottom_blob_bordered = bottom_blob; + if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) + { + Option opt_b = opt; + opt_b.blob_allocator = opt.workspace_allocator; + copy_make_border(bottom_blob, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, pad_value, opt_b); + } + else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233) + { + // tensorflow padding=SAME or onnx padding=SAME_UPPER + int wpad = kernel_extent_w + (w - 1) / stride_w * stride_w - w; + int hpad = kernel_extent_h + (h - 1) / stride_h * stride_h - h; + if (wpad > 0 || hpad > 0) + { + Option opt_b = opt; + opt_b.blob_allocator = opt.workspace_allocator; + copy_make_border(bottom_blob, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, pad_value, opt_b); + } + } + else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234) + { + // onnx padding=SAME_LOWER + int wpad = kernel_extent_w + (w - 1) / stride_w * stride_w - w; + int hpad = kernel_extent_h + (h - 1) / stride_h * stride_h - h; + if (wpad > 0 || hpad > 0) + { + Option opt_b = opt; + opt_b.blob_allocator = opt.workspace_allocator; + copy_make_border(bottom_blob, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, pad_value, opt_b); + } + } +} + +} // namespace ncnn diff --git a/src/layer/unfold.h b/src/layer/unfold.h new file mode 100644 index 00000000000..ff7860b7f72 --- /dev/null +++ b/src/layer/unfold.h @@ -0,0 +1,50 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef LAYER_UNFOLD_H +#define LAYER_UNFOLD_H + +#include "layer.h" + +namespace ncnn { + +class Unfold : public Layer +{ +public: + Unfold(); + + virtual int load_param(const ParamDict& pd); + + virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; + +protected: + void make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const; + +public: + int kernel_w; + int kernel_h; + int dilation_w; + int dilation_h; + int stride_w; + int stride_h; + int pad_left; // -233=SAME_UPPER -234=SAME_LOWER + int pad_right; + int pad_top; + int pad_bottom; + float pad_value; +}; + +} // namespace ncnn + +#endif // LAYER_UNFOLD_H From 633078c056442e9cd2b6319e33294867dbe380c6 Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 3 Nov 2022 11:05:22 +0800 Subject: [PATCH 02/10] add ncnn test --- src/layer/unfold.cpp | 2 +- tools/pnnx/src/CMakeLists.txt | 2 + tools/pnnx/src/pass_ncnn/nn_Fold.cpp | 63 +++++++++++++++++++++++ tools/pnnx/src/pass_ncnn/nn_Unfold.cpp | 61 ++++++++++++++++++++++ tools/pnnx/tests/ncnn/CMakeLists.txt | 2 + tools/pnnx/tests/ncnn/test_nn_Fold.py | 67 +++++++++++++++++++++++++ tools/pnnx/tests/ncnn/test_nn_Unfold.py | 65 ++++++++++++++++++++++++ 7 files changed, 261 insertions(+), 1 deletion(-) create mode 100644 tools/pnnx/src/pass_ncnn/nn_Fold.cpp create mode 100644 tools/pnnx/src/pass_ncnn/nn_Unfold.cpp create mode 100644 tools/pnnx/tests/ncnn/test_nn_Fold.py create mode 100644 tools/pnnx/tests/ncnn/test_nn_Unfold.py diff --git a/src/layer/unfold.cpp b/src/layer/unfold.cpp index 5f26d71c4d1..a2e2e053ff8 100644 --- a/src/layer/unfold.cpp +++ b/src/layer/unfold.cpp @@ -76,7 +76,7 @@ int Unfold::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) co { for (int v = 0; v < kernel_w; v++) { - const float* sptr = bottom_blob_bordered.row(dilation_h * u) + dilation_w * v; + const float* sptr = img.row(dilation_h * u) + dilation_w * v; for (int i = 0; i < outh; i++) { diff --git a/tools/pnnx/src/CMakeLists.txt b/tools/pnnx/src/CMakeLists.txt index f48767d4fbe..f3fb6bf9cd8 100644 --- a/tools/pnnx/src/CMakeLists.txt +++ b/tools/pnnx/src/CMakeLists.txt @@ -427,6 +427,7 @@ set(pnnx_pass_ncnn_SRCS pass_ncnn/nn_ConvTranspose3d.cpp pass_ncnn/nn_ELU.cpp pass_ncnn/nn_Embedding.cpp + pass_ncnn/nn_Fold.cpp pass_ncnn/nn_GELU.cpp pass_ncnn/nn_GLU.cpp pass_ncnn/nn_GroupNorm.cpp @@ -461,6 +462,7 @@ set(pnnx_pass_ncnn_SRCS pass_ncnn/nn_Softmax.cpp pass_ncnn/nn_Softmax2d.cpp pass_ncnn/nn_Tanh.cpp + pass_ncnn/nn_Unfold.cpp pass_ncnn/nn_Upsample.cpp pass_ncnn/nn_UpsamplingBilinear2d.cpp pass_ncnn/nn_UpsamplingNearest2d.cpp diff --git a/tools/pnnx/src/pass_ncnn/nn_Fold.cpp b/tools/pnnx/src/pass_ncnn/nn_Fold.cpp new file mode 100644 index 00000000000..d94bc68b030 --- /dev/null +++ b/tools/pnnx/src/pass_ncnn/nn_Fold.cpp @@ -0,0 +1,63 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "pass_ncnn.h" + +namespace pnnx { + +namespace ncnn { + +class nn_Fold : public GraphRewriterPass +{ +public: + const char* match_pattern_graph() const + { + return R"PNNXIR(7767517 +3 2 +pnnx.Input input 0 1 input +nn.Fold op_0 1 1 input out output_size=%output_size kernel_size=%kernel_size stride=%stride padding=%padding dilation=%dilation +pnnx.Output output 1 0 out +)PNNXIR"; + } + + const char* type_str() const + { + return "Fold"; + } + + const char* name_str() const + { + return "fold"; + } + + void write(Operator* op, const std::map& captured_params) const + { + op->params["1"] = captured_params.at("kernel_size").ai[1]; + op->params["11"] = captured_params.at("kernel_size").ai[0]; + op->params["2"] = captured_params.at("dilation").ai[1]; + op->params["12"] = captured_params.at("dilation").ai[0]; + op->params["3"] = captured_params.at("stride").ai[1]; + op->params["13"] = captured_params.at("stride").ai[0]; + op->params["4"] = captured_params.at("padding").ai[1]; + op->params["14"] = captured_params.at("padding").ai[0]; + op->params["20"] = captured_params.at("output_size").ai[1]; + op->params["21"] = captured_params.at("output_size").ai[0]; + } +}; + +REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(nn_Fold, 20) + +} // namespace ncnn + +} // namespace pnnx diff --git a/tools/pnnx/src/pass_ncnn/nn_Unfold.cpp b/tools/pnnx/src/pass_ncnn/nn_Unfold.cpp new file mode 100644 index 00000000000..526e5d24c38 --- /dev/null +++ b/tools/pnnx/src/pass_ncnn/nn_Unfold.cpp @@ -0,0 +1,61 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "pass_ncnn.h" + +namespace pnnx { + +namespace ncnn { + +class nn_Unfold : public GraphRewriterPass +{ +public: + const char* match_pattern_graph() const + { + return R"PNNXIR(7767517 +3 2 +pnnx.Input input 0 1 input +nn.Unfold op_0 1 1 input out kernel_size=%kernel_size stride=%stride padding=%padding dilation=%dilation +pnnx.Output output 1 0 out +)PNNXIR"; + } + + const char* type_str() const + { + return "Unfold"; + } + + const char* name_str() const + { + return "unfold"; + } + + void write(Operator* op, const std::map& captured_params) const + { + op->params["1"] = captured_params.at("kernel_size").ai[1]; + op->params["11"] = captured_params.at("kernel_size").ai[0]; + op->params["2"] = captured_params.at("dilation").ai[1]; + op->params["12"] = captured_params.at("dilation").ai[0]; + op->params["3"] = captured_params.at("stride").ai[1]; + op->params["13"] = captured_params.at("stride").ai[0]; + op->params["4"] = captured_params.at("padding").ai[1]; + op->params["14"] = captured_params.at("padding").ai[0]; + } +}; + +REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(nn_Unfold, 20) + +} // namespace ncnn + +} // namespace pnnx diff --git a/tools/pnnx/tests/ncnn/CMakeLists.txt b/tools/pnnx/tests/ncnn/CMakeLists.txt index b14a1716e3b..69579d14173 100644 --- a/tools/pnnx/tests/ncnn/CMakeLists.txt +++ b/tools/pnnx/tests/ncnn/CMakeLists.txt @@ -84,6 +84,7 @@ pnnx_ncnn_add_test(nn_Dropout2d) pnnx_ncnn_add_test(nn_Dropout3d) pnnx_ncnn_add_test(nn_ELU) pnnx_ncnn_add_test(nn_Embedding) +pnnx_ncnn_add_test(nn_Fold) pnnx_ncnn_add_test(nn_GELU) pnnx_ncnn_add_test(nn_GLU) pnnx_ncnn_add_test(nn_GroupNorm) @@ -117,6 +118,7 @@ pnnx_ncnn_add_test(nn_SiLU) pnnx_ncnn_add_test(nn_Softmax) pnnx_ncnn_add_test(nn_Softmax2d) pnnx_ncnn_add_test(nn_Tanh) +pnnx_ncnn_add_test(nn_Unfold) pnnx_ncnn_add_test(nn_Upsample) pnnx_ncnn_add_test(nn_UpsamplingBilinear2d) pnnx_ncnn_add_test(nn_UpsamplingNearest2d) diff --git a/tools/pnnx/tests/ncnn/test_nn_Fold.py b/tools/pnnx/tests/ncnn/test_nn_Fold.py new file mode 100644 index 00000000000..0ec407979fc --- /dev/null +++ b/tools/pnnx/tests/ncnn/test_nn_Fold.py @@ -0,0 +1,67 @@ +# Tencent is pleased to support the open source community by making ncnn available. +# +# Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + +import torch +import torch.nn as nn +import torch.nn.functional as F +from packaging import version + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + self.fold_0 = nn.Fold(output_size=22, kernel_size=3) + self.fold_1 = nn.Fold(output_size=(17,18), kernel_size=(2,4), stride=(2,1), padding=2, dilation=1) + self.fold_2 = nn.Fold(output_size=(5,11), kernel_size=(1,3), stride=1, padding=(2,4), dilation=1) + + def forward(self, x, y, z): + x = self.fold_0(x) + y = self.fold_1(y) + z = self.fold_2(z) + + return x, y, z + +def test(): + net = Model() + net.eval() + + torch.manual_seed(0) + x = torch.rand(1, 108, 400) + y = torch.rand(1, 96, 190) + z = torch.rand(1, 33, 153) + + a = net(x, y, z) + + # export torchscript + mod = torch.jit.trace(net, (x, y, z)) + mod.save("test_nn_Fold.pt") + + # torchscript to pnnx + import os + os.system("../../src/pnnx test_nn_Fold.pt inputshape=[1,108,400],[1,96,190],[1,33,153]") + + # ncnn inference + import test_nn_Fold_ncnn + b = test_nn_Fold_ncnn.test_inference() + + for a0, b0 in zip(a, b): + if not torch.allclose(a0, b0, 1e-4, 1e-4): + return False + return True + +if __name__ == "__main__": + if test(): + exit(0) + else: + exit(1) diff --git a/tools/pnnx/tests/ncnn/test_nn_Unfold.py b/tools/pnnx/tests/ncnn/test_nn_Unfold.py new file mode 100644 index 00000000000..dbe588127d5 --- /dev/null +++ b/tools/pnnx/tests/ncnn/test_nn_Unfold.py @@ -0,0 +1,65 @@ +# Tencent is pleased to support the open source community by making ncnn available. +# +# Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + +import torch +import torch.nn as nn +import torch.nn.functional as F +from packaging import version + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + self.unfold_0 = nn.Unfold(kernel_size=3) + self.unfold_1 = nn.Unfold(kernel_size=(2,4), stride=(2,1), padding=2, dilation=1) + self.unfold_2 = nn.Unfold(kernel_size=(1,3), stride=1, padding=(2,4), dilation=1) + + def forward(self, x): + x0 = self.unfold_0(x) + x1 = self.unfold_1(x) + x2 = self.unfold_2(x) + + return x0, x1, x2 + +def test(): + net = Model() + net.eval() + + torch.manual_seed(0) + x = torch.rand(1, 12, 64, 64) + + a = net(x) + + # export torchscript + mod = torch.jit.trace(net, x) + mod.save("test_nn_Unfold.pt") + + # torchscript to ncnn + import os + os.system("../../src/pnnx test_nn_Unfold.pt inputshape=[1,12,64,64]") + + # ncnn inference + import test_nn_Unfold_ncnn + b = test_nn_Unfold_ncnn.test_inference() + + for a0, b0 in zip(a, b): + if not torch.allclose(a0, b0, 1e-4, 1e-4): + return False + return True + +if __name__ == "__main__": + if test(): + exit(0) + else: + exit(1) From c29f7a23aeb1c5b561b4f4340204786a17f03237 Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 3 Nov 2022 11:13:28 +0800 Subject: [PATCH 03/10] convert F.fold and F.unfold --- tools/pnnx/README.md | 8 ++-- tools/pnnx/src/CMakeLists.txt | 2 + tools/pnnx/src/pass_ncnn/F_fold.cpp | 63 ++++++++++++++++++++++++++ tools/pnnx/src/pass_ncnn/F_unfold.cpp | 61 +++++++++++++++++++++++++ tools/pnnx/tests/ncnn/CMakeLists.txt | 2 + tools/pnnx/tests/ncnn/test_F_fold.py | 63 ++++++++++++++++++++++++++ tools/pnnx/tests/ncnn/test_F_unfold.py | 61 +++++++++++++++++++++++++ 7 files changed, 256 insertions(+), 4 deletions(-) create mode 100644 tools/pnnx/src/pass_ncnn/F_fold.cpp create mode 100644 tools/pnnx/src/pass_ncnn/F_unfold.cpp create mode 100644 tools/pnnx/tests/ncnn/test_F_fold.py create mode 100644 tools/pnnx/tests/ncnn/test_F_unfold.py diff --git a/tools/pnnx/README.md b/tools/pnnx/README.md index 0a153dafe2b..eac02294193 100644 --- a/tools/pnnx/README.md +++ b/tools/pnnx/README.md @@ -484,7 +484,7 @@ TORCH_LIBRARY(upfirdn2d_op, m) { |nn.Embedding | :heavy_check_mark: | :heavy_check_mark: | |nn.EmbeddingBag | | |nn.Flatten | :heavy_check_mark: | -|nn.Fold | :heavy_check_mark: | +|nn.Fold | :heavy_check_mark: | :heavy_check_mark: | |nn.FractionalMaxPool2d | | |nn.FractionalMaxPool3d | | |nn.GELU | :heavy_check_mark: | :heavy_check_mark: | @@ -562,7 +562,7 @@ TORCH_LIBRARY(upfirdn2d_op, m) { |nn.TransformerEncoder | | |nn.TransformerEncoderLayer | | |nn.Unflatten | | -|nn.Unfold | :heavy_check_mark: | +|nn.Unfold | :heavy_check_mark: | :heavy_check_mark: | |nn.Upsample | :heavy_check_mark: | :heavy_check_mark: | |nn.UpsamplingBilinear2d | :heavy_check_mark: | :heavy_check_mark: | |nn.UpsamplingNearest2d | :heavy_check_mark: | :heavy_check_mark: | @@ -600,7 +600,7 @@ TORCH_LIBRARY(upfirdn2d_op, m) { |F.embedding | :heavy_check_mark: | :heavy_check_mark: | |F.embedding_bag | | |F.feature_alpha_dropout | :heavy_check_mark: | :heavy_check_mark: | -|F.fold | :heavy_check_mark: | +|F.fold | :heavy_check_mark: | :heavy_check_mark: | |F.fractional_max_pool2d | | |F.fractional_max_pool3d | | |F.gelu | :heavy_check_mark: | :heavy_check_mark: | @@ -656,7 +656,7 @@ TORCH_LIBRARY(upfirdn2d_op, m) { |F.tanhshrink | :heavy_check_mark: | |F.threshold | :heavy_check_mark: | |F.threshold_ | :heavy_check_mark: | -|F.unfold | :heavy_check_mark: | +|F.unfold | :heavy_check_mark: | :heavy_check_mark: | |F.upsample | :heavy_check_mark: | :heavy_check_mark: | |F.upsample_bilinear | :heavy_check_mark: | :heavy_check_mark: | |F.upsample_nearest | :heavy_check_mark: | :heavy_check_mark: | diff --git a/tools/pnnx/src/CMakeLists.txt b/tools/pnnx/src/CMakeLists.txt index f3fb6bf9cd8..2e0eb5d8456 100644 --- a/tools/pnnx/src/CMakeLists.txt +++ b/tools/pnnx/src/CMakeLists.txt @@ -372,6 +372,7 @@ set(pnnx_pass_ncnn_SRCS pass_ncnn/F_conv3d.cpp pass_ncnn/F_elu.cpp pass_ncnn/F_embedding.cpp + pass_ncnn/F_fold.cpp pass_ncnn/F_gelu.cpp pass_ncnn/F_glu.cpp pass_ncnn/F_group_norm.cpp @@ -400,6 +401,7 @@ set(pnnx_pass_ncnn_SRCS pass_ncnn/F_silu.cpp pass_ncnn/F_softmax.cpp pass_ncnn/F_tanh.cpp + pass_ncnn/F_unfold.cpp pass_ncnn/F_upsample_bilinear.cpp pass_ncnn/F_upsample_nearest.cpp pass_ncnn/F_upsample.cpp diff --git a/tools/pnnx/src/pass_ncnn/F_fold.cpp b/tools/pnnx/src/pass_ncnn/F_fold.cpp new file mode 100644 index 00000000000..1d35a72eb11 --- /dev/null +++ b/tools/pnnx/src/pass_ncnn/F_fold.cpp @@ -0,0 +1,63 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "pass_ncnn.h" + +namespace pnnx { + +namespace ncnn { + +class F_fold : public GraphRewriterPass +{ +public: + const char* match_pattern_graph() const + { + return R"PNNXIR(7767517 +3 2 +pnnx.Input input 0 1 input +F.fold op_0 1 1 input out output_size=%output_size kernel_size=%kernel_size dilation=%dilation stride=%stride padding=%padding +pnnx.Output output 1 0 out +)PNNXIR"; + } + + const char* type_str() const + { + return "Fold"; + } + + const char* name_str() const + { + return "fold"; + } + + void write(Operator* op, const std::map& captured_params) const + { + op->params["1"] = captured_params.at("kernel_size").ai[1]; + op->params["11"] = captured_params.at("kernel_size").ai[0]; + op->params["2"] = captured_params.at("dilation").ai[1]; + op->params["12"] = captured_params.at("dilation").ai[0]; + op->params["3"] = captured_params.at("stride").ai[1]; + op->params["13"] = captured_params.at("stride").ai[0]; + op->params["4"] = captured_params.at("padding").ai[1]; + op->params["14"] = captured_params.at("padding").ai[0]; + op->params["20"] = captured_params.at("output_size").ai[1]; + op->params["21"] = captured_params.at("output_size").ai[0]; + } +}; + +REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_fold, 20) + +} // namespace ncnn + +} // namespace pnnx diff --git a/tools/pnnx/src/pass_ncnn/F_unfold.cpp b/tools/pnnx/src/pass_ncnn/F_unfold.cpp new file mode 100644 index 00000000000..14f82b08f99 --- /dev/null +++ b/tools/pnnx/src/pass_ncnn/F_unfold.cpp @@ -0,0 +1,61 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "pass_ncnn.h" + +namespace pnnx { + +namespace ncnn { + +class F_unfold : public GraphRewriterPass +{ +public: + const char* match_pattern_graph() const + { + return R"PNNXIR(7767517 +3 2 +pnnx.Input input 0 1 input +F.unfold op_0 1 1 input out kernel_size=%kernel_size dilation=%dilation stride=%stride padding=%padding +pnnx.Output output 1 0 out +)PNNXIR"; + } + + const char* type_str() const + { + return "Unfold"; + } + + const char* name_str() const + { + return "unfold"; + } + + void write(Operator* op, const std::map& captured_params) const + { + op->params["1"] = captured_params.at("kernel_size").ai[1]; + op->params["11"] = captured_params.at("kernel_size").ai[0]; + op->params["2"] = captured_params.at("dilation").ai[1]; + op->params["12"] = captured_params.at("dilation").ai[0]; + op->params["3"] = captured_params.at("stride").ai[1]; + op->params["13"] = captured_params.at("stride").ai[0]; + op->params["4"] = captured_params.at("padding").ai[1]; + op->params["14"] = captured_params.at("padding").ai[0]; + } +}; + +REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_unfold, 20) + +} // namespace ncnn + +} // namespace pnnx diff --git a/tools/pnnx/tests/ncnn/CMakeLists.txt b/tools/pnnx/tests/ncnn/CMakeLists.txt index 69579d14173..f5688ce8d70 100644 --- a/tools/pnnx/tests/ncnn/CMakeLists.txt +++ b/tools/pnnx/tests/ncnn/CMakeLists.txt @@ -28,6 +28,7 @@ pnnx_ncnn_add_test(F_dropout3d) pnnx_ncnn_add_test(F_elu) pnnx_ncnn_add_test(F_embedding) pnnx_ncnn_add_test(F_feature_alpha_dropout) +pnnx_ncnn_add_test(F_fold) pnnx_ncnn_add_test(F_gelu) pnnx_ncnn_add_test(F_glu) pnnx_ncnn_add_test(F_group_norm) @@ -52,6 +53,7 @@ pnnx_ncnn_add_test(F_sigmoid) pnnx_ncnn_add_test(F_silu) pnnx_ncnn_add_test(F_softmax) pnnx_ncnn_add_test(F_tanh) +pnnx_ncnn_add_test(F_unfold) pnnx_ncnn_add_test(F_upsample_bilinear) pnnx_ncnn_add_test(F_upsample_nearest) pnnx_ncnn_add_test(F_upsample) diff --git a/tools/pnnx/tests/ncnn/test_F_fold.py b/tools/pnnx/tests/ncnn/test_F_fold.py new file mode 100644 index 00000000000..0a85d105bab --- /dev/null +++ b/tools/pnnx/tests/ncnn/test_F_fold.py @@ -0,0 +1,63 @@ +# Tencent is pleased to support the open source community by making ncnn available. +# +# Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + +import torch +import torch.nn as nn +import torch.nn.functional as F +from packaging import version + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, x, y, z): + x = F.fold(x, output_size=22, kernel_size=3) + y = F.fold(y, output_size=(17,18), kernel_size=(2,4), stride=(2,1), padding=2, dilation=1) + z = F.fold(z, output_size=(5,11), kernel_size=(1,3), stride=1, padding=(2,4), dilation=1) + + return x, y, z + +def test(): + net = Model() + net.eval() + + torch.manual_seed(0) + x = torch.rand(1, 108, 400) + y = torch.rand(1, 96, 190) + z = torch.rand(1, 33, 153) + + a = net(x, y, z) + + # export torchscript + mod = torch.jit.trace(net, (x, y, z)) + mod.save("test_F_fold.pt") + + # torchscript to pnnx + import os + os.system("../../src/pnnx test_F_fold.pt inputshape=[1,108,400],[1,96,190],[1,33,153]") + + # ncnn inference + import test_F_fold_ncnn + b = test_F_fold_ncnn.test_inference() + + for a0, b0 in zip(a, b): + if not torch.allclose(a0, b0, 1e-4, 1e-4): + return False + return True + +if __name__ == "__main__": + if test(): + exit(0) + else: + exit(1) diff --git a/tools/pnnx/tests/ncnn/test_F_unfold.py b/tools/pnnx/tests/ncnn/test_F_unfold.py new file mode 100644 index 00000000000..606c7ada402 --- /dev/null +++ b/tools/pnnx/tests/ncnn/test_F_unfold.py @@ -0,0 +1,61 @@ +# Tencent is pleased to support the open source community by making ncnn available. +# +# Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +# +# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. + +import torch +import torch.nn as nn +import torch.nn.functional as F +from packaging import version + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, x): + x0 = F.unfold(x, kernel_size=3) + x1 = F.unfold(x, kernel_size=(2,4), stride=(2,1), padding=2, dilation=1) + x2 = F.unfold(x, kernel_size=(1,3), stride=1, padding=(2,4), dilation=1) + + return x0, x1, x2 + +def test(): + net = Model() + net.eval() + + torch.manual_seed(0) + x = torch.rand(1, 12, 64, 64) + + a = net(x) + + # export torchscript + mod = torch.jit.trace(net, x) + mod.save("test_F_unfold.pt") + + # torchscript to ncnn + import os + os.system("../../src/pnnx test_F_unfold.pt inputshape=[1,12,64,64]") + + # ncnn inference + import test_F_unfold_ncnn + b = test_F_unfold_ncnn.test_inference() + + for a0, b0 in zip(a, b): + if not torch.allclose(a0, b0, 1e-4, 1e-4): + return False + return True + +if __name__ == "__main__": + if test(): + exit(0) + else: + exit(1) From 4d2247cfa140f549a8a50f3a2233a84d77749f3e Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 3 Nov 2022 11:45:57 +0800 Subject: [PATCH 04/10] add ncnn test --- tests/CMakeLists.txt | 2 + tests/test_fold.cpp | 58 ++++++++++++++++++++++ tests/test_unfold.cpp | 65 +++++++++++++++++++++++++ tools/pnnx/tests/ncnn/test_F_fold.py | 6 +-- tools/pnnx/tests/ncnn/test_F_unfold.py | 2 +- tools/pnnx/tests/ncnn/test_nn_Fold.py | 6 +-- tools/pnnx/tests/ncnn/test_nn_Unfold.py | 2 +- tools/pnnx/tests/test_F_fold.py | 6 +-- tools/pnnx/tests/test_F_unfold.py | 2 +- tools/pnnx/tests/test_nn_Fold.py | 6 +-- tools/pnnx/tests/test_nn_Unfold.py | 2 +- 11 files changed, 141 insertions(+), 16 deletions(-) create mode 100644 tests/test_fold.cpp create mode 100644 tests/test_unfold.cpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 73efa22868e..bf1d933bb48 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -85,6 +85,7 @@ ncnn_add_layer_test(Eltwise) ncnn_add_layer_test(ELU) ncnn_add_layer_test(ExpandDims) ncnn_add_layer_test(Flatten) +ncnn_add_layer_test(Fold) ncnn_add_layer_test(GELU) ncnn_add_layer_test(GLU) ncnn_add_layer_test(Gemm) @@ -135,4 +136,5 @@ ncnn_add_layer_test(Swish) ncnn_add_layer_test(TanH) ncnn_add_layer_test(Tile) ncnn_add_layer_test(UnaryOp) +ncnn_add_layer_test(Unfold) ncnn_add_layer_test(Yolov3DetectionOutput) diff --git a/tests/test_fold.cpp b/tests/test_fold.cpp new file mode 100644 index 00000000000..11a38428fdd --- /dev/null +++ b/tests/test_fold.cpp @@ -0,0 +1,58 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "layer/fold.h" +#include "testutil.h" + +static int test_fold(int w, int h, int outw, int outh, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int pad_w, int pad_h) +{ + ncnn::Mat a = RandomMat(w, h); + + ncnn::ParamDict pd; + pd.set(1, kernel_w); + pd.set(11, kernel_h); + pd.set(2, dilation_w); + pd.set(12, dilation_h); + pd.set(3, stride_w); + pd.set(13, stride_h); + pd.set(4, pad_w); + pd.set(14, pad_h); + pd.set(20, outw); + pd.set(21, outh); + + std::vector weights(0); + + int ret = test_layer("Fold", pd, weights, a); + if (ret != 0) + { + fprintf(stderr, "test_fold failed w=%d h=%d outw=%d outh=%d kernel=%d,%d dilation=%d,%d stride=%d,%d pad=%d,%d\n", w, h, outw, outh, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, pad_w, pad_h); + } + + return ret; +} + +static int test_fold_0() +{ + return 0 + || test_fold(400, 108, 22, 22, 3, 3, 1, 1, 1, 1, 0, 0) + || test_fold(190, 96, 18, 17, 4, 2, 1, 1, 1, 2, 2, 2) + || test_fold(120, 36, 11, 5, 3, 2, 2, 1, 1, 1, 4, 2); +} + +int main() +{ + SRAND(7767517); + + return test_fold_0(); +} diff --git a/tests/test_unfold.cpp b/tests/test_unfold.cpp new file mode 100644 index 00000000000..4eea1d020ea --- /dev/null +++ b/tests/test_unfold.cpp @@ -0,0 +1,65 @@ +// Tencent is pleased to support the open source community by making ncnn available. +// +// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include "layer/unfold.h" +#include "testutil.h" + +static int test_unfold(int w, int h, int c, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int pad_w, int pad_h, float pad_value) +{ + ncnn::Mat a = RandomMat(w, h, c); + + ncnn::ParamDict pd; + pd.set(1, kernel_w); + pd.set(11, kernel_h); + pd.set(2, dilation_w); + pd.set(12, dilation_h); + pd.set(3, stride_w); + pd.set(13, stride_h); + pd.set(4, pad_w); + pd.set(14, pad_h); + pd.set(18, pad_value); + + std::vector weights(0); + + int ret = test_layer("Unfold", pd, weights, a); + if (ret != 0) + { + fprintf(stderr, "test_unfold failed w=%d h=%d c=%d kernel=%d,%d dilation=%d,%d stride=%d,%d pad=%d,%d pad_value=%f\n", w, h, c, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, pad_w, pad_h, pad_value); + } + + return ret; +} + +static int test_unfold_0() +{ + return 0 + || test_unfold(32, 32, 11, 3, 3, 1, 1, 1, 1, 0, 0, 0.f) + || test_unfold(32, 32, 12, 4, 2, 1, 1, 1, 2, 2, 2, -0.5f) + || test_unfold(32, 32, 16, 3, 2, 2, 1, 1, 1, 4, 2, 2.f); +} + +static int test_unfold_1() +{ + return 0 + || test_unfold(32, 32, 11, 3, 3, 1, 1, 1, 1, -233, -233, -0.5f) + || test_unfold(32, 32, 12, 4, 2, 1, 1, 1, 2, -234, -234, 0.f) + || test_unfold(32, 32, 16, 3, 2, 2, 1, 1, 1, -233, -233, 1.f); +} + +int main() +{ + SRAND(7767517); + + return test_unfold_0() || test_unfold_1(); +} diff --git a/tools/pnnx/tests/ncnn/test_F_fold.py b/tools/pnnx/tests/ncnn/test_F_fold.py index 0a85d105bab..54103fdf261 100644 --- a/tools/pnnx/tests/ncnn/test_F_fold.py +++ b/tools/pnnx/tests/ncnn/test_F_fold.py @@ -24,7 +24,7 @@ def __init__(self): def forward(self, x, y, z): x = F.fold(x, output_size=22, kernel_size=3) y = F.fold(y, output_size=(17,18), kernel_size=(2,4), stride=(2,1), padding=2, dilation=1) - z = F.fold(z, output_size=(5,11), kernel_size=(1,3), stride=1, padding=(2,4), dilation=1) + z = F.fold(z, output_size=(5,11), kernel_size=(2,3), stride=1, padding=(2,4), dilation=(1,2)) return x, y, z @@ -35,7 +35,7 @@ def test(): torch.manual_seed(0) x = torch.rand(1, 108, 400) y = torch.rand(1, 96, 190) - z = torch.rand(1, 33, 153) + z = torch.rand(1, 36, 120) a = net(x, y, z) @@ -45,7 +45,7 @@ def test(): # torchscript to pnnx import os - os.system("../../src/pnnx test_F_fold.pt inputshape=[1,108,400],[1,96,190],[1,33,153]") + os.system("../../src/pnnx test_F_fold.pt inputshape=[1,108,400],[1,96,190],[1,36,120]") # ncnn inference import test_F_fold_ncnn diff --git a/tools/pnnx/tests/ncnn/test_F_unfold.py b/tools/pnnx/tests/ncnn/test_F_unfold.py index 606c7ada402..e8e1a603cc3 100644 --- a/tools/pnnx/tests/ncnn/test_F_unfold.py +++ b/tools/pnnx/tests/ncnn/test_F_unfold.py @@ -24,7 +24,7 @@ def __init__(self): def forward(self, x): x0 = F.unfold(x, kernel_size=3) x1 = F.unfold(x, kernel_size=(2,4), stride=(2,1), padding=2, dilation=1) - x2 = F.unfold(x, kernel_size=(1,3), stride=1, padding=(2,4), dilation=1) + x2 = F.unfold(x, kernel_size=(1,3), stride=1, padding=(2,4), dilation=(1,2)) return x0, x1, x2 diff --git a/tools/pnnx/tests/ncnn/test_nn_Fold.py b/tools/pnnx/tests/ncnn/test_nn_Fold.py index 0ec407979fc..8b07b2b5d38 100644 --- a/tools/pnnx/tests/ncnn/test_nn_Fold.py +++ b/tools/pnnx/tests/ncnn/test_nn_Fold.py @@ -23,7 +23,7 @@ def __init__(self): self.fold_0 = nn.Fold(output_size=22, kernel_size=3) self.fold_1 = nn.Fold(output_size=(17,18), kernel_size=(2,4), stride=(2,1), padding=2, dilation=1) - self.fold_2 = nn.Fold(output_size=(5,11), kernel_size=(1,3), stride=1, padding=(2,4), dilation=1) + self.fold_2 = nn.Fold(output_size=(5,11), kernel_size=(2,3), stride=1, padding=(2,4), dilation=(1,2)) def forward(self, x, y, z): x = self.fold_0(x) @@ -39,7 +39,7 @@ def test(): torch.manual_seed(0) x = torch.rand(1, 108, 400) y = torch.rand(1, 96, 190) - z = torch.rand(1, 33, 153) + z = torch.rand(1, 36, 120) a = net(x, y, z) @@ -49,7 +49,7 @@ def test(): # torchscript to pnnx import os - os.system("../../src/pnnx test_nn_Fold.pt inputshape=[1,108,400],[1,96,190],[1,33,153]") + os.system("../../src/pnnx test_nn_Fold.pt inputshape=[1,108,400],[1,96,190],[1,36,120]") # ncnn inference import test_nn_Fold_ncnn diff --git a/tools/pnnx/tests/ncnn/test_nn_Unfold.py b/tools/pnnx/tests/ncnn/test_nn_Unfold.py index dbe588127d5..8d618f76150 100644 --- a/tools/pnnx/tests/ncnn/test_nn_Unfold.py +++ b/tools/pnnx/tests/ncnn/test_nn_Unfold.py @@ -23,7 +23,7 @@ def __init__(self): self.unfold_0 = nn.Unfold(kernel_size=3) self.unfold_1 = nn.Unfold(kernel_size=(2,4), stride=(2,1), padding=2, dilation=1) - self.unfold_2 = nn.Unfold(kernel_size=(1,3), stride=1, padding=(2,4), dilation=1) + self.unfold_2 = nn.Unfold(kernel_size=(1,3), stride=1, padding=(2,4), dilation=(1,2)) def forward(self, x): x0 = self.unfold_0(x) diff --git a/tools/pnnx/tests/test_F_fold.py b/tools/pnnx/tests/test_F_fold.py index b85c41fe3b5..68c5b566d56 100644 --- a/tools/pnnx/tests/test_F_fold.py +++ b/tools/pnnx/tests/test_F_fold.py @@ -24,7 +24,7 @@ def __init__(self): def forward(self, x, y, z): x = F.fold(x, output_size=22, kernel_size=3) y = F.fold(y, output_size=(17,18), kernel_size=(2,4), stride=(2,1), padding=2, dilation=1) - z = F.fold(z, output_size=(5,11), kernel_size=(1,3), stride=1, padding=(2,4), dilation=1) + z = F.fold(z, output_size=(5,11), kernel_size=(2,3), stride=1, padding=(2,4), dilation=(1,2)) return x, y, z @@ -35,7 +35,7 @@ def test(): torch.manual_seed(0) x = torch.rand(1, 108, 400) y = torch.rand(1, 96, 190) - z = torch.rand(1, 33, 153) + z = torch.rand(1, 36, 120) a0, a1, a2 = net(x, y, z) @@ -45,7 +45,7 @@ def test(): # torchscript to pnnx import os - os.system("../src/pnnx test_F_fold.pt inputshape=[1,108,400],[1,96,190],[1,33,153]") + os.system("../src/pnnx test_F_fold.pt inputshape=[1,108,400],[1,96,190],[1,36,120]") # pnnx inference import test_F_fold_pnnx diff --git a/tools/pnnx/tests/test_F_unfold.py b/tools/pnnx/tests/test_F_unfold.py index 1f34513792d..51f19a4f48a 100644 --- a/tools/pnnx/tests/test_F_unfold.py +++ b/tools/pnnx/tests/test_F_unfold.py @@ -24,7 +24,7 @@ def __init__(self): def forward(self, x): x0 = F.unfold(x, kernel_size=3) x1 = F.unfold(x, kernel_size=(2,4), stride=(2,1), padding=2, dilation=1) - x2 = F.unfold(x, kernel_size=(1,3), stride=1, padding=(2,4), dilation=1) + x2 = F.unfold(x, kernel_size=(1,3), stride=1, padding=(2,4), dilation=(1,2)) return x0, x1, x2 diff --git a/tools/pnnx/tests/test_nn_Fold.py b/tools/pnnx/tests/test_nn_Fold.py index 2cc5eb9a620..8f53639db2a 100644 --- a/tools/pnnx/tests/test_nn_Fold.py +++ b/tools/pnnx/tests/test_nn_Fold.py @@ -23,7 +23,7 @@ def __init__(self): self.fold_0 = nn.Fold(output_size=22, kernel_size=3) self.fold_1 = nn.Fold(output_size=(17,18), kernel_size=(2,4), stride=(2,1), padding=2, dilation=1) - self.fold_2 = nn.Fold(output_size=(5,11), kernel_size=(1,3), stride=1, padding=(2,4), dilation=1) + self.fold_2 = nn.Fold(output_size=(5,11), kernel_size=(2,3), stride=1, padding=(2,4), dilation=(1,2)) def forward(self, x, y, z): x = self.fold_0(x) @@ -39,7 +39,7 @@ def test(): torch.manual_seed(0) x = torch.rand(1, 108, 400) y = torch.rand(1, 96, 190) - z = torch.rand(1, 33, 153) + z = torch.rand(1, 36, 120) a0, a1, a2 = net(x, y, z) @@ -49,7 +49,7 @@ def test(): # torchscript to pnnx import os - os.system("../src/pnnx test_nn_Fold.pt inputshape=[1,108,400],[1,96,190],[1,33,153]") + os.system("../src/pnnx test_nn_Fold.pt inputshape=[1,108,400],[1,96,190],[1,36,120]") # pnnx inference import test_nn_Fold_pnnx diff --git a/tools/pnnx/tests/test_nn_Unfold.py b/tools/pnnx/tests/test_nn_Unfold.py index aab26f2dcbd..aece085668c 100644 --- a/tools/pnnx/tests/test_nn_Unfold.py +++ b/tools/pnnx/tests/test_nn_Unfold.py @@ -23,7 +23,7 @@ def __init__(self): self.unfold_0 = nn.Unfold(kernel_size=3) self.unfold_1 = nn.Unfold(kernel_size=(2,4), stride=(2,1), padding=2, dilation=1) - self.unfold_2 = nn.Unfold(kernel_size=(1,3), stride=1, padding=(2,4), dilation=1) + self.unfold_2 = nn.Unfold(kernel_size=(1,3), stride=1, padding=(2,4), dilation=(1,2)) def forward(self, x): x0 = self.unfold_0(x) From 13adfb4a6d206ad00421160b6f97ac9587ec0fb6 Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 3 Nov 2022 11:53:22 +0800 Subject: [PATCH 05/10] fresh checkout --- .ci/linux-x64-cpu-gcc.yml | 2 ++ .ci/pnnx.yml | 1 + .ci/test-coverage.yml | 9 +++++++++ 3 files changed, 12 insertions(+) diff --git a/.ci/linux-x64-cpu-gcc.yml b/.ci/linux-x64-cpu-gcc.yml index 2f970f37c12..50f7b0b2589 100644 --- a/.ci/linux-x64-cpu-gcc.yml +++ b/.ci/linux-x64-cpu-gcc.yml @@ -49,6 +49,7 @@ jobs: - name: checkout checkout: self with: + strategy: FRESH_CHECKOUT enableGitLfs: false - name: install-deps @@ -85,6 +86,7 @@ jobs: - name: checkout checkout: self with: + strategy: FRESH_CHECKOUT enableGitLfs: false - name: build-nostdio diff --git a/.ci/pnnx.yml b/.ci/pnnx.yml index 0792e1da0a6..e3760d111aa 100644 --- a/.ci/pnnx.yml +++ b/.ci/pnnx.yml @@ -52,6 +52,7 @@ jobs: - name: checkout checkout: self with: + strategy: FRESH_CHECKOUT enableGitLfs: false - name: install-deps diff --git a/.ci/test-coverage.yml b/.ci/test-coverage.yml index e3ed988df6d..0c3fe6908e2 100644 --- a/.ci/test-coverage.yml +++ b/.ci/test-coverage.yml @@ -32,6 +32,7 @@ jobs: - name: checkout checkout: self with: + strategy: FRESH_CHECKOUT enableGitLfs: false - name: install-deps @@ -105,6 +106,7 @@ jobs: - name: checkout checkout: self with: + strategy: FRESH_CHECKOUT enableGitLfs: false - name: install-deps @@ -195,6 +197,7 @@ jobs: - name: checkout checkout: self with: + strategy: FRESH_CHECKOUT enableGitLfs: false - name: install-deps @@ -248,6 +251,7 @@ jobs: - name: checkout checkout: self with: + strategy: FRESH_CHECKOUT enableGitLfs: false - name: install-deps @@ -351,6 +355,7 @@ jobs: - name: checkout checkout: self with: + strategy: FRESH_CHECKOUT enableGitLfs: false - name: install-deps @@ -435,6 +440,7 @@ jobs: - name: checkout checkout: self with: + strategy: FRESH_CHECKOUT enableGitLfs: false - name: install-deps @@ -512,6 +518,7 @@ jobs: - name: checkout checkout: self with: + strategy: FRESH_CHECKOUT enableGitLfs: false - name: install-deps @@ -589,6 +596,7 @@ jobs: - name: checkout checkout: self with: + strategy: FRESH_CHECKOUT enableGitLfs: false - name: install-deps @@ -668,6 +676,7 @@ jobs: - name: checkout checkout: self with: + strategy: FRESH_CHECKOUT enableGitLfs: false - name: install-deps From ce415a0b1f0a63565180810c9ab83c17c3d7a02d Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 3 Nov 2022 12:09:19 +0800 Subject: [PATCH 06/10] fix build --- src/layer/fold.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/layer/fold.cpp b/src/layer/fold.cpp index 608be635e40..61f0d7791a8 100644 --- a/src/layer/fold.cpp +++ b/src/layer/fold.cpp @@ -14,8 +14,6 @@ #include "fold.h" -#include - namespace ncnn { Fold::Fold() From c2c707bc09867c4ee4d7e23175fcb9b2bd2dec2a Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 3 Nov 2022 12:13:02 +0800 Subject: [PATCH 07/10] skip submodule --- .ci/linux-x64-cpu-gcc.yml | 2 ++ .ci/pnnx.yml | 1 + .ci/test-coverage.yml | 7 +++++++ 3 files changed, 10 insertions(+) diff --git a/.ci/linux-x64-cpu-gcc.yml b/.ci/linux-x64-cpu-gcc.yml index 50f7b0b2589..4f138d9d080 100644 --- a/.ci/linux-x64-cpu-gcc.yml +++ b/.ci/linux-x64-cpu-gcc.yml @@ -50,6 +50,7 @@ jobs: checkout: self with: strategy: FRESH_CHECKOUT + enableSubmodule: false enableGitLfs: false - name: install-deps @@ -87,6 +88,7 @@ jobs: checkout: self with: strategy: FRESH_CHECKOUT + enableSubmodule: false enableGitLfs: false - name: build-nostdio diff --git a/.ci/pnnx.yml b/.ci/pnnx.yml index e3760d111aa..5db6950e6b1 100644 --- a/.ci/pnnx.yml +++ b/.ci/pnnx.yml @@ -53,6 +53,7 @@ jobs: checkout: self with: strategy: FRESH_CHECKOUT + enableSubmodule: false enableGitLfs: false - name: install-deps diff --git a/.ci/test-coverage.yml b/.ci/test-coverage.yml index 0c3fe6908e2..9272caac1ba 100644 --- a/.ci/test-coverage.yml +++ b/.ci/test-coverage.yml @@ -198,6 +198,7 @@ jobs: checkout: self with: strategy: FRESH_CHECKOUT + enableSubmodule: false enableGitLfs: false - name: install-deps @@ -252,6 +253,7 @@ jobs: checkout: self with: strategy: FRESH_CHECKOUT + enableSubmodule: false enableGitLfs: false - name: install-deps @@ -356,6 +358,7 @@ jobs: checkout: self with: strategy: FRESH_CHECKOUT + enableSubmodule: false enableGitLfs: false - name: install-deps @@ -441,6 +444,7 @@ jobs: checkout: self with: strategy: FRESH_CHECKOUT + enableSubmodule: false enableGitLfs: false - name: install-deps @@ -519,6 +523,7 @@ jobs: checkout: self with: strategy: FRESH_CHECKOUT + enableSubmodule: false enableGitLfs: false - name: install-deps @@ -597,6 +602,7 @@ jobs: checkout: self with: strategy: FRESH_CHECKOUT + enableSubmodule: false enableGitLfs: false - name: install-deps @@ -677,6 +683,7 @@ jobs: checkout: self with: strategy: FRESH_CHECKOUT + enableSubmodule: false enableGitLfs: false - name: install-deps From b4c2d9a895aa69a9fe9efe0ad4ba754c104d9da4 Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 3 Nov 2022 15:15:28 +0800 Subject: [PATCH 08/10] fix --- .ci/pnnx.yml | 1 - src/layer/fold.cpp | 4 +++- src/layer/unfold.cpp | 11 ++++++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.ci/pnnx.yml b/.ci/pnnx.yml index 5db6950e6b1..e3760d111aa 100644 --- a/.ci/pnnx.yml +++ b/.ci/pnnx.yml @@ -53,7 +53,6 @@ jobs: checkout: self with: strategy: FRESH_CHECKOUT - enableSubmodule: false enableGitLfs: false - name: install-deps diff --git a/src/layer/fold.cpp b/src/layer/fold.cpp index 61f0d7791a8..a5e35075652 100644 --- a/src/layer/fold.cpp +++ b/src/layer/fold.cpp @@ -115,7 +115,9 @@ int Fold::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) { - copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); + Option opt_b = opt; + opt_b.use_packing_layout = false; + copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt_b); if (top_blob.empty()) return -100; } diff --git a/src/layer/unfold.cpp b/src/layer/unfold.cpp index a2e2e053ff8..f747a169ced 100644 --- a/src/layer/unfold.cpp +++ b/src/layer/unfold.cpp @@ -41,9 +41,14 @@ int Unfold::load_param(const ParamDict& pd) int Unfold::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const { Mat bottom_blob_bordered; - make_padding(bottom_blob, bottom_blob_bordered, opt); - if (bottom_blob_bordered.empty()) - return -100; + { + Option opt_b = opt; + opt_b.blob_allocator = opt.workspace_allocator; + opt_b.use_packing_layout = false; + make_padding(bottom_blob, bottom_blob_bordered, opt_b); + if (bottom_blob_bordered.empty()) + return -100; + } const int w = bottom_blob_bordered.w; const int h = bottom_blob_bordered.h; From 462228b975b13d414b5fecce963ab728b25acef6 Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 3 Nov 2022 16:21:02 +0800 Subject: [PATCH 09/10] warning-- --- tools/pnnx/src/pass_level1/nn_Fold.cpp | 2 +- tools/pnnx/src/pass_level1/nn_Unfold.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/pnnx/src/pass_level1/nn_Fold.cpp b/tools/pnnx/src/pass_level1/nn_Fold.cpp index a05d8a80402..045c1f6f1ba 100644 --- a/tools/pnnx/src/pass_level1/nn_Fold.cpp +++ b/tools/pnnx/src/pass_level1/nn_Fold.cpp @@ -31,7 +31,7 @@ class Fold : public FuseModulePass return "nn.Fold"; } - void write(Operator* op, const std::shared_ptr& graph, const torch::jit::Module& mod) const + void write(Operator* op, const std::shared_ptr& graph) const { const torch::jit::Node* col2im = find_node_by_kind(graph, "aten::col2im"); diff --git a/tools/pnnx/src/pass_level1/nn_Unfold.cpp b/tools/pnnx/src/pass_level1/nn_Unfold.cpp index 974c5422ea0..1abf6201a83 100644 --- a/tools/pnnx/src/pass_level1/nn_Unfold.cpp +++ b/tools/pnnx/src/pass_level1/nn_Unfold.cpp @@ -31,7 +31,7 @@ class Unfold : public FuseModulePass return "nn.Unfold"; } - void write(Operator* op, const std::shared_ptr& graph, const torch::jit::Module& mod) const + void write(Operator* op, const std::shared_ptr& graph) const { const torch::jit::Node* im2col = find_node_by_kind(graph, "aten::im2col"); From cadc925b31a9754a44bb33fb111c3307dd53f41f Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 3 Nov 2022 17:16:50 +0800 Subject: [PATCH 10/10] naive fold --- src/layer/fold.cpp | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/src/layer/fold.cpp b/src/layer/fold.cpp index a5e35075652..c14f01fbb72 100644 --- a/src/layer/fold.cpp +++ b/src/layer/fold.cpp @@ -73,42 +73,34 @@ int Fold::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons return -100; // col2im + const int gap = outw * stride_h - inw * stride_w; + #pragma omp parallel for num_threads(opt.num_threads) for (int p = 0; p < channels; p++) { - float* ptr = top_blob_bordered.channel(p); + const float* sptr = bottom_blob.row(p * maxk); + Mat outm = top_blob_bordered.channel(p); + + outm.fill(0.f); - for (int i = 0; i < outh; i++) + for (int u = 0; u < kernel_h; u++) { - for (int j = 0; j < outw; j++) + for (int v = 0; v < kernel_w; v++) { - float sum = 0.f; - - const int sx_start = (j < kernel_extent_w) ? 0 : (j - kernel_extent_w) / stride_w + 1; - const int sx_end = std::min(j / stride_w + 1, inw); - - const int sy_start = (i < kernel_extent_h) ? 0 : (i - kernel_extent_h) / stride_h + 1; - const int sy_end = std::min(i / stride_h + 1, inh); + float* ptr = outm.row(dilation_h * u) + dilation_w * v; - for (int sy = sy_start; sy < sy_end; sy += 1) + for (int i = 0; i < inh; i++) { - for (int sx = sx_start; sx < sx_end; sx += 1) + for (int j = 0; j < inw; j++) { - int h_k = (i - sy * stride_h); - int w_k = (j - sx * stride_w); + ptr[0] += sptr[0]; - if (h_k % dilation_h == 0 && w_k % dilation_w == 0) - { - h_k /= dilation_h; - w_k /= dilation_w; - - sum += bottom_blob.row(p * maxk + h_k * kernel_w + w_k)[sy * inw + sx]; - } + ptr += stride_w; + sptr += 1; } - } - ptr[0] = sum; - ptr += 1; + ptr += gap; + } } } }