From 3d3aed8a2131d61248aacc2ce633a6de0f207439 Mon Sep 17 00:00:00 2001
From: nihuini <nihuini@tencent.com>
Date: Wed, 2 Nov 2022 19:27:44 +0800
Subject: [PATCH 01/10] implement fold and unfold

---
 src/CMakeLists.txt   |   2 +
 src/layer/fold.cpp   | 132 ++++++++++++++++++++++++++++++++++++++++
 src/layer/fold.h     |  48 +++++++++++++++
 src/layer/unfold.cpp | 141 +++++++++++++++++++++++++++++++++++++++++++
 src/layer/unfold.h   |  50 +++++++++++++++
 5 files changed, 373 insertions(+)
 create mode 100644 src/layer/fold.cpp
 create mode 100644 src/layer/fold.h
 create mode 100644 src/layer/unfold.cpp
 create mode 100644 src/layer/unfold.h
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index ab41ea9dc1e..58d826a4d8f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -157,6 +157,8 @@ ncnn_add_layer(DeconvolutionDepthWise3D)
 ncnn_add_layer(Einsum)
 ncnn_add_layer(DeformableConv2D)
 ncnn_add_layer(GLU)
+ncnn_add_layer(Fold)
+ncnn_add_layer(Unfold)
 
 if(NCNN_VULKAN)
     ncnn_add_shader(${CMAKE_CURRENT_SOURCE_DIR}/convert_ycbcr.comp)
diff --git a/src/layer/fold.cpp b/src/layer/fold.cpp
new file mode 100644
index 00000000000..608be635e40
--- /dev/null
+++ b/src/layer/fold.cpp
@@ -0,0 +1,132 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "fold.h"
+
+#include <algorithm>
+
+namespace ncnn {
+
+Fold::Fold() // default constructor; only sets the layer capability flag below
+{
+    one_blob_only = true; // forward() in this file takes one bottom_blob and produces one top_blob
+}
+
+int Fold::load_param(const ParamDict& pd) // copies the fold hyper-parameters out of pd into the member fields; always returns 0
+{
+    kernel_w = pd.get(1, 0); // id 1, default 0; NOTE(review): a value of 0 makes maxk 0 in forward(), which divides by it — nothing is rejected here
+    kernel_h = pd.get(11, kernel_w); // id 11, falls back to kernel_w
+    dilation_w = pd.get(2, 1); // id 2, default 1
+    dilation_h = pd.get(12, dilation_w); // id 12, falls back to dilation_w
+    stride_w = pd.get(3, 1); // id 3, default 1
+    stride_h = pd.get(13, stride_w); // id 13, falls back to stride_w
+    pad_left = pd.get(4, 0); // id 4, default 0
+    pad_right = pd.get(15, pad_left); // id 15, falls back to pad_left
+    pad_top = pd.get(14, pad_left); // id 14, falls back to pad_left (not pad_right)
+    pad_bottom = pd.get(16, pad_top); // id 16, falls back to pad_top
+    output_w = pd.get(20, 0); // id 20, default 0
+    output_h = pd.get(21, output_w); // id 21, falls back to output_w
+
+    return 0;
+}
+
+int Fold::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const // col2im: sums overlapping sliding-window columns back into an image; returns 0, or -100 when an output Mat ends up empty
+{
+    const int size = bottom_blob.w; // columns per input row; only referenced by the invariant comment below
+    const int max_channels = bottom_blob.h; // input rows, split below as channels * kernel_w * kernel_h
+    size_t elemsize = bottom_blob.elemsize;
+
+    const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; // span of the dilated kernel along x
+    const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; // span of the dilated kernel along y
+
+    const int outw = output_w + pad_left + pad_right; // padded output width; the pad border is removed at the end
+    const int outh = output_h + pad_top + pad_bottom; // padded output height
+
+    const int inw = (outw - kernel_extent_w) / stride_w + 1; // sliding-window positions along x
+    const int inh = (outh - kernel_extent_h) / stride_h + 1; // sliding-window positions along y
+
+    // expected invariant, not enforced at runtime: inw * inh == size
+
+    const int maxk = kernel_w * kernel_h; // kernel taps per output channel
+    const int channels = max_channels / maxk; // NOTE(review): nothing in this function guards against maxk == 0
+
+    Mat top_blob_bordered;
+    if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
+    {
+        top_blob_bordered.create(outw, outh, channels, elemsize, opt.workspace_allocator); // temporary padded result; top_blob is produced from it below
+    }
+    else
+    {
+        top_blob_bordered = top_blob;
+        top_blob_bordered.create(outw, outh, channels, elemsize, opt.blob_allocator); // no padding: this Mat is assigned to top_blob at the end
+    }
+    if (top_blob_bordered.empty())
+        return -100;
+
+    // col2im: every output pixel gathers all column entries that map onto it
+    #pragma omp parallel for num_threads(opt.num_threads)
+    for (int p = 0; p < channels; p++)
+    {
+        float* ptr = top_blob_bordered.channel(p); // results are written sequentially as float, one per (i, j)
+
+        for (int i = 0; i < outh; i++)
+        {
+            for (int j = 0; j < outw; j++)
+            {
+                float sum = 0.f;
+
+                const int sx_start = (j < kernel_extent_w) ? 0 : (j - kernel_extent_w) / stride_w + 1; // first window position whose extent reaches column j
+                const int sx_end = std::min(j / stride_w + 1, inw); // one past the last window starting at or before column j
+
+                const int sy_start = (i < kernel_extent_h) ? 0 : (i - kernel_extent_h) / stride_h + 1; // first window position whose extent reaches row i
+                const int sy_end = std::min(i / stride_h + 1, inh); // one past the last window starting at or before row i
+
+                for (int sy = sy_start; sy < sy_end; sy += 1)
+                {
+                    for (int sx = sx_start; sx < sx_end; sx += 1)
+                    {
+                        int h_k = (i - sy * stride_h); // offset of pixel (i, j) inside the window at (sy, sx)
+                        int w_k = (j - sx * stride_w);
+
+                        if (h_k % dilation_h == 0 && w_k % dilation_w == 0) // only offsets on the dilation grid correspond to a kernel tap
+                        {
+                            h_k /= dilation_h; // convert the offset into a kernel tap index
+                            w_k /= dilation_w;
+
+                            sum += bottom_blob.row(p * maxk + h_k * kernel_w + w_k)[sy * inw + sx]; // row: channel p, tap (h_k, w_k); column: window (sy, sx)
+                        }
+                    }
+                }
+
+                ptr[0] = sum;
+                ptr += 1;
+            }
+        }
+    }
+
+    if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
+    {
+        copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt); // expected to strip the pad_* border into top_blob
+        if (top_blob.empty())
+            return -100;
+    }
+    else
+    {
+        top_blob = top_blob_bordered;
+    }
+
+    return 0;
+}
+
+} // namespace ncnn
diff --git a/src/layer/fold.h b/src/layer/fold.h
new file mode 100644
index 00000000000..e6eccad556a
--- /dev/null
+++ b/src/layer/fold.h
@@ -0,0 +1,48 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef LAYER_FOLD_H
+#define LAYER_FOLD_H
+
+#include "layer.h"
+
+namespace ncnn {
+
+class Fold : public Layer // layer whose forward() sums sliding-window columns back into an image (col2im, see fold.cpp)
+{
+public:
+    Fold();
+
+    virtual int load_param(const ParamDict& pd); // fills the public fields below from pd
+
+    virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; // returns 0 on success, -100 when an output Mat is empty
+
+public:
+    int kernel_w; // param id 1
+    int kernel_h; // param id 11
+    int dilation_w; // param id 2
+    int dilation_h; // param id 12
+    int stride_w; // param id 3
+    int stride_h; // param id 13
+    int pad_left; // -233=SAME_UPPER -234=SAME_LOWER; param id 4; NOTE(review): Fold::forward only tests pad_* > 0, so these sentinels get no special handling there
+    int pad_right; // param id 15
+    int pad_top; // param id 14
+    int pad_bottom; // param id 16
+    int output_w; // param id 20, output width before padding is added in forward()
+    int output_h; // param id 21, output height before padding is added in forward()
+};
+
+} // namespace ncnn
+
+#endif // LAYER_FOLD_H
diff --git a/src/layer/unfold.cpp b/src/layer/unfold.cpp
new file mode 100644
index 00000000000..5f26d71c4d1
--- /dev/null
+++ b/src/layer/unfold.cpp
@@ -0,0 +1,141 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "unfold.h"
+
+namespace ncnn {
+
+Unfold::Unfold() // default constructor; only sets the layer capability flag below
+{
+    one_blob_only = true; // forward() in this file takes one bottom_blob and produces one top_blob
+}
+
+int Unfold::load_param(const ParamDict& pd) // copies the unfold hyper-parameters out of pd into the member fields; always returns 0
+{
+    kernel_w = pd.get(1, 0); // id 1, default 0
+    kernel_h = pd.get(11, kernel_w); // id 11, falls back to kernel_w
+    dilation_w = pd.get(2, 1); // id 2, default 1
+    dilation_h = pd.get(12, dilation_w); // id 12, falls back to dilation_w
+    stride_w = pd.get(3, 1); // id 3, default 1
+    stride_h = pd.get(13, stride_w); // id 13, falls back to stride_w
+    pad_left = pd.get(4, 0); // id 4, default 0
+    pad_right = pd.get(15, pad_left); // id 15, falls back to pad_left
+    pad_top = pd.get(14, pad_left); // id 14, falls back to pad_left (not pad_right)
+    pad_bottom = pd.get(16, pad_top); // id 16, falls back to pad_top
+    pad_value = pd.get(18, 0.f); // id 18, default 0.f; passed to copy_make_border in make_padding()
+
+    return 0;
+}
+
+int Unfold::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const // im2col: copies every sliding window of the padded input into a 2D top_blob; returns 0, or -100 when a Mat ends up empty
+{
+    Mat bottom_blob_bordered;
+    make_padding(bottom_blob, bottom_blob_bordered, opt); // padded copy, or bottom_blob itself when no padding branch applies
+    if (bottom_blob_bordered.empty())
+        return -100;
+
+    const int w = bottom_blob_bordered.w;
+    const int h = bottom_blob_bordered.h;
+    const int channels = bottom_blob_bordered.c;
+    const size_t elemsize = bottom_blob_bordered.elemsize;
+
+    const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; // span of the dilated kernel along x
+    const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; // span of the dilated kernel along y
+
+    const int outw = (w - kernel_extent_w) / stride_w + 1; // sliding-window positions along x
+    const int outh = (h - kernel_extent_h) / stride_h + 1; // sliding-window positions along y
+
+    const int size = outw * outh; // columns of top_blob: one per window position
+    const int maxk = kernel_w * kernel_h; // kernel taps per channel
+
+    top_blob.create(size, maxk * channels, elemsize, opt.blob_allocator); // one row per (channel, tap) pair
+    if (top_blob.empty())
+        return -100;
+
+    // im2col: for each channel and kernel tap, walk all window positions
+    const int gap = w * stride_h - outw * stride_w; // added after each output row so sptr lands w * stride_h elements past that row's start
+
+    #pragma omp parallel for num_threads(opt.num_threads)
+    for (int p = 0; p < channels; p++)
+    {
+        const Mat img = bottom_blob_bordered.channel(p); // NOTE(review): unused in this revision; PATCH 02/10 switches the sptr initialisation below from bottom_blob_bordered.row() to img.row()
+        float* ptr = top_blob.row(p * maxk); // first output row of channel p; ptr then runs on across the following rows — assumes rows are contiguous, TODO confirm
+
+        for (int u = 0; u < kernel_h; u++)
+        {
+            for (int v = 0; v < kernel_w; v++)
+            {
+                const float* sptr = bottom_blob_bordered.row(dilation_h * u) + dilation_w * v;
+
+                for (int i = 0; i < outh; i++)
+                {
+                    for (int j = 0; j < outw; j++) // one element per window position along x
+                    {
+                        ptr[0] = sptr[0];
+
+                        sptr += stride_w; // next window position along x
+                        ptr += 1;
+                    }
+
+                    sptr += gap; // move to the first window position of the next output row
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+void Unfold::make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const // sets bottom_blob_bordered to bottom_blob, or to a constant-padded copy when one of the branches below applies
+{
+    int w = bottom_blob.w;
+    int h = bottom_blob.h;
+
+    const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; // span of the dilated kernel along x
+    const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; // span of the dilated kernel along y
+
+    bottom_blob_bordered = bottom_blob; // result when none of the branches below pads
+    if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0) // explicit padding amounts
+    {
+        Option opt_b = opt;
+        opt_b.blob_allocator = opt.workspace_allocator; // the padded Mat is requested from the workspace allocator
+        copy_make_border(bottom_blob, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, BORDER_CONSTANT, pad_value, opt_b);
+    }
+    else if (pad_left == -233 && pad_right == -233 && pad_top == -233 && pad_bottom == -233)
+    {
+        // tensorflow padding=SAME or onnx padding=SAME_UPPER: an odd total puts the extra element at the bottom/right
+        int wpad = kernel_extent_w + (w - 1) / stride_w * stride_w - w; // total padding along x
+        int hpad = kernel_extent_h + (h - 1) / stride_h * stride_h - h; // total padding along y
+        if (wpad > 0 || hpad > 0)
+        {
+            Option opt_b = opt;
+            opt_b.blob_allocator = opt.workspace_allocator;
+            copy_make_border(bottom_blob, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, BORDER_CONSTANT, pad_value, opt_b);
+        }
+    }
+    else if (pad_left == -234 && pad_right == -234 && pad_top == -234 && pad_bottom == -234)
+    {
+        // onnx padding=SAME_LOWER: an odd total puts the extra element at the top/left
+        int wpad = kernel_extent_w + (w - 1) / stride_w * stride_w - w; // total padding along x
+        int hpad = kernel_extent_h + (h - 1) / stride_h * stride_h - h; // total padding along y
+        if (wpad > 0 || hpad > 0)
+        {
+            Option opt_b = opt;
+            opt_b.blob_allocator = opt.workspace_allocator;
+            copy_make_border(bottom_blob, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, BORDER_CONSTANT, pad_value, opt_b);
+        }
+    }
+}
+
+} // namespace ncnn
diff --git a/src/layer/unfold.h b/src/layer/unfold.h
new file mode 100644
index 00000000000..ff7860b7f72
--- /dev/null
+++ b/src/layer/unfold.h
@@ -0,0 +1,50 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef LAYER_UNFOLD_H
+#define LAYER_UNFOLD_H
+
+#include "layer.h"
+
+namespace ncnn {
+
+class Unfold : public Layer // layer whose forward() copies sliding windows of the input into columns (im2col, see unfold.cpp)
+{
+public:
+    Unfold();
+
+    virtual int load_param(const ParamDict& pd); // fills the public fields below from pd
+
+    virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; // returns 0 on success, -100 when a Mat is empty
+
+protected:
+    void make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const; // pads the input according to pad_* before forward() extracts windows
+
+public:
+    int kernel_w; // param id 1
+    int kernel_h; // param id 11
+    int dilation_w; // param id 2
+    int dilation_h; // param id 12
+    int stride_w; // param id 3
+    int stride_h; // param id 13
+    int pad_left; // -233=SAME_UPPER -234=SAME_LOWER; param id 4; make_padding() applies a sentinel only when all four pad_* fields carry it
+    int pad_right; // param id 15
+    int pad_top; // param id 14
+    int pad_bottom; // param id 16
+    float pad_value; // param id 18, constant used to fill the border
+};
+
+} // namespace ncnn
+
+#endif // LAYER_UNFOLD_H

From 633078c056442e9cd2b6319e33294867dbe380c6 Mon Sep 17 00:00:00 2001
From: nihuini <nihuini@tencent.com>
Date: Thu, 3 Nov 2022 11:05:22 +0800
Subject: [PATCH 02/10] add ncnn test

---
 src/layer/unfold.cpp                    |  2 +-
 tools/pnnx/src/CMakeLists.txt           |  2 +
 tools/pnnx/src/pass_ncnn/nn_Fold.cpp    | 63 +++++++++++++++++++++++
 tools/pnnx/src/pass_ncnn/nn_Unfold.cpp  | 61 ++++++++++++++++++++++
 tools/pnnx/tests/ncnn/CMakeLists.txt    |  2 +
 tools/pnnx/tests/ncnn/test_nn_Fold.py   | 67 +++++++++++++++++++++++++
 tools/pnnx/tests/ncnn/test_nn_Unfold.py | 65 ++++++++++++++++++++++++
 7 files changed, 261 insertions(+), 1 deletion(-)
 create mode 100644 tools/pnnx/src/pass_ncnn/nn_Fold.cpp
 create mode 100644 tools/pnnx/src/pass_ncnn/nn_Unfold.cpp
 create mode 100644 tools/pnnx/tests/ncnn/test_nn_Fold.py
 create mode 100644 tools/pnnx/tests/ncnn/test_nn_Unfold.py

diff --git a/src/layer/unfold.cpp b/src/layer/unfold.cpp
index 5f26d71c4d1..a2e2e053ff8 100644
--- a/src/layer/unfold.cpp
+++ b/src/layer/unfold.cpp
@@ -76,7 +76,7 @@ int Unfold::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) co
         {
             for (int v = 0; v < kernel_w; v++)
             {
-                const float* sptr = bottom_blob_bordered.row(dilation_h * u) + dilation_w * v;
+                const float* sptr = img.row(dilation_h * u) + dilation_w * v;
 
                 for (int i = 0; i < outh; i++)
                 {
diff --git a/tools/pnnx/src/CMakeLists.txt b/tools/pnnx/src/CMakeLists.txt
index f48767d4fbe..f3fb6bf9cd8 100644
--- a/tools/pnnx/src/CMakeLists.txt
+++ b/tools/pnnx/src/CMakeLists.txt
@@ -427,6 +427,7 @@ set(pnnx_pass_ncnn_SRCS
     pass_ncnn/nn_ConvTranspose3d.cpp
     pass_ncnn/nn_ELU.cpp
     pass_ncnn/nn_Embedding.cpp
+    pass_ncnn/nn_Fold.cpp
     pass_ncnn/nn_GELU.cpp
     pass_ncnn/nn_GLU.cpp
     pass_ncnn/nn_GroupNorm.cpp
@@ -461,6 +462,7 @@ set(pnnx_pass_ncnn_SRCS
     pass_ncnn/nn_Softmax.cpp
     pass_ncnn/nn_Softmax2d.cpp
     pass_ncnn/nn_Tanh.cpp
+    pass_ncnn/nn_Unfold.cpp
     pass_ncnn/nn_Upsample.cpp
     pass_ncnn/nn_UpsamplingBilinear2d.cpp
     pass_ncnn/nn_UpsamplingNearest2d.cpp
diff --git a/tools/pnnx/src/pass_ncnn/nn_Fold.cpp b/tools/pnnx/src/pass_ncnn/nn_Fold.cpp
new file mode 100644
index 00000000000..d94bc68b030
--- /dev/null
+++ b/tools/pnnx/src/pass_ncnn/nn_Fold.cpp
@@ -0,0 +1,63 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "pass_ncnn.h"
+
+namespace pnnx {
+
+namespace ncnn {
+
+class nn_Fold : public GraphRewriterPass // graph rewriter pass that matches nn.Fold and emits an operator of type "Fold" with numbered params
+{
+public:
+    const char* match_pattern_graph() const // pattern: one nn.Fold between a single input and a single output, capturing its five attributes
+    {
+        return R"PNNXIR(7767517
+3 2
+pnnx.Input              input       0 1 input
+nn.Fold                 op_0        1 1 input out output_size=%output_size kernel_size=%kernel_size stride=%stride padding=%padding dilation=%dilation
+pnnx.Output             output      1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const // type of the replacement operator
+    {
+        return "Fold";
+    }
+
+    const char* name_str() const // name of the replacement operator
+    {
+        return "fold";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params) const // NOTE(review): indexes ai[0] and ai[1] unconditionally — assumes every captured attribute is a 2-element int array, confirm
+    {
+        op->params["1"] = captured_params.at("kernel_size").ai[1]; // element [1] -> id 1, read by Fold::load_param as kernel_w
+        op->params["11"] = captured_params.at("kernel_size").ai[0]; // element [0] -> id 11 (kernel_h)
+        op->params["2"] = captured_params.at("dilation").ai[1]; // id 2 (dilation_w)
+        op->params["12"] = captured_params.at("dilation").ai[0]; // id 12 (dilation_h)
+        op->params["3"] = captured_params.at("stride").ai[1]; // id 3 (stride_w)
+        op->params["13"] = captured_params.at("stride").ai[0]; // id 13 (stride_h)
+        op->params["4"] = captured_params.at("padding").ai[1]; // id 4 (pad_left); id 15 is not written, so load_param falls back to pad_left
+        op->params["14"] = captured_params.at("padding").ai[0]; // id 14 (pad_top); id 16 is not written, so load_param falls back to pad_top
+        op->params["20"] = captured_params.at("output_size").ai[1]; // id 20 (output_w)
+        op->params["21"] = captured_params.at("output_size").ai[0]; // id 21 (output_h)
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(nn_Fold, 20)
+
+} // namespace ncnn
+
+} // namespace pnnx
diff --git a/tools/pnnx/src/pass_ncnn/nn_Unfold.cpp b/tools/pnnx/src/pass_ncnn/nn_Unfold.cpp
new file mode 100644
index 00000000000..526e5d24c38
--- /dev/null
+++ b/tools/pnnx/src/pass_ncnn/nn_Unfold.cpp
@@ -0,0 +1,61 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "pass_ncnn.h"
+
+namespace pnnx {
+
+namespace ncnn {
+
+class nn_Unfold : public GraphRewriterPass // graph rewriter pass that matches nn.Unfold and emits an operator of type "Unfold" with numbered params
+{
+public:
+    const char* match_pattern_graph() const // pattern: one nn.Unfold between a single input and a single output, capturing its four attributes
+    {
+        return R"PNNXIR(7767517
+3 2
+pnnx.Input              input       0 1 input
+nn.Unfold               op_0        1 1 input out kernel_size=%kernel_size stride=%stride padding=%padding dilation=%dilation
+pnnx.Output             output      1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const // type of the replacement operator
+    {
+        return "Unfold";
+    }
+
+    const char* name_str() const // name of the replacement operator
+    {
+        return "unfold";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params) const // NOTE(review): indexes ai[0] and ai[1] unconditionally — assumes every captured attribute is a 2-element int array, confirm
+    {
+        op->params["1"] = captured_params.at("kernel_size").ai[1]; // element [1] -> id 1, read by Unfold::load_param as kernel_w
+        op->params["11"] = captured_params.at("kernel_size").ai[0]; // element [0] -> id 11 (kernel_h)
+        op->params["2"] = captured_params.at("dilation").ai[1]; // id 2 (dilation_w)
+        op->params["12"] = captured_params.at("dilation").ai[0]; // id 12 (dilation_h)
+        op->params["3"] = captured_params.at("stride").ai[1]; // id 3 (stride_w)
+        op->params["13"] = captured_params.at("stride").ai[0]; // id 13 (stride_h)
+        op->params["4"] = captured_params.at("padding").ai[1]; // id 4 (pad_left); id 15 is not written, so load_param falls back to pad_left
+        op->params["14"] = captured_params.at("padding").ai[0]; // id 14 (pad_top); id 16 is not written, so load_param falls back to pad_top
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(nn_Unfold, 20)
+
+} // namespace ncnn
+
+} // namespace pnnx
diff --git a/tools/pnnx/tests/ncnn/CMakeLists.txt b/tools/pnnx/tests/ncnn/CMakeLists.txt
index b14a1716e3b..69579d14173 100644
--- a/tools/pnnx/tests/ncnn/CMakeLists.txt
+++ b/tools/pnnx/tests/ncnn/CMakeLists.txt
@@ -84,6 +84,7 @@ pnnx_ncnn_add_test(nn_Dropout2d)
 pnnx_ncnn_add_test(nn_Dropout3d)
 pnnx_ncnn_add_test(nn_ELU)
 pnnx_ncnn_add_test(nn_Embedding)
+pnnx_ncnn_add_test(nn_Fold)
 pnnx_ncnn_add_test(nn_GELU)
 pnnx_ncnn_add_test(nn_GLU)
 pnnx_ncnn_add_test(nn_GroupNorm)
@@ -117,6 +118,7 @@ pnnx_ncnn_add_test(nn_SiLU)
 pnnx_ncnn_add_test(nn_Softmax)
 pnnx_ncnn_add_test(nn_Softmax2d)
 pnnx_ncnn_add_test(nn_Tanh)
+pnnx_ncnn_add_test(nn_Unfold)
 pnnx_ncnn_add_test(nn_Upsample)
 pnnx_ncnn_add_test(nn_UpsamplingBilinear2d)
 pnnx_ncnn_add_test(nn_UpsamplingNearest2d)
diff --git a/tools/pnnx/tests/ncnn/test_nn_Fold.py b/tools/pnnx/tests/ncnn/test_nn_Fold.py
new file mode 100644
index 00000000000..0ec407979fc
--- /dev/null
+++ b/tools/pnnx/tests/ncnn/test_nn_Fold.py
@@ -0,0 +1,67 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from packaging import version
+
+class Model(nn.Module):  # holds three nn.Fold configurations, one per forward() input
+    def __init__(self):
+        super(Model, self).__init__()
+
+        self.fold_0 = nn.Fold(output_size=22, kernel_size=3)  # scalar sizes; stride, padding and dilation left at their defaults
+        self.fold_1 = nn.Fold(output_size=(17,18), kernel_size=(2,4), stride=(2,1), padding=2, dilation=1)  # rectangular kernel, per-axis stride, scalar padding
+        self.fold_2 = nn.Fold(output_size=(5,11), kernel_size=(1,3), stride=1, padding=(2,4), dilation=1)  # 1x3 kernel with per-axis padding
+
+    def forward(self, x, y, z):  # each input goes through its own Fold; the three results are returned as a tuple
+        x = self.fold_0(x)
+        y = self.fold_1(y)
+        z = self.fold_2(z)
+
+        return x, y, z
+
+def test():  # returns True when every ncnn output matches the PyTorch output within tolerance, else False
+    net = Model()
+    net.eval()
+
+    torch.manual_seed(0)  # fixed seed so the random inputs are reproducible
+    x = torch.rand(1, 108, 400)  # fold_0: 108 = 12 * 3 * 3 rows, 400 = 20 * 20 window positions
+    y = torch.rand(1, 96, 190)  # fold_1: 96 = 12 * 2 * 4 rows, 190 = 10 * 19 window positions
+    z = torch.rand(1, 33, 153)  # fold_2: 33 = 11 * 1 * 3 rows, 153 = 9 * 17 window positions
+
+    a = net(x, y, z)  # reference outputs from PyTorch
+
+    # export torchscript by tracing the model with the sample inputs
+    mod = torch.jit.trace(net, (x, y, z))
+    mod.save("test_nn_Fold.pt")
+
+    # torchscript to pnnx: run the converter binary at ../../src/pnnx (exit status is not checked)
+    import os
+    os.system("../../src/pnnx test_nn_Fold.pt inputshape=[1,108,400],[1,96,190],[1,33,153]")
+
+    # ncnn inference; test_nn_Fold_ncnn is presumably generated by the pnnx run above — confirm
+    import test_nn_Fold_ncnn
+    b = test_nn_Fold_ncnn.test_inference()
+
+    for a0, b0 in zip(a, b):
+        if not torch.allclose(a0, b0, 1e-4, 1e-4):  # positional rtol and atol, both 1e-4
+            return False
+    return True
+
+if __name__ == "__main__":  # script entry point: exit status 0 when test() passes, 1 otherwise
+    if test():
+        exit(0)
+    else:
+        exit(1)
diff --git a/tools/pnnx/tests/ncnn/test_nn_Unfold.py b/tools/pnnx/tests/ncnn/test_nn_Unfold.py
new file mode 100644
index 00000000000..dbe588127d5
--- /dev/null
+++ b/tools/pnnx/tests/ncnn/test_nn_Unfold.py
@@ -0,0 +1,65 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from packaging import version
+
+class Model(nn.Module):  # holds three nn.Unfold configurations that are all applied to the same input
+    def __init__(self):
+        super(Model, self).__init__()
+
+        self.unfold_0 = nn.Unfold(kernel_size=3)  # scalar kernel; stride, padding and dilation left at their defaults
+        self.unfold_1 = nn.Unfold(kernel_size=(2,4), stride=(2,1), padding=2, dilation=1)  # rectangular kernel, per-axis stride, scalar padding
+        self.unfold_2 = nn.Unfold(kernel_size=(1,3), stride=1, padding=(2,4), dilation=1)  # 1x3 kernel with per-axis padding
+
+    def forward(self, x):  # runs x through each Unfold and returns the three results as a tuple
+        x0 = self.unfold_0(x)
+        x1 = self.unfold_1(x)
+        x2 = self.unfold_2(x)
+
+        return x0, x1, x2
+
+def test():  # returns True when every ncnn output matches the PyTorch output within tolerance, else False
+    net = Model()
+    net.eval()
+
+    torch.manual_seed(0)  # fixed seed so the random input is reproducible
+    x = torch.rand(1, 12, 64, 64)  # single 4D input shared by all three Unfold modules
+
+    a = net(x)  # reference outputs from PyTorch
+
+    # export torchscript by tracing the model with the sample input
+    mod = torch.jit.trace(net, x)
+    mod.save("test_nn_Unfold.pt")
+
+    # torchscript to pnnx: run the converter binary at ../../src/pnnx (exit status is not checked)
+    import os
+    os.system("../../src/pnnx test_nn_Unfold.pt inputshape=[1,12,64,64]")
+
+    # ncnn inference; test_nn_Unfold_ncnn is presumably generated by the pnnx run above — confirm
+    import test_nn_Unfold_ncnn
+    b = test_nn_Unfold_ncnn.test_inference()
+
+    for a0, b0 in zip(a, b):
+        if not torch.allclose(a0, b0, 1e-4, 1e-4):  # positional rtol and atol, both 1e-4
+            return False
+    return True
+
+if __name__ == "__main__":  # script entry point: exit status 0 when test() passes, 1 otherwise
+    if test():
+        exit(0)
+    else:
+        exit(1)

From c29f7a23aeb1c5b561b4f4340204786a17f03237 Mon Sep 17 00:00:00 2001
From: nihuini <nihuini@tencent.com>
Date: Thu, 3 Nov 2022 11:13:28 +0800
Subject: [PATCH 03/10] convert F.fold and F.unfold

---
 tools/pnnx/README.md                   |  8 ++--
 tools/pnnx/src/CMakeLists.txt          |  2 +
 tools/pnnx/src/pass_ncnn/F_fold.cpp    | 63 ++++++++++++++++++++++++++
 tools/pnnx/src/pass_ncnn/F_unfold.cpp  | 61 +++++++++++++++++++++++++
 tools/pnnx/tests/ncnn/CMakeLists.txt   |  2 +
 tools/pnnx/tests/ncnn/test_F_fold.py   | 63 ++++++++++++++++++++++++++
 tools/pnnx/tests/ncnn/test_F_unfold.py | 61 +++++++++++++++++++++++++
 7 files changed, 256 insertions(+), 4 deletions(-)
 create mode 100644 tools/pnnx/src/pass_ncnn/F_fold.cpp
 create mode 100644 tools/pnnx/src/pass_ncnn/F_unfold.cpp
 create mode 100644 tools/pnnx/tests/ncnn/test_F_fold.py
 create mode 100644 tools/pnnx/tests/ncnn/test_F_unfold.py

diff --git a/tools/pnnx/README.md b/tools/pnnx/README.md
index 0a153dafe2b..eac02294193 100644
--- a/tools/pnnx/README.md
+++ b/tools/pnnx/README.md
@@ -484,7 +484,7 @@ TORCH_LIBRARY(upfirdn2d_op, m) {
 |nn.Embedding               | :heavy_check_mark: | :heavy_check_mark: |
 |nn.EmbeddingBag            |   |
 |nn.Flatten                 | :heavy_check_mark: |
-|nn.Fold                    | :heavy_check_mark: |
+|nn.Fold                    | :heavy_check_mark: | :heavy_check_mark: |
 |nn.FractionalMaxPool2d     |   |
 |nn.FractionalMaxPool3d     |   |
 |nn.GELU                    | :heavy_check_mark: | :heavy_check_mark: |
@@ -562,7 +562,7 @@ TORCH_LIBRARY(upfirdn2d_op, m) {
 |nn.TransformerEncoder      |   |
 |nn.TransformerEncoderLayer |   |
 |nn.Unflatten               |   |
-|nn.Unfold                  | :heavy_check_mark: |
+|nn.Unfold                  | :heavy_check_mark: | :heavy_check_mark: |
 |nn.Upsample                | :heavy_check_mark: | :heavy_check_mark: |
 |nn.UpsamplingBilinear2d    | :heavy_check_mark: | :heavy_check_mark: |
 |nn.UpsamplingNearest2d     | :heavy_check_mark: | :heavy_check_mark: |
@@ -600,7 +600,7 @@ TORCH_LIBRARY(upfirdn2d_op, m) {
 |F.embedding                | :heavy_check_mark: | :heavy_check_mark: |
 |F.embedding_bag            |  |
 |F.feature_alpha_dropout    | :heavy_check_mark: | :heavy_check_mark: |
-|F.fold                     | :heavy_check_mark: |
+|F.fold                     | :heavy_check_mark: | :heavy_check_mark: |
 |F.fractional_max_pool2d    |  |
 |F.fractional_max_pool3d    |  |
 |F.gelu                     | :heavy_check_mark: | :heavy_check_mark: |
@@ -656,7 +656,7 @@ TORCH_LIBRARY(upfirdn2d_op, m) {
 |F.tanhshrink               | :heavy_check_mark: |
 |F.threshold                | :heavy_check_mark: |
 |F.threshold_               | :heavy_check_mark: |
-|F.unfold                   | :heavy_check_mark: |
+|F.unfold                   | :heavy_check_mark: | :heavy_check_mark: |
 |F.upsample                 | :heavy_check_mark: | :heavy_check_mark: |
 |F.upsample_bilinear        | :heavy_check_mark: | :heavy_check_mark: |
 |F.upsample_nearest         | :heavy_check_mark: | :heavy_check_mark: |
diff --git a/tools/pnnx/src/CMakeLists.txt b/tools/pnnx/src/CMakeLists.txt
index f3fb6bf9cd8..2e0eb5d8456 100644
--- a/tools/pnnx/src/CMakeLists.txt
+++ b/tools/pnnx/src/CMakeLists.txt
@@ -372,6 +372,7 @@ set(pnnx_pass_ncnn_SRCS
     pass_ncnn/F_conv3d.cpp
     pass_ncnn/F_elu.cpp
     pass_ncnn/F_embedding.cpp
+    pass_ncnn/F_fold.cpp
     pass_ncnn/F_gelu.cpp
     pass_ncnn/F_glu.cpp
     pass_ncnn/F_group_norm.cpp
@@ -400,6 +401,7 @@ set(pnnx_pass_ncnn_SRCS
     pass_ncnn/F_silu.cpp
     pass_ncnn/F_softmax.cpp
     pass_ncnn/F_tanh.cpp
+    pass_ncnn/F_unfold.cpp
     pass_ncnn/F_upsample_bilinear.cpp
     pass_ncnn/F_upsample_nearest.cpp
     pass_ncnn/F_upsample.cpp
diff --git a/tools/pnnx/src/pass_ncnn/F_fold.cpp b/tools/pnnx/src/pass_ncnn/F_fold.cpp
new file mode 100644
index 00000000000..1d35a72eb11
--- /dev/null
+++ b/tools/pnnx/src/pass_ncnn/F_fold.cpp
@@ -0,0 +1,63 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "pass_ncnn.h"
+
+namespace pnnx {
+
+namespace ncnn {
+
+class F_fold : public GraphRewriterPass // graph rewriter pass that matches F.fold and emits an operator of type "Fold" with numbered params
+{
+public:
+    const char* match_pattern_graph() const // pattern: one F.fold between a single input and a single output, capturing its five attributes
+    {
+        return R"PNNXIR(7767517
+3 2
+pnnx.Input              input       0 1 input
+F.fold                  op_0        1 1 input out output_size=%output_size kernel_size=%kernel_size dilation=%dilation stride=%stride padding=%padding
+pnnx.Output             output      1 0 out
+)PNNXIR";
+    }
+
+    const char* type_str() const // type of the replacement operator
+    {
+        return "Fold";
+    }
+
+    const char* name_str() const // name of the replacement operator
+    {
+        return "fold";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params) const // NOTE(review): indexes ai[0] and ai[1] unconditionally — assumes every captured attribute is a 2-element int array, confirm
+    {
+        op->params["1"] = captured_params.at("kernel_size").ai[1]; // element [1] -> id 1, read by Fold::load_param as kernel_w
+        op->params["11"] = captured_params.at("kernel_size").ai[0]; // element [0] -> id 11 (kernel_h)
+        op->params["2"] = captured_params.at("dilation").ai[1]; // id 2 (dilation_w)
+        op->params["12"] = captured_params.at("dilation").ai[0]; // id 12 (dilation_h)
+        op->params["3"] = captured_params.at("stride").ai[1]; // id 3 (stride_w)
+        op->params["13"] = captured_params.at("stride").ai[0]; // id 13 (stride_h)
+        op->params["4"] = captured_params.at("padding").ai[1]; // id 4 (pad_left); id 15 is not written, so load_param falls back to pad_left
+        op->params["14"] = captured_params.at("padding").ai[0]; // id 14 (pad_top); id 16 is not written, so load_param falls back to pad_top
+        op->params["20"] = captured_params.at("output_size").ai[1]; // id 20 (output_w)
+        op->params["21"] = captured_params.at("output_size").ai[0]; // id 21 (output_h)
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_fold, 20)
+
+} // namespace ncnn
+
+} // namespace pnnx
diff --git a/tools/pnnx/src/pass_ncnn/F_unfold.cpp b/tools/pnnx/src/pass_ncnn/F_unfold.cpp
new file mode 100644
index 00000000000..14f82b08f99
--- /dev/null
+++ b/tools/pnnx/src/pass_ncnn/F_unfold.cpp
@@ -0,0 +1,61 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "pass_ncnn.h"
+
+namespace pnnx {
+
+namespace ncnn {
+// Graph rewriter pass: matches a lone F.unfold operator and emits an "Unfold" operator whose numbered params carry the captured arguments.
+class F_unfold : public GraphRewriterPass
+{
+public:
+    const char* match_pattern_graph() const
+    {
+        return R"PNNXIR(7767517
+3 2
+pnnx.Input              input       0 1 input
+F.unfold                op_0        1 1 input out kernel_size=%kernel_size dilation=%dilation stride=%stride padding=%padding
+pnnx.Output             output      1 0 out
+)PNNXIR";
+    }
+    // Type string of the replacement operator ("Unfold" is the layer name tests/test_unfold.cpp exercises).
+    const char* type_str() const
+    {
+        return "Unfold";
+    }
+    // Name string of the replacement operator. NOTE(review): exact use is defined by GraphRewriterPass; confirm in pass_ncnn.h.
+    const char* name_str() const
+    {
+        return "unfold";
+    }
+    // Each captured argument is read as a two-element int array: element [1] goes to id N, element [0] to id N + 10. Id 18 (pad_value in tests/test_unfold.cpp) is not written here.
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params) const
+    {
+        op->params["1"] = captured_params.at("kernel_size").ai[1];   // id 1: kernel_w in tests/test_unfold.cpp
+        op->params["11"] = captured_params.at("kernel_size").ai[0];  // id 11: kernel_h
+        op->params["2"] = captured_params.at("dilation").ai[1];      // id 2: dilation_w
+        op->params["12"] = captured_params.at("dilation").ai[0];     // id 12: dilation_h
+        op->params["3"] = captured_params.at("stride").ai[1];        // id 3: stride_w
+        op->params["13"] = captured_params.at("stride").ai[0];       // id 13: stride_h
+        op->params["4"] = captured_params.at("padding").ai[1];       // id 4: pad_w
+        op->params["14"] = captured_params.at("padding").ai[0];      // id 14: pad_h
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_unfold, 20)
+
+} // namespace ncnn
+
+} // namespace pnnx
diff --git a/tools/pnnx/tests/ncnn/CMakeLists.txt b/tools/pnnx/tests/ncnn/CMakeLists.txt
index 69579d14173..f5688ce8d70 100644
--- a/tools/pnnx/tests/ncnn/CMakeLists.txt
+++ b/tools/pnnx/tests/ncnn/CMakeLists.txt
@@ -28,6 +28,7 @@ pnnx_ncnn_add_test(F_dropout3d)
 pnnx_ncnn_add_test(F_elu)
 pnnx_ncnn_add_test(F_embedding)
 pnnx_ncnn_add_test(F_feature_alpha_dropout)
+pnnx_ncnn_add_test(F_fold)
 pnnx_ncnn_add_test(F_gelu)
 pnnx_ncnn_add_test(F_glu)
 pnnx_ncnn_add_test(F_group_norm)
@@ -52,6 +53,7 @@ pnnx_ncnn_add_test(F_sigmoid)
 pnnx_ncnn_add_test(F_silu)
 pnnx_ncnn_add_test(F_softmax)
 pnnx_ncnn_add_test(F_tanh)
+pnnx_ncnn_add_test(F_unfold)
 pnnx_ncnn_add_test(F_upsample_bilinear)
 pnnx_ncnn_add_test(F_upsample_nearest)
 pnnx_ncnn_add_test(F_upsample)
diff --git a/tools/pnnx/tests/ncnn/test_F_fold.py b/tools/pnnx/tests/ncnn/test_F_fold.py
new file mode 100644
index 00000000000..0a85d105bab
--- /dev/null
+++ b/tools/pnnx/tests/ncnn/test_F_fold.py
@@ -0,0 +1,63 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from packaging import version
+
+class Model(nn.Module):
+    def __init__(self):
+        super(Model, self).__init__()
+    # forward() applies F.fold to three independent inputs, each with its own output_size / kernel / stride / padding settings.
+    def forward(self, x, y, z):
+        x = F.fold(x, output_size=22, kernel_size=3)
+        y = F.fold(y, output_size=(17,18), kernel_size=(2,4), stride=(2,1), padding=2, dilation=1)
+        z = F.fold(z, output_size=(5,11), kernel_size=(1,3), stride=1, padding=(2,4), dilation=1)
+
+        return x, y, z
+
+def test():  # returns True when every ncnn output matches the PyTorch result within tolerance
+    net = Model()
+    net.eval()
+    # fixed seed: keeps the random inputs reproducible across runs
+    torch.manual_seed(0)
+    x = torch.rand(1, 108, 400)
+    y = torch.rand(1, 96, 190)
+    z = torch.rand(1, 33, 153)
+
+    a = net(x, y, z)
+
+    # export torchscript (traced with the same inputs that produced the reference outputs)
+    mod = torch.jit.trace(net, (x, y, z))
+    mod.save("test_F_fold.pt")
+
+    # torchscript to pnnx
+    import os
+    os.system("../../src/pnnx test_F_fold.pt inputshape=[1,108,400],[1,96,190],[1,33,153]")
+
+    # ncnn inference
+    import test_F_fold_ncnn
+    b = test_F_fold_ncnn.test_inference()
+    # compare each PyTorch output with its ncnn counterpart; the two positional 1e-4 values are rtol and atol
+    for a0, b0 in zip(a, b):
+        if not torch.allclose(a0, b0, 1e-4, 1e-4):
+            return False
+    return True
+
+if __name__ == "__main__":  # process exit status: 0 when test() returns True, 1 otherwise
+    if test():
+        exit(0)
+    else:
+        exit(1)
diff --git a/tools/pnnx/tests/ncnn/test_F_unfold.py b/tools/pnnx/tests/ncnn/test_F_unfold.py
new file mode 100644
index 00000000000..606c7ada402
--- /dev/null
+++ b/tools/pnnx/tests/ncnn/test_F_unfold.py
@@ -0,0 +1,61 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from packaging import version
+
+class Model(nn.Module):
+    def __init__(self):
+        super(Model, self).__init__()
+    # forward() applies F.unfold to the same input three times, each with its own kernel / stride / padding settings.
+    def forward(self, x):
+        x0 = F.unfold(x, kernel_size=3)
+        x1 = F.unfold(x, kernel_size=(2,4), stride=(2,1), padding=2, dilation=1)
+        x2 = F.unfold(x, kernel_size=(1,3), stride=1, padding=(2,4), dilation=1)
+
+        return x0, x1, x2
+
+def test():  # returns True when every ncnn output matches the PyTorch result within tolerance
+    net = Model()
+    net.eval()
+    # fixed seed: keeps the random input reproducible across runs
+    torch.manual_seed(0)
+    x = torch.rand(1, 12, 64, 64)
+
+    a = net(x)
+
+    # export torchscript
+    mod = torch.jit.trace(net, x)
+    mod.save("test_F_unfold.pt")
+
+    # torchscript to pnnx (the test_F_unfold_ncnn module imported below is presumably one of its outputs)
+    import os
+    os.system("../../src/pnnx test_F_unfold.pt inputshape=[1,12,64,64]")
+
+    # ncnn inference
+    import test_F_unfold_ncnn
+    b = test_F_unfold_ncnn.test_inference()
+    # compare each PyTorch output with its ncnn counterpart; the two positional 1e-4 values are rtol and atol
+    for a0, b0 in zip(a, b):
+        if not torch.allclose(a0, b0, 1e-4, 1e-4):
+            return False
+    return True
+
+if __name__ == "__main__":  # process exit status: 0 when test() returns True, 1 otherwise
+    if test():
+        exit(0)
+    else:
+        exit(1)

From 4d2247cfa140f549a8a50f3a2233a84d77749f3e Mon Sep 17 00:00:00 2001
From: nihuini <nihuini@tencent.com>
Date: Thu, 3 Nov 2022 11:45:57 +0800
Subject: [PATCH 04/10] add ncnn test

---
 tests/CMakeLists.txt                    |  2 +
 tests/test_fold.cpp                     | 58 ++++++++++++++++++++++
 tests/test_unfold.cpp                   | 65 +++++++++++++++++++++++++
 tools/pnnx/tests/ncnn/test_F_fold.py    |  6 +--
 tools/pnnx/tests/ncnn/test_F_unfold.py  |  2 +-
 tools/pnnx/tests/ncnn/test_nn_Fold.py   |  6 +--
 tools/pnnx/tests/ncnn/test_nn_Unfold.py |  2 +-
 tools/pnnx/tests/test_F_fold.py         |  6 +--
 tools/pnnx/tests/test_F_unfold.py       |  2 +-
 tools/pnnx/tests/test_nn_Fold.py        |  6 +--
 tools/pnnx/tests/test_nn_Unfold.py      |  2 +-
 11 files changed, 141 insertions(+), 16 deletions(-)
 create mode 100644 tests/test_fold.cpp
 create mode 100644 tests/test_unfold.cpp

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 73efa22868e..bf1d933bb48 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -85,6 +85,7 @@ ncnn_add_layer_test(Eltwise)
 ncnn_add_layer_test(ELU)
 ncnn_add_layer_test(ExpandDims)
 ncnn_add_layer_test(Flatten)
+ncnn_add_layer_test(Fold)
 ncnn_add_layer_test(GELU)
 ncnn_add_layer_test(GLU)
 ncnn_add_layer_test(Gemm)
@@ -135,4 +136,5 @@ ncnn_add_layer_test(Swish)
 ncnn_add_layer_test(TanH)
 ncnn_add_layer_test(Tile)
 ncnn_add_layer_test(UnaryOp)
+ncnn_add_layer_test(Unfold)
 ncnn_add_layer_test(Yolov3DetectionOutput)
diff --git a/tests/test_fold.cpp b/tests/test_fold.cpp
new file mode 100644
index 00000000000..11a38428fdd
--- /dev/null
+++ b/tests/test_fold.cpp
@@ -0,0 +1,58 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "layer/fold.h"
+#include "testutil.h"
+// Runs test_layer on Fold for one configuration with a random w x h input; returns test_layer's result (non-zero is reported as a failure).
+static int test_fold(int w, int h, int outw, int outh, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int pad_w, int pad_h)
+{
+    ncnn::Mat a = RandomMat(w, h);
+    // Param ids are the ones pnnx's F_fold pass writes: 1/11 kernel, 2/12 dilation, 3/13 stride, 4/14 pad, 20/21 output size (w/h each).
+    ncnn::ParamDict pd;
+    pd.set(1, kernel_w);
+    pd.set(11, kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_w);
+    pd.set(14, pad_h);
+    pd.set(20, outw);
+    pd.set(21, outh);
+    // No weight blobs are supplied for this layer.
+    std::vector<ncnn::Mat> weights(0);
+
+    int ret = test_layer<ncnn::Fold>("Fold", pd, weights, a);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_fold failed w=%d h=%d outw=%d outh=%d kernel=%d,%d dilation=%d,%d stride=%d,%d pad=%d,%d\n", w, h, outw, outh, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, pad_w, pad_h);
+    }
+
+    return ret;
+}
+// Three configurations mirroring the pnnx test_F_fold.py cases as updated in this commit; the || chain stops at the first failure and yields 0 only if all pass.
+static int test_fold_0()
+{
+    return 0
+           || test_fold(400, 108, 22, 22, 3, 3, 1, 1, 1, 1, 0, 0)
+           || test_fold(190, 96, 18, 17, 4, 2, 1, 1, 1, 2, 2, 2)
+           || test_fold(120, 36, 11, 5, 3, 2, 2, 1, 1, 1, 4, 2);
+}
+// Entry point: fixed SRAND seed (presumably makes RandomMat data repeatable; confirm in testutil.h); the exit status is the test result.
+int main()
+{
+    SRAND(7767517);
+
+    return test_fold_0();
+}
diff --git a/tests/test_unfold.cpp b/tests/test_unfold.cpp
new file mode 100644
index 00000000000..4eea1d020ea
--- /dev/null
+++ b/tests/test_unfold.cpp
@@ -0,0 +1,65 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "layer/unfold.h"
+#include "testutil.h"
+// Runs test_layer on Unfold for one configuration with a random w x h x c input; returns test_layer's result (non-zero is reported as a failure).
+static int test_unfold(int w, int h, int c, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int pad_w, int pad_h, float pad_value)
+{
+    ncnn::Mat a = RandomMat(w, h, c);
+    // Ids 1/11, 2/12, 3/13, 4/14 are the ones pnnx's F_unfold pass writes; id 18 (pad_value) is set only by this test.
+    ncnn::ParamDict pd;
+    pd.set(1, kernel_w);
+    pd.set(11, kernel_h);
+    pd.set(2, dilation_w);
+    pd.set(12, dilation_h);
+    pd.set(3, stride_w);
+    pd.set(13, stride_h);
+    pd.set(4, pad_w);
+    pd.set(14, pad_h);
+    pd.set(18, pad_value);
+    // No weight blobs are supplied for this layer.
+    std::vector<ncnn::Mat> weights(0);
+
+    int ret = test_layer<ncnn::Unfold>("Unfold", pd, weights, a);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_unfold failed w=%d h=%d c=%d kernel=%d,%d dilation=%d,%d stride=%d,%d pad=%d,%d pad_value=%f\n", w, h, c, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, pad_w, pad_h, pad_value);
+    }
+
+    return ret;
+}
+// Explicit non-negative padding with assorted pad values; the || chain stops at the first failure and yields 0 only if all pass.
+static int test_unfold_0()
+{
+    return 0
+           || test_unfold(32, 32, 11, 3, 3, 1, 1, 1, 1, 0, 0, 0.f)
+           || test_unfold(32, 32, 12, 4, 2, 1, 1, 1, 2, 2, 2, -0.5f)
+           || test_unfold(32, 32, 16, 3, 2, 2, 1, 1, 1, 4, 2, 2.f);
+}
+// Same shapes/kernel/dilation/stride as test_unfold_0 but with pad -233 / -234. NOTE(review): presumably ncnn's automatic-padding sentinels; confirm in Unfold::make_padding.
+static int test_unfold_1()
+{
+    return 0
+           || test_unfold(32, 32, 11, 3, 3, 1, 1, 1, 1, -233, -233, -0.5f)
+           || test_unfold(32, 32, 12, 4, 2, 1, 1, 1, 2, -234, -234, 0.f)
+           || test_unfold(32, 32, 16, 3, 2, 2, 1, 1, 1, -233, -233, 1.f);
+}
+// Entry point: fixed SRAND seed (presumably makes RandomMat data repeatable; confirm in testutil.h); exit status is non-zero if either group fails.
+int main()
+{
+    SRAND(7767517);
+
+    return test_unfold_0() || test_unfold_1();
+}
diff --git a/tools/pnnx/tests/ncnn/test_F_fold.py b/tools/pnnx/tests/ncnn/test_F_fold.py
index 0a85d105bab..54103fdf261 100644
--- a/tools/pnnx/tests/ncnn/test_F_fold.py
+++ b/tools/pnnx/tests/ncnn/test_F_fold.py
@@ -24,7 +24,7 @@ def __init__(self):
     def forward(self, x, y, z):
         x = F.fold(x, output_size=22, kernel_size=3)
         y = F.fold(y, output_size=(17,18), kernel_size=(2,4), stride=(2,1), padding=2, dilation=1)
-        z = F.fold(z, output_size=(5,11), kernel_size=(1,3), stride=1, padding=(2,4), dilation=1)
+        z = F.fold(z, output_size=(5,11), kernel_size=(2,3), stride=1, padding=(2,4), dilation=(1,2))
 
         return x, y, z
 
@@ -35,7 +35,7 @@ def test():
     torch.manual_seed(0)
     x = torch.rand(1, 108, 400)
     y = torch.rand(1, 96, 190)
-    z = torch.rand(1, 33, 153)
+    z = torch.rand(1, 36, 120)
 
     a = net(x, y, z)
 
@@ -45,7 +45,7 @@ def test():
 
     # torchscript to pnnx
     import os
-    os.system("../../src/pnnx test_F_fold.pt inputshape=[1,108,400],[1,96,190],[1,33,153]")
+    os.system("../../src/pnnx test_F_fold.pt inputshape=[1,108,400],[1,96,190],[1,36,120]")
 
     # ncnn inference
     import test_F_fold_ncnn
diff --git a/tools/pnnx/tests/ncnn/test_F_unfold.py b/tools/pnnx/tests/ncnn/test_F_unfold.py
index 606c7ada402..e8e1a603cc3 100644
--- a/tools/pnnx/tests/ncnn/test_F_unfold.py
+++ b/tools/pnnx/tests/ncnn/test_F_unfold.py
@@ -24,7 +24,7 @@ def __init__(self):
     def forward(self, x):
         x0 = F.unfold(x, kernel_size=3)
         x1 = F.unfold(x, kernel_size=(2,4), stride=(2,1), padding=2, dilation=1)
-        x2 = F.unfold(x, kernel_size=(1,3), stride=1, padding=(2,4), dilation=1)
+        x2 = F.unfold(x, kernel_size=(1,3), stride=1, padding=(2,4), dilation=(1,2))
 
         return x0, x1, x2
 
diff --git a/tools/pnnx/tests/ncnn/test_nn_Fold.py b/tools/pnnx/tests/ncnn/test_nn_Fold.py
index 0ec407979fc..8b07b2b5d38 100644
--- a/tools/pnnx/tests/ncnn/test_nn_Fold.py
+++ b/tools/pnnx/tests/ncnn/test_nn_Fold.py
@@ -23,7 +23,7 @@ def __init__(self):
 
         self.fold_0 = nn.Fold(output_size=22, kernel_size=3)
         self.fold_1 = nn.Fold(output_size=(17,18), kernel_size=(2,4), stride=(2,1), padding=2, dilation=1)
-        self.fold_2 = nn.Fold(output_size=(5,11), kernel_size=(1,3), stride=1, padding=(2,4), dilation=1)
+        self.fold_2 = nn.Fold(output_size=(5,11), kernel_size=(2,3), stride=1, padding=(2,4), dilation=(1,2))
 
     def forward(self, x, y, z):
         x = self.fold_0(x)
@@ -39,7 +39,7 @@ def test():
     torch.manual_seed(0)
     x = torch.rand(1, 108, 400)
     y = torch.rand(1, 96, 190)
-    z = torch.rand(1, 33, 153)
+    z = torch.rand(1, 36, 120)
 
     a = net(x, y, z)
 
@@ -49,7 +49,7 @@ def test():
 
     # torchscript to pnnx
     import os
-    os.system("../../src/pnnx test_nn_Fold.pt inputshape=[1,108,400],[1,96,190],[1,33,153]")
+    os.system("../../src/pnnx test_nn_Fold.pt inputshape=[1,108,400],[1,96,190],[1,36,120]")
 
     # ncnn inference
     import test_nn_Fold_ncnn
diff --git a/tools/pnnx/tests/ncnn/test_nn_Unfold.py b/tools/pnnx/tests/ncnn/test_nn_Unfold.py
index dbe588127d5..8d618f76150 100644
--- a/tools/pnnx/tests/ncnn/test_nn_Unfold.py
+++ b/tools/pnnx/tests/ncnn/test_nn_Unfold.py
@@ -23,7 +23,7 @@ def __init__(self):
 
         self.unfold_0 = nn.Unfold(kernel_size=3)
         self.unfold_1 = nn.Unfold(kernel_size=(2,4), stride=(2,1), padding=2, dilation=1)
-        self.unfold_2 = nn.Unfold(kernel_size=(1,3), stride=1, padding=(2,4), dilation=1)
+        self.unfold_2 = nn.Unfold(kernel_size=(1,3), stride=1, padding=(2,4), dilation=(1,2))
 
     def forward(self, x):
         x0 = self.unfold_0(x)
diff --git a/tools/pnnx/tests/test_F_fold.py b/tools/pnnx/tests/test_F_fold.py
index b85c41fe3b5..68c5b566d56 100644
--- a/tools/pnnx/tests/test_F_fold.py
+++ b/tools/pnnx/tests/test_F_fold.py
@@ -24,7 +24,7 @@ def __init__(self):
     def forward(self, x, y, z):
         x = F.fold(x, output_size=22, kernel_size=3)
         y = F.fold(y, output_size=(17,18), kernel_size=(2,4), stride=(2,1), padding=2, dilation=1)
-        z = F.fold(z, output_size=(5,11), kernel_size=(1,3), stride=1, padding=(2,4), dilation=1)
+        z = F.fold(z, output_size=(5,11), kernel_size=(2,3), stride=1, padding=(2,4), dilation=(1,2))
 
         return x, y, z
 
@@ -35,7 +35,7 @@ def test():
     torch.manual_seed(0)
     x = torch.rand(1, 108, 400)
     y = torch.rand(1, 96, 190)
-    z = torch.rand(1, 33, 153)
+    z = torch.rand(1, 36, 120)
 
     a0, a1, a2 = net(x, y, z)
 
@@ -45,7 +45,7 @@ def test():
 
     # torchscript to pnnx
     import os
-    os.system("../src/pnnx test_F_fold.pt inputshape=[1,108,400],[1,96,190],[1,33,153]")
+    os.system("../src/pnnx test_F_fold.pt inputshape=[1,108,400],[1,96,190],[1,36,120]")
 
     # pnnx inference
     import test_F_fold_pnnx
diff --git a/tools/pnnx/tests/test_F_unfold.py b/tools/pnnx/tests/test_F_unfold.py
index 1f34513792d..51f19a4f48a 100644
--- a/tools/pnnx/tests/test_F_unfold.py
+++ b/tools/pnnx/tests/test_F_unfold.py
@@ -24,7 +24,7 @@ def __init__(self):
     def forward(self, x):
         x0 = F.unfold(x, kernel_size=3)
         x1 = F.unfold(x, kernel_size=(2,4), stride=(2,1), padding=2, dilation=1)
-        x2 = F.unfold(x, kernel_size=(1,3), stride=1, padding=(2,4), dilation=1)
+        x2 = F.unfold(x, kernel_size=(1,3), stride=1, padding=(2,4), dilation=(1,2))
 
         return x0, x1, x2
 
diff --git a/tools/pnnx/tests/test_nn_Fold.py b/tools/pnnx/tests/test_nn_Fold.py
index 2cc5eb9a620..8f53639db2a 100644
--- a/tools/pnnx/tests/test_nn_Fold.py
+++ b/tools/pnnx/tests/test_nn_Fold.py
@@ -23,7 +23,7 @@ def __init__(self):
 
         self.fold_0 = nn.Fold(output_size=22, kernel_size=3)
         self.fold_1 = nn.Fold(output_size=(17,18), kernel_size=(2,4), stride=(2,1), padding=2, dilation=1)
-        self.fold_2 = nn.Fold(output_size=(5,11), kernel_size=(1,3), stride=1, padding=(2,4), dilation=1)
+        self.fold_2 = nn.Fold(output_size=(5,11), kernel_size=(2,3), stride=1, padding=(2,4), dilation=(1,2))
 
     def forward(self, x, y, z):
         x = self.fold_0(x)
@@ -39,7 +39,7 @@ def test():
     torch.manual_seed(0)
     x = torch.rand(1, 108, 400)
     y = torch.rand(1, 96, 190)
-    z = torch.rand(1, 33, 153)
+    z = torch.rand(1, 36, 120)
 
     a0, a1, a2 = net(x, y, z)
 
@@ -49,7 +49,7 @@ def test():
 
     # torchscript to pnnx
     import os
-    os.system("../src/pnnx test_nn_Fold.pt inputshape=[1,108,400],[1,96,190],[1,33,153]")
+    os.system("../src/pnnx test_nn_Fold.pt inputshape=[1,108,400],[1,96,190],[1,36,120]")
 
     # pnnx inference
     import test_nn_Fold_pnnx
diff --git a/tools/pnnx/tests/test_nn_Unfold.py b/tools/pnnx/tests/test_nn_Unfold.py
index aab26f2dcbd..aece085668c 100644
--- a/tools/pnnx/tests/test_nn_Unfold.py
+++ b/tools/pnnx/tests/test_nn_Unfold.py
@@ -23,7 +23,7 @@ def __init__(self):
 
         self.unfold_0 = nn.Unfold(kernel_size=3)
         self.unfold_1 = nn.Unfold(kernel_size=(2,4), stride=(2,1), padding=2, dilation=1)
-        self.unfold_2 = nn.Unfold(kernel_size=(1,3), stride=1, padding=(2,4), dilation=1)
+        self.unfold_2 = nn.Unfold(kernel_size=(1,3), stride=1, padding=(2,4), dilation=(1,2))
 
     def forward(self, x):
         x0 = self.unfold_0(x)

From 13adfb4a6d206ad00421160b6f97ac9587ec0fb6 Mon Sep 17 00:00:00 2001
From: nihuini <nihuini@tencent.com>
Date: Thu, 3 Nov 2022 11:53:22 +0800
Subject: [PATCH 05/10] fresh checkout

---
 .ci/linux-x64-cpu-gcc.yml | 2 ++
 .ci/pnnx.yml              | 1 +
 .ci/test-coverage.yml     | 9 +++++++++
 3 files changed, 12 insertions(+)

diff --git a/.ci/linux-x64-cpu-gcc.yml b/.ci/linux-x64-cpu-gcc.yml
index 2f970f37c12..50f7b0b2589 100644
--- a/.ci/linux-x64-cpu-gcc.yml
+++ b/.ci/linux-x64-cpu-gcc.yml
@@ -49,6 +49,7 @@ jobs:
     - name: checkout
       checkout: self
       with:
+        strategy: FRESH_CHECKOUT
         enableGitLfs: false
 
     - name: install-deps
@@ -85,6 +86,7 @@ jobs:
     - name: checkout
       checkout: self
       with:
+        strategy: FRESH_CHECKOUT
         enableGitLfs: false
 
     - name: build-nostdio
diff --git a/.ci/pnnx.yml b/.ci/pnnx.yml
index 0792e1da0a6..e3760d111aa 100644
--- a/.ci/pnnx.yml
+++ b/.ci/pnnx.yml
@@ -52,6 +52,7 @@ jobs:
     - name: checkout
       checkout: self
       with:
+        strategy: FRESH_CHECKOUT
         enableGitLfs: false
 
     - name: install-deps
diff --git a/.ci/test-coverage.yml b/.ci/test-coverage.yml
index e3ed988df6d..0c3fe6908e2 100644
--- a/.ci/test-coverage.yml
+++ b/.ci/test-coverage.yml
@@ -32,6 +32,7 @@ jobs:
     - name: checkout
       checkout: self
       with:
+        strategy: FRESH_CHECKOUT
         enableGitLfs: false
 
     - name: install-deps
@@ -105,6 +106,7 @@ jobs:
     - name: checkout
       checkout: self
       with:
+        strategy: FRESH_CHECKOUT
         enableGitLfs: false
 
     - name: install-deps
@@ -195,6 +197,7 @@ jobs:
     - name: checkout
       checkout: self
       with:
+        strategy: FRESH_CHECKOUT
         enableGitLfs: false
 
     - name: install-deps
@@ -248,6 +251,7 @@ jobs:
     - name: checkout
       checkout: self
       with:
+        strategy: FRESH_CHECKOUT
         enableGitLfs: false
 
     - name: install-deps
@@ -351,6 +355,7 @@ jobs:
     - name: checkout
       checkout: self
       with:
+        strategy: FRESH_CHECKOUT
         enableGitLfs: false
 
     - name: install-deps
@@ -435,6 +440,7 @@ jobs:
     - name: checkout
       checkout: self
       with:
+        strategy: FRESH_CHECKOUT
         enableGitLfs: false
 
     - name: install-deps
@@ -512,6 +518,7 @@ jobs:
     - name: checkout
       checkout: self
       with:
+        strategy: FRESH_CHECKOUT
         enableGitLfs: false
 
     - name: install-deps
@@ -589,6 +596,7 @@ jobs:
     - name: checkout
       checkout: self
       with:
+        strategy: FRESH_CHECKOUT
         enableGitLfs: false
 
     - name: install-deps
@@ -668,6 +676,7 @@ jobs:
     - name: checkout
       checkout: self
       with:
+        strategy: FRESH_CHECKOUT
         enableGitLfs: false
 
     - name: install-deps

From ce415a0b1f0a63565180810c9ab83c17c3d7a02d Mon Sep 17 00:00:00 2001
From: nihuini <nihuini@tencent.com>
Date: Thu, 3 Nov 2022 12:09:19 +0800
Subject: [PATCH 06/10] fix build

---
 src/layer/fold.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/layer/fold.cpp b/src/layer/fold.cpp
index 608be635e40..61f0d7791a8 100644
--- a/src/layer/fold.cpp
+++ b/src/layer/fold.cpp
@@ -14,8 +14,6 @@
 
 #include "fold.h"
 
-#include <algorithm>
-
 namespace ncnn {
 
 Fold::Fold()

From c2c707bc09867c4ee4d7e23175fcb9b2bd2dec2a Mon Sep 17 00:00:00 2001
From: nihuini <nihuini@tencent.com>
Date: Thu, 3 Nov 2022 12:13:02 +0800
Subject: [PATCH 07/10] skip submodule

---
 .ci/linux-x64-cpu-gcc.yml | 2 ++
 .ci/pnnx.yml              | 1 +
 .ci/test-coverage.yml     | 7 +++++++
 3 files changed, 10 insertions(+)

diff --git a/.ci/linux-x64-cpu-gcc.yml b/.ci/linux-x64-cpu-gcc.yml
index 50f7b0b2589..4f138d9d080 100644
--- a/.ci/linux-x64-cpu-gcc.yml
+++ b/.ci/linux-x64-cpu-gcc.yml
@@ -50,6 +50,7 @@ jobs:
       checkout: self
       with:
         strategy: FRESH_CHECKOUT
+        enableSubmodule: false
         enableGitLfs: false
 
     - name: install-deps
@@ -87,6 +88,7 @@ jobs:
       checkout: self
       with:
         strategy: FRESH_CHECKOUT
+        enableSubmodule: false
         enableGitLfs: false
 
     - name: build-nostdio
diff --git a/.ci/pnnx.yml b/.ci/pnnx.yml
index e3760d111aa..5db6950e6b1 100644
--- a/.ci/pnnx.yml
+++ b/.ci/pnnx.yml
@@ -53,6 +53,7 @@ jobs:
       checkout: self
       with:
         strategy: FRESH_CHECKOUT
+        enableSubmodule: false
         enableGitLfs: false
 
     - name: install-deps
diff --git a/.ci/test-coverage.yml b/.ci/test-coverage.yml
index 0c3fe6908e2..9272caac1ba 100644
--- a/.ci/test-coverage.yml
+++ b/.ci/test-coverage.yml
@@ -198,6 +198,7 @@ jobs:
       checkout: self
       with:
         strategy: FRESH_CHECKOUT
+        enableSubmodule: false
         enableGitLfs: false
 
     - name: install-deps
@@ -252,6 +253,7 @@ jobs:
       checkout: self
       with:
         strategy: FRESH_CHECKOUT
+        enableSubmodule: false
         enableGitLfs: false
 
     - name: install-deps
@@ -356,6 +358,7 @@ jobs:
       checkout: self
       with:
         strategy: FRESH_CHECKOUT
+        enableSubmodule: false
         enableGitLfs: false
 
     - name: install-deps
@@ -441,6 +444,7 @@ jobs:
       checkout: self
       with:
         strategy: FRESH_CHECKOUT
+        enableSubmodule: false
         enableGitLfs: false
 
     - name: install-deps
@@ -519,6 +523,7 @@ jobs:
       checkout: self
       with:
         strategy: FRESH_CHECKOUT
+        enableSubmodule: false
         enableGitLfs: false
 
     - name: install-deps
@@ -597,6 +602,7 @@ jobs:
       checkout: self
       with:
         strategy: FRESH_CHECKOUT
+        enableSubmodule: false
         enableGitLfs: false
 
     - name: install-deps
@@ -677,6 +683,7 @@ jobs:
       checkout: self
       with:
         strategy: FRESH_CHECKOUT
+        enableSubmodule: false
         enableGitLfs: false
 
     - name: install-deps

From b4c2d9a895aa69a9fe9efe0ad4ba754c104d9da4 Mon Sep 17 00:00:00 2001
From: nihuini <nihuini@tencent.com>
Date: Thu, 3 Nov 2022 15:15:28 +0800
Subject: [PATCH 08/10] fix

---
 .ci/pnnx.yml         |  1 -
 src/layer/fold.cpp   |  4 +++-
 src/layer/unfold.cpp | 11 ++++++++---
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/.ci/pnnx.yml b/.ci/pnnx.yml
index 5db6950e6b1..e3760d111aa 100644
--- a/.ci/pnnx.yml
+++ b/.ci/pnnx.yml
@@ -53,7 +53,6 @@ jobs:
       checkout: self
       with:
         strategy: FRESH_CHECKOUT
-        enableSubmodule: false
         enableGitLfs: false
 
     - name: install-deps
diff --git a/src/layer/fold.cpp b/src/layer/fold.cpp
index 61f0d7791a8..a5e35075652 100644
--- a/src/layer/fold.cpp
+++ b/src/layer/fold.cpp
@@ -115,7 +115,9 @@ int Fold::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons
 
     if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)
     {
-        copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt);
+        Option opt_b = opt;
+        opt_b.use_packing_layout = false;
+        copy_cut_border(top_blob_bordered, top_blob, pad_top, pad_bottom, pad_left, pad_right, opt_b);
         if (top_blob.empty())
             return -100;
     }
diff --git a/src/layer/unfold.cpp b/src/layer/unfold.cpp
index a2e2e053ff8..f747a169ced 100644
--- a/src/layer/unfold.cpp
+++ b/src/layer/unfold.cpp
@@ -41,9 +41,14 @@ int Unfold::load_param(const ParamDict& pd)
 int Unfold::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
     Mat bottom_blob_bordered;
-    make_padding(bottom_blob, bottom_blob_bordered, opt);
-    if (bottom_blob_bordered.empty())
-        return -100;
+    {
+        Option opt_b = opt;
+        opt_b.blob_allocator = opt.workspace_allocator;
+        opt_b.use_packing_layout = false;
+        make_padding(bottom_blob, bottom_blob_bordered, opt_b);
+        if (bottom_blob_bordered.empty())
+            return -100;
+    }
 
     const int w = bottom_blob_bordered.w;
     const int h = bottom_blob_bordered.h;

From 462228b975b13d414b5fecce963ab728b25acef6 Mon Sep 17 00:00:00 2001
From: nihuini <nihuini@tencent.com>
Date: Thu, 3 Nov 2022 16:21:02 +0800
Subject: [PATCH 09/10] warning--

---
 tools/pnnx/src/pass_level1/nn_Fold.cpp   | 2 +-
 tools/pnnx/src/pass_level1/nn_Unfold.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/pnnx/src/pass_level1/nn_Fold.cpp b/tools/pnnx/src/pass_level1/nn_Fold.cpp
index a05d8a80402..045c1f6f1ba 100644
--- a/tools/pnnx/src/pass_level1/nn_Fold.cpp
+++ b/tools/pnnx/src/pass_level1/nn_Fold.cpp
@@ -31,7 +31,7 @@ class Fold : public FuseModulePass
         return "nn.Fold";
     }
 
-    void write(Operator* op, const std::shared_ptr<torch::jit::Graph>& graph, const torch::jit::Module& mod) const
+    void write(Operator* op, const std::shared_ptr<torch::jit::Graph>& graph) const
     {
         const torch::jit::Node* col2im = find_node_by_kind(graph, "aten::col2im");
 
diff --git a/tools/pnnx/src/pass_level1/nn_Unfold.cpp b/tools/pnnx/src/pass_level1/nn_Unfold.cpp
index 974c5422ea0..1abf6201a83 100644
--- a/tools/pnnx/src/pass_level1/nn_Unfold.cpp
+++ b/tools/pnnx/src/pass_level1/nn_Unfold.cpp
@@ -31,7 +31,7 @@ class Unfold : public FuseModulePass
         return "nn.Unfold";
     }
 
-    void write(Operator* op, const std::shared_ptr<torch::jit::Graph>& graph, const torch::jit::Module& mod) const
+    void write(Operator* op, const std::shared_ptr<torch::jit::Graph>& graph) const
     {
         const torch::jit::Node* im2col = find_node_by_kind(graph, "aten::im2col");
 

From cadc925b31a9754a44bb33fb111c3307dd53f41f Mon Sep 17 00:00:00 2001
From: nihuini <nihuini@tencent.com>
Date: Thu, 3 Nov 2022 17:16:50 +0800
Subject: [PATCH 10/10] naive fold

---
 src/layer/fold.cpp | 40 ++++++++++++++++------------------------
 1 file changed, 16 insertions(+), 24 deletions(-)

diff --git a/src/layer/fold.cpp b/src/layer/fold.cpp
index a5e35075652..c14f01fbb72 100644
--- a/src/layer/fold.cpp
+++ b/src/layer/fold.cpp
@@ -73,42 +73,34 @@ int Fold::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons
         return -100;
 
     // col2im
+    const int gap = outw * stride_h - inw * stride_w;
+
     #pragma omp parallel for num_threads(opt.num_threads)
     for (int p = 0; p < channels; p++)
     {
-        float* ptr = top_blob_bordered.channel(p);
+        const float* sptr = bottom_blob.row(p * maxk);
+        Mat outm = top_blob_bordered.channel(p);
+
+        outm.fill(0.f);
 
-        for (int i = 0; i < outh; i++)
+        for (int u = 0; u < kernel_h; u++)
         {
-            for (int j = 0; j < outw; j++)
+            for (int v = 0; v < kernel_w; v++)
             {
-                float sum = 0.f;
-
-                const int sx_start = (j < kernel_extent_w) ? 0 : (j - kernel_extent_w) / stride_w + 1;
-                const int sx_end = std::min(j / stride_w + 1, inw);
-
-                const int sy_start = (i < kernel_extent_h) ? 0 : (i - kernel_extent_h) / stride_h + 1;
-                const int sy_end = std::min(i / stride_h + 1, inh);
+                float* ptr = outm.row(dilation_h * u) + dilation_w * v;
 
-                for (int sy = sy_start; sy < sy_end; sy += 1)
+                for (int i = 0; i < inh; i++)
                 {
-                    for (int sx = sx_start; sx < sx_end; sx += 1)
+                    for (int j = 0; j < inw; j++)
                     {
-                        int h_k = (i - sy * stride_h);
-                        int w_k = (j - sx * stride_w);
+                        ptr[0] += sptr[0];
 
-                        if (h_k % dilation_h == 0 && w_k % dilation_w == 0)
-                        {
-                            h_k /= dilation_h;
-                            w_k /= dilation_w;
-
-                            sum += bottom_blob.row(p * maxk + h_k * kernel_w + w_k)[sy * inw + sx];
-                        }
+                        ptr += stride_w;
+                        sptr += 1;
                     }
-                }
 
-                ptr[0] = sum;
-                ptr += 1;
+                    ptr += gap;
+                }
             }
         }
     }