From 3e67cb2b0c451462bc6a916947586ff38f536902 Mon Sep 17 00:00:00 2001 From: minghaipeng Date: Tue, 25 Jul 2023 06:12:45 +0000 Subject: [PATCH 1/6] [Paddle-TRT] add flip op --- .../fluid/inference/api/analysis_predictor.cc | 1 + .../inference/tensorrt/convert/CMakeLists.txt | 3 +- .../inference/tensorrt/convert/flip_op.cc | 83 ++++++++++ paddle/fluid/inference/tensorrt/op_teller.cc | 23 ++- test/ir/inference/test_trt_convert_flip.py | 144 ++++++++++++++++++ 5 files changed, 251 insertions(+), 3 deletions(-) create mode 100644 paddle/fluid/inference/tensorrt/convert/flip_op.cc create mode 100644 test/ir/inference/test_trt_convert_flip.py diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index d9899e27e24af7..b604caa0ea51c3 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2917,6 +2917,7 @@ USE_TRT_CONVERTER(preln_groupnorm_act) USE_TRT_CONVERTER(cumsum) USE_TRT_CONVERTER(assign) USE_TRT_CONVERTER(unbind) +USE_TRT_CONVERTER(flip) #if IS_TRT_VERSION_GE(8522) USE_TRT_CONVERTER(flash_multihead_matmul) USE_TRT_CONVERTER(cross_multihead_matmul) diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 2420033c08695f..6b7fd4324010d6 100755 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -108,7 +108,8 @@ list( temporal_shift_op.cc einsum_op.cc unbind_op.cc - assign_op.cc) + assign_op.cc + flip_op.cc) if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7) list(APPEND CONVERT_FILES emb_eltwise_layernorm.cc diff --git a/paddle/fluid/inference/tensorrt/convert/flip_op.cc b/paddle/fluid/inference/tensorrt/convert/flip_op.cc new file mode 100644 index 00000000000000..ea38d6b174b926 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/flip_op.cc @@ -0,0 +1,83 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +class FlipOpConverter : public OpConverter { + public: + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, + bool test_mode) override { + VLOG(4) << "convert a flip op to tensorrt layer"; + + framework::OpDesc op_desc(op, nullptr); + // Declare inputs + auto* input = engine_->GetITensor(op_desc.Input("X")[0]); + auto input_dims = input->getDimensions(); + + // Get Attrs + std::vector axis = + PADDLE_GET_CONST(std::vector, op_desc.GetAttr("axis")); + for (size_t i = 0; i < axis.size(); ++i) { + axis[i] += (axis[i] < 0) ? 
input_dims.nbDims : 0; + } + + nvinfer1::ITensor* shape_tensor = Shape(input); + // getAxisLength default is a scalar + auto getAxisLength = [&](int axis, bool scalar = true) { + int d = input_dims.d[axis]; + if (d >= 0) { + return Add1DConstantLayer(d, "", scalar); + } else { + return GetEleTensorOfShape(shape_tensor, axis, scalar); + } + }; + for (size_t i = 0; i < axis.size(); ++i) { + auto loop = TRT_ENGINE_ADD_LAYER(engine_, Loop); + nvinfer1::ITensor* tripLimit = getAxisLength(axis[i]); + loop->addTripLimit(*tripLimit, nvinfer1::TripLimit::kCOUNT); + auto iterator = loop->addIterator(*input, axis[i], true); + std::vector zero_vec{0}; + std::vector one_vec{1}; + auto zero = Add1DConstantLayer(zero_vec); + auto one = Add1DConstantLayer(one_vec); + nvinfer1::IRecurrenceLayer* iRec = loop->addRecurrence(*zero); + nvinfer1::ITensor* iCur = iRec->getOutput(0); + auto iNext = TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *iCur, + *one, + nvinfer1::ElementWiseOperation::kSUM); + iRec->setInput(1, *iNext->getOutput(0)); + nvinfer1::ILoopOutputLayer* loopOut = loop->addLoopOutput( + *iterator->getOutput(0), nvinfer1::LoopOutput::kCONCATENATE, axis[i]); + loopOut->setInput(1, *tripLimit); + input = loopOut->getOutput(0); + } + + auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Identity, *input); + auto output_name = op_desc.Output("Out")[0]; + RreplenishLayerAndOutput(layer, "flip", {output_name}, test_mode); + } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(flip, FlipOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index c4a700856fafcd..85c07686909a2e 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -2747,6 +2747,23 @@ struct SimpleOpTypeSetTeller : public Teller { #endif } + if (op_type == "flip") { + if (!desc.HasAttr("axis")) { + VLOG(3) << "The necessary attributes of the flip operator axis " + " is missing."; + return false; + } + if (!with_dynamic_shape) { + VLOG(3) << "the flip does not support " + "static shape yet"; + return false; + } +#if !IS_TRT_VERSION_GE(7220) + VLOG(3) << "flip is not supported when TensorRT blow 7.2.2"; + return false; +#endif + } + if (use_no_calib_int8) { return int8_teller_set.count(op_type); } else { @@ -2917,7 +2934,8 @@ struct SimpleOpTypeSetTeller : public Teller { "grid_sampler", "cumsum", "unbind", - "assign"}; + "assign", + "flip"}; std::unordered_set teller_set{ "matrix_multiply", @@ -3081,7 +3099,8 @@ struct SimpleOpTypeSetTeller : public Teller { "grid_sampler", "cumsum", "unbind", - "assign"}; + "assign", + "flip"}; }; struct GenericPluginTeller : public Teller { diff --git a/test/ir/inference/test_trt_convert_flip.py b/test/ir/inference/test_trt_convert_flip.py new file mode 100644 index 00000000000000..34de1adececc0f --- /dev/null +++ b/test/ir/inference/test_trt_convert_flip.py @@ -0,0 +1,144 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import random +import unittest +from functools import partial +from typing import List + +import numpy as np +from program_config import ProgramConfig, TensorConfig +from trt_layer_auto_scan_test import TrtLayerAutoScanTest + +import paddle.inference as paddle_infer + + +class TrtConvertFlipTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + ver = paddle_infer.get_trt_compile_version() + if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 7220: + return False + return True + + def sample_program_configs(self): + def generate_input(batch): + if self.dims == 4: + return np.random.random([batch, 3, 3, 24]).astype(np.float32) + elif self.dims == 3: + return np.random.random([batch, 3, 24]).astype(np.float32) + elif self.dims == 2: + return np.random.random([batch, 24]).astype(np.float32) + elif self.dims == 1: + return np.random.random([24]).astype(np.int32) + + def generate_axis(is_int): + if is_int: + return random.randint(0, self.dims - 1) + return np.arange(self.dims).tolist() + + for dims in [2, 3, 4]: + for batch in [3, 6, 9]: + for is_int in [True, False]: + self.dims = dims + axis = generate_axis(is_int) + ops_config = [ + { + "op_type": "flip", + "op_inputs": { + "X": ["input_data"], + }, + "op_outputs": {"Out": ["output_data"]}, + "op_attrs": {"axis": axis}, + } + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data": TensorConfig( + data_gen=partial(generate_input, batch) + ), + }, + outputs=["output_data"], + ) + + yield program_config + + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + if self.dims == 4: + self.dynamic_shape.min_input_shape = { + "input_data": [1, 3 - 1, 3 - 1, 24 - 1] + } + self.dynamic_shape.max_input_shape = { + "input_data": [9, 3 + 1, 3 + 1, 24 + 1] + } + self.dynamic_shape.opt_input_shape = { + "input_data": [1, 3, 3, 24] + } + elif self.dims == 3: + self.dynamic_shape.min_input_shape = { + "input_data": [1, 3 - 1, 24 - 1] + } + self.dynamic_shape.max_input_shape = { + "input_data": [9, 3 + 1, 24 + 1] + } + self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 24]} + elif self.dims == 2: + self.dynamic_shape.min_input_shape = {"input_data": [1, 24]} + self.dynamic_shape.max_input_shape = {"input_data": [9, 24]} + self.dynamic_shape.opt_input_shape = {"input_data": [1, 24]} + elif self.dims == 1: + self.dynamic_shape.min_input_shape = {"input_data": [24 - 1]} + self.dynamic_shape.max_input_shape = {"input_data": [24 + 1]} + self.dynamic_shape.opt_input_shape = {"input_data": [24]} + + def clear_dynamic_shape(): + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + ver = paddle_infer.get_trt_compile_version() + if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 7220: + return 0, 3 + return 1, 2 + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + self.trt_param.max_batch_size = 9 + self.trt_param.workspace_size = 102400 + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-5 + self.trt_param.precision = 
paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-3 + + def test(self): + self.run_test() + + +if __name__ == "__main__": + unittest.main() From b78c1771e8625a98d0568675b89cfdd6836defb4 Mon Sep 17 00:00:00 2001 From: minghaipeng Date: Tue, 25 Jul 2023 11:44:13 +0000 Subject: [PATCH 2/6] modify text info --- paddle/fluid/inference/tensorrt/convert/flip_op.cc | 2 +- paddle/fluid/inference/tensorrt/op_teller.cc | 2 +- test/ir/inference/test_trt_convert_flip.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/flip_op.cc b/paddle/fluid/inference/tensorrt/convert/flip_op.cc index ea38d6b174b926..53ac35e690551d 100644 --- a/paddle/fluid/inference/tensorrt/convert/flip_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/flip_op.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 85c07686909a2e..2adeb43046d262 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -2759,7 +2759,7 @@ struct SimpleOpTypeSetTeller : public Teller { return false; } #if !IS_TRT_VERSION_GE(7220) - VLOG(3) << "flip is not supported when TensorRT blow 7.2.2"; + VLOG(3) << "flip is not supported when TensorRT below 7.2.2"; return false; #endif } diff --git a/test/ir/inference/test_trt_convert_flip.py b/test/ir/inference/test_trt_convert_flip.py index 34de1adececc0f..ec2fda8bd877c6 100644 --- a/test/ir/inference/test_trt_convert_flip.py +++ b/test/ir/inference/test_trt_convert_flip.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 69864a757d2920ab823ef90678f247607408f06d Mon Sep 17 00:00:00 2001 From: minghaipeng Date: Wed, 26 Jul 2023 05:51:18 +0000 Subject: [PATCH 3/6] remove test on int type --- test/ir/inference/test_trt_convert_flip.py | 56 ++++++++++------------ 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/test/ir/inference/test_trt_convert_flip.py b/test/ir/inference/test_trt_convert_flip.py index ec2fda8bd877c6..c983ebae33b525 100644 --- a/test/ir/inference/test_trt_convert_flip.py +++ b/test/ir/inference/test_trt_convert_flip.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import random import unittest from functools import partial from typing import List @@ -42,40 +41,37 @@ def generate_input(batch): elif self.dims == 1: return np.random.random([24]).astype(np.int32) - def generate_axis(is_int): - if is_int: - return random.randint(0, self.dims - 1) + def generate_axis(): return np.arange(self.dims).tolist() for dims in [2, 3, 4]: for batch in [3, 6, 9]: - for is_int in [True, False]: - self.dims = dims - axis = generate_axis(is_int) - ops_config = [ - { - "op_type": "flip", - "op_inputs": { - "X": ["input_data"], - }, - "op_outputs": {"Out": ["output_data"]}, - "op_attrs": {"axis": axis}, - } - ] - ops = self.generate_op_config(ops_config) - - program_config = ProgramConfig( - ops=ops, - weights={}, - inputs={ - "input_data": TensorConfig( - data_gen=partial(generate_input, batch) - ), + self.dims = dims + axis = generate_axis() + ops_config = [ + { + "op_type": "flip", + "op_inputs": { + "X": ["input_data"], }, - outputs=["output_data"], - ) - - yield program_config + "op_outputs": {"Out": ["output_data"]}, + "op_attrs": {"axis": axis}, + } + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data": TensorConfig( + data_gen=partial(generate_input, batch) + ), + }, + outputs=["output_data"], + ) + + yield program_config def sample_predictor_configs( self, program_config From b549c6451ff35a64e213e996a4364eec1fc3186e Mon Sep 17 00:00:00 2001 From: minghaipeng Date: Wed, 26 Jul 2023 09:31:53 +0000 Subject: [PATCH 4/6] increase test workspace size --- test/ir/inference/test_trt_convert_flip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ir/inference/test_trt_convert_flip.py b/test/ir/inference/test_trt_convert_flip.py index c983ebae33b525..0ce78a71cf783f 100644 --- a/test/ir/inference/test_trt_convert_flip.py +++ b/test/ir/inference/test_trt_convert_flip.py @@ -119,7 +119,7 @@ def generate_trt_nodes_num(attrs, dynamic_shape): program_config.ops[i].attrs for i in range(len(program_config.ops)) ] self.trt_param.max_batch_size = 9 - self.trt_param.workspace_size = 102400 + self.trt_param.workspace_size = 1024000 # for dynamic_shape generate_dynamic_shape(attrs) From a8c1fbb6c741d3384768262de306b2aacc66ef1b Mon Sep 17 00:00:00 2001 From: minghaipeng Date: Wed, 26 Jul 2023 10:58:28 +0000 Subject: [PATCH 5/6] increase test workspace size --- test/ir/inference/test_trt_convert_flip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ir/inference/test_trt_convert_flip.py b/test/ir/inference/test_trt_convert_flip.py index 0ce78a71cf783f..442a3381e38439 100644 --- a/test/ir/inference/test_trt_convert_flip.py +++ b/test/ir/inference/test_trt_convert_flip.py @@ -119,7 +119,7 @@ def generate_trt_nodes_num(attrs, dynamic_shape): program_config.ops[i].attrs for i in range(len(program_config.ops)) ] self.trt_param.max_batch_size = 9 - self.trt_param.workspace_size = 1024000 + self.trt_param.workspace_size = 1073741824 # for dynamic_shape generate_dynamic_shape(attrs) From 150706a540879f2d22bb96443c29e597cb105b97 Mon Sep 17 00:00:00 2001 From: minghaipeng Date: Thu, 27 Jul 2023 03:30:22 +0000 Subject: [PATCH 6/6] delete attribute filter --- paddle/fluid/inference/tensorrt/op_teller.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 2adeb43046d262..b81bdec2688636 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ 
b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -2748,11 +2748,6 @@ struct SimpleOpTypeSetTeller : public Teller { } if (op_type == "flip") { - if (!desc.HasAttr("axis")) { - VLOG(3) << "The necessary attributes of the flip operator axis " - " is missing."; - return false; - } if (!with_dynamic_shape) { VLOG(3) << "the flip does not support " "static shape yet";
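Note: for each entry in `axis`, the converter in flip_op.cc builds a TensorRT ILoop whose IIterator walks the input slices along that axis in reverse order (the `true` argument to `addIterator`) and re-concatenates them along the same axis via `LoopOutput::kCONCATENATE`, after normalizing negative axis values by `input_dims.nbDims`. A minimal NumPy sketch of that behavior (illustrative only, not part of the patch) is:

import numpy as np

def flip_reference(x, axes):
    # Mirrors the converter's strategy: normalize negative axes, then for each
    # axis gather slices in reverse order and re-concatenate along that axis.
    out = x
    for axis in axes:
        axis = axis + out.ndim if axis < 0 else axis
        slices = [np.take(out, i, axis=axis)
                  for i in range(out.shape[axis] - 1, -1, -1)]
        out = np.stack(slices, axis=axis)
    return out

x = np.arange(24, dtype=np.float32).reshape(2, 3, 4)
assert np.array_equal(flip_reference(x, [0, -1]), np.flip(x, axis=(0, -1)))

The sketch is only a reference for reviewing the loop construction; the actual engine-side work is done by the ILoop/IIterator layers added in the converter.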