PaddlePaddle · luotao1 · Mar 14, 2023 · Feb 28, 2023 · Feb 28, 2023 · Mar 1, 2023
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2544,6 +2544,7 @@ USE_TRT_CONVERTER(grid_sampler)
 #endif
 #if IS_TRT_VERSION_GE(8200)
 USE_TRT_CONVERTER(set_value)
+USE_TRT_CONVERTER(temporal_shift)
 #endif
 #if PADDLE_WITH_CUSPARSELT && IS_TRT_VERSION_GE(8000)
 USE_TRT_CONVERTER(sparse_fc)

diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -101,7 +101,8 @@ list(
   elementwiseadd_transpose_op.cc
   skip_groupnorm_act_op.cc
   preln_groupnorm_act_op.cc
-  expand_v2_op.cc)
+  expand_v2_op.cc
+  temporal_shift_op.cc)
 
 if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7)
   list(APPEND CONVERT_FILES emb_eltwise_layernorm.cc

diff --git a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc
@@ -0,0 +1,191 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace framework {
+class Scope;
+
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+/*
+ * TemporalShiftOp.
+ *
+ * Lowers a Paddle `temporal_shift` op to TensorRT layers:
+ *   1. reshape X from [N*T, C, H, W] to [N, T, C, H, W];
+ *   2. pad one frame on each side of the T axis (Slice layer in kFILL mode);
+ *   3. cut three channel groups out of the padded tensor, each at a different
+ *      offset along T, and concatenate them along C;
+ *   4. reshape the result back to [N*T, C, H, W].
+ *
+ * NOTE(review): every extent is read from getDimensions() while the network
+ * is being defined, so this presumably requires all four input dimensions
+ * (including the batch) to be known at build time -- confirm against the op
+ * teller and the dynamic-shape unit test.
+ */
+class TemporalShiftOpConverter : public OpConverter {
+ public:
+  /*
+   * Adds the layers described above to engine_'s network.
+   *
+   * op:        serialized op; reads input "X", output "Out" and the attributes
+   *            "shift_ratio" (float) and "seg_num" (int).
+   * scope:     unused by this converter.
+   * test_mode: forwarded unchanged to RreplenishLayerAndOutput().
+   */
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope,
+                  bool test_mode) override {
+    VLOG(3) << "convert a fluid temporal_shift op to tensorrt layers";
+    framework::OpDesc op_desc(op, nullptr);
+    // Declare inputs
+    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
+
+    const float shift_ratio =
+        PADDLE_GET_CONST(float, op_desc.GetAttr("shift_ratio"));
+    const int T = PADDLE_GET_CONST(int, op_desc.GetAttr("seg_num"));
+
+    const auto input_dims = input->getDimensions();
+
+    const int NT = input_dims.d[0];
+    const int C = input_dims.d[1];
+    const int H = input_dims.d[2];
+    const int W = input_dims.d[3];
+    const int N = NT / T;
+
+    // Reshape input to [N,T,C,H,W]. Everything below must consume the
+    // reshaped 5-D tensor, not the original 4-D input.
+    constexpr int kRank = 5;
+    auto* reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+    const nvinfer1::Dims reshape_dims{kRank, {N, T, C, H, W}};
+    reshape_layer->setReshapeDimensions(reshape_dims);
+    nvinfer1::ITensor* reshaped = reshape_layer->getOutput(0);
+
+    // Pad input to [N,T+2,C,H,W]: a Slice in kFILL mode whose window starts
+    // one frame before the tensor and ends one frame after it.
+    const std::vector<int> pre_pad_v{0, 1, 0, 0, 0};
+    const std::vector<int> post_pad_v{0, 1, 0, 0, 0};
+    nvinfer1::ITensor* pre_pad = vectorToTensor<int>(pre_pad_v);
+    nvinfer1::ITensor* post_pad = vectorToTensor<int>(post_pad_v);
+    nvinfer1::ITensor* zeros =
+        vectorToTensor<int>(std::vector<int>(kRank, 0));
+
+    // start = 0 - pre_pad  ->  [0,-1,0,0,0]
+    nvinfer1::ITensor* start =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             ElementWise,
+                             *zeros,
+                             *pre_pad,
+                             nvinfer1::ElementWiseOperation::kSUB)
+            ->getOutput(0);
+
+    nvinfer1::ITensor* total_padding =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             ElementWise,
+                             *pre_pad,
+                             *post_pad,
+                             nvinfer1::ElementWiseOperation::kSUM)
+            ->getOutput(0);
+
+    // size = shape(reshaped) + pre_pad + post_pad  ->  [N,T+2,C,H,W]
+    const std::vector<int> reshaped_shape_v(reshape_dims.d,
+                                            reshape_dims.d + kRank);
+    nvinfer1::ITensor* reshaped_shape = vectorToTensor<int>(reshaped_shape_v);
+    nvinfer1::ITensor* size =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             ElementWise,
+                             *reshaped_shape,
+                             *total_padding,
+                             nvinfer1::ElementWiseOperation::kSUM)
+            ->getOutput(0);
+
+    nvinfer1::Dims stride;
+    stride.nbDims = kRank;
+    std::fill_n(stride.d, kRank, 1);
+    // The static start/size arguments are placeholders only; the real values
+    // are supplied through inputs 1 and 2 right below.
+    auto* pad_layer = TRT_ENGINE_ADD_LAYER(
+        engine_, Slice, *reshaped, stride, stride, stride);
+    pad_layer->setInput(1, *start);
+    pad_layer->setInput(2, *size);
+    pad_layer->setMode(nvinfer1::SliceMode::kFILL);
+    nvinfer1::ITensor* padded = pad_layer->getOutput(0);
+
+    // Slice Padded Tensor. The channel axis is split at slice_c and slice_c2:
+    //   [0, slice_c)         is read at T offset 0 of the padded tensor,
+    //   [slice_c, slice_c2)  is read at T offset 2,
+    //   [slice_c2, C)        is read at T offset 1 (the unshifted frames).
+    // The middle group must be slice_c2 - slice_c wide (not slice_c) so the
+    // three groups add up to C even when C * shift_ratio is fractional.
+    const int slice_c = static_cast<int>(C * shift_ratio);
+    const int slice_c2 = static_cast<int>(C * shift_ratio * 2);
+
+    std::vector<nvinfer1::ITensor*> concat_inputs;
+    auto add_channel_group = [&](int t_begin, int c_begin, int c_count) {
+      // Skip empty groups instead of creating zero-sized slices.
+      if (c_count <= 0) {
+        return;
+      }
+      const nvinfer1::Dims group_start{kRank, {0, t_begin, c_begin, 0, 0}};
+      const nvinfer1::Dims group_size{kRank, {N, T, c_count, H, W}};
+      auto* group_layer = TRT_ENGINE_ADD_LAYER(
+          engine_, Slice, *padded, group_start, group_size, stride);
+      concat_inputs.push_back(group_layer->getOutput(0));
+    };
+    add_channel_group(0, 0, slice_c);
+    add_channel_group(2, slice_c, slice_c2 - slice_c);
+    add_channel_group(1, slice_c2, C - slice_c2);
+
+    // Concatenate slices along the third dimension (C)
+    auto* concat_layer =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             Concatenation,
+                             concat_inputs.data(),
+                             static_cast<int>(concat_inputs.size()));
+    concat_layer->setAxis(2);
+
+    // Reshape output to [N*T,C,H,W]
+    const nvinfer1::Dims output_shape{4, {N * T, C, H, W}};
+    auto* output_layer =
+        TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *concat_layer->getOutput(0));
+    output_layer->setReshapeDimensions(output_shape);
+
+    // Set output
+    auto output_name = op_desc.Output("Out")[0];
+    RreplenishLayerAndOutput(
+        output_layer, "temporal_shift", {output_name}, test_mode);
+  }
+
+ private:
+  /*
+   * Creates a 1-D INT32 Constant layer holding the values of `v` and returns
+   * its output tensor.
+   *
+   * The values are copied into constant_storage_ first: the Weights struct
+   * only carries a pointer, and pointing it at the by-value parameter `v`
+   * would leave it dangling as soon as this function returns.
+   */
+  template <typename T>
+  nvinfer1::ITensor* vectorToTensor(std::vector<T> v) {
+    static_assert(std::is_integral<T>::value,
+                  "vectorToTensor only builds INT32 constants");
+
+    constant_storage_.push_back(
+        std::make_unique<std::vector<int32_t>>(v.begin(), v.end()));
+    const std::vector<int32_t>& stored = *constant_storage_.back();
+
+    nvinfer1::Weights v_wt{nvinfer1::DataType::kINT32,
+                           stored.data(),
+                           static_cast<int64_t>(stored.size())};
+
+    nvinfer1::Dims v_dim;
+    v_dim.nbDims = 1;
+    v_dim.d[0] = static_cast<int>(stored.size());
+
+    return TRT_ENGINE_ADD_LAYER(engine_, Constant, v_dim, v_wt)->getOutput(0);
+  }
+
+  // Owns the buffers referenced by the Constant layers created above. Each
+  // buffer sits behind its own unique_ptr so its address does not change when
+  // this vector grows.
+  // NOTE(review): assumes this converter object outlives the engine build,
+  // and the buffers are never released -- confirm; if the project has an
+  // engine-owned constant/weight helper, prefer that instead.
+  std::vector<std::unique_ptr<std::vector<int32_t>>> constant_storage_;
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(temporal_shift, TemporalShiftOpConverter);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -2579,6 +2579,13 @@ struct SimpleOpTypeSetTeller : public Teller {
 #endif
     }
 
+    if (op_type == "temporal_shift") {
+      // The converter is only registered under IS_TRT_VERSION_GE(8200),
+      // i.e. TensorRT >= 8.2, so reject the op on older versions.
+#if !IS_TRT_VERSION_GE(8200)
+      VLOG(3) << "temporal_shift is not supported when TensorRT < 8.2";
+      return false;
+#endif
+    }
+
     if (use_no_calib_int8) {
       return int8_teller_set.count(op_type);
     } else {
@@ -2739,6 +2746,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "fuse_eleadd_transpose",
       "skip_groupnorm_act",
       "preln_groupnorm_act",
+      "temporal_shift",
       "grid_sampler"};
 
   std::unordered_set<std::string> teller_set{
@@ -2892,6 +2900,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "fuse_eleadd_transpose",
       "skip_groupnorm_act",
       "preln_groupnorm_act",
+      "temporal_shift",
       "grid_sampler"};
 };
 

diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py
@@ -0,0 +1,105 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from functools import partial
+from typing import List
+
+import numpy as np
+from program_config import ProgramConfig, TensorConfig
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest
+
+import paddle.inference as paddle_infer
+
+
+class TrtConvertTemporalShiftTest(TrtLayerAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input1(attrs):
+            T = attrs[0]["seg_num"]
+            return np.ones([3 * T, 10, 64, 64]).astype(np.float32)
+
+        for shift_value in [0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.49]:
+            for T in range(2, 5):
+                dics = [{"shift_ratio": shift_value, "seg_num": T}, {}]
+
+                ops_config = [
+                    {
+                        "op_type": "temporal_shift",
+                        "op_inputs": {"X": ["input_data"]},
+                        "op_outputs": {"Out": ["output_data"]},
+                        "op_attrs": dics[0],
+                    }
+                ]
+
+                ops = self.generate_op_config(ops_config)
+                for i in range(10):
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={},
+                        inputs={
+                            "input_data": TensorConfig(
+                                data_gen=partial(generate_input1, dics)
+                            ),
+                        },
+                        outputs=["output_data"],
+                    )
+
+                yield program_config
+
+    def sample_predictor_configs(
+            self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {
+                "input_data": [6, 10, 64, 64]
+            }
+            self.dynamic_shape.max_input_shape = {
+                "input_data": [20, 10, 64, 64]
+            }
+            self.dynamic_shape.opt_input_shape = {
+                "input_data": [6, 10, 64, 64]
+            }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # # for static_shape
+        # clear_dynamic_shape()
+        # self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        # yield self.create_inference_config(), (1, 3), 1e-5
+        # self.trt_param.precision = paddle_infer.PrecisionType.Half
+        # yield self.create_inference_config(), (1, 3), 1e-3
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (0, 3), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (0, 3), 1e-3
+
+    def test(self):
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()