[Paddle-Inference] Matmul_int8_convert: tensor*tensor #37285

Merged 6 commits on Nov 24, 2021
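This PR converts matmul ops whose X and Y are both activation tensors (tensor*tensor, rather than tensor*weight) through new int8 TensorRT plugins, falling back to TensorRT's native MatrixMultiply layer at other precisions. For orientation, here is a minimal sketch of how the int8 TensorRT path is requested through the Paddle Inference API; the model path is a placeholder, and the EnableTensorRtEngine signature follows the Paddle 2.x headers but may differ between releases:

#include "paddle_inference_api.h"

int main() {
  // Placeholder model directory; point this at a real inference model.
  paddle_infer::Config config("./model_dir");
  config.EnableUseGpu(100 /* initial GPU memory pool (MB) */, 0 /* device */);
  // kInt8 is the precision the converter below checks for before it picks
  // the MatmulPlugin / MatmulPluginDynamic path.
  config.EnableTensorRtEngine(1 << 30 /* workspace bytes */, 1 /* max batch */,
                              3 /* min subgraph size */,
                              paddle_infer::PrecisionType::kInt8,
                              false /* use_static */,
                              true /* use_calib_mode */);
  auto predictor = paddle_infer::CreatePredictor(config);
  return 0;
}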
195 changes: 123 additions & 72 deletions paddle/fluid/inference/tensorrt/convert/matmul_op.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/plugin/matmul_op_int8_plugin.h"
 
 namespace paddle {
 namespace framework {
@@ -35,99 +36,149 @@ class MatMulOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(3) << "convert a fluid matmul op to tensorrt mul layer without bias";
-
+    VLOG(3) << "convert a fluid matmul op to tensorrt matmul layer ";
     framework::OpDesc op_desc(op, nullptr);
+    nvinfer1::ILayer* layer = nullptr;
 
     // Declare inputs
     auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]);
     auto* input2 = engine_->GetITensor(op_desc.Input("Y")[0]);
 
+    nvinfer1::Dims dims_x = input1->getDimensions();
+    nvinfer1::Dims dims_y = input2->getDimensions();
+
     bool transpose_X = BOOST_GET_CONST(bool, op_desc.GetAttr("transpose_X"));
     bool transpose_Y = BOOST_GET_CONST(bool, op_desc.GetAttr("transpose_Y"));
 
+    auto output_name = op_desc.Output("Out")[0];
+    float alpha = 1;
+    if (op_desc.HasAttr("alpha")) {
+      float alpha_tem = BOOST_GET_CONST(float, op_desc.GetAttr("alpha"));
+      alpha = alpha_tem;
+    }
     nvinfer1::MatrixOperation matrix_operation_X =
         transpose_X ? nvinfer1::MatrixOperation::kTRANSPOSE
                     : nvinfer1::MatrixOperation::kNONE;
     nvinfer1::MatrixOperation matrix_operation_Y =
         transpose_Y ? nvinfer1::MatrixOperation::kTRANSPOSE
                     : nvinfer1::MatrixOperation::kNONE;
 
-    auto* layer =
-        TRT_ENGINE_ADD_LAYER(engine_, MatrixMultiply, *input1,
-                             matrix_operation_X, *input2, matrix_operation_Y);
-
-    float alpha = BOOST_GET_CONST(float, op_desc.GetAttr("alpha"));
-    auto output_name = op_desc.Output("Out")[0];
-    if (fabs(alpha - 1.0) < std::numeric_limits<float>::epsilon()) {
-      engine_->SetITensor(output_name, layer->getOutput(0));
-    } else {
-      // IScaleLayer requires the input must have at least
-      // three dimensions in static shape mode and at least
-      // four dimensions in dynamic shape mode.
-      auto* matmul_out = layer->getOutput(0);
-      nvinfer1::Dims out_shape = matmul_out->getDimensions();
-      const int out_dims = out_shape.nbDims;
-      bool need_change_dim = false;
-
+    if (op_desc.HasAttr("support_int8") &&
+        engine_->precision() == AnalysisConfig::Precision::kInt8) {
       if (engine_->with_dynamic_shape()) {
-        if (out_dims == 3) {
-          need_change_dim = true;
-        }
+        VLOG(3) << "Convert a fluid matmul_op_int8_dynamic to TensorRT "
+                   "MatmulPluginLayer";
+        plugin::MatmulPluginDynamic* plugin =
+            new plugin::MatmulPluginDynamic(transpose_X, transpose_Y, alpha);
+        std::vector<nvinfer1::ITensor*> inputs{input1, input2};
+        layer = engine_->AddDynamicPlugin(inputs.data(), inputs.size(), plugin);
+        RreplenishLayerAndOutput(layer, "matmul_op_int8_dynamic", {output_name},
+                                 test_mode);
       } else {
-        if (out_dims == 2) {
-          need_change_dim = true;
-        }
+        VLOG(3) << "Convert a fluid matmul_op_int8_static to TensorRT "
+                   "MatmulPluginLayer";
+        plugin::MatmulPlugin* plugin = new plugin::MatmulPlugin(
+            dims_x, dims_y, transpose_X, transpose_Y, alpha);
+        std::vector<nvinfer1::ITensor*> inputs{input1, input2};
+        layer = engine_->AddPluginV2IOExt(inputs.data(), inputs.size(), plugin);
+        RreplenishLayerAndOutput(layer, "matmul_op_int8_static", {output_name},
+                                 test_mode);
       }
-
-      if (need_change_dim) {
-        nvinfer1::Dims reshape_dim;
-        reshape_dim.nbDims = out_dims + 1;
-        reshape_dim.d[out_dims] = 1;
-        for (int i = 0; i < out_dims; i++) {
-          reshape_dim.d[i] = out_shape.d[i];
-        }
-
-        auto* reshape_layer =
-            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *matmul_out);
-        reshape_layer->setReshapeDimensions(reshape_dim);
-        matmul_out = reshape_layer->getOutput(0);
-      }
-
-      auto create_weights = [&](float data, const std::string& type) -> float* {
-        std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
-        tmp_tensor->Resize({1});
-        auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
-        tmp_data[0] = data;
-        engine_->SetWeights(output_name + "_add_scale_op_" + type,
-                            std::move(tmp_tensor));
-        return tmp_data;
-      };
-      float* alpha_data = create_weights(alpha, "alpha");
-      float* shift_data = create_weights(0.0, "shift");
-      float* power_data = create_weights(1.0, "power");
-      TensorRTEngine::Weight nv_alpha{nvinfer1::DataType::kFLOAT,
-                                      static_cast<void*>(alpha_data), 1};
-      TensorRTEngine::Weight nv_shift{nvinfer1::DataType::kFLOAT,
-                                      static_cast<void*>(shift_data), 1};
-      TensorRTEngine::Weight nv_power{nvinfer1::DataType::kFLOAT,
-                                      static_cast<void*>(power_data), 1};
-      auto* scale_layer = TRT_ENGINE_ADD_LAYER(
-          engine_, Scale, *matmul_out, nvinfer1::ScaleMode::kUNIFORM,
-          nv_shift.get(), nv_alpha.get(), nv_power.get());
-      auto* scale_out = scale_layer->getOutput(0);
-
-      if (need_change_dim) {
-        auto* reshape_layer =
-            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scale_out);
-        reshape_layer->setReshapeDimensions(out_shape);
-        scale_out = reshape_layer->getOutput(0);
-      }
-
-      engine_->SetITensor(output_name, scale_out);
-    }
-    if (test_mode) {  // the test framework can not determine which is the
-                      // output, so place the declaration inside.
-      engine_->DeclareOutput(output_name);
-    }
+    } else {
+      VLOG(3) << "Convert a fluid matmul_op_float to TensorRT ";
+      layer =
+          TRT_ENGINE_ADD_LAYER(engine_, MatrixMultiply, *input1,
+                               matrix_operation_X, *input2, matrix_operation_Y);
+      if (alpha == 1) {
+        RreplenishLayerAndOutput(layer, "matmul_op_float_no_alpha",
+                                 {output_name}, test_mode);
+      } else {
+        layer->setName(
+            ("matmul_op_float_has_alpha: MatrixMultiplyLayer (Output: " +
+             output_name + ")")
+                .c_str());
+        // IScaleLayer requires the input must have at least
+        // three dimensions in static shape mode and at least
+        // four dimensions in dynamic shape mode.
+        auto* matmul_out = layer->getOutput(0);
+        nvinfer1::Dims out_shape = matmul_out->getDimensions();
+        const int out_dims = out_shape.nbDims;
+        bool need_change_dim = false;
+
+        if (engine_->with_dynamic_shape()) {
+          if (out_dims == 3) {
+            need_change_dim = true;
+          }
+        } else {
+          if (out_dims == 2) {
+            need_change_dim = true;
+          }
+        }
+
+        if (need_change_dim) {
+          nvinfer1::Dims reshape_dim;
+          reshape_dim.nbDims = out_dims + 1;
+          reshape_dim.d[out_dims] = 1;
+          for (int i = 0; i < out_dims; i++) {
+            reshape_dim.d[i] = out_shape.d[i];
+          }
+
+          auto* reshape_layer =
+              TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *matmul_out);
+          reshape_layer->setReshapeDimensions(reshape_dim);
+          matmul_out = reshape_layer->getOutput(0);
+          reshape_layer->setName(("matmul_op_float_has_alpha_reshape_before: "
+                                  "ShuffleLayer (Output: " +
+                                  output_name + ")")
+                                     .c_str());
+        }
+
+        auto create_weights = [&](float data,
+                                  const std::string& type) -> float* {
+          std::unique_ptr<framework::Tensor> tmp_tensor(
+              new framework::Tensor());
+          tmp_tensor->Resize({1});
+          auto* tmp_data =
+              tmp_tensor->mutable_data<float>(platform::CPUPlace());
+          tmp_data[0] = data;
+          engine_->SetWeights(output_name + "_add_scale_op_" + type,
+                              std::move(tmp_tensor));
+          return tmp_data;
+        };
+        float* alpha_data = create_weights(alpha, "alpha");
+        float* shift_data = create_weights(0.0, "shift");
+        float* power_data = create_weights(1.0, "power");
+        TensorRTEngine::Weight nv_alpha{nvinfer1::DataType::kFLOAT,
+                                        static_cast<void*>(alpha_data), 1};
+        TensorRTEngine::Weight nv_shift{nvinfer1::DataType::kFLOAT,
+                                        static_cast<void*>(shift_data), 1};
+        TensorRTEngine::Weight nv_power{nvinfer1::DataType::kFLOAT,
+                                        static_cast<void*>(power_data), 1};
+        auto* scale_layer = TRT_ENGINE_ADD_LAYER(
+            engine_, Scale, *matmul_out, nvinfer1::ScaleMode::kUNIFORM,
+            nv_shift.get(), nv_alpha.get(), nv_power.get());
+        auto* scale_out = scale_layer->getOutput(0);
+        scale_layer->setName(
+            ("matmul_op_float_has_alpha: ScaleLayer (Output: " + output_name +
+             ")")
+                .c_str());
+
+        if (need_change_dim) {
+          auto* reshape_layer =
+              TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scale_out);
+          reshape_layer->setReshapeDimensions(out_shape);
+          scale_out = reshape_layer->getOutput(0);
+          reshape_layer->setName(("matmul_op_float_has_alpha_reshape_after: "
+                                  "ShuffleLayer (Output: " +
+                                  output_name + ")")
+                                     .c_str());
+        }
+        engine_->SetITensor(output_name, scale_out);
+        if (test_mode) {  // the test framework can not determine which is the
+                          // output, so place the declaration inside.
+          engine_->DeclareOutput(output_name);
+        }
+      }
+    }
   }
 };
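In the float branch above, an alpha other than 1 is applied with a TensorRT IScaleLayer, which, as the comment in the converter notes, requires at least three input dimensions in static shape mode and four in dynamic shape mode. The converter therefore appends a trailing unit dimension before the scale and strips it again afterwards. Below is a standalone sketch of that pad-scale-restore pattern against the raw TensorRT API, assuming a caller-owned network, input tensor, and weight buffers; the converter itself issues the same calls through TRT_ENGINE_ADD_LAYER and TensorRTEngine::Weight, and this sketch mirrors only the static-shape logic (dimensions containing -1 would need a different reshape mechanism):

#include <NvInfer.h>

// Pad a trailing unit dimension so IScaleLayer accepts a low-rank tensor,
// apply a uniform scale (x * scale + shift)^power, then restore the shape.
nvinfer1::ITensor* ScaleWithPaddedDim(nvinfer1::INetworkDefinition* network,
                                      nvinfer1::ITensor* input,
                                      nvinfer1::Weights shift,
                                      nvinfer1::Weights scale,
                                      nvinfer1::Weights power) {
  nvinfer1::Dims in_shape = input->getDimensions();
  nvinfer1::Dims padded = in_shape;  // struct copy also copies d[]
  padded.nbDims = in_shape.nbDims + 1;
  padded.d[in_shape.nbDims] = 1;     // e.g. {M, N} -> {M, N, 1}

  auto* pre = network->addShuffle(*input);
  pre->setReshapeDimensions(padded);

  auto* scaled = network->addScale(*pre->getOutput(0),
                                   nvinfer1::ScaleMode::kUNIFORM,
                                   shift, scale, power);

  auto* post = network->addShuffle(*scaled->getOutput(0));
  post->setReshapeDimensions(in_shape);  // back to the original rank
  return post->getOutput(0);
}

Appending the unit dimension at the end, rather than the front, leaves the leading batch-like dimensions untouched, and the second shuffle makes the workaround invisible to downstream layers.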
3 changes: 2 additions & 1 deletion paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt
@@ -12,7 +12,8 @@ nv_library(tensorrt_plugin
 mish_op_plugin.cu
 pool3d_op_plugin.cu
 deformable_conv_op_plugin.cu
-DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor)
+matmul_op_int8_plugin.cu
+DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor)
 
 nv_test(test_split_plugin SRCS test_split_plugin.cc DEPS
 paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_plugin)