From a452c6129f81626d07a9dd531299f3e4bf8a4a4b Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Tue, 12 Oct 2021 23:10:49 +0200
Subject: [PATCH 1/8] added softplus

---
 .../operators/mkldnn/softplus_mkldnn_op.cc    | 134 ++++++++++++++++++
 .../mkldnn/test_softplus_mkldnn_op.py         |  51 +++++++
 2 files changed, 185 insertions(+)
 create mode 100644 paddle/fluid/operators/mkldnn/softplus_mkldnn_op.cc
 create mode 100644 python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
diff --git a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.cc
new file mode 100644
index 00000000000000..e1c841a5b4d29e
--- /dev/null
+++ b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.cc
@@ -0,0 +1,134 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/platform/mkldnn_reuse.h"
+
+namespace paddle {
+namespace operators {
+
+using paddle::framework::Tensor;
+
+template <typename T>
+class SoftplusMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT<
+                                   T, dnnl::binary> {
+ public:
+  SoftplusMKLDNNHandler(const Tensor* x, const float beta,
+                         const mkldnn::engine engine, platform::Place cpu_place)
+      : platform::MKLDNNHandlerNoCachingT<T, dnnl::binary>(
+            engine, cpu_place) {
+
+    auto x_tz = framework::vectorize(x->dims());
+    auto x_md = dnnl::memory::desc(x_tz, platform::MKLDNNGetDataType<T>(), x->format());
+
+    auto beta_tz = std::vector<int64_t>(x_tz.size(), 1);
+    auto beta_md = dnnl::memory::desc(beta_tz, platform::MKLDNNGetDataType<T>(), x->format());
+
+    dnnl::post_ops po;
+    po.append_eltwise(1.0f, dnnl::algorithm::eltwise_soft_relu, 0.0f, 0.0f);
+    binary_ops.append_binary(dnnl::algorithm::binary_div, variance_md);
+    dnnl::primitive_attr attrs;
+    attrs.set_post_ops(po);
+
+    this->AcquireForwardPrimitiveDescriptor(attrs, dnnl::algorithm::binary_mul, x_md, beta_md,
+                                            x_md);
+  }
+
+  std::shared_ptr<mkldnn::memory> AcquireBetaMemory(
+      const float* beta) {
+    return this->AcquireMemoryFromPrimitive(fwd_pd_->src1_desc(),
+                                            to_void_cast<float>(beta));
+  }
+};
+
+
+template <typename T>
+class SoftplusMKLDNNKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    // if beta = 1 then we can simply use oneDNN soft_relu activation, in the other case, we need to use some binary + fused(eltwise + eltwise) combination
+    if(ctx.Attr<float>("beta"); == 1.0f) {
+        this->RunBaseKernel(ctx)
+    } else {
+        this->RunExtendedKernel(ctx)
+    }
+  }
+
+  void RunBaseKernel(const framework::ExecutionContext& ctx) const {
+    const auto& dev_ctx =
+        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    const auto& mkldnn_engine = dev_ctx.GetEngine();
+
+    auto* x = ctx.Input<Tensor>("X");
+    auto* out = ctx.Output<Tensor>("Out");
+
+    bool is_inplaced = x->IsSharedBufferWith(*out);
+
+    platform::ActivationMKLDNNHandler<T> handler(
+        mkldnn::algorithm::eltwise_soft_relu, ctx, mkldnn_engine, ctx.GetPlace(),
+        x);
+
+    auto src_memory_p = handler.AcquireSrcMemory(x);
+    auto dst_memory_p =
+        is_inplaced ? src_memory_p : handler.AcquireDstMemory(out);
+    auto activation_p = handler.AcquireForwardPrimitive();
+
+    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+    activation_p->execute(astream, {{MKLDNN_ARG_FROM, *src_memory_p},
+                                    {MKLDNN_ARG_TO, *dst_memory_p}});
+    astream.wait();
+
+    out->set_layout(framework::DataLayout::kMKLDNN);
+    out->set_format(platform::GetMKLDNNFormat(*dst_memory_p));
+  }
+
+  void RunExtendedKernel(const framework::ExecutionContext& ctx) const {
+    const auto& dev_ctx =
+        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    const auto& mkldnn_engine = dev_ctx.GetEngine();
+
+    auto* x = ctx.Input<Tensor>("X");
+    auto* out = ctx.Output<Tensor>("Out");
+
+    bool is_inplaced = x->IsSharedBufferWith(*out);
+
+    platform::ActivationMKLDNNHandler<T> handler(
+        mkldnn::algorithm::eltwise_soft_relu, ctx, mkldnn_engine, ctx.GetPlace(),
+        x);
+
+    auto src_memory_p = handler.AcquireSrcMemory(x);
+
+    const float beta = ctx.Attr<float>("beta");
+    auto beta_memory_p = handler.AcquireBetaMemory(&beta);
+    auto dst_memory_p =
+        is_inplaced ? src_memory_p : handler.AcquireDstMemory(out);
+    auto binary_p = handler.AcquireForwardPrimitive();
+
+    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+    activation_p->execute(astream, {{MKLDNN_ARG_FROM, *src_memory_p},
+                                    {MKLDNN_ARG_TO, *dst_memory_p}});
+    astream.wait();
+
+    out->set_layout(framework::DataLayout::kMKLDNN);
+    out->set_format(platform::GetMKLDNNFormat(*dst_memory_p));
+  }
+
+};
+
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_KERNEL(softplus, MKLDNN, paddle::platform::CPUPlace,
+                   ops::SoftplusMKLDNNKernel<float>);
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
new file mode 100644
index 00000000000000..4a21c649e79717
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
@@ -0,0 +1,51 @@
+#   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+from paddle.fluid.tests.unittests.op_test import OpTest
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+
+def ref_softplus(x, beta, threshold):
+    x_beta = beta * x
+    out = np.select([x_beta <= threshold, x_beta > threshold],
+                    [np.log(1 + np.exp(x_beta)) / beta, x])
+    return out
+
+class TestSoftplusOneDNNOp(OpTest):
+    def setUp(self):
+        self.op_type = "softplus"
+        self.beta = 1
+        self.threshold = 20
+        self.config()
+        self.inputs = {'X': np.random.random(self.x_shape).astype(np.float32)}
+        self.outputs = {
+            'Out': ref_softplus(self.inputs['X'], self.beta, self.threshold)
+        }
+
+    def config(self):
+        self.x_shape = (10, 10)
+        self.attrs = {'use_mkldnn': True}
+
+    def test_check_output(self):
+        self.check_output()
+
+
+if __name__ == "__main__":
+    paddle.enable_static()
+    unittest.main()

From dff0af970b238c2f3193239ed3ea6b829be25329 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Wed, 13 Oct 2021 01:11:52 +0200
Subject: [PATCH 2/8] refactored softplus op

---
 .../operators/mkldnn/activation_mkldnn_op.cc  | 20 ++++++++++++++
 .../mkldnn/test_softplus_mkldnn_op.py         | 26 ++++++++++++++++++-
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
index 603a70458b0ceb..96fbafe2d1a856 100644
--- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -13,6 +13,7 @@
    limitations under the License. */
 
 #include "paddle/fluid/operators/activation_op.h"
+#include "paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h"
 #include "paddle/fluid/platform/mkldnn_reuse.h"
 
 namespace paddle {
@@ -169,6 +170,20 @@ struct GeluMKLDNNGradFunctor : public BaseActivationFunctor<T> {
   }
 };
 
+template <typename T>
+struct SoftplusMKLDNNFunctor : public BaseActivationFunctor<T> {
+  void operator()(const framework::ExecutionContext &ctx) const {
+    const float beta = ctx.Attr<float>("beta");
+    // if beta is equal to 1.0f then we can simply use oneDNN's soft_relu but if
+    // it has other value, we have to fuse binary + eltwise + binary
+    if (beta == 1.0f) {
+      eltwise_forward<T>(ctx, mkldnn::algorithm::eltwise_soft_relu);
+    } else {
+      custom_softplus_eltwise_forward<T>(ctx);
+    }
+  }
+};
+
 template <typename T>
 using ReluMKLDNNFunctor =
     MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_relu>;
@@ -272,3 +287,8 @@ REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(gelu, GeluMKLDNNFunctor,
                                        GeluMKLDNNGradFunctor);
 REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(sigmoid, SigmoidMKLDNNFunctor,
                                        SigmoidMKLDNNGradFunctor);
+
+namespace ops = paddle::operators;
+REGISTER_OP_KERNEL(
+    softplus, MKLDNN, paddle::platform::CPUPlace,
+    ops::MKLDNNActivationKernel<ops::SoftplusMKLDNNFunctor<float>>);
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
index 4a21c649e79717..58434eaa2c4bef 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
@@ -21,18 +21,21 @@
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 
+
 def ref_softplus(x, beta, threshold):
     x_beta = beta * x
     out = np.select([x_beta <= threshold, x_beta > threshold],
                     [np.log(1 + np.exp(x_beta)) / beta, x])
     return out
 
+
 class TestSoftplusOneDNNOp(OpTest):
     def setUp(self):
         self.op_type = "softplus"
         self.beta = 1
         self.threshold = 20
         self.config()
+        self.attrs = {'use_mkldnn': True, 'beta': self.beta}
         self.inputs = {'X': np.random.random(self.x_shape).astype(np.float32)}
         self.outputs = {
             'Out': ref_softplus(self.inputs['X'], self.beta, self.threshold)
@@ -40,12 +43,33 @@ def setUp(self):
 
     def config(self):
         self.x_shape = (10, 10)
-        self.attrs = {'use_mkldnn': True}
 
     def test_check_output(self):
         self.check_output()
 
 
+class TestSoftplus4DOneDNNOp(TestSoftplusOneDNNOp):
+    def config(self):
+        self.x_shape = (10, 5, 4, 2)
+
+
+class TestSoftplus6DOneDNNOp(TestSoftplusOneDNNOp):
+    def config(self):
+        self.x_shape = (3, 2, 2, 5, 4, 2)
+
+
+class TestSoftplus6DExtendedFunctorOneDNNOp(TestSoftplusOneDNNOp):
+    def config(self):
+        self.x_shape = (3, 5, 2, 5, 4, 2)
+        self.beta = 2.5
+
+
+class TestSoftplus3DExtendedFunctorOneDNNOp(TestSoftplusOneDNNOp):
+    def config(self):
+        self.x_shape = (20, 4, 2)
+        self.beta = 0.4
+
+
 if __name__ == "__main__":
     paddle.enable_static()
     unittest.main()

From 91216c551981f97db9e9fe6acdd3e31ea9583998 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Wed, 13 Oct 2021 01:13:41 +0200
Subject: [PATCH 3/8] deleted unnecessary file

---
 .../operators/mkldnn/softplus_mkldnn_op.cc    | 134 ------------------
 1 file changed, 134 deletions(-)
 delete mode 100644 paddle/fluid/operators/mkldnn/softplus_mkldnn_op.cc

diff --git a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.cc
deleted file mode 100644
index e1c841a5b4d29e..00000000000000
--- a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.cc
+++ /dev/null
@@ -1,134 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-
-namespace paddle {
-namespace operators {
-
-using paddle::framework::Tensor;
-
-template <typename T>
-class SoftplusMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT<
-                                   T, dnnl::binary> {
- public:
-  SoftplusMKLDNNHandler(const Tensor* x, const float beta,
-                         const mkldnn::engine engine, platform::Place cpu_place)
-      : platform::MKLDNNHandlerNoCachingT<T, dnnl::binary>(
-            engine, cpu_place) {
-
-    auto x_tz = framework::vectorize(x->dims());
-    auto x_md = dnnl::memory::desc(x_tz, platform::MKLDNNGetDataType<T>(), x->format());
-
-    auto beta_tz = std::vector<int64_t>(x_tz.size(), 1);
-    auto beta_md = dnnl::memory::desc(beta_tz, platform::MKLDNNGetDataType<T>(), x->format());
-
-    dnnl::post_ops po;
-    po.append_eltwise(1.0f, dnnl::algorithm::eltwise_soft_relu, 0.0f, 0.0f);
-    binary_ops.append_binary(dnnl::algorithm::binary_div, variance_md);
-    dnnl::primitive_attr attrs;
-    attrs.set_post_ops(po);
-
-    this->AcquireForwardPrimitiveDescriptor(attrs, dnnl::algorithm::binary_mul, x_md, beta_md,
-                                            x_md);
-  }
-
-  std::shared_ptr<mkldnn::memory> AcquireBetaMemory(
-      const float* beta) {
-    return this->AcquireMemoryFromPrimitive(fwd_pd_->src1_desc(),
-                                            to_void_cast<float>(beta));
-  }
-};
-
-
-template <typename T>
-class SoftplusMKLDNNKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    // if beta = 1 then we can simply use oneDNN soft_relu activation, in the other case, we need to use some binary + fused(eltwise + eltwise) combination
-    if(ctx.Attr<float>("beta"); == 1.0f) {
-        this->RunBaseKernel(ctx)
-    } else {
-        this->RunExtendedKernel(ctx)
-    }
-  }
-
-  void RunBaseKernel(const framework::ExecutionContext& ctx) const {
-    const auto& dev_ctx =
-        ctx.template device_context<platform::MKLDNNDeviceContext>();
-    const auto& mkldnn_engine = dev_ctx.GetEngine();
-
-    auto* x = ctx.Input<Tensor>("X");
-    auto* out = ctx.Output<Tensor>("Out");
-
-    bool is_inplaced = x->IsSharedBufferWith(*out);
-
-    platform::ActivationMKLDNNHandler<T> handler(
-        mkldnn::algorithm::eltwise_soft_relu, ctx, mkldnn_engine, ctx.GetPlace(),
-        x);
-
-    auto src_memory_p = handler.AcquireSrcMemory(x);
-    auto dst_memory_p =
-        is_inplaced ? src_memory_p : handler.AcquireDstMemory(out);
-    auto activation_p = handler.AcquireForwardPrimitive();
-
-    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
-    activation_p->execute(astream, {{MKLDNN_ARG_FROM, *src_memory_p},
-                                    {MKLDNN_ARG_TO, *dst_memory_p}});
-    astream.wait();
-
-    out->set_layout(framework::DataLayout::kMKLDNN);
-    out->set_format(platform::GetMKLDNNFormat(*dst_memory_p));
-  }
-
-  void RunExtendedKernel(const framework::ExecutionContext& ctx) const {
-    const auto& dev_ctx =
-        ctx.template device_context<platform::MKLDNNDeviceContext>();
-    const auto& mkldnn_engine = dev_ctx.GetEngine();
-
-    auto* x = ctx.Input<Tensor>("X");
-    auto* out = ctx.Output<Tensor>("Out");
-
-    bool is_inplaced = x->IsSharedBufferWith(*out);
-
-    platform::ActivationMKLDNNHandler<T> handler(
-        mkldnn::algorithm::eltwise_soft_relu, ctx, mkldnn_engine, ctx.GetPlace(),
-        x);
-
-    auto src_memory_p = handler.AcquireSrcMemory(x);
-
-    const float beta = ctx.Attr<float>("beta");
-    auto beta_memory_p = handler.AcquireBetaMemory(&beta);
-    auto dst_memory_p =
-        is_inplaced ? src_memory_p : handler.AcquireDstMemory(out);
-    auto binary_p = handler.AcquireForwardPrimitive();
-
-    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
-    activation_p->execute(astream, {{MKLDNN_ARG_FROM, *src_memory_p},
-                                    {MKLDNN_ARG_TO, *dst_memory_p}});
-    astream.wait();
-
-    out->set_layout(framework::DataLayout::kMKLDNN);
-    out->set_format(platform::GetMKLDNNFormat(*dst_memory_p));
-  }
-
-};
-
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OP_KERNEL(softplus, MKLDNN, paddle::platform::CPUPlace,
-                   ops::SoftplusMKLDNNKernel<float>);

From 980be69aa250f97b048700d695d9a8bd80adb411 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Wed, 13 Oct 2021 01:14:20 +0200
Subject: [PATCH 4/8] added missing file

---
 .../operators/mkldnn/softplus_mkldnn_op.h     | 93 +++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h

diff --git a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h
new file mode 100644
index 00000000000000..c4eafac4bb7571
--- /dev/null
+++ b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h
@@ -0,0 +1,93 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/platform/mkldnn_reuse.h"
+
+namespace paddle {
+namespace operators {
+
+using paddle::framework::Tensor;
+
+template <typename T>
+class SoftplusMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT<
+                                   T, dnnl::binary> {
+ public:
+  SoftplusMKLDNNHandler(const Tensor* x, const float beta,
+                         const mkldnn::engine engine, platform::Place cpu_place)
+      : platform::MKLDNNHandlerNoCachingT<T, dnnl::binary>(
+            engine, cpu_place) {
+
+    auto x_tz = framework::vectorize(x->dims());
+    auto x_md = dnnl::memory::desc(x_tz, platform::MKLDNNGetDataType<T>(), x->format());
+
+    auto beta_tz = std::vector<int64_t>(x_tz.size(), 1);
+    auto beta_md = dnnl::memory::desc(beta_tz, platform::MKLDNNGetDataType<T>(), x->format());
+
+    dnnl::post_ops post_ops;
+    post_ops.append_eltwise(1.0f, dnnl::algorithm::eltwise_soft_relu, 0.0f, 0.0f);
+    post_ops.append_binary(dnnl::algorithm::binary_div, beta_md);
+    dnnl::primitive_attr attrs;
+    attrs.set_post_ops(post_ops);
+
+    this->AcquireForwardPrimitiveDescriptor(attrs, dnnl::algorithm::binary_mul, x_md, beta_md,
+                                            x_md);
+  }
+
+  std::shared_ptr<mkldnn::memory> AcquireBetaMemory(
+      const float* beta) {
+    return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src1_desc(),
+                                            platform::to_void_cast<float>(beta));
+  }
+};
+
+
+template <typename T>
+void custom_softplus_eltwise_forward(const framework::ExecutionContext &ctx) {
+    const auto& dev_ctx =
+        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    const auto& mkldnn_engine = dev_ctx.GetEngine();
+
+    const auto* x = ctx.Input<Tensor>("X");
+    auto* out = ctx.Output<Tensor>("Out");
+
+    bool is_inplaced = x->IsSharedBufferWith(*out);
+
+    const float beta = ctx.Attr<float>("beta");
+
+    SoftplusMKLDNNHandler<T> handler(
+        x, beta, mkldnn_engine, ctx.GetPlace());
+
+    auto src_memory_p = handler.AcquireSrcMemory(x);
+
+    auto beta_memory_p = handler.AcquireBetaMemory(&beta);
+    auto dst_memory_p =
+        is_inplaced ? src_memory_p : handler.AcquireDstMemory(out);
+    auto binary_p = handler.AcquireForwardPrimitive();
+
+    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+
+    const std::unordered_map<int, dnnl::memory> args = {
+        {DNNL_ARG_SRC_0, *src_memory_p},
+        {DNNL_ARG_SRC_1, *beta_memory_p},
+        {DNNL_ARG_DST, *dst_memory_p},
+        {DNNL_ARG_ATTR_MULTIPLE_POST_OP(1) | DNNL_ARG_SRC_1, *beta_memory_p}};
+
+    binary_p->execute(astream, args);
+    astream.wait();
+
+    out->set_layout(framework::DataLayout::kMKLDNN);
+    out->set_format(platform::GetMKLDNNFormat(*dst_memory_p));
+}
+}  // namespace operators
+}  // namespace paddle

From 1a0dd1c5a3bbe86345d0f7cc0fb788130d4ffbc8 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Wed, 13 Oct 2021 01:15:30 +0200
Subject: [PATCH 5/8] added formatting

---
 .../operators/mkldnn/softplus_mkldnn_op.h     | 80 +++++++++----------
 1 file changed, 39 insertions(+), 41 deletions(-)

diff --git a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h
index c4eafac4bb7571..b74451d3538551 100644
--- a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h
+++ b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h
@@ -20,74 +20,72 @@ namespace operators {
 using paddle::framework::Tensor;
 
 template <typename T>
-class SoftplusMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT<
-                                   T, dnnl::binary> {
+class SoftplusMKLDNNHandler
+    : public platform::MKLDNNHandlerNoCachingT<T, dnnl::binary> {
  public:
   SoftplusMKLDNNHandler(const Tensor* x, const float beta,
-                         const mkldnn::engine engine, platform::Place cpu_place)
-      : platform::MKLDNNHandlerNoCachingT<T, dnnl::binary>(
-            engine, cpu_place) {
-
+                        const mkldnn::engine engine, platform::Place cpu_place)
+      : platform::MKLDNNHandlerNoCachingT<T, dnnl::binary>(engine, cpu_place) {
     auto x_tz = framework::vectorize(x->dims());
-    auto x_md = dnnl::memory::desc(x_tz, platform::MKLDNNGetDataType<T>(), x->format());
+    auto x_md =
+        dnnl::memory::desc(x_tz, platform::MKLDNNGetDataType<T>(), x->format());
 
     auto beta_tz = std::vector<int64_t>(x_tz.size(), 1);
-    auto beta_md = dnnl::memory::desc(beta_tz, platform::MKLDNNGetDataType<T>(), x->format());
+    auto beta_md = dnnl::memory::desc(beta_tz, platform::MKLDNNGetDataType<T>(),
+                                      x->format());
 
     dnnl::post_ops post_ops;
-    post_ops.append_eltwise(1.0f, dnnl::algorithm::eltwise_soft_relu, 0.0f, 0.0f);
+    post_ops.append_eltwise(1.0f, dnnl::algorithm::eltwise_soft_relu, 0.0f,
+                            0.0f);
     post_ops.append_binary(dnnl::algorithm::binary_div, beta_md);
     dnnl::primitive_attr attrs;
     attrs.set_post_ops(post_ops);
 
-    this->AcquireForwardPrimitiveDescriptor(attrs, dnnl::algorithm::binary_mul, x_md, beta_md,
-                                            x_md);
+    this->AcquireForwardPrimitiveDescriptor(attrs, dnnl::algorithm::binary_mul,
+                                            x_md, beta_md, x_md);
   }
 
-  std::shared_ptr<mkldnn::memory> AcquireBetaMemory(
-      const float* beta) {
-    return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src1_desc(),
-                                            platform::to_void_cast<float>(beta));
+  std::shared_ptr<mkldnn::memory> AcquireBetaMemory(const float* beta) {
+    return this->AcquireMemoryFromPrimitive(
+        this->fwd_pd_->src1_desc(), platform::to_void_cast<float>(beta));
   }
 };
 
-
 template <typename T>
-void custom_softplus_eltwise_forward(const framework::ExecutionContext &ctx) {
-    const auto& dev_ctx =
-        ctx.template device_context<platform::MKLDNNDeviceContext>();
-    const auto& mkldnn_engine = dev_ctx.GetEngine();
+void custom_softplus_eltwise_forward(const framework::ExecutionContext& ctx) {
+  const auto& dev_ctx =
+      ctx.template device_context<platform::MKLDNNDeviceContext>();
+  const auto& mkldnn_engine = dev_ctx.GetEngine();
 
-    const auto* x = ctx.Input<Tensor>("X");
-    auto* out = ctx.Output<Tensor>("Out");
+  const auto* x = ctx.Input<Tensor>("X");
+  auto* out = ctx.Output<Tensor>("Out");
 
-    bool is_inplaced = x->IsSharedBufferWith(*out);
+  bool is_inplaced = x->IsSharedBufferWith(*out);
 
-    const float beta = ctx.Attr<float>("beta");
+  const float beta = ctx.Attr<float>("beta");
 
-    SoftplusMKLDNNHandler<T> handler(
-        x, beta, mkldnn_engine, ctx.GetPlace());
+  SoftplusMKLDNNHandler<T> handler(x, beta, mkldnn_engine, ctx.GetPlace());
 
-    auto src_memory_p = handler.AcquireSrcMemory(x);
+  auto src_memory_p = handler.AcquireSrcMemory(x);
 
-    auto beta_memory_p = handler.AcquireBetaMemory(&beta);
-    auto dst_memory_p =
-        is_inplaced ? src_memory_p : handler.AcquireDstMemory(out);
-    auto binary_p = handler.AcquireForwardPrimitive();
+  auto beta_memory_p = handler.AcquireBetaMemory(&beta);
+  auto dst_memory_p =
+      is_inplaced ? src_memory_p : handler.AcquireDstMemory(out);
+  auto binary_p = handler.AcquireForwardPrimitive();
 
-    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+  auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
 
-    const std::unordered_map<int, dnnl::memory> args = {
-        {DNNL_ARG_SRC_0, *src_memory_p},
-        {DNNL_ARG_SRC_1, *beta_memory_p},
-        {DNNL_ARG_DST, *dst_memory_p},
-        {DNNL_ARG_ATTR_MULTIPLE_POST_OP(1) | DNNL_ARG_SRC_1, *beta_memory_p}};
+  const std::unordered_map<int, dnnl::memory> args = {
+      {DNNL_ARG_SRC_0, *src_memory_p},
+      {DNNL_ARG_SRC_1, *beta_memory_p},
+      {DNNL_ARG_DST, *dst_memory_p},
+      {DNNL_ARG_ATTR_MULTIPLE_POST_OP(1) | DNNL_ARG_SRC_1, *beta_memory_p}};
 
-    binary_p->execute(astream, args);
-    astream.wait();
+  binary_p->execute(astream, args);
+  astream.wait();
 
-    out->set_layout(framework::DataLayout::kMKLDNN);
-    out->set_format(platform::GetMKLDNNFormat(*dst_memory_p));
+  out->set_layout(framework::DataLayout::kMKLDNN);
+  out->set_format(platform::GetMKLDNNFormat(*dst_memory_p));
 }
 }  // namespace operators
 }  // namespace paddle

From 90efe13909632a4d1813e09cfae9c7a5419399f0 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Wed, 13 Oct 2021 02:26:15 +0200
Subject: [PATCH 6/8] disabled tests if GPU is used

---
 .../fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py     | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
index 58434eaa2c4bef..94fac200390f0d 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
@@ -29,6 +29,8 @@ def ref_softplus(x, beta, threshold):
     return out
 
 
+@OpTestTool.skip_if(not (isinstance(_current_expected_place(), core.CPUPlace)),
+                    "GPU is not supported")
 class TestSoftplusOneDNNOp(OpTest):
     def setUp(self):
         self.op_type = "softplus"

From 0377970ed8d8f9663d982997aec9f82f5553c995 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Wed, 13 Oct 2021 12:11:32 +0200
Subject: [PATCH 7/8] added reviewer suggestion

---
 .../fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py    | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
index 94fac200390f0d..92699cdbd27092 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
@@ -16,10 +16,11 @@
 
 import unittest
 import numpy as np
-from paddle.fluid.tests.unittests.op_test import OpTest
+from paddle.fluid.tests.unittests.op_test import OpTest, OpTestTool
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
+from paddle.fluid.framework import _current_expected_place
 
 
 def ref_softplus(x, beta, threshold):

From be4e56c41dbc10171fb272dca1df4b854cb31fe5 Mon Sep 17 00:00:00 2001
From: jakpiase <jakpia21@gmail.com>
Date: Wed, 13 Oct 2021 16:56:05 +0200
Subject: [PATCH 8/8] unified softplus kernel

---
 paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc |  9 +--------
 paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h    | 19 ++++++++++++++++---
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
index 96fbafe2d1a856..29106dc30498e8 100644
--- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -173,14 +173,7 @@ struct GeluMKLDNNGradFunctor : public BaseActivationFunctor<T> {
 template <typename T>
 struct SoftplusMKLDNNFunctor : public BaseActivationFunctor<T> {
   void operator()(const framework::ExecutionContext &ctx) const {
-    const float beta = ctx.Attr<float>("beta");
-    // if beta is equal to 1.0f then we can simply use oneDNN's soft_relu but if
-    // it has other value, we have to fuse binary + eltwise + binary
-    if (beta == 1.0f) {
-      eltwise_forward<T>(ctx, mkldnn::algorithm::eltwise_soft_relu);
-    } else {
-      custom_softplus_eltwise_forward<T>(ctx);
-    }
+    custom_softplus_eltwise_forward<T>(ctx);
   }
 };
 
diff --git a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h
index b74451d3538551..fdb2c534e03634 100644
--- a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h
+++ b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h
@@ -12,13 +12,18 @@
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#pragma once
+
 #include "paddle/fluid/platform/mkldnn_reuse.h"
 
 namespace paddle {
 namespace operators {
 
 using paddle::framework::Tensor;
 
+// oneDNN handler that evaluates softplus(x) = log(1 + exp(beta * x)) / beta as
+// one fused primitive: binary_mul(x, beta), then a soft_relu post-op and, when
+// beta != 1, a linear post-op that scales the result by 1 / beta.
 template <typename T>
 class SoftplusMKLDNNHandler
     : public platform::MKLDNNHandlerNoCachingT<T, dnnl::binary> {
@@ -37,7 +42,13 @@ class SoftplusMKLDNNHandler
     dnnl::post_ops post_ops;
     post_ops.append_eltwise(1.0f, dnnl::algorithm::eltwise_soft_relu, 0.0f,
                             0.0f);
-    post_ops.append_binary(dnnl::algorithm::binary_div, beta_md);
+    // soft_relu yields log(1 + exp(beta * x)); rescale by 1 / beta unless that
+    // would be a no-op (beta == 1).
+    if (beta != 1.0f) {
+      post_ops.append_eltwise(1.0f, dnnl::algorithm::eltwise_linear,
+                              1.0f / beta, 0.0f);
+    }
+
     dnnl::primitive_attr attrs;
     attrs.set_post_ops(post_ops);
 
@@ -51,6 +62,9 @@ class SoftplusMKLDNNHandler
   }
 };
 
+// Runs the fused softplus primitive on input "X" and writes "Out", reusing the
+// source buffer as destination when both tensors share storage. The output is
+// tagged with the oneDNN layout and the format of the destination memory.
 template <typename T>
 void custom_softplus_eltwise_forward(const framework::ExecutionContext& ctx) {
   const auto& dev_ctx =
@@ -78,8 +92,7 @@ void custom_softplus_eltwise_forward(const framework::ExecutionContext& ctx) {
   const std::unordered_map<int, dnnl::memory> args = {
       {DNNL_ARG_SRC_0, *src_memory_p},
       {DNNL_ARG_SRC_1, *beta_memory_p},
-      {DNNL_ARG_DST, *dst_memory_p},
-      {DNNL_ARG_ATTR_MULTIPLE_POST_OP(1) | DNNL_ARG_SRC_1, *beta_memory_p}};
+      {DNNL_ARG_DST, *dst_memory_p}};
 
   binary_p->execute(astream, args);
   astream.wait();