Added sigmoid BF16 FWD/BWD kernels and gelu BF16 BWD kernel (PaddlePaddle#34216)

* added sigmoid BF16 FWD/BWD and gelu BF16 BWD

* added newline at EOF

* switched from lambdas to local functions

* changed function names
jakpiase authored Jul 22, 2021
1 parent b0a2f00 commit 5d3c89c
Showing 3 changed files with 73 additions and 29 deletions.
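
Note: all of the new BF16 tests in this commit hand their inputs to OpTest as uint16 bit patterns produced by convert_float_to_uint16. A minimal numpy sketch of that float32-to-bfloat16 round trip, for orientation only (the helper names below are local to this sketch, and it assumes plain truncation of the low mantissa bits, whereas the op_test helper may round):

    import numpy as np

    def float32_to_bf16_bits(x):
        # Keep the upper 16 bits of each float32 value (sign, exponent, 7 mantissa bits).
        x = np.ascontiguousarray(x, dtype=np.float32)
        return (x.view(np.uint32) >> 16).astype(np.uint16)

    def bf16_bits_to_float32(bits):
        # Re-expand the 16-bit pattern into a float32 with a zeroed low mantissa.
        bits = np.asarray(bits, dtype=np.uint16)
        return (bits.astype(np.uint32) << 16).view(np.float32)

    x = np.random.uniform(-1, 1, [2, 4, 3, 5]).astype(np.float32)
    bits = float32_to_bf16_bits(x)          # what the test passes as 'X'
    roundtrip = bf16_bits_to_float32(bits)  # what a BF16 kernel effectively sees
    print(np.max(np.abs(x - roundtrip)))    # error bounded by roughly 2**-8 * |x|
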
7 changes: 5 additions & 2 deletions paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -251,19 +251,22 @@ namespace ops = paddle::operators;
       ops::MKLDNNActivationKernel<ops::functor<paddle::platform::bfloat16>>); \
   REGISTER_OP_KERNEL( \
       act_type##_grad, MKLDNN, ::paddle::platform::CPUPlace, \
-      ops::MKLDNNActivationGradKernel<ops::grad_functor<float>>);
+      ops::MKLDNNActivationGradKernel<ops::grad_functor<float>>, \
+      ops::MKLDNNActivationGradKernel< \
+          ops::grad_functor<paddle::platform::bfloat16>>);

 #define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro) \
   __macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
   __macro(relu6, Relu6MKLDNNFunctor, Relu6MKLDNNGradFunctor); \
   __macro(leaky_relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
   __macro(swish, SwishMKLDNNFunctor, SwishMKLDNNGradFunctor); \
   __macro(hardswish, HardSwishMKLDNNFunctor, HardSwishMKLDNNGradFunctor); \
-  __macro(sigmoid, SigmoidMKLDNNFunctor, SigmoidMKLDNNGradFunctor); \
   __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradFunctor); \
   __macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradFunctor); \
   __macro(abs, AbsMKLDNNFunctor, AbsMKLDNNGradFunctor);

 FOR_EACH_MKLDNN_KERNEL_FUNCTOR(REGISTER_ACTIVATION_MKLDNN_KERNEL);
 REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(gelu, GeluMKLDNNFunctor,
                                        GeluMKLDNNGradFunctor);
+REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(sigmoid, SigmoidMKLDNNFunctor,
+                                       SigmoidMKLDNNGradFunctor);
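
The bfloat16 grad kernels registered above are only dispatched on CPUs with BF16 support, which is why every new test below is guarded. A minimal runtime check mirroring that guard (assuming core.supports_bfloat16(), which the tests already use, is the appropriate capability query outside the test suite as well):

    import paddle.fluid.core as core

    # The BF16 sigmoid/gelu kernels are only exercised when the CPU and the
    # oneDNN build support bfloat16; the unit tests skip themselves otherwise.
    if not core.supports_bfloat16():
        print("place does not support BF16 evaluation")
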
92 changes: 67 additions & 25 deletions python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
@@ -16,9 +16,9 @@

 import unittest
 import numpy as np
-from scipy.special import expit
+from scipy.special import expit, erf
 import paddle.fluid.core as core
-from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16
+from paddle.fluid.tests.unittests.op_test import OpTest, OpTestTool, convert_float_to_uint16
 from paddle.fluid.tests.unittests.test_activation_op import TestActivation, TestRelu, TestTanh, TestSqrt, TestAbs, TestLeakyRelu, TestSwish, TestHardSwish, TestRelu6, TestSigmoid
 from paddle.fluid.tests.unittests.test_gelu_op import gelu
 from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd
@@ -79,46 +79,88 @@ def setUp(self):
         self.attrs = {"use_mkldnn": True, "approximate": True}


-@unittest.skipIf(not core.supports_bfloat16(),
-                 "place does not support BF16 evaluation")
-class TestMKLDNNGeluBf16Dim2(TestActivation):
+#Use it as a base class for BF16 activation tests, just override necessary functions
+class TestMKLDNNSigmoidBF16Op(TestActivation):
+    @OpTestTool.skip_if_not_cpu_bf16()
+    def config(self):
+        self.op_type = "sigmoid"

+    def op_forward(self, x):
+        return 1 / (1 + np.exp(-x))

+    def op_grad(self, dout, x):
+        return dout * self.op_forward(x) * (1 - self.op_forward(x))

+    def set_attrs(self):
+        self.attrs = {"use_mkldnn": True}

+    def init_data(self):
+        self.x = np.random.uniform(-1, 1, [2, 4, 3, 5]).astype(np.float32)

     def setUp(self):
-        self.op_type = "gelu"
         self.dtype = np.uint16
+        self.init_data()
+        self.config()
+        self.out = self.op_forward(self.x)

-        x = np.random.uniform(-1, 1, [11, 17]).astype(np.float32)
-        out = convert_float_to_uint16(gelu(x, False))
+        self.inputs = {'X': convert_float_to_uint16(self.x)}
+        self.outputs = {'Out': self.out}
+        self.set_attrs()

-        self.inputs = {'X': convert_float_to_uint16(x)}
-        self.outputs = {'Out': out}
-        self.attrs = {"use_mkldnn": True}
+    def calculate_grads(self):
+        self.dx = self.op_grad(self.out, self.x)

     def test_check_output(self):
         self.check_output_with_place(core.CPUPlace())

     def test_check_grad(self):
-        pass
+        self.calculate_grads()
+        self.check_grad_with_place(
+            core.CPUPlace(), ["X"],
+            "Out",
+            user_defined_grads=[self.dx],
+            user_defined_grad_outputs=[convert_float_to_uint16(self.out)])


-@unittest.skipIf(not core.supports_bfloat16(),
-                 "place does not support BF16 evaluation")
-class TestMKLDNNGeluBf16Dim2Approx(TestActivation):
-    def setUp(self):
+class TestMKLDNNGeluErfBF16Op(TestMKLDNNSigmoidBF16Op):
+    def config(self):
         self.op_type = "gelu"
-        self.dtype = np.uint16

-        x = np.random.uniform(-1, 1, [11, 17]).astype(np.float32)
-        out = convert_float_to_uint16(gelu(x, True))
+    def op_forward(self, x):
+        return gelu(x, False)

-        self.inputs = {'X': convert_float_to_uint16(x)}
-        self.outputs = {'Out': out}
+    def op_grad(self, dout, x):
+        return (dout *
+                (0.5 + 0.5 * erf(x / np.sqrt(2)) +
+                 (x / np.sqrt(2 * np.pi) * np.exp(-0.5 * np.power(x, 2)))))


+class TestMKLDNNGeluErfDim2BF16Op(TestMKLDNNGeluErfBF16Op):
+    def init_data(self):
+        self.x = np.random.uniform(-1, 1, [11, 17]).astype(np.float32)


+class TestMKLDNNGeluTanhBF16Op(TestMKLDNNSigmoidBF16Op):
+    def config(self):
+        self.op_type = "gelu"

+    def op_forward(self, x):
+        return gelu(x, True)

+    def op_grad(self, dout, x):
+        grad_part = np.tanh(
+            np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3)))
+        return dout * 0.5 * (1 + grad_part) * (1 + np.sqrt(2 / np.pi) *
+                                               (x + 0.134145 * np.power(x, 3)) *
+                                               (1 - grad_part))

+    def set_attrs(self):
         self.attrs = {"use_mkldnn": True, "approximate": True}

-    def test_check_output(self):
-        self.check_output_with_place(core.CPUPlace())

-    def test_check_grad(self):
-        pass
+class TestMKLDNNGeluTanhDim2BF16Op(TestMKLDNNGeluTanhBF16Op):
+    def init_data(self):
+        self.x = np.random.uniform(-1, 1, [11, 17]).astype(np.float32)


 class TestMKLDNNTanhDim2(TestTanh):
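
The user_defined_grads passed to check_grad_with_place above are the analytic derivatives of sigmoid, erf-based gelu, and tanh-approximated gelu. A standalone numpy sanity check of those formulas against central finite differences (a sketch only: gelu is re-implemented locally rather than imported from test_gelu_op, and the helper names are not part of the test suite):

    import numpy as np
    from scipy.special import erf

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    def gelu(x, approximate):
        if approximate:
            return 0.5 * x * (1 + np.tanh(
                np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3))))
        return 0.5 * x * (1 + erf(x / np.sqrt(2)))

    def sigmoid_grad(dout, x):
        return dout * sigmoid(x) * (1 - sigmoid(x))

    def gelu_erf_grad(dout, x):
        return dout * (0.5 + 0.5 * erf(x / np.sqrt(2)) +
                       x / np.sqrt(2 * np.pi) * np.exp(-0.5 * np.power(x, 2)))

    def gelu_tanh_grad(dout, x):
        t = np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3)))
        return dout * 0.5 * (1 + t) * (1 + np.sqrt(2 / np.pi) *
                                       (x + 0.134145 * np.power(x, 3)) * (1 - t))

    def numeric_grad(f, x, dout, eps=1e-4):
        # Central finite difference of sum(f(x) * dout) w.r.t. each element of x.
        g = np.zeros_like(x)
        it = np.nditer(x, flags=['multi_index'])
        for _ in it:
            idx = it.multi_index
            xp, xm = x.copy(), x.copy()
            xp[idx] += eps
            xm[idx] -= eps
            g[idx] = np.sum((f(xp) - f(xm)) * dout) / (2 * eps)
        return g

    x = np.random.uniform(-1, 1, [3, 4])
    dout = np.ones_like(x)
    assert np.allclose(sigmoid_grad(dout, x), numeric_grad(sigmoid, x, dout), atol=1e-5)
    assert np.allclose(gelu_erf_grad(dout, x),
                       numeric_grad(lambda v: gelu(v, False), x, dout), atol=1e-5)
    assert np.allclose(gelu_tanh_grad(dout, x),
                       numeric_grad(lambda v: gelu(v, True), x, dout), atol=1e-5)
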
3 changes: 1 addition & 2 deletions python/paddle/fluid/tests/unittests/test_activation_op.py
@@ -18,7 +18,7 @@
 import numpy as np
 from scipy.special import expit, erf

-from op_test import OpTest, convert_float_to_uint16
+from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16, skip_check_grad_ci
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
@@ -1619,7 +1619,6 @@ def setUp(self):
         self.op_type = 'hard_swish'
         self.init_dtype()

-        from op_test import skip_check_grad_ci
         skip_check_grad_ci(reason="not implemented yet")

         np.random.seed(1024)
