[Cherry-pick] to Release/2.3, Improve MSRAInitializer (#43721)
* fix conflict

* improve the doc
Jackwaterveg authored Jun 22, 2022
1 parent 4dcfc6d commit 1aafc31
Showing 2 changed files with 69 additions and 44 deletions.
77 changes: 45 additions & 32 deletions python/paddle/fluid/initializer.py
@@ -679,20 +679,23 @@ class MSRAInitializer(Initializer):
     .. math::
-        x = \sqrt{\\frac{6.0}{fan\_in}}
+        x = gain \times \sqrt{\frac{3}{fan\_in}}
     In case of Normal distribution, the mean is 0 and the standard deviation is
     .. math::
-        \sqrt{\\frac{2.0}{fan\_in}}
+        \frac{gain}{\sqrt{fan\_in}}
     Args:
         uniform (bool): whether to use uniform or normal distribution
-        fan_in (float32|None): fan_in for MSRAInitializer. If None, it is\
-            inferred from the variable. default is None.
+        fan_in (float32|None): fan_in (in_features) of the trainable Tensor.\
+            If None, it will be inferred automatically. If you do not want to use\
+            the Tensor's in_features, set 'fan_in' to a suitable value yourself. default is None.
         seed (int32): random seed
+        negative_slope (float, optional): negative_slope (only used with leaky_relu). default is 0.0.
+        nonlinearity (str, optional): the non-linear function. default is relu.
     Note:
         It is recommended to set fan_in to None for most cases.
@@ -709,7 +712,12 @@ class MSRAInitializer(Initializer):
"""

-    def __init__(self, uniform=True, fan_in=None, seed=0):
+    def __init__(self,
+                 uniform=True,
+                 fan_in=None,
+                 seed=0,
+                 negative_slope=0,
+                 nonlinearity='relu'):
"""Constructor for MSRAInitializer
"""
assert uniform is not None
@@ -718,6 +726,8 @@ def __init__(self, uniform=True, fan_in=None, seed=0):
self._uniform = uniform
self._fan_in = fan_in
self._seed = seed
+        self._negative_slope = negative_slope
+        self._nonlinearity = nonlinearity
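
The stored negative_slope and nonlinearity are consumed by calculate_gain inside __call__ below. A minimal sketch of the assumed gain values for the two activations this patch targets (the real helper covers more activations):

import math

def calculate_gain_sketch(nonlinearity, negative_slope=0.0):
    # He et al. (2015): relu -> sqrt(2), leaky_relu -> sqrt(2 / (1 + slope^2))
    if nonlinearity == 'relu':
        return math.sqrt(2.0)
    if nonlinearity == 'leaky_relu':
        return math.sqrt(2.0 / (1.0 + negative_slope ** 2))
    raise ValueError('sketch only covers relu and leaky_relu')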

def __call__(self, var, block=None):
"""Initialize the input tensor with MSRA initialization.
@@ -759,13 +769,16 @@ def __call__(self, var, block=None):

if framework._non_static_mode():
if self._uniform:
-                limit = np.sqrt(6.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                limit = gain * math.sqrt(3.0 / float(fan_in))

out_var = _C_ops.uniform_random('shape', out_var.shape, 'min',
-limit, 'max', limit, 'seed',
self._seed, 'dtype',
int(out_dtype))
else:
-                std = math.sqrt(2.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                std = gain / math.sqrt(float(fan_in))
if in_dygraph_mode():
place = _current_expected_place()
out_var = _C_ops.final_state_gaussian_random(
@@ -786,33 +799,33 @@ def __call__(self, var, block=None):
return None
else:
if self._uniform:
-                limit = np.sqrt(6.0 / float(fan_in))
-                op = block.append_op(
-                    type="uniform_random",
-                    inputs={},
-                    outputs={"Out": out_var},
-                    attrs={
-                        "shape": out_var.shape,
-                        "dtype": int(out_dtype),
-                        "min": -limit,
-                        "max": limit,
-                        "seed": self._seed
-                    },
-                    stop_gradient=True)
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                limit = gain * math.sqrt(3.0 / float(fan_in))
+                op = block.append_op(type="uniform_random",
+                                     inputs={},
+                                     outputs={"Out": out_var},
+                                     attrs={
+                                         "shape": out_var.shape,
+                                         "dtype": int(out_dtype),
+                                         "min": -limit,
+                                         "max": limit,
+                                         "seed": self._seed
+                                     },
+                                     stop_gradient=True)

else:
-                std = np.sqrt(2.0 / float(fan_in))
-                op = block.append_op(
-                    type="gaussian_random",
-                    outputs={"Out": out_var},
-                    attrs={
-                        "shape": out_var.shape,
-                        "dtype": int(out_dtype),
-                        "mean": 0.0,
-                        "std": std,
-                        "seed": self._seed
-                    },
-                    stop_gradient=True)
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                std = gain / math.sqrt(float(fan_in))
+                op = block.append_op(type="gaussian_random",
+                                     outputs={"Out": out_var},
+                                     attrs={
+                                         "shape": out_var.shape,
+                                         "dtype": int(out_dtype),
+                                         "mean": 0.0,
+                                         "std": std,
+                                         "seed": self._seed
+                                     },
+                                     stop_gradient=True)

if var.dtype == VarDesc.VarType.FP16 or (
var.dtype == VarDesc.VarType.BF16 and not self._uniform):
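
With the patch applied, the fluid-level initializer accepts the new arguments directly; a hedged usage sketch (layer sizes illustrative):

import paddle

init = paddle.fluid.initializer.MSRAInitializer(
    uniform=True, negative_slope=0.1, nonlinearity='leaky_relu')
# Attach the initializer to a layer's weight via ParamAttr.
linear = paddle.nn.Linear(4, 8, weight_attr=paddle.ParamAttr(initializer=init))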
36 changes: 24 additions & 12 deletions python/paddle/nn/initializer/kaiming.py
@@ -33,11 +33,14 @@ class KaimingNormal(MSRAInitializer):
.. math::
-        \sqrt{\frac{2.0}{fan\_in}}
+        \frac{gain}{\sqrt{fan\_in}}
     Args:
-        fan_in (float32|None): fan_in for Kaiming normal Initializer. If None, it is\
-            inferred from the variable. default is None.
+        fan_in (float32|None): fan_in (in_features) of the trainable Tensor.\
+            If None, it will be inferred automatically. If you do not want to use\
+            the Tensor's in_features, set 'fan_in' to a suitable value yourself. default is None.
+        negative_slope (float, optional): negative_slope (only used with leaky_relu). default is 0.0.
+        nonlinearity (str, optional): the non-linear function. default is relu.
Note:
It is recommended to set fan_in to None for most cases.
@@ -56,9 +59,12 @@ class KaimingNormal(MSRAInitializer):
"""

-    def __init__(self, fan_in=None):
-        super(KaimingNormal, self).__init__(
-            uniform=False, fan_in=fan_in, seed=0)
+    def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
+        super(KaimingNormal, self).__init__(uniform=False,
+                                            fan_in=fan_in,
+                                            seed=0,
+                                            negative_slope=negative_slope,
+                                            nonlinearity=nonlinearity)
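
A usage sketch of the extended public API (argument values illustrative):

import paddle
import paddle.nn as nn

# Kaiming normal init tuned for a leaky_relu activation with slope 0.1.
linear = nn.Linear(
    2, 4,
    weight_attr=paddle.ParamAttr(
        initializer=nn.initializer.KaimingNormal(
            negative_slope=0.1, nonlinearity='leaky_relu')))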


class KaimingUniform(MSRAInitializer):
@@ -75,11 +81,14 @@ class KaimingUniform(MSRAInitializer):
.. math::
-        x = \sqrt{\frac{6.0}{fan\_in}}
+        x = gain \times \sqrt{\frac{3}{fan\_in}}
     Args:
-        fan_in (float32|None): fan_in for Kaiming uniform Initializer. If None, it is\
-            inferred from the variable. default is None.
+        fan_in (float32|None): fan_in (in_features) of the trainable Tensor.\
+            If None, it will be inferred automatically. If you do not want to use\
+            the Tensor's in_features, set 'fan_in' to a suitable value yourself. default is None.
+        negative_slope (float, optional): negative_slope (only used with leaky_relu). default is 0.0.
+        nonlinearity (str, optional): the non-linear function. default is relu.
Note:
It is recommended to set fan_in to None for most cases.
@@ -98,6 +107,9 @@ class KaimingUniform(MSRAInitializer):
"""

-    def __init__(self, fan_in=None):
-        super(KaimingUniform, self).__init__(
-            uniform=True, fan_in=fan_in, seed=0)
+    def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
+        super(KaimingUniform, self).__init__(uniform=True,
+                                             fan_in=fan_in,
+                                             seed=0,
+                                             negative_slope=negative_slope,
+                                             nonlinearity=nonlinearity)
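
Analogously for the uniform variant; sampled weights are assumed to stay within the bound gain * sqrt(3 / fan_in):

import math
import paddle

slope, fan_in = 0.2, 100
w = paddle.nn.Linear(
    fan_in, 10,
    weight_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.KaimingUniform(
            negative_slope=slope, nonlinearity='leaky_relu'))).weight
bound = math.sqrt(2.0 / (1.0 + slope ** 2)) * math.sqrt(3.0 / fan_in)
assert float(w.abs().max()) <= bound  # uniform samples lie in [-bound, bound]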
