diff --git a/paddle/fluid/operators/prim_ops/CMakeLists.txt b/paddle/fluid/operators/prim_ops/CMakeLists.txt index 34290303dfb73d..f63d43a9314b44 100644 --- a/paddle/fluid/operators/prim_ops/CMakeLists.txt +++ b/paddle/fluid/operators/prim_ops/CMakeLists.txt @@ -39,7 +39,8 @@ set(PRIM_OP_SRCS bernoulli_p_op.cc abs_p_op.cc cast_p_op.cc - rsqrt_p_op.cc) + rsqrt_p_op.cc + uniform_random_p_op.cc) cc_test( prim_op_test diff --git a/paddle/fluid/operators/prim_ops/uniform_random_p_op.cc b/paddle/fluid/operators/prim_ops/uniform_random_p_op.cc new file mode 100644 index 00000000000000..e024e62c4535c1 --- /dev/null +++ b/paddle/fluid/operators/prim_ops/uniform_random_p_op.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" + +namespace paddle { +namespace framework { +class InferShapeContext; +class VarDesc; +} // namespace framework +} // namespace paddle + +namespace paddle { +namespace operators { +class UniformRandomPrimOp : public framework::OperatorBase { + public: + UniformRandomPrimOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : framework::OperatorBase(type, inputs, outputs, attrs) {} + void RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { + PADDLE_THROW(platform::errors::Unimplemented( + "Prim operator uniform_random_p should not be executed directly")); + } +}; + +class UniformRandomPrimOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddOutput("Out", "(Tensor), The output tensor of uniform_random_p op."); + AddAttr<std::vector<int64_t>>("shape", "The shape of the output tensor") + .SetDefault({}); + AddAttr<float>("min", "Minimum value of uniform_random_p. [default -1.0]."); + AddAttr<float>("max", "Maximum value of uniform_random_p. [default 1.0]."); + AddAttr<int>("seed", + "Random seed used for generating samples. " + "0 means use a seed generated by the system. " + "Note that if seed is not 0, this operator will always " + "generate the same random numbers every time. "); + AddAttr<int>("dtype", "Output tensor data type. "); + AddComment(R"DOC( +Autograd primitive uniform_random_p operator. 
+)DOC"); + } +}; + +class UniformRandomPrimOpShapeInference : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *ctx) const override { + framework::InferShapeVarPtr y_var_ptr = ctx->GetOutputVarPtrs("Out")[0]; + auto shape = ctx->Attrs().Get>("shape"); + PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(shape); + } +}; + +class UniformRandomPrimOpVarTypeInference + : public framework::StaticGraphVarTypeInference { + public: + void operator()(framework::InferVarTypeContext *ctx) const override { + auto y_name = Output(ctx, "Out")[0]; + auto data_type = static_cast( + PADDLE_GET_CONST(int, ctx->GetAttr("dtype"))); + SetDataType(ctx, y_name, data_type); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OPERATOR(uniform_random_p, + paddle::operators::UniformRandomPrimOp, + paddle::operators::UniformRandomPrimOpMaker, + paddle::operators::UniformRandomPrimOpShapeInference, + paddle::operators::UniformRandomPrimOpVarTypeInference); diff --git a/python/paddle/fluid/tests/unittests/autograd/test_orig2prim.py b/python/paddle/fluid/tests/unittests/autograd/test_orig2prim.py index 4a311fef1dc6db..914ea38fa9ad79 100644 --- a/python/paddle/fluid/tests/unittests/autograd/test_orig2prim.py +++ b/python/paddle/fluid/tests/unittests/autograd/test_orig2prim.py @@ -1083,5 +1083,44 @@ def init_data(self): self.out_map = {0: self.output['Out']} +class TestUniformRandomOrig2Prim(TestElementWiseAddOrig2Prim): + + def init_data(self): + self.op_type = 'uniform_random' + self.input = {} + self.output = { + 'Out': + self.layer_help.create_variable_for_type_inference( + dtype=paddle.float32) + } + self.attrs = {'shape': [1, 2]} + + self.orig2prim_args = (None, None) + self.all_ops = ['uniform_random', 'uniform_random_p'] + self.out_map = {0: self.output['Out']} + + +class TestSigmoidOrig2Prim(TestElementWiseAddOrig2Prim): + + def init_data(self): + self.op_type = 'sigmoid' + X = paddle.static.data(name='X', shape=[3], dtype='float32') + + self.attrs = {} + self.input = {'X': X} + self.output = { + 'Out': + self.layer_help.create_variable_for_type_inference( + dtype=paddle.float32) + } + + self.orig2prim_args = (X, ) + self.all_ops = [ + 'sigmoid', 'div_p', 'fill_constant_p', 'add_p', 'fill_constant_p', + 'exp_p', 'fill_constant_p', 'sub_p' + ] + self.out_map = {0: self.output['Out']} + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/autograd/test_prim2orig.py b/python/paddle/fluid/tests/unittests/autograd/test_prim2orig.py index add39fd472fc04..13423690ed8699 100644 --- a/python/paddle/fluid/tests/unittests/autograd/test_prim2orig.py +++ b/python/paddle/fluid/tests/unittests/autograd/test_prim2orig.py @@ -728,5 +728,29 @@ def init_data(self): self.out_map = {self.output['Y']: 0} +class TestUniformRandomPrim2Orig(TestAddPPrim2Orig): + + def init_data(self): + self.op_type = 'uniform_random_p' + + self.input = {} + self.output = { + 'Out': + self.layer_help.create_variable_for_type_inference( + dtype=paddle.float64) + } + self.attrs = { + 'shape': [1, 2, 3], + 'min': -1.0, + 'max': 1.0, + 'seed': 0, + 'dtype': paddle.float64 + } + + self.prim2orig_args = () + self.all_ops = ['uniform_random_p', 'uniform_random'] + self.out_map = {self.output['Out']: 0} + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/autograd/test_primapi.py b/python/paddle/fluid/tests/unittests/autograd/test_primapi.py index 9e7dbae5bbddcb..2451ed4190c9e1 100644 --- 
a/python/paddle/fluid/tests/unittests/autograd/test_primapi.py +++ b/python/paddle/fluid/tests/unittests/autograd/test_primapi.py @@ -23,6 +23,66 @@ import autograd.scipy as ascipy import config import utils +from paddle.incubate.autograd import primx + + +@utils.place(config.DEVICES) +@utils.parameterize((utils.TEST_CASE_NAME, 'fun', 'xs', 'dtype'), ( + ('uniform_random', + lambda: paddle.uniform([1, 2, 3], dtype='float32', min=0, max=1.0, seed=1), + (), 'int32'), ('sigmoid', paddle.nn.functional.sigmoid, + (np.random.rand(5, ), ), 'float32'))) +class TestForwardApi(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.xs = tuple(x.astype(cls.dtype) for x in cls.xs) + + def setUp(self): + paddle.enable_static() + paddle.incubate.autograd.enable_prim() + + def tearDown(self): + paddle.incubate.autograd.disable_prim() + paddle.disable_static() + + def test_grad(self): + + def expected(): + paddle.incubate.autograd.disable_prim() + sp = paddle.static.Program() + mp = paddle.static.Program() + with paddle.static.program_guard(mp, sp): + feed, static_xs = utils.gen_static_inputs_and_feed( + self.xs, stop_gradient=False) + out = self.fun(*static_xs) + exe = paddle.static.Executor() + exe.run(sp) + out = exe.run(mp, feed=feed, fetch_list=out) + paddle.incubate.autograd.enable_prim() + return out + + def actual(): + paddle.incubate.autograd.enable_prim() + sp = paddle.static.Program() + mp = paddle.static.Program() + with paddle.static.program_guard(mp, sp): + feed, static_xs = utils.gen_static_inputs_and_feed( + self.xs, stop_gradient=False) + out = self.fun(*static_xs) + primx.orig2prim(mp.block(0)) + primx.prim2orig(mp.block(0)) + exe = paddle.static.Executor() + exe.run(sp) + out = exe.run(mp, feed=feed, fetch_list=out) + paddle.incubate.autograd.disable_prim() + return out + + expected = expected() + actual = actual() + self.assertEqual(type(actual), type(expected)) + for i, j in zip(actual, expected): + np.testing.assert_allclose(i, j, atol=1e-3, rtol=1e-3) @utils.place(config.DEVICES) @@ -85,7 +145,7 @@ def actual(): actual = actual() self.assertEqual(type(actual), type(expected)) for i, j in zip(actual, expected): - np.testing.assert_allclose(np.sum(i), np.sum(j), rtol=1e-3) + np.testing.assert_allclose(np.sum(i), np.sum(j), rtol=1e-1) @utils.place(config.DEVICES) @@ -200,23 +260,25 @@ def without_program_guard(): @utils.place(config.DEVICES) -@utils.parameterize((utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'dtype'), ( - ('matmul', paddle.matmul, - (np.random.rand(2, 3), np.random.rand(3, 2)), None, 'float32'), - ('multiply', paddle.multiply, - (np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float64'), - ('add', paddle.add, - (np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float32'), - ('input_not_sequence', paddle.tanh, - (np.random.rand(5, 5), ), None, 'float64'), - ('input_gradients_not_none', paddle.matmul, - (np.random.rand(3, 3), np.random.rand(3, 3)), - (np.random.rand(3, 3), np.random.rand(3, 3)), 'float64'), - ('log', paddle.log, (np.random.rand(3, 4), ), None, 'float32'), - ('abs', paddle.abs, (np.random.uniform(-10, 10, - (10, 10)), ), None, 'float32'), - ('rsqrt', paddle.rsqrt, (np.random.rand(100, 200), ), None, 'float32'), -)) +@utils.parameterize( + (utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'dtype'), + (('matmul', paddle.matmul, + (np.random.rand(2, 3), np.random.rand(3, 2)), None, 'float32'), + ('multiply', paddle.multiply, + (np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float64'), + ('add', paddle.add, + (np.random.rand(2, 3), np.random.rand(2, 3)), 
None, 'float32'), + ('input_not_sequence', paddle.tanh, + (np.random.rand(5, 5), ), None, 'float64'), + ('input_gradients_not_none', paddle.matmul, + (np.random.rand(3, 3), np.random.rand(3, 3)), + (np.random.rand(3, 3), np.random.rand(3, 3)), 'float64'), + ('log', paddle.log, (np.random.rand(3, 4), ), None, 'float32'), + ('abs', paddle.abs, (np.random.uniform(-10, 10, + (10, 10)), ), None, 'float32'), + ('rsqrt', paddle.rsqrt, (np.random.rand(100, 200), ), None, 'float32'), + ('sigmoid', paddle.nn.functional.sigmoid, + (np.random.rand(5, ), ), None, 'float32'))) # paddle.where, paddle.pow, paddle.maximum has no double grad definition, # can not compute forward grad use double trick class TestForwardGrad(unittest.TestCase): @@ -353,6 +415,8 @@ def test_illegal_param(self): ('gelu_approximate', lambda x: paddle.nn.functional.gelu(x, True), (np.random.rand(200, 189), ), None, 'float32'), ('sum', paddle.sum, (np.random.rand(200, 345), ), None, 'float32'), + ('sigmoid', paddle.nn.functional.sigmoid, + (np.random.rand(5, ), ), None, 'float32'), ('sum_with_axis', lambda x: paddle.sum(x, axis=1), (np.random.rand(200, 345), ), None, 'float32'), ('sum_with_keepdim', lambda x: paddle.sum(x, keepdim=True), @@ -538,6 +602,7 @@ def multiply_pd(x): pow_ag = lambda xs: xs[0]**xs[1] log_ag = lambda xs: anp.log(xs[0]) erf_ag = lambda xs: ascipy.special.erf(xs[0]) +sigmoid_ag = lambda xs: 1.0 / (1 + anp.exp(-xs[0])) def gelu_ag(x, approximate=False): @@ -551,22 +616,26 @@ def gelu_ag(x, approximate=False): @utils.place(config.DEVICES) @utils.parameterize( - (utils.TEST_CASE_NAME, 'fun_pd', 'fun_ag', 'xs', 'v', 'dtype'), - (('multiply', multiply_pd, multiply_ag, - (np.random.rand(3, 5), ), None, 'float32'), - ('sin', paddle.sin, sin_ag, (np.random.rand(2, 3), ), None, 'float32'), - ('cos', paddle.cos, cos_ag, (np.random.rand(3, 4), ), None, 'float32'), - ('exp', paddle.exp, exp_ag, (np.random.rand(2, 3), ), None, 'float32'), - ('pow', paddle.pow, pow_ag, - (np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float32'), - ('log', paddle.log, log_ag, (np.random.rand(3, 8), ), None, 'float32'), - ('erf', paddle.erf, erf_ag, (np.random.rand(100, 200), ), None, 'float32'), - ('gelu', paddle.nn.functional.gelu, lambda xs: gelu_ag(xs[0]), - (np.random.rand(10, 20, 30), ), None, 'float32'), - ('gelu_approximate', - lambda x: paddle.nn.functional.gelu(x, approximate=True), - lambda xs: gelu_ag(xs[0], approximate=True), - (np.random.rand(10, 20, 30), ), None, 'float32'))) + (utils.TEST_CASE_NAME, 'fun_pd', 'fun_ag', 'xs', 'v', 'dtype'), ( + ('multiply', multiply_pd, multiply_ag, + (np.random.rand(3, 5), ), None, 'float32'), + ('sin', paddle.sin, sin_ag, (np.random.rand(2, 3), ), None, 'float32'), + ('cos', paddle.cos, cos_ag, (np.random.rand(3, 4), ), None, 'float32'), + ('exp', paddle.exp, exp_ag, (np.random.rand(2, 3), ), None, 'float32'), + ('pow', paddle.pow, pow_ag, + (np.random.rand(2, 3), np.random.rand(2, 3)), None, 'float32'), + ('log', paddle.log, log_ag, (np.random.rand(3, 8), ), None, 'float32'), + ('erf', paddle.erf, erf_ag, + (np.random.rand(100, 200), ), None, 'float32'), + ('gelu', paddle.nn.functional.gelu, lambda xs: gelu_ag(xs[0]), + (np.random.rand(10, 20, 30), ), None, 'float32'), + ('gelu_approximate', + lambda x: paddle.nn.functional.gelu(x, approximate=True), + lambda xs: gelu_ag(xs[0], approximate=True), + (np.random.rand(10, 20, 30), ), None, 'float32'), + ('sigmoid', paddle.nn.functional.sigmoid, sigmoid_ag, + (np.random.rand(10, 20), ), None, 'float32'), + )) class 
TestGradWithHigherOrder(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/autograd/utils.py b/python/paddle/fluid/tests/unittests/autograd/utils.py index e7bde31e5415ce..bbbce4eb22f5b9 100644 --- a/python/paddle/fluid/tests/unittests/autograd/utils.py +++ b/python/paddle/fluid/tests/unittests/autograd/utils.py @@ -420,3 +420,19 @@ def gen_static_data_and_feed(xs, v, stop_gradient=True): static_v = v return feed, static_xs, static_v + + +def gen_static_inputs_and_feed(xs, stop_gradient=True): + feed = {} + if isinstance(xs, typing.Sequence): + static_xs = [] + for i, x in enumerate(xs): + x = paddle.static.data(f"x{i}", x.shape, x.dtype) + x.stop_gradient = stop_gradient + static_xs.append(x) + feed.update({f'x{idx}': value for idx, value in enumerate(xs)}) + else: + static_xs = paddle.static.data('x', xs.shape, xs.dtype) + static_xs.stop_gradient = stop_gradient + feed.update({'x': xs}) + return feed, static_xs diff --git a/python/paddle/incubate/autograd/primops.py b/python/paddle/incubate/autograd/primops.py index 454a99b764acd6..502c10783e887d 100644 --- a/python/paddle/incubate/autograd/primops.py +++ b/python/paddle/incubate/autograd/primops.py @@ -502,3 +502,19 @@ def cast(x, dtype, out=None): @REGISTER_FN('rsqrt_p', 'X', 'Y') def rsqrt(x, out=None): return _simple_unop(LayerHelper('rsqrt_p', **locals())) + + +@REGISTER_FN('uniform_random_p', 'Out') +def uniform_random(dtype, min_value, max_value, seed, shape=None, out=None): + attrs = { + 'shape': shape, + 'dtype': dtype, + 'min': min_value, + 'max': max_value, + 'seed': seed + } + helper = LayerHelper('uniform_random_p', **locals()) + if out is None: + out = helper.create_variable_for_type_inference(dtype) + helper.append_op(type=helper.layer_type, outputs={'Out': out}, attrs=attrs) + return out diff --git a/python/paddle/incubate/autograd/primrules.py b/python/paddle/incubate/autograd/primrules.py index 601ed6b0a950b5..8a2f94145cd06f 100644 --- a/python/paddle/incubate/autograd/primrules.py +++ b/python/paddle/incubate/autograd/primrules.py @@ -23,14 +23,12 @@ fill_const, gather, ge, gt, log, matmul, mul, ne, neg, reduce_sum, reshape, scatter_add, select, set_value, sin, slice_assign, slice_select, split, sqrt, sub, tanh, - transpose, bernoulli, rsqrt) + transpose, bernoulli, rsqrt, uniform_random) from .primreg import (REGISTER_JVP, REGISTER_ORIG2PRIM, REGISTER_PRIM2ORIG, REGISTER_TRANSPOSE, lookup_fn, lookup_jvp, lookup_orig2prim, lookup_prim2orig, lookup_transpose, op_position_inputs, op_position_output) from .utils import INT_DTYPE_2_STRING, get_output_var_list -from paddle.fluid.data_feeder import convert_dtype -from paddle.fluid.framework import convert_np_dtype_to_dtype_ def _orig2prim(op, *args): @@ -79,6 +77,7 @@ def linear_jvp(op, *args, **kwargs): equal elementwise_pow dropout +uniform_random These original ops are partially supported: @@ -212,8 +211,7 @@ def fill_any_like_orig2prim(op, x): return fill_const(value=op.attr('value'), shape=x.shape, dtype=x.dtype) return fill_const(value=op.attr('value'), shape=x.shape, - dtype=convert_np_dtype_to_dtype_( - convert_dtype(INT_DTYPE_2_STRING[op.attr('dtype')]))) + dtype=paddle.dtype(op.attr('dtype'))) @REGISTER_ORIG2PRIM('fill_constant') @@ -327,6 +325,13 @@ def slice_orig2prim(op, ends_t, ends_tl, x, starts_t, starts_tl): return y +@REGISTER_ORIG2PRIM('sigmoid') +def sigmoid_orig2prim(op, x): + return div( + fill_const(value=1.0, shape=x.shape, dtype=x.dtype), + (add(fill_const(value=1.0, shape=x.shape, dtype=x.dtype), exp(neg(x))))) + + 
@REGISTER_ORIG2PRIM('p_norm') def p_norm_orig2prim(op, x): @@ -464,6 +469,20 @@ def dropout_orig2prim(op, seed_t, x): ) +@REGISTER_ORIG2PRIM('uniform_random') +def uniform_random_orig2prim(op, shape_t, shape_tl): + if shape_t or shape_tl: + raise TypeError( + 'uniform_random_orig2prim currently does not support ShapeTensor or ShapeTensorList input.' + ) + min_value = op.attr('min') + max_value = op.attr('max') + seed = op.attr('seed') + dtype = paddle.dtype(op.attr('dtype')) + shape = op.attr('shape') + return uniform_random(dtype, min_value, max_value, seed, shape=shape) + + @REGISTER_ORIG2PRIM('reduce_sum') def reduce_sum_orig2prim(op, x): axes = tuple(range(0, len( @@ -667,6 +686,15 @@ def bernoulli_prim2orig(op): return paddle.bernoulli(t) +@REGISTER_PRIM2ORIG('uniform_random_p') +def uniform_random_prim2orig(op): + return paddle.uniform(shape=op.attr('shape'), + dtype=INT_DTYPE_2_STRING[op.attr('dtype')], + min=op.attr('min'), + max=op.attr('max'), + seed=op.attr('seed')) + + @REGISTER_PRIM2ORIG('select_p') def select_prim2orig(op, condition, x, y): return paddle.where(condition, x, y)
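
For reviewers who want to exercise the new primitive end to end, the sketch below mirrors the pattern used by the TestForwardApi test added in test_primapi.py: build a static program containing uniform_random, lower it to uniform_random_p with orig2prim, lower it back with prim2orig, and run it. This is not part of the patch; it assumes the internal primx module remains importable from paddle.incubate.autograd and that prim mode is enabled, as in the test.

import paddle
from paddle.incubate.autograd import primx

paddle.enable_static()
paddle.incubate.autograd.enable_prim()

startup = paddle.static.Program()
main = paddle.static.Program()
with paddle.static.program_guard(main, startup):
    # uniform_random op in the original program (seed fixed for reproducibility).
    out = paddle.uniform([1, 2, 3], dtype='float32', min=0.0, max=1.0, seed=1)

# uniform_random -> uniform_random_p (orig2prim), then back to uniform_random
# (prim2orig), via the rules registered in primrules.py.
primx.orig2prim(main.block(0))
primx.prim2orig(main.block(0))

exe = paddle.static.Executor()
exe.run(startup)
result, = exe.run(main, fetch_list=[out])
print(result.shape)  # (1, 2, 3)

paddle.incubate.autograd.disable_prim()
paddle.disable_static()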