diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml index 5e39b764fa96d..241fafbf9cdf5 100644 --- a/paddle/phi/api/yaml/backward.yaml +++ b/paddle/phi/api/yaml/backward.yaml @@ -432,6 +432,7 @@ infer_meta : func : UnchangedInferMeta param : [x] + spmd_rule : ElementwiseUnaryGradInferSpmd kernel : func : cos_grad backward : cos_double_grad @@ -708,6 +709,7 @@ infer_meta : func : UnchangedInferMeta param : [out] + spmd_rule : ElementwiseUnaryGradInferSpmd kernel : func : exp_grad inplace : (out_grad -> x_grad) @@ -1907,6 +1909,7 @@ infer_meta : func : UnchangedInferMeta param : [out] + spmd_rule : ElementwiseUnaryGradInferSpmd kernel : func : rsqrt_grad backward : rsqrt_double_grad @@ -2062,6 +2065,7 @@ infer_meta : func : UnchangedInferMeta param : [x] + spmd_rule : ElementwiseUnaryGradInferSpmd kernel : func : silu_grad backward : silu_double_grad @@ -2088,6 +2092,7 @@ infer_meta : func : UnchangedInferMeta param : [x] + spmd_rule : ElementwiseUnaryGradInferSpmd kernel : func : sin_grad backward : sin_double_grad diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index 0cb62f2a84c76..e5e3f9fb86c53 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -193,6 +193,7 @@ infer_meta : func : GeneralBinaryGradInferMeta param : [x, y] + spmd_rule : ElementwiseBinaryGradInferSpmd kernel : func : divide_grad composite : divide_grad(x, y, out, out_grad, axis, x_grad, y_grad) @@ -226,6 +227,7 @@ infer_meta : func : GeneralBinaryGradInferMeta param: [x, y] + spmd_rule : ElementwiseBinaryGradInferSpmd composite : elementwise_pow_grad(x, y, out_grad, x_grad, y_grad) kernel : func : elementwise_pow_grad @@ -440,6 +442,7 @@ infer_meta : func : GeneralBinaryGradInferMeta param : [x, y] + spmd_rule : ElementwiseBinaryGradInferSpmd kernel : func : multiply_grad composite: multiply_grad(x, y, out_grad, axis, x_grad, y_grad) diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index f43ae357df3e8..d37377890c9a9 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -317,6 +317,7 @@ output : Tensor(out) infer_meta : func : ElementwiseInferMeta + spmd_rule : ElementwiseBinaryInferSpmd kernel : func : divide inplace: (x -> out) @@ -348,6 +349,7 @@ output : Tensor(out) infer_meta : func : ElementwiseInferMeta + spmd_rule: ElementwiseBinaryInferSpmd kernel : func : elementwise_pow backward : elementwise_pow_grad @@ -415,6 +417,7 @@ output : Tensor(out) infer_meta : func : CompareInferMeta + spmd_rule: ElementwiseBinaryInferSpmd kernel : func : equal inplace: (x -> out) @@ -783,6 +786,7 @@ output : Tensor infer_meta : func : ElementwiseInferMeta + spmd_rule : ElementwiseBinaryInferSpmd kernel : func : multiply {dense, dense -> dense}, multiply_sr {selected_rows, dense -> selected_rows} @@ -803,6 +807,7 @@ output : Tensor(out) infer_meta : func : CompareInferMeta + spmd_rule : ElementwiseBinaryInferSpmd kernel : func : not_equal inplace: (x -> out) diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index 97d4f3dc0d17e..e6b11884f74eb 100644 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -327,6 +327,7 @@ output : Tensor(out) infer_meta : func : ElementwiseInferMeta + spmd_rule : ElementwiseBinaryInferSpmd kernel : func : bitwise_and backend : x @@ -337,6 +338,7 @@ output : Tensor(out) infer_meta : func : UnchangedInferMeta + spmd_rule : ElementwiseUnaryInferSpmd kernel : func 
: bitwise_not backend : x @@ -550,6 +552,7 @@ output : Tensor(out) infer_meta : func : UnchangedInferMeta + spmd_rule : ElementwiseUnaryInferSpmd kernel : func : cos inplace: (x -> out) @@ -819,6 +822,7 @@ output : Tensor(out) infer_meta : func : UnchangedInferMeta + spmd_rule : ElementwiseUnaryInferSpmd kernel : func : exp inplace : (x -> out) @@ -2180,6 +2184,7 @@ output : Tensor(out) infer_meta : func : UnchangedInferMeta + spmd_rule : ElementwiseUnaryInferSpmd kernel : func : rsqrt inplace : (x -> out) @@ -2360,6 +2365,7 @@ output : Tensor infer_meta : func : UnchangedInferMeta + spmd_rule : ElementwiseUnaryInferSpmd kernel : func : silu backward : silu_grad @@ -2369,6 +2375,7 @@ output : Tensor(out) infer_meta : func : UnchangedInferMeta + spmd_rule : ElementwiseUnaryInferSpmd kernel : func : sin inplace: (x -> out) diff --git a/paddle/phi/core/distributed/auto_parallel/reshard_utils.h b/paddle/phi/core/distributed/auto_parallel/reshard_utils.h index f4a4cd68ce5e1..0ee6a513c0e48 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard_utils.h +++ b/paddle/phi/core/distributed/auto_parallel/reshard_utils.h @@ -76,14 +76,14 @@ CommContext* CreateOrGetCommContext(const DeviceContext& dev_ctx, do { \ if (phi::CPUContext::classof(dev_ctx)) { \ VLOG(4) << "Call `" << #fn_name << "` in Resharding on GPU."; \ - PD_VISIT_FLOATING_AND_INTEGRAL_TYPES( \ + PD_VISIT_BOOL_AND_FLOATING_AND_INTEGRAL_TYPES( \ dtype, #fn_name, ([&] { \ fn_name(static_cast(*dev_ctx), \ __VA_ARGS__); \ })); \ } else if (phi::GPUContext::classof(dev_ctx)) { \ VLOG(4) << "Call `" << #fn_name << "` in Resharding on CPU."; \ - PD_VISIT_FLOATING_AND_INTEGRAL_TYPES( \ + PD_VISIT_BOOL_AND_FLOATING_AND_INTEGRAL_TYPES( \ dtype, #fn_name, ([&] { \ fn_name(static_cast(*dev_ctx), \ __VA_ARGS__); \ diff --git a/paddle/phi/core/visit_type.h b/paddle/phi/core/visit_type.h index c5612b203d233..5206da6ec3785 100644 --- a/paddle/phi/core/visit_type.h +++ b/paddle/phi/core/visit_type.h @@ -148,6 +148,33 @@ namespace phi { } \ }() +///////// BOOL and Floating and Integral Dispatch Marco /////////// + +#define PD_VISIT_BOOL_AND_FLOATING_AND_INTEGRAL_TYPES(TYPE, NAME, ...) \ + [&] { \ + const auto& __dtype__ = TYPE; \ + switch (__dtype__) { \ + PD_PRIVATE_CASE_TYPE(NAME, ::phi::DataType::BOOL, bool, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::FLOAT32, float, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::FLOAT64, double, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT32, int, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::INT64, int64_t, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::INT8, int8_t, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::UINT8, uint8_t, __VA_ARGS__) \ + PD_PRIVATE_CASE_TYPE( \ + NAME, ::paddle::DataType::INT16, int16_t, __VA_ARGS__) \ + default: \ + PD_THROW("function " #NAME " is not implemented for data type `", \ + __dtype__, \ + "`"); \ + } \ + }() + ///////// Floating and Complex Dispatch Marco /////////// #define PD_VISIT_FLOATING_AND_COMPLEX_TYPES(TYPE, NAME, ...) 
\ diff --git a/paddle/phi/infermeta/spmd_rules/elementwise.cc b/paddle/phi/infermeta/spmd_rules/elementwise.cc index 3a9e422320210..9ec18bdaf50ce 100644 --- a/paddle/phi/infermeta/spmd_rules/elementwise.cc +++ b/paddle/phi/infermeta/spmd_rules/elementwise.cc @@ -314,6 +314,13 @@ SpmdInfo ElementwiseUnaryGradInferSpmd(const DistMetaTensor& x, return {{out_grad.dist_attr(), out_grad.dist_attr()}, {out_grad.dist_attr()}}; } +SpmdInfo ElementwiseUnaryGradInferSpmd(const DistMetaTensor& x, + const DistMetaTensor& out, + const DistMetaTensor& out_grad) { + return {{out_grad.dist_attr(), out_grad.dist_attr(), out_grad.dist_attr()}, + {out_grad.dist_attr()}}; +} + SpmdInfo ElementwiseBinaryGradInferSpmd(const DistMetaTensor& x, const DistMetaTensor& y, const DistMetaTensor& out_grad, @@ -381,5 +388,17 @@ SpmdInfo ElementwiseBinaryGradInferSpmd(const DistMetaTensor& x, {x_grad_dist_attr, y_grad_dist_attr}}; } +SpmdInfo ElementwiseBinaryGradInferSpmd(const DistMetaTensor& x, + const DistMetaTensor& y, + const DistMetaTensor& out, + const DistMetaTensor& out_grad, + int64_t axis) { + // out's dist_attr is the same as out_grad's dist_attr, so reuse + // ElementwiseBinaryGradInferSpmd(x, y, out_grad, axis) to infer the dist_attrs of + // {{x, y, out_grad}, {x_grad, y_grad}}, then insert out's dist_attr into the input list. + SpmdInfo info = ElementwiseBinaryGradInferSpmd(x, y, out_grad, axis); + info.first.emplace(info.first.begin() + 2, out_grad.dist_attr()); + return info; +} } // namespace distributed } // namespace phi diff --git a/paddle/phi/infermeta/spmd_rules/elementwise.h b/paddle/phi/infermeta/spmd_rules/elementwise.h index 637c3b793b6c4..2dd8d4c764a40 100644 --- a/paddle/phi/infermeta/spmd_rules/elementwise.h +++ b/paddle/phi/infermeta/spmd_rules/elementwise.h @@ -30,6 +30,10 @@ SpmdInfo ElementwiseUnaryInferSpmdReverse(const DistMetaTensor& x, SpmdInfo ElementwiseUnaryGradInferSpmd(const DistMetaTensor& x, const DistMetaTensor& out_grad); +SpmdInfo ElementwiseUnaryGradInferSpmd(const DistMetaTensor& x, + const DistMetaTensor& out, + const DistMetaTensor& out_grad); + SpmdInfo ElementwiseBinaryInferSpmd(const DistMetaTensor& x, const DistMetaTensor& y); @@ -42,5 +46,11 @@ SpmdInfo ElementwiseBinaryGradInferSpmd(const DistMetaTensor& x, const DistMetaTensor& out_grad, int64_t axis = -1); +SpmdInfo ElementwiseBinaryGradInferSpmd(const DistMetaTensor& x, + const DistMetaTensor& y, + const DistMetaTensor& out, + const DistMetaTensor& out_grad, + int64_t axis = -1); + } // namespace distributed } // namespace phi diff --git a/paddle/phi/kernels/cpu/elementwise_divide_grad_kernel.cc b/paddle/phi/kernels/cpu/elementwise_divide_grad_kernel.cc index 1a9a737866153..f09e09a1a14aa 100644 --- a/paddle/phi/kernels/cpu/elementwise_divide_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_divide_grad_kernel.cc @@ -50,6 +50,7 @@ PD_REGISTER_KERNEL(divide_grad, int16_t, int, int64_t, + bool, phi::dtype::complex<float>, phi::dtype::complex<double>) {} @@ -61,5 +62,6 @@ PD_REGISTER_KERNEL(divide_double_grad, double, int, int64_t, + bool, phi::dtype::complex<float>, phi::dtype::complex<double>) {} diff --git a/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc b/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc index a5fc4552bfbf2..b7fdefe023e73 100644 --- a/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc @@ -64,5 +64,6 @@ PD_REGISTER_KERNEL(divide, int16_t, int, int64_t, + bool, complex64, complex128) {} diff --git a/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu
b/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu index 783d94e8e7bb2..3261243c986c0 100644 --- a/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu @@ -329,6 +329,7 @@ PD_REGISTER_KERNEL(divide_grad, int16_t, int, int64_t, + bool, phi::dtype::complex<float>, phi::dtype::complex<double>) {} @@ -342,6 +343,7 @@ PD_REGISTER_KERNEL(divide_double_grad, double, int, int64_t, + bool, phi::dtype::complex<float>, phi::dtype::complex<double>) {} diff --git a/paddle/phi/kernels/kps/elementwise_kernel.cu b/paddle/phi/kernels/kps/elementwise_kernel.cu index d40f1bd7a7062..6de33dd78d2d0 100644 --- a/paddle/phi/kernels/kps/elementwise_kernel.cu +++ b/paddle/phi/kernels/kps/elementwise_kernel.cu @@ -312,6 +312,7 @@ PD_REGISTER_KERNEL(divide, int16_t, int, int64_t, + bool, float16, bfloat16, complex64, diff --git a/paddle/phi/kernels/legacy/cpu/elementwise_divide_kernel.cc b/paddle/phi/kernels/legacy/cpu/elementwise_divide_kernel.cc index ad09d6830f974..6f4debdcb216f 100644 --- a/paddle/phi/kernels/legacy/cpu/elementwise_divide_kernel.cc +++ b/paddle/phi/kernels/legacy/cpu/elementwise_divide_kernel.cc @@ -62,5 +62,6 @@ PD_REGISTER_KERNEL(divide_raw, double, int, int64_t, + bool, complex64, complex128) {} diff --git a/paddle/phi/kernels/legacy/kps/elementwise_kernel.cu b/paddle/phi/kernels/legacy/kps/elementwise_kernel.cu index 394d525b15f0f..ad802ee190861 100644 --- a/paddle/phi/kernels/legacy/kps/elementwise_kernel.cu +++ b/paddle/phi/kernels/legacy/kps/elementwise_kernel.cu @@ -82,6 +82,7 @@ PD_REGISTER_KERNEL(divide_raw, int16_t, int, int64_t, + bool, float16, bfloat16, complex64, diff --git a/test/auto_parallel/CMakeLists.txt b/test/auto_parallel/CMakeLists.txt index 6d7c8c149435b..aa1212c9ecc11 100644 --- a/test/auto_parallel/CMakeLists.txt +++ b/test/auto_parallel/CMakeLists.txt @@ -118,7 +118,7 @@ if(WITH_DISTRIBUTE AND WITH_GPU) py_test_modules(test_semi_auto_parallel_basic MODULES test_semi_auto_parallel_basic) set_tests_properties(test_semi_auto_parallel_basic - PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 120) + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 200) py_test_modules(test_semi_auto_parallel_single_strategy MODULES test_semi_auto_parallel_single_strategy) set_tests_properties(test_semi_auto_parallel_single_strategy diff --git a/test/auto_parallel/semi_auto_parallel_for_bitwise.py b/test/auto_parallel/semi_auto_parallel_for_bitwise.py new file mode 100644 index 0000000000000..1cbc6654b53b5 --- /dev/null +++ b/test/auto_parallel/semi_auto_parallel_for_bitwise.py @@ -0,0 +1,161 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +import os + +import numpy as np + +import paddle +import paddle.distributed as dist + + +class TestBitwiseApiForSemiAutoParallel: + def __init__(self): + self._dtype = os.getenv("dtype") + self._backend = os.getenv("backend") + self._seed = eval(os.getenv("seed")) + self._mesh = dist.ProcessMesh([0, 1], dim_names=["x"]) + self._check_grad = False + self._rtol = 1e-6 + self._atol = 0.0 + paddle.seed(self._seed) + np.random.seed(self._seed) + + def check_tensor_eq(self, a, b): + np1 = a.numpy() + np2 = b.numpy() + np.testing.assert_allclose( + np1, np2, rtol=self._rtol, atol=self._atol, verbose=True + ) + + def test_unary_body(self, x_shape, out_shape, x_specs, unary_func): + x = paddle.randint(0, 100, x_shape, self._dtype) + x.stop_gradient = False + + x_dist_attr = dist.DistAttr(mesh=self._mesh, sharding_specs=x_specs) + + dist_x = dist.shard_tensor(x, dist_attr=x_dist_attr) + dist_x.stop_gradient = False + + dist_out = unary_func(dist_x) + out = unary_func(x) + self.check_tensor_eq(out, dist_out) + if self._check_grad: + dist_out.backward() + out.backward() + self.check_tensor_eq(x.grad, dist_x.grad) + + def test_binary_body( + self, x_shape, y_shape, out_shape, x_specs, y_specs, binary_func + ): + x = paddle.randint(0, 100, x_shape, self._dtype) + y = paddle.randint(0, 100, y_shape, self._dtype) + x.stop_gradient = False + y.stop_gradient = False + + x_dist_attr = dist.DistAttr(mesh=self._mesh, sharding_specs=x_specs) + y_dist_attr = dist.DistAttr(mesh=self._mesh, sharding_specs=y_specs) + + dist_x = dist.shard_tensor(x, dist_attr=x_dist_attr) + dist_y = dist.shard_tensor(y, dist_attr=y_dist_attr) + dist_x.stop_gradient = False + dist_y.stop_gradient = False + + dist_out = binary_func(dist_x, dist_y) + out = binary_func(x, y) + self.check_tensor_eq(out, dist_out) + + if self._check_grad: + dist_out.backward() + out.backward() + self.check_tensor_eq(x.grad, dist_x.grad) + self.check_tensor_eq(y.grad, dist_y.grad) + + def test_bitwise_and_x_shard(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, None], + binary_func=paddle.bitwise_and, + ) + + def test_bitwise_and_x_shard_broadcast(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[2, 16, 32], + out_shape=[2, 16, 32], + x_specs=['x', None], + y_specs=[None, None, None], + binary_func=paddle.bitwise_and, + ) + + def test_bitwise_and_x_y_shard(self): + if self._backend == "cpu": + return + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, 'x'], + binary_func=paddle.bitwise_and, + ) + + def test_bitwise_and_x_y_shard_broadcast(self): + self.test_binary_body( + x_shape=[4, 16, 32], + y_shape=[16, 32], + out_shape=[4, 16, 32], + x_specs=['x', None, None], + y_specs=[None, None], + binary_func=paddle.bitwise_and, + ) + + def test_bitwise_not_x_shard(self): + self.test_unary_body( + x_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + unary_func=paddle.bitwise_not, + ) + + def test_bitwise_not_x_shard_broadcast(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[2, 16, 32], + out_shape=[2, 16, 32], + x_specs=['x', None], + y_specs=[None, None, None], + binary_func=paddle.bitwise_not, + ) + + def run_test_case(self): + if self._backend == "cpu": + paddle.set_device("cpu") + elif self._backend == "gpu": + paddle.set_device("gpu:" + str(dist.get_rank())) + else: + raise ValueError("Only support cpu or gpu backend.") + + self.test_bitwise_and_x_shard() + 
self.test_bitwise_and_x_shard_broadcast() + self.test_bitwise_and_x_y_shard() + self.test_bitwise_and_x_y_shard_broadcast() + self.test_bitwise_not_x_shard() + + +if __name__ == '__main__': + TestBitwiseApiForSemiAutoParallel().run_test_case() diff --git a/test/auto_parallel/semi_auto_parallel_for_compare.py b/test/auto_parallel/semi_auto_parallel_for_compare.py new file mode 100644 index 0000000000000..a174a9c9180e1 --- /dev/null +++ b/test/auto_parallel/semi_auto_parallel_for_compare.py @@ -0,0 +1,172 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import numpy as np + +import paddle +import paddle.distributed as dist + + +class TestCompareApiForSemiAutoParallel: + def __init__(self): + self._dtype = os.getenv("dtype") + self._backend = os.getenv("backend") + self._seed = eval(os.getenv("seed")) + self._mesh = dist.ProcessMesh([0, 1], dim_names=["x"]) + self._check_grad = False + self._rtol = 1e-6 + self._atol = 0.0 + paddle.seed(self._seed) + np.random.seed(self._seed) + + def check_tensor_eq(self, a, b): + np1 = a.numpy() + np2 = b.numpy() + np.testing.assert_allclose( + np1, np2, rtol=self._rtol, atol=self._atol, verbose=True + ) + + def test_binary_body( + self, x_shape, y_shape, out_shape, x_specs, y_specs, binary_func + ): + x = paddle.randn(x_shape, self._dtype) + y = paddle.randn(y_shape, self._dtype) + x.stop_gradient = False + y.stop_gradient = False + + x_dist_attr = dist.DistAttr(mesh=self._mesh, sharding_specs=x_specs) + y_dist_attr = dist.DistAttr(mesh=self._mesh, sharding_specs=y_specs) + + dist_x = dist.shard_tensor(x, dist_attr=x_dist_attr) + dist_y = dist.shard_tensor(y, dist_attr=y_dist_attr) + dist_x.stop_gradient = False + dist_y.stop_gradient = False + + dist_out = binary_func(dist_x, dist_y) + out = binary_func(x, y) + self.check_tensor_eq(out, dist_out) + + if self._check_grad: + dist_out.backward() + out.backward() + self.check_tensor_eq(x.grad, dist_x.grad) + self.check_tensor_eq(y.grad, dist_y.grad) + + def test_equal_x_shard(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, None], + binary_func=paddle.equal, + ) + + def test_equal_x_shard_broadcast(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[2, 16, 32], + out_shape=[2, 16, 32], + x_specs=['x', None], + y_specs=[None, None, None], + binary_func=paddle.equal, + ) + + def test_equal_x_y_shard(self): + if self._backend == "cpu": + return + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, 'x'], + binary_func=paddle.equal, + ) + + def test_equal_x_y_shard_broadcast(self): + self.test_binary_body( + x_shape=[4, 16, 32], + y_shape=[16, 32], + out_shape=[4, 16, 32], + x_specs=['x', None, None], + y_specs=[None, None], + binary_func=paddle.equal, + ) + + def test_not_equal_x_shard(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + 
out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, None], + binary_func=paddle.not_equal, + ) + + def test_not_equal_x_shard_broadcast(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[2, 16, 32], + out_shape=[2, 16, 32], + x_specs=['x', None], + y_specs=[None, None, None], + binary_func=paddle.not_equal, + ) + + def test_not_equal_x_y_shard(self): + if self._backend == "cpu": + return + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, 'x'], + binary_func=paddle.not_equal, + ) + + def test_not_equal_x_y_shard_broadcast(self): + self.test_binary_body( + x_shape=[4, 16, 32], + y_shape=[16, 32], + out_shape=[4, 16, 32], + x_specs=['x', None, None], + y_specs=[None, None], + binary_func=paddle.not_equal, + ) + + def run_test_case(self): + if self._backend == "cpu": + paddle.set_device("cpu") + elif self._backend == "gpu": + paddle.set_device("gpu:" + str(dist.get_rank())) + else: + raise ValueError("Only support cpu or gpu backend.") + + self.test_equal_x_shard() + self.test_equal_x_shard_broadcast() + self.test_equal_x_y_shard() + self.test_equal_x_y_shard_broadcast() + + self.test_not_equal_x_shard() + self.test_not_equal_x_shard_broadcast() + self.test_not_equal_x_y_shard() + self.test_not_equal_x_y_shard_broadcast() + + +if __name__ == '__main__': + TestCompareApiForSemiAutoParallel().run_test_case() diff --git a/test/auto_parallel/semi_auto_parallel_for_elementwise.py b/test/auto_parallel/semi_auto_parallel_for_elementwise.py index 2a55f7d02df03..0e737db45ecaf 100644 --- a/test/auto_parallel/semi_auto_parallel_for_elementwise.py +++ b/test/auto_parallel/semi_auto_parallel_for_elementwise.py @@ -27,14 +27,17 @@ def __init__(self): self._backend = os.getenv("backend") self._seed = eval(os.getenv("seed")) self._mesh = dist.ProcessMesh([0, 1], dim_names=["x"]) - + self._rtol = 1e-6 + self._atol = 0.0 paddle.seed(self._seed) np.random.seed(self._seed) def check_tensor_eq(self, a, b): np1 = a.numpy() np2 = b.numpy() - np.testing.assert_allclose(np1, np2, rtol=1e-05, verbose=True) + np.testing.assert_allclose( + np1, np2, rtol=self._rtol, atol=self._atol, verbose=True + ) def test_unary_body(self, x_shape, out_shape, x_specs, unary_func): x = paddle.randn(x_shape, self._dtype) @@ -100,9 +103,9 @@ def test_sub_x_shard(self): def test_add_x_shard_broadcast(self): self.test_binary_body( - x_shape=[16, 32], - y_shape=[2, 16, 32], - out_shape=[2, 16, 32], + x_shape=[8, 16], + y_shape=[2, 8, 16], + out_shape=[2, 8, 16], x_specs=['x', None], y_specs=[None, None, None], binary_func=paddle.add, @@ -206,6 +209,256 @@ def test_maximum_x_y_shard_broadcast(self): binary_func=paddle.maximum, ) + def test_multiply_x_shard(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, None], + binary_func=paddle.multiply, + ) + + def test_multiply_x_shard_broadcast(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[2, 16, 32], + out_shape=[2, 16, 32], + x_specs=['x', None], + y_specs=[None, None, None], + binary_func=paddle.multiply, + ) + + def test_multiply_x_y_shard(self): + if self._backend == "cpu": + return + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, 'x'], + binary_func=paddle.multiply, + ) + + def test_multiply_x_y_shard_broadcast(self): + self.test_binary_body( + x_shape=[4, 6, 8], + y_shape=[6, 8], + out_shape=[4, 6, 8], + x_specs=['x', None, 
None], + y_specs=[None, None], + binary_func=paddle.multiply, + ) + + def test_divide_x_shard(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, None], + binary_func=paddle.divide, + ) + + def test_divide_x_shard_broadcast(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[2, 16, 32], + out_shape=[2, 16, 32], + x_specs=['x', None], + y_specs=[None, None, None], + binary_func=paddle.divide, + ) + + def test_divide_x_y_shard(self): + if self._backend == "cpu": + return + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, 'x'], + binary_func=paddle.divide, + ) + + def test_divide_x_y_shard_broadcast(self): + self.test_binary_body( + x_shape=[2, 4, 6], + y_shape=[4, 6], + out_shape=[2, 4, 6], + x_specs=['x', None, None], + y_specs=[None, None], + binary_func=paddle.divide, + ) + + def test_bitwise_and_x_shard(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, None], + binary_func=paddle.bitwise_and, + ) + + def test_bitwise_and_x_shard_broadcast(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[2, 16, 32], + out_shape=[2, 16, 32], + x_specs=['x', None], + y_specs=[None, None, None], + binary_func=paddle.bitwise_and, + ) + + def test_bitwise_and_x_y_shard(self): + if self._backend == "cpu": + return + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, 'x'], + binary_func=paddle.bitwise_and, + ) + + def test_bitwise_and_x_y_shard_broadcast(self): + self.test_binary_body( + x_shape=[4, 16, 32], + y_shape=[16, 32], + out_shape=[4, 16, 32], + x_specs=['x', None, None], + y_specs=[None, None], + binary_func=paddle.bitwise_and, + ) + + def test_elementwise_pow_x_shard(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, None], + binary_func=paddle.pow, + ) + + def test_elementwise_pow_x_shard_broadcast(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[2, 16, 32], + out_shape=[2, 16, 32], + x_specs=['x', None], + y_specs=[None, None, None], + binary_func=paddle.pow, + ) + + def test_elementwise_pow_x_y_shard(self): + if self._backend == "cpu": + return + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, 'x'], + binary_func=paddle.pow, + ) + + def test_elementwise_pow_x_y_shard_broadcast(self): + self.test_binary_body( + x_shape=[4, 6, 8], + y_shape=[6, 8], + out_shape=[4, 6, 8], + x_specs=['x', None, None], + y_specs=[None, None], + binary_func=paddle.pow, + ) + + def test_equal_x_shard(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, None], + binary_func=paddle.equal, + ) + + def test_equal_x_shard_broadcast(self): + self.test_binary_body( + x_shape=[16, 32], + y_shape=[2, 16, 32], + out_shape=[2, 16, 32], + x_specs=['x', None], + y_specs=[None, None, None], + binary_func=paddle.equal, + ) + + def test_equal_x_y_shard(self): + if self._backend == "cpu": + return + self.test_binary_body( + x_shape=[16, 32], + y_shape=[16, 32], + out_shape=[16, 32], + x_specs=['x', None], + y_specs=[None, 'x'], + binary_func=paddle.equal, + ) + + def test_equal_x_y_shard_broadcast(self): + self.test_binary_body( + x_shape=[2, 6, 4], + 
y_shape=[6, 4], + out_shape=[2, 6, 4], + x_specs=['x', None, None], + y_specs=[None, None], + binary_func=paddle.equal, + ) + + def test_exp_x_shard(self): + self.test_unary_body( + x_shape=[4, 16], + out_shape=[4, 16], + x_specs=['x', None], + unary_func=paddle.exp, + ) + + def test_rsqrt_x_shard(self): + self.test_unary_body( + x_shape=[4, 16], + out_shape=[4, 16], + x_specs=['x', None], + unary_func=paddle.rsqrt, + ) + + def test_silu_x_shard(self): + self.test_unary_body( + x_shape=[4, 16], + out_shape=[4, 16], + x_specs=['x', None], + unary_func=paddle.nn.functional.silu, + ) + + def test_sin_x_shard(self): + self.test_unary_body( + x_shape=[4, 16], + out_shape=[4, 16], + x_specs=['x', None], + unary_func=paddle.sin, + ) + + def test_cos_x_shard(self): + self.test_unary_body( + x_shape=[4, 16], + out_shape=[4, 16], + x_specs=['x', None], + unary_func=paddle.cos, + ) + def run_test_case(self): if self._backend == "cpu": paddle.set_device("cpu") @@ -222,6 +475,27 @@ def run_test_case(self): self.test_sub_x_y_shard_broadcast() self.test_square_x_shard() self.test_relu_x_shard() + self.test_maximum_x_shard() + self.test_maximum_x_shard_broadcast() + self.test_maximum_x_y_shard() + self.test_maximum_x_y_shard_broadcast() + self.test_multiply_x_shard() + self.test_multiply_x_shard_broadcast() + self.test_multiply_x_y_shard() + self.test_multiply_x_y_shard_broadcast() + self.test_divide_x_shard() + self.test_divide_x_shard_broadcast() + self.test_divide_x_y_shard() + self.test_divide_x_y_shard_broadcast() + self.test_elementwise_pow_x_shard() + self.test_elementwise_pow_x_shard_broadcast() + self.test_elementwise_pow_x_y_shard() + self.test_elementwise_pow_x_y_shard_broadcast() + self.test_exp_x_shard() + self.test_rsqrt_x_shard() + self.test_silu_x_shard() + self.test_sin_x_shard() + self.test_cos_x_shard() if __name__ == '__main__': diff --git a/test/auto_parallel/test_semi_auto_parallel_basic.py b/test/auto_parallel/test_semi_auto_parallel_basic.py index 56cabcb318f3d..3730e019f7506 100644 --- a/test/auto_parallel/test_semi_auto_parallel_basic.py +++ b/test/auto_parallel/test_semi_auto_parallel_basic.py @@ -56,6 +56,16 @@ def test_reduction_api(self): user_defined_envs=envs, ) + def test_bitwise_api(self): + envs_list = test_base.gen_product_envs_list( + {"dtype": "int32", "seed": "2023"}, self._changeable_envs + ) + for envs in envs_list: + self.run_test_case( + "semi_auto_parallel_for_bitwise.py", + user_defined_envs=envs, + ) + def test_several_replicated_spmd_api(self): envs_list = test_base.gen_product_envs_list( self._default_envs, self._changeable_envs
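
Reviewer note (appended after the patch, not part of it): the snippet below is a minimal usage sketch of what the new spmd_rule entries enable, modeled directly on semi_auto_parallel_for_elementwise.py above. It assumes the same two-rank setup the tests use (a ProcessMesh over ranks [0, 1], run under a distributed launcher such as the test driver invoked by test_semi_auto_parallel_basic.py); the tensor shapes and variable names are illustrative only.

import paddle
import paddle.distributed as dist

mesh = dist.ProcessMesh([0, 1], dim_names=["x"])

# Dense inputs; x will be sharded along the mesh axis "x", y stays replicated.
x = paddle.randn([16, 32], dtype="float32")
y = paddle.randn([16, 32], dtype="float32")
x.stop_gradient = False
y.stop_gradient = False

dist_x = dist.shard_tensor(
    x, dist_attr=dist.DistAttr(mesh=mesh, sharding_specs=["x", None])
)
dist_y = dist.shard_tensor(
    y, dist_attr=dist.DistAttr(mesh=mesh, sharding_specs=[None, None])
)
dist_x.stop_gradient = False
dist_y.stop_gradient = False

# Forward dispatches through ElementwiseBinaryInferSpmd (the new spmd_rule on
# divide in legacy_ops.yaml), so the output's dist_attr is inferred from the
# inputs rather than falling back to a default.
dist_out = paddle.divide(dist_x, dist_y)

# Backward dispatches through ElementwiseBinaryGradInferSpmd
# (legacy_backward.yaml), propagating dist_attrs to x_grad / y_grad as well.
dist_out.backward()
print(dist_x.grad)

# Unary ops covered by this change (sin, cos, exp, rsqrt, silu) go through
# ElementwiseUnaryInferSpmd / ElementwiseUnaryGradInferSpmd the same way.
dist_sin = paddle.sin(dist_x)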