From 4f57da5fa6866a81f47ba90a8c9573648bdff11d Mon Sep 17 00:00:00 2001 From: Zhou Wei <1183042833@qq.com> Date: Thu, 17 Nov 2022 11:19:58 +0800 Subject: [PATCH 1/5] [Zero-Dim] temporarily revert create_scalar due to input 0D is not fully supported (#48058) --- python/paddle/fluid/layers/math_op_patch.py | 3 ++- python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/layers/math_op_patch.py b/python/paddle/fluid/layers/math_op_patch.py index fb3979434347f4..f9ba6498671617 100644 --- a/python/paddle/fluid/layers/math_op_patch.py +++ b/python/paddle/fluid/layers/math_op_patch.py @@ -99,7 +99,8 @@ def create_tensor(block, value, dtype, shape): return var def create_scalar(block, value, dtype): - return create_tensor(block, value, dtype, shape=[]) + # TODO(zhouwei): will change to [] which is 0-D Tensor + return create_tensor(block, value, dtype, shape=[1]) def create_tensor_with_batchsize(ref_var, value, dtype): assert isinstance(ref_var, Variable) diff --git a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py index c85f5aec42e9f6..174172b026f21f 100644 --- a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py @@ -350,7 +350,7 @@ def test_dygraph_binary(self): paddle.enable_static() - def test_static_unary(self): + def test_static_binary(self): paddle.enable_static() for api in binary_api_list + binary_api_list_without_grad: main_prog = fluid.Program() @@ -377,15 +377,19 @@ def test_static_unary(self): # Test runtime shape self.assertEqual(out_np.shape, ()) + # TODO(zhouwei): will open when create_scalar is [] # 2) x is 0D , y is scalar + ''' x = paddle.rand([]) y = 0.5 x.stop_gradient = False + print(api) if isinstance(api, dict): out = getattr(paddle.static.Variable, api['cls_method'])( x, y ) self.assertEqual(out.shape, ()) + 
''' for api in binary_int_api_list_without_grad: main_prog = fluid.Program() From e5ed5257083b92b018330812c33c746bae26fb41 Mon Sep 17 00:00:00 2001 From: Yuang Liu Date: Thu, 17 Nov 2022 11:22:47 +0800 Subject: [PATCH 2/5] Support bfloat16 for adamw and adam optimizer. Fit the lr for pure bf16 training with tensor fusion. (#48041) * add bfloat16 for adamw * set lr not to bfloat16 for pure bf16 training * update the logic * update the adamw optimizer * support bfloat for adam --- paddle/fluid/pybind/eager_functions.cc | 3 ++- paddle/phi/kernels/gpu/adamw_kernel.cu | 4 ++- python/paddle/optimizer/adam.py | 34 ++++++++++++-------------- python/paddle/optimizer/adamw.py | 17 ++++++------- python/paddle/optimizer/optimizer.py | 26 +++++++++++++++++--- 5 files changed, 51 insertions(+), 33 deletions(-) diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index cdace567b2e9d4..3389daf330c7c8 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -268,7 +268,8 @@ PyObject* eager_api_get_grads_types(PyObject* self, if (meta && grad.initialized()) { if (grad.is_dense_tensor() && (tensor.dtype() == paddle::experimental::DataType::FLOAT32 || - tensor.dtype() == paddle::experimental::DataType::FLOAT16)) { + tensor.dtype() == paddle::experimental::DataType::FLOAT16 || + tensor.dtype() == paddle::experimental::DataType::BFLOAT16)) { ret.emplace_back( paddle::framework::TransToProtoVarType(tensor.dtype())); } diff --git a/paddle/phi/kernels/gpu/adamw_kernel.cu b/paddle/phi/kernels/gpu/adamw_kernel.cu index 9ddaacdd5cc6bb..6994c83f53624a 100644 --- a/paddle/phi/kernels/gpu/adamw_kernel.cu +++ b/paddle/phi/kernels/gpu/adamw_kernel.cu @@ -21,6 +21,7 @@ #include "paddle/fluid/framework/tensor_util.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/amp_type_traits.h" +#include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/float16.h" #include 
"paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" @@ -300,7 +301,8 @@ PD_REGISTER_KERNEL(adamw, phi::AdamwDenseKernel, float, double, - phi::dtype::float16) { + phi::dtype::float16, + phi::dtype::bfloat16) { // Skip beta1_pow, beta2_pow, skip_update data transform kernel->InputAt(5).SetBackend(phi::Backend::ALL_BACKEND); kernel->InputAt(6).SetBackend(phi::Backend::ALL_BACKEND); diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index 74499b05f24ae5..aa76fb82759f18 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -28,7 +28,7 @@ __all__ = [] -GRAD_TYPES = [int(paddle.float32), int(paddle.float16)] +GRAD_TYPES = [int(paddle.float32), int(paddle.float16), int(paddle.bfloat16)] class Adam(Optimizer): @@ -265,8 +265,8 @@ def _get_accumulator(self, name, param): """ if self._name is not None: name = self._name + "_" + name - find_master = ( - self._multi_precision and param.dtype == core.VarDesc.VarType.FP16 + find_master = self._multi_precision and self._is_dtype_fp16_or_bf16( + param.dtype ) target_param = ( self._master_weights[param.name] if find_master else param @@ -285,10 +285,7 @@ def _get_accumulator(self, name, param): def _add_moments_pows(self, p): acc_dtype = p.dtype - if ( - acc_dtype == core.VarDesc.VarType.FP16 - or acc_dtype == core.VarDesc.VarType.BF16 - ): + if self._is_dtype_fp16_or_bf16(acc_dtype): acc_dtype = core.VarDesc.VarType.FP32 self._add_accumulator(self._moment1_acc_str, p, dtype=acc_dtype) self._add_accumulator(self._moment2_acc_str, p, dtype=acc_dtype) @@ -322,16 +319,16 @@ def _create_accumulators(self, block, parameters): # Create accumulator tensors for first and second moments for p in parameters: - if self._multi_precision and p.dtype == core.VarDesc.VarType.FP16: + if self._multi_precision and self._is_dtype_fp16_or_bf16(p.dtype): master_p = self._create_master_weight(p) self._add_moments_pows(master_p) continue if ( - p.dtype == 
core.VarDesc.VarType.FP16 + self._is_dtype_fp16_or_bf16(p.dtype) and not self._multi_precision ): warnings.warn( - "Accumulating with FP16 in optimizer can lead to poor accuracy or slow convergence." + "Accumulating with FP16 or BF16 in optimizer can lead to poor accuracy or slow convergence." "Consider using multi_precision=True option of the Adam optimizer." ) self._add_moments_pows(p) @@ -353,9 +350,8 @@ def _append_optimize_op(self, block, param_and_grad): beta2_pow_acc = self._get_accumulator( self._beta2_pow_acc_str, param_and_grad[0] ) - find_master = ( - self._multi_precision - and param_and_grad[0].dtype == core.VarDesc.VarType.FP16 + find_master = self._multi_precision and self._is_dtype_fp16_or_bf16( + param_and_grad[0].dtype ) master_weight = ( self._master_weights[param_and_grad[0].name] @@ -571,7 +567,7 @@ def step(self): def _multi_tensor_init(self, target_block, parameters, param_group_idx): """ - All parameters used for optimizer (such as: parameters, master_weight, velocity_acc for momentum) calculations are grouped into a python list by data type (float16, float32). + All parameters used for optimizer (such as: parameters, master_weight, velocity_acc for momentum) calculations are grouped into a python list by data type (bfloat16, float16, float32). This function will be overridden in the corresponding optimizer file. 
Args: target_block: the block in which the loss tensor is present @@ -604,7 +600,7 @@ def _multi_tensor_init(self, target_block, parameters, param_group_idx): self._beta2_pow_acc_dict['FP32_LODTensor'][ param_group_idx ].append(beta2_pow_acc) - elif param.dtype == paddle.float16: + elif self._is_dtype_fp16_or_bf16(param.dtype): self._param_dict['FP16_LODTensor'][param_group_idx].append( param ) @@ -628,7 +624,7 @@ def _multi_tensor_init(self, target_block, parameters, param_group_idx): self._master_weight_dict['FP16_LODTensor'] = None else: raise ValueError( - "Now multi_tensor_momentum only support fp32 and fp16 parameters and grad is LOD_TENSOR." + "Now multi_tensor_momentum only support fp32, fp16 or bf16 parameters and grad is LOD_TENSOR." ) def _append_optimize_multi_tensor_op( @@ -656,7 +652,7 @@ def _append_optimize_multi_tensor_op( ) lr = self._create_param_lr(parameters_and_grads[index]) lr_dict['FP32_LODTensor'].append(lr) - elif tp == GRAD_TYPES[1]: + elif tp == GRAD_TYPES[1] or tp == GRAD_TYPES[2]: grad_dict['FP16_LODTensor'].append( parameters_and_grads[index][1] ) @@ -678,7 +674,7 @@ def _append_optimize_multi_tensor_op( lr = self._create_param_lr(param_and_grad) lr_dict['FP32_LODTensor'].append(lr) elif ( - param_and_grad[0].dtype == paddle.float16 + self._is_dtype_fp16_or_bf16(param_and_grad[0].dtype) and param_and_grad[1].type == core.VarDesc.VarType.LOD_TENSOR ): @@ -711,7 +707,7 @@ def _append_optimize_multi_tensor_op( lr = self._create_param_lr(param_and_grad) lr_dict['FP32_LODTensor'].append(lr) elif ( - param_and_grad[0].dtype == paddle.float16 + self._is_dtype_fp16_or_bf16(param_and_grad[0].dtype) and param_and_grad[1].type == core.VarDesc.VarType.LOD_TENSOR ): diff --git a/python/paddle/optimizer/adamw.py b/python/paddle/optimizer/adamw.py index dca844b6682759..5424331a71fa93 100644 --- a/python/paddle/optimizer/adamw.py +++ b/python/paddle/optimizer/adamw.py @@ -369,8 +369,8 @@ def _get_accumulator(self, name, param): """ if self._name is 
not None: name = self._name + "_" + name - find_master = ( - self._multi_precision and param.dtype == core.VarDesc.VarType.FP16 + find_master = self._multi_precision and self._is_dtype_fp16_or_bf16( + param.dtype ) target_param = ( self._master_weights[param.name] if find_master else param @@ -389,7 +389,7 @@ def _get_accumulator(self, name, param): def _add_moments_pows(self, p): acc_dtype = p.dtype - if acc_dtype == core.VarDesc.VarType.FP16: + if self._is_dtype_fp16_or_bf16(acc_dtype): acc_dtype = core.VarDesc.VarType.FP32 self._add_accumulator(self._moment1_acc_str, p, dtype=acc_dtype) self._add_accumulator(self._moment2_acc_str, p, dtype=acc_dtype) @@ -423,16 +423,16 @@ def _create_accumulators(self, block, parameters): # Create accumulator tensors for first and second moments for p in parameters: - if self._multi_precision and p.dtype == core.VarDesc.VarType.FP16: + if self._multi_precision and self._is_dtype_fp16_or_bf16(p.dtype): master_p = self._create_master_weight(p) self._add_moments_pows(master_p) continue if ( - p.dtype == core.VarDesc.VarType.FP16 + self._is_dtype_fp16_or_bf16(p.dtype) and not self._multi_precision ): warnings.warn( - "Accumulating with FP16 in optimizer can lead to poor accuracy or slow convergence." + "Accumulating with FP16 or BF16 in optimizer can lead to poor accuracy or slow convergence." "Consider using multi_precision=True option of the Adam optimizer." 
) self._add_moments_pows(p) @@ -463,9 +463,8 @@ def _append_optimize_op(self, block, param_and_grad): beta2_pow_acc = self._get_accumulator( self._beta2_pow_acc_str, param_and_grad[0] ) - find_master = ( - self._multi_precision - and param_and_grad[0].dtype == core.VarDesc.VarType.FP16 + find_master = self._multi_precision and self._is_dtype_fp16_or_bf16( + param_and_grad[0].dtype ) master_weight = ( self._master_weights[param_and_grad[0].name] diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py index 26ae5b50269b2f..59663bb8190886 100644 --- a/python/paddle/optimizer/optimizer.py +++ b/python/paddle/optimizer/optimizer.py @@ -421,15 +421,21 @@ def get_opti_var_name_list(self): return self._opti_name_list def _create_global_learning_rate(self): - # lr var can't be float16, for pure fp16 training, should extra handle the dtype for lr + # lr var can't be float16 or bfloat16, for pure fp16 or bf16 training, should extra handle the dtype for lr _lr_dtype = ( paddle.get_default_dtype() if self._dtype is None else self._dtype ) _lr_dtype = ( paddle.float32 if ( - paddle.get_default_dtype() != "float16" - and _lr_dtype == paddle.float16 + ( + paddle.get_default_dtype() != "float16" + and _lr_dtype == paddle.float16 + ) + or ( + paddle.get_default_dtype() != "bfloat16" + and _lr_dtype == paddle.bfloat16 + ) ) else _lr_dtype ) @@ -1526,3 +1532,17 @@ def _append_optimize_multi_tensor_op( For Multi Tensor, append optimize merged_operator to block. """ pass + + def _is_dtype_fp16_or_bf16(self, dtype): + """ + check the dtype is fp16 or the dtype is bf16 + :param dtype: instance of core.VarDesc.VarType + :return: True if dtype is one of fp16 or bf16, False otherwise + """ + assert isinstance( + dtype, core.VarDesc.VarType + ), "The dtype should be an instance of core.VarDesc.VarType." 
+ return ( + dtype == core.VarDesc.VarType.FP16 + or dtype == core.VarDesc.VarType.BF16 + ) From f62bd3b490b151fce074d1cd11389161b1b0acbd Mon Sep 17 00:00:00 2001 From: huangjiyi <43315610+huangjiyi@users.noreply.github.com> Date: Thu, 17 Nov 2022 11:29:36 +0800 Subject: [PATCH 3/5] [PHI decoupling] move "paddle/fluid/operators/math.h" to phi (#48062) * rm "paddle/fluid/operators/math.h" in phi * rm "paddle/fluid/operators/math.h" in fluit --- paddle/fluid/operators/cross_entropy_op.h | 4 +-- paddle/fluid/operators/dequantize_log_op.cu | 1 - .../detection/sigmoid_focal_loss_op.cu | 32 +++++++++++-------- paddle/fluid/operators/math/cross_entropy.cu | 13 ++++---- .../sequence_ops/sequence_softmax_op.cu | 6 ++-- paddle/phi/kernels/cpu/bce_loss_kernel.cc | 7 ++-- .../phi/kernels/cpu/nll_loss_grad_kernel.cc | 2 +- paddle/phi/kernels/funcs/functors.h | 8 ++--- .../operators => phi/kernels/funcs}/math.h | 20 ++++++------ paddle/phi/kernels/gpu/nll_loss.h | 2 +- .../gpu/sigmoid_cross_entropy_with_logits.h | 2 +- ...d_cross_entropy_with_logits_grad_kernel.cu | 4 +-- ...igmoid_cross_entropy_with_logits_kernel.cu | 5 ++- paddle/phi/kernels/impl/selu_kernel_impl.h | 4 +-- 14 files changed, 55 insertions(+), 55 deletions(-) rename paddle/{fluid/operators => phi/kernels/funcs}/math.h (69%) diff --git a/paddle/fluid/operators/cross_entropy_op.h b/paddle/fluid/operators/cross_entropy_op.h index 2949dc8d1fb2a1..4dcaf7b99f0914 100644 --- a/paddle/fluid/operators/cross_entropy_op.h +++ b/paddle/fluid/operators/cross_entropy_op.h @@ -15,9 +15,9 @@ limitations under the License. 
*/ #pragma once #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/math/cross_entropy.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/phi/kernels/funcs/math.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { @@ -190,7 +190,7 @@ struct HardLabelCrossEntropyForwardFunctor { label); auto match_x = x_[idx * feature_size_ + label]; - y_[idx] = -math::TolerableValue()(real_log(match_x)); + y_[idx] = -math::TolerableValue()(phi::funcs::real_log(match_x)); match_x_[idx] = match_x; } else { y_[idx] = 0; diff --git a/paddle/fluid/operators/dequantize_log_op.cu b/paddle/fluid/operators/dequantize_log_op.cu index 360871f9e7251b..4a1976f6fdd685 100644 --- a/paddle/fluid/operators/dequantize_log_op.cu +++ b/paddle/fluid/operators/dequantize_log_op.cu @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/dequantize_log_op.h" -#include "paddle/fluid/operators/math.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/core/hostdevice.h" diff --git a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu index bad93fd22b2e9b..76a47581e9f727 100644 --- a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu +++ b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/detection/sigmoid_focal_loss_op.h" -#include "paddle/fluid/operators/math.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/core/hostdevice.h" +#include "paddle/phi/kernels/funcs/math.h" namespace paddle { namespace operators { @@ -55,15 +55,16 @@ __global__ void GPUSigmoidFocalLossForward(const T *x_data, T s_pos = alpha / fg_num; // p = 1. / 1. + expf(-x) - T p = 1. / (1. + real_exp(-x)); + T p = 1. / (1. + phi::funcs::real_exp(-x)); // (1 - p)**gamma * log(p) T term_pos = std::pow(static_cast(1. - p), gamma) * - real_log(p > FLT_MIN ? p : FLT_MIN); + phi::funcs::real_log(p > FLT_MIN ? p : FLT_MIN); // p**gamma * log(1 - p) - T term_neg = - std::pow(p, gamma) * - (-1. * x * (x >= 0) - real_log(1. + real_exp(x - 2. * x * (x >= 0)))); + T term_neg = std::pow(p, gamma) * + (-1. * x * (x >= 0) - + phi::funcs::real_log( + 1. + phi::funcs::real_exp(x - 2. * x * (x >= 0)))); out_data[i] = 0.0; out_data[i] += -c_pos * term_pos * s_pos; @@ -96,17 +97,20 @@ __global__ void GPUSigmoidFocalLossBackward(const T *x_data, T c_pos = static_cast(g == (d + 1)); T c_neg = static_cast((g != -1) & (g != (d + 1))); - T p = 1. / (1. + real_exp(-x)); + T p = 1. / (1. + phi::funcs::real_exp(-x)); // (1-p)**g * (1 - p - g*p*log(p)) - T term_pos = std::pow(static_cast(1. - p), gamma) * - (1. - p - (p * gamma * real_log(p > FLT_MIN ? p : FLT_MIN))); + T term_pos = + std::pow(static_cast(1. - p), gamma) * + (1. - p - + (p * gamma * phi::funcs::real_log(p > FLT_MIN ? p : FLT_MIN))); // (p**g) * (g*(1-p)*log(1-p) - p) - T term_neg = - std::pow(p, gamma) * - ((-1. * x * (x >= 0) - real_log(1. + real_exp(x - 2. * x * (x >= 0)))) * - (1. - p) * gamma - - p); + T term_neg = std::pow(p, gamma) * + ((-1. * x * (x >= 0) - + phi::funcs::real_log( + 1. + phi::funcs::real_exp(x - 2. * x * (x >= 0)))) * + (1. 
- p) * gamma - + p); dx_data[i] = 0.0; dx_data[i] += -c_pos * s_pos * term_pos; diff --git a/paddle/fluid/operators/math/cross_entropy.cu b/paddle/fluid/operators/math/cross_entropy.cu index 0e5b95542455e3..478c4e0cd6611b 100644 --- a/paddle/fluid/operators/math/cross_entropy.cu +++ b/paddle/fluid/operators/math/cross_entropy.cu @@ -14,10 +14,10 @@ limitations under the License. */ #include "paddle/fluid/operators/math/cross_entropy.h" #include "paddle/fluid/framework/convert_utils.h" -#include "paddle/fluid/operators/math.h" #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/kernels/funcs/math.h" namespace paddle { namespace operators { @@ -39,9 +39,10 @@ __global__ void CrossEntropyKernel(T* Y, D, ignore_index, lbl); - Y[i] = ignore_index == lbl - ? static_cast(0) - : -math::TolerableValue()(real_log(X[i * D + lbl])); + Y[i] = + ignore_index == lbl + ? 
static_cast(0) + : -math::TolerableValue()(phi::funcs::real_log(X[i * D + lbl])); } } @@ -56,7 +57,7 @@ __global__ void SoftCrossEntropyKernel(T* Y, int idx = blockIdx.x * class_num + tid; int end = blockIdx.x * class_num + class_num; for (; idx < end; idx += blockDim.x) { - val += math::TolerableValue()(real_log(X[idx])) * label[idx]; + val += math::TolerableValue()(phi::funcs::real_log(X[idx])) * label[idx]; } val = paddle::platform::reduceSum(val, tid, blockDim.x); @@ -152,7 +153,7 @@ void CrossEntropyFunctor::operator()( template class CrossEntropyFunctor; template class CrossEntropyFunctor; -template class CrossEntropyFunctor; +template class CrossEntropyFunctor; } // namespace math } // namespace operators diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu index 29f562ec5eca24..e58cff60aea485 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu @@ -23,8 +23,8 @@ limitations under the License. 
*/ namespace cub = hipcub; #endif -#include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/sequence_ops/sequence_softmax_op.h" +#include "paddle/phi/kernels/funcs/math.h" namespace paddle { namespace operators { @@ -67,7 +67,7 @@ __global__ void sequence_softmax_kernel(const T *in_data, T sum_data = 0; for (int tid = threadIdx.x; tid < span; tid += blockDim.x) { T ele = in_data[start + tid]; - sum_data += real_exp(ele - shared_max_data); + sum_data += phi::funcs::real_exp(ele - shared_max_data); } sum_data = BlockReduce(temp_storage).Reduce(sum_data, cub::Sum()); @@ -79,7 +79,7 @@ __global__ void sequence_softmax_kernel(const T *in_data, // get final resit for (int tid = threadIdx.x; tid < span; tid += blockDim.x) { T ele = in_data[start + tid]; - ele = real_exp(ele - shared_max_data) / shared_sum_data; + ele = phi::funcs::real_exp(ele - shared_max_data) / shared_sum_data; out_data[start + tid] = ele; } } diff --git a/paddle/phi/kernels/cpu/bce_loss_kernel.cc b/paddle/phi/kernels/cpu/bce_loss_kernel.cc index 9d62fabcbe736a..7b980162016664 100644 --- a/paddle/phi/kernels/cpu/bce_loss_kernel.cc +++ b/paddle/phi/kernels/cpu/bce_loss_kernel.cc @@ -16,9 +16,9 @@ #include // for max -#include "paddle/fluid/operators/math.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/math.h" namespace phi { @@ -47,10 +47,9 @@ void BCELossKernel(const Context& dev_ctx, "Illegal input, input must be less than or equal to 1")); out_data[i] = (label_data[i] - static_cast(1)) * - std::max(paddle::operators::real_log(static_cast(1) - x_data[i]), + std::max(phi::funcs::real_log(static_cast(1) - x_data[i]), (T)(-100)) - - label_data[i] * - std::max(paddle::operators::real_log(x_data[i]), (T)(-100)); + label_data[i] * std::max(phi::funcs::real_log(x_data[i]), (T)(-100)); } } diff --git a/paddle/phi/kernels/cpu/nll_loss_grad_kernel.cc b/paddle/phi/kernels/cpu/nll_loss_grad_kernel.cc index 
9048e87d049895..c84b3d4efbb88c 100644 --- a/paddle/phi/kernels/cpu/nll_loss_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/nll_loss_grad_kernel.cc @@ -17,9 +17,9 @@ #include #include -#include "paddle/fluid/operators/math.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/math.h" namespace phi { template diff --git a/paddle/phi/kernels/funcs/functors.h b/paddle/phi/kernels/funcs/functors.h index d518a877b26f2c..2e6fe8b2d738bc 100644 --- a/paddle/phi/kernels/funcs/functors.h +++ b/paddle/phi/kernels/funcs/functors.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/fluid/operators/amp/fp16_type_traits.h" -#include "paddle/fluid/operators/math.h" +#include "paddle/phi/kernels/funcs/math.h" namespace phi { namespace funcs { @@ -89,8 +89,7 @@ struct TanhFunctor { // y = 2 / (1 + e^-2x) - 1 T t0 = static_cast(2) * x; T t1 = (t0 < kMin) ? kMin : ((t0 > kMax) ? kMax : t0); - return static_cast(2) / - (static_cast(1) + paddle::operators::real_exp(-t1)) - + return static_cast(2) / (static_cast(1) + phi::funcs::real_exp(-t1)) - static_cast(1); } }; @@ -111,8 +110,7 @@ struct SigmoidFunctor { inline HOSTDEVICE T operator()(T x) { // y = 1 / (1 + e^-x) T tmp = (x < kMin) ? kMin : ((x > kMax) ? kMax : x); - return static_cast(1) / - (static_cast(1) + paddle::operators::real_exp(-tmp)); + return static_cast(1) / (static_cast(1) + phi::funcs::real_exp(-tmp)); } }; diff --git a/paddle/fluid/operators/math.h b/paddle/phi/kernels/funcs/math.h similarity index 69% rename from paddle/fluid/operators/math.h rename to paddle/phi/kernels/funcs/math.h index 47281fb0280f0f..f8c373badf187a 100644 --- a/paddle/fluid/operators/math.h +++ b/paddle/phi/kernels/funcs/math.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,22 +15,22 @@ #pragma once #include "math.h" // NOLINT -#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/core/hostdevice.h" -namespace paddle { -namespace operators { +namespace phi { +namespace funcs { -inline HOSTDEVICE platform::float16 real_exp(platform::float16 x) { - return static_cast(::expf(static_cast(x))); +inline HOSTDEVICE phi::dtype::float16 real_exp(phi::dtype::float16 x) { + return static_cast(::expf(static_cast(x))); } inline HOSTDEVICE float real_exp(float x) { return ::expf(x); } inline HOSTDEVICE double real_exp(double x) { return ::exp(x); } -inline HOSTDEVICE platform::float16 real_log(platform::float16 x) { - return static_cast(::logf(static_cast(x))); +inline HOSTDEVICE phi::dtype::float16 real_log(phi::dtype::float16 x) { + return static_cast(::logf(static_cast(x))); } inline HOSTDEVICE float real_log(float x) { return ::logf(x); } @@ -41,5 +41,5 @@ inline HOSTDEVICE float real_min(float x, float y) { return ::fminf(x, y); } inline HOSTDEVICE double real_min(double x, double y) { return ::fmin(x, y); } -} // namespace operators -} // namespace paddle +} // namespace funcs +} // namespace phi diff --git a/paddle/phi/kernels/gpu/nll_loss.h b/paddle/phi/kernels/gpu/nll_loss.h index 37a67b4767a9bb..9d063d0ef44a0b 100644 --- a/paddle/phi/kernels/gpu/nll_loss.h +++ b/paddle/phi/kernels/gpu/nll_loss.h @@ -19,10 +19,10 @@ #include #include -#include "paddle/fluid/operators/math.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/hostdevice.h" +#include "paddle/phi/kernels/funcs/math.h" namespace phi { static constexpr int kNumCUDAThreads = 512; diff --git a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h index 
84a24449b3a1c6..1cc025bac480f4 100644 --- a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h +++ b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits.h @@ -17,13 +17,13 @@ #include #include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/operators/math.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_helper.h" #include "paddle/phi/core/hostdevice.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" +#include "paddle/phi/kernels/funcs/math.h" #include "paddle/phi/kernels/gpu/reduce.h" #ifdef __NVCC__ diff --git a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_grad_kernel.cu b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_grad_kernel.cu index f61cd2c39674ec..736c5608a6ac7c 100644 --- a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_grad_kernel.cu @@ -37,8 +37,8 @@ struct SigmoidBwdFunctor { dx_data = static_cast(0.); counts = 0; } else { - T simoid_x = static_cast(1) / - (static_cast(1) + paddle::operators::real_exp(-x)); + T simoid_x = + static_cast(1) / (static_cast(1) + phi::funcs::real_exp(-x)); T diff = simoid_x - label; dx_data = dout * diff; counts = 1; diff --git a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_kernel.cu b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_kernel.cu index b0e9efe5bbafe6..fb0183ce1efd65 100644 --- a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_kernel.cu +++ b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_kernel.cu @@ -37,9 +37,8 @@ struct SigmoidFwdFunctor { } else { T term1 = (x > 0) ? 
x : 0; T term2 = x * label; - T term3 = paddle::operators::real_log( - static_cast(1) + - paddle::operators::real_exp(static_cast(-abs(x)))); + T term3 = phi::funcs::real_log( + static_cast(1) + phi::funcs::real_exp(static_cast(-abs(x)))); out_data = term1 - term2 + term3; counts = 1; diff --git a/paddle/phi/kernels/impl/selu_kernel_impl.h b/paddle/phi/kernels/impl/selu_kernel_impl.h index c5d756e6eb4fae..14789a7d61ac84 100644 --- a/paddle/phi/kernels/impl/selu_kernel_impl.h +++ b/paddle/phi/kernels/impl/selu_kernel_impl.h @@ -15,9 +15,9 @@ #pragma once #include -#include "paddle/fluid/operators/math.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/kernels/funcs/for_range.h" +#include "paddle/phi/kernels/funcs/math.h" namespace phi { @@ -32,7 +32,7 @@ struct SeluFunctor { HOSTDEVICE void operator()(size_t idx) const { T x_ele = x_data_ptr_[idx]; if (x_ele <= 0) { - x_ele = alpha_ * paddle::operators::real_exp(x_ele) - alpha_; + x_ele = alpha_ * phi::funcs::real_exp(x_ele) - alpha_; } y_data_ptr_[idx] = scale_ * x_ele; } From b7e120d264a33d97bb7d946d6197edc488a0976c Mon Sep 17 00:00:00 2001 From: huangjiyi <43315610+huangjiyi@users.noreply.github.com> Date: Thu, 17 Nov 2022 11:38:50 +0800 Subject: [PATCH 4/5] rm "paddle/phi/kernels/gpu/batch_norm_utils.h" in phi (#48057) --- .../phi/kernels/cpu/batch_norm_grad_kernel.cc | 2 +- .../phi/kernels/gpu/batch_norm_grad_kernel.cu | 2 +- paddle/phi/kernels/gpu/batch_norm_kernel.cu | 2 +- paddle/phi/kernels/gpu/batch_norm_utils.h | 142 ------------------ 4 files changed, 3 insertions(+), 145 deletions(-) delete mode 100644 paddle/phi/kernels/gpu/batch_norm_utils.h diff --git a/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc b/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc index f2054d4d396c60..efd55dee88cd04 100644 --- a/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc @@ -16,9 +16,9 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include 
"paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/batch_norm_kernel.h" +#include "paddle/phi/kernels/funcs/batch_norm_utils.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/gpu/batch_norm_utils.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu index 8d072368633ef1..e6c681588e4edb 100644 --- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu @@ -22,10 +22,10 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/batch_norm_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" +#include "paddle/phi/kernels/funcs/batch_norm_utils.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/norm_utils.h" #include "paddle/phi/kernels/funcs/reduce_function.h" -#include "paddle/phi/kernels/gpu/batch_norm_utils.h" #ifdef __HIPCC__ #define LAUNCH_BOUNDS(BlockDim) __launch_bounds__(BlockDim) diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu index 7b553db274d1f3..44fe99046e1585 100644 --- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu @@ -29,10 +29,10 @@ namespace cub = hipcub; #include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/batch_norm_kernel.h" +#include "paddle/phi/kernels/funcs/batch_norm_utils.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/norm_utils.h" #include "paddle/phi/kernels/funcs/reduce_function.h" -#include "paddle/phi/kernels/gpu/batch_norm_utils.h" #ifdef __HIPCC__ #define LAUNCH_BOUNDS(BlockDim) __launch_bounds__(BlockDim) diff --git a/paddle/phi/kernels/gpu/batch_norm_utils.h b/paddle/phi/kernels/gpu/batch_norm_utils.h deleted file mode 100644 index 
c9c62026edfa7a..00000000000000 --- a/paddle/phi/kernels/gpu/batch_norm_utils.h +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace phi { - -using Tensor = DenseTensor; - -template -inline void ResizeToChannelFirst(const DeviceContext& context, - const Tensor* input, - Tensor* transformed_input) { - int dim = input->dims().size() - 2; - if (dim == 3) { - // input - transformed_input->Resize(input->dims()); - - auto in_dims_vec = phi::vectorize(input->dims()); - in_dims_vec[1] = input->dims()[4]; - in_dims_vec[2] = input->dims()[1]; - in_dims_vec[3] = input->dims()[2]; - in_dims_vec[4] = input->dims()[3]; - transformed_input->Resize(phi::make_ddim(in_dims_vec)); - context.template Alloc(transformed_input); - - } else if (dim == 2) { - // input - transformed_input->Resize(input->dims()); - - auto in_dims_vec = phi::vectorize(input->dims()); - in_dims_vec[1] = input->dims()[3]; - in_dims_vec[2] = input->dims()[1]; - in_dims_vec[3] = input->dims()[2]; - transformed_input->Resize(phi::make_ddim(in_dims_vec)); - context.template Alloc(transformed_input); - } else if (dim == 1) { - transformed_input->Resize(input->dims()); - - auto in_dims_vec = phi::vectorize(input->dims()); - in_dims_vec[1] = input->dims()[2]; - in_dims_vec[2] = input->dims()[1]; - 
transformed_input->Resize(phi::make_ddim(in_dims_vec)); - context.template Alloc(transformed_input); - } -} - -template -inline void ResizeToChannelLast(const DeviceContext& context, - const Tensor* input, - Tensor* transformed_input) { - int dim = input->dims().size() - 2; - if (dim == 3) { - // input - transformed_input->Resize(input->dims()); - - auto in_dims_vec = phi::vectorize(input->dims()); - in_dims_vec[1] = input->dims()[2]; - in_dims_vec[2] = input->dims()[3]; - in_dims_vec[3] = input->dims()[4]; - in_dims_vec[4] = input->dims()[1]; - transformed_input->Resize(phi::make_ddim(in_dims_vec)); - context.template Alloc(transformed_input); - - } else if (dim == 2) { - // input - transformed_input->Resize(input->dims()); - - auto in_dims_vec = phi::vectorize(input->dims()); - in_dims_vec[1] = input->dims()[2]; - in_dims_vec[2] = input->dims()[3]; - in_dims_vec[3] = input->dims()[1]; - transformed_input->Resize(phi::make_ddim(in_dims_vec)); - context.template Alloc(transformed_input); - } else if (dim == 1) { - transformed_input->Resize(input->dims()); - - auto in_dims_vec = phi::vectorize(input->dims()); - in_dims_vec[1] = input->dims()[2]; - in_dims_vec[2] = input->dims()[1]; - transformed_input->Resize(phi::make_ddim(in_dims_vec)); - context.template Alloc(transformed_input); - } -} - -template -inline void TransToChannelFirst(const DeviceContext& context, - const Tensor* input, - Tensor* transformed_input) { - VLOG(5) << "Why am I called?"; - int dim = input->dims().size() - 2; - if (dim == 3) { - std::vector axis{0, 4, 1, 2, 3}; - funcs::Transpose trans5; - trans5(context, *input, transformed_input, axis); - - } else if (dim == 2) { - std::vector axis{0, 3, 1, 2}; - funcs::Transpose trans4; - trans4(context, *input, transformed_input, axis); - } else if (dim == 1) { - std::vector axis{0, 2, 1}; - funcs::Transpose trans3; - trans3(context, *input, transformed_input, axis); - } -} - -template -inline void TransToChannelLast(const DeviceContext& context, - 
const Tensor* input, - Tensor* transformed_input) { - int dim = input->dims().size() - 2; - if (dim == 3) { - std::vector axis{0, 2, 3, 4, 1}; - funcs::Transpose trans5; - trans5(context, *input, transformed_input, axis); - - } else if (dim == 2) { - std::vector axis{0, 2, 3, 1}; - funcs::Transpose trans4; - trans4(context, *input, transformed_input, axis); - } else if (dim == 1) { - std::vector axis{0, 2, 1}; - funcs::Transpose trans3; - trans3(context, *input, transformed_input, axis); - } -} - -} // namespace phi From 460d5040d2e8fd58ab470ba376438b56a0cb8dd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=E5=90=B4=E5=98=89=E6=96=87?= <417333277@qq.com> Date: Thu, 17 Nov 2022 11:44:13 +0800 Subject: [PATCH 5/5] Remove redundant numpy input in Example code, test=document_fix (#47916) --- python/paddle/distributed/utils/moe_utils.py | 33 ++--- python/paddle/fft.py | 124 ++++++++++-------- .../paddle/sparse/nn/functional/activation.py | 39 +++--- python/paddle/sparse/nn/layer/activation.py | 43 +++--- 4 files changed, 124 insertions(+), 115 deletions(-) diff --git a/python/paddle/distributed/utils/moe_utils.py b/python/paddle/distributed/utils/moe_utils.py index cd7c0e758d4e0b..eb7e73c363bf2c 100644 --- a/python/paddle/distributed/utils/moe_utils.py +++ b/python/paddle/distributed/utils/moe_utils.py @@ -71,7 +71,6 @@ def global_scatter( ..
code-block:: python # required: distributed - import numpy as np import paddle from paddle.distributed import init_parallel_env init_parallel_env() @@ -79,17 +78,14 @@ def global_scatter( world_size = 2 d_model = 2 in_feat = d_model - local_input_buf = np.array([[1, 2],[3, 4],[5, 6],[7, 8],[9, 10]], \ - dtype=np.float32) + local_input_buf = paddle.to_tensor([[1, 2],[3, 4],[5, 6],[7, 8],[9, 10]], \ + dtype='float32', stop_gradient=False) if paddle.distributed.ParallelEnv().local_rank == 0: - local_count = np.array([2, 1, 1, 1]) - global_count = np.array([2, 1, 1, 1]) + local_count = paddle.to_tensor([2, 1, 1, 1], dtype="int64") + global_count = paddle.to_tensor([2, 1, 1, 1], dtype="int64") else: - local_count = np.array([1, 1, 2, 1]) - global_count = np.array([1, 1, 2, 1]) - local_input_buf = paddle.to_tensor(local_input_buf, dtype="float32", stop_gradient=False) - local_count = paddle.to_tensor(local_count, dtype="int64") - global_count = paddle.to_tensor(global_count, dtype="int64") + local_count = paddle.to_tensor([1, 1, 2, 1], dtype="int64") + global_count = paddle.to_tensor([1, 1, 2, 1], dtype="int64") a = paddle.distributed.utils.global_scatter(local_input_buf, \ local_count, global_count) a.stop_gradient = False @@ -193,7 +189,6 @@ def global_gather( .. 
code-block:: python # required: distributed - import numpy as np import paddle from paddle.distributed import init_parallel_env init_parallel_env() @@ -201,17 +196,15 @@ def global_gather( world_size = 2 d_model = 2 in_feat = d_model - local_input_buf = np.array([[1, 2],[3, 4],[5, 6],[7, 8],[9, 10]],\ - dtype=np.float32) + local_input_buf = paddle.to_tensor([[1, 2],[3, 4],[5, 6],[7, 8],[9, 10]],\ + dtype='float32', stop_gradient=False) if paddle.distributed.ParallelEnv().local_rank == 0: - local_count = np.array([2, 1, 1, 1]) - global_count = np.array([2, 1, 1, 1]) + local_count = paddle.to_tensor([2, 1, 1, 1], dtype="int64") + global_count = paddle.to_tensor([2, 1, 1, 1], dtype="int64") else: - local_count = np.array([1, 1, 2, 1]) - global_count = np.array([1, 1, 2, 1]) - local_input_buf = paddle.to_tensor(local_input_buf, dtype="float32", stop_gradient=False) - local_count = paddle.to_tensor(local_count, dtype="int64") - global_count = paddle.to_tensor(global_count, dtype="int64") + local_count = paddle.to_tensor([1, 1, 2, 1], dtype="int64") + global_count = paddle.to_tensor([1, 1, 2, 1], dtype="int64") + a = paddle.distributed.utils.global_gather(local_input_buf, local_count, global_count) print(a) # out for rank 0: [[1, 2], [3, 4], [7, 8], [1, 2], [7, 8]] diff --git a/python/paddle/fft.py b/python/paddle/fft.py index 8bc95cd37e9f24..1e4ca9237469ba 100644 --- a/python/paddle/fft.py +++ b/python/paddle/fft.py @@ -521,26 +521,29 @@ def fftn(x, s=None, axes=None, norm="backward", name=None): ..
code-block:: python - import numpy as np import paddle - x = np.mgrid[:4, :4, :4][1] - xp = paddle.to_tensor(x) - fftn_xp = paddle.fft.fftn(xp, axes=(1, 2)).numpy() - print(fftn_xp) - # [[[24.+0.j 0.+0.j 0.+0.j 0.-0.j] + arr = paddle.arange(4, dtype="float64") + x = paddle.meshgrid(arr, arr, arr)[1] + + fftn_xp = paddle.fft.fftn(x, axes=(1, 2)) + print(fftn_xp.numpy()) + # [[[24.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+8.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.-8.j 0.+0.j 0.+0.j 0.-0.j]] - # [[24.+0.j 0.+0.j 0.+0.j 0.-0.j] + + # [[24.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+8.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.-8.j 0.+0.j 0.+0.j 0.-0.j]] - # [[24.+0.j 0.+0.j 0.+0.j 0.-0.j] + + # [[24.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+8.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.-8.j 0.+0.j 0.+0.j 0.-0.j]] - # [[24.+0.j 0.+0.j 0.+0.j 0.-0.j] + + # [[24.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+8.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.-8.j 0.+0.j 0.+0.j 0.-0.j]]] @@ -901,15 +904,16 @@ def fft2(x, s=None, axes=(-2, -1), norm="backward", name=None): .. code-block:: python - import numpy as np import paddle - x = np.mgrid[:2, :2][1] - xp = paddle.to_tensor(x) - fft2_xp = paddle.fft.fft2(xp).numpy() + arr = paddle.arange(2, dtype="float64") + x = paddle.meshgrid(arr, arr)[0] + + fft2_xp = paddle.fft.fft2(x) print(fft2_xp) - # [[ 2.+0.j -2.+0.j] - # [ 0.+0.j 0.+0.j]] + # Tensor(shape=[2, 2], dtype=complex128, place=Place(gpu:0), stop_gradient=True, + # [[ (2+0j), 0j ], + # [(-2+0j), 0j ]]) """ _check_at_least_ndim(x, 2) @@ -971,15 +975,16 @@ def ifft2(x, s=None, axes=(-2, -1), norm="backward", name=None): .. code-block:: python - import numpy as np import paddle - x = np.mgrid[:2, :2][1] - xp = paddle.to_tensor(x) - ifft2_xp = paddle.fft.ifft2(xp).numpy() + arr = paddle.arange(2, dtype="float64") + x = paddle.meshgrid(arr, arr)[0] + + ifft2_xp = paddle.fft.ifft2(x) print(ifft2_xp) - # [[ 0.5+0.j -0.5+0.j] - # [ 0. 
+0.j 0. +0.j]] + # Tensor(shape=[2, 2], dtype=complex128, place=Place(gpu:0), stop_gradient=True, + # [[ (0.5+0j), 0j ], + # [(-0.5+0j), 0j ]]) """ _check_at_least_ndim(x, 2) if s is not None: @@ -1033,16 +1038,17 @@ def rfft2(x, s=None, axes=(-2, -1), norm="backward", name=None): .. code-block:: python import paddle - import numpy as np - - x = paddle.to_tensor(np.mgrid[:5, :5][0].astype(np.float32)) - print(paddle.fft.rfft2(x)) - # Tensor(shape=[5, 3], dtype=complex64, place=CUDAPlace(0), stop_gradient=True, - # [[ (50+0j) , (1.1920928955078125e-07+0j) , 0j ], - # [(-12.5+17.204774856567383j) , (-9.644234211236835e-08+7.006946134424652e-08j) , 0j ], - # [(-12.500000953674316+4.061495304107666j) , (3.6837697336977726e-08-1.1337477445749755e-07j), 0j ], - # [(-12.500000953674316-4.061495304107666j) , (3.6837697336977726e-08+1.1337477445749755e-07j), 0j ], - # [(-12.5-17.204774856567383j) , (-9.644234211236835e-08-7.006946134424652e-08j) , 0j ]]) + + arr = paddle.arange(5, dtype="float64") + x = paddle.meshgrid(arr, arr)[0] + + result = paddle.fft.rfft2(x) + print(result.numpy()) + # [[ 50. +0.j 0. +0.j 0. +0.j ] + # [-12.5+17.20477401j 0. +0.j 0. +0.j ] + # [-12.5 +4.0614962j 0. +0.j 0. +0.j ] + # [-12.5 -4.0614962j 0. +0.j 0. +0.j ] + # [-12.5-17.20477401j 0. +0.j 0. +0.j ]] """ _check_at_least_ndim(x, 2) if s is not None: @@ -1192,13 +1198,20 @@ def ihfft2(x, s=None, axes=(-2, -1), norm="backward", name=None): .. 
code-block:: python - import numpy as np import paddle - x = np.mgrid[:5, :5][0].astype(np.float64) - xp = paddle.to_tensor(x) - ihfft2_xp = paddle.fft.ihfft2(xp).numpy() - print(ihfft2_xp) + arr = paddle.arange(5, dtype="float64") + x = paddle.meshgrid(arr, arr)[0] + print(x) + # Tensor(shape=[5, 5], dtype=float64, place=Place(gpu:0), stop_gradient=True, + # [[0., 0., 0., 0., 0.], + # [1., 1., 1., 1., 1.], + # [2., 2., 2., 2., 2.], + # [3., 3., 3., 3., 3.], + # [4., 4., 4., 4., 4.]]) + + ihfft2_xp = paddle.fft.ihfft2(x) + print(ihfft2_xp.numpy()) # [[ 2. +0.j 0. +0.j 0. +0.j ] # [-0.5-0.68819096j 0. +0.j 0. +0.j ] # [-0.5-0.16245985j 0. +0.j 0. +0.j ] @@ -1250,15 +1263,11 @@ def fftfreq(n, d=1.0, dtype=None, name=None): .. code-block:: python - import numpy as np import paddle - x = np.array([3, 1, 2, 2, 3], dtype=float) scalar_temp = 0.5 - n = x.size - fftfreq_xp = paddle.fft.fftfreq(n, d=scalar_temp) + fftfreq_xp = paddle.fft.fftfreq(5, d=scalar_temp) print(fftfreq_xp) - # Tensor(shape=[5], dtype=float32, place=CUDAPlace(0), stop_gradient=True, # [ 0. , 0.40000001, 0.80000001, -0.80000001, -0.40000001]) """ @@ -1301,13 +1310,10 @@ def rfftfreq(n, d=1.0, dtype=None, name=None): .. code-block:: python - import numpy as np import paddle - x = np.array([3, 1, 2, 2, 3], dtype=float) scalar_temp = 0.3 - n = x.size - rfftfreq_xp = paddle.fft.rfftfreq(n, d=scalar_temp) + rfftfreq_xp = paddle.fft.rfftfreq(5, d=scalar_temp) print(rfftfreq_xp) # Tensor(shape=[3], dtype=float32, place=CUDAPlace(0), stop_gradient=True, @@ -1343,15 +1349,17 @@ def fftshift(x, axes=None, name=None): .. code-block:: python - import numpy as np import paddle - x = np.array([3, 1, 2, 2, 3], dtype=float) - n = x.size - fftfreq_xp = paddle.fft.fftfreq(n, d=0.3) - res = paddle.fft.fftshift(fftfreq_xp).numpy() + fftfreq_xp = paddle.fft.fftfreq(5, d=0.3) + print(fftfreq_xp) + # Tensor(shape=[5], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # [ 0. 
, 0.66666669, 1.33333337, -1.33333337, -0.66666669]) + + res = paddle.fft.fftshift(fftfreq_xp) print(res) - # [-1.3333334 -0.6666667 0. 0.6666667 1.3333334] + # Tensor(shape=[5], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # [-1.33333337, -0.66666669, 0. , 0.66666669, 1.33333337]) """ shape = paddle.shape(x) @@ -1386,15 +1394,17 @@ def ifftshift(x, axes=None, name=None): .. code-block:: python - import numpy as np import paddle - x = np.array([3, 1, 2, 2, 3], dtype=float) - n = x.size - fftfreq_xp = paddle.fft.fftfreq(n, d=0.3) - res = paddle.fft.ifftshift(fftfreq_xp).numpy() + fftfreq_xp = paddle.fft.fftfreq(5, d=0.3) + print(fftfreq_xp) + # Tensor(shape=[5], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # [ 0. , 0.66666669, 1.33333337, -1.33333337, -0.66666669]) + + res = paddle.fft.ifftshift(fftfreq_xp) print(res) - # [ 1.3333334 -1.3333334 -0.6666667 0. 0.6666667] + # Tensor(shape=[5], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # [ 1.33333337, -1.33333337, -0.66666669, 0. , 0.66666669]) """ shape = paddle.shape(x) diff --git a/python/paddle/sparse/nn/functional/activation.py b/python/paddle/sparse/nn/functional/activation.py index cbe2ddd0d79dbf..93c5e74014f3e0 100644 --- a/python/paddle/sparse/nn/functional/activation.py +++ b/python/paddle/sparse/nn/functional/activation.py @@ -87,28 +87,31 @@ def softmax(x, axis=-1, name=None): .. code-block:: python import paddle - import numpy as np paddle.seed(100) - mask = np.random.rand(3, 4) < 0.5 - np_x = np.random.rand(3, 4) * mask - # [[0. 0. 0.96823406 0.19722934] - # [0.94373937 0. 0.02060066 0.71456372] - # [0. 0. 0. 
0.98275049]] - - csr = paddle.to_tensor(np_x).to_sparse_csr() - # Tensor(shape=[3, 4], dtype=paddle.float64, place=Place(gpu:0), stop_gradient=True, - # crows=[0, 2, 5, 6], - # cols=[2, 3, 0, 2, 3, 3], - # values=[0.96823406, 0.19722934, 0.94373937, 0.02060066, 0.71456372, - # 0.98275049]) + mask = paddle.rand((3, 4)) < 0.5 + x = paddle.rand((3, 4)) * mask + print(x) + # Tensor(shape=[3, 4], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # [[0.83438963, 0.70008713, 0. , 0.88831252], + # [0.02200012, 0. , 0.75432241, 0.65136462], + # [0.96088767, 0.82938021, 0.35367414, 0.86653489]]) + + csr = x.to_sparse_csr() + print(csr) + # Tensor(shape=[3, 4], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=True, + # crows=[0 , 3 , 6 , 10], + # cols=[0, 1, 3, 0, 2, 3, 0, 1, 2, 3], + # values=[0.83438963, 0.70008713, 0.88831252, 0.02200012, 0.75432241, + # 0.65136462, 0.96088767, 0.82938021, 0.35367414, 0.86653489]) out = paddle.sparse.nn.functional.softmax(csr) - # Tensor(shape=[3, 4], dtype=paddle.float64, place=Place(gpu:0), stop_gradient=True, - # crows=[0, 2, 5, 6], - # cols=[2, 3, 0, 2, 3, 3], - # values=[0.68373820, 0.31626180, 0.45610887, 0.18119845, 0.36269269, - # 1. ]) + print(out) + # Tensor(shape=[3, 4], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=True, + # crows=[0 , 3 , 6 , 10], + # cols=[0, 1, 3, 0, 2, 3, 0, 1, 2, 3], + # values=[0.34132850, 0.29843223, 0.36023921, 0.20176248, 0.41964680, + # 0.37859070, 0.30015594, 0.26316854, 0.16354506, 0.27313042]) """ return _C_ops.sparse_softmax(x, axis) diff --git a/python/paddle/sparse/nn/layer/activation.py b/python/paddle/sparse/nn/layer/activation.py index 3ad856f69fbec1..91d5c198189dd9 100644 --- a/python/paddle/sparse/nn/layer/activation.py +++ b/python/paddle/sparse/nn/layer/activation.py @@ -86,29 +86,32 @@ class Softmax(Layer): .. 
code-block:: python import paddle - import numpy as np - paddle.seed(100) - - mask = np.random.rand(3, 4) < 0.5 - np_x = np.random.rand(3, 4) * mask - # [[0. 0. 0.96823406 0.19722934] - # [0.94373937 0. 0.02060066 0.71456372] - # [0. 0. 0. 0.98275049]] - - csr = paddle.to_tensor(np_x).to_sparse_csr() - # Tensor(shape=[3, 4], dtype=paddle.float64, place=Place(gpu:0), stop_gradient=True, - # crows=[0, 2, 5, 6], - # cols=[2, 3, 0, 2, 3, 3], - # values=[0.96823406, 0.19722934, 0.94373937, 0.02060066, 0.71456372, - # 0.98275049]) + paddle.seed(2022) + + mask = paddle.rand((3, 4)) < 0.7 + x = paddle.rand((3, 4)) * mask + print(x) + # Tensor(shape=[3, 4], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # [[0.08325022, 0.27030438, 0. , 0.83883715], + # [0. , 0.95856029, 0.24004589, 0. ], + # [0.14500992, 0.17088132, 0. , 0. ]]) + + csr = x.to_sparse_csr() + print(csr) + # Tensor(shape=[3, 4], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=True, + # crows=[0, 3, 5, 7], + # cols=[0, 1, 3, 1, 2, 0, 1], + # values=[0.08325022, 0.27030438, 0.83883715, 0.95856029, 0.24004589, + # 0.14500992, 0.17088132]) softmax = paddle.sparse.nn.Softmax() out = softmax(csr) - # Tensor(shape=[3, 4], dtype=paddle.float64, place=Place(gpu:0), stop_gradient=True, - # crows=[0, 2, 5, 6], - # cols=[2, 3, 0, 2, 3, 3], - # values=[0.68373820, 0.31626180, 0.45610887, 0.18119845, 0.36269269, - # 1. ]) + print(out) + # Tensor(shape=[3, 4], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=True, + # crows=[0, 3, 5, 7], + # cols=[0, 1, 3, 1, 2, 0, 1], + # values=[0.23070428, 0.27815846, 0.49113727, 0.67227983, 0.32772022, + # 0.49353254, 0.50646752]) """ def __init__(self, axis=-1, name=None):