From 35de47b3fc0fadfe317eec9500238d627bcc4b3f Mon Sep 17 00:00:00 2001 From: zhwesky2010 <1183042833@qq.com> Date: Wed, 14 Jun 2023 20:50:29 +0800 Subject: [PATCH] [cherry-pick 2.5][Zero-Dim] paddle.nanmedian/count_nonzero/logspace support 0D, add some 0D case (#54649) * [Zero-Dim] add 0D test case (#54581) * [Zero-Dim] paddle.nanmedian/nanquantile support 0D Tensor (#54500) * [Zero-Dim] paddle.nanmedian support 0D Tensor * fix CI --- paddle/phi/infermeta/multiary.cc | 38 +- paddle/phi/infermeta/unary.cc | 50 +- .../phi/kernels/cpu/nanmedian_grad_kernel.cc | 73 ++- paddle/phi/kernels/cpu/nanmedian_kernel.cc | 69 +-- .../nanmedian_utils.h} | 45 +- .../phi/kernels/gpu/nanmedian_grad_kernel.cu | 81 +-- paddle/phi/kernels/gpu/nanmedian_kernel.cu | 69 +-- .../kernels/impl/nanmedian_grad_kernel_impl.h | 65 --- python/paddle/nn/functional/distance.py | 4 +- python/paddle/tensor/creation.py | 8 +- python/paddle/tensor/math.py | 15 +- python/paddle/tensor/stat.py | 33 +- test/legacy_test/test_nanmedian.py | 35 +- test/legacy_test/test_zero_dim_tensor.py | 530 ++++++++++++++---- 14 files changed, 693 insertions(+), 422 deletions(-) rename paddle/phi/kernels/{impl/nanmedian_kernel_impl.h => funcs/nanmedian_utils.h} (61%) delete mode 100644 paddle/phi/kernels/impl/nanmedian_grad_kernel_impl.h diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 8b55e87aaa4f5..8fc3b285486d4 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -2162,32 +2162,32 @@ void LogspaceInferMeta(const MetaTensor& start, MetaTensor* out) { auto s_dims = start.dims(); PADDLE_ENFORCE_EQ( - (s_dims.size() == 1) && (s_dims[0] == 1), - true, - phi::errors::InvalidArgument("The shape of Input(Start) must be [1]," - "but received input shape is [%s].", - s_dims)); + phi::product(s_dims), + 1, + phi::errors::InvalidArgument("The size of Input(Start) must be 1," + "but received input size is %s.", + phi::product(s_dims))); auto e_dims = stop.dims(); PADDLE_ENFORCE_EQ( - (e_dims.size() == 1) && (e_dims[0] == 1), + phi::product(e_dims), true, - phi::errors::InvalidArgument("The shape of Input(Stop) must be [1]," - "but received input shape is [%s].", - e_dims)); + phi::errors::InvalidArgument("The size of Input(Stop) must be 1," + "but received input size is %s.", + phi::product(e_dims))); auto num_dims = number.dims(); PADDLE_ENFORCE_EQ( - (num_dims.size() == 1) && (num_dims[0] == 1), + phi::product(num_dims), true, - phi::errors::InvalidArgument("The shape of Input(Num) must be [1]," - "but received input shape is [%s].", - num_dims)); + phi::errors::InvalidArgument("The size of Input(Num) must be 1," + "but received input size is %s.", + phi::product(num_dims))); auto b_dims = base.dims(); - PADDLE_ENFORCE_EQ( - (b_dims.size() == 1) && (b_dims[0] == 1), - true, - phi::errors::InvalidArgument("The shape of Input(Base) must be [1]," - "but received input shape is [%s].", - b_dims)); + PADDLE_ENFORCE_EQ(phi::product(b_dims), + true, + phi::errors::InvalidArgument( + "The size of Input(Base) must be 1," + "but received input size is phi::product(b_dims).", + phi::product(b_dims))); out->set_dims(phi::make_ddim({-1})); out->set_dtype(dtype); } diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index e43e945f3750c..764ca8bf7811e 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -2260,37 +2260,47 @@ void NanmedianInferMeta(const MetaTensor& x, for (int64_t i = 0; i < x_rank; i++) { out_dim.push_back(1); } - } else { - 
out_dim.push_back(1); } } else { - std::vector cleaned_axis; + std::vector formated_axis; for (auto& axis : axis_list) { + if (x_rank == 0) { + PADDLE_ENFORCE_EQ(axis == 0 || axis == -1, + true, + phi::errors::InvalidArgument( + "When input 0D Tensor, each element of the axis " + "can only be -1, 0, None")); + } else { + PADDLE_ENFORCE_LT(axis, + x_rank, + errors::InvalidArgument( + "each element of the axis should be in the " + "range [ -dimension(X), dimension(X) ) " + "which dimesion = %d. But received axis = %d.", + x_rank, + axis)); + PADDLE_ENFORCE_GE(axis, + -x_rank, + errors::InvalidArgument( + "each element of the axis should be in the " + "range [ -dimension(X), dimension(X) ) " + "which dimesion = %d. But received axis = %d.", + x_rank, + axis)); + } if (axis < 0) axis += x_rank; - - PADDLE_ENFORCE_LT( - axis, - x_rank, - errors::InvalidArgument( - "Attr(axis) value should be in range [-R, R-1], R is " - "the rank of Input(X). But received axis: %d, R: %d. " - "Current Input(X)'s shape is=[%s].", - axis, - x_rank, - x_dim)); - PADDLE_ENFORCE_EQ( - std::find(cleaned_axis.begin(), cleaned_axis.end(), axis), - cleaned_axis.end(), + std::find(formated_axis.begin(), formated_axis.end(), axis), + formated_axis.end(), errors::InvalidArgument("Attr(axes) has duplicated elements: %d.", static_cast(axis))); - cleaned_axis.push_back(axis); + formated_axis.push_back(axis); } for (int64_t i = 0; i < x_rank; i++) { - if (std::find(cleaned_axis.begin(), cleaned_axis.end(), i) == - cleaned_axis.end()) { + if (std::find(formated_axis.begin(), formated_axis.end(), i) == + formated_axis.end()) { out_dim.push_back(x_dim[i]); } else if (keep_dim) { out_dim.push_back(1); diff --git a/paddle/phi/kernels/cpu/nanmedian_grad_kernel.cc b/paddle/phi/kernels/cpu/nanmedian_grad_kernel.cc index bce234375e529..5a858a66e65b9 100644 --- a/paddle/phi/kernels/cpu/nanmedian_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/nanmedian_grad_kernel.cc @@ -17,7 +17,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/impl/nanmedian_grad_kernel_impl.h" +#include "paddle/phi/kernels/funcs/nanmedian_utils.h" namespace phi { @@ -26,67 +26,64 @@ void CalcMedianGradKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& median_index, const DenseTensor& out_grad, - const IntArray& axes UNUSED, - DenseTensor* x_grad, - T* x_grad_ptr) { + DenseTensor* x_grad) { + T* dx_data = dev_ctx.template Alloc(x_grad); + if (!dx_data) return; + phi::funcs::SetConstant set_zero; set_zero(dev_ctx, x_grad, static_cast(0)); - if (!x_grad_ptr) return; - const int64_t* m_ptr = median_index.data(); - const T* out_grad_ptr = out_grad.data(); + const int64_t* m_data = median_index.data(); + const T* dout_data = out_grad.data(); int64_t numel = x.numel(); auto x_dim = x.dims(); int64_t rank = x_dim.size(); int64_t stride = x_dim[rank - 1]; - int64_t pre_dim = numel / stride; + int64_t i = 0; int64_t offset = 0; - T div_factor = static_cast(2.0); for (i = 0; i < pre_dim; i++) { - if (m_ptr[2 * i] >= 0) { - if (m_ptr[2 * i] == m_ptr[2 * i + 1]) { - x_grad_ptr[offset + m_ptr[2 * i]] = out_grad_ptr[i]; + if (m_data[2 * i] >= 0) { + if (m_data[2 * i] == m_data[2 * i + 1]) { + dx_data[offset + m_data[2 * i]] = dout_data[i]; } else { - x_grad_ptr[offset + m_ptr[2 * i]] = out_grad_ptr[i] / div_factor; - x_grad_ptr[offset + m_ptr[2 * i + 1]] = out_grad_ptr[i] / div_factor; + dx_data[offset + m_data[2 * i]] = dout_data[i] / 
static_cast(2.0); + dx_data[offset + m_data[2 * i + 1]] = + dout_data[i] / static_cast(2.0); } } offset += stride; } } -template -void BaseMedianGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& median_index, - const DenseTensor& out_grad, - const IntArray& axes, - DenseTensor* x_grad) { - auto rank = x.dims().size(); - T* x_grad_ptr = dev_ctx.template Alloc(x_grad); - if (axes.size() && (rank > 1)) { - DenseTensor tmp_x_grad(*x_grad); - CalcMedianGradKernel( - dev_ctx, x, median_index, out_grad, axes, &tmp_x_grad, x_grad_ptr); - PostprocessMedianGradKernel(dev_ctx, &tmp_x_grad, axes, x_grad); - } else { - CalcMedianGradKernel( - dev_ctx, x, median_index, out_grad, axes, x_grad, x_grad_ptr); - } -} - template void NanmedianGradKernel(const Context& dev_ctx, - const DenseTensor& input, + const DenseTensor& x, const DenseTensor& median_index, const DenseTensor& out_grad, const IntArray& axes, - bool keep_dim UNUSED, + bool keepdim UNUSED, DenseTensor* x_grad) { - BaseMedianGradKernel( - dev_ctx, input, median_index, out_grad, axes, x_grad); + DenseTensor tmp_x; + auto rank = x.dims().size(); + if ((axes.size() == 0) || rank <= 1) { + tmp_x = x; + tmp_x.Resize({x.numel()}); + CalcMedianGradKernel( + dev_ctx, tmp_x, median_index, out_grad, x_grad); + } else { + funcs::PreprocessMedianKernel(dev_ctx, x, axes, &tmp_x); + + DenseTensor tmp_x_grad; + tmp_x_grad.Resize(x_grad->dims()); + CalcMedianGradKernel( + dev_ctx, tmp_x, median_index, out_grad, &tmp_x_grad); + + dev_ctx.template Alloc(x_grad); + funcs::PostprocessMedianGradKernel( + dev_ctx, &tmp_x_grad, axes, x_grad); + } } } // namespace phi diff --git a/paddle/phi/kernels/cpu/nanmedian_kernel.cc b/paddle/phi/kernels/cpu/nanmedian_kernel.cc index 660c115f92f89..558d5aaebfef8 100644 --- a/paddle/phi/kernels/cpu/nanmedian_kernel.cc +++ b/paddle/phi/kernels/cpu/nanmedian_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/impl/nanmedian_kernel_impl.h" +#include "paddle/phi/kernels/funcs/nanmedian_utils.h" #include "paddle/phi/kernels/top_k_kernel.h" namespace phi { @@ -31,7 +31,6 @@ void CalcMedianFunc(const Context& dev_ctx, int64_t pre_dim, T* o_ptr, int64_t* m_ptr) { - bool should_ignore_nan = ignore_nan; DenseTensor sort_out; DenseTensor sort_indices; auto sort_dim = x.dims(); @@ -52,7 +51,7 @@ void CalcMedianFunc(const Context& dev_ctx, int64_t offset = 0; int64_t i = 0; bool is_ori_odd = stride & 1; - if (should_ignore_nan) { + if (ignore_nan) { for (i = 0; i < pre_dim; i++) { offset = i * sort_k; if (nan_counts[i] == stride) { @@ -107,11 +106,11 @@ void CalcMedianFunc(const Context& dev_ctx, template void ProcessMedianKernel(const Context& dev_ctx, const DenseTensor& x, - T* o_ptr, - int64_t* m_ptr, - bool ignore_nan) { - bool should_ignore_nan = ignore_nan; - const T* x_ptr = x.data(); + DenseTensor* out, + DenseTensor* median_index) { + const T* x_data = x.data(); + T* out_data = dev_ctx.template Alloc(out); + int64_t* m_data = dev_ctx.template Alloc(median_index); int64_t numel = x.numel(); auto x_dim = x.dims(); @@ -122,7 +121,8 @@ void ProcessMedianKernel(const Context& dev_ctx, int64_t max_valid_num = 0; std::vector nan_counts; - if (should_ignore_nan) { + bool ignore_nan = true; + if (ignore_nan) { int64_t total_nan_num = 0; std::vector col_vec; col_vec.reserve(stride); @@ -133,7 +133,7 @@ void ProcessMedianKernel(const Context& dev_ctx, for (int64_t i = 0; i < pre_dim; i++) { col_vec.clear(); 
col_vec.insert( - col_vec.begin(), x_ptr + i * stride, x_ptr + (i + 1) * stride); + col_vec.begin(), x_data + i * stride, x_data + (i + 1) * stride); nan_counts[i] = std::count_if(col_vec.begin(), col_vec.end(), [&](const T& val) { return std::isnan(static_cast(val)); @@ -145,47 +145,25 @@ void ProcessMedianKernel(const Context& dev_ctx, // all elems are nan if (total_nan_num == numel) { for (i = 0; i < pre_dim; i++) { - o_ptr[i] = x_ptr[0]; - m_ptr[2 * i] = -1; - m_ptr[2 * i + 1] = -1; + out_data[i] = std::numeric_limits::quiet_NaN(); + m_data[2 * i] = -1; + m_data[2 * i + 1] = -1; } return; } - should_ignore_nan = total_nan_num > 0; + ignore_nan = total_nan_num > 0; } - int64_t sort_k = should_ignore_nan ? max_valid_num : ((stride >> 1) + 1); + int64_t sort_k = ignore_nan ? max_valid_num : ((stride >> 1) + 1); CalcMedianFunc(dev_ctx, x, nan_counts, - should_ignore_nan, + ignore_nan, sort_k, stride, pre_dim, - o_ptr, - m_ptr); -} - -template -void BaseMedianKernel(const Context& dev_ctx, - const DenseTensor& input, - const IntArray& axes, - DenseTensor* out, - DenseTensor* median_index, - bool ignore_nan) { - DenseTensor x; - auto rank = input.dims().size(); - if ((axes.size() == 0) || rank <= 1) { - x = input; - x.Resize({input.numel()}); - } else { - PreprocessMedianKernel(dev_ctx, input, axes, &x); - } - - T* o_ptr = dev_ctx.template Alloc(out); - int64_t* m_ptr = dev_ctx.template Alloc(median_index); - ProcessMedianKernel(dev_ctx, x, o_ptr, m_ptr, ignore_nan); - out->Resize(out->dims()); + out_data, + m_data); } template @@ -195,7 +173,16 @@ void NanmedianKernel(const Context& dev_ctx, bool keepdim UNUSED, DenseTensor* out, DenseTensor* median_index) { - BaseMedianKernel(dev_ctx, x, axes, out, median_index, true); + DenseTensor tmp_x; + auto rank = x.dims().size(); + if ((axes.size() == 0) || rank <= 1) { + tmp_x = x; + tmp_x.Resize({x.numel()}); + } else { + funcs::PreprocessMedianKernel(dev_ctx, x, axes, &tmp_x); + } + + ProcessMedianKernel(dev_ctx, tmp_x, out, median_index); } } // namespace phi diff --git a/paddle/phi/kernels/impl/nanmedian_kernel_impl.h b/paddle/phi/kernels/funcs/nanmedian_utils.h similarity index 61% rename from paddle/phi/kernels/impl/nanmedian_kernel_impl.h rename to paddle/phi/kernels/funcs/nanmedian_utils.h index 0d3585eb1ce06..edcdc10b88595 100644 --- a/paddle/phi/kernels/impl/nanmedian_kernel_impl.h +++ b/paddle/phi/kernels/funcs/nanmedian_utils.h @@ -15,9 +15,51 @@ #pragma once #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/nanmedian_kernel.h" namespace phi { +namespace funcs { + +template +void PostprocessMedianGradKernel(const Context& dev_ctx, + DenseTensor* input, + const IntArray& raw_axes, + DenseTensor* x) { + auto input_dim = input->dims(); + auto rank = input_dim.size(); + + std::vector axes = raw_axes.GetData(); + int64_t axes_size = static_cast(axes.size()); + for (int64_t i = 0; i < axes_size; i++) { + if (axes[i] < 0) { + axes[i] += rank; + } + } + + std::vector trans_back; + std::vector reshape_back; + trans_back.resize(rank); + + int offset = 0; + for (int64_t i = 0; i < rank; i++) { + if (std::find(axes.begin(), axes.end(), i) == axes.end()) { + reshape_back.push_back(input_dim[i]); + trans_back[i] = offset; + offset += 1; + } + } + + for (int64_t i = 0; i < rank; i++) { + if (std::find(axes.begin(), axes.end(), i) != axes.end()) { + trans_back[i] = offset; + reshape_back.push_back(input_dim[i]); + offset += 1; + } + } + + input->Resize(make_ddim(reshape_back)); + funcs::TransCompute( + 
static_cast(trans_back.size()), dev_ctx, *input, x, trans_back); +} template void PreprocessMedianKernel(const Context& dev_ctx, @@ -65,4 +107,5 @@ void PreprocessMedianKernel(const Context& dev_ctx, x->Resize(make_ddim(reshape)); } +} // namespace funcs } // namespace phi diff --git a/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu b/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu index 99b1c1a8c0af8..706a89def1fe9 100644 --- a/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu @@ -20,7 +20,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_meta.h" #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/impl/nanmedian_grad_kernel_impl.h" +#include "paddle/phi/kernels/funcs/nanmedian_utils.h" namespace phi { @@ -30,23 +30,26 @@ inline int GET_BLOCKS(const int N) { } template -__global__ void KernelNanmedianGrad(const T* x_ptr, +__global__ void KernelNanmedianGrad(const T* x_data, const int64_t* medians_ptr, const T* out_grad_ptr, - T* x_grad_ptr, + T* dx_data, int64_t stride, - int64_t pre_dim, - T div_factor) { + int64_t pre_dim) { CUDA_KERNEL_LOOP(index, pre_dim) { int64_t offset = index * stride; + printf("index: %d\n", index); + printf("medians_ptr[2 * index]: %d\n", medians_ptr[2 * index]); + printf("medians_ptr[2 * index+1]: %d\n", medians_ptr[2 * index + 1]); + if (medians_ptr[2 * index] >= 0) { if (medians_ptr[2 * index] == medians_ptr[2 * index + 1]) { - x_grad_ptr[offset + medians_ptr[2 * index]] = out_grad_ptr[index]; + dx_data[offset + medians_ptr[2 * index]] = out_grad_ptr[index]; } else { - x_grad_ptr[offset + medians_ptr[2 * index]] = - out_grad_ptr[index] / div_factor; - x_grad_ptr[offset + medians_ptr[2 * index + 1]] = - out_grad_ptr[index] / div_factor; + dx_data[offset + medians_ptr[2 * index]] = + out_grad_ptr[index] / static_cast(2.0); + dx_data[offset + medians_ptr[2 * index + 1]] = + out_grad_ptr[index] / static_cast(2.0); } } } @@ -57,14 +60,16 @@ void CalcMedianGradKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& median_index, const DenseTensor& out_grad, - DenseTensor* x_grad, - T* x_grad_ptr) { + DenseTensor* x_grad) { + T* dx_data = dev_ctx.template Alloc(x_grad); + if (!dx_data) return; + phi::funcs::SetConstant set_zero; set_zero(dev_ctx, x_grad, static_cast(0)); auto stream = dev_ctx.stream(); - const T* x_ptr = x.data(); - const int64_t* m_ptr = median_index.data(); + const T* x_data = x.data(); + const int64_t* m_data = median_index.data(); const T* out_grad_ptr = out_grad.data(); int64_t numel = x.numel(); @@ -73,42 +78,38 @@ void CalcMedianGradKernel(const Context& dev_ctx, int64_t stride = x_dim[x_rank - 1]; int64_t pre_dim = numel / stride; - T div_factor = static_cast(2.0); KernelNanmedianGrad <<>>( - x_ptr, m_ptr, out_grad_ptr, x_grad_ptr, stride, pre_dim, div_factor); -} - -template -void BaseMedianGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& median_index, - const DenseTensor& out_grad, - const IntArray& axes, - DenseTensor* x_grad) { - auto rank = x.dims().size(); - T* x_grad_ptr = dev_ctx.template Alloc(x_grad); - if (axes.size() && (rank > 1)) { - DenseTensor tmp_x_grad(*x_grad); - CalcMedianGradKernel( - dev_ctx, x, median_index, out_grad, &tmp_x_grad, x_grad_ptr); - PostprocessMedianGradKernel(dev_ctx, &tmp_x_grad, axes, x_grad); - } else { - CalcMedianGradKernel( - dev_ctx, x, median_index, out_grad, x_grad, x_grad_ptr); - } + x_data, m_data, out_grad_ptr, dx_data, stride, 
pre_dim); } template void NanmedianGradKernel(const Context& dev_ctx, - const DenseTensor& input, + const DenseTensor& x, const DenseTensor& median_index, const DenseTensor& out_grad, const IntArray& axes, - bool keep_dim, + bool keepdim UNUSED, DenseTensor* x_grad) { - BaseMedianGradKernel( - dev_ctx, input, median_index, out_grad, axes, x_grad); + DenseTensor tmp_x; + auto rank = x.dims().size(); + if ((axes.size() == 0) || rank <= 1) { + tmp_x = x; + tmp_x.Resize({x.numel()}); + CalcMedianGradKernel( + dev_ctx, tmp_x, median_index, out_grad, x_grad); + } else { + funcs::PreprocessMedianKernel(dev_ctx, x, axes, &tmp_x); + + DenseTensor tmp_x_grad; + tmp_x_grad.Resize(x_grad->dims()); + CalcMedianGradKernel( + dev_ctx, tmp_x, median_index, out_grad, &tmp_x_grad); + + dev_ctx.template Alloc(x_grad); + funcs::PostprocessMedianGradKernel( + dev_ctx, &tmp_x_grad, axes, x_grad); + } } } // namespace phi diff --git a/paddle/phi/kernels/gpu/nanmedian_kernel.cu b/paddle/phi/kernels/gpu/nanmedian_kernel.cu index 0b2ac39600068..8a6be7a9bdef0 100644 --- a/paddle/phi/kernels/gpu/nanmedian_kernel.cu +++ b/paddle/phi/kernels/gpu/nanmedian_kernel.cu @@ -20,7 +20,7 @@ #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/full_kernel.h" -#include "paddle/phi/kernels/impl/nanmedian_kernel_impl.h" +#include "paddle/phi/kernels/funcs/nanmedian_utils.h" #include "paddle/phi/kernels/top_k_kernel.h" namespace phi { @@ -138,14 +138,13 @@ __global__ void CalcNanmedianKernel(const T* sort_out_ptr, template void ProcessMedianKernel(const Context& dev_ctx, const DenseTensor& x, - bool ignore_nan, DenseTensor* out, - int64_t* m_ptr) { - bool should_ignore_nan = ignore_nan; + DenseTensor* median_index) { auto stream = dev_ctx.stream(); + const T* x_data = x.data(); + T* out_data = dev_ctx.template Alloc(out); + int64_t* m_data = dev_ctx.template Alloc(median_index); - const T* x_ptr = x.data(); - T* o_ptr = dev_ctx.template Alloc(out); int64_t numel = x.numel(); auto x_dim = x.dims(); int64_t x_rank = x_dim.size(); @@ -156,7 +155,9 @@ void ProcessMedianKernel(const Context& dev_ctx, DenseTensor nan_counts, nan_stat; int64_t* nan_counts_ptr; int64_t max_valid_num = 0; - if (should_ignore_nan) { + + bool ignore_nan = true; + if (ignore_nan) { nan_counts.Resize(phi::make_ddim({pre_dim})); dev_ctx.template Alloc(&nan_counts); nan_counts_ptr = nan_counts.data(); @@ -167,7 +168,7 @@ void ProcessMedianKernel(const Context& dev_ctx, KernelNanCounts<<>>(x_ptr, + stream>>>(x_data, numel, pre_dim, stride, @@ -189,15 +190,19 @@ void ProcessMedianKernel(const Context& dev_ctx, // all elements are nan values T nan_val = std::numeric_limits::quiet_NaN(); if (nan_stat_cpu_ptr[0] == numel) { - FullLikeKernel(dev_ctx, x, nan_val, x.dtype(), out); + phi::funcs::SetConstant set_nan; + set_nan(dev_ctx, out, nan_val); + + phi::funcs::SetConstant set_negatvie; + set_negatvie(dev_ctx, median_index, static_cast(-1)); return; } - should_ignore_nan = nan_stat_cpu_ptr[0] > 0; + ignore_nan = nan_stat_cpu_ptr[0] > 0; max_valid_num = nan_stat_cpu_ptr[1]; } - int64_t sort_k = should_ignore_nan ? max_valid_num : ((stride >> 1) + 1); + int64_t sort_k = ignore_nan ? 
max_valid_num : ((stride >> 1) + 1); bool is_ori_odd = stride & 1; DenseTensor sort_out, sort_indices; @@ -217,14 +222,14 @@ void ProcessMedianKernel(const Context& dev_ctx, T div_factor = static_cast(2.0); T nan_val = std::numeric_limits::quiet_NaN(); - if (should_ignore_nan) { + if (ignore_nan) { CalcNanmedianKernel <<>>( sort_out_ptr, sort_indices_ptr, nan_counts_ptr, - m_ptr, - o_ptr, + m_data, + out_data, is_ori_odd, pre_dim, max_valid_num, @@ -236,8 +241,8 @@ void ProcessMedianKernel(const Context& dev_ctx, <<>>( sort_out_ptr, sort_indices_ptr, - m_ptr, - o_ptr, + m_data, + out_data, div_factor, is_ori_odd, pre_dim, @@ -245,27 +250,6 @@ void ProcessMedianKernel(const Context& dev_ctx, } } -template -void BaseMedianKernel(const Context& dev_ctx, - const DenseTensor& input, - const IntArray& axes, - bool ignore_nan, - DenseTensor* out, - DenseTensor* median_index) { - DenseTensor x; - auto rank = input.dims().size(); - if ((axes.size() == 0) || rank <= 1) { - x = input; - x.Resize({input.numel()}); - } else { - PreprocessMedianKernel(dev_ctx, input, axes, &x); - } - - int64_t* m_ptr = dev_ctx.template Alloc(median_index); - ProcessMedianKernel(dev_ctx, x, ignore_nan, out, m_ptr); - out->Resize(out->dims()); -} - template void NanmedianKernel(const Context& dev_ctx, const DenseTensor& x, @@ -273,7 +257,16 @@ void NanmedianKernel(const Context& dev_ctx, bool keepdim, DenseTensor* out, DenseTensor* median_index) { - BaseMedianKernel(dev_ctx, x, axes, true, out, median_index); + DenseTensor tmp_x; + auto rank = x.dims().size(); + if ((axes.size() == 0) || rank <= 1) { + tmp_x = x; + tmp_x.Resize({x.numel()}); + } else { + funcs::PreprocessMedianKernel(dev_ctx, x, axes, &tmp_x); + } + + ProcessMedianKernel(dev_ctx, tmp_x, out, median_index); } } // namespace phi diff --git a/paddle/phi/kernels/impl/nanmedian_grad_kernel_impl.h b/paddle/phi/kernels/impl/nanmedian_grad_kernel_impl.h deleted file mode 100644 index 8a30082ac366e..0000000000000 --- a/paddle/phi/kernels/impl/nanmedian_grad_kernel_impl.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/nanmedian_grad_kernel.h" - -namespace phi { - -template -void PostprocessMedianGradKernel(const Context& dev_ctx, - DenseTensor* input, - const IntArray& raw_axes, - DenseTensor* x) { - auto input_dim = input->dims(); - auto rank = input_dim.size(); - - std::vector axes = raw_axes.GetData(); - int64_t axes_size = static_cast(axes.size()); - for (int64_t i = 0; i < axes_size; i++) { - if (axes[i] < 0) { - axes[i] += rank; - } - } - - std::vector trans_back; - std::vector reshape_back; - trans_back.reserve(rank); - trans_back.resize(rank); - - int offset = 0; - for (int64_t i = 0; i < rank; i++) { - if (std::find(axes.begin(), axes.end(), i) == axes.end()) { - reshape_back.push_back(input_dim[i]); - trans_back[i] = offset; - offset += 1; - } - } - - for (int64_t i = 0; i < rank; i++) { - if (std::find(axes.begin(), axes.end(), i) != axes.end()) { - trans_back[i] = offset; - reshape_back.push_back(input_dim[i]); - offset += 1; - } - } - - input->Resize(make_ddim(reshape_back)); - funcs::TransCompute( - static_cast(trans_back.size()), dev_ctx, *input, x, trans_back); -} - -} // namespace phi diff --git a/python/paddle/nn/functional/distance.py b/python/paddle/nn/functional/distance.py index f1155852b00a9..cb004fe9b622f 100644 --- a/python/paddle/nn/functional/distance.py +++ b/python/paddle/nn/functional/distance.py @@ -64,8 +64,8 @@ def pairwise_distance(x, y, p=2.0, epsilon=1e-6, keepdim=False, name=None): y = paddle.to_tensor([[5., 6.], [7., 8.]], dtype=paddle.float64) distance = paddle.nn.functional.pairwise_distance(x, y) print(distance) - # Tensor(shape=[2], dtype=float64, place=Place(gpu:0), stop_gradient=True, - # [4.99999860, 4.99999860]) + # Tensor(shape=[2], dtype=float64, place=Place(gpu:0), stop_gradient=True, + # [4.99999860, 4.99999860]) """ if in_dynamic_mode(): diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index d6e378396f354..80f4d9a2b6066 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -394,15 +394,15 @@ def logspace(start, stop, num, base=10.0, dtype=None, name=None): Args: start(int|float|Tensor): The input :attr:`start` is exponent of first entry in \ - the sequence. It is a scalar, or a Tensor of shape [1] with input data \ + the sequence. It is a scalar, or a 0-D Tensor of shape [] with input data \ type int32, int64, float32 or float64. stop(int|float|Tensor): The input :attr:`stop` is exponent of last entry in the \ - sequence. It is a scalar, or a Tensor of shape [1] with input data \ + sequence. It is a scalar, or a 0-D Tensor of shape [] with input data \ type int32, int64, float32 or float64. num(int|Tensor): The input :attr:`num` is given number of items in the sequence. \ - It is an int scalar, or a Tensor of shape [1] with data type int32. + It is an int scalar, or a 0-D Tensor of shape [] with data type int32. base(int|float|Tensor): The input :attr:`base` is base of the logarithm function. \ - It is a scalar, or a Tensor of shape [1] with input data type int32, int64, \ + It is a scalar, or a 0-D Tensor of shape [] with input data type int32, int64, \ float32 or float64. dtype(np.dtype|str, optional): The data type of output tensor, it could be \ int32, int64, float32 or float64. Default: if None, the data type is float32. 
\ diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 5100b47158626..ebb1d960df690 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -1615,7 +1615,7 @@ def count_nonzero(x, axis=None, keepdim=False, name=None): # x is a 2-D Tensor: x = paddle.to_tensor([[0., 1.1, 1.2], [0., 0., 1.3], [0., 0., 0.]]) out1 = paddle.count_nonzero(x) - # [3] + # 3 out2 = paddle.count_nonzero(x, axis=0) # [0, 1, 2] out3 = paddle.count_nonzero(x, axis=0, keepdim=True) @@ -1636,17 +1636,8 @@ def count_nonzero(x, axis=None, keepdim=False, name=None): # [1, 3, 5] """ - if axis is not None: - if isinstance(axis, int): - axis = [axis] - dims = len(x.shape) - for i in range(len(axis)): - if not isinstance(axis[i], int) or not ( - axis[i] < dims and axis[i] >= -dims - ): - raise ValueError( - "Axis should be None, int, or a list, element should in range [-rank(x), rank(x))." - ) + if isinstance(axis, int): + axis = [axis] bool_tensor = paddle.cast(x, 'bool') int_tensor = paddle.cast(bool_tensor, 'int64') diff --git a/python/paddle/tensor/stat.py b/python/paddle/tensor/stat.py index fa41f5a93b82a..004db97089f72 100644 --- a/python/paddle/tensor/stat.py +++ b/python/paddle/tensor/stat.py @@ -255,7 +255,7 @@ def numel(x, name=None): return out -def nanmedian(x, axis=None, keepdim=True, name=None): +def nanmedian(x, axis=None, keepdim=False, name=None): r""" Compute the median along the specified axis, while ignoring NaNs. @@ -273,7 +273,7 @@ def nanmedian(x, axis=None, keepdim=True, name=None): in the output Tensor. If ``keepdim`` is True, the dimensions of the output Tensor is the same as ``x`` except in the reduced dimensions(it is of size 1 in this case). Otherwise, the shape of - the output Tensor is squeezed in ``axis`` . Default is True. + the output Tensor is squeezed in ``axis`` . Default is False. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. @@ -287,16 +287,16 @@ def nanmedian(x, axis=None, keepdim=True, name=None): x = paddle.to_tensor([[float('nan'), 2. , 3. ], [0. , 1. , 2. ]]) y1 = x.nanmedian() - # y1 is [[2.]] + # y1 is 2. y2 = x.nanmedian(0) - # y2 is [[0., 1.5, 2.5]] + # y2 is [0., 1.5, 2.5] - y3 = x.nanmedian(0, keepdim=False) - # y3 is [0., 1.5, 2.5] + y3 = x.nanmedian(0, keepdim=True) + # y3 is [[0., 1.5, 2.5]] y4 = x.nanmedian((0, 1)) - # y4 is [[2.]] + # y4 is 2. """ if not isinstance(x, Variable): raise TypeError("In median, the input x should be a Tensor.") @@ -304,7 +304,6 @@ def nanmedian(x, axis=None, keepdim=True, name=None): if isinstance(axis, (list, tuple)) and len(axis) == 0: raise ValueError("Axis list should not be empty.") - dims = len(x.shape) if axis is None: axis = [] elif isinstance(axis, tuple): @@ -312,24 +311,6 @@ def nanmedian(x, axis=None, keepdim=True, name=None): elif isinstance(axis, int): axis = [axis] - if not isinstance(axis, list): - raise ValueError( - "Axis should be None, int, or a list, element should in range [-rank(x), rank(x))." - ) - - for i in range(len(axis)): - if not isinstance(axis[i], int) or not ( - axis[i] < dims and axis[i] >= -dims - ): - raise ValueError( - "Axis should be None, int, or a list, element should in range [-rank(x), rank(x))." 
- ) - if axis[i] < 0: - axis[i] += dims - - if len(axis) != len(set(axis)): - raise ValueError("Axis has duplicated elements.") - if in_dynamic_mode(): return _C_ops.nanmedian(x, axis, keepdim) else: diff --git a/test/legacy_test/test_nanmedian.py b/test/legacy_test/test_nanmedian.py index 88950271d32b9..8b5f86232f0e3 100644 --- a/test/legacy_test/test_nanmedian.py +++ b/test/legacy_test/test_nanmedian.py @@ -125,6 +125,7 @@ def test_data_case(data): pd_res = paddle.nanmedian( paddle.to_tensor(data), keepdim=keep_dim ) + assert np_res.shape == pd_res.numpy().shape np.testing.assert_allclose( np_res, pd_res.numpy(), rtol=1e-05, equal_nan=True ) @@ -187,6 +188,23 @@ def test_check_grad(self): x_np[0, :] = np.nan x_np[1, :3] = np.nan x_np[2, 3:] = np.nan + + x_tensor = paddle.to_tensor(x_np, stop_gradient=False) + y = paddle.nanmedian(x_tensor, keepdim=True) + dx = paddle.grad(y, x_tensor)[0].numpy() + + np_grad = np.zeros(shape) + np_grad[1, 3] = 0.5 + np_grad[3, 2] = 0.5 + np.testing.assert_allclose(np_grad, dx, rtol=1e-05, equal_nan=True) + + def test_check_grad_axis(self): + paddle.disable_static(place=self.place) + shape = (4, 5) + x_np = np.random.uniform(-1, 1, shape).astype(np.float64) + x_np[0, :] = np.nan + x_np[1, :3] = np.nan + x_np[2, 3:] = np.nan x_np_sorted = np.sort(x_np) nan_counts = np.count_nonzero(np.isnan(x_np).astype(np.int32), axis=1) np_grad = np.zeros(shape) @@ -205,10 +223,25 @@ def test_check_grad(self): np_grad[i, j] = 1 if is_odd else 0.5 x_tensor = paddle.to_tensor(x_np, stop_gradient=False) - y = paddle.nanmedian(x_tensor, axis=1, keepdim=True) + y = paddle.nanmedian(x_tensor, axis=1) dx = paddle.grad(y, x_tensor)[0].numpy() np.testing.assert_allclose(np_grad, dx, rtol=1e-05, equal_nan=True) + def test_check_grad_0d(self): + paddle.disable_static(place=self.place) + x = paddle.rand([]) + x.stop_gradient = False + y = paddle.nanmedian(x) + y.backward() + self.assertEqual(x.grad.shape, []) + np.testing.assert_allclose(x.grad, np.array(1.0)) + + x = paddle.to_tensor(float('nan'), stop_gradient=False) + y = paddle.nanmedian(x) + y.backward() + self.assertEqual(x.grad.shape, []) + np.testing.assert_allclose(x.grad, np.array(0.0)) + if __name__ == "__main__": unittest.main() diff --git a/test/legacy_test/test_zero_dim_tensor.py b/test/legacy_test/test_zero_dim_tensor.py index 63b3e77c07d02..210906e87a960 100644 --- a/test/legacy_test/test_zero_dim_tensor.py +++ b/test/legacy_test/test_zero_dim_tensor.py @@ -28,6 +28,14 @@ unary_api_list = [ paddle.nn.functional.elu, + paddle.nn.functional.rrelu, + paddle.frac, + paddle.sgn, + paddle.nan_to_num, + paddle.i0, + paddle.i0e, + paddle.i1, + paddle.i1e, paddle.nn.functional.gelu, paddle.nn.functional.hardsigmoid, paddle.nn.functional.hardswish, @@ -95,9 +103,15 @@ paddle.nn.functional.alpha_dropout, ] -inplace_api_list = [ +inplace_unary_api_list = [ paddle.nn.functional.relu_, paddle.nn.functional.tanh_, + paddle.tensor.sigmoid_, + paddle.tensor.ceil_, + paddle.tensor.floor_, + paddle.tensor.reciprocal_, + paddle.tensor.exp_, + paddle.tensor.sqrt_, ] @@ -119,7 +133,7 @@ def test_dygraph_unary(self): self.assertEqual(x.grad.shape, []) self.assertEqual(out.grad.shape, []) - for api in inplace_api_list: + for api in inplace_unary_api_list: x = paddle.rand([]) out = api(x) self.assertEqual(x.shape, []) @@ -165,6 +179,8 @@ def test_static_unary(self): paddle.mean, paddle.nansum, paddle.nanmean, + paddle.median, + paddle.nanmedian, paddle.min, paddle.max, paddle.amin, @@ -173,6 +189,7 @@ def test_static_unary(self): 
paddle.logsumexp, paddle.all, paddle.any, + paddle.count_nonzero, ] @@ -187,18 +204,20 @@ def test_dygraph_reduce(self): else: x = paddle.rand([]) x.stop_gradient = False - out = api(x, None) + out = api(x, axis=None) out.retain_grads() out.backward() self.assertEqual(x.shape, []) self.assertEqual(out.shape, []) - np.testing.assert_allclose(out.numpy(), x.numpy()) + if api not in [paddle.count_nonzero]: + np.testing.assert_allclose(out.numpy(), x.numpy()) - out_empty_list = api(x, []) - self.assertEqual(out_empty_list, out) - self.assertEqual(out_empty_list.shape, []) + if api not in [paddle.median, paddle.nanmedian]: + out_empty_list = api(x, axis=[]) + self.assertEqual(out_empty_list, out) + self.assertEqual(out_empty_list.shape, []) if x.grad is not None: self.assertEqual(x.grad.shape, []) @@ -206,12 +225,12 @@ def test_dygraph_reduce(self): np.testing.assert_allclose(x.grad.numpy(), np.array(1.0)) np.testing.assert_allclose(out.grad.numpy(), np.array(1.0)) - out1 = api(x, 0) + out1 = api(x, axis=0) self.assertEqual(out1.shape, []) self.assertEqual(out1, out) out1.backward() - out2 = api(x, -1) + out2 = api(x, axis=-1) self.assertEqual(out2.shape, []) self.assertEqual(out2, out) out2.backward() @@ -220,13 +239,28 @@ def test_dygraph_reduce(self): self.assertEqual(x.grad.shape, []) np.testing.assert_allclose(x.grad.numpy(), np.array(3.0)) - # 2) x is ND, reduce to 0D + # 2) x is 1D, axis=0, reduce to 0D + if api in [paddle.all, paddle.any]: + x = paddle.randint(0, 2, [5]).astype('bool') + else: + x = paddle.rand([5]) + x.stop_gradient = False + out = api(x, axis=0) + out.retain_grads() + out.backward() + + self.assertEqual(out.shape, []) + if x.grad is not None: + self.assertEqual(out.grad.shape, []) + self.assertEqual(x.grad.shape, [5]) + + # 3) x is ND, reduce to 0D if api in [paddle.all, paddle.any]: x = paddle.randint(0, 2, [3, 5]).astype('bool') else: x = paddle.rand([3, 5]) x.stop_gradient = False - out = api(x, None) + out = api(x, axis=None) out.retain_grads() out.backward() @@ -235,20 +269,20 @@ def test_dygraph_reduce(self): self.assertEqual(out.grad.shape, []) self.assertEqual(x.grad.shape, [3, 5]) - # 3) x is 1D, axis=0, reduce to 0D + # 4) x is ND, reduce to 0D, keepdim=True if api in [paddle.all, paddle.any]: - x = paddle.randint(0, 2, [5]).astype('bool') + x = paddle.randint(0, 2, [3, 5]).astype('bool') else: - x = paddle.rand([5]) + x = paddle.rand([3, 5]) x.stop_gradient = False - out = api(x, 0) + out = api(x, keepdim=True) out.retain_grads() out.backward() - self.assertEqual(out.shape, []) + self.assertEqual(out.shape, [1, 1]) if x.grad is not None: - self.assertEqual(out.grad.shape, []) - self.assertEqual(x.grad.shape, [5]) + self.assertEqual(out.grad.shape, [1, 1]) + self.assertEqual(x.grad.shape, [3, 5]) paddle.enable_static() @@ -267,16 +301,17 @@ def test_static_reduce(self): else: x = paddle.rand([]) x.stop_gradient = False - out = api(x, None) + out = api(x, axis=None) paddle.static.append_backward(out) - out_empty_list = api(x, None) - self.assertEqual(out_empty_list.shape, ()) + if api not in [paddle.median, paddle.nanmedian]: + out_empty_list = api(x, axis=[]) + self.assertEqual(out_empty_list.shape, ()) - out1 = api(x, 0) + out1 = api(x, axis=0) self.assertEqual(out1.shape, ()) - out2 = api(x, -1) + out2 = api(x, axis=-1) self.assertEqual(out2.shape, ()) fetch_list = [x, out] @@ -286,7 +321,8 @@ def test_static_reduce(self): res = exe.run(main_prog, fetch_list=fetch_list) self.assertEqual(res[0].shape, ()) self.assertEqual(res[1].shape, ()) - 
np.testing.assert_allclose(res[0], res[1]) + if api not in [paddle.count_nonzero]: + np.testing.assert_allclose(res[0], res[1]) if len(res) > 2: self.assertEqual(res[2].shape, ()) @@ -300,7 +336,7 @@ def test_static_reduce(self): else: x = paddle.rand([3, 5]) x.stop_gradient = False - out = api(x, None) + out = api(x, axis=None) paddle.static.append_backward(out) fetch_list = [out] @@ -319,7 +355,7 @@ def test_static_reduce(self): else: x = paddle.rand([5]) x.stop_gradient = False - out = api(x, 0) + out = api(x, axis=0) paddle.static.append_backward(out) fetch_list = [out] @@ -359,6 +395,11 @@ def test_static_reduce(self): paddle.fmin, paddle.complex, paddle.kron, + paddle.logaddexp, + paddle.nextafter, + paddle.ldexp, + paddle.polar, + paddle.heaviside, ] binary_int_api_list = [ @@ -370,6 +411,15 @@ def test_static_reduce(self): ] +inplace_binary_api_list = [ + paddle.tensor.add_, + paddle.tensor.subtract_, + paddle.tensor.multiply_, + paddle.tensor.remainder_, + paddle.tensor.remainder_, +] + + # Use to test zero-dim of binary API class TestBinaryAPI(unittest.TestCase): def test_dygraph_binary(self): @@ -497,6 +547,20 @@ def test_dygraph_binary(self): self.assertEqual(out.shape, [3, 5]) np.testing.assert_array_equal(out.numpy(), out_np) + for api in inplace_binary_api_list: + with paddle.no_grad(): + x = paddle.rand([]) + y = paddle.rand([]) + out = api(x, y) + self.assertEqual(x.shape, []) + self.assertEqual(out.shape, []) + + x = paddle.rand([3, 5]) + y = paddle.rand([]) + out = api(x, y) + self.assertEqual(x.shape, [3, 5]) + self.assertEqual(out.shape, [3, 5]) + paddle.enable_static() def test_static_binary(self): @@ -640,6 +704,65 @@ def setUp(self): paddle.disable_static() self.x = paddle.rand([]) + def test_polygamma(self): + x = paddle.rand([]) + x.stop_gradient = False + out = paddle.polygamma(x, 2) + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(x.grad.shape, []) + + def test_frexp(self): + x = paddle.rand([]) + x.stop_gradient = False + out1, out2 = paddle.frexp(x) + out1.backward() + + self.assertEqual(out1.shape, []) + self.assertEqual(out2.shape, []) + self.assertEqual(x.grad.shape, []) + + def test_pairwise_distance(self): + x = paddle.rand([5]) + x.stop_gradient = False + y = paddle.rand([5]) + y.stop_gradient = False + + out = paddle.nn.functional.pairwise_distance(x, y) + out.backward() + self.assertEqual(out.shape, []) + self.assertEqual(x.grad.shape, [5]) + + def test_take(self): + x = paddle.rand([4, 5]) + x.stop_gradient = False + out = paddle.take(x, paddle.to_tensor(2)) + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(x.grad.shape, [4, 5]) + np.testing.assert_allclose(x.grad[0, 2], 1.0) + + x = paddle.rand([]) + x.stop_gradient = False + out = paddle.take(x, paddle.to_tensor(0)) + out.backward() + + self.assertEqual(out.shape, []) + np.testing.assert_allclose(out, x) + self.assertEqual(x.grad.shape, []) + np.testing.assert_allclose(x.grad.numpy(), 1.0) + + def test_trapezoid(self): + y = paddle.rand([5]) + y.stop_gradient = False + out = paddle.trapezoid(y, dx=2.0) + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(y.grad.shape, [5]) + def test_create_parameter_var(self): zero_dim_param = paddle.create_parameter(shape=[], dtype='float32') self.assertEqual(zero_dim_param.shape, []) @@ -1096,54 +1219,6 @@ def test_argmax(self): out = paddle.argmax(x, keepdim=True) self.assertEqual(out.shape, [1, 1]) - def test_median(self): - # 1) x is 0D - x = paddle.rand([]) - x.stop_gradient = False - out1 = 
paddle.median(x, 0) - out2 = paddle.median(x, -1) - out3 = paddle.median(x, None) - - out1.backward() - out2.backward() - out3.backward() - - self.assertEqual(out1.shape, []) - np.testing.assert_allclose(out1, x) - - self.assertEqual(out2.shape, []) - np.testing.assert_allclose(out2, x) - - self.assertEqual(out3.shape, []) - np.testing.assert_allclose(out3, x) - - self.assertEqual(x.grad.shape, []) - np.testing.assert_allclose(x.grad, 3.0) - - # 2) x is 1D - x = paddle.rand([5]) - x.stop_gradient = False - out = paddle.median(x, 0) - out.backward() - self.assertEqual(out.shape, []) - self.assertEqual(x.grad.shape, [5]) - - # 3) x is ND - x = paddle.rand([3, 5]) - x.stop_gradient = False - out = paddle.median(x, None) - out.backward() - self.assertEqual(out.shape, []) - self.assertEqual(x.grad.shape, [3, 5]) - - # 4) x is ND, keepdim=True - x = paddle.rand([3, 5]) - x.stop_gradient = False - out = paddle.median(x, keepdim=True) - out.backward() - self.assertEqual(out.shape, [1, 1]) - self.assertEqual(x.grad.shape, [3, 5]) - def test_kthvalue(self): # 1) x is 0D x = paddle.randn([]) @@ -1432,6 +1507,40 @@ def test_quantile(self): self.assertEqual(out.grad, 1.0) self.assertEqual(x.grad.shape, [2, 3]) + def test_nanquantile(self): + # 1) x is 0D + x = paddle.rand([]) + x.stop_gradient = False + out = paddle.quantile(x, 0.5, axis=None) + + out.retain_grads() + out.backward() + + out_empty_list = paddle.quantile(x, 0.5, axis=[]) + self.assertEqual(out_empty_list, out) + + self.assertEqual(x.shape, []) + self.assertEqual(out.shape, []) + self.assertEqual(out, x) + + self.assertEqual(x.grad.shape, []) + self.assertEqual(x.grad, 1.0) + self.assertEqual(out.grad.shape, []) + self.assertEqual(out.grad, 1.0) + + # 2) x is ND with 'nan' + x = paddle.to_tensor([[float('nan'), 2.0, 3.0], [0.0, 1.0, 2.0]]) + x.stop_gradient = False + out = paddle.quantile(x, 0.5, axis=None) + + out.retain_grads() + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(out.grad.shape, []) + self.assertEqual(out.grad, 1.0) + self.assertEqual(x.grad.shape, [2, 3]) + def test_flip(self): x = paddle.rand([]) x.stop_gradient = False @@ -1965,6 +2074,25 @@ def test_cumsum(self): self.assertEqual(out3.grad.shape, []) self.assertTrue(out3.grad.numpy() == 1) + def test_logcumsumexp(self): + x = paddle.rand([]) + x.stop_gradient = False + + out1 = paddle.logcumsumexp(x) + out2 = paddle.logcumsumexp(x, axis=0) + out3 = paddle.logcumsumexp(x, axis=-1) + + out1.backward() + out2.backward() + out3.backward() + + self.assertEqual(out1.shape, [1]) + self.assertEqual(out2.shape, []) + self.assertEqual(out3.shape, []) + + self.assertEqual(x.grad.shape, []) + self.assertTrue(x.grad.numpy() == 3) + def test_add_n(self): x1 = paddle.rand([]) x1.stop_gradient = False @@ -2654,6 +2782,15 @@ def test_cov(self): self.assertEqual(xt_1_out.shape, []) self.assertEqual(xt_1.grad.shape, [12]) + def test_corrcoef(self): + x = paddle.randn((12,)) + x.stop_gradient = False + out = paddle.linalg.corrcoef(x) + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(x.grad.shape, [12]) + def test_det(self): xt = paddle.randn([3, 3, 3]) xt.stop_gradient = False @@ -2852,6 +2989,81 @@ def setUp(self): paddle.enable_static() self.exe = paddle.static.Executor() + @prog_scope() + def test_polygamma(self): + x = paddle.rand([]) + x.stop_gradient = False + out = paddle.polygamma(x, 2) + paddle.static.append_backward(out) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name]) + 
self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, ()) + + @prog_scope() + def test_frexp(self): + x = paddle.rand([]) + x.stop_gradient = False + out1, out2 = paddle.frexp(x) + paddle.static.append_backward(out1) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out1, out2, x.grad_name]) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, ()) + self.assertEqual(res[2].shape, ()) + + @prog_scope() + def test_pairwise_distance(self): + x = paddle.rand([5]) + x.stop_gradient = False + y = paddle.rand([5]) + y.stop_gradient = False + + out = paddle.nn.functional.pairwise_distance(x, y) + paddle.static.append_backward(out) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name, y.grad_name]) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, (5,)) + self.assertEqual(res[2].shape, (5,)) + + @prog_scope() + def test_take(self): + x1 = paddle.rand([4, 5]) + x1.stop_gradient = False + out1 = paddle.take(x1, paddle.to_tensor(2)) + paddle.static.append_backward(out1) + + x2 = paddle.rand([]) + x2.stop_gradient = False + out2 = paddle.take(x2, paddle.to_tensor(0)) + paddle.static.append_backward(out2) + + prog = paddle.static.default_main_program() + res = self.exe.run( + prog, fetch_list=[out1, x1.grad_name, out2, x2.grad_name] + ) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, (4, 5)) + self.assertEqual(res[2].shape, ()) + self.assertEqual(res[3].shape, ()) + np.testing.assert_allclose(res[3], 1.0) + + @prog_scope() + def test_trapezoid(self): + y = paddle.rand([5]) + y.stop_gradient = False + out = paddle.trapezoid(y, dx=2.0) + paddle.static.append_backward(out) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, y.grad_name]) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, (5,)) + @prog_scope() def test_create_parameter_var(self): zero_dim_param = paddle.create_parameter(shape=[], dtype='float32') @@ -3236,40 +3448,6 @@ def test_argmax(self): np.testing.assert_allclose(res[2], 0.0) self.assertEqual(res[3].shape, ()) - @prog_scope() - def test_median(self): - # 1) x is 0D - x = paddle.rand([]) - x.stop_gradient = False - out = paddle.median(x) - paddle.static.append_backward(out) - - # 2) x is ND - x1 = paddle.rand([3, 5]) - x1.stop_gradient = False - out1 = paddle.median(x1) - paddle.static.append_backward(out1) - - prog = paddle.static.default_main_program() - res = self.exe.run( - prog, - fetch_list=[ - x, - out, - x.grad_name, - out1, - x1.grad_name, - ], - ) - self.assertEqual(res[1].shape, ()) - np.testing.assert_allclose(res[1], res[0]) - - self.assertEqual(res[2].shape, ()) - np.testing.assert_allclose(res[2], 1.0) - - self.assertEqual(res[3].shape, ()) - self.assertEqual(res[4].shape, (3, 5)) - @prog_scope() def test_kthvalue(self): # 1) x is 0D @@ -3607,12 +3785,12 @@ def test_quantile(self): x1 = paddle.rand([]) x1.stop_gradient = False out1 = paddle.quantile(x1, 0.5, axis=None) - paddle.static.append_backward(out1.sum()) + paddle.static.append_backward(out1) x2 = paddle.rand([2, 3]) x2.stop_gradient = False out2 = paddle.quantile(x2, 0.5, axis=None) - paddle.static.append_backward(out2.sum()) + paddle.static.append_backward(out2) out_empty_list = paddle.quantile(x1, 0.5, axis=[]) self.assertEqual(out_empty_list.shape, ()) @@ -3640,6 +3818,37 @@ def test_quantile(self): self.assertEqual(res[5].shape, ()) self.assertEqual(res[5], 1.0) + @prog_scope() + def 
test_nanquantile(self): + # 1) x is 0D + x1 = paddle.rand([]) + x1.stop_gradient = False + out1 = paddle.nanquantile(x1, 0.5, axis=None) + paddle.static.append_backward(out1) + + # 2) x is ND with 'nan' + x2 = paddle.to_tensor([[float('nan'), 2.0, 3.0], [0.0, 1.0, 2.0]]) + x2.stop_gradient = False + out2 = paddle.nanquantile(x2, 0.5, axis=None) + print(out2) + paddle.static.append_backward(out2) + + prog = paddle.static.default_main_program() + res = self.exe.run( + prog, + fetch_list=[ + out1, + x1.grad_name, + out2, + x2.grad_name, + ], + ) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, ()) + + self.assertEqual(res[2].shape, ()) + self.assertEqual(res[3].shape, (2, 3)) + @prog_scope() def test_flip(self): x = paddle.rand([]) @@ -4108,16 +4317,45 @@ def test_cumsum(self): self.assertEqual(res[1].shape, ()) self.assertEqual(res[2].shape, ()) self.assertEqual(res[3].shape, ()) - self.assertEqual(res[3], 1) + self.assertEqual(res[3], 1.0) self.assertEqual(res[4].shape, (1,)) - self.assertEqual(res[4], 1) + self.assertEqual(res[4], 1.0) self.assertEqual(res[5].shape, ()) - self.assertEqual(res[5], 1) + self.assertEqual(res[5], 1.0) self.assertEqual(res[6].shape, ()) - self.assertEqual(res[6], 1) + self.assertEqual(res[6], 1.0) self.assertEqual(out2.shape, ()) self.assertEqual(out3.shape, ()) + @prog_scope() + def test_logcumsumexp(self): + x = paddle.rand([]) + x.stop_gradient = False + + out1 = paddle.logcumsumexp(x) + out2 = paddle.logcumsumexp(x, axis=0) + out3 = paddle.logcumsumexp(x, axis=-1) + + paddle.static.append_backward(out1) + paddle.static.append_backward(out2) + paddle.static.append_backward(out3) + + prog = paddle.static.default_main_program() + res = self.exe.run( + prog, + fetch_list=[ + out1, + out2, + out3, + x.grad_name, + ], + ) + self.assertEqual(res[0].shape, (1,)) + self.assertEqual(res[1].shape, ()) + self.assertEqual(res[2].shape, ()) + self.assertEqual(res[3].shape, ()) + self.assertEqual(res[3], 1.0) + @prog_scope() def test_add_n(self): x1 = paddle.rand([]) @@ -4986,11 +5224,22 @@ def test_cov(self): paddle.static.append_backward(out) prog = paddle.static.default_main_program() - res = self.exe.run(prog, fetch_list=[out, xt_1.grad_name]) self.assertEqual(res[0].shape, ()) self.assertEqual(res[1].shape, (12,)) + @prog_scope() + def test_corrcoef(self): + x = paddle.randn((12,)) + x.stop_gradient = False + out = paddle.linalg.corrcoef(x) + paddle.static.append_backward(out) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name]) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, (12,)) + @prog_scope() def test_det(self): xt_1 = paddle.randn((3, 3)) @@ -5261,6 +5510,14 @@ def test_linspace(self): out = paddle.linspace(start, stop, num) np.testing.assert_array_equal(out.numpy(), [1.0, 2.0, 3.0, 4.0, 5.0]) + def test_logspace(self): + start = paddle.full([], 1.0) + stop = paddle.full([], 3.0) + num = paddle.full([], 5, 'int32') + base = paddle.full([], 2.0) + out = paddle.logspace(start, stop, num, base) + self.assertEqual(out.shape, [5]) + def test_arange(self): start = paddle.full([], 1.0) stop = paddle.full([], 6.0) @@ -5883,6 +6140,49 @@ class TestDistribution(unittest.TestCase): def setUp(self): self.x = paddle.full([], 2.0) + def test_Bernoulli(self): + d = paddle.distribution.Bernoulli(probs=0.3) + self.assertEqual(d.mean.shape, []) + self.assertEqual(d.variance.shape, []) + self.assertEqual(d.entropy().shape, []) + self.assertEqual(d.sample([]).shape, []) + 
self.assertEqual(d.rsample([]).shape, []) + self.assertEqual(d.cdf(self.x).shape, []) + self.assertEqual(d.prob(self.x).shape, []) + self.assertEqual(d.log_prob(self.x).shape, []) + + d_other = paddle.distribution.Bernoulli(probs=0.7) + self.assertEqual(d.kl_divergence(d_other).shape, []) + + def test_Geometric(self): + d = paddle.distribution.Geometric(0.5) + self.assertEqual(d.mean.shape, []) + self.assertEqual(d.variance.shape, []) + self.assertEqual(d.entropy().shape, []) + self.assertEqual(d.stddev.shape, []) + self.assertEqual(d.pmf(self.x).shape, []) + self.assertEqual(d.log_pmf(self.x).shape, []) + self.assertEqual(d.sample([]).shape, []) + self.assertEqual(d.rsample([]).shape, []) + self.assertEqual(d.cdf(self.x).shape, []) + + d_other = paddle.distribution.Geometric(probs=0.7) + self.assertEqual(d.kl_divergence(d_other).shape, []) + + def test_Cauchy(self): + d = paddle.distribution.Cauchy(loc=0.1, scale=1.2) + self.assertEqual(d.sample([]).shape, []) + self.assertEqual(d.rsample([]).shape, []) + self.assertEqual(d.prob(self.x).shape, []) + self.assertEqual(d.log_prob(self.x).shape, []) + self.assertEqual(d.cdf(self.x).shape, []) + self.assertEqual(d.entropy().shape, []) + + d_other = paddle.distribution.Cauchy( + loc=paddle.to_tensor(1.2), scale=paddle.to_tensor(2.3) + ) + self.assertEqual(d.kl_divergence(d_other).shape, []) + def test_Categorical(self): logits = paddle.rand([6]) d = paddle.distribution.Categorical(logits)
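
Editor's note (not part of the patch): a minimal usage sketch summarizing the user-visible 0-D behavior that the changes above implement; the shapes and values below are taken from the updated docstrings and the new test cases in this diff, not from running the code.

import paddle

x = paddle.to_tensor([[float('nan'), 2., 3.],
                      [0.,           1., 2.]])

# nanmedian: full reduction now yields a 0-D Tensor, and keepdim defaults to False
y1 = paddle.nanmedian(x)          # shape [], value 2.0 (per updated docstring)
y2 = paddle.nanmedian(x, axis=0)  # shape [3], [0., 1.5, 2.5]

# count_nonzero: full reduction also yields a 0-D Tensor
n = paddle.count_nonzero(x)       # shape []

# logspace: start/stop/num/base may now be 0-D Tensors (per test_logspace)
start = paddle.full([], 1.0)
stop = paddle.full([], 3.0)
num = paddle.full([], 5, 'int32')
base = paddle.full([], 2.0)
out = paddle.logspace(start, stop, num, base)  # shape [5]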