From 35de47b3fc0fadfe317eec9500238d627bcc4b3f Mon Sep 17 00:00:00 2001 From: zhwesky2010 <1183042833@qq.com> Date: Wed, 14 Jun 2023 20:50:29 +0800 Subject: [PATCH] [cherry-pick 2.5][Zero-Dim] paddle.nanmedian/count_nonzero/logspace support 0D, add some 0D case (#54649) * [Zero-Dim] add 0D test case (#54581) * [Zero-Dim] paddle.nanmedian/nanquantile support 0D Tensor (#54500) * [Zero-Dim] paddle.nanmedian support 0D Tensor * fix CI --- paddle/phi/infermeta/multiary.cc | 38 +- paddle/phi/infermeta/unary.cc | 50 +- .../phi/kernels/cpu/nanmedian_grad_kernel.cc | 73 ++- paddle/phi/kernels/cpu/nanmedian_kernel.cc | 69 +-- .../nanmedian_utils.h} | 45 +- .../phi/kernels/gpu/nanmedian_grad_kernel.cu | 81 +-- paddle/phi/kernels/gpu/nanmedian_kernel.cu | 69 +-- .../kernels/impl/nanmedian_grad_kernel_impl.h | 65 --- python/paddle/nn/functional/distance.py | 4 +- python/paddle/tensor/creation.py | 8 +- python/paddle/tensor/math.py | 15 +- python/paddle/tensor/stat.py | 33 +- test/legacy_test/test_nanmedian.py | 35 +- test/legacy_test/test_zero_dim_tensor.py | 530 ++++++++++++++---- 14 files changed, 693 insertions(+), 422 deletions(-) rename paddle/phi/kernels/{impl/nanmedian_kernel_impl.h => funcs/nanmedian_utils.h} (61%) delete mode 100644 paddle/phi/kernels/impl/nanmedian_grad_kernel_impl.h diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 8b55e87aaa4f5..8fc3b285486d4 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -2162,32 +2162,32 @@ void LogspaceInferMeta(const MetaTensor& start, MetaTensor* out) { auto s_dims = start.dims(); PADDLE_ENFORCE_EQ( - (s_dims.size() == 1) && (s_dims[0] == 1), - true, - phi::errors::InvalidArgument("The shape of Input(Start) must be [1]," - "but received input shape is [%s].", - s_dims)); + phi::product(s_dims), + 1, + phi::errors::InvalidArgument("The size of Input(Start) must be 1," + "but received input size is %s.", + phi::product(s_dims))); auto e_dims = stop.dims(); PADDLE_ENFORCE_EQ( - (e_dims.size() == 1) && (e_dims[0] == 1), + phi::product(e_dims), true, - phi::errors::InvalidArgument("The shape of Input(Stop) must be [1]," - "but received input shape is [%s].", - e_dims)); + phi::errors::InvalidArgument("The size of Input(Stop) must be 1," + "but received input size is %s.", + phi::product(e_dims))); auto num_dims = number.dims(); PADDLE_ENFORCE_EQ( - (num_dims.size() == 1) && (num_dims[0] == 1), + phi::product(num_dims), true, - phi::errors::InvalidArgument("The shape of Input(Num) must be [1]," - "but received input shape is [%s].", - num_dims)); + phi::errors::InvalidArgument("The size of Input(Num) must be 1," + "but received input size is %s.", + phi::product(num_dims))); auto b_dims = base.dims(); - PADDLE_ENFORCE_EQ( - (b_dims.size() == 1) && (b_dims[0] == 1), - true, - phi::errors::InvalidArgument("The shape of Input(Base) must be [1]," - "but received input shape is [%s].", - b_dims)); + PADDLE_ENFORCE_EQ(phi::product(b_dims), + true, + phi::errors::InvalidArgument( + "The size of Input(Base) must be 1," + "but received input size is phi::product(b_dims).", + phi::product(b_dims))); out->set_dims(phi::make_ddim({-1})); out->set_dtype(dtype); } diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index e43e945f3750c..764ca8bf7811e 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -2260,37 +2260,47 @@ void NanmedianInferMeta(const MetaTensor& x, for (int64_t i = 0; i < x_rank; i++) { out_dim.push_back(1); } - } else { - 
out_dim.push_back(1); } } else { - std::vector cleaned_axis; + std::vector formated_axis; for (auto& axis : axis_list) { + if (x_rank == 0) { + PADDLE_ENFORCE_EQ(axis == 0 || axis == -1, + true, + phi::errors::InvalidArgument( + "When input 0D Tensor, each element of the axis " + "can only be -1, 0, None")); + } else { + PADDLE_ENFORCE_LT(axis, + x_rank, + errors::InvalidArgument( + "each element of the axis should be in the " + "range [ -dimension(X), dimension(X) ) " + "which dimesion = %d. But received axis = %d.", + x_rank, + axis)); + PADDLE_ENFORCE_GE(axis, + -x_rank, + errors::InvalidArgument( + "each element of the axis should be in the " + "range [ -dimension(X), dimension(X) ) " + "which dimesion = %d. But received axis = %d.", + x_rank, + axis)); + } if (axis < 0) axis += x_rank; - - PADDLE_ENFORCE_LT( - axis, - x_rank, - errors::InvalidArgument( - "Attr(axis) value should be in range [-R, R-1], R is " - "the rank of Input(X). But received axis: %d, R: %d. " - "Current Input(X)'s shape is=[%s].", - axis, - x_rank, - x_dim)); - PADDLE_ENFORCE_EQ( - std::find(cleaned_axis.begin(), cleaned_axis.end(), axis), - cleaned_axis.end(), + std::find(formated_axis.begin(), formated_axis.end(), axis), + formated_axis.end(), errors::InvalidArgument("Attr(axes) has duplicated elements: %d.", static_cast(axis))); - cleaned_axis.push_back(axis); + formated_axis.push_back(axis); } for (int64_t i = 0; i < x_rank; i++) { - if (std::find(cleaned_axis.begin(), cleaned_axis.end(), i) == - cleaned_axis.end()) { + if (std::find(formated_axis.begin(), formated_axis.end(), i) == + formated_axis.end()) { out_dim.push_back(x_dim[i]); } else if (keep_dim) { out_dim.push_back(1); diff --git a/paddle/phi/kernels/cpu/nanmedian_grad_kernel.cc b/paddle/phi/kernels/cpu/nanmedian_grad_kernel.cc index bce234375e529..5a858a66e65b9 100644 --- a/paddle/phi/kernels/cpu/nanmedian_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/nanmedian_grad_kernel.cc @@ -17,7 +17,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/impl/nanmedian_grad_kernel_impl.h" +#include "paddle/phi/kernels/funcs/nanmedian_utils.h" namespace phi { @@ -26,67 +26,64 @@ void CalcMedianGradKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& median_index, const DenseTensor& out_grad, - const IntArray& axes UNUSED, - DenseTensor* x_grad, - T* x_grad_ptr) { + DenseTensor* x_grad) { + T* dx_data = dev_ctx.template Alloc(x_grad); + if (!dx_data) return; + phi::funcs::SetConstant set_zero; set_zero(dev_ctx, x_grad, static_cast(0)); - if (!x_grad_ptr) return; - const int64_t* m_ptr = median_index.data(); - const T* out_grad_ptr = out_grad.data(); + const int64_t* m_data = median_index.data(); + const T* dout_data = out_grad.data(); int64_t numel = x.numel(); auto x_dim = x.dims(); int64_t rank = x_dim.size(); int64_t stride = x_dim[rank - 1]; - int64_t pre_dim = numel / stride; + int64_t i = 0; int64_t offset = 0; - T div_factor = static_cast(2.0); for (i = 0; i < pre_dim; i++) { - if (m_ptr[2 * i] >= 0) { - if (m_ptr[2 * i] == m_ptr[2 * i + 1]) { - x_grad_ptr[offset + m_ptr[2 * i]] = out_grad_ptr[i]; + if (m_data[2 * i] >= 0) { + if (m_data[2 * i] == m_data[2 * i + 1]) { + dx_data[offset + m_data[2 * i]] = dout_data[i]; } else { - x_grad_ptr[offset + m_ptr[2 * i]] = out_grad_ptr[i] / div_factor; - x_grad_ptr[offset + m_ptr[2 * i + 1]] = out_grad_ptr[i] / div_factor; + dx_data[offset + m_data[2 * i]] = dout_data[i] / 
static_cast(2.0); + dx_data[offset + m_data[2 * i + 1]] = + dout_data[i] / static_cast(2.0); } } offset += stride; } } -template -void BaseMedianGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& median_index, - const DenseTensor& out_grad, - const IntArray& axes, - DenseTensor* x_grad) { - auto rank = x.dims().size(); - T* x_grad_ptr = dev_ctx.template Alloc(x_grad); - if (axes.size() && (rank > 1)) { - DenseTensor tmp_x_grad(*x_grad); - CalcMedianGradKernel( - dev_ctx, x, median_index, out_grad, axes, &tmp_x_grad, x_grad_ptr); - PostprocessMedianGradKernel(dev_ctx, &tmp_x_grad, axes, x_grad); - } else { - CalcMedianGradKernel( - dev_ctx, x, median_index, out_grad, axes, x_grad, x_grad_ptr); - } -} - template void NanmedianGradKernel(const Context& dev_ctx, - const DenseTensor& input, + const DenseTensor& x, const DenseTensor& median_index, const DenseTensor& out_grad, const IntArray& axes, - bool keep_dim UNUSED, + bool keepdim UNUSED, DenseTensor* x_grad) { - BaseMedianGradKernel( - dev_ctx, input, median_index, out_grad, axes, x_grad); + DenseTensor tmp_x; + auto rank = x.dims().size(); + if ((axes.size() == 0) || rank <= 1) { + tmp_x = x; + tmp_x.Resize({x.numel()}); + CalcMedianGradKernel( + dev_ctx, tmp_x, median_index, out_grad, x_grad); + } else { + funcs::PreprocessMedianKernel(dev_ctx, x, axes, &tmp_x); + + DenseTensor tmp_x_grad; + tmp_x_grad.Resize(x_grad->dims()); + CalcMedianGradKernel( + dev_ctx, tmp_x, median_index, out_grad, &tmp_x_grad); + + dev_ctx.template Alloc(x_grad); + funcs::PostprocessMedianGradKernel( + dev_ctx, &tmp_x_grad, axes, x_grad); + } } } // namespace phi diff --git a/paddle/phi/kernels/cpu/nanmedian_kernel.cc b/paddle/phi/kernels/cpu/nanmedian_kernel.cc index 660c115f92f89..558d5aaebfef8 100644 --- a/paddle/phi/kernels/cpu/nanmedian_kernel.cc +++ b/paddle/phi/kernels/cpu/nanmedian_kernel.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/impl/nanmedian_kernel_impl.h" +#include "paddle/phi/kernels/funcs/nanmedian_utils.h" #include "paddle/phi/kernels/top_k_kernel.h" namespace phi { @@ -31,7 +31,6 @@ void CalcMedianFunc(const Context& dev_ctx, int64_t pre_dim, T* o_ptr, int64_t* m_ptr) { - bool should_ignore_nan = ignore_nan; DenseTensor sort_out; DenseTensor sort_indices; auto sort_dim = x.dims(); @@ -52,7 +51,7 @@ void CalcMedianFunc(const Context& dev_ctx, int64_t offset = 0; int64_t i = 0; bool is_ori_odd = stride & 1; - if (should_ignore_nan) { + if (ignore_nan) { for (i = 0; i < pre_dim; i++) { offset = i * sort_k; if (nan_counts[i] == stride) { @@ -107,11 +106,11 @@ void CalcMedianFunc(const Context& dev_ctx, template void ProcessMedianKernel(const Context& dev_ctx, const DenseTensor& x, - T* o_ptr, - int64_t* m_ptr, - bool ignore_nan) { - bool should_ignore_nan = ignore_nan; - const T* x_ptr = x.data(); + DenseTensor* out, + DenseTensor* median_index) { + const T* x_data = x.data(); + T* out_data = dev_ctx.template Alloc(out); + int64_t* m_data = dev_ctx.template Alloc(median_index); int64_t numel = x.numel(); auto x_dim = x.dims(); @@ -122,7 +121,8 @@ void ProcessMedianKernel(const Context& dev_ctx, int64_t max_valid_num = 0; std::vector nan_counts; - if (should_ignore_nan) { + bool ignore_nan = true; + if (ignore_nan) { int64_t total_nan_num = 0; std::vector col_vec; col_vec.reserve(stride); @@ -133,7 +133,7 @@ void ProcessMedianKernel(const Context& dev_ctx, for (int64_t i = 0; i < pre_dim; i++) { col_vec.clear(); 
col_vec.insert( - col_vec.begin(), x_ptr + i * stride, x_ptr + (i + 1) * stride); + col_vec.begin(), x_data + i * stride, x_data + (i + 1) * stride); nan_counts[i] = std::count_if(col_vec.begin(), col_vec.end(), [&](const T& val) { return std::isnan(static_cast(val)); @@ -145,47 +145,25 @@ void ProcessMedianKernel(const Context& dev_ctx, // all elems are nan if (total_nan_num == numel) { for (i = 0; i < pre_dim; i++) { - o_ptr[i] = x_ptr[0]; - m_ptr[2 * i] = -1; - m_ptr[2 * i + 1] = -1; + out_data[i] = std::numeric_limits::quiet_NaN(); + m_data[2 * i] = -1; + m_data[2 * i + 1] = -1; } return; } - should_ignore_nan = total_nan_num > 0; + ignore_nan = total_nan_num > 0; } - int64_t sort_k = should_ignore_nan ? max_valid_num : ((stride >> 1) + 1); + int64_t sort_k = ignore_nan ? max_valid_num : ((stride >> 1) + 1); CalcMedianFunc(dev_ctx, x, nan_counts, - should_ignore_nan, + ignore_nan, sort_k, stride, pre_dim, - o_ptr, - m_ptr); -} - -template -void BaseMedianKernel(const Context& dev_ctx, - const DenseTensor& input, - const IntArray& axes, - DenseTensor* out, - DenseTensor* median_index, - bool ignore_nan) { - DenseTensor x; - auto rank = input.dims().size(); - if ((axes.size() == 0) || rank <= 1) { - x = input; - x.Resize({input.numel()}); - } else { - PreprocessMedianKernel(dev_ctx, input, axes, &x); - } - - T* o_ptr = dev_ctx.template Alloc(out); - int64_t* m_ptr = dev_ctx.template Alloc(median_index); - ProcessMedianKernel(dev_ctx, x, o_ptr, m_ptr, ignore_nan); - out->Resize(out->dims()); + out_data, + m_data); } template @@ -195,7 +173,16 @@ void NanmedianKernel(const Context& dev_ctx, bool keepdim UNUSED, DenseTensor* out, DenseTensor* median_index) { - BaseMedianKernel(dev_ctx, x, axes, out, median_index, true); + DenseTensor tmp_x; + auto rank = x.dims().size(); + if ((axes.size() == 0) || rank <= 1) { + tmp_x = x; + tmp_x.Resize({x.numel()}); + } else { + funcs::PreprocessMedianKernel(dev_ctx, x, axes, &tmp_x); + } + + ProcessMedianKernel(dev_ctx, tmp_x, out, median_index); } } // namespace phi diff --git a/paddle/phi/kernels/impl/nanmedian_kernel_impl.h b/paddle/phi/kernels/funcs/nanmedian_utils.h similarity index 61% rename from paddle/phi/kernels/impl/nanmedian_kernel_impl.h rename to paddle/phi/kernels/funcs/nanmedian_utils.h index 0d3585eb1ce06..edcdc10b88595 100644 --- a/paddle/phi/kernels/impl/nanmedian_kernel_impl.h +++ b/paddle/phi/kernels/funcs/nanmedian_utils.h @@ -15,9 +15,51 @@ #pragma once #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/nanmedian_kernel.h" namespace phi { +namespace funcs { + +template +void PostprocessMedianGradKernel(const Context& dev_ctx, + DenseTensor* input, + const IntArray& raw_axes, + DenseTensor* x) { + auto input_dim = input->dims(); + auto rank = input_dim.size(); + + std::vector axes = raw_axes.GetData(); + int64_t axes_size = static_cast(axes.size()); + for (int64_t i = 0; i < axes_size; i++) { + if (axes[i] < 0) { + axes[i] += rank; + } + } + + std::vector trans_back; + std::vector reshape_back; + trans_back.resize(rank); + + int offset = 0; + for (int64_t i = 0; i < rank; i++) { + if (std::find(axes.begin(), axes.end(), i) == axes.end()) { + reshape_back.push_back(input_dim[i]); + trans_back[i] = offset; + offset += 1; + } + } + + for (int64_t i = 0; i < rank; i++) { + if (std::find(axes.begin(), axes.end(), i) != axes.end()) { + trans_back[i] = offset; + reshape_back.push_back(input_dim[i]); + offset += 1; + } + } + + input->Resize(make_ddim(reshape_back)); + funcs::TransCompute( + 
static_cast(trans_back.size()), dev_ctx, *input, x, trans_back); +} template void PreprocessMedianKernel(const Context& dev_ctx, @@ -65,4 +107,5 @@ void PreprocessMedianKernel(const Context& dev_ctx, x->Resize(make_ddim(reshape)); } +} // namespace funcs } // namespace phi diff --git a/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu b/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu index 99b1c1a8c0af8..706a89def1fe9 100644 --- a/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu @@ -20,7 +20,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_meta.h" #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/impl/nanmedian_grad_kernel_impl.h" +#include "paddle/phi/kernels/funcs/nanmedian_utils.h" namespace phi { @@ -30,23 +30,26 @@ inline int GET_BLOCKS(const int N) { } template -__global__ void KernelNanmedianGrad(const T* x_ptr, +__global__ void KernelNanmedianGrad(const T* x_data, const int64_t* medians_ptr, const T* out_grad_ptr, - T* x_grad_ptr, + T* dx_data, int64_t stride, - int64_t pre_dim, - T div_factor) { + int64_t pre_dim) { CUDA_KERNEL_LOOP(index, pre_dim) { int64_t offset = index * stride; + printf("index: %d\n", index); + printf("medians_ptr[2 * index]: %d\n", medians_ptr[2 * index]); + printf("medians_ptr[2 * index+1]: %d\n", medians_ptr[2 * index + 1]); + if (medians_ptr[2 * index] >= 0) { if (medians_ptr[2 * index] == medians_ptr[2 * index + 1]) { - x_grad_ptr[offset + medians_ptr[2 * index]] = out_grad_ptr[index]; + dx_data[offset + medians_ptr[2 * index]] = out_grad_ptr[index]; } else { - x_grad_ptr[offset + medians_ptr[2 * index]] = - out_grad_ptr[index] / div_factor; - x_grad_ptr[offset + medians_ptr[2 * index + 1]] = - out_grad_ptr[index] / div_factor; + dx_data[offset + medians_ptr[2 * index]] = + out_grad_ptr[index] / static_cast(2.0); + dx_data[offset + medians_ptr[2 * index + 1]] = + out_grad_ptr[index] / static_cast(2.0); } } } @@ -57,14 +60,16 @@ void CalcMedianGradKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& median_index, const DenseTensor& out_grad, - DenseTensor* x_grad, - T* x_grad_ptr) { + DenseTensor* x_grad) { + T* dx_data = dev_ctx.template Alloc(x_grad); + if (!dx_data) return; + phi::funcs::SetConstant set_zero; set_zero(dev_ctx, x_grad, static_cast(0)); auto stream = dev_ctx.stream(); - const T* x_ptr = x.data(); - const int64_t* m_ptr = median_index.data(); + const T* x_data = x.data(); + const int64_t* m_data = median_index.data(); const T* out_grad_ptr = out_grad.data(); int64_t numel = x.numel(); @@ -73,42 +78,38 @@ void CalcMedianGradKernel(const Context& dev_ctx, int64_t stride = x_dim[x_rank - 1]; int64_t pre_dim = numel / stride; - T div_factor = static_cast(2.0); KernelNanmedianGrad <<>>( - x_ptr, m_ptr, out_grad_ptr, x_grad_ptr, stride, pre_dim, div_factor); -} - -template -void BaseMedianGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& median_index, - const DenseTensor& out_grad, - const IntArray& axes, - DenseTensor* x_grad) { - auto rank = x.dims().size(); - T* x_grad_ptr = dev_ctx.template Alloc(x_grad); - if (axes.size() && (rank > 1)) { - DenseTensor tmp_x_grad(*x_grad); - CalcMedianGradKernel( - dev_ctx, x, median_index, out_grad, &tmp_x_grad, x_grad_ptr); - PostprocessMedianGradKernel(dev_ctx, &tmp_x_grad, axes, x_grad); - } else { - CalcMedianGradKernel( - dev_ctx, x, median_index, out_grad, x_grad, x_grad_ptr); - } + x_data, m_data, out_grad_ptr, dx_data, stride, 
pre_dim); } template void NanmedianGradKernel(const Context& dev_ctx, - const DenseTensor& input, + const DenseTensor& x, const DenseTensor& median_index, const DenseTensor& out_grad, const IntArray& axes, - bool keep_dim, + bool keepdim UNUSED, DenseTensor* x_grad) { - BaseMedianGradKernel( - dev_ctx, input, median_index, out_grad, axes, x_grad); + DenseTensor tmp_x; + auto rank = x.dims().size(); + if ((axes.size() == 0) || rank <= 1) { + tmp_x = x; + tmp_x.Resize({x.numel()}); + CalcMedianGradKernel( + dev_ctx, tmp_x, median_index, out_grad, x_grad); + } else { + funcs::PreprocessMedianKernel(dev_ctx, x, axes, &tmp_x); + + DenseTensor tmp_x_grad; + tmp_x_grad.Resize(x_grad->dims()); + CalcMedianGradKernel( + dev_ctx, tmp_x, median_index, out_grad, &tmp_x_grad); + + dev_ctx.template Alloc(x_grad); + funcs::PostprocessMedianGradKernel( + dev_ctx, &tmp_x_grad, axes, x_grad); + } } } // namespace phi diff --git a/paddle/phi/kernels/gpu/nanmedian_kernel.cu b/paddle/phi/kernels/gpu/nanmedian_kernel.cu index 0b2ac39600068..8a6be7a9bdef0 100644 --- a/paddle/phi/kernels/gpu/nanmedian_kernel.cu +++ b/paddle/phi/kernels/gpu/nanmedian_kernel.cu @@ -20,7 +20,7 @@ #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/full_kernel.h" -#include "paddle/phi/kernels/impl/nanmedian_kernel_impl.h" +#include "paddle/phi/kernels/funcs/nanmedian_utils.h" #include "paddle/phi/kernels/top_k_kernel.h" namespace phi { @@ -138,14 +138,13 @@ __global__ void CalcNanmedianKernel(const T* sort_out_ptr, template void ProcessMedianKernel(const Context& dev_ctx, const DenseTensor& x, - bool ignore_nan, DenseTensor* out, - int64_t* m_ptr) { - bool should_ignore_nan = ignore_nan; + DenseTensor* median_index) { auto stream = dev_ctx.stream(); + const T* x_data = x.data(); + T* out_data = dev_ctx.template Alloc(out); + int64_t* m_data = dev_ctx.template Alloc(median_index); - const T* x_ptr = x.data(); - T* o_ptr = dev_ctx.template Alloc(out); int64_t numel = x.numel(); auto x_dim = x.dims(); int64_t x_rank = x_dim.size(); @@ -156,7 +155,9 @@ void ProcessMedianKernel(const Context& dev_ctx, DenseTensor nan_counts, nan_stat; int64_t* nan_counts_ptr; int64_t max_valid_num = 0; - if (should_ignore_nan) { + + bool ignore_nan = true; + if (ignore_nan) { nan_counts.Resize(phi::make_ddim({pre_dim})); dev_ctx.template Alloc(&nan_counts); nan_counts_ptr = nan_counts.data(); @@ -167,7 +168,7 @@ void ProcessMedianKernel(const Context& dev_ctx, KernelNanCounts<<>>(x_ptr, + stream>>>(x_data, numel, pre_dim, stride, @@ -189,15 +190,19 @@ void ProcessMedianKernel(const Context& dev_ctx, // all elements are nan values T nan_val = std::numeric_limits::quiet_NaN(); if (nan_stat_cpu_ptr[0] == numel) { - FullLikeKernel(dev_ctx, x, nan_val, x.dtype(), out); + phi::funcs::SetConstant set_nan; + set_nan(dev_ctx, out, nan_val); + + phi::funcs::SetConstant set_negatvie; + set_negatvie(dev_ctx, median_index, static_cast(-1)); return; } - should_ignore_nan = nan_stat_cpu_ptr[0] > 0; + ignore_nan = nan_stat_cpu_ptr[0] > 0; max_valid_num = nan_stat_cpu_ptr[1]; } - int64_t sort_k = should_ignore_nan ? max_valid_num : ((stride >> 1) + 1); + int64_t sort_k = ignore_nan ? 
max_valid_num : ((stride >> 1) + 1); bool is_ori_odd = stride & 1; DenseTensor sort_out, sort_indices; @@ -217,14 +222,14 @@ void ProcessMedianKernel(const Context& dev_ctx, T div_factor = static_cast(2.0); T nan_val = std::numeric_limits::quiet_NaN(); - if (should_ignore_nan) { + if (ignore_nan) { CalcNanmedianKernel <<>>( sort_out_ptr, sort_indices_ptr, nan_counts_ptr, - m_ptr, - o_ptr, + m_data, + out_data, is_ori_odd, pre_dim, max_valid_num, @@ -236,8 +241,8 @@ void ProcessMedianKernel(const Context& dev_ctx, <<>>( sort_out_ptr, sort_indices_ptr, - m_ptr, - o_ptr, + m_data, + out_data, div_factor, is_ori_odd, pre_dim, @@ -245,27 +250,6 @@ void ProcessMedianKernel(const Context& dev_ctx, } } -template -void BaseMedianKernel(const Context& dev_ctx, - const DenseTensor& input, - const IntArray& axes, - bool ignore_nan, - DenseTensor* out, - DenseTensor* median_index) { - DenseTensor x; - auto rank = input.dims().size(); - if ((axes.size() == 0) || rank <= 1) { - x = input; - x.Resize({input.numel()}); - } else { - PreprocessMedianKernel(dev_ctx, input, axes, &x); - } - - int64_t* m_ptr = dev_ctx.template Alloc(median_index); - ProcessMedianKernel(dev_ctx, x, ignore_nan, out, m_ptr); - out->Resize(out->dims()); -} - template void NanmedianKernel(const Context& dev_ctx, const DenseTensor& x, @@ -273,7 +257,16 @@ void NanmedianKernel(const Context& dev_ctx, bool keepdim, DenseTensor* out, DenseTensor* median_index) { - BaseMedianKernel(dev_ctx, x, axes, true, out, median_index); + DenseTensor tmp_x; + auto rank = x.dims().size(); + if ((axes.size() == 0) || rank <= 1) { + tmp_x = x; + tmp_x.Resize({x.numel()}); + } else { + funcs::PreprocessMedianKernel(dev_ctx, x, axes, &tmp_x); + } + + ProcessMedianKernel(dev_ctx, tmp_x, out, median_index); } } // namespace phi diff --git a/paddle/phi/kernels/impl/nanmedian_grad_kernel_impl.h b/paddle/phi/kernels/impl/nanmedian_grad_kernel_impl.h deleted file mode 100644 index 8a30082ac366e..0000000000000 --- a/paddle/phi/kernels/impl/nanmedian_grad_kernel_impl.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/phi/kernels/nanmedian_grad_kernel.h" - -namespace phi { - -template -void PostprocessMedianGradKernel(const Context& dev_ctx, - DenseTensor* input, - const IntArray& raw_axes, - DenseTensor* x) { - auto input_dim = input->dims(); - auto rank = input_dim.size(); - - std::vector axes = raw_axes.GetData(); - int64_t axes_size = static_cast(axes.size()); - for (int64_t i = 0; i < axes_size; i++) { - if (axes[i] < 0) { - axes[i] += rank; - } - } - - std::vector trans_back; - std::vector reshape_back; - trans_back.reserve(rank); - trans_back.resize(rank); - - int offset = 0; - for (int64_t i = 0; i < rank; i++) { - if (std::find(axes.begin(), axes.end(), i) == axes.end()) { - reshape_back.push_back(input_dim[i]); - trans_back[i] = offset; - offset += 1; - } - } - - for (int64_t i = 0; i < rank; i++) { - if (std::find(axes.begin(), axes.end(), i) != axes.end()) { - trans_back[i] = offset; - reshape_back.push_back(input_dim[i]); - offset += 1; - } - } - - input->Resize(make_ddim(reshape_back)); - funcs::TransCompute( - static_cast(trans_back.size()), dev_ctx, *input, x, trans_back); -} - -} // namespace phi diff --git a/python/paddle/nn/functional/distance.py b/python/paddle/nn/functional/distance.py index f1155852b00a9..cb004fe9b622f 100644 --- a/python/paddle/nn/functional/distance.py +++ b/python/paddle/nn/functional/distance.py @@ -64,8 +64,8 @@ def pairwise_distance(x, y, p=2.0, epsilon=1e-6, keepdim=False, name=None): y = paddle.to_tensor([[5., 6.], [7., 8.]], dtype=paddle.float64) distance = paddle.nn.functional.pairwise_distance(x, y) print(distance) - # Tensor(shape=[2], dtype=float64, place=Place(gpu:0), stop_gradient=True, - # [4.99999860, 4.99999860]) + # Tensor(shape=[2], dtype=float64, place=Place(gpu:0), stop_gradient=True, + # [4.99999860, 4.99999860]) """ if in_dynamic_mode(): diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index d6e378396f354..80f4d9a2b6066 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -394,15 +394,15 @@ def logspace(start, stop, num, base=10.0, dtype=None, name=None): Args: start(int|float|Tensor): The input :attr:`start` is exponent of first entry in \ - the sequence. It is a scalar, or a Tensor of shape [1] with input data \ + the sequence. It is a scalar, or a 0-D Tensor of shape [] with input data \ type int32, int64, float32 or float64. stop(int|float|Tensor): The input :attr:`stop` is exponent of last entry in the \ - sequence. It is a scalar, or a Tensor of shape [1] with input data \ + sequence. It is a scalar, or a 0-D Tensor of shape [] with input data \ type int32, int64, float32 or float64. num(int|Tensor): The input :attr:`num` is given number of items in the sequence. \ - It is an int scalar, or a Tensor of shape [1] with data type int32. + It is an int scalar, or a 0-D Tensor of shape [] with data type int32. base(int|float|Tensor): The input :attr:`base` is base of the logarithm function. \ - It is a scalar, or a Tensor of shape [1] with input data type int32, int64, \ + It is a scalar, or a 0-D Tensor of shape [] with input data type int32, int64, \ float32 or float64. dtype(np.dtype|str, optional): The data type of output tensor, it could be \ int32, int64, float32 or float64. Default: if None, the data type is float32. 
\ diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 5100b47158626..ebb1d960df690 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -1615,7 +1615,7 @@ def count_nonzero(x, axis=None, keepdim=False, name=None): # x is a 2-D Tensor: x = paddle.to_tensor([[0., 1.1, 1.2], [0., 0., 1.3], [0., 0., 0.]]) out1 = paddle.count_nonzero(x) - # [3] + # 3 out2 = paddle.count_nonzero(x, axis=0) # [0, 1, 2] out3 = paddle.count_nonzero(x, axis=0, keepdim=True) @@ -1636,17 +1636,8 @@ def count_nonzero(x, axis=None, keepdim=False, name=None): # [1, 3, 5] """ - if axis is not None: - if isinstance(axis, int): - axis = [axis] - dims = len(x.shape) - for i in range(len(axis)): - if not isinstance(axis[i], int) or not ( - axis[i] < dims and axis[i] >= -dims - ): - raise ValueError( - "Axis should be None, int, or a list, element should in range [-rank(x), rank(x))." - ) + if isinstance(axis, int): + axis = [axis] bool_tensor = paddle.cast(x, 'bool') int_tensor = paddle.cast(bool_tensor, 'int64') diff --git a/python/paddle/tensor/stat.py b/python/paddle/tensor/stat.py index fa41f5a93b82a..004db97089f72 100644 --- a/python/paddle/tensor/stat.py +++ b/python/paddle/tensor/stat.py @@ -255,7 +255,7 @@ def numel(x, name=None): return out -def nanmedian(x, axis=None, keepdim=True, name=None): +def nanmedian(x, axis=None, keepdim=False, name=None): r""" Compute the median along the specified axis, while ignoring NaNs. @@ -273,7 +273,7 @@ def nanmedian(x, axis=None, keepdim=True, name=None): in the output Tensor. If ``keepdim`` is True, the dimensions of the output Tensor is the same as ``x`` except in the reduced dimensions(it is of size 1 in this case). Otherwise, the shape of - the output Tensor is squeezed in ``axis`` . Default is True. + the output Tensor is squeezed in ``axis`` . Default is False. name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. @@ -287,16 +287,16 @@ def nanmedian(x, axis=None, keepdim=True, name=None): x = paddle.to_tensor([[float('nan'), 2. , 3. ], [0. , 1. , 2. ]]) y1 = x.nanmedian() - # y1 is [[2.]] + # y1 is 2. y2 = x.nanmedian(0) - # y2 is [[0., 1.5, 2.5]] + # y2 is [0., 1.5, 2.5] - y3 = x.nanmedian(0, keepdim=False) - # y3 is [0., 1.5, 2.5] + y3 = x.nanmedian(0, keepdim=True) + # y3 is [[0., 1.5, 2.5]] y4 = x.nanmedian((0, 1)) - # y4 is [[2.]] + # y4 is 2. """ if not isinstance(x, Variable): raise TypeError("In median, the input x should be a Tensor.") @@ -304,7 +304,6 @@ def nanmedian(x, axis=None, keepdim=True, name=None): if isinstance(axis, (list, tuple)) and len(axis) == 0: raise ValueError("Axis list should not be empty.") - dims = len(x.shape) if axis is None: axis = [] elif isinstance(axis, tuple): @@ -312,24 +311,6 @@ def nanmedian(x, axis=None, keepdim=True, name=None): elif isinstance(axis, int): axis = [axis] - if not isinstance(axis, list): - raise ValueError( - "Axis should be None, int, or a list, element should in range [-rank(x), rank(x))." - ) - - for i in range(len(axis)): - if not isinstance(axis[i], int) or not ( - axis[i] < dims and axis[i] >= -dims - ): - raise ValueError( - "Axis should be None, int, or a list, element should in range [-rank(x), rank(x))." 
- ) - if axis[i] < 0: - axis[i] += dims - - if len(axis) != len(set(axis)): - raise ValueError("Axis has duplicated elements.") - if in_dynamic_mode(): return _C_ops.nanmedian(x, axis, keepdim) else: diff --git a/test/legacy_test/test_nanmedian.py b/test/legacy_test/test_nanmedian.py index 88950271d32b9..8b5f86232f0e3 100644 --- a/test/legacy_test/test_nanmedian.py +++ b/test/legacy_test/test_nanmedian.py @@ -125,6 +125,7 @@ def test_data_case(data): pd_res = paddle.nanmedian( paddle.to_tensor(data), keepdim=keep_dim ) + assert np_res.shape == pd_res.numpy().shape np.testing.assert_allclose( np_res, pd_res.numpy(), rtol=1e-05, equal_nan=True ) @@ -187,6 +188,23 @@ def test_check_grad(self): x_np[0, :] = np.nan x_np[1, :3] = np.nan x_np[2, 3:] = np.nan + + x_tensor = paddle.to_tensor(x_np, stop_gradient=False) + y = paddle.nanmedian(x_tensor, keepdim=True) + dx = paddle.grad(y, x_tensor)[0].numpy() + + np_grad = np.zeros(shape) + np_grad[1, 3] = 0.5 + np_grad[3, 2] = 0.5 + np.testing.assert_allclose(np_grad, dx, rtol=1e-05, equal_nan=True) + + def test_check_grad_axis(self): + paddle.disable_static(place=self.place) + shape = (4, 5) + x_np = np.random.uniform(-1, 1, shape).astype(np.float64) + x_np[0, :] = np.nan + x_np[1, :3] = np.nan + x_np[2, 3:] = np.nan x_np_sorted = np.sort(x_np) nan_counts = np.count_nonzero(np.isnan(x_np).astype(np.int32), axis=1) np_grad = np.zeros(shape) @@ -205,10 +223,25 @@ def test_check_grad(self): np_grad[i, j] = 1 if is_odd else 0.5 x_tensor = paddle.to_tensor(x_np, stop_gradient=False) - y = paddle.nanmedian(x_tensor, axis=1, keepdim=True) + y = paddle.nanmedian(x_tensor, axis=1) dx = paddle.grad(y, x_tensor)[0].numpy() np.testing.assert_allclose(np_grad, dx, rtol=1e-05, equal_nan=True) + def test_check_grad_0d(self): + paddle.disable_static(place=self.place) + x = paddle.rand([]) + x.stop_gradient = False + y = paddle.nanmedian(x) + y.backward() + self.assertEqual(x.grad.shape, []) + np.testing.assert_allclose(x.grad, np.array(1.0)) + + x = paddle.to_tensor(float('nan'), stop_gradient=False) + y = paddle.nanmedian(x) + y.backward() + self.assertEqual(x.grad.shape, []) + np.testing.assert_allclose(x.grad, np.array(0.0)) + if __name__ == "__main__": unittest.main() diff --git a/test/legacy_test/test_zero_dim_tensor.py b/test/legacy_test/test_zero_dim_tensor.py index 63b3e77c07d02..210906e87a960 100644 --- a/test/legacy_test/test_zero_dim_tensor.py +++ b/test/legacy_test/test_zero_dim_tensor.py @@ -28,6 +28,14 @@ unary_api_list = [ paddle.nn.functional.elu, + paddle.nn.functional.rrelu, + paddle.frac, + paddle.sgn, + paddle.nan_to_num, + paddle.i0, + paddle.i0e, + paddle.i1, + paddle.i1e, paddle.nn.functional.gelu, paddle.nn.functional.hardsigmoid, paddle.nn.functional.hardswish, @@ -95,9 +103,15 @@ paddle.nn.functional.alpha_dropout, ] -inplace_api_list = [ +inplace_unary_api_list = [ paddle.nn.functional.relu_, paddle.nn.functional.tanh_, + paddle.tensor.sigmoid_, + paddle.tensor.ceil_, + paddle.tensor.floor_, + paddle.tensor.reciprocal_, + paddle.tensor.exp_, + paddle.tensor.sqrt_, ] @@ -119,7 +133,7 @@ def test_dygraph_unary(self): self.assertEqual(x.grad.shape, []) self.assertEqual(out.grad.shape, []) - for api in inplace_api_list: + for api in inplace_unary_api_list: x = paddle.rand([]) out = api(x) self.assertEqual(x.shape, []) @@ -165,6 +179,8 @@ def test_static_unary(self): paddle.mean, paddle.nansum, paddle.nanmean, + paddle.median, + paddle.nanmedian, paddle.min, paddle.max, paddle.amin, @@ -173,6 +189,7 @@ def test_static_unary(self): 
paddle.logsumexp, paddle.all, paddle.any, + paddle.count_nonzero, ] @@ -187,18 +204,20 @@ def test_dygraph_reduce(self): else: x = paddle.rand([]) x.stop_gradient = False - out = api(x, None) + out = api(x, axis=None) out.retain_grads() out.backward() self.assertEqual(x.shape, []) self.assertEqual(out.shape, []) - np.testing.assert_allclose(out.numpy(), x.numpy()) + if api not in [paddle.count_nonzero]: + np.testing.assert_allclose(out.numpy(), x.numpy()) - out_empty_list = api(x, []) - self.assertEqual(out_empty_list, out) - self.assertEqual(out_empty_list.shape, []) + if api not in [paddle.median, paddle.nanmedian]: + out_empty_list = api(x, axis=[]) + self.assertEqual(out_empty_list, out) + self.assertEqual(out_empty_list.shape, []) if x.grad is not None: self.assertEqual(x.grad.shape, []) @@ -206,12 +225,12 @@ def test_dygraph_reduce(self): np.testing.assert_allclose(x.grad.numpy(), np.array(1.0)) np.testing.assert_allclose(out.grad.numpy(), np.array(1.0)) - out1 = api(x, 0) + out1 = api(x, axis=0) self.assertEqual(out1.shape, []) self.assertEqual(out1, out) out1.backward() - out2 = api(x, -1) + out2 = api(x, axis=-1) self.assertEqual(out2.shape, []) self.assertEqual(out2, out) out2.backward() @@ -220,13 +239,28 @@ def test_dygraph_reduce(self): self.assertEqual(x.grad.shape, []) np.testing.assert_allclose(x.grad.numpy(), np.array(3.0)) - # 2) x is ND, reduce to 0D + # 2) x is 1D, axis=0, reduce to 0D + if api in [paddle.all, paddle.any]: + x = paddle.randint(0, 2, [5]).astype('bool') + else: + x = paddle.rand([5]) + x.stop_gradient = False + out = api(x, axis=0) + out.retain_grads() + out.backward() + + self.assertEqual(out.shape, []) + if x.grad is not None: + self.assertEqual(out.grad.shape, []) + self.assertEqual(x.grad.shape, [5]) + + # 3) x is ND, reduce to 0D if api in [paddle.all, paddle.any]: x = paddle.randint(0, 2, [3, 5]).astype('bool') else: x = paddle.rand([3, 5]) x.stop_gradient = False - out = api(x, None) + out = api(x, axis=None) out.retain_grads() out.backward() @@ -235,20 +269,20 @@ def test_dygraph_reduce(self): self.assertEqual(out.grad.shape, []) self.assertEqual(x.grad.shape, [3, 5]) - # 3) x is 1D, axis=0, reduce to 0D + # 4) x is ND, reduce to 0D, keepdim=True if api in [paddle.all, paddle.any]: - x = paddle.randint(0, 2, [5]).astype('bool') + x = paddle.randint(0, 2, [3, 5]).astype('bool') else: - x = paddle.rand([5]) + x = paddle.rand([3, 5]) x.stop_gradient = False - out = api(x, 0) + out = api(x, keepdim=True) out.retain_grads() out.backward() - self.assertEqual(out.shape, []) + self.assertEqual(out.shape, [1, 1]) if x.grad is not None: - self.assertEqual(out.grad.shape, []) - self.assertEqual(x.grad.shape, [5]) + self.assertEqual(out.grad.shape, [1, 1]) + self.assertEqual(x.grad.shape, [3, 5]) paddle.enable_static() @@ -267,16 +301,17 @@ def test_static_reduce(self): else: x = paddle.rand([]) x.stop_gradient = False - out = api(x, None) + out = api(x, axis=None) paddle.static.append_backward(out) - out_empty_list = api(x, None) - self.assertEqual(out_empty_list.shape, ()) + if api not in [paddle.median, paddle.nanmedian]: + out_empty_list = api(x, axis=[]) + self.assertEqual(out_empty_list.shape, ()) - out1 = api(x, 0) + out1 = api(x, axis=0) self.assertEqual(out1.shape, ()) - out2 = api(x, -1) + out2 = api(x, axis=-1) self.assertEqual(out2.shape, ()) fetch_list = [x, out] @@ -286,7 +321,8 @@ def test_static_reduce(self): res = exe.run(main_prog, fetch_list=fetch_list) self.assertEqual(res[0].shape, ()) self.assertEqual(res[1].shape, ()) - 
np.testing.assert_allclose(res[0], res[1]) + if api not in [paddle.count_nonzero]: + np.testing.assert_allclose(res[0], res[1]) if len(res) > 2: self.assertEqual(res[2].shape, ()) @@ -300,7 +336,7 @@ def test_static_reduce(self): else: x = paddle.rand([3, 5]) x.stop_gradient = False - out = api(x, None) + out = api(x, axis=None) paddle.static.append_backward(out) fetch_list = [out] @@ -319,7 +355,7 @@ def test_static_reduce(self): else: x = paddle.rand([5]) x.stop_gradient = False - out = api(x, 0) + out = api(x, axis=0) paddle.static.append_backward(out) fetch_list = [out] @@ -359,6 +395,11 @@ def test_static_reduce(self): paddle.fmin, paddle.complex, paddle.kron, + paddle.logaddexp, + paddle.nextafter, + paddle.ldexp, + paddle.polar, + paddle.heaviside, ] binary_int_api_list = [ @@ -370,6 +411,15 @@ def test_static_reduce(self): ] +inplace_binary_api_list = [ + paddle.tensor.add_, + paddle.tensor.subtract_, + paddle.tensor.multiply_, + paddle.tensor.remainder_, + paddle.tensor.remainder_, +] + + # Use to test zero-dim of binary API class TestBinaryAPI(unittest.TestCase): def test_dygraph_binary(self): @@ -497,6 +547,20 @@ def test_dygraph_binary(self): self.assertEqual(out.shape, [3, 5]) np.testing.assert_array_equal(out.numpy(), out_np) + for api in inplace_binary_api_list: + with paddle.no_grad(): + x = paddle.rand([]) + y = paddle.rand([]) + out = api(x, y) + self.assertEqual(x.shape, []) + self.assertEqual(out.shape, []) + + x = paddle.rand([3, 5]) + y = paddle.rand([]) + out = api(x, y) + self.assertEqual(x.shape, [3, 5]) + self.assertEqual(out.shape, [3, 5]) + paddle.enable_static() def test_static_binary(self): @@ -640,6 +704,65 @@ def setUp(self): paddle.disable_static() self.x = paddle.rand([]) + def test_polygamma(self): + x = paddle.rand([]) + x.stop_gradient = False + out = paddle.polygamma(x, 2) + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(x.grad.shape, []) + + def test_frexp(self): + x = paddle.rand([]) + x.stop_gradient = False + out1, out2 = paddle.frexp(x) + out1.backward() + + self.assertEqual(out1.shape, []) + self.assertEqual(out2.shape, []) + self.assertEqual(x.grad.shape, []) + + def test_pairwise_distance(self): + x = paddle.rand([5]) + x.stop_gradient = False + y = paddle.rand([5]) + y.stop_gradient = False + + out = paddle.nn.functional.pairwise_distance(x, y) + out.backward() + self.assertEqual(out.shape, []) + self.assertEqual(x.grad.shape, [5]) + + def test_take(self): + x = paddle.rand([4, 5]) + x.stop_gradient = False + out = paddle.take(x, paddle.to_tensor(2)) + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(x.grad.shape, [4, 5]) + np.testing.assert_allclose(x.grad[0, 2], 1.0) + + x = paddle.rand([]) + x.stop_gradient = False + out = paddle.take(x, paddle.to_tensor(0)) + out.backward() + + self.assertEqual(out.shape, []) + np.testing.assert_allclose(out, x) + self.assertEqual(x.grad.shape, []) + np.testing.assert_allclose(x.grad.numpy(), 1.0) + + def test_trapezoid(self): + y = paddle.rand([5]) + y.stop_gradient = False + out = paddle.trapezoid(y, dx=2.0) + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(y.grad.shape, [5]) + def test_create_parameter_var(self): zero_dim_param = paddle.create_parameter(shape=[], dtype='float32') self.assertEqual(zero_dim_param.shape, []) @@ -1096,54 +1219,6 @@ def test_argmax(self): out = paddle.argmax(x, keepdim=True) self.assertEqual(out.shape, [1, 1]) - def test_median(self): - # 1) x is 0D - x = paddle.rand([]) - x.stop_gradient = False - out1 = 
paddle.median(x, 0) - out2 = paddle.median(x, -1) - out3 = paddle.median(x, None) - - out1.backward() - out2.backward() - out3.backward() - - self.assertEqual(out1.shape, []) - np.testing.assert_allclose(out1, x) - - self.assertEqual(out2.shape, []) - np.testing.assert_allclose(out2, x) - - self.assertEqual(out3.shape, []) - np.testing.assert_allclose(out3, x) - - self.assertEqual(x.grad.shape, []) - np.testing.assert_allclose(x.grad, 3.0) - - # 2) x is 1D - x = paddle.rand([5]) - x.stop_gradient = False - out = paddle.median(x, 0) - out.backward() - self.assertEqual(out.shape, []) - self.assertEqual(x.grad.shape, [5]) - - # 3) x is ND - x = paddle.rand([3, 5]) - x.stop_gradient = False - out = paddle.median(x, None) - out.backward() - self.assertEqual(out.shape, []) - self.assertEqual(x.grad.shape, [3, 5]) - - # 4) x is ND, keepdim=True - x = paddle.rand([3, 5]) - x.stop_gradient = False - out = paddle.median(x, keepdim=True) - out.backward() - self.assertEqual(out.shape, [1, 1]) - self.assertEqual(x.grad.shape, [3, 5]) - def test_kthvalue(self): # 1) x is 0D x = paddle.randn([]) @@ -1432,6 +1507,40 @@ def test_quantile(self): self.assertEqual(out.grad, 1.0) self.assertEqual(x.grad.shape, [2, 3]) + def test_nanquantile(self): + # 1) x is 0D + x = paddle.rand([]) + x.stop_gradient = False + out = paddle.quantile(x, 0.5, axis=None) + + out.retain_grads() + out.backward() + + out_empty_list = paddle.quantile(x, 0.5, axis=[]) + self.assertEqual(out_empty_list, out) + + self.assertEqual(x.shape, []) + self.assertEqual(out.shape, []) + self.assertEqual(out, x) + + self.assertEqual(x.grad.shape, []) + self.assertEqual(x.grad, 1.0) + self.assertEqual(out.grad.shape, []) + self.assertEqual(out.grad, 1.0) + + # 2) x is ND with 'nan' + x = paddle.to_tensor([[float('nan'), 2.0, 3.0], [0.0, 1.0, 2.0]]) + x.stop_gradient = False + out = paddle.quantile(x, 0.5, axis=None) + + out.retain_grads() + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(out.grad.shape, []) + self.assertEqual(out.grad, 1.0) + self.assertEqual(x.grad.shape, [2, 3]) + def test_flip(self): x = paddle.rand([]) x.stop_gradient = False @@ -1965,6 +2074,25 @@ def test_cumsum(self): self.assertEqual(out3.grad.shape, []) self.assertTrue(out3.grad.numpy() == 1) + def test_logcumsumexp(self): + x = paddle.rand([]) + x.stop_gradient = False + + out1 = paddle.logcumsumexp(x) + out2 = paddle.logcumsumexp(x, axis=0) + out3 = paddle.logcumsumexp(x, axis=-1) + + out1.backward() + out2.backward() + out3.backward() + + self.assertEqual(out1.shape, [1]) + self.assertEqual(out2.shape, []) + self.assertEqual(out3.shape, []) + + self.assertEqual(x.grad.shape, []) + self.assertTrue(x.grad.numpy() == 3) + def test_add_n(self): x1 = paddle.rand([]) x1.stop_gradient = False @@ -2654,6 +2782,15 @@ def test_cov(self): self.assertEqual(xt_1_out.shape, []) self.assertEqual(xt_1.grad.shape, [12]) + def test_corrcoef(self): + x = paddle.randn((12,)) + x.stop_gradient = False + out = paddle.linalg.corrcoef(x) + out.backward() + + self.assertEqual(out.shape, []) + self.assertEqual(x.grad.shape, [12]) + def test_det(self): xt = paddle.randn([3, 3, 3]) xt.stop_gradient = False @@ -2852,6 +2989,81 @@ def setUp(self): paddle.enable_static() self.exe = paddle.static.Executor() + @prog_scope() + def test_polygamma(self): + x = paddle.rand([]) + x.stop_gradient = False + out = paddle.polygamma(x, 2) + paddle.static.append_backward(out) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name]) + 
self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, ()) + + @prog_scope() + def test_frexp(self): + x = paddle.rand([]) + x.stop_gradient = False + out1, out2 = paddle.frexp(x) + paddle.static.append_backward(out1) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out1, out2, x.grad_name]) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, ()) + self.assertEqual(res[2].shape, ()) + + @prog_scope() + def test_pairwise_distance(self): + x = paddle.rand([5]) + x.stop_gradient = False + y = paddle.rand([5]) + y.stop_gradient = False + + out = paddle.nn.functional.pairwise_distance(x, y) + paddle.static.append_backward(out) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name, y.grad_name]) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, (5,)) + self.assertEqual(res[2].shape, (5,)) + + @prog_scope() + def test_take(self): + x1 = paddle.rand([4, 5]) + x1.stop_gradient = False + out1 = paddle.take(x1, paddle.to_tensor(2)) + paddle.static.append_backward(out1) + + x2 = paddle.rand([]) + x2.stop_gradient = False + out2 = paddle.take(x2, paddle.to_tensor(0)) + paddle.static.append_backward(out2) + + prog = paddle.static.default_main_program() + res = self.exe.run( + prog, fetch_list=[out1, x1.grad_name, out2, x2.grad_name] + ) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, (4, 5)) + self.assertEqual(res[2].shape, ()) + self.assertEqual(res[3].shape, ()) + np.testing.assert_allclose(res[3], 1.0) + + @prog_scope() + def test_trapezoid(self): + y = paddle.rand([5]) + y.stop_gradient = False + out = paddle.trapezoid(y, dx=2.0) + paddle.static.append_backward(out) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, y.grad_name]) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, (5,)) + @prog_scope() def test_create_parameter_var(self): zero_dim_param = paddle.create_parameter(shape=[], dtype='float32') @@ -3236,40 +3448,6 @@ def test_argmax(self): np.testing.assert_allclose(res[2], 0.0) self.assertEqual(res[3].shape, ()) - @prog_scope() - def test_median(self): - # 1) x is 0D - x = paddle.rand([]) - x.stop_gradient = False - out = paddle.median(x) - paddle.static.append_backward(out) - - # 2) x is ND - x1 = paddle.rand([3, 5]) - x1.stop_gradient = False - out1 = paddle.median(x1) - paddle.static.append_backward(out1) - - prog = paddle.static.default_main_program() - res = self.exe.run( - prog, - fetch_list=[ - x, - out, - x.grad_name, - out1, - x1.grad_name, - ], - ) - self.assertEqual(res[1].shape, ()) - np.testing.assert_allclose(res[1], res[0]) - - self.assertEqual(res[2].shape, ()) - np.testing.assert_allclose(res[2], 1.0) - - self.assertEqual(res[3].shape, ()) - self.assertEqual(res[4].shape, (3, 5)) - @prog_scope() def test_kthvalue(self): # 1) x is 0D @@ -3607,12 +3785,12 @@ def test_quantile(self): x1 = paddle.rand([]) x1.stop_gradient = False out1 = paddle.quantile(x1, 0.5, axis=None) - paddle.static.append_backward(out1.sum()) + paddle.static.append_backward(out1) x2 = paddle.rand([2, 3]) x2.stop_gradient = False out2 = paddle.quantile(x2, 0.5, axis=None) - paddle.static.append_backward(out2.sum()) + paddle.static.append_backward(out2) out_empty_list = paddle.quantile(x1, 0.5, axis=[]) self.assertEqual(out_empty_list.shape, ()) @@ -3640,6 +3818,37 @@ def test_quantile(self): self.assertEqual(res[5].shape, ()) self.assertEqual(res[5], 1.0) + @prog_scope() + def 
test_nanquantile(self): + # 1) x is 0D + x1 = paddle.rand([]) + x1.stop_gradient = False + out1 = paddle.nanquantile(x1, 0.5, axis=None) + paddle.static.append_backward(out1) + + # 2) x is ND with 'nan' + x2 = paddle.to_tensor([[float('nan'), 2.0, 3.0], [0.0, 1.0, 2.0]]) + x2.stop_gradient = False + out2 = paddle.nanquantile(x2, 0.5, axis=None) + print(out2) + paddle.static.append_backward(out2) + + prog = paddle.static.default_main_program() + res = self.exe.run( + prog, + fetch_list=[ + out1, + x1.grad_name, + out2, + x2.grad_name, + ], + ) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, ()) + + self.assertEqual(res[2].shape, ()) + self.assertEqual(res[3].shape, (2, 3)) + @prog_scope() def test_flip(self): x = paddle.rand([]) @@ -4108,16 +4317,45 @@ def test_cumsum(self): self.assertEqual(res[1].shape, ()) self.assertEqual(res[2].shape, ()) self.assertEqual(res[3].shape, ()) - self.assertEqual(res[3], 1) + self.assertEqual(res[3], 1.0) self.assertEqual(res[4].shape, (1,)) - self.assertEqual(res[4], 1) + self.assertEqual(res[4], 1.0) self.assertEqual(res[5].shape, ()) - self.assertEqual(res[5], 1) + self.assertEqual(res[5], 1.0) self.assertEqual(res[6].shape, ()) - self.assertEqual(res[6], 1) + self.assertEqual(res[6], 1.0) self.assertEqual(out2.shape, ()) self.assertEqual(out3.shape, ()) + @prog_scope() + def test_logcumsumexp(self): + x = paddle.rand([]) + x.stop_gradient = False + + out1 = paddle.logcumsumexp(x) + out2 = paddle.logcumsumexp(x, axis=0) + out3 = paddle.logcumsumexp(x, axis=-1) + + paddle.static.append_backward(out1) + paddle.static.append_backward(out2) + paddle.static.append_backward(out3) + + prog = paddle.static.default_main_program() + res = self.exe.run( + prog, + fetch_list=[ + out1, + out2, + out3, + x.grad_name, + ], + ) + self.assertEqual(res[0].shape, (1,)) + self.assertEqual(res[1].shape, ()) + self.assertEqual(res[2].shape, ()) + self.assertEqual(res[3].shape, ()) + self.assertEqual(res[3], 1.0) + @prog_scope() def test_add_n(self): x1 = paddle.rand([]) @@ -4986,11 +5224,22 @@ def test_cov(self): paddle.static.append_backward(out) prog = paddle.static.default_main_program() - res = self.exe.run(prog, fetch_list=[out, xt_1.grad_name]) self.assertEqual(res[0].shape, ()) self.assertEqual(res[1].shape, (12,)) + @prog_scope() + def test_corrcoef(self): + x = paddle.randn((12,)) + x.stop_gradient = False + out = paddle.linalg.corrcoef(x) + paddle.static.append_backward(out) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out, x.grad_name]) + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, (12,)) + @prog_scope() def test_det(self): xt_1 = paddle.randn((3, 3)) @@ -5261,6 +5510,14 @@ def test_linspace(self): out = paddle.linspace(start, stop, num) np.testing.assert_array_equal(out.numpy(), [1.0, 2.0, 3.0, 4.0, 5.0]) + def test_logspace(self): + start = paddle.full([], 1.0) + stop = paddle.full([], 3.0) + num = paddle.full([], 5, 'int32') + base = paddle.full([], 2.0) + out = paddle.logspace(start, stop, num, base) + self.assertEqual(out.shape, [5]) + def test_arange(self): start = paddle.full([], 1.0) stop = paddle.full([], 6.0) @@ -5883,6 +6140,49 @@ class TestDistribution(unittest.TestCase): def setUp(self): self.x = paddle.full([], 2.0) + def test_Bernoulli(self): + d = paddle.distribution.Bernoulli(probs=0.3) + self.assertEqual(d.mean.shape, []) + self.assertEqual(d.variance.shape, []) + self.assertEqual(d.entropy().shape, []) + self.assertEqual(d.sample([]).shape, []) + 
self.assertEqual(d.rsample([]).shape, []) + self.assertEqual(d.cdf(self.x).shape, []) + self.assertEqual(d.prob(self.x).shape, []) + self.assertEqual(d.log_prob(self.x).shape, []) + + d_other = paddle.distribution.Bernoulli(probs=0.7) + self.assertEqual(d.kl_divergence(d_other).shape, []) + + def test_Geometric(self): + d = paddle.distribution.Geometric(0.5) + self.assertEqual(d.mean.shape, []) + self.assertEqual(d.variance.shape, []) + self.assertEqual(d.entropy().shape, []) + self.assertEqual(d.stddev.shape, []) + self.assertEqual(d.pmf(self.x).shape, []) + self.assertEqual(d.log_pmf(self.x).shape, []) + self.assertEqual(d.sample([]).shape, []) + self.assertEqual(d.rsample([]).shape, []) + self.assertEqual(d.cdf(self.x).shape, []) + + d_other = paddle.distribution.Geometric(probs=0.7) + self.assertEqual(d.kl_divergence(d_other).shape, []) + + def test_Cauchy(self): + d = paddle.distribution.Cauchy(loc=0.1, scale=1.2) + self.assertEqual(d.sample([]).shape, []) + self.assertEqual(d.rsample([]).shape, []) + self.assertEqual(d.prob(self.x).shape, []) + self.assertEqual(d.log_prob(self.x).shape, []) + self.assertEqual(d.cdf(self.x).shape, []) + self.assertEqual(d.entropy().shape, []) + + d_other = paddle.distribution.Cauchy( + loc=paddle.to_tensor(1.2), scale=paddle.to_tensor(2.3) + ) + self.assertEqual(d.kl_divergence(d_other).shape, []) + def test_Categorical(self): logits = paddle.rand([6]) d = paddle.distribution.Categorical(logits)
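
Editor's note (not part of the patch): a minimal usage sketch summarizing the user-visible 0-D behavior that the changes above implement; the shapes and values below are taken from the updated docstrings and the new test cases in this diff, not from running the code.

import paddle

x = paddle.to_tensor([[float('nan'), 2., 3.],
                      [0.,           1., 2.]])

# nanmedian: full reduction now yields a 0-D Tensor, and keepdim defaults to False
y1 = paddle.nanmedian(x)          # shape [], value 2.0 (per updated docstring)
y2 = paddle.nanmedian(x, axis=0)  # shape [3], [0., 1.5, 2.5]

# count_nonzero: full reduction also yields a 0-D Tensor
n = paddle.count_nonzero(x)       # shape []

# logspace: start/stop/num/base may now be 0-D Tensors (per test_logspace)
start = paddle.full([], 1.0)
stop = paddle.full([], 3.0)
num = paddle.full([], 5, 'int32')
base = paddle.full([], 2.0)
out = paddle.logspace(start, stop, num, base)  # shape [5]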