From 438a11ec32a0f549df1fd6e4a1655df059d726e9 Mon Sep 17 00:00:00 2001
From: cassiniXu
Date: Sun, 30 Aug 2020 05:13:16 +0000
Subject: [PATCH 1/7] pad grad modified

---
 src/operator/numpy/np_pad_op-inl.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/operator/numpy/np_pad_op-inl.h b/src/operator/numpy/np_pad_op-inl.h
index be514b27a6b5..f70a43e6d0d7 100644
--- a/src/operator/numpy/np_pad_op-inl.h
+++ b/src/operator/numpy/np_pad_op-inl.h
@@ -545,8 +545,7 @@ template <typename xpu, int req>
 struct pad_grad {
   template<typename DType>
   MSHADOW_XINLINE static void Map(index_t i, DType *out, const DType *a){
-    using namespace mxnet_op;
-    KERNEL_ASSIGN(out[i], req, 1);
+    KERNEL_ASSIGN(out[i], req, a[i]);
   }
 };

From 6ccf44d569e80ff26b9da7ea4675b9ff04ae60ee Mon Sep 17 00:00:00 2001
From: cassiniXu
Date: Sun, 30 Aug 2020 05:30:47 +0000
Subject: [PATCH 2/7] Fix pad grad error

---
 tests/python/unittest/test_numpy_op.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index cc97821fba94..c459bb0012eb 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -8431,8 +8431,8 @@ def hybrid_forward(self,F,A,**kwargs):
             assert_almost_equal(mx_out.asnumpy(), np_out, rtol = rtol, atol = atol)

             # test gradient
-            mx_out.backward()
-            np_backward = np.ones(shape)
+            mx_out.backward(x)
+            np_backward = x
             assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=rtol, atol=atol)

             # test imperative once again

From 2fe0964151dd6a8f533a5c52ce7a983c905ab4c0 Mon Sep 17 00:00:00 2001
From: cassiniXu
Date: Sun, 30 Aug 2020 11:02:37 +0000
Subject: [PATCH 3/7] modify pad constant backward

---
 src/operator/numpy/np_pad_op-inl.h     | 115 ++++++++++++++++++++-----
 tests/python/unittest/test_numpy_op.py |   4 -
 2 files changed, 92 insertions(+), 27 deletions(-)

diff --git a/src/operator/numpy/np_pad_op-inl.h b/src/operator/numpy/np_pad_op-inl.h
index f70a43e6d0d7..d56902ba8221 100644
--- a/src/operator/numpy/np_pad_op-inl.h
+++ b/src/operator/numpy/np_pad_op-inl.h
@@ -540,12 +540,22 @@ struct min_pad {
   }
 };

-
-template <typename xpu, int req>
+template <typename xpu, int req, int ndim>
 struct pad_grad {
-  template<typename DType>
-  MSHADOW_XINLINE static void Map(index_t i, DType *out, const DType *a){
-    KERNEL_ASSIGN(out[i], req, a[i]);
+  template<typename DType>
+  MSHADOW_XINLINE static void Map(index_t i, DType *out, const DType *a,
+                                  const index_t* ishape,
+                                  const index_t* oshape,
+                                  mshadow::Shape<ndim*2> width) {
+    auto j = uunravel<ndim>(i, oshape);
+    size_t m;
+    index_t* indexwidth = width.shape_;
+    index_t* indexshape = j.shape_;
+    for (m = 0; m < ndim; m++) {
+      indexshape[m] = indexshape[m] + indexwidth[m * 2];
+    }
+    index_t l = rravel<ndim>(j, ishape);
+    KERNEL_ASSIGN(out[i], req, a[l]);
   }
 };

@@ -719,20 +729,44 @@ void NumpyPadOpImpl(const TBlob& in_data,
 template <typename xpu>
 void NumpyPadOpBackImpl(const TBlob& in_data,
                         const TBlob& out_data,
+                        index_t* ishape,
+                        index_t* oshape,
                         index_t dsize,
+                        const NumpyPadParam& param,
                         const std::vector<OpReqType>& req,
                         mxnet_op::Stream<xpu> *s) {
-  using namespace mxnet_op;
-  using namespace mshadow;
-  MSHADOW_TYPE_SWITCH_WITH_BOOL(out_data.type_flag_, DType, {
-    MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
-      Kernel<pad_grad<xpu, req_type>, xpu>::Launch(
-        s, dsize, out_data.dptr<DType>(), in_data.dptr<DType>());
-    });
-  });
+  using namespace mxnet_op;
+  using namespace mshadow;
+  int mode = param.mode;
+  int ndim = in_data.ndim();
+  MXNET_NDIM_SWITCH(ndim, NDim, {
+    mshadow::Shape<NDim*2> width;
+    int dimcounter = 0;
+    index_t* odptr = reinterpret_cast<index_t*>(oshape);
+    if (ndim == 1) {
+      width[0] = param.pad_width[0][0];
+      width[1] = param.pad_width[1][0];
+    } else {
+      for (dimcounter = 0; dimcounter < NDim; dimcounter++) {
+        width[dimcounter*2] = param.pad_width[dimcounter][0];
+        width[dimcounter*2 + 1] = param.pad_width[dimcounter][1];
+      }
+    }
+    index_t* idptr = reinterpret_cast<index_t*>(ishape);
+    if (mode != 0) {
+      LOG(FATAL) << "Other modes are not supported. ";
+    } else {
+      MSHADOW_TYPE_SWITCH_WITH_BOOL(out_data.type_flag_, DType, {
+        MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
+          Kernel<pad_grad<xpu, req_type, NDim>, xpu>::Launch(
+            s, dsize, out_data.dptr<DType>(), in_data.dptr<DType>(),
+            idptr, odptr, width);
+        });
+      });
+    }
+  })
 }
-
 template <typename xpu>
 void NumpyPadOpForward(const nnvm::NodeAttrs& attrs,
                        const OpContext& ctx,
                        const std::vector<TBlob>& inputs,
                        const std::vector<OpReqType>& req,
                        const std::vector<TBlob>& outputs) {
@@ -791,15 +825,50 @@ void NumpyPadOpBackward(const nnvm::NodeAttrs& attrs,
                         const std::vector<TBlob>& inputs,
                         const std::vector<OpReqType>& req,
                         const std::vector<TBlob>& outputs) {
-  using namespace mxnet_op;
-  using namespace mshadow;
-  CHECK_EQ(inputs.size(), 1U);
-  CHECK_EQ(outputs.size(), 1U);
-  Stream<xpu> *s = ctx.get_stream<xpu>();
-  const TBlob& in_data = inputs[0];
-  const TBlob& out_data = outputs[0];
-  NumpyPadOpBackImpl<xpu>(in_data, out_data,
-                          out_data.Size(), req, s);
+  MXNET_NDIM_SWITCH(inputs[0].ndim(), NDim, {
+    using namespace mxnet_op;
+    using namespace mshadow;
+    CHECK_EQ(inputs.size(), 1U);
+    CHECK_EQ(outputs.size(), 1U);
+    CHECK_EQ(req.size(), 1U);
+    CHECK_EQ(req[0], kWriteTo);
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+    const TBlob& in_data = inputs[0];
+    const TBlob& out_data = outputs[0];
+    size_t ts = in_data.ndim();
+    size_t count;
+    mshadow::Shape<NDim> inshape;
+    for (count = 0; count < ts; count++) {
+      inshape[count] = static_cast<index_t>((in_data.shape_)[count]);
+    }
+
+    Tensor<xpu, 1, index_t> tsp = ctx.requested[0].
+                                  get_space_typed<xpu, 1, index_t>(Shape1(2*ts), s);
+    Tensor<cpu, 1, index_t> ta(reinterpret_cast<index_t*>(inshape.shape_),
+                               Shape1(ts), ctx.get_stream<cpu>());
+    Tensor<xpu, 1, index_t> ti(reinterpret_cast<index_t*>(tsp.dptr_),
+                               Shape1(ts), ctx.get_stream<xpu>());
+    mshadow::Copy(ti, ta, ctx.get_stream<xpu>());
+
+    mshadow::Shape<NDim> outshape;
+    for (count = 0; count < ts; count++) {
+      outshape[count] = static_cast<index_t>((out_data.shape_)[count]);
+    }
+    index_t* wcp = tsp.dptr_;
+    wcp += ts;
+    Tensor<cpu, 1, index_t> tb(reinterpret_cast<index_t*>(outshape.shape_),
+                               Shape1(ts), ctx.get_stream<cpu>());
+    Tensor<xpu, 1, index_t> to(reinterpret_cast<index_t*>(wcp), Shape1(ts),
+                               ctx.get_stream<xpu>());
+    mshadow::Copy(to, tb, ctx.get_stream<xpu>());
+    const NumpyPadParam& param = nnvm::get<NumpyPadParam>(attrs.parsed);
+
+    index_t* wt = reinterpret_cast<index_t*>(to.dptr_);
+    index_t* wi = reinterpret_cast<index_t*>(ti.dptr_);
+
+    NumpyPadOpBackImpl<xpu>(in_data, out_data, wi,
+                            wt, out_data.Size(), param, req, s);
+  })
 }

diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index c459bb0012eb..324fab212347 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -8431,10 +8431,6 @@ def hybrid_forward(self,F,A,**kwargs):
             assert_almost_equal(mx_out.asnumpy(), np_out, rtol = rtol, atol = atol)

             # test gradient
-            mx_out.backward(x)
-            np_backward = x
-            assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=rtol, atol=atol)
-
             # test imperative once again

             if(m != 'constant'):

From e092ac72530bd105cb33fcf2400c8984f6a01fad Mon Sep 17 00:00:00 2001
From: cassiniXu
Date: Mon, 31 Aug 2020 03:27:57 +0000
Subject: [PATCH 4/7] Fix test error

---
 tests/python/unittest/test_numpy_op.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 324fab212347..11cfab886fae 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -8431,15 +8431,21 @@ def hybrid_forward(self,F,A,**kwargs):
             assert_almost_equal(mx_out.asnumpy(), np_out, rtol = rtol, atol = atol)

             # test gradient
-            # test imperative once again
-
-            if(m != 'constant'):
-                np_out = _np.pad(x.asnumpy(), pw, mode=m)
-                mx_out = np.pad(x, pw, mode=m)
-            else:
-                np_out = _np.pad(x.asnumpy(), pw, constant_values=0, mode=m)
-                mx_out = np.pad(x, pw, mode=m, constant_values=0)
-            assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol)
+            if m == "constant":
+                ctx = mx.gpu()
+                x = mx.np.random.uniform(-1.0, 1.0, size=shape)
+                x = mx.np.array(x, ctx=ctx)
+                x.attach_grad()
+                with mx.autograd.record():
+                    mx_out = mx.np.pad(x, pad_width=pw, mode="constant")
+                    out_grad = mx.np.random.normal(0, 1, mx_out.shape)
+                    out_grad = mx.np.array(out_grad, ctx=ctx)
+                    loss = mx_out * out_grad
+                    loss = loss.sum()
+                    loss.backward()
+                in_grad = mx.np.pad(mx.np.ones_like(x.grad), pad_width=pw, mode="constant") * mx.np.array(out_grad, ctx=ctx)
+                mx_grad = x.grad
+                assert_almost_equal(mx.np.pad(mx_grad, pad_width=pw, mode="constant"), in_grad.asnumpy(), rtol=rtol, atol=atol)


 @with_seed()

From 6950ee04255b3670ee051fc686066b810326be22 Mon Sep 17 00:00:00 2001
From: cassiniXu
Date: Mon, 31 Aug 2020 05:39:17 +0000
Subject: [PATCH 5/7] Fix test error

---
 src/operator/numpy/np_pad_op-inl.h     | 19 +++++++++----------
 tests/python/unittest/test_numpy_op.py |  2 +-
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/src/operator/numpy/np_pad_op-inl.h b/src/operator/numpy/np_pad_op-inl.h
index d56902ba8221..1ba119f5d05b 100644
--- a/src/operator/numpy/np_pad_op-inl.h
+++ b/src/operator/numpy/np_pad_op-inl.h
@@ -739,6 +739,9 @@ void NumpyPadOpBackImpl(const TBlob& in_data,
   using namespace mshadow;
   int mode = param.mode;
   int ndim = in_data.ndim();
+  if (mode != 0) {
+    LOG(FATAL) << "Other modes are not supported. ";
+  }
   MXNET_NDIM_SWITCH(ndim, NDim, {
     mshadow::Shape<NDim*2> width;
     int dimcounter = 0;
@@ -753,17 +756,13 @@ void NumpyPadOpBackImpl(const TBlob& in_data,
       }
     }
     index_t* idptr = reinterpret_cast<index_t*>(ishape);
-    if (mode != 0) {
-      LOG(FATAL) << "Other modes are not supported. ";
-    } else {
-      MSHADOW_TYPE_SWITCH_WITH_BOOL(out_data.type_flag_, DType, {
-        MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
-          Kernel<pad_grad<xpu, req_type, NDim>, xpu>::Launch(
-            s, dsize, out_data.dptr<DType>(), in_data.dptr<DType>(),
-            idptr, odptr, width);
-        });
+    MSHADOW_TYPE_SWITCH_WITH_BOOL(out_data.type_flag_, DType, {
+      MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
+        Kernel<pad_grad<xpu, req_type, NDim>, xpu>::Launch(
+          s, dsize, out_data.dptr<DType>(), in_data.dptr<DType>(),
+          idptr, odptr, width);
       });
-    }
+    });
   })
 }

diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 11cfab886fae..129ce13624c2 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -8432,7 +8432,7 @@ def hybrid_forward(self,F,A,**kwargs):

             # test gradient
             if m == "constant":
-                ctx = mx.gpu()
+                ctx = mx.context.current_context()
                 x = mx.np.random.uniform(-1.0, 1.0, size=shape)
                 x = mx.np.array(x, ctx=ctx)
                 x.attach_grad()

From 25f7f9407f25b55b16fdb5e13f525ba45f6ca32c Mon Sep 17 00:00:00 2001
From: cassiniXu
Date: Mon, 31 Aug 2020 07:41:20 +0000
Subject: [PATCH 6/7] Fix kAddTo supported

---
 src/operator/numpy/np_pad_op-inl.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/operator/numpy/np_pad_op-inl.h b/src/operator/numpy/np_pad_op-inl.h
index 1ba119f5d05b..79baba756845 100644
--- a/src/operator/numpy/np_pad_op-inl.h
+++ b/src/operator/numpy/np_pad_op-inl.h
@@ -778,7 +778,8 @@ void NumpyPadOpForward(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(inputs.size(), 1U);
   CHECK_EQ(outputs.size(), 1U);
   CHECK_EQ(req.size(), 1U);
-  CHECK_EQ(req[0], kWriteTo);
+  CHECK(req[0] != kNullOp);
+  CHECK(req[0] != kWriteInplace);
   Stream<xpu> *s = ctx.get_stream<xpu>();
   const TBlob& in_data = inputs[0];
   const TBlob& out_data = outputs[0];
@@ -830,7 +831,8 @@ void NumpyPadOpBackward(const nnvm::NodeAttrs& attrs,
     CHECK_EQ(inputs.size(), 1U);
     CHECK_EQ(outputs.size(), 1U);
     CHECK_EQ(req.size(), 1U);
-    CHECK_EQ(req[0], kWriteTo);
+    CHECK(req[0] != kNullOp);
+    CHECK(req[0] != kWriteInplace);
     Stream<xpu> *s = ctx.get_stream<xpu>();
     const TBlob& in_data = inputs[0];
     const TBlob& out_data = outputs[0];

From 44d1e692ffac8433167559f7893ba168cb8601a2 Mon Sep 17 00:00:00 2001
From: Xingjian Shi
Date: Mon, 31 Aug 2020 10:43:26 -0700
Subject: [PATCH 7/7] Add test for grad_req='add'

---
 tests/python/unittest/test_numpy_op.py | 29 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 129ce13624c2..16e25b234ed5 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -8435,17 +8435,24 @@ def hybrid_forward(self,F,A,**kwargs):
                 ctx = mx.context.current_context()
                 x = mx.np.random.uniform(-1.0, 1.0, size=shape)
                 x = mx.np.array(x, ctx=ctx)
-                x.attach_grad()
-                with mx.autograd.record():
-                    mx_out = mx.np.pad(x, pad_width=pw, mode="constant")
-                    out_grad = mx.np.random.normal(0, 1, mx_out.shape)
-                    out_grad = mx.np.array(out_grad, ctx=ctx)
-                    loss = mx_out * out_grad
-                    loss = loss.sum()
-                    loss.backward()
-                in_grad = mx.np.pad(mx.np.ones_like(x.grad), pad_width=pw, mode="constant") * mx.np.array(out_grad, ctx=ctx)
-                mx_grad = x.grad
-                assert_almost_equal(mx.np.pad(mx_grad, pad_width=pw, mode="constant"), in_grad.asnumpy(), rtol=rtol, atol=atol)
+                for grad_req in ['write', 'add']:
+                    x.attach_grad(grad_req)
+                    if grad_req == 'add':
+                        init_grad = mx.np.random.uniform(-1.0, 1.0, size=shape, ctx=ctx)
+                        x.grad[:] = init_grad
+                    with mx.autograd.record():
+                        mx_out = mx.np.pad(x, pad_width=pw, mode="constant")
+                        out_grad = mx.np.random.normal(0, 1, mx_out.shape)
+                        out_grad = mx.np.array(out_grad, ctx=ctx)
+                        loss = mx_out * out_grad
+                        loss = loss.sum()
+                        loss.backward()
+                    gt_in_grad = mx.np.pad(mx.np.ones_like(x.grad), pad_width=pw, mode="constant") * mx.np.array(out_grad, ctx=ctx)
+                    mx_grad = x.grad
+                    if grad_req == 'add':
+                        assert_almost_equal(mx.np.pad(mx_grad - init_grad, pad_width=pw, mode="constant"), gt_in_grad.asnumpy(), rtol=rtol, atol=atol)
+                    else:
+                        assert_almost_equal(mx.np.pad(mx_grad, pad_width=pw, mode="constant"), gt_in_grad.asnumpy(), rtol=rtol, atol=atol)


 @with_seed()
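A minimal standalone sketch of the gradient behaviour this series implements, not part of the patches themselves. It assumes an MXNet build exposing the same mx.np / mx.npx / mx.autograd APIs used in the test above; the shape and pad_width values are illustrative only. For constant-mode padding, only the interior (unpadded) region of the output gradient flows back to the input, so summing the padded output yields an all-ones input gradient:

    import mxnet as mx

    mx.npx.set_np()  # enable NumPy-compatible array semantics

    x = mx.np.ones((2, 3))
    x.attach_grad()
    with mx.autograd.record():
        # pad one row on each side and two columns on each side with zeros
        y = mx.np.pad(x, pad_width=((1, 1), (2, 2)), mode="constant")
        loss = y.sum()
    loss.backward()
    # Constant-mode backward copies the interior of the output gradient back to
    # the input positions, so x.grad is an all-ones array of shape (2, 3).
    print(x.grad)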