From 3f2b68e934c5a756d07ae0fe7158014c7e85ab60 Mon Sep 17 00:00:00 2001
From: andreii <andreii@nvidia.com>
Date: Wed, 22 May 2019 09:25:35 -0700
Subject: [PATCH 1/5] Trigger CI


From d6b4882c6aac711b70664c1764d853a83d39a52c Mon Sep 17 00:00:00 2001
From: andreii <andreii@nvidia.com>
Date: Fri, 11 Oct 2019 12:53:05 -0700
Subject: [PATCH 2/5] Aggregated zeroing of the gradients/arrays

---
 python/mxnet/gluon/parameter.py     | 18 ++++++++-
 tests/python/unittest/test_gluon.py | 63 ++++++++++++++++++++++++-----
 2 files changed, 70 insertions(+), 11 deletions(-)

diff --git a/python/mxnet/gluon/parameter.py b/python/mxnet/gluon/parameter.py
index dedd81496c2f..3a678691e085 100644
--- a/python/mxnet/gluon/parameter.py
+++ b/python/mxnet/gluon/parameter.py
@@ -887,8 +887,22 @@ def initialize(self, init=initializer.Uniform(), ctx=None, verbose=False,
 
     def zero_grad(self):
         """Sets all Parameters' gradient buffer to 0."""
-        for i in self.values():
-            i.zero_grad()
+        # collect gradient arrays for each ctx
+        arrays = defaultdict(list)
+        for p in self.values():
+            if p.grad_req == 'null' or p._grad is None:
+                continue
+            for g in p.list_grad():
+                if g.stype == 'row_sparse':
+                    mx.ndarray.zeros_like(g, out=g)
+                else:
+                    arrays[g.context].append(g)
+
+        if len(arrays) == 0:
+            return
+
+        for arr in arrays.values():
+            mx.nd.reset_arrays(*arr, num_arrays=len(arr))
 
     def reset_ctx(self, ctx):
         """Re-assign all Parameters to other contexts.
diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index 380ce762a9f7..b1abd14b8a42 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -1504,15 +1504,60 @@ def test_hybrid_multi_context():
 
 @with_seed()
 def test_zero_grad():
-    data = mx.nd.random.uniform(shape=(3,3))
-    net = nn.Embedding(3, 4, sparse_grad=True, prefix='test_zero_grad_')
-    net.initialize()
-    with mx.autograd.record():
-        l = net(data)
-        l.backward()
-    net.collect_params().zero_grad()
-    grad = net.collect_params()['test_zero_grad_weight'].grad()
-    assert_almost_equal(grad.asnumpy(), grad.asnumpy() * 0)
+    def _test_grad_reset(ctx, dtype='float32', sparse=False, embeddingType=None):
+        data = mx.nd.random.uniform(shape=(3,3), dtype=dtype, ctx=ctx)
+        if embeddingType is None:
+            embeddingType = dtype
+        net = nn.Embedding(3, 4, sparse_grad=sparse, prefix='test_zero_grad_', dtype=embeddingType)
+        net.initialize(ctx=ctx)
+        with mx.autograd.record():
+            l = net(data)
+            l.backward()
+        net.collect_params().zero_grad()
+        grad = net.collect_params()['test_zero_grad_weight'].grad()
+        assert_almost_equal(grad.asnumpy(), grad.asnumpy() * 0)
+
+    def _test_multi_reset(nArrays, dtype, ctx):
+        # Construct the list of non-zeros arrays with random shapes
+        arr = []
+        for _ in range(nArrays):
+            shape = ()
+            for _ in range(np.random.randint(1, 5)):
+                shape = shape + (np.random.randint(1, 10),)
+            arr.append(mx.nd.random.uniform(shape=shape, dtype=dtype, ctx=ctx))
+
+        # Reset all arrays
+        mx.nd.reset_arrays(*arr, num_arrays=len(arr))
+
+        # Check results
+        for i in range(nArrays):
+            grad = arr[i].asnumpy()
+            assert_almost_equal(grad, grad * 0)
+
+
+    # Setting context for current test
+    ctx = mx.context.current_context()
+
+    # Launching _test_multi_reset 10 times with different types & randomly chosen nArrays
+    for _ in range(10):
+        for type in ['float16', 'float32', 'float64']:
+            _test_multi_reset(np.random.randint(1, 50), type, ctx)
+
+    # Saving value of environment variable, if it was defined
+    envVarKey = 'MXNET_STORAGE_FALLBACK_LOG_VERBOSE'
+    envVarValue = os.environ[envVarKey] if envVarKey in os.environ else None
+    # Changing value of environment variable
+    os.environ[envVarKey] = '0'
+    for type in ['float16', 'float32', 'float64']:
+        for embType in ['float32', 'float64']:
+            for sparse in [True, False]:
+                _test_grad_reset(ctx, dtype=type, sparse=sparse, embeddingType=embType)
+
+    # Remove or restore the value of environment variable
+    if envVarValue is None:
+        del os.environ[envVarKey]
+    else:
+        os.environ[envVarKey] = envVarValue
 
 def check_hybrid_static_memory(**kwargs):
     x = mx.nd.random.uniform(shape=(2, 3, 32, 32))

From 54866163095628be2da48618723daf7abc57917c Mon Sep 17 00:00:00 2001
From: andreii <andreii@nvidia.com>
Date: Fri, 11 Oct 2019 14:08:00 -0700
Subject: [PATCH 3/5] New files for aggregated zeroing of the gradients/arrays

---
 src/operator/contrib/reset_arrays-inl.h | 102 ++++++++++++++++++++++++
 src/operator/contrib/reset_arrays.cc    |  74 +++++++++++++++++
 src/operator/contrib/reset_arrays.cu    |  40 ++++++++++
 3 files changed, 216 insertions(+)
 create mode 100644 src/operator/contrib/reset_arrays-inl.h
 create mode 100644 src/operator/contrib/reset_arrays.cc
 create mode 100644 src/operator/contrib/reset_arrays.cu

diff --git a/src/operator/contrib/reset_arrays-inl.h b/src/operator/contrib/reset_arrays-inl.h
new file mode 100644
index 000000000000..06440896014f
--- /dev/null
+++ b/src/operator/contrib/reset_arrays-inl.h
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file reset_arrays-inl.h
+ * \brief setting all array element values to zeros
+ * \author Moises Hernandez-Fernandez, Andrei Ivanov
+ */
+
+#ifndef MXNET_OPERATOR_CONTRIB_RESET_ARRAYS_INL_H_
+#define MXNET_OPERATOR_CONTRIB_RESET_ARRAYS_INL_H_
+
+#include <mxnet/operator_util.h>
+#include <vector>
+#include <limits>
+#include <algorithm>
+#include "../elemwise_op_common.h"
+#include "../mshadow_op.h"
+#include "../mxnet_op.h"
+#include "../tensor/init_op.h"
+
+namespace mxnet {
+namespace op {
+
+struct ResetArraysParam : public dmlc::Parameter<ResetArraysParam> {
+  int num_arrays;
+  DMLC_DECLARE_PARAMETER(ResetArraysParam) {
+    DMLC_DECLARE_FIELD(num_arrays)
+    .describe("number of input arrays.");
+  }
+};
+inline bool ResetArraysShape(const NodeAttrs& attrs,
+                            std::vector<mxnet::TShape>* in_shape,
+                            std::vector<mxnet::TShape>* out_shape) {
+  const auto& p = dmlc::get<ResetArraysParam>(attrs.parsed);
+  CHECK_EQ(in_shape->size(), p.num_arrays);
+  for (auto s : *in_shape) {
+    if (s.ndim() == 0)
+      return false;
+  }
+  return true;
+}
+
+inline bool ResetArraysType(const NodeAttrs& attrs,
+                           std::vector<int>* in_type,
+                           std::vector<int>* out_type) {
+  const auto& param_ = dmlc::get<ResetArraysParam>(attrs.parsed);
+  CHECK_EQ(in_type->size(), param_.num_arrays);
+  const int dtype = (*in_type)[0];
+  CHECK_NE(dtype, -1) << "First input must have specified type";
+  for (size_t i = 0; i < in_type->size(); ++i) {
+    const auto currType = (*in_type)[i];
+    if (currType == -1) {
+      (*in_type)[i] = dtype;
+    } else {
+      UNIFORM_TYPE_CHECK((*in_type)[i], dtype, "array_" + std::to_string(i));
+    }
+  }
+
+  return true;
+}
+
+template<typename xpu>
+void ResetMemory(void *pntr, size_t len, mshadow::Stream<xpu> *s);
+
+template<typename xpu>
+void ResetArrays(const nnvm::NodeAttrs& attrs,
+                 const OpContext &ctx,
+                 const std::vector<TBlob> &inputs,
+                 const std::vector<OpReqType> &req,
+                 const std::vector<TBlob> &outputs) {
+  auto s = ctx.get_stream<xpu>();
+  const auto& p = nnvm::get<ResetArraysParam>(attrs.parsed);
+  for (int i = 0; i < p.num_arrays; i++) {  // array index in inputs
+    const size_t size = inputs[i].shape_.Size();
+    MSHADOW_REAL_TYPE_SWITCH(inputs[i].type_flag_, DType,
+      ResetMemory(inputs[i].FlatTo2D<xpu, DType>(s).dptr_, size * sizeof(DType), s);
+    )
+  }
+}
+
+}  // namespace op
+}  // namespace mxnet
+
+#endif  // MXNET_OPERATOR_CONTRIB_RESET_ARRAYS_INL_H_
diff --git a/src/operator/contrib/reset_arrays.cc b/src/operator/contrib/reset_arrays.cc
new file mode 100644
index 000000000000..f67e0098cd11
--- /dev/null
+++ b/src/operator/contrib/reset_arrays.cc
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file reset_arrays.cc
+ * \brief setting all array element values to zeros
+ * \author Moises Hernandez-Fernandez, Andrei Ivanov
+ */
+
+#include "./reset_arrays-inl.h"
+
+namespace mxnet {
+namespace op {
+
+DMLC_REGISTER_PARAMETER(ResetArraysParam);
+
+NNVM_REGISTER_OP(reset_arrays)
+.describe(R"code(Set to zero multiple arrays
+)code" ADD_FILELINE)
+.set_num_inputs([](const nnvm::NodeAttrs& attrs) {
+    return static_cast<uint32_t>(dmlc::get<ResetArraysParam>(attrs.parsed).num_arrays);
+  })
+.set_attr<nnvm::FMutateInputs>("FMutateInputs",
+  [](const nnvm::NodeAttrs& attrs) {
+    const uint32_t num_args = dmlc::get<ResetArraysParam>(attrs.parsed).num_arrays;
+    std::vector<uint32_t> ret;
+    for (uint32_t i = 0; i < num_args; ++i) {
+      ret.push_back(i);
+    }
+    return ret;
+  })
+.set_num_outputs(0)
+.set_attr_parser(ParamParser<ResetArraysParam>)
+.set_attr<mxnet::FInferShape>("FInferShape", ResetArraysShape)
+.set_attr<nnvm::FInferType>("FInferType", ResetArraysType)
+.set_attr<nnvm::FListInputNames>("FListInputNames",
+  [](const NodeAttrs& attrs) {
+    const uint32_t num_args = dmlc::get<ResetArraysParam>(attrs.parsed).num_arrays;
+    std::vector<std::string> ret;
+    for (uint32_t i = 0; i < num_args; ++i) {
+      ret.push_back(std::string("array_") + std::to_string(i));
+    }
+    return ret;
+  })
+.add_argument("data", "NDArray-or-Symbol[]", "Arrays")
+.add_arguments(ResetArraysParam::__FIELDS__());
+
+NNVM_REGISTER_OP(reset_arrays)
+.set_attr<FCompute>("FCompute<cpu>", ResetArrays<cpu>);
+
+template<>
+void ResetMemory<cpu>(void *pntr, size_t len, mshadow::Stream<cpu> *s) {
+  memset(pntr, 0, len);
+}
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/contrib/reset_arrays.cu b/src/operator/contrib/reset_arrays.cu
new file mode 100644
index 000000000000..f7a9d0034665
--- /dev/null
+++ b/src/operator/contrib/reset_arrays.cu
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file reset_arrays.cu
+ * \brief setting all array element values to zeros
+ * \author Moises Hernandez-Fernandez, Andrei Ivanov
+ */
+#include "./reset_arrays-inl.h"
+
+namespace mxnet {
+namespace op {
+
+template<>
+void ResetMemory<gpu>(void *pntr, size_t len, mshadow::Stream<gpu> *s) {
+  CUDA_CALL(cudaMemsetAsync(pntr, 0, len, mshadow::Stream<gpu>::GetStream(s)));
+}
+
+NNVM_REGISTER_OP(reset_arrays)
+.set_attr<FCompute>("FCompute<gpu>", ResetArrays<gpu>);
+
+}  // namespace op
+}  // namespace mxnet

From 3d35a8b24e8b669c568c5f350809aedb9dde69a5 Mon Sep 17 00:00:00 2001
From: andreii <andreii@nvidia.com>
Date: Fri, 11 Oct 2019 16:13:25 -0700
Subject: [PATCH 4/5] Adding possibility to reset the arrays of different
 types.

---
 src/operator/contrib/reset_arrays-inl.h | 5 +----
 tests/python/unittest/test_gluon.py     | 7 +++++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/operator/contrib/reset_arrays-inl.h b/src/operator/contrib/reset_arrays-inl.h
index 06440896014f..f20ed058a2e3 100644
--- a/src/operator/contrib/reset_arrays-inl.h
+++ b/src/operator/contrib/reset_arrays-inl.h
@@ -67,11 +67,8 @@ inline bool ResetArraysType(const NodeAttrs& attrs,
   CHECK_NE(dtype, -1) << "First input must have specified type";
   for (size_t i = 0; i < in_type->size(); ++i) {
     const auto currType = (*in_type)[i];
-    if (currType == -1) {
+    if (currType == -1)
       (*in_type)[i] = dtype;
-    } else {
-      UNIFORM_TYPE_CHECK((*in_type)[i], dtype, "array_" + std::to_string(i));
-    }
   }
 
   return true;
diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index b1abd14b8a42..c6da0f4d9d96 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -33,6 +33,7 @@
 import warnings
 import json
 import unittest
+import random
 
 @with_seed()
 def test_parameter():
@@ -1521,10 +1522,11 @@ def _test_multi_reset(nArrays, dtype, ctx):
         # Construct the list of non-zeros arrays with random shapes
         arr = []
         for _ in range(nArrays):
+            arrType = random.choice(dtype) if isinstance(dtype, list) else dtype
             shape = ()
             for _ in range(np.random.randint(1, 5)):
                 shape = shape + (np.random.randint(1, 10),)
-            arr.append(mx.nd.random.uniform(shape=shape, dtype=dtype, ctx=ctx))
+            arr.append(mx.nd.random.uniform(shape=shape, dtype=arrType, ctx=ctx))
 
         # Reset all arrays
         mx.nd.reset_arrays(*arr, num_arrays=len(arr))
@@ -1539,8 +1541,9 @@ def _test_multi_reset(nArrays, dtype, ctx):
     ctx = mx.context.current_context()
 
     # Launching _test_multi_reset 10 times with different types & randomly chosen nArrays
+    testedTypes = ['float16', 'float32', 'float64']
     for _ in range(10):
-        for type in ['float16', 'float32', 'float64']:
+        for type in [testedTypes] + testedTypes:
             _test_multi_reset(np.random.randint(1, 50), type, ctx)
 
     # Saving value of environment variable, if it was defined

From 5ca58e721d4d7732af4601e7f89618442350d6ee Mon Sep 17 00:00:00 2001
From: andreii <andreii@nvidia.com>
Date: Mon, 14 Oct 2019 10:02:05 -0700
Subject: [PATCH 5/5] Minor cleanup

---
 python/mxnet/gluon/parameter.py         |  3 ++-
 src/operator/contrib/reset_arrays-inl.h | 27 +++++++++----------------
 2 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/python/mxnet/gluon/parameter.py b/python/mxnet/gluon/parameter.py
index 3a678691e085..8800684ad0b4 100644
--- a/python/mxnet/gluon/parameter.py
+++ b/python/mxnet/gluon/parameter.py
@@ -24,9 +24,10 @@
            'ParameterDict', 'tensor_types']
 
 
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 import warnings
 import numpy as np
+import mxnet as mx
 
 from ..base import mx_real_t, MXNetError
 from .. import symbol, ndarray, initializer, context
diff --git a/src/operator/contrib/reset_arrays-inl.h b/src/operator/contrib/reset_arrays-inl.h
index f20ed058a2e3..3559a21879e4 100644
--- a/src/operator/contrib/reset_arrays-inl.h
+++ b/src/operator/contrib/reset_arrays-inl.h
@@ -27,13 +27,7 @@
 #ifndef MXNET_OPERATOR_CONTRIB_RESET_ARRAYS_INL_H_
 #define MXNET_OPERATOR_CONTRIB_RESET_ARRAYS_INL_H_
 
-#include <mxnet/operator_util.h>
 #include <vector>
-#include <limits>
-#include <algorithm>
-#include "../elemwise_op_common.h"
-#include "../mshadow_op.h"
-#include "../mxnet_op.h"
 #include "../tensor/init_op.h"
 
 namespace mxnet {
@@ -46,29 +40,28 @@ struct ResetArraysParam : public dmlc::Parameter<ResetArraysParam> {
     .describe("number of input arrays.");
   }
 };
+
 inline bool ResetArraysShape(const NodeAttrs& attrs,
                             std::vector<mxnet::TShape>* in_shape,
                             std::vector<mxnet::TShape>* out_shape) {
-  const auto& p = dmlc::get<ResetArraysParam>(attrs.parsed);
-  CHECK_EQ(in_shape->size(), p.num_arrays);
+  const auto& param = dmlc::get<ResetArraysParam>(attrs.parsed);
+  CHECK_EQ(in_shape->size(), param.num_arrays);
   for (auto s : *in_shape) {
     if (s.ndim() == 0)
       return false;
   }
+
   return true;
 }
 
 inline bool ResetArraysType(const NodeAttrs& attrs,
                            std::vector<int>* in_type,
                            std::vector<int>* out_type) {
-  const auto& param_ = dmlc::get<ResetArraysParam>(attrs.parsed);
-  CHECK_EQ(in_type->size(), param_.num_arrays);
-  const int dtype = (*in_type)[0];
-  CHECK_NE(dtype, -1) << "First input must have specified type";
+  const auto& param = dmlc::get<ResetArraysParam>(attrs.parsed);
+  CHECK_EQ(in_type->size(), param.num_arrays);
   for (size_t i = 0; i < in_type->size(); ++i) {
-    const auto currType = (*in_type)[i];
-    if (currType == -1)
-      (*in_type)[i] = dtype;
+    if ((*in_type)[i] == -1)
+      return false;
   }
 
   return true;
@@ -84,8 +77,8 @@ void ResetArrays(const nnvm::NodeAttrs& attrs,
                  const std::vector<OpReqType> &req,
                  const std::vector<TBlob> &outputs) {
   auto s = ctx.get_stream<xpu>();
-  const auto& p = nnvm::get<ResetArraysParam>(attrs.parsed);
-  for (int i = 0; i < p.num_arrays; i++) {  // array index in inputs
+  const auto& param = nnvm::get<ResetArraysParam>(attrs.parsed);
+  for (int i = 0; i < param.num_arrays; i++) {  // array index in inputs
     const size_t size = inputs[i].shape_.Size();
     MSHADOW_REAL_TYPE_SWITCH(inputs[i].type_flag_, DType,
       ResetMemory(inputs[i].FlatTo2D<xpu, DType>(s).dptr_, size * sizeof(DType), s);