[TOP] Level1 complete (#3)
tqchen committed May 26, 2018
1 parent c943567 commit d047238
Showing 9 changed files with 355 additions and 170 deletions.
58 changes: 58 additions & 0 deletions nnvm/include/nnvm/top/nn.h
@@ -15,6 +15,7 @@ namespace top {
struct DenseParam : public dmlc::Parameter<DenseParam> {
int units;
bool use_bias;

DMLC_DECLARE_PARAMETER(DenseParam) {
DMLC_DECLARE_FIELD(units).set_lower_bound(1)
.describe("Number of hidden units of the dense transformation.");
@@ -27,6 +28,63 @@ struct DenseParam : public dmlc::Parameter<DenseParam> {
static const constexpr int kBias = 2;
};

struct DropoutParam : public dmlc::Parameter<DropoutParam> {
float rate;

DMLC_DECLARE_PARAMETER(DropoutParam) {
DMLC_DECLARE_FIELD(rate).set_default(0.5)
.set_range(0, 1)
.describe("Fraction of the input that gets dropped out during training time.");
}
};

struct BatchNormParam : public dmlc::Parameter<BatchNormParam> {
int axis;
float epsilon;
float momentum;
bool center;
bool scale;

DMLC_DECLARE_PARAMETER(BatchNormParam) {
DMLC_DECLARE_FIELD(axis).set_default(1)
.describe("Specify which shape axis the channel is specified.");
DMLC_DECLARE_FIELD(epsilon).set_default(1e-5f)
.describe("Small float added to variance to avoid dividing by zero.");
DMLC_DECLARE_FIELD(center).set_default(true)
.describe("If True, add offset of `beta` to normalized tensor."
"If False, `beta` is ignored.");
DMLC_DECLARE_FIELD(scale).set_default(true)
.describe("If True, multiply by `gamma`. If False, `gamma` is not used."
"When the next layer is piecewise linear (also e.g. `nn.relu`),"
"this can be disabled since the scaling"
"will be done by the next layer.");
}
// constants
static const constexpr int kData = 0;
static const constexpr int kGamma = 1;
static const constexpr int kBeta = 2;
static const constexpr int kMovingMean = 3;
static const constexpr int kMovingVariance = 4;
};

struct SoftmaxParam : public dmlc::Parameter<SoftmaxParam> {
int axis;

DMLC_DECLARE_PARAMETER(SoftmaxParam) {
DMLC_DECLARE_FIELD(axis).set_default(-1)
.describe("The axis to sum over when computing softmax.");
}
};

struct LogSoftmaxParam : public dmlc::Parameter<LogSoftmaxParam> {
int axis;

DMLC_DECLARE_PARAMETER(LogSoftmaxParam) {
DMLC_DECLARE_FIELD(axis).set_default(-1)
.describe("The axis to sum over when computing softmax.");
}
};

} // namespace top
} // namespace nnvm
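These parameter structs all follow the dmlc-core pattern: fields declared in DMLC_DECLARE_PARAMETER are filled from the string attributes attached to a graph node, with defaults and range checks enforced at parse time. A minimal sketch of that round trip, assuming the header above is on the include path and the program links against the translation unit that calls DMLC_REGISTER_PARAMETER(DropoutParam):

#include <iostream>
#include <map>
#include <string>
#include <nnvm/top/nn.h>

int main() {
  nnvm::top::DropoutParam param;
  // ParamParser<DropoutParam> does essentially this with a node's attr dict;
  // set_default/set_range from the declaration are applied here, so a
  // "rate" outside [0, 1] throws.
  param.Init(std::map<std::string, std::string>{{"rate", "0.3"}});
  std::cout << param.rate << "\n";  // prints 0.3
  return 0;
}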

31 changes: 27 additions & 4 deletions nnvm/include/nnvm/top/tensor.h
@@ -9,14 +9,37 @@
namespace nnvm {
namespace top {

-struct ConcatParam : public dmlc::Parameter<ConcatParam> {
-int dim;
-DMLC_DECLARE_PARAMETER(ConcatParam) {
-DMLC_DECLARE_FIELD(dim).set_range(0, 4).set_default(1)
+struct ConcatenateParam : public dmlc::Parameter<ConcatenateParam> {
+int axis;
+DMLC_DECLARE_PARAMETER(ConcatenateParam) {
+DMLC_DECLARE_FIELD(axis).set_lower_bound(0).set_default(1)
.describe("The axis along which the input tensors are concatenated.");
}
};
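The renamed ``axis`` field carries the usual concatenation shape rule: output dimensions match the inputs everywhere except ``axis``, where the sizes add up. A sketch of that rule with plain vectors (ConcatShape is a hypothetical helper, not nnvm's FInferShape):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<int64_t> ConcatShape(const std::vector<std::vector<int64_t> >& ins,
                                 int axis) {
  std::vector<int64_t> out = ins[0];
  for (std::size_t i = 1; i < ins.size(); ++i) {
    assert(ins[i].size() == out.size());
    for (std::size_t d = 0; d < out.size(); ++d) {
      if (static_cast<int>(d) == axis) {
        out[d] += ins[i][d];          // sizes accumulate along the axis
      } else {
        assert(ins[i][d] == out[d]);  // all other dimensions must match
      }
    }
  }
  return out;
}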

enum TypeFlag {
kFloat32 = 0,
kFloat64 = 1,
kFloat16 = 2,
kUint8 = 3,
kInt32 = 4,
kInt8 = 5,
kInt64 = 6,
};

struct CastParam : public dmlc::Parameter<CastParam> {
int dtype;
DMLC_DECLARE_PARAMETER(CastParam) {
DMLC_DECLARE_FIELD(dtype)
.add_enum("float32", kFloat32)
.add_enum("float64", kFloat64)
.add_enum("float16", kFloat16)
.add_enum("uint8", kUint8)
.add_enum("int32", kInt32)
.describe("Output data type.");
}
};

} // namespace top
} // namespace nnvm
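The integer ``dtype`` stores one of the TypeFlag values above, which lets kernels dispatch on it. A hedged sketch of such a dispatch (DtypeSize is illustrative, not part of nnvm):

#include <cstddef>
#include <stdexcept>
#include <nnvm/top/tensor.h>

std::size_t DtypeSize(int type_flag) {
  switch (type_flag) {
    case nnvm::top::kFloat32: return 4;
    case nnvm::top::kFloat64: return 8;
    case nnvm::top::kFloat16: return 2;
    case nnvm::top::kUint8:   return 1;
    case nnvm::top::kInt32:   return 4;
    case nnvm::top::kInt8:    return 1;
    case nnvm::top::kInt64:   return 8;
    default: throw std::invalid_argument("unknown TypeFlag");
  }
}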

25 changes: 20 additions & 5 deletions nnvm/src/top/elemwise_op_common.h
@@ -57,7 +57,7 @@ inline bool ElemwiseAttr(const nnvm::NodeAttrs& attrs,
}

template<int n_in, int n_out>
-inline bool ElemwiseShape(const nnvm::NodeAttrs& attrs,
+inline bool ElemwiseShape(const NodeAttrs& attrs,
std::vector<TShape> *in_attrs,
std::vector<TShape> *out_attrs) {
if (n_in != -1) {
@@ -71,7 +71,7 @@ inline bool ElemwiseShape(const nnvm::NodeAttrs& attrs,
}

template<int n_in, int n_out>
-inline bool ElemwiseType(const nnvm::NodeAttrs& attrs,
+inline bool ElemwiseType(const NodeAttrs& attrs,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
if (n_in != -1) {
@@ -88,13 +88,28 @@ inline bool ElemwiseType(const nnvm::NodeAttrs& attrs,
NNVM_REGISTER_OP(name) \
.set_num_inputs(1) \
.set_num_outputs(1) \
.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>) \
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>) \
.set_attr<nnvm::FInplaceOption>("FInplaceOption", \
.set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 1>) \
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>) \
.set_attr<FInplaceOption>("FInplaceOption", \
[](const NodeAttrs& attrs){ \
return std::vector<std::pair<int, int> >{{0, 0}}; \
}) \
.add_argument("data", "Tensor", "The input tensor.")


#define NNVM_REGISTER_ELEMWISE_BINARY_OP(name) \
NNVM_REGISTER_OP(name) \
.set_num_inputs(2) \
.set_num_outputs(1) \
.set_attr<FInferShape>("FInferShape", ElemwiseShape<2, 1>) \
.set_attr<FInferType>("FInferType", ElemwiseType<2, 1>) \
.set_attr<FInplaceOption>("FInplaceOption", \
[](const NodeAttrs& attrs) { \
return std::vector<std::pair<int, int> >{{0, 0}, {1, 0}}; \
}) \
.add_argument("lhs", "NDArray-or-Symbol", "first input") \
.add_argument("rhs", "NDArray-or-Symbol", "second input")

} // namespace top
} // namespace nnvm
#endif // NNVM_TOP_ELEMWISE_OP_COMMON_H_
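For comparison, a hypothetical client of the new binary macro; a single invocation registers a complete two-input, one-output op with shape and type inference plus in-place options (``elemwise_add`` is an illustrative name, not necessarily an op registered in this commit):

NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_add)
.describe("Element-wise addition of lhs and rhs.")
.set_support_level(1);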
118 changes: 117 additions & 1 deletion nnvm/src/top/nn.cc
@@ -73,7 +73,7 @@ If ``use_bias`` is set to be false, then the ``bias`` term is ignored.
.set_attr_parser(ParamParser<DenseParam>)
.set_num_outputs(1)
.set_num_inputs([](const NodeAttrs& attrs) {
const DenseParam& param = nnvm::get<DenseParam>(attrs.parsed);
const DenseParam& param = get<DenseParam>(attrs.parsed);
return param.use_bias ? 3 : 2;
})
.set_attr<FListInputNames>("FListInputNames", DenseListInputNames)
@@ -90,5 +90,121 @@ NNVM_REGISTER_ELEMWISE_UNARY_OP(relu)
)code" NNVM_ADD_FILELINE)
.set_support_level(1);

// dropout
DMLC_REGISTER_PARAMETER(DropoutParam);

NNVM_REGISTER_OP(dropout)
.describe(R"(Applies dropout operation to input array.
- During training, each element of the input is set to zero with probability p.
The whole array is rescaled by :math:`1/(1-p)` to keep the expected
sum of the input unchanged.
)" NNVM_ADD_FILELINE)
.add_argument("data", "Tensor", "Input to which dropout will be applied")
.set_num_inputs(1)
.set_num_outputs(2)
.set_attr_parser(ParamParser<DropoutParam>)
.set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 2>)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 2>)
.set_attr<FNumVisibleOutputs>("FNumVisibleOutputs", [](const NodeAttrs& attrs) {
return 1;
})
.set_attr<FListOutputNames>("FListOutputNames", [](const NodeAttrs& attrs) {
return std::vector<std::string>{"output", "mask"};
})
.set_support_level(1);
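A minimal sketch of the computation the docstring describes, with the mask produced as the second (hidden) output; names are illustrative and this is not nnvm's actual kernel:

#include <cstddef>
#include <random>
#include <vector>

// Zero each element with probability `rate`; survivors are scaled by
// 1/(1-rate) so the expected sum is unchanged.
void DropoutForward(const std::vector<float>& data, float rate,
                    std::vector<float>* out, std::vector<float>* mask) {
  std::mt19937 rng(0);  // fixed seed, for the sketch only
  std::bernoulli_distribution keep(1.0 - rate);
  const float scale = 1.0f / (1.0f - rate);
  out->resize(data.size());
  mask->resize(data.size());
  for (std::size_t i = 0; i < data.size(); ++i) {
    (*mask)[i] = keep(rng) ? scale : 0.0f;
    (*out)[i] = data[i] * (*mask)[i];
  }
}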

// batchnorm
DMLC_REGISTER_PARAMETER(BatchNormParam);

NNVM_REGISTER_OP(batch_norm)
.describe(R"(Batch normalization layer (Ioffe and Szegedy, 2014).
Normalizes the input at each batch, i.e. applies a transformation
that maintains the mean activation close to 0 and the activation
standard deviation close to 1.
.. math::
data\_mean[i] = mean(data[:,i,:,...]) \\
data\_var[i] = var(data[:,i,:,...])
Then compute the normalized output, which has the same shape as input, as following:
.. math::
out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i]
Both *mean* and *var* return a scalar by treating the input as a vector.
Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta`` have shape *(k,)*.
Besides the inputs and the outputs, this operator accepts two auxiliary
states, ``moving_mean`` and ``moving_var``, which are *k*-length
vectors. They are global statistics for the whole dataset, which are updated
by::
moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
moving_var = moving_var * momentum + data_var * (1 - momentum)
The parameter ``axis`` specifies which axis of the input shape denotes
the 'channel' (separately normalized groups). The default is 1. Specifying -1 sets the channel
axis to be the last item in the input shape.
)" NNVM_ADD_FILELINE)
.add_argument("data", "Tensor", "Input to which dropout will be applied")
.add_argument("gamma", "Tensor", "The gamma scale factor")
.add_argument("beta", "Tensor", "The beta offset factor")
.add_argument("moving_mean", "Tensor", "running mean of input")
.add_argument("moving_var", "Tensor", "running variance of input")
.set_num_inputs(5)
.set_num_outputs(3)
.set_attr_parser(ParamParser<BatchNormParam>)
.set_attr<FListInputNames>("FListInputNames", [](const NodeAttrs& attrs) {
return std::vector<std::string>{"data", "gamma", "beta", "moving_mean", "moving_var"};
})
.set_attr<FListOutputNames>("FListOutputNames", [](const NodeAttrs& attrs) {
return std::vector<std::string>{"output", "mean", "var"};
})
.set_attr<FNumVisibleOutputs>("FNumVisibleOutputs", [](const NodeAttrs& attrs) {
return 1;
})
.set_attr<FMutateInputs>("FListMutateInputs", [](const NodeAttrs& attrs) {
return std::vector<uint32_t>{3, 4};
})
.set_support_level(1);
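At inference time the op normalizes with the stored moving statistics instead of batch statistics. An illustrative per-channel version for an [N, C] input with ``axis`` = 1 (again not nnvm's kernel):

#include <cmath>
#include <vector>

void BatchNormInfer(const std::vector<float>& data, int N, int C,
                    const std::vector<float>& gamma,
                    const std::vector<float>& beta,
                    const std::vector<float>& moving_mean,
                    const std::vector<float>& moving_var,
                    float epsilon, std::vector<float>* out) {
  out->resize(data.size());
  for (int n = 0; n < N; ++n) {
    for (int c = 0; c < C; ++c) {
      const float x = data[n * C + c];
      // out = (x - mean) / sqrt(var + eps) * gamma + beta, per channel c
      (*out)[n * C + c] =
          (x - moving_mean[c]) / std::sqrt(moving_var[c] + epsilon)
          * gamma[c] + beta[c];
    }
  }
}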

// softmax
DMLC_REGISTER_PARAMETER(SoftmaxParam);

NNVM_REGISTER_OP(softmax)
.describe(R"code(Computes softmax.
.. math:: \text{softmax}(x)_i = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
)code" NNVM_ADD_FILELINE)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr_parser(ParamParser<SoftmaxParam>)
.set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 1>)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_support_level(1);
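The reduction the docstring defines, sketched over a single 1-D slice; subtracting the slice max first is the usual guard against ``exp`` overflow (illustrative, not nnvm's kernel):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> Softmax(const std::vector<float>& x) {
  const float m = *std::max_element(x.begin(), x.end());
  std::vector<float> out(x.size());
  float sum = 0.0f;
  for (std::size_t i = 0; i < x.size(); ++i) {
    out[i] = std::exp(x[i] - m);  // shift by max: exp argument is <= 0
    sum += out[i];
  }
  for (float& v : out) v /= sum;  // normalize so the slice sums to 1
  return out;
}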

// log_softmax
DMLC_REGISTER_PARAMETER(LogSoftmaxParam);

NNVM_REGISTER_OP(log_softmax)
.describe(R"code(Computes softmax.
.. math:: \text{log_softmax}(x)_i = \log \frac{exp(x_i)}{\sum_j exp(x_j)}
)code" NNVM_ADD_FILELINE)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr_parser(ParamParser<LogSoftmaxParam>)
.set_attr<FInferShape>("FInferShape", ElemwiseShape<1, 1>)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_support_level(1);
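Computed directly rather than as ``log(softmax(x))`` so small probabilities do not underflow to ``-inf``; again an illustrative slice-level sketch:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> LogSoftmax(const std::vector<float>& x) {
  const float m = *std::max_element(x.begin(), x.end());
  float sum = 0.0f;
  for (float v : x) sum += std::exp(v - m);
  const float log_sum = std::log(sum);
  std::vector<float> out(x.size());
  for (std::size_t i = 0; i < x.size(); ++i) {
    out[i] = (x[i] - m) - log_sum;  // log softmax without forming exp ratios
  }
  return out;
}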

} // namespace top
} // namespace nnvm