diff --git a/doc/python/model.md b/doc/python/model.md
index fca3a39ec893..f6f27c99d082 100644
--- a/doc/python/model.md
+++ b/doc/python/model.md
@@ -23,7 +23,7 @@ data = mx.symbol.Variable('data')
 fc1 = mx.symbol.FullyConnected(data, name='fc1', num_hidden=128)
 act1 = mx.symbol.Activation(fc1, name='relu1', act_type='relu')
 fc2 = mx.symbol.FullyConnected(act1, name='fc2', num_hidden=64)
-softmax = mx.symbol.Softmax(fc2, name='sm')
+softmax = mx.symbol.SoftmaxOutput(fc2, name='sm')
 # create a model
 model = mx.model.FeedForward.create(
      softmax,
diff --git a/doc/python/symbol.md b/doc/python/symbol.md
index b153fdb32773..2ca0ee59d6b9 100644
--- a/doc/python/symbol.md
+++ b/doc/python/symbol.md
@@ -23,7 +23,7 @@ The following code gives an example of two layer neural network configuration.
 >>> net = mx.symbol.FullyConnected(data=net, name='fc1', num_hidden=128)
 >>> net = mx.symbol.Activation(data=net, name='relu1', act_type="relu")
 >>> net = mx.symbol.FullyConnected(data=net, name='fc2', num_hidden=64)
->>> net = mx.symbol.Softmax(data=net, name='out')
+>>> net = mx.symbol.SoftmaxOutput(data=net, name='out')
 >>> type(net)
 ```
diff --git a/example/cifar10/cifar10.py b/example/cifar10/cifar10.py
index efb1122504a0..533f6714bd96 100644
--- a/example/cifar10/cifar10.py
+++ b/example/cifar10/cifar10.py
@@ -55,7 +55,7 @@ def SimpleFactory(data, ch_1x1, ch_3x3):
 pool = mx.symbol.Pooling(data=in5b, pool_type="avg", kernel=(7,7), name="global_pool")
 flatten = mx.symbol.Flatten(data=pool, name="flatten1")
 fc = mx.symbol.FullyConnected(data=flatten, num_hidden=10, name="fc1")
-softmax = mx.symbol.Softmax(data=fc, name="loss")
+softmax = mx.symbol.SoftmaxOutput(data=fc, name="loss")
 
 #########################################################
diff --git a/example/imagenet/alexnet.py b/example/imagenet/alexnet.py
index dbf5e9a28ba4..b933b090e5b2 100644
--- a/example/imagenet/alexnet.py
+++ b/example/imagenet/alexnet.py
@@ -40,7 +40,7 @@ dropout2 = mx.symbol.Dropout(data=relu7, p=0.5)
 # stage 6
 fc3 = mx.symbol.FullyConnected(data=dropout2, num_hidden=1000)
-softmax = mx.symbol.Softmax(data=fc3)
+softmax = mx.symbol.SoftmaxOutput(data=fc3)
 
 ## data
diff --git a/example/imagenet/inception-full.py b/example/imagenet/inception-full.py
index d703a6db59a2..1ac0a5c14a68 100644
--- a/example/imagenet/inception-full.py
+++ b/example/imagenet/inception-full.py
@@ -74,7 +74,7 @@ def inception(nhidden, grad_scale):
     # linear classifier
     flatten = mx.symbol.Flatten(data=avg, name='flatten')
     fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=nhidden, name='fc1')
-    softmax = mx.symbol.Softmax(data=fc1, name='softmax')
+    softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax')
     return softmax
 
 softmax = inception(21841, 1.0)
diff --git a/example/imagenet/inception.py b/example/imagenet/inception.py
index 263f3a22733f..a9afe9c01f89 100644
--- a/example/imagenet/inception.py
+++ b/example/imagenet/inception.py
@@ -73,7 +73,7 @@ def inception(nhidden, grad_scale):
     # linear classifier
     flatten = mx.symbol.Flatten(data=avg, name='flatten')
     fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=nhidden, name='fc1')
-    softmax = mx.symbol.Softmax(data=fc1, name='softmax')
+    softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax')
     return softmax
 
 softmax = inception(1000, 1.0)
diff --git a/example/memcost/inception_memcost.py b/example/memcost/inception_memcost.py
index 8183c6774724..eb9e16908035 100644
--- a/example/memcost/inception_memcost.py
+++ b/example/memcost/inception_memcost.py
@@ -69,7 +69,7 @@ def inception(nhidden, grad_scale):
     # linear classifier
     flatten = mx.symbol.Flatten(data=avg, name='flatten')
     fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=nhidden, name='fc1')
-    softmax = mx.symbol.Softmax(data=fc1, name='softmax')
+    softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax')
     return softmax
diff --git a/example/mnist/lenet.py b/example/mnist/lenet.py
index 40779150ccfb..d8691bbe5867 100644
--- a/example/mnist/lenet.py
+++ b/example/mnist/lenet.py
@@ -23,7 +23,7 @@
 # second fullc
 fc2 = mx.symbol.FullyConnected(data=tanh3, num_hidden=10)
 # loss
-lenet = mx.symbol.Softmax(data=fc2)
+lenet = mx.symbol.SoftmaxOutput(data=fc2)
 
 ## data
 train, val = mnist_iterator(batch_size=100, input_shape=(1,28,28))
diff --git a/example/mnist/mlp.py b/example/mnist/mlp.py
index 0cfffe55cbe4..2bfa55d913ba 100644
--- a/example/mnist/mlp.py
+++ b/example/mnist/mlp.py
@@ -11,7 +11,7 @@
 fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64)
 act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu")
 fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=10)
-mlp = mx.symbol.Softmax(data = fc3, name = 'mlp')
+mlp = mx.symbol.SoftmaxOutput(data = fc3, name = 'mlp')
 
 # data
diff --git a/example/mnist/mlp_numpy.py b/example/mnist/mlp_numpy.py
index 114a6bf257d5..538aa87c7c23 100644
--- a/example/mnist/mlp_numpy.py
+++ b/example/mnist/mlp_numpy.py
@@ -11,7 +11,7 @@
 fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64)
 act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu")
 fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=10)
-mlp = mx.symbol.Softmax(data = fc3, name = 'mlp')
+mlp = mx.symbol.SoftmaxOutput(data = fc3, name = 'mlp')
 
 # data
diff --git a/example/notebooks/cifar-100.ipynb b/example/notebooks/cifar-100.ipynb
index 8e8c53a2d75b..bb5cb1b81624 100644
--- a/example/notebooks/cifar-100.ipynb
+++ b/example/notebooks/cifar-100.ipynb
@@ -131,7 +131,7 @@
    "    # linear classifier\n",
    "    flatten = mx.symbol.Flatten(data=avg, name='flatten')\n",
    "    fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=nhidden, name='fc')\n",
-   "    softmax = mx.symbol.Softmax(data=fc1, name='softmax')\n",
+   "    softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax')\n",
    "    return softmax\n",
    "\n",
    "softmax = inception(100, 1.0)"
diff --git a/example/notebooks/cifar-recipe.ipynb b/example/notebooks/cifar-recipe.ipynb
index eae38dab736c..7c436554fa47 100644
--- a/example/notebooks/cifar-recipe.ipynb
+++ b/example/notebooks/cifar-recipe.ipynb
@@ -127,7 +127,7 @@
    "pool = mx.symbol.Pooling(data=in5b, pool_type=\"avg\", kernel=(7,7), name=\"global_avg\")\n",
    "flatten = mx.symbol.Flatten(data=pool)\n",
    "fc = mx.symbol.FullyConnected(data=flatten, num_hidden=10)\n",
-   "softmax = mx.symbol.Softmax(data=fc)"
+   "softmax = mx.symbol.SoftmaxOutput(data=fc)"
   ]
  },
 {
diff --git a/example/notebooks/composite_symbol.ipynb b/example/notebooks/composite_symbol.ipynb
index 22966f5fd3f5..1d2cdaec764d 100644
--- a/example/notebooks/composite_symbol.ipynb
+++ b/example/notebooks/composite_symbol.ipynb
@@ -3691,7 +3691,7 @@
    "\n",
    "softmax0\n",
    "\n",
-   "Softmax\n",
+   "SoftmaxOutput\n",
    "\n",
    "\n",
    "softmax0->fullyconnected0\n",
@@ -3739,7 +3739,7 @@
    "# linear classifier\n",
    "flatten = mx.symbol.Flatten(data=avg)\n",
    "fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=1000)\n",
-   "softmax = mx.symbol.Softmax(data=fc1)\n",
+   "softmax = mx.symbol.SoftmaxOutput(data=fc1)\n",
    "\n",
    "# if you like, you can visualize full network structure\n",
"mx.viz.plot_network(symbol=softmax, shape={\"data\" : (128, 3, 224, 224)})" diff --git a/example/python-howto/multiple_outputs.py b/example/python-howto/multiple_outputs.py index ab6d6d12356c..97ce469d58a2 100644 --- a/example/python-howto/multiple_outputs.py +++ b/example/python-howto/multiple_outputs.py @@ -8,7 +8,7 @@ fc1 = mx.symbol.FullyConnected(data=net, name='fc1', num_hidden=128) net = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu") net = mx.symbol.FullyConnected(data=net, name='fc2', num_hidden=64) -out = mx.symbol.Softmax(data=net, name='softmax') +out = mx.symbol.SoftmaxOutput(data=net, name='softmax') # group fc1 and out together group = mx.symbol.Group([fc1, out]) print group.list_outputs() diff --git a/example/rnn/lstm.py b/example/rnn/lstm.py index 59daacc4e903..4b5706ff7208 100644 --- a/example/rnn/lstm.py +++ b/example/rnn/lstm.py @@ -83,7 +83,7 @@ def lstm_unroll(num_lstm_layer, seq_len, bias=cls_bias, num_hidden=num_label, name="t%d_cls" % seqidx) - sm = mx.sym.Softmax(data=fc, label=label, name="t%d_sm" % seqidx) + sm = mx.sym.SoftmaxOutput(data=fc, label=label, name="t%d_sm" % seqidx) out_prob.append(sm) for i in range(num_lstm_layer): diff --git a/src/operator/native_op-inl.h b/src/operator/native_op-inl.h index 3add8f14ef61..bb0589c14c1c 100644 --- a/src/operator/native_op-inl.h +++ b/src/operator/native_op-inl.h @@ -57,7 +57,7 @@ class NativeOp : public Operator { SyncVec(out_data, "out_data", s, 1); s->Wait(); param_.pinfo->forward(ptrs.size(), ptrs.data(), ndims.data(), shapes.data(), tags.data()); - for (int i = 0; i < out_data.size(); ++i) { + for (index_t i = 0; i < out_data.size(); ++i) { CHECK_NE(req[i], kAddTo) << "NativeOp doesn't support AddTo for output"; if (req[i] != kNullOp) { std::stringstream ss; @@ -90,7 +90,7 @@ class NativeOp : public Operator { } s->Wait(); param_.pinfo->backward(ptrs.size(), ptrs.data(), ndims.data(), shapes.data(), tags.data()); - for (int i = 0; i < in_grad.size(); ++i) { + for (index_t i = 0; i < in_grad.size(); ++i) { CHECK_NE(req[i], kAddTo) << "NativeOp doesn't support AddTo for output"; if (req[i] != kNullOp) { std::stringstream ss; @@ -135,7 +135,7 @@ class NativeOp : public Operator { const std::string &prefix, mshadow::Stream *stream, int tag) { - for (int i = 0; i < vec.size(); ++i) { + for (size_t i = 0; i < vec.size(); ++i) { std::stringstream name; name << prefix << i; SyncBuffer(vec[i], name.str(), stream); diff --git a/src/operator/softmax-inl.h b/src/operator/softmax-inl.h index d1e5331d9d06..5b26ef925184 100644 --- a/src/operator/softmax-inl.h +++ b/src/operator/softmax-inl.h @@ -20,15 +20,15 @@ namespace mxnet { namespace op { -namespace softmax_enum { -enum SoftmaxOpInputs {kData, kLabel}; -enum SoftmaxOpOutputs {kOut}; -} // namespace softmax_enum +namespace softmaxout_enum { +enum SoftmaxOutputOpInputs {kData, kLabel}; +enum SoftmaxOutputOpOutputs {kOut}; +} // namespace softmaxout_enum -struct SoftmaxParam : public dmlc::Parameter { +struct SoftmaxOutputParam : public dmlc::Parameter { float grad_scale; bool multi_output; - DMLC_DECLARE_PARAMETER(SoftmaxParam) { + DMLC_DECLARE_PARAMETER(SoftmaxOutputParam) { DMLC_DECLARE_FIELD(grad_scale).set_default(1.0f) .describe("Scale the gradient by a float factor"); DMLC_DECLARE_FIELD(multi_output).set_default(false) @@ -39,9 +39,9 @@ struct SoftmaxParam : public dmlc::Parameter { }; template -class SoftmaxOp : public Operator { +class SoftmaxOutputOp : public Operator { public: - explicit SoftmaxOp(SoftmaxParam param) : param_(param) {} + explicit 
SoftmaxOutputOp(SoftmaxOutputParam param) : param_(param) {} virtual void Forward(const OpContext &ctx, const std::vector &in_data, @@ -50,19 +50,19 @@ class SoftmaxOp : public Operator { const std::vector &aux_args) { using namespace mshadow; using namespace mshadow::expr; - CHECK_EQ(in_data.size(), 2) << "Softmax Input: [data, label]"; - CHECK_EQ(out_data.size(), 1) << "Softmax Output: [output]"; + CHECK_EQ(in_data.size(), 2) << "SoftmaxOutput Input: [data, label]"; + CHECK_EQ(out_data.size(), 1) << "SoftmaxOutput Output: [output]"; Stream *s = ctx.get_stream(); if (param_.multi_output) { - int n = in_data[softmax_enum::kData].size(0); - int k = in_data[softmax_enum::kData].size(1); - Shape<3> s3 = Shape3(n, k, static_cast(in_data[softmax_enum::kData].Size()/n/k)); - Tensor data = in_data[softmax_enum::kData].get_with_shape(s3, s); - Tensor out = out_data[softmax_enum::kOut].get_with_shape(s3, s); + int n = in_data[softmaxout_enum::kData].size(0); + int k = in_data[softmaxout_enum::kData].size(1); + Shape<3> s3 = Shape3(n, k, static_cast(in_data[softmaxout_enum::kData].Size()/n/k)); + Tensor data = in_data[softmaxout_enum::kData].get_with_shape(s3, s); + Tensor out = out_data[softmaxout_enum::kOut].get_with_shape(s3, s); Softmax(out, data); } else { - Tensor data = in_data[softmax_enum::kData].FlatTo2D(s); - Tensor out = out_data[softmax_enum::kOut].FlatTo2D(s); + Tensor data = in_data[softmaxout_enum::kData].FlatTo2D(s); + Tensor out = out_data[softmaxout_enum::kOut].FlatTo2D(s); Softmax(out, data); } } @@ -82,20 +82,20 @@ class SoftmaxOp : public Operator { CHECK_GE(req.size(), 1); Stream *s = ctx.get_stream(); if (param_.multi_output) { - int n = out_data[softmax_enum::kOut].size(0); - int k = out_data[softmax_enum::kOut].size(1); - Shape<3> s3 = Shape3(n, k, static_cast(out_data[softmax_enum::kOut].Size()/n/k)); - Tensor label = in_data[softmax_enum::kLabel].FlatTo2D(s); - Tensor out = out_data[softmax_enum::kOut].get_with_shape(s3, s); - Tensor grad = in_grad[softmax_enum::kData].get_with_shape(s3, s); + int n = out_data[softmaxout_enum::kOut].size(0); + int k = out_data[softmaxout_enum::kOut].size(1); + Shape<3> s3 = Shape3(n, k, static_cast(out_data[softmaxout_enum::kOut].Size()/n/k)); + Tensor label = in_data[softmaxout_enum::kLabel].FlatTo2D(s); + Tensor out = out_data[softmaxout_enum::kOut].get_with_shape(s3, s); + Tensor grad = in_grad[softmaxout_enum::kData].get_with_shape(s3, s); SoftmaxGrad(grad, out, label); if (param_.grad_scale < 1.0) { grad *= param_.grad_scale; } } else { - Tensor label = in_data[softmax_enum::kLabel].get(s); - Tensor out = out_data[softmax_enum::kOut].FlatTo2D(s); - Tensor grad = in_grad[softmax_enum::kData].FlatTo2D(s); + Tensor label = in_data[softmaxout_enum::kLabel].get(s); + Tensor out = out_data[softmaxout_enum::kOut].FlatTo2D(s); + Tensor grad = in_grad[softmaxout_enum::kData].FlatTo2D(s); SoftmaxGrad(grad, out, label); if (param_.grad_scale < 1.0) { grad *= param_.grad_scale; @@ -104,15 +104,15 @@ class SoftmaxOp : public Operator { } private: - SoftmaxParam param_; -}; // class SoftmaxOp + SoftmaxOutputParam param_; +}; // class SoftmaxOutputOp // Decalre Factory function, used for dispatch specialization template -Operator* CreateOp(SoftmaxParam param); +Operator* CreateOp(SoftmaxOutputParam param); #if DMLC_USE_CXX11 -class SoftmaxProp : public OperatorProperty { +class SoftmaxOutputProp : public OperatorProperty { public: std::vector ListArguments() const override { return {"data", "label"}; @@ -134,10 +134,10 @@ class SoftmaxProp : public 
OperatorProperty { const TShape &dshape = in_shape->at(0); if (dshape.ndim() == 0) return false; if (param_.multi_output) { - SHAPE_ASSIGN_CHECK(*in_shape, softmax_enum::kLabel, + SHAPE_ASSIGN_CHECK(*in_shape, softmaxout_enum::kLabel, Shape2(dshape[0], dshape.Size()/dshape[0]/dshape[1])); } else { - SHAPE_ASSIGN_CHECK(*in_shape, softmax_enum::kLabel, Shape1(dshape[0])); + SHAPE_ASSIGN_CHECK(*in_shape, softmaxout_enum::kLabel, Shape1(dshape[0])); } out_shape->clear(); out_shape->push_back(dshape); @@ -145,20 +145,20 @@ class SoftmaxProp : public OperatorProperty { } OperatorProperty* Copy() const override { - auto ptr = new SoftmaxProp(); + auto ptr = new SoftmaxOutputProp(); ptr->param_ = param_; return ptr; } std::string TypeString() const override { - return "Softmax"; + return "SoftmaxOutput"; } std::vector DeclareBackwardDependency( const std::vector &out_grad, const std::vector &in_data, const std::vector &out_data) const override { - return {in_data[softmax_enum::kLabel], out_data[softmax_enum::kOut]}; + return {in_data[softmaxout_enum::kLabel], out_data[softmaxout_enum::kOut]}; } std::vector > BackwardInplaceOption( @@ -166,20 +166,29 @@ class SoftmaxProp : public OperatorProperty { const std::vector &in_data, const std::vector &out_data, const std::vector &in_grad) const override { - return {{out_data[softmax_enum::kOut], in_grad[softmax_enum::kData]}}; + return {{out_data[softmaxout_enum::kOut], in_grad[softmaxout_enum::kData]}}; } std::vector > ForwardInplaceOption( const std::vector &in_data, const std::vector &out_data) const override { - return {{in_data[softmax_enum::kData], out_data[softmax_enum::kOut]}}; + return {{in_data[softmaxout_enum::kData], out_data[softmaxout_enum::kOut]}}; } Operator* CreateOperator(Context ctx) const; - private: - SoftmaxParam param_; -}; // class SoftmaxProp + protected: + SoftmaxOutputParam param_; +}; // class SoftmaxOutputProp + +class DeprecatedSoftmaxProp : public SoftmaxOutputProp { + public: + std::string TypeString() const override { + LOG(INFO) << "Softmax symbol is renamed to SoftmaxOutput. " + << "This API will be deprecated in Dec, 2015"; + return "Softmax"; + } +}; #endif // DMLC_USE_CXX11 } // namespace op diff --git a/src/operator/softmax.cc b/src/operator/softmax.cc index 2c2516ba9bc9..4b8a1649e065 100644 --- a/src/operator/softmax.cc +++ b/src/operator/softmax.cc @@ -9,20 +9,25 @@ namespace mxnet { namespace op { template<> -Operator *CreateOp(SoftmaxParam param) { - return new SoftmaxOp(param); +Operator *CreateOp(SoftmaxOutputParam param) { + return new SoftmaxOutputOp(param); } -Operator *SoftmaxProp::CreateOperator(Context ctx) const { +Operator *SoftmaxOutputProp::CreateOperator(Context ctx) const { DO_BIND_DISPATCH(CreateOp, param_); } -DMLC_REGISTER_PARAMETER(SoftmaxParam); +DMLC_REGISTER_PARAMETER(SoftmaxOutputParam); -MXNET_REGISTER_OP_PROPERTY(Softmax, SoftmaxProp) -.describe("Perform a softmax transformation on input.") +MXNET_REGISTER_OP_PROPERTY(SoftmaxOutput, SoftmaxOutputProp) +.describe("Perform a softmax transformation on input, backprop with logloss.") .add_argument("data", "Symbol", "Input data to softmax.") -.add_arguments(SoftmaxParam::__FIELDS__()); +.add_arguments(SoftmaxOutputParam::__FIELDS__()); + +MXNET_REGISTER_OP_PROPERTY(Softmax, DeprecatedSoftmaxProp) +.describe("DEPRECATED: Perform a softmax transformation on input. 
Please use SoftmaxOutput") +.add_argument("data", "Symbol", "Input data to softmax.") +.add_arguments(SoftmaxOutputParam::__FIELDS__()); } // namespace op } // namespace mxnet diff --git a/src/operator/softmax.cu b/src/operator/softmax.cu index 0ebbfc16ce68..b2940f38fd61 100644 --- a/src/operator/softmax.cu +++ b/src/operator/softmax.cu @@ -10,8 +10,8 @@ namespace mxnet { namespace op { template<> -Operator *CreateOp(SoftmaxParam param) { - return new SoftmaxOp(param); +Operator *CreateOp(SoftmaxOutputParam param) { + return new SoftmaxOutputOp(param); } } // namespace op diff --git a/tests/python/common/models.py b/tests/python/common/models.py index 71df3f07cf47..2c998afcd1db 100644 --- a/tests/python/common/models.py +++ b/tests/python/common/models.py @@ -24,6 +24,6 @@ def conv(): fl = mx.symbol.Flatten(data = mp2, name="flatten") fc2 = mx.symbol.FullyConnected(data = fl, name='fc2', num_hidden=10) - softmax = mx.symbol.Softmax(data = fc2, name = 'sm') + softmax = mx.symbol.SoftmaxOutput(data = fc2, name = 'sm') return softmax diff --git a/tests/python/multi-node/common.py b/tests/python/multi-node/common.py index 0db092462a78..d35a1a1fe3f4 100644 --- a/tests/python/multi-node/common.py +++ b/tests/python/multi-node/common.py @@ -86,7 +86,7 @@ def mlp(): fc2 = mx.symbol.FullyConnected(act1, name = 'fc2', num_hidden = 64) act2 = mx.symbol.Activation(fc2, name='relu2', act_type="relu") fc3 = mx.symbol.FullyConnected(act2, name='fc3', num_hidden=10) - softmax = mx.symbol.Softmax(fc3, name = 'sm') + softmax = mx.symbol.SoftmaxOutput(fc3, name = 'sm') return softmax def lenet(): @@ -109,7 +109,7 @@ def lenet(): # second fullc fc2 = mx.symbol.FullyConnected(data=tanh3, num_hidden=10) # loss - lenet = mx.symbol.Softmax(data=fc2) + lenet = mx.symbol.SoftmaxOutput(data=fc2) return lenet # Basic Conv + BN + ReLU factory @@ -155,5 +155,5 @@ def inception(): pool = mx.symbol.Pooling(data=in5b, pool_type="avg", kernel=(7,7), name="global_pool") flatten = mx.symbol.Flatten(data=pool, name="flatten1") fc = mx.symbol.FullyConnected(data=flatten, num_hidden=10, name="fc1") - softmax = mx.symbol.Softmax(data=fc, name="loss") + softmax = mx.symbol.SoftmaxOutput(data=fc, name="loss") return softmax diff --git a/tests/python/multi-node/imagenet.py b/tests/python/multi-node/imagenet.py index 7663df8d1bad..f4d7c1e35bb3 100644 --- a/tests/python/multi-node/imagenet.py +++ b/tests/python/multi-node/imagenet.py @@ -97,5 +97,5 @@ def inception(nhidden): # linear classifier flatten = mx.symbol.Flatten(data=avg, name='flatten') fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=nhidden, name='fc1') - softmax = mx.symbol.Softmax(data=fc1, name='softmax') + softmax = mx.symbol.SotfmaxOutput(data=fc1, name='softmax') return softmax diff --git a/tests/python/train/test_conv.py b/tests/python/train/test_conv.py index bc068153c24e..9d8f77fd7c65 100644 --- a/tests/python/train/test_conv.py +++ b/tests/python/train/test_conv.py @@ -24,7 +24,7 @@ fl = mx.symbol.Flatten(data = mp2, name="flatten") fc2 = mx.symbol.FullyConnected(data = fl, name='fc2', num_hidden=10) -softmax = mx.symbol.Softmax(data = fc2, name = 'sm') +softmax = mx.symbol.SoftmaxOutput(data = fc2, name = 'sm') num_epoch = 1 model = mx.model.FeedForward(softmax, mx.cpu(), diff --git a/tests/python/train/test_mlp.py b/tests/python/train/test_mlp.py index 5f1c27062066..84a6f17f47d5 100644 --- a/tests/python/train/test_mlp.py +++ b/tests/python/train/test_mlp.py @@ -14,7 +14,7 @@ fc2 = mx.symbol.FullyConnected(act1, name = 'fc2', num_hidden = 64) act2 = 
mx.symbol.Activation(fc2, name='relu2', act_type="relu") fc3 = mx.symbol.FullyConnected(act2, name='fc3', num_hidden=10) -softmax = mx.symbol.Softmax(fc3, name = 'sm') +softmax = mx.symbol.SoftmaxOutput(fc3, name = 'sm') def accuracy(label, pred): py = np.argmax(pred, axis=1) diff --git a/tools/caffe_converter/convert_model.py b/tools/caffe_converter/convert_model.py old mode 100755 new mode 100644 diff --git a/tools/caffe_converter/convert_symbol.py b/tools/caffe_converter/convert_symbol.py index 9b5bcde99848..88acea0b7c4e 100644 --- a/tools/caffe_converter/convert_symbol.py +++ b/tools/caffe_converter/convert_symbol.py @@ -41,7 +41,7 @@ def proto2script(proto_file): name = layer[i].name.replace('/', '_') if layer[i].type == 'Convolution' or layer[i].type == 4: type_string = 'mx.symbol.Convolution' - param = layer[i].convolution_param + param = layer[i].convolution_param pad = 0 if len(param.pad) == 0 else param.pad[0] stride = 1 if len(param.stride) == 0 else param.stride[0] param_string = "num_filter=%d, pad=(%d,%d), kernel=(%d,%d), stride=(%d,%d), no_bias=%s" %\ @@ -67,7 +67,7 @@ def proto2script(proto_file): need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] if layer[i].type == 'LRN' or layer[i].type == 15: type_string = 'mx.symbol.LRN' - param = layer[i].lrn_param + param = layer[i].lrn_param param_string = "alpha=%f, beta=%f, knorm=%f, nsize=%d" %\ (param.alpha, param.beta, param.k, param.local_size) need_flatten[name] = True @@ -82,7 +82,7 @@ def proto2script(proto_file): param_string = "p=%f" % param.dropout_ratio need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] if layer[i].type == 'Softmax' or layer[i].type == 20: - type_string = 'mx.symbol.Softmax' + type_string = 'mx.symbol.SoftmaxOutput' # We only support single output network for now. output_name = name @@ -96,7 +96,7 @@ def proto2script(proto_file): need_flatten[name] = True if type_string == '': raise Exception('Unknown Layer %s!' % layer[i].type) - + if type_string != 'split': bottom = layer[i].bottom if param_string != "": @@ -137,4 +137,4 @@ def main(): print(symbol_string) if __name__ == '__main__': - main() \ No newline at end of file + main()
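
Note for reviewers (not part of the patch): a minimal usage sketch of the rename, assuming an MXNet build that includes this change. The toy network and variable names below are illustrative only.

```python
import mxnet as mx

# Build a small classifier with the new symbol name registered by this patch.
data = mx.symbol.Variable('data')
fc = mx.symbol.FullyConnected(data=data, name='fc', num_hidden=10)
net = mx.symbol.SoftmaxOutput(data=fc, name='softmax')

# The old name remains available through the deprecated alias
# (DeprecatedSoftmaxProp above); it builds the same operator but
# logs a rename notice when the symbol is created.
legacy = mx.symbol.Softmax(data=fc, name='softmax')

# Both expose the same argument list, including the implicit label input.
print(net.list_arguments())
print(legacy.list_arguments())
```

Because DeprecatedSoftmaxProp only overrides TypeString(), scripts and saved symbols that still reference "Softmax" should keep constructing the same operator until the alias is removed.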