This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[Rename] Softmax->SoftmaxOutput #444

Merged (1 commit) on Nov 1, 2015
2 changes: 1 addition & 1 deletion doc/python/model.md
@@ -23,7 +23,7 @@ data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data, name='fc1', num_hidden=128)
act1 = mx.symbol.Activation(fc1, name='relu1', act_type='relu')
fc2 = mx.symbol.FullyConnected(act1, name='fc2', num_hidden=64)
softmax = mx.symbol.Softmax(fc2, name='sm')
softmax = mx.symbol.SoftmaxOutput(fc2, name='sm')
# create a model
model = mx.model.FeedForward.create(
softmax,
2 changes: 1 addition & 1 deletion doc/python/symbol.md
@@ -23,7 +23,7 @@ The following code gives an example of two layer neural network configuration.
>>> net = mx.symbol.FullyConnected(data=net, name='fc1', num_hidden=128)
>>> net = mx.symbol.Activation(data=net, name='relu1', act_type="relu")
>>> net = mx.symbol.FullyConnected(data=net, name='fc2', num_hidden=64)
>>> net = mx.symbol.Softmax(data=net, name='out')
>>> net = mx.symbol.SoftmaxOutput(data=net, name='out')
>>> type(net)
<class 'mxnet.symbol.Symbol'>
```
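For readers skimming the rename, the only user-visible change in this snippet is the symbol name; `SoftmaxOutput` still declares the implicit label input listed by `ListArguments()` later in this diff. A minimal sketch of what the configured net reports, assuming MXNet's usual `<name>_weight` / `<name>_label` naming (the exact strings below are assumptions, not output captured from this PR):

```python
>>> # Hypothetical result; argument names follow the "<name>_label" convention.
>>> net.list_arguments()
['data', 'fc1_weight', 'fc1_bias', 'fc2_weight', 'fc2_bias', 'out_label']
```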
2 changes: 1 addition & 1 deletion example/cifar10/cifar10.py
@@ -55,7 +55,7 @@ def SimpleFactory(data, ch_1x1, ch_3x3):
pool = mx.symbol.Pooling(data=in5b, pool_type="avg", kernel=(7,7), name="global_pool")
flatten = mx.symbol.Flatten(data=pool, name="flatten1")
fc = mx.symbol.FullyConnected(data=flatten, num_hidden=10, name="fc1")
softmax = mx.symbol.Softmax(data=fc, name="loss")
softmax = mx.symbol.SoftmaxOutput(data=fc, name="loss")

#########################################################

2 changes: 1 addition & 1 deletion example/imagenet/alexnet.py
@@ -40,7 +40,7 @@
dropout2 = mx.symbol.Dropout(data=relu7, p=0.5)
# stage 6
fc3 = mx.symbol.FullyConnected(data=dropout2, num_hidden=1000)
softmax = mx.symbol.Softmax(data=fc3)
softmax = mx.symbol.SoftmaxOutput(data=fc3)


## data
2 changes: 1 addition & 1 deletion example/imagenet/inception-full.py
@@ -74,7 +74,7 @@ def inception(nhidden, grad_scale):
# linear classifier
flatten = mx.symbol.Flatten(data=avg, name='flatten')
fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=nhidden, name='fc1')
softmax = mx.symbol.Softmax(data=fc1, name='softmax')
softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax')
return softmax

softmax = inception(21841, 1.0)
2 changes: 1 addition & 1 deletion example/imagenet/inception.py
@@ -73,7 +73,7 @@ def inception(nhidden, grad_scale):
# linear classifier
flatten = mx.symbol.Flatten(data=avg, name='flatten')
fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=nhidden, name='fc1')
softmax = mx.symbol.Softmax(data=fc1, name='softmax')
softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax')
return softmax

softmax = inception(1000, 1.0)
2 changes: 1 addition & 1 deletion example/memcost/inception_memcost.py
@@ -69,7 +69,7 @@ def inception(nhidden, grad_scale):
# linear classifier
flatten = mx.symbol.Flatten(data=avg, name='flatten')
fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=nhidden, name='fc1')
softmax = mx.symbol.Softmax(data=fc1, name='softmax')
softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax')
return softmax


2 changes: 1 addition & 1 deletion example/mnist/lenet.py
@@ -23,7 +23,7 @@
# second fullc
fc2 = mx.symbol.FullyConnected(data=tanh3, num_hidden=10)
# loss
lenet = mx.symbol.Softmax(data=fc2)
lenet = mx.symbol.SoftmaxOutput(data=fc2)

## data
train, val = mnist_iterator(batch_size=100, input_shape=(1,28,28))
2 changes: 1 addition & 1 deletion example/mnist/mlp.py
@@ -11,7 +11,7 @@
fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64)
act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=10)
mlp = mx.symbol.Softmax(data = fc3, name = 'mlp')
mlp = mx.symbol.SoftmaxOutput(data = fc3, name = 'mlp')

# data

2 changes: 1 addition & 1 deletion example/mnist/mlp_numpy.py
@@ -11,7 +11,7 @@
fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64)
act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=10)
mlp = mx.symbol.Softmax(data = fc3, name = 'mlp')
mlp = mx.symbol.SoftmaxOutput(data = fc3, name = 'mlp')

# data

2 changes: 1 addition & 1 deletion example/notebooks/cifar-100.ipynb
@@ -131,7 +131,7 @@
" # linear classifier\n",
" flatten = mx.symbol.Flatten(data=avg, name='flatten')\n",
" fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=nhidden, name='fc')\n",
" softmax = mx.symbol.Softmax(data=fc1, name='softmax')\n",
" softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax')\n",
" return softmax\n",
"\n",
"softmax = inception(100, 1.0)"
2 changes: 1 addition & 1 deletion example/notebooks/cifar-recipe.ipynb
@@ -127,7 +127,7 @@
"pool = mx.symbol.Pooling(data=in5b, pool_type=\"avg\", kernel=(7,7), name=\"global_avg\")\n",
"flatten = mx.symbol.Flatten(data=pool)\n",
"fc = mx.symbol.FullyConnected(data=flatten, num_hidden=10)\n",
"softmax = mx.symbol.Softmax(data=fc)"
"softmax = mx.symbol.SoftmaxOutput(data=fc)"
]
},
{
4 changes: 2 additions & 2 deletions example/notebooks/composite_symbol.ipynb
@@ -3691,7 +3691,7 @@
"<!-- softmax0 -->\n",
"<g id=\"node234\" class=\"node\"><title>softmax0</title>\n",
"<polygon fill=\"#b3de69\" stroke=\"black\" points=\"428,-14976 334,-14976 334,-14918 428,-14918 428,-14976\"/>\n",
"<text text-anchor=\"middle\" x=\"381\" y=\"-14943.3\" font-family=\"Times,serif\" font-size=\"14.00\">Softmax</text>\n",
"<text text-anchor=\"middle\" x=\"381\" y=\"-14943.3\" font-family=\"Times,serif\" font-size=\"14.00\">SoftmaxOutput</text>\n",
"</g>\n",
"<!-- softmax0&#45;&gt;fullyconnected0 -->\n",
"<g id=\"edge261\" class=\"edge\"><title>softmax0&#45;&gt;fullyconnected0</title>\n",
@@ -3739,7 +3739,7 @@
"# linear classifier\n",
"flatten = mx.symbol.Flatten(data=avg)\n",
"fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=1000)\n",
"softmax = mx.symbol.Softmax(data=fc1)\n",
"softmax = mx.symbol.SoftmaxOutput(data=fc1)\n",
"\n",
"# if you like, you can visualize full network structure\n",
"mx.viz.plot_network(symbol=softmax, shape={\"data\" : (128, 3, 224, 224)})"
2 changes: 1 addition & 1 deletion example/python-howto/multiple_outputs.py
@@ -8,7 +8,7 @@
fc1 = mx.symbol.FullyConnected(data=net, name='fc1', num_hidden=128)
net = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
net = mx.symbol.FullyConnected(data=net, name='fc2', num_hidden=64)
out = mx.symbol.Softmax(data=net, name='softmax')
out = mx.symbol.SoftmaxOutput(data=net, name='softmax')
# group fc1 and out together
group = mx.symbol.Group([fc1, out])
print group.list_outputs()
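As a quick check of the grouped symbol after the rename, here is a hedged sketch of what the final print shows; output names follow MXNet's `<name>_output` convention, so the exact strings are assumptions rather than captured output:

```python
>>> group.list_outputs()   # hypothetical result, not captured from a run
['fc1_output', 'softmax_output']
```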
2 changes: 1 addition & 1 deletion example/rnn/lstm.py
@@ -83,7 +83,7 @@ def lstm_unroll(num_lstm_layer, seq_len,
bias=cls_bias,
num_hidden=num_label,
name="t%d_cls" % seqidx)
sm = mx.sym.Softmax(data=fc, label=label, name="t%d_sm" % seqidx)
sm = mx.sym.SoftmaxOutput(data=fc, label=label, name="t%d_sm" % seqidx)
out_prob.append(sm)

for i in range(num_lstm_layer):
6 changes: 3 additions & 3 deletions src/operator/native_op-inl.h
@@ -57,7 +57,7 @@ class NativeOp : public Operator {
SyncVec(out_data, "out_data", s, 1);
s->Wait();
param_.pinfo->forward(ptrs.size(), ptrs.data(), ndims.data(), shapes.data(), tags.data());
for (int i = 0; i < out_data.size(); ++i) {
for (index_t i = 0; i < out_data.size(); ++i) {
CHECK_NE(req[i], kAddTo) << "NativeOp doesn't support AddTo for output";
if (req[i] != kNullOp) {
std::stringstream ss;
@@ -90,7 +90,7 @@ }
}
s->Wait();
param_.pinfo->backward(ptrs.size(), ptrs.data(), ndims.data(), shapes.data(), tags.data());
for (int i = 0; i < in_grad.size(); ++i) {
for (index_t i = 0; i < in_grad.size(); ++i) {
CHECK_NE(req[i], kAddTo) << "NativeOp doesn't support AddTo for output";
if (req[i] != kNullOp) {
std::stringstream ss;
@@ -135,7 +135,7 @@ class NativeOp : public Operator {
const std::string &prefix,
mshadow::Stream<xpu> *stream,
int tag) {
for (int i = 0; i < vec.size(); ++i) {
for (size_t i = 0; i < vec.size(); ++i) {
std::stringstream name;
name << prefix << i;
SyncBuffer(vec[i], name.str(), stream);
89 changes: 49 additions & 40 deletions src/operator/softmax-inl.h
@@ -20,15 +20,15 @@
namespace mxnet {
namespace op {

namespace softmax_enum {
enum SoftmaxOpInputs {kData, kLabel};
enum SoftmaxOpOutputs {kOut};
} // namespace softmax_enum
namespace softmaxout_enum {
enum SoftmaxOutputOpInputs {kData, kLabel};
enum SoftmaxOutputOpOutputs {kOut};
} // namespace softmaxout_enum

struct SoftmaxParam : public dmlc::Parameter<SoftmaxParam> {
struct SoftmaxOutputParam : public dmlc::Parameter<SoftmaxOutputParam> {
float grad_scale;
bool multi_output;
DMLC_DECLARE_PARAMETER(SoftmaxParam) {
DMLC_DECLARE_PARAMETER(SoftmaxOutputParam) {
DMLC_DECLARE_FIELD(grad_scale).set_default(1.0f)
.describe("Scale the gradient by a float factor");
DMLC_DECLARE_FIELD(multi_output).set_default(false)
@@ -39,9 +39,9 @@ struct SoftmaxParam : public dmlc::Parameter<SoftmaxParam> {
};

template<typename xpu>
class SoftmaxOp : public Operator {
class SoftmaxOutputOp : public Operator {
public:
explicit SoftmaxOp(SoftmaxParam param) : param_(param) {}
explicit SoftmaxOutputOp(SoftmaxOutputParam param) : param_(param) {}

virtual void Forward(const OpContext &ctx,
const std::vector<TBlob> &in_data,
@@ -50,19 +50,19 @@ class SoftmaxOp : public Operator {
const std::vector<TBlob> &aux_args) {
using namespace mshadow;
using namespace mshadow::expr;
CHECK_EQ(in_data.size(), 2) << "Softmax Input: [data, label]";
CHECK_EQ(out_data.size(), 1) << "Softmax Output: [output]";
CHECK_EQ(in_data.size(), 2) << "SoftmaxOutput Input: [data, label]";
CHECK_EQ(out_data.size(), 1) << "SoftmaxOutput Output: [output]";
Stream<xpu> *s = ctx.get_stream<xpu>();
if (param_.multi_output) {
int n = in_data[softmax_enum::kData].size(0);
int k = in_data[softmax_enum::kData].size(1);
Shape<3> s3 = Shape3(n, k, static_cast<int>(in_data[softmax_enum::kData].Size()/n/k));
Tensor<xpu, 3> data = in_data[softmax_enum::kData].get_with_shape<xpu, 3, real_t>(s3, s);
Tensor<xpu, 3> out = out_data[softmax_enum::kOut].get_with_shape<xpu, 3, real_t>(s3, s);
int n = in_data[softmaxout_enum::kData].size(0);
int k = in_data[softmaxout_enum::kData].size(1);
Shape<3> s3 = Shape3(n, k, static_cast<int>(in_data[softmaxout_enum::kData].Size()/n/k));
Tensor<xpu, 3> data = in_data[softmaxout_enum::kData].get_with_shape<xpu, 3, real_t>(s3, s);
Tensor<xpu, 3> out = out_data[softmaxout_enum::kOut].get_with_shape<xpu, 3, real_t>(s3, s);
Softmax(out, data);
} else {
Tensor<xpu, 2> data = in_data[softmax_enum::kData].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 2> out = out_data[softmax_enum::kOut].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 2> data = in_data[softmaxout_enum::kData].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 2> out = out_data[softmaxout_enum::kOut].FlatTo2D<xpu, real_t>(s);
Softmax(out, data);
}
}
@@ -82,20 +82,20 @@ class SoftmaxOp : public Operator {
CHECK_GE(req.size(), 1);
Stream<xpu> *s = ctx.get_stream<xpu>();
if (param_.multi_output) {
int n = out_data[softmax_enum::kOut].size(0);
int k = out_data[softmax_enum::kOut].size(1);
Shape<3> s3 = Shape3(n, k, static_cast<int>(out_data[softmax_enum::kOut].Size()/n/k));
Tensor<xpu, 2> label = in_data[softmax_enum::kLabel].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 3> out = out_data[softmax_enum::kOut].get_with_shape<xpu, 3, real_t>(s3, s);
Tensor<xpu, 3> grad = in_grad[softmax_enum::kData].get_with_shape<xpu, 3, real_t>(s3, s);
int n = out_data[softmaxout_enum::kOut].size(0);
int k = out_data[softmaxout_enum::kOut].size(1);
Shape<3> s3 = Shape3(n, k, static_cast<int>(out_data[softmaxout_enum::kOut].Size()/n/k));
Tensor<xpu, 2> label = in_data[softmaxout_enum::kLabel].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 3> out = out_data[softmaxout_enum::kOut].get_with_shape<xpu, 3, real_t>(s3, s);
Tensor<xpu, 3> grad = in_grad[softmaxout_enum::kData].get_with_shape<xpu, 3, real_t>(s3, s);
SoftmaxGrad(grad, out, label);
if (param_.grad_scale < 1.0) {
grad *= param_.grad_scale;
}
} else {
Tensor<xpu, 1> label = in_data[softmax_enum::kLabel].get<xpu, 1, real_t>(s);
Tensor<xpu, 2> out = out_data[softmax_enum::kOut].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 2> grad = in_grad[softmax_enum::kData].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 1> label = in_data[softmaxout_enum::kLabel].get<xpu, 1, real_t>(s);
Tensor<xpu, 2> out = out_data[softmaxout_enum::kOut].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 2> grad = in_grad[softmaxout_enum::kData].FlatTo2D<xpu, real_t>(s);
SoftmaxGrad(grad, out, label);
if (param_.grad_scale < 1.0) {
grad *= param_.grad_scale;
@@ -104,15 +104,15 @@
}

private:
SoftmaxParam param_;
}; // class SoftmaxOp
SoftmaxOutputParam param_;
}; // class SoftmaxOutputOp

// Declare factory function, used for dispatch specialization
template<typename xpu>
Operator* CreateOp(SoftmaxParam param);
Operator* CreateOp(SoftmaxOutputParam param);

#if DMLC_USE_CXX11
class SoftmaxProp : public OperatorProperty {
class SoftmaxOutputProp : public OperatorProperty {
public:
std::vector<std::string> ListArguments() const override {
return {"data", "label"};
@@ -134,52 +134,61 @@ class SoftmaxProp : public OperatorProperty {
const TShape &dshape = in_shape->at(0);
if (dshape.ndim() == 0) return false;
if (param_.multi_output) {
SHAPE_ASSIGN_CHECK(*in_shape, softmax_enum::kLabel,
SHAPE_ASSIGN_CHECK(*in_shape, softmaxout_enum::kLabel,
Shape2(dshape[0], dshape.Size()/dshape[0]/dshape[1]));
} else {
SHAPE_ASSIGN_CHECK(*in_shape, softmax_enum::kLabel, Shape1(dshape[0]));
SHAPE_ASSIGN_CHECK(*in_shape, softmaxout_enum::kLabel, Shape1(dshape[0]));
}
out_shape->clear();
out_shape->push_back(dshape);
return true;
}

OperatorProperty* Copy() const override {
auto ptr = new SoftmaxProp();
auto ptr = new SoftmaxOutputProp();
ptr->param_ = param_;
return ptr;
}

std::string TypeString() const override {
return "Softmax";
return "SoftmaxOutput";
}

std::vector<int> DeclareBackwardDependency(
const std::vector<int> &out_grad,
const std::vector<int> &in_data,
const std::vector<int> &out_data) const override {
return {in_data[softmax_enum::kLabel], out_data[softmax_enum::kOut]};
return {in_data[softmaxout_enum::kLabel], out_data[softmaxout_enum::kOut]};
}

std::vector<std::pair<int, void*> > BackwardInplaceOption(
const std::vector<int> &out_grad,
const std::vector<int> &in_data,
const std::vector<int> &out_data,
const std::vector<void*> &in_grad) const override {
return {{out_data[softmax_enum::kOut], in_grad[softmax_enum::kData]}};
return {{out_data[softmaxout_enum::kOut], in_grad[softmaxout_enum::kData]}};
}

std::vector<std::pair<int, void*> > ForwardInplaceOption(
const std::vector<int> &in_data,
const std::vector<void*> &out_data) const override {
return {{in_data[softmax_enum::kData], out_data[softmax_enum::kOut]}};
return {{in_data[softmaxout_enum::kData], out_data[softmaxout_enum::kOut]}};
}

Operator* CreateOperator(Context ctx) const;

private:
SoftmaxParam param_;
}; // class SoftmaxProp
protected:
SoftmaxOutputParam param_;
}; // class SoftmaxOutputProp

class DeprecatedSoftmaxProp : public SoftmaxOutputProp {
public:
std::string TypeString() const override {
LOG(INFO) << "Softmax symbol is renamed to SoftmaxOutput. "
<< "This API will be deprecated in Dec, 2015";
return "Softmax";
}
};
#endif // DMLC_USE_CXX11

} // namespace op
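The renamed SoftmaxOutputParam keeps the same two fields, grad_scale and multi_output. A minimal Python sketch of how they surface on the renamed operator, assuming an (n, k, x, y) feature map for the multi_output case (variable names here are illustrative, not taken from this PR):

```python
import mxnet as mx

data = mx.symbol.Variable('data')
fc = mx.symbol.FullyConnected(data=data, num_hidden=10, name='fc')

# grad_scale < 1.0 shrinks the backward gradient (see the Backward() branch above).
out = mx.symbol.SoftmaxOutput(data=fc, grad_scale=0.5, name='softmax')

# multi_output=True applies the softmax over axis 1 at every remaining position,
# so an (n, k, x, y) input expects a label of shape (n, x*y), matching the
# Shape2(...) branch of InferShape.
conv = mx.symbol.Convolution(data=data, kernel=(3, 3), num_filter=21, name='conv')
seg = mx.symbol.SoftmaxOutput(data=conv, multi_output=True, name='seg')
```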
19 changes: 12 additions & 7 deletions src/operator/softmax.cc
@@ -9,20 +9,25 @@
namespace mxnet {
namespace op {
template<>
Operator *CreateOp<cpu>(SoftmaxParam param) {
return new SoftmaxOp<cpu>(param);
Operator *CreateOp<cpu>(SoftmaxOutputParam param) {
return new SoftmaxOutputOp<cpu>(param);
}

Operator *SoftmaxProp::CreateOperator(Context ctx) const {
Operator *SoftmaxOutputProp::CreateOperator(Context ctx) const {
DO_BIND_DISPATCH(CreateOp, param_);
}

DMLC_REGISTER_PARAMETER(SoftmaxParam);
DMLC_REGISTER_PARAMETER(SoftmaxOutputParam);

MXNET_REGISTER_OP_PROPERTY(Softmax, SoftmaxProp)
.describe("Perform a softmax transformation on input.")
MXNET_REGISTER_OP_PROPERTY(SoftmaxOutput, SoftmaxOutputProp)
.describe("Perform a softmax transformation on input, backprop with logloss.")
.add_argument("data", "Symbol", "Input data to softmax.")
.add_arguments(SoftmaxParam::__FIELDS__());
.add_arguments(SoftmaxOutputParam::__FIELDS__());

MXNET_REGISTER_OP_PROPERTY(Softmax, DeprecatedSoftmaxProp)
.describe("DEPRECATED: Perform a softmax transformation on input. Please use SoftmaxOutput")
.add_argument("data", "Symbol", "Input data to softmax.")
.add_arguments(SoftmaxOutputParam::__FIELDS__());

} // namespace op
} // namespace mxnet
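Both property classes are registered, so existing scripts keep working while new code moves to the new name. A minimal sketch of the user-facing effect (the log text comes from DeprecatedSoftmaxProp::TypeString above; treat the snippet as illustrative):

```python
import mxnet as mx

x = mx.symbol.Variable('data')
fc = mx.symbol.FullyConnected(data=x, num_hidden=10, name='fc')

# Preferred spelling after this PR.
new_name = mx.symbol.SoftmaxOutput(data=fc, name='softmax')

# Old spelling still resolves via DeprecatedSoftmaxProp, but logs
# "Softmax symbol is renamed to SoftmaxOutput. This API will be deprecated in Dec, 2015".
old_name = mx.symbol.Softmax(data=fc, name='softmax')
```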
4 changes: 2 additions & 2 deletions src/operator/softmax.cu
@@ -10,8 +10,8 @@
namespace mxnet {
namespace op {
template<>
Operator *CreateOp<gpu>(SoftmaxParam param) {
return new SoftmaxOp<gpu>(param);
Operator *CreateOp<gpu>(SoftmaxOutputParam param) {
return new SoftmaxOutputOp<gpu>(param);
}

} // namespace op