diff --git a/examples/v1alpha3/bayesianoptimization-example.yaml b/examples/v1alpha3/bayesianoptimization-example.yaml index 8cb97a180ff..ffec2f89bce 100644 --- a/examples/v1alpha3/bayesianoptimization-example.yaml +++ b/examples/v1alpha3/bayesianoptimization-example.yaml @@ -11,7 +11,7 @@ spec: goal: 0.99 objectiveMetricName: Validation-accuracy additionalMetricNames: - - accuracy + - Train-accuracy algorithm: algorithmName: bayesianoptimization algorithmSettings: @@ -51,10 +51,10 @@ spec: spec: containers: - name: {{.Trial}} - image: docker.io/kubeflowkatib/mxnet-mnist-example + image: docker.io/kubeflowkatib/mxnet-mnist command: - - "python" - - "/mxnet/example/image-classification/train_mnist.py" + - "python3" + - "/opt/mxnet-mnist/mnist.py" - "--batch-size=64" {{- with .HyperParameters}} {{- range .}} diff --git a/examples/v1alpha3/grid-example.yaml b/examples/v1alpha3/grid-example.yaml index ce3beae687b..26f5514335c 100644 --- a/examples/v1alpha3/grid-example.yaml +++ b/examples/v1alpha3/grid-example.yaml @@ -11,7 +11,7 @@ spec: goal: 0.99 objectiveMetricName: Validation-accuracy additionalMetricNames: - - accuracy + - Train-accuracy algorithm: algorithmName: grid parallelTrialCount: 3 @@ -32,8 +32,8 @@ spec: - name: --num-epochs parameterType: int feasibleSpace: - min: "20" - max: "40" + min: "10" + max: "15" # Grid doesn't support categorical, refer to https://chocolate.readthedocs.io/api/sample.html#chocolate.Grid # - name: --optimizer # parameterType: categorical @@ -55,10 +55,10 @@ spec: spec: containers: - name: {{.Trial}} - image: docker.io/kubeflowkatib/mxnet-mnist-example + image: docker.io/kubeflowkatib/mxnet-mnist command: - - "python" - - "/mxnet/example/image-classification/train_mnist.py" + - "python3" + - "/opt/mxnet-mnist/mnist.py" - "--batch-size=64" {{- with .HyperParameters}} {{- range .}} diff --git a/examples/v1alpha3/hyperband-example.yaml b/examples/v1alpha3/hyperband-example.yaml index f3f697285e9..d2001a64ea0 100644 --- a/examples/v1alpha3/hyperband-example.yaml +++ b/examples/v1alpha3/hyperband-example.yaml @@ -11,7 +11,7 @@ spec: goal: 0.99 objectiveMetricName: Validation-accuracy additionalMetricNames: - - accuracy + - Train-accuracy algorithm: algorithmName: hyperband algorithmSettings: @@ -58,10 +58,10 @@ spec: spec: containers: - name: {{.Trial}} - image: kubeflowkatib/mxnet-mnist-example + image: docker.io/kubeflowkatib/mxnet-mnist command: - - "python" - - "/mxnet/example/image-classification/train_mnist.py" + - "python3" + - "/opt/mxnet-mnist/mnist.py" - "--batch-size=64" {{- with .HyperParameters}} {{- range .}} diff --git a/examples/v1alpha3/mxnet-mnist/Dockerfile b/examples/v1alpha3/mxnet-mnist/Dockerfile new file mode 100644 index 00000000000..a08dc032d6b --- /dev/null +++ b/examples/v1alpha3/mxnet-mnist/Dockerfile @@ -0,0 +1,13 @@ +FROM ubuntu:16.04 + +RUN apt-get update && \ + apt-get install -y wget python3-dev gcc && \ + wget https://bootstrap.pypa.io/get-pip.py && \ + python3 get-pip.py + +RUN pip3 install mxnet + +ADD . /opt/mxnet-mnist +WORKDIR /opt/mxnet-mnist + +ENTRYPOINT ["python3", "/opt/mxnet-mnist/mnist.py"] diff --git a/examples/v1alpha3/mxnet-mnist/README.md b/examples/v1alpha3/mxnet-mnist/README.md new file mode 100644 index 00000000000..5707be4696a --- /dev/null +++ b/examples/v1alpha3/mxnet-mnist/README.md @@ -0,0 +1,6 @@ +# MXNet image classification example +This is an MXNet image classification training container that records the timestamps of the training metrics. + +It uses only a simple multilayer perceptron network (MLP).
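+ +For reference, with the logging format configured in `mnist.py`, the metric lines that Katib parses from the container stdout look roughly like this (the epoch numbers and accuracy values are illustrative): + +``` +2019-11-28T11:05:26Z INFO Epoch[2] Train-accuracy=0.956687 +2019-11-28T11:05:27Z INFO Epoch[2] Validation-accuracy=0.955414 +```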
+ +If you want to read more about this example, visit the official [incubator-mxnet](https://github.com/apache/incubator-mxnet/tree/master/example/image-classification) GitHub repository. diff --git a/examples/v1alpha3/mxnet-mnist/common/__init__.py b/examples/v1alpha3/mxnet-mnist/common/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/examples/v1alpha3/mxnet-mnist/common/fit.py b/examples/v1alpha3/mxnet-mnist/common/fit.py new file mode 100644 index 00000000000..8e8b0197960 --- /dev/null +++ b/examples/v1alpha3/mxnet-mnist/common/fit.py @@ -0,0 +1,340 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" example train fit utility """ +import logging +import os +import time +import re +import math +import mxnet as mx + +def get_epoch_size(args, kv): + return math.ceil(int(args.num_examples / kv.num_workers) / args.batch_size) + +def _get_lr_scheduler(args, kv): + if 'lr_factor' not in args or args.lr_factor >= 1: + return (args.lr, None) + epoch_size = get_epoch_size(args, kv) + begin_epoch = args.load_epoch if args.load_epoch else 0 + if 'pow' in args.lr_step_epochs: + lr = args.lr + max_up = args.num_epochs * epoch_size + pwr = float(re.sub('pow[- ]*', '', args.lr_step_epochs)) + poly_sched = mx.lr_scheduler.PolyScheduler(max_up, lr, pwr) + return (lr, poly_sched) + step_epochs = [int(l) for l in args.lr_step_epochs.split(',')] + lr = args.lr + for s in step_epochs: + if begin_epoch >= s: + lr *= args.lr_factor + if lr != args.lr: + logging.info('Adjust learning rate to %e for epoch %d', + lr, begin_epoch) + + steps = [epoch_size * (x - begin_epoch) + for x in step_epochs if x - begin_epoch > 0] + if steps: + return (lr, mx.lr_scheduler.MultiFactorScheduler(step=steps, factor=args.lr_factor, + base_lr=args.lr)) + else: + return (lr, None) + +def _load_model(args, rank=0): + if 'load_epoch' not in args or args.load_epoch is None: + return (None, None, None) + assert args.model_prefix is not None + model_prefix = args.model_prefix + if rank > 0 and os.path.exists("%s-%d-symbol.json" % (model_prefix, rank)): + model_prefix += "-%d" % (rank) + sym, arg_params, aux_params = mx.model.load_checkpoint( + model_prefix, args.load_epoch) + logging.info('Loaded model %s_%04d.params', model_prefix, args.load_epoch) + return (sym, arg_params, aux_params) + + +def _save_model(args, rank=0): + if args.model_prefix is None: + return None + return mx.callback.do_checkpoint(args.model_prefix if rank == 0 else "%s-%d" % ( + args.model_prefix, rank), period=args.save_period) + + +def add_fit_args(parser): + """ + parser : argparse.ArgumentParser + return a parser extended with the args required by fit + """ + train = parser.add_argument_group('Training', 'model training') + train.add_argument('--network', type=str, + help='the neural network to use') + 
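# NOTE: the Katib experiment YAMLs above template a subset of these flags + # (e.g. --num-epochs, --lr, --optimizer) as search-space hyperparameters; + # any flag that is not swept falls back to the defaults declared here. + 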
train.add_argument('--num-layers', type=int, + help='number of layers in the neural network, \ + required by some networks such as resnet') + train.add_argument('--gpus', type=str, + help='list of gpus to run, e.g. 0 or 0,2,5. empty means using cpu') + train.add_argument('--kv-store', type=str, default='device', + help='key-value store type') + train.add_argument('--num-epochs', type=int, default=100, + help='max num of epochs') + train.add_argument('--lr', type=float, default=0.1, + help='initial learning rate') + train.add_argument('--lr-factor', type=float, default=0.1, + help='the ratio to reduce lr on each step') + train.add_argument('--lr-step-epochs', type=str, + help='the epochs to reduce the lr, e.g. 30,60') + train.add_argument('--initializer', type=str, default='default', + help='the initializer type') + train.add_argument('--optimizer', type=str, default='sgd', + help='the optimizer type') + train.add_argument('--mom', type=float, default=0.9, + help='momentum for sgd') + train.add_argument('--wd', type=float, default=0.0001, + help='weight decay for sgd') + train.add_argument('--batch-size', type=int, default=128, + help='the batch size') + train.add_argument('--disp-batches', type=int, default=20, + help='show progress for every n batches') + train.add_argument('--model-prefix', type=str, + help='model prefix') + train.add_argument('--save-period', type=int, default=1, help='params saving period') + parser.add_argument('--monitor', dest='monitor', type=int, default=0, + help='log network parameters every N iters if larger than 0') + train.add_argument('--load-epoch', type=int, + help='load the model on an epoch using the model-load-prefix') + train.add_argument('--top-k', type=int, default=0, + help='report the top-k accuracy. 0 means no report.') + train.add_argument('--loss', type=str, default='', + help='show the cross-entropy or nll loss. ce stands for cross-entropy, nll_loss stands for negative log-likelihood loss') + train.add_argument('--test-io', type=int, default=0, + help='1 means test reading speed without training') + train.add_argument('--dtype', type=str, default='float32', + help='precision: float32 or float16') + train.add_argument('--gc-type', type=str, default='none', + help='type of gradient compression to use, \ + takes `2bit` or `none` for now') + train.add_argument('--gc-threshold', type=float, default=0.5, + help='threshold for 2bit gradient compression') + # additional parameters for large batch sgd + train.add_argument('--macrobatch-size', type=int, default=0, + help='distributed effective batch size') + train.add_argument('--warmup-epochs', type=int, default=5, + help='the epochs to ramp-up lr to scaled large-batch value') + train.add_argument('--warmup-strategy', type=str, default='linear', + help='the ramping-up strategy for large batch sgd') + train.add_argument('--profile-worker-suffix', type=str, default='', + help='profile worker actions into this file. 
During distributed training\ the saved filename will be rank1_ followed by this suffix') + train.add_argument('--profile-server-suffix', type=str, default='', + help='profile server actions into a file with name like rank1_ followed by this suffix \ + during distributed training') + train.add_argument('--use-imagenet-data-augmentation', type=int, default=0, + help='enable data augmentation of ImageNet data, default disabled') + return train + + +def fit(args, network, data_loader, **kwargs): + """ + train a model + args : argparse returns + network : the symbol definition of the neural network + data_loader : function that returns the train and val data iterators + """ + # kvstore + kv = mx.kvstore.create(args.kv_store) + if args.gc_type != 'none': + kv.set_gradient_compression({'type': args.gc_type, + 'threshold': args.gc_threshold}) + if args.profile_server_suffix: + mx.profiler.set_config(filename=args.profile_server_suffix, profile_all=True, profile_process='server') + mx.profiler.set_state(state='run', profile_process='server') + + if args.profile_worker_suffix: + if kv.num_workers > 1: + filename = 'rank' + str(kv.rank) + '_' + args.profile_worker_suffix + else: + filename = args.profile_worker_suffix + mx.profiler.set_config(filename=filename, profile_all=True, profile_process='worker') + mx.profiler.set_state(state='run', profile_process='worker') + + # logging + head = '%(asctime)-15s Node[' + str(kv.rank) + '] %(message)s' + logging.basicConfig(level=logging.DEBUG, format=head) + logging.info('start with arguments %s', args) + + epoch_size = get_epoch_size(args, kv) + + # data iterators + (train, val) = data_loader(args, kv) + if 'dist' in args.kv_store and 'async' not in args.kv_store: + logging.info('Resizing training data to %d batches per machine', epoch_size) + # resize train iter to ensure each machine has same number of batches per epoch + # if not, dist_sync can hang at the end with one machine waiting for other machines + train = mx.io.ResizeIter(train, epoch_size) + + if args.test_io: + tic = time.time() + for i, batch in enumerate(train): + if isinstance(batch, list): + for b in batch: + for j in b.data: + j.wait_to_read() + else: + for j in batch.data: + j.wait_to_read() + if (i + 1) % args.disp_batches == 0: + logging.info('Batch [%d]\tSpeed: %.2f samples/sec', i, + args.disp_batches * args.batch_size / (time.time() - tic)) + tic = time.time() + return + + # load model + if 'arg_params' in kwargs and 'aux_params' in kwargs: + arg_params = kwargs['arg_params'] + aux_params = kwargs['aux_params'] + else: + sym, arg_params, aux_params = _load_model(args, kv.rank) + if sym is not None: + assert sym.tojson() == network.tojson() + + # save model + checkpoint = _save_model(args, kv.rank) + + # devices for training + devs = mx.cpu() if args.gpus is None or args.gpus == "" else [ + mx.gpu(int(i)) for i in args.gpus.split(',')] + + # learning rate + lr, lr_scheduler = _get_lr_scheduler(args, kv) + + # create model + model = mx.mod.Module( + context=devs, + symbol=network + ) + + optimizer_params = { + 'learning_rate': lr, + 'wd': args.wd, + 'lr_scheduler': lr_scheduler, + 'multi_precision': True} + + # Only a limited number of optimizers have 'momentum' property + has_momentum = {'sgd', 'dcasgd', 'nag', 'signum', 'lbsgd'} + if args.optimizer in has_momentum: + optimizer_params['momentum'] = args.mom + + monitor = mx.mon.Monitor( + args.monitor, pattern=".*") if args.monitor > 0 else None + + # A limited number of optimizers have a warmup period + 
has_warmup = {'lbsgd', 'lbnag'} + if args.optimizer in has_warmup: + nworkers = kv.num_workers + if epoch_size < 1: + epoch_size = 1 + macrobatch_size = args.macrobatch_size + if macrobatch_size < args.batch_size * nworkers: + macrobatch_size = args.batch_size * nworkers + #batch_scale = round(float(macrobatch_size) / args.batch_size / nworkers +0.4999) + batch_scale = math.ceil( + float(macrobatch_size) / args.batch_size / nworkers) + optimizer_params['updates_per_epoch'] = epoch_size + optimizer_params['begin_epoch'] = args.load_epoch if args.load_epoch else 0 + optimizer_params['batch_scale'] = batch_scale + optimizer_params['warmup_strategy'] = args.warmup_strategy + optimizer_params['warmup_epochs'] = args.warmup_epochs + optimizer_params['num_epochs'] = args.num_epochs + + if args.initializer == 'default': + if args.network == 'alexnet': + # AlexNet will not converge using Xavier + initializer = mx.init.Normal() + # VGG will not tend to converge using Xavier-Gaussian + elif args.network and 'vgg' in args.network: + initializer = mx.init.Xavier() + else: + initializer = mx.init.Xavier( + rnd_type='gaussian', factor_type="in", magnitude=2) + # initializer = mx.init.Xavier(factor_type="in", magnitude=2.34), + elif args.initializer == 'xavier': + initializer = mx.init.Xavier() + elif args.initializer == 'msra': + initializer = mx.init.MSRAPrelu() + elif args.initializer == 'orthogonal': + initializer = mx.init.Orthogonal() + elif args.initializer == 'normal': + initializer = mx.init.Normal() + elif args.initializer == 'uniform': + initializer = mx.init.Uniform() + elif args.initializer == 'one': + initializer = mx.init.One() + elif args.initializer == 'zero': + initializer = mx.init.Zero() + + # evaluation metrics + eval_metrics = ['accuracy'] + if args.top_k > 0: + eval_metrics.append(mx.metric.create( + 'top_k_accuracy', top_k=args.top_k)) + + supported_loss = ['ce', 'nll_loss'] + if len(args.loss) > 0: + # ce or nll loss is only applicable to softmax output + loss_type_list = args.loss.split(',') + if 'softmax_output' in network.list_outputs(): + for loss_type in loss_type_list: + loss_type = loss_type.strip() + if loss_type == 'nll': + loss_type = 'nll_loss' + if loss_type not in supported_loss: + logging.warning(loss_type + ' is not a valid loss type, only cross-entropy or ' \ + 'negative log-likelihood loss is supported!') + else: + eval_metrics.append(mx.metric.create(loss_type)) + else: + logging.warning("The output is not softmax_output, loss argument will be skipped!") + + # callbacks that run after each batch + batch_end_callbacks = [mx.callback.Speedometer( + args.batch_size, args.disp_batches)] + if 'batch_end_callback' in kwargs: + cbs = kwargs['batch_end_callback'] + batch_end_callbacks += cbs if isinstance(cbs, list) else [cbs] + + # run + model.fit(train, + begin_epoch=args.load_epoch if args.load_epoch else 0, + num_epoch=args.num_epochs, + eval_data=val, + eval_metric=eval_metrics, + kvstore=kv, + optimizer=args.optimizer, + optimizer_params=optimizer_params, + initializer=initializer, + arg_params=arg_params, + aux_params=aux_params, + batch_end_callback=batch_end_callbacks, + epoch_end_callback=checkpoint, + allow_missing=True, + monitor=monitor) + + if args.profile_server_suffix: + mx.profiler.set_state(state='stop', profile_process='server') + if args.profile_worker_suffix: + mx.profiler.set_state(state='stop', profile_process='worker') diff --git a/examples/v1alpha3/mxnet-mnist/common/utils.py b/examples/v1alpha3/mxnet-mnist/common/utils.py new file mode 100644 index 
00000000000..87717020fdc --- /dev/null +++ b/examples/v1alpha3/mxnet-mnist/common/utils.py @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os +import errno + +import mxnet as mx + +def download_file(url, local_fname=None, force_write=False): + # requests is not installed by default + import requests + if local_fname is None: + local_fname = url.split('/')[-1] + if not force_write and os.path.exists(local_fname): + return local_fname + + dir_name = os.path.dirname(local_fname) + + if dir_name != "": + if not os.path.exists(dir_name): + try: # try to create the directory if it doesn't exist + os.makedirs(dir_name) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise + + r = requests.get(url, stream=True) + assert r.status_code == 200, "failed to open %s" % url + with open(local_fname, 'wb') as f: + for chunk in r.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + f.write(chunk) + return local_fname diff --git a/examples/v1alpha3/mxnet-mnist/mnist.py b/examples/v1alpha3/mxnet-mnist/mnist.py new file mode 100644 index 00000000000..b3478fdc29e --- /dev/null +++ b/examples/v1alpha3/mxnet-mnist/mnist.py @@ -0,0 +1,102 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +""" +Train mnist, see more explanation at https://mxnet.io/tutorials/python/mnist.html +""" +import os +import argparse +import logging +import mxnet as mx +import numpy as np +import gzip, struct +from common import fit +from common import utils +# This example only for mlp network +from symbols import mlp + +# Use this format (%Y-%m-%dT%H:%M:%SZ) to record timestamp of the metrics +logging.basicConfig( + format="%(asctime)s %(levelname)-8s %(message)s", + datefmt="%Y-%m-%dT%H:%M:%SZ", + level=logging.DEBUG) + +def read_data(label, image): + """ + download and read data into numpy + """ + base_url = 'http://yann.lecun.com/exdb/mnist/' + with gzip.open(utils.download_file(base_url+label, os.path.join('data',label))) as flbl: + magic, num = struct.unpack(">II", flbl.read(8)) + label = np.fromstring(flbl.read(), dtype=np.int8) + with gzip.open(utils.download_file(base_url+image, os.path.join('data',image)), 'rb') as fimg: + magic, num, rows, cols = struct.unpack(">IIII", fimg.read(16)) + image = np.fromstring(fimg.read(), dtype=np.uint8).reshape(len(label), rows, cols) + return (label, image) + + +def to4d(img): + """ + reshape to 4D arrays + """ + return img.reshape(img.shape[0], 1, 28, 28).astype(np.float32)/255 + +def get_mnist_iter(args, kv): + """ + create data iterator with NDArrayIter + """ + (train_lbl, train_img) = read_data( + 'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz') + (val_lbl, val_img) = read_data( + 't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz') + train = mx.io.NDArrayIter( + to4d(train_img), train_lbl, args.batch_size, shuffle=True) + val = mx.io.NDArrayIter( + to4d(val_img), val_lbl, args.batch_size) + return (train, val) + +if __name__ == '__main__': + # parse args + parser = argparse.ArgumentParser(description="train mnist", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--num-classes', type=int, default=10, + help='the number of classes') + parser.add_argument('--num-examples', type=int, default=60000, + help='the number of training examples') + + parser.add_argument('--add_stn', action="store_true", default=False, help='Add Spatial Transformer Network Layer (lenet only)') + parser.add_argument('--image_shape', default='1, 28, 28', help='shape of training images') + + fit.add_fit_args(parser) + parser.set_defaults( + # network + network = 'mlp', + # train + gpus = None, + batch_size = 64, + disp_batches = 100, + num_epochs = 10, + lr = .05, + lr_step_epochs = '10' + ) + args = parser.parse_args() + + # load mlp network + sym = mlp.get_symbol(**vars(args)) + + # train + fit.fit(args, sym, get_mnist_iter) diff --git a/examples/v1alpha3/mxnet-mnist/symbols/__init__.py b/examples/v1alpha3/mxnet-mnist/symbols/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/examples/v1alpha3/mxnet-mnist/symbols/mlp.py b/examples/v1alpha3/mxnet-mnist/symbols/mlp.py new file mode 100644 index 00000000000..4b190b29db9 --- /dev/null +++ b/examples/v1alpha3/mxnet-mnist/symbols/mlp.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +a simple multilayer perceptron +""" +import mxnet as mx + +def get_symbol(num_classes=10, **kwargs): + data = mx.symbol.Variable('data') + data = mx.sym.Flatten(data=data) + fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128) + act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu") + fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64) + act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu") + fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=num_classes) + mlp = mx.symbol.SoftmaxOutput(data=fc3, name='softmax') + return mlp diff --git a/examples/v1alpha3/random-example.yaml b/examples/v1alpha3/random-example.yaml index 26e0a61a81f..9742e2b2706 100644 --- a/examples/v1alpha3/random-example.yaml +++ b/examples/v1alpha3/random-example.yaml @@ -11,7 +11,7 @@ spec: goal: 0.99 objectiveMetricName: Validation-accuracy additionalMetricNames: - - accuracy + - Train-accuracy algorithm: algorithmName: random parallelTrialCount: 3 @@ -48,10 +48,10 @@ spec: spec: containers: - name: {{.Trial}} - image: docker.io/kubeflowkatib/mxnet-mnist-example + image: docker.io/kubeflowkatib/mxnet-mnist command: - - "python" - - "/mxnet/example/image-classification/train_mnist.py" + - "python3" + - "/opt/mxnet-mnist/mnist.py" - "--batch-size=64" {{- with .HyperParameters}} {{- range .}} diff --git a/examples/v1alpha3/tpe-example.yaml b/examples/v1alpha3/tpe-example.yaml index 664976ce720..524dfdb2d17 100644 --- a/examples/v1alpha3/tpe-example.yaml +++ b/examples/v1alpha3/tpe-example.yaml @@ -11,7 +11,7 @@ spec: goal: 0.99 objectiveMetricName: Validation-accuracy additionalMetricNames: - - accuracy + - Train-accuracy algorithm: algorithmName: tpe parallelTrialCount: 3 @@ -48,10 +48,10 @@ spec: spec: containers: - name: {{.Trial}} - image: docker.io/kubeflowkatib/mxnet-mnist-example + image: docker.io/kubeflowkatib/mxnet-mnist command: - - "python" - - "/mxnet/example/image-classification/train_mnist.py" + - "python3" + - "/opt/mxnet-mnist/mnist.py" - "--batch-size=64" {{- with .HyperParameters}} {{- range .}} diff --git a/manifests/v1alpha3/katib-controller/trialTemplateConfigmap.yaml b/manifests/v1alpha3/katib-controller/trialTemplateConfigmap.yaml index c0e9f0009d6..2b9e8e56727 100644 --- a/manifests/v1alpha3/katib-controller/trialTemplateConfigmap.yaml +++ b/manifests/v1alpha3/katib-controller/trialTemplateConfigmap.yaml @@ -15,10 +15,10 @@ data: spec: containers: - name: {{.Trial}} - image: docker.io/katib/mxnet-mnist-example + image: docker.io/kubeflowkatib/mxnet-mnist command: - - "python" - - "/mxnet/example/image-classification/train_mnist.py" + - "python3" + - "/opt/mxnet-mnist/mnist.py" - "--batch-size=64" {{- with .HyperParameters}} {{- range .}} diff --git a/test/e2e/v1alpha3/invalid-experiment.yaml b/test/e2e/v1alpha3/invalid-experiment.yaml index 918e2143c84..ca6065346fb 100644 --- a/test/e2e/v1alpha3/invalid-experiment.yaml +++ b/test/e2e/v1alpha3/invalid-experiment.yaml @@ -11,7 +11,7 @@ spec: goal: 0.99 objectiveMetricName: Validation-accuracy additionalMetricNames: - - accuracy + - 
Train-accuracy algorithm: algorithmName: random trialTemplate: @@ -27,10 +27,10 @@ spec: spec: containers: - name: {{.Trial}} - image: katib/mxnet-mnist-example + image: docker.io/kubeflowkatib/mxnet-mnist command: - - "python" - - "/mxnet/example/image-classification/train_mnist.py" + - "python3" + - "/opt/mxnet-mnist/mnist.py" - "--batch-size=64" restartPolicy: Never parameters: diff --git a/test/e2e/v1alpha3/valid-experiment.yaml b/test/e2e/v1alpha3/valid-experiment.yaml index e1453a9093a..487d6a581b5 100644 --- a/test/e2e/v1alpha3/valid-experiment.yaml +++ b/test/e2e/v1alpha3/valid-experiment.yaml @@ -11,7 +11,7 @@ spec: goal: 0.99 objectiveMetricName: Validation-accuracy additionalMetricNames: - - accuracy + - Train-accuracy algorithm: algorithmName: random trialTemplate: @@ -27,10 +27,10 @@ spec: spec: containers: - name: {{.Trial}} - image: katib/mxnet-mnist-example + image: docker.io/kubeflowkatib/mxnet-mnist command: - - "python" - - "/mxnet/example/image-classification/train_mnist.py" + - "python3" + - "/opt/mxnet-mnist/mnist.py" - "--batch-size=64" restartPolicy: Never parameters: diff --git a/test/scripts/v1alpha3/create-cluster.sh b/test/scripts/v1alpha3/create-cluster.sh index c409eceb874..15b420e14fb 100755 --- a/test/scripts/v1alpha3/create-cluster.sh +++ b/test/scripts/v1alpha3/create-cluster.sh @@ -33,6 +33,7 @@ echo "Creating GPU cluster" gcloud --project ${PROJECT} beta container clusters create ${CLUSTER_NAME} \ --zone ${ZONE} \ --machine-type=n1-standard-8 \ + --num-nodes=6 \ --cluster-version 1.14 echo "Configuring kubectl" gcloud --project ${PROJECT} container clusters get-credentials ${CLUSTER_NAME} \
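As a quick sanity check of how the trial templates above turn hyperparameters into CLI flags for mnist.py, here is a minimal sketch (not part of the patch; the flags are a subset of those declared in common/fit.py's add_fit_args(), and the sample values are illustrative):

```python
import argparse

# Subset of the training flags declared in common/fit.py (add_fit_args).
parser = argparse.ArgumentParser(description="rendered-trial sketch")
parser.add_argument('--batch-size', type=int, default=128)
parser.add_argument('--lr', type=float, default=0.1)
parser.add_argument('--num-epochs', type=int, default=100)
parser.add_argument('--optimizer', type=str, default='sgd')

# A trial rendered from the Katib template might invoke:
#   python3 /opt/mxnet-mnist/mnist.py --batch-size=64 --lr=0.027 --num-epochs=12
args = parser.parse_args(['--batch-size=64', '--lr=0.027', '--num-epochs=12'])
print(args.batch_size, args.lr, args.num_epochs)  # 64 0.027 12
```

Any flag the experiment does not sweep (e.g. --optimizer here) keeps its argparse default, which is why the YAML examples only pass --batch-size=64 plus the templated {{.HyperParameters}}.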