address comments
wuxun-zhang committed Jul 23, 2019 · 1 parent 5712bd5 · commit c39ea0c
Showing 4 changed files with 52 additions and 19 deletions.
2 changes: 1 addition & 1 deletion docs/tutorials/deployment/int8_inference.py
@@ -12,7 +12,7 @@
GluonCV delivers quantized models to improve performance and reduce deployment costs for computer vision inference tasks. In production, lower precision (INT8) brings two main benefits. First, the computation can be accelerated by low-precision instructions such as the Intel Vector Neural Network Instructions (VNNI). Second, lower-precision data types save memory bandwidth, allow better cache locality, and reduce power consumption. This feature delivers up to a 4X speedup on the latest `AWS EC2 C5 instances <https://aws.amazon.com/blogs/aws/now-available-new-c5-instance-sizes-and-bare-metal-instances/>`_ with `Intel Deep Learning Boost (VNNI) <https://www.intel.ai/intel-deep-learning-boost/>`_ enabled hardware, with less than a 0.5% accuracy drop.
Please check out `verify_pretrained.py <https://raw.githubusercontent.com/dmlc/gluon-cv/master/scripts/classification/imagenet/verify_pretrained.py>`_ for ImageNet inference,
-`eval_ssd.py <https://raw.githubusercontent.com/dmlc/gluon-cv/master/scripts/detection/ssd/eval_ssd.py>`_ for SSD inference, and `eval_segmentation.py <https://raw.githubusercontent.com/dmlc/gluon-cv/master/scripts/segmentation/eval_segmentation.py>`_
+`eval_ssd.py <https://raw.githubusercontent.com/dmlc/gluon-cv/master/scripts/detection/ssd/eval_ssd.py>`_ for SSD inference, and `test.py <https://raw.githubusercontent.com/dmlc/gluon-cv/master/scripts/segmentation/test.py>`_
for FCN inference.
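As a quick end-to-end illustration of the workflow described above, the sketch below loads a quantized FCN model from the GluonCV model zoo and runs one forward pass on dummy data. This is a minimal sketch, not part of the tutorial itself: the model name fcn_resnet101_voc_int8 is taken from the unit test added in this commit, the 480x480 input matches the script's crop-size default, and the mask computation mirrors the updated test() function below.

import mxnet as mx
from gluoncv.model_zoo import get_model

# Load the INT8-quantized FCN model (name taken from the unit test below).
# Quantized models are intended for CPU inference (MKL-DNN / VNNI).
model = get_model('fcn_resnet101_voc_int8', pretrained=True)
model.hybridize()

# Dummy 1x3x480x480 input, matching the script's default crop size.
x = mx.random.uniform(shape=(1, 3, 480, 480))
predict = model.forward(x)[0]
# Argmax over the class axis gives the predicted segmentation mask.
mask = mx.nd.squeeze(mx.nd.argmax(predict, 1)).asnumpy()
print(mask.shape)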
Performance
@@ -6,7 +6,7 @@
import sys

import mxnet as mx
-from mxnet import gluon
+from mxnet import gluon, ndarray as nd
from mxnet.gluon.data.vision import transforms

import gluoncv
@@ -22,6 +22,8 @@ def parse_args():
help='model name (default: fcn)')
parser.add_argument('--backbone', type=str, default='resnet101',
help='base network')
+parser.add_argument('--image-shape', type=int, default=480,
+                    help='image shape')
parser.add_argument('--base-size', type=int, default=520,
help='base image size')
parser.add_argument('--crop-size', type=int, default=480,
@@ -33,7 +35,8 @@ def parse_args():
parser.add_argument('--quantized', action='store_true',
help='whether to use quantized model')
parser.add_argument('--batch-size', type=int, default=16)
-parser.add_argument('--num-batches', type=int, default=100)
+parser.add_argument('--num-iterations', type=int, default=100,
+                    help='number of benchmarking iterations.')
parser.add_argument('--workers', type=int, default=4,
help='number of workers for data loading')
parser.add_argument('--pretrained', action="store_true",
@@ -49,6 +52,9 @@ def parse_args():
# evaluation only
parser.add_argument('--eval', action='store_true', default=False,
help='evaluation only')
+# dummy benchmark
+parser.add_argument('--benchmark', action='store_true', default=False,
+                    help='whether to use dummy data for benchmark')

args = parser.parse_args()

@@ -81,13 +87,13 @@ def test(args, model):
testset = get_segmentation_dataset(
args.dataset, split='test', mode=args.mode, **data_kwargs)
size = len(testset)

# get dataloader
+batchify_fn = ms_batchify_fn if args.mode == 'test' else None
test_data = gluon.data.DataLoader(
-testset, args.batch_size, last_batch='keep', shuffle=False, num_workers=args.workers)
+testset, args.batch_size, batchify_fn=batchify_fn, last_batch='keep', shuffle=False, num_workers=args.workers)

print(model)
if not args.eval:
evaluator = MultiEvalModel(model, testset.num_class, ctx_list=args.ctx)
metric = gluoncv.utils.metrics.SegmentationMetric(testset.num_class)

tbar = tqdm(test_data)
@@ -99,17 +105,18 @@ def test(args, model):
data = mx.gluon.utils.split_and_load(batch, ctx_list=args.ctx, batch_axis=0, even_split=False)
outputs = None
for x in data:
-output = model.forward(x)[0]
+output = model.forward(x)
outputs = output if outputs is None else nd.concat(outputs, output, axis=0)
outputs = [outputs]
metric.update(targets, outputs)
pixAcc, mIoU = metric.get()
tbar.set_description('pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU))
else:
im_paths = dsts
-predicts = evaluator.parallel_forward(batch)
-for predict, impath in zip(predicts, im_paths):
-    predict = mx.nd.squeeze(mx.nd.argmax(predict[0], 1)).asnumpy() + \
+for data, impath in zip(batch, im_paths):
+    data = data.as_in_context(args.ctx[0])
+    if len(data.shape) < 4:
+        data = nd.expand_dims(data, axis=0)
+    predict = model.forward(data)[0]
+    predict = mx.nd.squeeze(mx.nd.argmax(predict, 1)).asnumpy() + \
testset.pred_offset
mask = get_color_pallete(predict, args.dataset)
outname = os.path.splitext(impath)[0] + '.png'
@@ -118,6 +125,26 @@ def test(args, model):
print('Inference speed with batchsize %d is %.2f img/sec' % (args.batch_size, speed))


+def benchmarking(args, model):
+    print('-----benchmarking on %s -----' % args.model)
+    bs = args.batch_size
+    num_iterations = args.num_iterations
+    input_shape = (bs, 3, args.image_shape, args.image_shape)
+    size = num_iterations * bs
+    data = [mx.random.uniform(-1.0, 1.0, shape=input_shape, ctx=args.ctx[0], dtype='float32')]
+    dry_run = 5
+    with tqdm(total=size+dry_run*bs) as pbar:
+        for n in range(dry_run + num_iterations):
+            if n == dry_run:
+                tic = time.time()
+            outputs = model.forward(data[0])
+            for output in outputs:
+                output.wait_to_read()
+            pbar.update(bs)
+    speed = size / (time.time() - tic)
+    print('Throughput is %f imgs/sec' % speed)


if __name__ == "__main__":
args = parse_args()

@@ -139,12 +166,6 @@ def test(args, model):
if withQuantization and args.quantized:
model_prefix += '_int8'

-if args.quantized and args.mode != 'val':
-    raise ValueError("Currently, %s mode or is not supported by quantized model." % args.mode)
-
-if args.quantized and args.eval == False:
-    raise ValueError("Currently, only evaluation is supported by quantized model.")

# create network
if args.pretrained:
model = get_model(model_prefix, pretrained=True)
@@ -169,4 +190,7 @@ def test(args, model):
model.hybridize()

print('Testing model: ', args.resume)
-test(args, model)
+if not args.benchmark:
+    test(args, model)
+else:
+    benchmarking(args, model)
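Two details of the new benchmarking path are worth noting. MXNet schedules operators asynchronously, so model.forward() returns before the computation finishes; calling wait_to_read() on each output forces synchronization, making the elapsed wall-clock time cover the real work. The timer also starts only after five dry-run iterations, keeping one-time warm-up costs out of the throughput figure. Below is a standalone sketch of the same pattern; measure_throughput is a hypothetical helper, not part of this commit, written for models whose forward returns a tuple of outputs, as FCN's does.

import time
import mxnet as mx

def measure_throughput(model, batch_size=16, image_shape=480,
                       num_iterations=100, dry_run=5, ctx=mx.cpu()):
    # Hypothetical helper mirroring benchmarking() above.
    x = mx.random.uniform(-1.0, 1.0,
                          shape=(batch_size, 3, image_shape, image_shape),
                          ctx=ctx, dtype='float32')
    for n in range(dry_run + num_iterations):
        if n == dry_run:
            tic = time.time()  # start timing after the warm-up passes
        outputs = model.forward(x)
        for output in outputs:
            output.wait_to_read()  # block until the async compute finishes
    return num_iterations * batch_size / (time.time() - tic)

From the command line, the same measurement runs through the flags added in this commit, e.g. --benchmark --batch-size 16 --num-iterations 100 --image-shape 480; since the data is dummy, no dataset is required.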
File renamed without changes.
9 changes: 9 additions & 0 deletions tests/unittests/test_model_zoo.py
@@ -411,6 +411,15 @@ def test_quantized_ssd_models():
_test_model_list(model_list, ctx, x)


+@unittest.skip("temporarily disabled to fallback to non-mkl version")
+@with_cpu(0)
+def test_quantized_fcn_models():
+    model_list = ['fcn_resnet101_voc_int8', 'fcn_resnet101_coco_int8']
+    ctx = mx.context.current_context()
+    x = mx.random.uniform(shape=(1, 3, 480, 480), ctx=ctx)
+    _test_model_list(model_list, ctx, x)


if __name__ == '__main__':
import nose

