diff --git a/python/mxnet/contrib/quantization.py b/python/mxnet/contrib/quantization.py
index 01051ab7c8e4..a56f8531aa49 100644
--- a/python/mxnet/contrib/quantization.py
+++ b/python/mxnet/contrib/quantization.py
@@ -27,6 +27,7 @@
 import logging
 import os
 import shutil
+import warnings
 import numpy as np
 from ..base import _LIB, check_call, py_str
 from ..base import c_array, c_str, mx_uint, c_str_array
@@ -419,6 +420,7 @@ def __init__(self, calib_data):
         else:
             data_example = [data_example]
         # suppose there must be one label in data_example
+        # TODO(xinyu-intel): little tricky here, need to refactor.
         num_data = len(data_example)
         assert num_data > 0
         # here reshape is to handle the 5D/6D input data
@@ -426,6 +428,10 @@
             data_example[0] = data_example[0].reshape((-1,) + data_example[0].shape[2:])
         self.provide_data = [DataDesc(name='data', shape=(data_example[0].shape))]
         self.provide_data += [DataDesc(name='data{}'.format(i), shape=x.shape)
                               for i, x in enumerate(data_example[1:])]
+        # data0, data1, ..., label
+        if num_data >= 3:
+            self.provide_data = [DataDesc(name='data{}'.format(i), shape=x.shape)
+                                 for i, x in enumerate(data_example[0:])]
         self.batch_size = data_example[0].shape[0]
         self.reset()
@@ -627,8 +633,9 @@ def quantize_model_mkldnn(sym, arg_params, aux_params,
     return qsym, qarg_params, aux_params
 
 def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
-                   excluded_sym_names=None, excluded_op_names=None, calib_mode='entropy',
-                   quantized_dtype='int8', quantize_mode='full', logger=None):
+                   excluded_sym_names=None, excluded_op_names=None,
+                   calib_mode='entropy', quantized_dtype='int8', quantize_mode='full',
+                   LayerOutputCollector=None, logger=None):
     """User-level API for generating a quantized model from a FP32 model w/o calibration
     and a collector for naive or entropy calibration.
     The backend quantized operators are only enabled for Linux systems. Please do not run
@@ -667,6 +674,8 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
         The mode that quantization pass to apply. Support 'full' and 'smart'.
         'full' means quantize all operator if possible. 'smart' means quantization pass will smartly choice which operator should be quantized.
+    LayerOutputCollector : class
+        For customize calibration method usage.
     logger : Object
         A logging object for printing information during the process of quantization.
 
     Returns
@@ -711,9 +720,14 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(),
             if logger:
                 logger.info(
                     'Create a layer output minmax collector for naive calibration')
+        elif calib_mode == 'customize' and LayerOutputCollector is not None:
+            collector = LayerOutputCollector
+            if logger:
+                logger.info(
+                    'Create a customize layer output minmax collector for calibration')
         else:
             raise ValueError('unknown calibration mode %s received,'
-                             ' expected `none`, `naive`, or `entropy`' % calib_mode)
+                             ' expected `none`, `naive`, `entropy` or `customize`' % calib_mode)
         if logger:
             logger.info('Collector created, please use set_monitor_callback'
                         ' to collect calibration information.')
@@ -770,9 +784,11 @@ def calib_graph(qsym, arg_params, aux_params, collector,
                 collector.hist_dict, quantized_dtype, logger=logger)
         elif calib_mode == 'naive':
             th_dict = collector.min_max_dict
+        elif calib_mode == 'customize':
+            th_dict = collector.min_max_dict
         else:
             raise ValueError('unknown calibration mode %s received,'
-                             ' expected `none`, `naive`, or `entropy`' % calib_mode)
+                             ' expected `none`, `naive`, `entropy` or `customize`' % calib_mode)
         qsym = _calibrate_quantized_sym(qsym, th_dict)
     else:
         raise ValueError('please set calibration mode to naive or entropy.')
@@ -783,10 +799,10 @@
     return qsym, qarg_params, aux_params
 
 
-def quantize_net(network, quantized_dtype='auto', quantize_mode='full',
-                 exclude_layers=None, exclude_layers_match=None, exclude_operators=None,
-                 calib_data=None, data_shapes=None, calib_mode='none',
-                 num_calib_examples=None, ctx=cpu(), logger=None):
+def quantize_net_v2(network, quantized_dtype='auto', quantize_mode='full',
+                    exclude_layers=None, exclude_layers_match=None, exclude_operators=None,
+                    calib_data=None, data_shapes=None, calib_mode='none',
+                    num_calib_examples=None, ctx=cpu(), LayerOutputCollector=None, logger=None):
     """User-level API for Gluon users to generate a quantized SymbolBlock from a FP32 HybridBlock w/ or w/o calibration.
     The backend quantized operators are only enabled for Linux systems. Please do not run
     inference using the quantized models on Windows for now.
@@ -830,6 +846,8 @@ def quantize_net(network, quantized_dtype='auto', quantize_mode='full',
     ctx : Context
         Defines the device that users want to run forward propagation on the calibration
         dataset for collecting layer output statistics. Currently, only supports single context.
+    LayerOutputCollector : class
+        For customize calibration method usage.
     logger : Object
         A logging object for printing information during the process of quantization.
 
@@ -906,7 +924,8 @@ def __exit__(self, exc_type, exc_value, traceback):
     qsym, qarg_params, aux_params, collector = quantize_graph(
         sym=symnet, arg_params=args, aux_params=auxs, ctx=ctx,
         excluded_sym_names=exclude_layers, excluded_op_names=exclude_operators,
-        calib_mode=calib_mode, quantized_dtype=quantized_dtype, quantize_mode=quantize_mode, logger=logger)
+        calib_mode=calib_mode, quantized_dtype=quantized_dtype, quantize_mode=quantize_mode,
+        LayerOutputCollector=LayerOutputCollector, logger=logger)
 
     if calib_mode is not None and calib_mode != 'none':
         if not isinstance(ctx, Context):
@@ -915,7 +934,7 @@ def __exit__(self, exc_type, exc_value, traceback):
         if calib_data is None:
             raise ValueError(
                 'calib_data must be provided when calib_mode=%s' % calib_mode)
-        if calib_mode in ['naive', 'entropy']:
+        if calib_mode in ['naive', 'entropy', 'customize']:
             data_names = [pair[0] for pair in calib_data.provide_data]
             mod = Module(symbol=symnet, context=ctx,
                          data_names=data_names, label_names=None)
@@ -956,3 +975,19 @@ def __exit__(self, exc_type, exc_value, traceback):
     net.collect_params().load(param_name, cast_dtype=True, dtype_source='saved')
     net.collect_params().reset_ctx(ctx)
     return net
+
+def quantize_net(network, quantized_dtype='auto', quantize_mode='full',
+                 exclude_layers=None, exclude_layers_match=None, exclude_operators=None,
+                 calib_data=None, data_shapes=None, calib_mode='none',
+                 num_calib_examples=None, ctx=cpu(), logger=None):
+    """User-level API for Gluon users to generate a quantized SymbolBlock from a FP32 HybridBlock w/ or w/o calibration.
+    Will be deprecated after MXNet 2.0, please use quantize_net_v2.
+    """
+    warnings.warn('WARNING: This will be deprecated after MXNet 2.0, please use quantize_net_v2.')
+    return quantize_net_v2(network=network, quantized_dtype=quantized_dtype,
+                           quantize_mode=quantize_mode, exclude_layers=exclude_layers,
+                           exclude_layers_match=exclude_layers_match,
+                           exclude_operators=exclude_operators,
+                           calib_data=calib_data, data_shapes=data_shapes,
+                           calib_mode=calib_mode, num_calib_examples=num_calib_examples,
+                           ctx=ctx, LayerOutputCollector=None, logger=logger)
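For reference, the `customize` contract this patch introduces is minimal: `quantize_graph` takes the user-supplied `LayerOutputCollector` object as the collector whose `collect` callback is registered via `set_monitor_callback` during calibration, and `calib_graph` then reads the collector's `min_max_dict` to derive thresholds. Below is a sketch of how the new path could be driven from the Gluon side. `CustomMinMaxCollector`, `net`, and `calib_loader` are hypothetical names; the `collect(name, arr)` signature and the `min_max_dict` attribute are assumptions modeled on the built-in minmax collector used by the `naive` mode.

import ctypes
import mxnet as mx
from mxnet.base import NDArrayHandle, py_str
from mxnet.ndarray import NDArray
from mxnet.contrib.quantization import quantize_net_v2

class CustomMinMaxCollector(object):
    """Hypothetical user-defined collector for calib_mode='customize'.

    quantize_graph registers `collect` as the monitor callback, and
    calib_graph later reads `min_max_dict` to compute thresholds.
    """
    def __init__(self):
        # {layer_output_name: (min, max)}, consumed when calib_mode='customize'
        self.min_max_dict = {}

    def collect(self, name, arr):
        # Monitor callback: `name` is the output name, `arr` an NDArray handle.
        name = py_str(name)
        arr = NDArray(ctypes.cast(arr, NDArrayHandle), writable=False)
        min_v = arr.min().asscalar()
        max_v = arr.max().asscalar()
        if name in self.min_max_dict:
            cur_min, cur_max = self.min_max_dict[name]
            self.min_max_dict[name] = (min(cur_min, min_v), max(cur_max, max_v))
        else:
            self.min_max_dict[name] = (min_v, max_v)

# `net` (a FP32 HybridBlock) and `calib_loader` (a gluon DataLoader of
# calibration batches) are assumed to exist.
qnet = quantize_net_v2(net, quantized_dtype='auto',
                       calib_mode='customize',
                       LayerOutputCollector=CustomMinMaxCollector(),
                       calib_data=calib_loader, ctx=mx.cpu())

Note that `quantize_graph` assigns `collector = LayerOutputCollector` as-is, so despite the docstring describing the parameter as a class, callers pass a collector instance, as above.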